From 4e1a943563b95a0f1d70edf347c23404b908acd9 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 12 Oct 2023 13:52:24 +0200 Subject: [PATCH 001/249] bulk decompression of text columns --- tsl/src/compression/array.c | 83 +++++++++++++++++ tsl/src/compression/array.h | 7 ++ tsl/src/compression/compression.c | 23 ++++- tsl/src/compression/compression.h | 13 ++- tsl/src/compression/decompress_test_impl.c | 4 +- tsl/src/compression/dictionary.c | 65 +++++++++++++ tsl/src/compression/dictionary.h | 7 ++ .../compression/simple8b_rle_decompress_all.h | 2 +- .../nodes/decompress_chunk/compressed_batch.c | 93 +++++++++++++------ tsl/src/nodes/decompress_chunk/planner.c | 21 ++++- 10 files changed, 278 insertions(+), 40 deletions(-) diff --git a/tsl/src/compression/array.c b/tsl/src/compression/array.c index 58535be92e6..3c98cc4ead8 100644 --- a/tsl/src/compression/array.c +++ b/tsl/src/compression/array.c @@ -19,6 +19,8 @@ #include "compression/simple8b_rle.h" #include "datum_serialize.h" +#include "compression/arrow_c_data_interface.h" + /* A "compressed" array * uint8 has_nulls: 1 iff this has a nulls bitmap stored before the data * Oid element_type: the element stored by this array @@ -459,6 +461,87 @@ tsl_array_decompression_iterator_from_datum_reverse(Datum compressed_array, Oid return &iterator->base; } +static uint64 +pad64(uint64 value) +{ + return ((value + 63) / 64) * 64; +} + +#define ELEMENT_TYPE uint16 +#include "simple8b_rle_decompress_all.h" +#undef ELEMENT_TYPE + +ArrowArray * +tsl_text_array_decompress_all(Datum compressed_array, Oid element_type, MemoryContext dest_mctx) +{ + Assert(element_type == TEXTOID); + void *compressed_data = PG_DETOAST_DATUM(compressed_array); + StringInfoData si = { .data = compressed_data, .len = VARSIZE(compressed_data) }; + ArrayCompressed *header = consumeCompressedData(&si, sizeof(ArrayCompressed)); + + Assert(header->compression_algorithm == COMPRESSION_ALGORITHM_ARRAY); + CheckCompressedData(header->element_type == TEXTOID); + + return text_array_decompress_all_serialized_no_header(&si, header->has_nulls, dest_mctx); +} + +ArrowArray * +text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, + MemoryContext dest_mctx) +{ + Simple8bRleSerialized *nulls_serialized = NULL; + if (has_nulls) + { + Assert(false); + nulls_serialized = bytes_deserialize_simple8b_and_advance(si); + } + (void) nulls_serialized; + + Simple8bRleSerialized *sizes_serialized = bytes_deserialize_simple8b_and_advance(si); + + uint16 sizes[GLOBAL_MAX_ROWS_PER_COMPRESSION]; + const uint16 n = simple8brle_decompress_all_buf_uint16(sizes_serialized, + sizes, + sizeof(sizes) / sizeof(sizes[0])); + + uint32 *offsets = + (uint32 *) MemoryContextAllocZero(dest_mctx, pad64(sizeof(*offsets) * (n + 1))); + uint8 *arrow_bodies = (uint8 *) MemoryContextAllocZero(dest_mctx, pad64(si->len - si->cursor)); + + int offset = 0; + for (int i = 0; i < n; i++) + { + void *vardata = consumeCompressedData(si, sizes[i]); + // CheckCompressedData(VARSIZE_ANY(vardata) == sizes[i]); + // CheckCompressedData(sizes[i] > VARHDRSZ); + const int textlen = VARSIZE_ANY_EXHDR(vardata); + memcpy(&arrow_bodies[offset], VARDATA_ANY(vardata), textlen); + + // fprintf(stderr, "%d: copied: '%s' len %d varsize %d result %.*s\n", + // i, text_to_cstring(vardata), textlen, (int) VARSIZE_ANY(vardata), textlen, + //&arrow_bodies[offset]); + + offsets[i] = offset; + offset += textlen; + } + offsets[n] = offset; + + const int validity_bitmap_bytes = 
sizeof(uint64) * pad64(n); + uint64 *restrict validity_bitmap = MemoryContextAlloc(dest_mctx, validity_bitmap_bytes); + memset(validity_bitmap, 0xFF, validity_bitmap_bytes); + + ArrowArray *result = MemoryContextAllocZero(dest_mctx, sizeof(ArrowArray) + sizeof(void *) * 3); + const void **buffers = (const void **) &result[1]; + buffers[0] = validity_bitmap; + buffers[1] = offsets; + buffers[2] = arrow_bodies; + result->n_buffers = 3; + result->buffers = buffers; + result->length = n; + result->null_count = 0; + return result; +} + DecompressResult array_decompression_iterator_try_next_reverse(DecompressionIterator *base_iter) { diff --git a/tsl/src/compression/array.h b/tsl/src/compression/array.h index 9573eff955d..705fb10ef43 100644 --- a/tsl/src/compression/array.h +++ b/tsl/src/compression/array.h @@ -66,6 +66,12 @@ extern void array_compressed_send(CompressedDataHeader *header, StringInfo buffe extern Datum tsl_array_compressor_append(PG_FUNCTION_ARGS); extern Datum tsl_array_compressor_finish(PG_FUNCTION_ARGS); +ArrowArray *tsl_text_array_decompress_all(Datum compressed_array, Oid element_type, + MemoryContext dest_mctx); + +ArrowArray *text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, + MemoryContext dest_mctx); + #define ARRAY_ALGORITHM_DEFINITION \ { \ .iterator_init_forward = tsl_array_decompression_iterator_from_datum_forward, \ @@ -74,6 +80,7 @@ extern Datum tsl_array_compressor_finish(PG_FUNCTION_ARGS); .compressed_data_recv = array_compressed_recv, \ .compressor_for_type = array_compressor_for_type, \ .compressed_data_storage = TOAST_STORAGE_EXTENDED, \ + .decompress_all = tsl_text_array_decompress_all, \ } #endif diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c index fcdf77417a2..91c252cd13a 100644 --- a/tsl/src/compression/compression.c +++ b/tsl/src/compression/compression.c @@ -126,11 +126,17 @@ DecompressionIterator *(*tsl_get_decompression_iterator_init(CompressionAlgorith } DecompressAllFunction -tsl_get_decompress_all_function(CompressionAlgorithms algorithm) +tsl_get_decompress_all_function(CompressionAlgorithms algorithm, Oid type) { if (algorithm >= _END_COMPRESSION_ALGORITHMS) elog(ERROR, "invalid compression algorithm %d", algorithm); + if (type != TEXTOID && + (algorithm == COMPRESSION_ALGORITHM_DICTIONARY || algorithm == COMPRESSION_ALGORITHM_ARRAY)) + { + return NULL; + } + return definitions[algorithm].decompress_all; } @@ -1758,6 +1764,21 @@ tsl_compressed_data_decompress_reverse(PG_FUNCTION_ARGS) ; } +TS_FUNCTION_INFO_V1(tsl_compressed_data_info); + +Datum +tsl_compressed_data_info(PG_FUNCTION_ARGS) +{ + StringInfoData buf = { 0 }; + initStringInfo(&buf); + + CompressedDataHeader *header = get_compressed_data_header(PG_GETARG_DATUM(0)); + + appendStringInfo(&buf, "algo: %d", header->compression_algorithm); + + PG_RETURN_CSTRING(buf.data); +} + Datum tsl_compressed_data_send(PG_FUNCTION_ARGS) { diff --git a/tsl/src/compression/compression.h b/tsl/src/compression/compression.h index fbfd6e49b1d..d06ab55953d 100644 --- a/tsl/src/compression/compression.h +++ b/tsl/src/compression/compression.h @@ -316,7 +316,8 @@ extern void decompress_chunk(Oid in_table, Oid out_table); extern DecompressionIterator *(*tsl_get_decompression_iterator_init( CompressionAlgorithms algorithm, bool reverse))(Datum, Oid element_type); -extern DecompressAllFunction tsl_get_decompress_all_function(CompressionAlgorithms algorithm); +extern DecompressAllFunction tsl_get_decompress_all_function(CompressionAlgorithms algorithm, 
+ Oid type); typedef struct Chunk Chunk; typedef struct ChunkInsertState ChunkInsertState; @@ -374,9 +375,13 @@ extern RowDecompressor build_decompressor(Relation in_rel, Relation out_rel); #define CORRUPT_DATA_MESSAGE (errcode(ERRCODE_DATA_CORRUPTED)) #endif -#define CheckCompressedData(X) \ - if (unlikely(!(X))) \ - ereport(ERROR, CORRUPT_DATA_MESSAGE) +#define CDSTR(X) #X +#define CDSTR2(X) CDSTR(X) + +#define CheckCompressedData Assert +// #define CheckCompressedData(X) \ +// if (unlikely(!(X))) \ +// ereport(ERROR, CORRUPT_DATA_MESSAGE, errdetail(#X)) inline static void * consumeCompressedData(StringInfo si, int bytes) diff --git a/tsl/src/compression/decompress_test_impl.c b/tsl/src/compression/decompress_test_impl.c index 69897d45b99..c452b173345 100644 --- a/tsl/src/compression/decompress_test_impl.c +++ b/tsl/src/compression/decompress_test_impl.c @@ -42,7 +42,7 @@ FUNCTION_NAME(ALGO, CTYPE)(const uint8 *Data, size_t Size, bool extra_checks) * For routine fuzzing, we only run bulk decompression to make it faster * and the coverage space smaller. */ - DecompressAllFunction decompress_all = tsl_get_decompress_all_function(algo); + DecompressAllFunction decompress_all = tsl_get_decompress_all_function(algo, PGTYPE); decompress_all(compressed_data, PGTYPE, CurrentMemoryContext); return 0; } @@ -53,7 +53,7 @@ FUNCTION_NAME(ALGO, CTYPE)(const uint8 *Data, size_t Size, bool extra_checks) * the row-by-row is old and stable. */ ArrowArray *arrow = NULL; - DecompressAllFunction decompress_all = tsl_get_decompress_all_function(algo); + DecompressAllFunction decompress_all = tsl_get_decompress_all_function(algo, PGTYPE); if (decompress_all) { arrow = decompress_all(compressed_data, PGTYPE, CurrentMemoryContext); diff --git a/tsl/src/compression/dictionary.c b/tsl/src/compression/dictionary.c index 06fa9c6d5ec..695a8303f9f 100644 --- a/tsl/src/compression/dictionary.c +++ b/tsl/src/compression/dictionary.c @@ -25,6 +25,7 @@ #include "compression/array.h" #include "compression/dictionary_hash.h" #include "compression/datum_serialize.h" +#include "compression/arrow_c_data_interface.h" /* * A compression bitmap is stored as @@ -334,6 +335,13 @@ dictionary_compressor_finish(DictionaryCompressor *compressor) average_element_size = sizes.dictionary_size / sizes.num_distinct; expected_array_size = average_element_size * sizes.dictionary_compressed_indexes->num_elements; compressed = dictionary_compressed_from_serialization_info(sizes, compressor->type); + fprintf(stderr, + "dict size %ld, distinct %ld, avg element size %ld, easize %ld, totalsize %ld\n", + sizes.dictionary_size, + (uint64) sizes.num_distinct, + average_element_size, + expected_array_size, + sizes.total_size); if (expected_array_size < sizes.total_size) return dictionary_compressed_to_array_compressed(compressed); @@ -395,6 +403,63 @@ dictionary_decompression_iterator_init(DictionaryDecompressionIterator *iter, co } Assert(array_decompression_iterator_try_next_forward(dictionary_iterator).is_done); } + +static uint64 +pad64(uint64 value) +{ + return ((value + 63) / 64) * 64; +} + +#define ELEMENT_TYPE int16 +#include "simple8b_rle_decompress_all.h" +#undef ELEMENT_TYPE + +ArrowArray * +tsl_text_dictionary_decompress_all(Datum compressed, Oid element_type, MemoryContext dest_mctx) +{ + Assert(element_type == TEXTOID); + + compressed = PointerGetDatum(PG_DETOAST_DATUM(compressed)); + + StringInfoData si = { .data = DatumGetPointer(compressed), .len = VARSIZE(compressed) }; + + const DictionaryCompressed *header = 
consumeCompressedData(&si, sizeof(DictionaryCompressed)); + + Assert(header->compression_algorithm == COMPRESSION_ALGORITHM_DICTIONARY); + CheckCompressedData(header->element_type == TEXTOID); + + Simple8bRleSerialized *indices_serialized = bytes_deserialize_simple8b_and_advance(&si); + const uint16 n_padded = indices_serialized->num_elements + 63; + int16 *indices = MemoryContextAlloc(dest_mctx, sizeof(int16) * n_padded); + const uint16 n = simple8brle_decompress_all_buf_int16(indices_serialized, indices, n_padded); + + if (header->has_nulls) + { + Assert(false); + Simple8bRleSerialized *nulls_serialized = bytes_deserialize_simple8b_and_advance(&si); + (void) nulls_serialized; + } + + const int validity_bitmap_bytes = sizeof(uint64) * pad64(n); + uint64 *restrict validity_bitmap = MemoryContextAlloc(dest_mctx, validity_bitmap_bytes); + memset(validity_bitmap, 0xFF, validity_bitmap_bytes); + + ArrowArray *dict = + text_array_decompress_all_serialized_no_header(&si, /* has_nulls = */ false, dest_mctx); + CheckCompressedData(header->num_distinct == dict->length); + + ArrowArray *result = MemoryContextAllocZero(dest_mctx, sizeof(ArrowArray) + sizeof(void *) * 2); + const void **buffers = (const void **) &result[1]; + buffers[0] = validity_bitmap; + buffers[1] = indices; + result->n_buffers = 2; + result->buffers = buffers; + result->length = n; + result->null_count = 0; + result->dictionary = dict; + return result; +} + DecompressionIterator * tsl_dictionary_decompression_iterator_from_datum_forward(Datum dictionary_compressed, Oid element_type) diff --git a/tsl/src/compression/dictionary.h b/tsl/src/compression/dictionary.h index 081bf578b8b..a316824af26 100644 --- a/tsl/src/compression/dictionary.h +++ b/tsl/src/compression/dictionary.h @@ -47,6 +47,12 @@ extern Datum dictionary_compressed_recv(StringInfo buf); extern Datum tsl_dictionary_compressor_append(PG_FUNCTION_ARGS); extern Datum tsl_dictionary_compressor_finish(PG_FUNCTION_ARGS); +ArrowArray *tsl_text_array_decompress_all(Datum compressed_array, Oid element_type, + MemoryContext dest_mctx); + +ArrowArray *tsl_text_dictionary_decompress_all(Datum compressed, Oid element_type, + MemoryContext dest_mctx); + #define DICTIONARY_ALGORITHM_DEFINITION \ { \ .iterator_init_forward = tsl_dictionary_decompression_iterator_from_datum_forward, \ @@ -55,6 +61,7 @@ extern Datum tsl_dictionary_compressor_finish(PG_FUNCTION_ARGS); .compressed_data_recv = dictionary_compressed_recv, \ .compressor_for_type = dictionary_compressor_for_type, \ .compressed_data_storage = TOAST_STORAGE_EXTENDED, \ + .decompress_all = tsl_text_dictionary_decompress_all, \ } #endif diff --git a/tsl/src/compression/simple8b_rle_decompress_all.h b/tsl/src/compression/simple8b_rle_decompress_all.h index 48a168fb581..5466d1efc56 100644 --- a/tsl/src/compression/simple8b_rle_decompress_all.h +++ b/tsl/src/compression/simple8b_rle_decompress_all.h @@ -86,7 +86,7 @@ FUNCTION_NAME(simple8brle_decompress_all_buf, * might be incorrect. 
\ */ \ const uint16 n_block_values = SIMPLE8B_NUM_ELEMENTS[X]; \ - CheckCompressedData(decompressed_index + n_block_values < n_buffer_elements); \ + CheckCompressedData(decompressed_index + n_block_values <= n_buffer_elements); \ \ const uint64 bitmask = simple8brle_selector_get_bitmask(X); \ \ diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 97492449189..60cfa61cf02 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -335,7 +335,8 @@ compressed_batch_set_compressed_tuple(DecompressChunkState *chunk_state, } DecompressAllFunction decompress_all = - tsl_get_decompress_all_function(header->compression_algorithm); + tsl_get_decompress_all_function(header->compression_algorithm, + column_description->typid); Assert(decompress_all != NULL); MemoryContext context_before_decompression = @@ -434,6 +435,20 @@ compressed_batch_set_compressed_tuple(DecompressChunkState *chunk_state, MemoryContextSwitchTo(old_context); } +static Datum +get_text_datum(ArrowArray *arrow, int arrow_row) +{ + Assert(arrow->dictionary == NULL); + const uint32 start = ((uint32 *) arrow->buffers[1])[arrow_row]; + const int32 value_bytes = ((uint32 *) arrow->buffers[1])[arrow_row + 1] - start; + Assert(value_bytes >= 0); + Datum datum = PointerGetDatum(palloc0(value_bytes + VARHDRSZ)); + SET_VARSIZE(datum, value_bytes + VARHDRSZ); + memcpy(VARDATA(datum), &((uint8 *) arrow->buffers[2])[start], value_bytes); + + return datum; +} + /* * Construct the next tuple in the decompressed scan slot. * Doesn't check the quals. @@ -473,39 +488,59 @@ compressed_batch_make_next_tuple(DecompressChunkState *chunk_state, } else if (column_values.arrow_values != NULL) { - const char *restrict src = column_values.arrow_values; - Assert(column_values.value_bytes > 0); - - /* - * The conversion of Datum to more narrow types will truncate - * the higher bytes, so we don't care if we read some garbage - * into them, and can always read 8 bytes. These are unaligned - * reads, so technically we have to do memcpy. - */ - uint64 value; - memcpy(&value, &src[column_values.value_bytes * arrow_row], 8); - -#ifdef USE_FLOAT8_BYVAL - Datum datum = Int64GetDatum(value); -#else - /* - * On 32-bit systems, the data larger than 4 bytes go by - * reference, so we have to jump through these hoops. - */ - Datum datum; - if (column_values.value_bytes <= 4) + const AttrNumber attr = AttrNumberGetAttrOffset(column_values.output_attno); + if (column_values.value_bytes == -1) { - datum = Int32GetDatum((uint32) value); + if (column_values.arrow->dictionary == NULL) + { + decompressed_scan_slot->tts_values[attr] = + get_text_datum(column_values.arrow, arrow_row); + } + else + { + const int16 index = ((int16 *) column_values.arrow->buffers[1])[arrow_row]; + decompressed_scan_slot->tts_values[attr] = + get_text_datum(column_values.arrow->dictionary, index); + } + + decompressed_scan_slot->tts_isnull[attr] = + !arrow_row_is_valid(column_values.arrow->buffers[0], arrow_row); } else { - datum = Int64GetDatum(value); - } + Assert(column_values.value_bytes > 0); + const char *restrict src = column_values.arrow_values; + + /* + * The conversion of Datum to more narrow types will truncate + * the higher bytes, so we don't care if we read some garbage + * into them, and can always read 8 bytes. These are unaligned + * reads, so technically we have to do memcpy. 
+ */ + uint64 value; + memcpy(&value, &src[column_values.value_bytes * arrow_row], 8); + +#ifdef USE_FLOAT8_BYVAL + Datum datum = Int64GetDatum(value); +#else + /* + * On 32-bit systems, the data larger than 4 bytes go by + * reference, so we have to jump through these hoops. + */ + Datum datum; + if (column_values.value_bytes <= 4) + { + datum = Int32GetDatum((uint32) value); + } + else + { + datum = Int64GetDatum(value); + } #endif - const AttrNumber attr = AttrNumberGetAttrOffset(column_values.output_attno); - decompressed_scan_slot->tts_values[attr] = datum; - decompressed_scan_slot->tts_isnull[attr] = - !arrow_row_is_valid(column_values.arrow_validity, arrow_row); + decompressed_scan_slot->tts_values[attr] = datum; + decompressed_scan_slot->tts_isnull[attr] = + !arrow_row_is_valid(column_values.arrow_validity, arrow_row); + } } } diff --git a/tsl/src/nodes/decompress_chunk/planner.c b/tsl/src/nodes/decompress_chunk/planner.c index bc83707e0b1..443c4345ea4 100644 --- a/tsl/src/nodes/decompress_chunk/planner.c +++ b/tsl/src/nodes/decompress_chunk/planner.c @@ -254,13 +254,28 @@ build_decompression_map(PlannerInfo *root, DecompressChunkPath *path, List *scan lappend_int(path->is_segmentby_column, compression_info && compression_info->segmentby_column_index != 0); - const bool bulk_decompression_possible = - destination_attno_in_uncompressed_chunk > 0 && compression_info && - tsl_get_decompress_all_function(compression_info->algo_id) != NULL; + /* + * Determine if we can use bulk decompression for this column. + */ + bool bulk_decompression_possible = false; + if (destination_attno_in_uncompressed_chunk > 0 && compression_info) + { + Oid typid = + get_atttype(path->info->chunk_rte->relid, destination_attno_in_uncompressed_chunk); + Assert(OidIsValid(typid)); + if (tsl_get_decompress_all_function(compression_info->algo_id, typid) != NULL) + { + bulk_decompression_possible = true; + } + } path->have_bulk_decompression_columns |= bulk_decompression_possible; path->bulk_decompression_column = lappend_int(path->bulk_decompression_column, bulk_decompression_possible); + /* + * Save information about decompressed columns in uncompressed chunk + * for planning of vectorized filters. + */ if (destination_attno_in_uncompressed_chunk > 0) { path->uncompressed_chunk_attno_to_compression_info From 0880fe060edc71022905791f271f8b80628eee13 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 12 Oct 2023 15:10:28 +0200 Subject: [PATCH 002/249] memory handling cleanup --- .../nodes/decompress_chunk/compressed_batch.c | 44 ++++++++++++------- tsl/src/nodes/decompress_chunk/exec.c | 9 +++- 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 60cfa61cf02..5b331e92509 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -258,13 +258,9 @@ compressed_batch_set_compressed_tuple(DecompressChunkState *chunk_state, Assert(chunk_state->decompressed_slot_scan_tdesc->tdrefcount == -1); batch_state->decompressed_scan_slot = - MakeSingleTupleTableSlot(chunk_state->decompressed_slot_scan_tdesc, slot->tts_ops); + MakeSingleTupleTableSlot(chunk_state->decompressed_slot_scan_tdesc, + &TTSOpsVirtual); - /* Ensure that all fields are empty. 
Calling ExecClearTuple is not enough - * because some attributes might not be populated (e.g., due to a dropped - * column) and these attributes need to be set to null. */ - ExecStoreAllNullTuple(batch_state->decompressed_scan_slot); - ExecClearTuple(batch_state->decompressed_scan_slot); } else { @@ -272,6 +268,12 @@ compressed_batch_set_compressed_tuple(DecompressChunkState *chunk_state, Assert(batch_state->decompressed_scan_slot != NULL); } + /* Ensure that all fields are empty. Calling ExecClearTuple is not enough + * because some attributes might not be populated (e.g., due to a dropped + * column) and these attributes need to be set to null. */ + ExecStoreAllNullTuple(batch_state->decompressed_scan_slot); + ExecClearTuple(batch_state->decompressed_scan_slot); + ExecCopySlot(batch_state->compressed_slot, subslot); Assert(!TupIsNull(batch_state->compressed_slot)); @@ -435,18 +437,26 @@ compressed_batch_set_compressed_tuple(DecompressChunkState *chunk_state, MemoryContextSwitchTo(old_context); } -static Datum -get_text_datum(ArrowArray *arrow, int arrow_row) +static void +store_text_datum(ArrowArray *arrow, int arrow_row, Datum *dest, MemoryContext mctx) { Assert(arrow->dictionary == NULL); const uint32 start = ((uint32 *) arrow->buffers[1])[arrow_row]; const int32 value_bytes = ((uint32 *) arrow->buffers[1])[arrow_row + 1] - start; Assert(value_bytes >= 0); - Datum datum = PointerGetDatum(palloc0(value_bytes + VARHDRSZ)); - SET_VARSIZE(datum, value_bytes + VARHDRSZ); - memcpy(VARDATA(datum), &((uint8 *) arrow->buffers[2])[start], value_bytes); - return datum; + const int total_bytes = value_bytes + VARHDRSZ; + if (DatumGetPointer(*dest) == NULL) + { + *dest = PointerGetDatum(MemoryContextAlloc(mctx, total_bytes)); + } + else + { + *dest = PointerGetDatum(repalloc(DatumGetPointer(*dest), total_bytes)); + } + + SET_VARSIZE(*dest, total_bytes); + memcpy(VARDATA(*dest), &((uint8 *) arrow->buffers[2])[start], value_bytes); } /* @@ -493,14 +503,16 @@ compressed_batch_make_next_tuple(DecompressChunkState *chunk_state, { if (column_values.arrow->dictionary == NULL) { - decompressed_scan_slot->tts_values[attr] = - get_text_datum(column_values.arrow, arrow_row); + store_text_datum(column_values.arrow, arrow_row, + &decompressed_scan_slot->tts_values[attr], + batch_state->per_batch_context); } else { const int16 index = ((int16 *) column_values.arrow->buffers[1])[arrow_row]; - decompressed_scan_slot->tts_values[attr] = - get_text_datum(column_values.arrow->dictionary, index); + store_text_datum(column_values.arrow->dictionary, index, + &decompressed_scan_slot->tts_values[attr], + batch_state->per_batch_context); } decompressed_scan_slot->tts_isnull[attr] = diff --git a/tsl/src/nodes/decompress_chunk/exec.c b/tsl/src/nodes/decompress_chunk/exec.c index 38c05b03503..9f7f0bc3f4b 100644 --- a/tsl/src/nodes/decompress_chunk/exec.c +++ b/tsl/src/nodes/decompress_chunk/exec.c @@ -425,9 +425,14 @@ decompress_chunk_begin(CustomScanState *node, EState *estate, int eflags) DecompressChunkColumnDescription *column = &chunk_state->template_columns[i]; if (column->bulk_decompression_supported) { - /* Values array, with 64 element padding (actually we have less). */ + /* + * Values array, with 64 element padding (actually we have less). + * + * For variable-length types (we only have text) we can't + * estimate the width currently. + */ chunk_state->batch_memory_context_bytes += - (GLOBAL_MAX_ROWS_PER_COMPRESSION + 64) * column->value_bytes; + (GLOBAL_MAX_ROWS_PER_COMPRESSION + 64) * ( column->value_bytes > 0 ? 
column->value_bytes : 16 ); /* Also nulls bitmap. */ chunk_state->batch_memory_context_bytes += GLOBAL_MAX_ROWS_PER_COMPRESSION / (64 * sizeof(uint64)); From 3aaf81bae37db982ebf21caa501f365dcc6adae0 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 12 Oct 2023 17:23:50 +0200 Subject: [PATCH 003/249] filters --- .../nodes/decompress_chunk/compressed_batch.c | 59 ++++++--- tsl/src/nodes/decompress_chunk/exec.c | 3 +- .../decompress_chunk/vector_predicates.c | 118 ++++++++++++++++++ 3 files changed, 160 insertions(+), 20 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 5b331e92509..326a031f5d6 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -206,6 +206,23 @@ apply_vector_quals(DecompressChunkState *chunk_state, DecompressBatchState *batc } } +static int +get_max_element_bytes(ArrowArray *text_array) +{ + int maxbytes = 0; + uint32 *offsets = (uint32 *) text_array->buffers[1]; + for (int i = 0; i < text_array->length; i++) + { + const int curbytes = offsets[i + 1] - offsets[i]; + if (curbytes > maxbytes) + { + maxbytes = curbytes; + } + } + + return maxbytes; +} + /* * Initialize the batch decompression state with the new compressed tuple. */ @@ -258,9 +275,7 @@ compressed_batch_set_compressed_tuple(DecompressChunkState *chunk_state, Assert(chunk_state->decompressed_slot_scan_tdesc->tdrefcount == -1); batch_state->decompressed_scan_slot = - MakeSingleTupleTableSlot(chunk_state->decompressed_slot_scan_tdesc, - &TTSOpsVirtual); - + MakeSingleTupleTableSlot(chunk_state->decompressed_slot_scan_tdesc, &TTSOpsVirtual); } else { @@ -370,6 +385,20 @@ compressed_batch_set_compressed_tuple(DecompressChunkState *chunk_state, column_values->value_bytes = get_typlen(column_description->typid); + if (column_values->value_bytes == -1) + { + const int maxbytes = + VARHDRSZ + + (column_values->arrow->dictionary ? 
+ get_max_element_bytes(column_values->arrow->dictionary) : + get_max_element_bytes(column_values->arrow)); + + const AttrNumber attr = + AttrNumberGetAttrOffset(column_values->output_attno); + batch_state->decompressed_scan_slot->tts_values[attr] = PointerGetDatum( + MemoryContextAlloc(batch_state->per_batch_context, maxbytes)); + } + break; } @@ -438,7 +467,7 @@ compressed_batch_set_compressed_tuple(DecompressChunkState *chunk_state, } static void -store_text_datum(ArrowArray *arrow, int arrow_row, Datum *dest, MemoryContext mctx) +store_text_datum(ArrowArray *arrow, int arrow_row, Datum *dest) { Assert(arrow->dictionary == NULL); const uint32 start = ((uint32 *) arrow->buffers[1])[arrow_row]; @@ -446,15 +475,7 @@ store_text_datum(ArrowArray *arrow, int arrow_row, Datum *dest, MemoryContext mc Assert(value_bytes >= 0); const int total_bytes = value_bytes + VARHDRSZ; - if (DatumGetPointer(*dest) == NULL) - { - *dest = PointerGetDatum(MemoryContextAlloc(mctx, total_bytes)); - } - else - { - *dest = PointerGetDatum(repalloc(DatumGetPointer(*dest), total_bytes)); - } - + Assert(DatumGetPointer(*dest) != NULL); SET_VARSIZE(*dest, total_bytes); memcpy(VARDATA(*dest), &((uint8 *) arrow->buffers[2])[start], value_bytes); } @@ -503,16 +524,16 @@ compressed_batch_make_next_tuple(DecompressChunkState *chunk_state, { if (column_values.arrow->dictionary == NULL) { - store_text_datum(column_values.arrow, arrow_row, - &decompressed_scan_slot->tts_values[attr], - batch_state->per_batch_context); + store_text_datum(column_values.arrow, + arrow_row, + &decompressed_scan_slot->tts_values[attr]); } else { const int16 index = ((int16 *) column_values.arrow->buffers[1])[arrow_row]; - store_text_datum(column_values.arrow->dictionary, index, - &decompressed_scan_slot->tts_values[attr], - batch_state->per_batch_context); + store_text_datum(column_values.arrow->dictionary, + index, + &decompressed_scan_slot->tts_values[attr]); } decompressed_scan_slot->tts_isnull[attr] = diff --git a/tsl/src/nodes/decompress_chunk/exec.c b/tsl/src/nodes/decompress_chunk/exec.c index 9f7f0bc3f4b..2a6cec9de60 100644 --- a/tsl/src/nodes/decompress_chunk/exec.c +++ b/tsl/src/nodes/decompress_chunk/exec.c @@ -432,7 +432,8 @@ decompress_chunk_begin(CustomScanState *node, EState *estate, int eflags) * estimate the width currently. */ chunk_state->batch_memory_context_bytes += - (GLOBAL_MAX_ROWS_PER_COMPRESSION + 64) * ( column->value_bytes > 0 ? column->value_bytes : 16 ); + (GLOBAL_MAX_ROWS_PER_COMPRESSION + 64) * + (column->value_bytes > 0 ? column->value_bytes : 16); /* Also nulls bitmap. 
*/ chunk_state->batch_memory_context_bytes += GLOBAL_MAX_ROWS_PER_COMPRESSION / (64 * sizeof(uint64)); diff --git a/tsl/src/nodes/decompress_chunk/vector_predicates.c b/tsl/src/nodes/decompress_chunk/vector_predicates.c index c878fde6de9..1b5974b4974 100644 --- a/tsl/src/nodes/decompress_chunk/vector_predicates.c +++ b/tsl/src/nodes/decompress_chunk/vector_predicates.c @@ -19,6 +19,120 @@ #include "pred_vector_const_arithmetic_all.c" +#include "compression/compression.h" + +static void +vector_const_texteq_nodict(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result) +{ + Assert(!arrow->dictionary); + + text *consttext = (text *) DatumGetPointer(constdatum); + const size_t textlen = VARSIZE_ANY_EXHDR(consttext); + const uint8 *cstring = (uint8 *) VARDATA_ANY(consttext); + const uint32 *offsets = (uint32 *) arrow->buffers[1]; + const uint8 *values = (uint8 *) arrow->buffers[2]; + + const size_t n = arrow->length; + for (size_t outer = 0; outer < n / 64; outer++) + { + uint64 word = 0; + for (size_t inner = 0; inner < 64; inner++) + { + const size_t row = outer * 64 + inner; + const size_t bit_index = inner; +#define INNER_LOOP \ + const uint32 start = offsets[row]; \ + const uint32 end = offsets[row + 1]; \ + bool valid = false; \ + if (end - start == textlen) \ + { \ + valid = true; \ + for (size_t character_index = 0; character_index < textlen; character_index++) \ + { \ + valid &= cstring[character_index] == values[start + character_index]; \ + } \ + } \ + word |= ((uint64) valid) << bit_index; \ + // fprintf(stderr, "plain row %ld: valid %d\n", row, valid); + + INNER_LOOP + } + result[outer] &= word; + } + + if (n % 64) + { + uint64 word = 0; + for (size_t row = (n / 64) * 64; row < n; row++) + { + const size_t bit_index = row % 64; + INNER_LOOP + } + result[n / 64] &= word; + } + +#undef INNER_LOOP +} + +static void +vector_const_texteq(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result) +{ + /* Account for nulls which shouldn't pass the predicate. */ + const size_t n = arrow->length; + const size_t n_words = (n + 63) / 64; + const uint64 *restrict validity = (uint64 *restrict) arrow->buffers[0]; + for (size_t i = 0; i < n_words; i++) + { + result[i] &= validity[i]; + } + + if (!arrow->dictionary) + { + vector_const_texteq_nodict(arrow, constdatum, result); + return; + } + + /* Run the predicate on dictionary. */ + uint64 dict_result[(GLOBAL_MAX_ROWS_PER_COMPRESSION + 63) / 64 * 64]; + memset(dict_result, 0xFF, n_words * 8); + vector_const_texteq_nodict(arrow->dictionary, constdatum, dict_result); + + /* Translate dictionary results to per-value results. */ + int16 *restrict indices = (int16 *) arrow->buffers[1]; + for (size_t outer = 0; outer < n / 64; outer++) + { + uint64 word = 0; + for (size_t inner = 0; inner < 64; inner++) + { + const size_t row = outer * 64 + inner; + const size_t bit_index = inner; +#define INNER_LOOP \ + const int16 index = indices[row]; \ + const bool valid = arrow_row_is_valid(dict_result, index); \ + word |= ((uint64) valid) << bit_index; + + INNER_LOOP + + // fprintf(stderr, "dict-coded row %ld: index %d, valid %d\n", row, index, + //valid); + } + result[outer] &= word; + } + + if (n % 64) + { + uint64 word = 0; + for (size_t row = (n / 64) * 64; row < n; row++) + { + const size_t bit_index = row % 64; + + INNER_LOOP + } + result[n / 64] &= word; + } +#undef INNER_LOOP +} + /* * Look up the vectorized implementation for a Postgres predicate, specified by * its Oid in pg_proc. 
Note that this Oid is different from the opcode. @@ -31,6 +145,10 @@ void (*get_vector_const_predicate(Oid pg_predicate))(const ArrowArray *, const D #define GENERATE_DISPATCH_TABLE #include "pred_vector_const_arithmetic_all.c" #undef GENERATE_DISPATCH_TABLE + + case F_TEXTEQ: + return vector_const_texteq; } + return NULL; } From 362d7f3c24c697457b7f38c908517ea928b60b4b Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 17 Oct 2023 14:47:14 +0200 Subject: [PATCH 004/249] nulls --- tsl/src/compression/array.c | 69 ++++++++++++++++--- tsl/src/compression/dictionary.c | 69 ++++++++++++++++--- .../decompress_chunk/vector_predicates.c | 13 +--- 3 files changed, 119 insertions(+), 32 deletions(-) diff --git a/tsl/src/compression/array.c b/tsl/src/compression/array.c index 3c98cc4ead8..9acd8f1a037 100644 --- a/tsl/src/compression/array.c +++ b/tsl/src/compression/array.c @@ -17,6 +17,7 @@ #include "compression/array.h" #include "compression/compression.h" #include "compression/simple8b_rle.h" +#include "compression/simple8b_rle_bitmap.h" #include "datum_serialize.h" #include "compression/arrow_c_data_interface.h" @@ -492,24 +493,24 @@ text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, Simple8bRleSerialized *nulls_serialized = NULL; if (has_nulls) { - Assert(false); nulls_serialized = bytes_deserialize_simple8b_and_advance(si); } - (void) nulls_serialized; Simple8bRleSerialized *sizes_serialized = bytes_deserialize_simple8b_and_advance(si); uint16 sizes[GLOBAL_MAX_ROWS_PER_COMPRESSION]; - const uint16 n = simple8brle_decompress_all_buf_uint16(sizes_serialized, - sizes, - sizeof(sizes) / sizeof(sizes[0])); + const uint16 n_notnull = + simple8brle_decompress_all_buf_uint16(sizes_serialized, + sizes, + sizeof(sizes) / sizeof(sizes[0])); + const int n_total = has_nulls ? nulls_serialized->num_elements : n_notnull; uint32 *offsets = - (uint32 *) MemoryContextAllocZero(dest_mctx, pad64(sizeof(*offsets) * (n + 1))); + (uint32 *) MemoryContextAllocZero(dest_mctx, pad64(sizeof(*offsets) * (n_total + 1))); uint8 *arrow_bodies = (uint8 *) MemoryContextAllocZero(dest_mctx, pad64(si->len - si->cursor)); int offset = 0; - for (int i = 0; i < n; i++) + for (int i = 0; i < n_notnull; i++) { void *vardata = consumeCompressedData(si, sizes[i]); // CheckCompressedData(VARSIZE_ANY(vardata) == sizes[i]); @@ -524,12 +525,58 @@ text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, offsets[i] = offset; offset += textlen; } - offsets[n] = offset; + offsets[n_notnull] = offset; - const int validity_bitmap_bytes = sizeof(uint64) * pad64(n); + const int validity_bitmap_bytes = sizeof(uint64) * pad64(n_total); uint64 *restrict validity_bitmap = MemoryContextAlloc(dest_mctx, validity_bitmap_bytes); memset(validity_bitmap, 0xFF, validity_bitmap_bytes); + if (has_nulls) + { + /* + * We have decompressed the data with nulls skipped, reshuffle it + * according to the nulls bitmap. + */ + Simple8bRleBitmap nulls = simple8brle_bitmap_decompress(nulls_serialized); + CheckCompressedData(n_notnull + simple8brle_bitmap_num_ones(&nulls) == n_total); + + int current_notnull_element = n_notnull - 1; + for (int i = n_total - 1; i >= 0; i--) + { + Assert(i >= current_notnull_element); + + if (simple8brle_bitmap_get_at(&nulls, i)) + { + arrow_set_row_validity(validity_bitmap, i, false); + } + else + { + Assert(current_notnull_element >= 0); + /* + * The index of the corresponding offset is higher by one than + * the index of the element. 
The offset[0] is never affected by + * this shuffling and is always 0. + */ + offsets[i + 1] = offsets[current_notnull_element + 1]; + current_notnull_element--; + } + } + + Assert(current_notnull_element == -1); + } + else + { + /* + * The validity bitmap size is a multiple of 64 bits. Fill the tail bits + * with zeros, because the corresponding elements are not valid. + */ + if (n_total % 64) + { + const uint64 tail_mask = -1ULL >> (64 - n_total % 64); + validity_bitmap[n_total / 64] &= tail_mask; + } + } + ArrowArray *result = MemoryContextAllocZero(dest_mctx, sizeof(ArrowArray) + sizeof(void *) * 3); const void **buffers = (const void **) &result[1]; buffers[0] = validity_bitmap; @@ -537,8 +584,8 @@ text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, buffers[2] = arrow_bodies; result->n_buffers = 3; result->buffers = buffers; - result->length = n; - result->null_count = 0; + result->length = n_total; + result->null_count = n_total - n_notnull; return result; } diff --git a/tsl/src/compression/dictionary.c b/tsl/src/compression/dictionary.c index 695a8303f9f..12ea121bf29 100644 --- a/tsl/src/compression/dictionary.c +++ b/tsl/src/compression/dictionary.c @@ -22,6 +22,7 @@ #include "compression/compression.h" #include "compression/dictionary.h" #include "compression/simple8b_rle.h" +#include "compression/simple8b_rle_bitmap.h" #include "compression/array.h" #include "compression/dictionary_hash.h" #include "compression/datum_serialize.h" @@ -429,33 +430,79 @@ tsl_text_dictionary_decompress_all(Datum compressed, Oid element_type, MemoryCon CheckCompressedData(header->element_type == TEXTOID); Simple8bRleSerialized *indices_serialized = bytes_deserialize_simple8b_and_advance(&si); - const uint16 n_padded = indices_serialized->num_elements + 63; - int16 *indices = MemoryContextAlloc(dest_mctx, sizeof(int16) * n_padded); - const uint16 n = simple8brle_decompress_all_buf_int16(indices_serialized, indices, n_padded); + Simple8bRleSerialized *nulls_serialized = NULL; if (header->has_nulls) { - Assert(false); - Simple8bRleSerialized *nulls_serialized = bytes_deserialize_simple8b_and_advance(&si); - (void) nulls_serialized; + nulls_serialized = bytes_deserialize_simple8b_and_advance(&si); } - const int validity_bitmap_bytes = sizeof(uint64) * pad64(n); - uint64 *restrict validity_bitmap = MemoryContextAlloc(dest_mctx, validity_bitmap_bytes); - memset(validity_bitmap, 0xFF, validity_bitmap_bytes); + const uint16 n_notnull = indices_serialized->num_elements; + const uint16 n_total = header->has_nulls ? nulls_serialized->num_elements : n_notnull; + const uint16 n_padded = n_total + 63; + int16 *indices = MemoryContextAlloc(dest_mctx, sizeof(int16) * n_padded); + + const uint16 n_decompressed = + simple8brle_decompress_all_buf_int16(indices_serialized, indices, n_padded); + CheckCompressedData(n_decompressed == n_notnull); ArrowArray *dict = text_array_decompress_all_serialized_no_header(&si, /* has_nulls = */ false, dest_mctx); CheckCompressedData(header->num_distinct == dict->length); + const int validity_bitmap_bytes = sizeof(uint64) * pad64(n_total); + uint64 *restrict validity_bitmap = MemoryContextAlloc(dest_mctx, validity_bitmap_bytes); + memset(validity_bitmap, 0xFF, validity_bitmap_bytes); + + if (header->has_nulls) + { + /* + * We have decompressed the data with nulls skipped, reshuffle it + * according to the nulls bitmap. 
+ */ + Simple8bRleBitmap nulls = simple8brle_bitmap_decompress(nulls_serialized); + CheckCompressedData(n_notnull + simple8brle_bitmap_num_ones(&nulls) == n_total); + + int current_notnull_element = n_notnull - 1; + for (int i = n_total - 1; i >= 0; i--) + { + Assert(i >= current_notnull_element); + + if (simple8brle_bitmap_get_at(&nulls, i)) + { + arrow_set_row_validity(validity_bitmap, i, false); + } + else + { + Assert(current_notnull_element >= 0); + indices[i] = indices[current_notnull_element]; + current_notnull_element--; + } + } + + Assert(current_notnull_element == -1); + } + else + { + /* + * The validity bitmap size is a multiple of 64 bits. Fill the tail bits + * with zeros, because the corresponding elements are not valid. + */ + if (n_total % 64) + { + const uint64 tail_mask = -1ULL >> (64 - n_total % 64); + validity_bitmap[n_total / 64] &= tail_mask; + } + } + ArrowArray *result = MemoryContextAllocZero(dest_mctx, sizeof(ArrowArray) + sizeof(void *) * 2); const void **buffers = (const void **) &result[1]; buffers[0] = validity_bitmap; buffers[1] = indices; result->n_buffers = 2; result->buffers = buffers; - result->length = n; - result->null_count = 0; + result->length = n_total; + result->null_count = n_total - n_notnull; result->dictionary = dict; return result; } diff --git a/tsl/src/nodes/decompress_chunk/vector_predicates.c b/tsl/src/nodes/decompress_chunk/vector_predicates.c index 1b5974b4974..981073dd015 100644 --- a/tsl/src/nodes/decompress_chunk/vector_predicates.c +++ b/tsl/src/nodes/decompress_chunk/vector_predicates.c @@ -43,15 +43,8 @@ vector_const_texteq_nodict(const ArrowArray *arrow, const Datum constdatum, uint #define INNER_LOOP \ const uint32 start = offsets[row]; \ const uint32 end = offsets[row + 1]; \ - bool valid = false; \ - if (end - start == textlen) \ - { \ - valid = true; \ - for (size_t character_index = 0; character_index < textlen; character_index++) \ - { \ - valid &= cstring[character_index] == values[start + character_index]; \ - } \ - } \ + const uint32 veclen = end - start; \ + bool valid = veclen != textlen ? 
false : (strncmp((char *) &values[start], (char *) cstring, textlen) == 0); \ word |= ((uint64) valid) << bit_index; \ // fprintf(stderr, "plain row %ld: valid %d\n", row, valid); @@ -114,7 +107,7 @@ vector_const_texteq(const ArrowArray *arrow, const Datum constdatum, uint64 *res INNER_LOOP // fprintf(stderr, "dict-coded row %ld: index %d, valid %d\n", row, index, - //valid); + // valid); } result[outer] &= word; } From 6d10901cc41820f2533491243d2c7934f05517b7 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 17 Oct 2023 14:49:29 +0200 Subject: [PATCH 005/249] wrong export --- tsl/src/compression/compression.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c index 91c252cd13a..cf7525c7bb4 100644 --- a/tsl/src/compression/compression.c +++ b/tsl/src/compression/compression.c @@ -1764,10 +1764,10 @@ tsl_compressed_data_decompress_reverse(PG_FUNCTION_ARGS) ; } -TS_FUNCTION_INFO_V1(tsl_compressed_data_info); +TS_FUNCTION_INFO_V1(ts_compressed_data_info); Datum -tsl_compressed_data_info(PG_FUNCTION_ARGS) +ts_compressed_data_info(PG_FUNCTION_ARGS) { StringInfoData buf = { 0 }; initStringInfo(&buf); From 890505e19d390916ef95fa8b3406f9becadbb16a Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 18 Oct 2023 13:19:35 +0200 Subject: [PATCH 006/249] prewhere tmp --- .../nodes/decompress_chunk/compressed_batch.c | 267 ++++++++++++------ 1 file changed, 180 insertions(+), 87 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 97492449189..cd71d8e783c 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -78,12 +78,134 @@ make_single_value_arrow(Oid pgtype, Datum datum, bool isnull) return arrow; } +static int +get_max_element_bytes(ArrowArray *text_array) +{ + int maxbytes = 0; + uint32 *offsets = (uint32 *) text_array->buffers[1]; + for (int i = 0; i < text_array->length; i++) + { + const int curbytes = offsets[i + 1] - offsets[i]; + if (curbytes > maxbytes) + { + maxbytes = curbytes; + } + } + + return maxbytes; +} + static void +compressed_batch_decompress_column(DecompressChunkState *chunk_state, + DecompressBatchState *batch_state, int i) +{ + DecompressChunkColumnDescription *column_description = &chunk_state->template_columns[i]; + CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; + column_values->iterator = NULL; + column_values->arrow = NULL; + column_values->value_bytes = -1; + column_values->arrow_values = NULL; + column_values->arrow_validity = NULL; + column_values->output_attno = column_description->output_attno; + column_values->value_bytes = get_typlen(column_description->typid); + Assert(column_values->value_bytes != 0); + + bool isnull; + Datum value = slot_getattr(batch_state->compressed_slot, + column_description->compressed_scan_attno, + &isnull); + + if (isnull) + { + /* + * The column will have a default value for the entire batch, + * set it now. 
+ */ + column_values->iterator = NULL; + AttrNumber attr = AttrNumberGetAttrOffset(column_description->output_attno); + + batch_state->decompressed_scan_slot->tts_values[attr] = + getmissingattr(batch_state->decompressed_scan_slot->tts_tupleDescriptor, + attr + 1, + &batch_state->decompressed_scan_slot->tts_isnull[attr]); + return; + } + + /* Decompress the entire batch if it is supported. */ + CompressedDataHeader *header = (CompressedDataHeader *) PG_DETOAST_DATUM(value); + ArrowArray *arrow = NULL; + if (chunk_state->enable_bulk_decompression && column_description->bulk_decompression_supported) + { + if (chunk_state->bulk_decompression_context == NULL) + { + chunk_state->bulk_decompression_context = + AllocSetContextCreate(MemoryContextGetParent(batch_state->per_batch_context), + "bulk decompression", + /* minContextSize = */ 0, + /* initBlockSize = */ 64 * 1024, + /* maxBlockSize = */ 64 * 1024); + } + + DecompressAllFunction decompress_all = + tsl_get_decompress_all_function(header->compression_algorithm, + column_description->typid); + Assert(decompress_all != NULL); + + MemoryContext context_before_decompression = + MemoryContextSwitchTo(chunk_state->bulk_decompression_context); + + arrow = decompress_all(PointerGetDatum(header), + column_description->typid, + batch_state->per_batch_context); + + MemoryContextReset(chunk_state->bulk_decompression_context); + + MemoryContextSwitchTo(context_before_decompression); + } + + if (arrow) + { + if (batch_state->total_batch_rows == 0) + { + batch_state->total_batch_rows = arrow->length; + } + else if (batch_state->total_batch_rows != arrow->length) + { + elog(ERROR, "compressed column out of sync with batch counter"); + } + + column_values->arrow = arrow; + column_values->arrow_values = arrow->buffers[1]; + column_values->arrow_validity = arrow->buffers[0]; + + if (column_values->value_bytes == -1) + { + const int maxbytes = + VARHDRSZ + (column_values->arrow->dictionary ? + get_max_element_bytes(column_values->arrow->dictionary) : + get_max_element_bytes(column_values->arrow)); + + const AttrNumber attr = AttrNumberGetAttrOffset(column_values->output_attno); + batch_state->decompressed_scan_slot->tts_values[attr] = + PointerGetDatum(MemoryContextAlloc(batch_state->per_batch_context, maxbytes)); + } + + return; + } + + /* As a fallback, decompress row-by-row. */ + column_values->iterator = + tsl_get_decompression_iterator_init(header->compression_algorithm, + chunk_state->reverse)(PointerGetDatum(header), + column_description->typid); +} + +static bool apply_vector_quals(DecompressChunkState *chunk_state, DecompressBatchState *batch_state) { if (!chunk_state->vectorized_quals) { - return; + return true; } /* @@ -126,7 +248,20 @@ apply_vector_quals(DecompressChunkState *chunk_state, DecompressBatchState *batc Ensure(column_description->type == COMPRESSED_COLUMN, "only compressed columns are supported in vectorized quals"); Assert(column_index < chunk_state->num_compressed_columns); + CompressedColumnValues *column_values = &batch_state->compressed_columns[column_index]; + + if (column_values->value_bytes == 0) + { + /* + * We decompress the compressed columns on demand, so that we can + * skip decompressing some columns if the entire batch doesn't pass + * the quals. 
+ */ + compressed_batch_decompress_column(chunk_state, batch_state, column_index); + Assert(column_values->value_bytes != 0); + } + Ensure(column_values->iterator == NULL, "only arrow columns are supported in vectorized quals"); @@ -203,7 +338,23 @@ apply_vector_quals(DecompressChunkState *chunk_state, DecompressBatchState *batc } } } + + /* + * If we don't have any passing rows, break out early to avoid + * reading and decompressing other columns. + */ + bool have_passing_rows = false; + for (int i = 0; i < bitmap_bytes / 8; i++) + { + have_passing_rows |= batch_state->vector_qual_result[i]; + } + if (!have_passing_rows) + { + return false; + } } + + return true; } /* @@ -290,93 +441,15 @@ compressed_batch_set_compressed_tuple(DecompressChunkState *chunk_state, case COMPRESSED_COLUMN: { Assert(i < chunk_state->num_compressed_columns); + /* + * We decompress the compressed columns on demand, so that we can + * skip decompressing some columns if the entire batch doesn't pass + * the quals. Skip them for now. + */ CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; - column_values->iterator = NULL; - column_values->arrow = NULL; - column_values->value_bytes = -1; - column_values->arrow_values = NULL; - column_values->arrow_validity = NULL; - column_values->output_attno = column_description->output_attno; - bool isnull; - Datum value = slot_getattr(batch_state->compressed_slot, - column_description->compressed_scan_attno, - &isnull); - if (isnull) - { - /* - * The column will have a default value for the entire batch, - * set it now. - */ - column_values->iterator = NULL; - AttrNumber attr = AttrNumberGetAttrOffset(column_description->output_attno); - - batch_state->decompressed_scan_slot->tts_values[attr] = - getmissingattr(batch_state->decompressed_scan_slot->tts_tupleDescriptor, - attr + 1, - &batch_state->decompressed_scan_slot->tts_isnull[attr]); - break; - } - - /* Decompress the entire batch if it is supported. */ - CompressedDataHeader *header = (CompressedDataHeader *) PG_DETOAST_DATUM(value); - ArrowArray *arrow = NULL; - if (chunk_state->enable_bulk_decompression && - column_description->bulk_decompression_supported) - { - if (chunk_state->bulk_decompression_context == NULL) - { - chunk_state->bulk_decompression_context = - AllocSetContextCreate(MemoryContextGetParent( - batch_state->per_batch_context), - "bulk decompression", - /* minContextSize = */ 0, - /* initBlockSize = */ 64 * 1024, - /* maxBlockSize = */ 64 * 1024); - } - - DecompressAllFunction decompress_all = - tsl_get_decompress_all_function(header->compression_algorithm); - Assert(decompress_all != NULL); - - MemoryContext context_before_decompression = - MemoryContextSwitchTo(chunk_state->bulk_decompression_context); - - arrow = decompress_all(PointerGetDatum(header), - column_description->typid, - batch_state->per_batch_context); - - MemoryContextReset(chunk_state->bulk_decompression_context); - - MemoryContextSwitchTo(context_before_decompression); - } - - if (arrow) - { - if (batch_state->total_batch_rows == 0) - { - batch_state->total_batch_rows = arrow->length; - } - else if (batch_state->total_batch_rows != arrow->length) - { - elog(ERROR, "compressed column out of sync with batch counter"); - } - - column_values->arrow = arrow; - column_values->arrow_values = arrow->buffers[1]; - column_values->arrow_validity = arrow->buffers[0]; - - column_values->value_bytes = get_typlen(column_description->typid); - - break; - } - - /* As a fallback, decompress row-by-row. 
*/ - column_values->iterator = - tsl_get_decompression_iterator_init(header->compression_algorithm, - chunk_state - ->reverse)(PointerGetDatum(header), - column_description->typid); + column_values->value_bytes = 0; + // compressed_batch_decompress_column(chunk_state, batch_state, i); break; } case SEGMENTBY_COLUMN: @@ -429,7 +502,27 @@ compressed_batch_set_compressed_tuple(DecompressChunkState *chunk_state, } } - apply_vector_quals(chunk_state, batch_state); + if (apply_vector_quals(chunk_state, batch_state)) + { + /* + * Have rows that actually pass the vector quals, have to decompress the + * rest of the compressed columns. + */ + const int num_compressed_columns = chunk_state->num_compressed_columns; + for (int i = 0; i < num_compressed_columns; i++) + { + CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; + if (column_values->value_bytes == 0) + { + compressed_batch_decompress_column(chunk_state, batch_state, i); + Assert(column_values->value_bytes != 0); + } + } + } + else + { + //fprintf(stderr, "the entire batch didn't pass!!!\n"); + } MemoryContextSwitchTo(old_context); } From 03ccd4d90d9201c41709834693942a23830fa660 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 18 Oct 2023 14:12:39 +0200 Subject: [PATCH 007/249] more prewhere --- .../nodes/decompress_chunk/compressed_batch.c | 80 ++++++++++++------- 1 file changed, 49 insertions(+), 31 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index cd71d8e783c..b4bccb5c57b 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -96,8 +96,7 @@ get_max_element_bytes(ArrowArray *text_array) } static void -compressed_batch_decompress_column(DecompressChunkState *chunk_state, - DecompressBatchState *batch_state, int i) +decompress_column(DecompressChunkState *chunk_state, DecompressBatchState *batch_state, int i) { DecompressChunkColumnDescription *column_description = &chunk_state->template_columns[i]; CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; @@ -147,8 +146,7 @@ compressed_batch_decompress_column(DecompressChunkState *chunk_state, } DecompressAllFunction decompress_all = - tsl_get_decompress_all_function(header->compression_algorithm, - column_description->typid); + tsl_get_decompress_all_function(header->compression_algorithm); Assert(decompress_all != NULL); MemoryContext context_before_decompression = @@ -258,7 +256,7 @@ apply_vector_quals(DecompressChunkState *chunk_state, DecompressBatchState *batc * skip decompressing some columns if the entire batch doesn't pass * the quals. 
*/ - compressed_batch_decompress_column(chunk_state, batch_state, column_index); + decompress_column(chunk_state, batch_state, column_index); Assert(column_values->value_bytes != 0); } @@ -448,8 +446,6 @@ compressed_batch_set_compressed_tuple(DecompressChunkState *chunk_state, */ CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; column_values->value_bytes = 0; - - // compressed_batch_decompress_column(chunk_state, batch_state, i); break; } case SEGMENTBY_COLUMN: @@ -514,14 +510,14 @@ compressed_batch_set_compressed_tuple(DecompressChunkState *chunk_state, CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; if (column_values->value_bytes == 0) { - compressed_batch_decompress_column(chunk_state, batch_state, i); + decompress_column(chunk_state, batch_state, i); Assert(column_values->value_bytes != 0); } } } else { - //fprintf(stderr, "the entire batch didn't pass!!!\n"); + // fprintf(stderr, "the entire batch didn't pass!!!\n"); } MemoryContextSwitchTo(old_context); @@ -532,8 +528,7 @@ compressed_batch_set_compressed_tuple(DecompressChunkState *chunk_state, * Doesn't check the quals. */ static void -compressed_batch_make_next_tuple(DecompressChunkState *chunk_state, - DecompressBatchState *batch_state) +make_next_tuple(DecompressChunkState *chunk_state, DecompressBatchState *batch_state) { TupleTableSlot *decompressed_scan_slot = batch_state->decompressed_scan_slot; Assert(decompressed_scan_slot != NULL); @@ -549,25 +544,26 @@ compressed_batch_make_next_tuple(DecompressChunkState *chunk_state, const int num_compressed_columns = chunk_state->num_compressed_columns; for (int i = 0; i < num_compressed_columns; i++) { - CompressedColumnValues column_values = batch_state->compressed_columns[i]; - - if (column_values.iterator != NULL) + CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; + Ensure(column_values->value_bytes != 0, "the column is not decompressed"); + if (column_values->iterator != NULL) { - DecompressResult result = column_values.iterator->try_next(column_values.iterator); + DecompressResult result = column_values->iterator->try_next(column_values->iterator); if (result.is_done) { elog(ERROR, "compressed column out of sync with batch counter"); } - const AttrNumber attr = AttrNumberGetAttrOffset(column_values.output_attno); + const AttrNumber attr = AttrNumberGetAttrOffset(column_values->output_attno); decompressed_scan_slot->tts_isnull[attr] = result.is_null; decompressed_scan_slot->tts_values[attr] = result.val; } - else if (column_values.arrow_values != NULL) + else if (column_values->arrow_values != NULL) { - const char *restrict src = column_values.arrow_values; - Assert(column_values.value_bytes > 0); + const AttrNumber attr = AttrNumberGetAttrOffset(column_values->output_attno); + Assert(column_values->value_bytes > 0); + const char *restrict src = column_values->arrow_values; /* * The conversion of Datum to more narrow types will truncate @@ -576,7 +572,7 @@ compressed_batch_make_next_tuple(DecompressChunkState *chunk_state, * reads, so technically we have to do memcpy. */ uint64 value; - memcpy(&value, &src[column_values.value_bytes * arrow_row], 8); + memcpy(&value, &src[column_values->value_bytes * arrow_row], 8); #ifdef USE_FLOAT8_BYVAL Datum datum = Int64GetDatum(value); @@ -586,7 +582,7 @@ compressed_batch_make_next_tuple(DecompressChunkState *chunk_state, * reference, so we have to jump through these hoops. 
*/ Datum datum; - if (column_values.value_bytes <= 4) + if (column_values->value_bytes <= 4) { datum = Int32GetDatum((uint32) value); } @@ -595,10 +591,9 @@ compressed_batch_make_next_tuple(DecompressChunkState *chunk_state, datum = Int64GetDatum(value); } #endif - const AttrNumber attr = AttrNumberGetAttrOffset(column_values.output_attno); decompressed_scan_slot->tts_values[attr] = datum; decompressed_scan_slot->tts_isnull[attr] = - !arrow_row_is_valid(column_values.arrow_validity, arrow_row); + !arrow_row_is_valid(column_values->arrow_validity, arrow_row); } } @@ -619,7 +614,7 @@ compressed_batch_make_next_tuple(DecompressChunkState *chunk_state, } static bool -compressed_batch_vector_qual(DecompressChunkState *chunk_state, DecompressBatchState *batch_state) +vector_qual(DecompressChunkState *chunk_state, DecompressBatchState *batch_state) { Assert(batch_state->total_batch_rows > 0); Assert(batch_state->next_batch_row < batch_state->total_batch_rows); @@ -637,7 +632,7 @@ compressed_batch_vector_qual(DecompressChunkState *chunk_state, DecompressBatchS } static bool -compressed_batch_postgres_qual(DecompressChunkState *chunk_state, DecompressBatchState *batch_state) +postgres_qual(DecompressChunkState *chunk_state, DecompressBatchState *batch_state) { TupleTableSlot *decompressed_scan_slot = batch_state->decompressed_scan_slot; Assert(!TupIsNull(decompressed_scan_slot)); @@ -672,7 +667,7 @@ compressed_batch_advance(DecompressChunkState *chunk_state, DecompressBatchState for (; batch_state->next_batch_row < batch_state->total_batch_rows; batch_state->next_batch_row++) { - if (!compressed_batch_vector_qual(chunk_state, batch_state)) + if (!vector_qual(chunk_state, batch_state)) { /* * This row doesn't pass the vectorized quals. Advance the iterated @@ -690,9 +685,9 @@ compressed_batch_advance(DecompressChunkState *chunk_state, DecompressBatchState continue; } - compressed_batch_make_next_tuple(chunk_state, batch_state); + make_next_tuple(chunk_state, batch_state); - if (!compressed_batch_postgres_qual(chunk_state, batch_state)) + if (!postgres_qual(chunk_state, batch_state)) { /* * The tuple didn't pass the qual, fetch the next one in the next @@ -743,11 +738,34 @@ compressed_batch_save_first_tuple(DecompressChunkState *chunk_state, Assert(batch_state->total_batch_rows > 0); Assert(TupIsNull(batch_state->decompressed_scan_slot)); - compressed_batch_make_next_tuple(chunk_state, batch_state); + /* + * We might not have decompressed some columns if the vector quals didn't + * pass for the entire batch. Have to decompress them anyway if we're asked + * to save the first tuple. This doesn't actually happen yet, because the + * vectorized decompression is disabled with sorted merge, but we might want + * to enable it for some queries. + */ + const int num_compressed_columns = chunk_state->num_compressed_columns; + for (int i = 0; i < num_compressed_columns; i++) + { + CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; + if (column_values->value_bytes == 0) + { + decompress_column(chunk_state, batch_state, i); + Assert(column_values->value_bytes != 0); + } + } + + /* Make the first tuple and save it. 
*/ + make_next_tuple(chunk_state, batch_state); ExecCopySlot(first_tuple_slot, batch_state->decompressed_scan_slot); - const bool qual_passed = compressed_batch_vector_qual(chunk_state, batch_state) && - compressed_batch_postgres_qual(chunk_state, batch_state); + /* + * Check the quals and advance, so that the batch is in the correct state + * for the subsequent calls (matching tuple is in decompressed scan slot). + */ + const bool qual_passed = + vector_qual(chunk_state, batch_state) && postgres_qual(chunk_state, batch_state); batch_state->next_batch_row++; if (!qual_passed) From 9031760af58decd519cfbaa678504e730e69ca94 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 18 Oct 2023 16:14:18 +0200 Subject: [PATCH 008/249] remove extra code --- .../nodes/decompress_chunk/compressed_batch.c | 30 ------------------- 1 file changed, 30 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index b4bccb5c57b..5d7b6e0acc8 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -78,23 +78,6 @@ make_single_value_arrow(Oid pgtype, Datum datum, bool isnull) return arrow; } -static int -get_max_element_bytes(ArrowArray *text_array) -{ - int maxbytes = 0; - uint32 *offsets = (uint32 *) text_array->buffers[1]; - for (int i = 0; i < text_array->length; i++) - { - const int curbytes = offsets[i + 1] - offsets[i]; - if (curbytes > maxbytes) - { - maxbytes = curbytes; - } - } - - return maxbytes; -} - static void decompress_column(DecompressChunkState *chunk_state, DecompressBatchState *batch_state, int i) { @@ -102,7 +85,6 @@ decompress_column(DecompressChunkState *chunk_state, DecompressBatchState *batch CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; column_values->iterator = NULL; column_values->arrow = NULL; - column_values->value_bytes = -1; column_values->arrow_values = NULL; column_values->arrow_validity = NULL; column_values->output_attno = column_description->output_attno; @@ -176,18 +158,6 @@ decompress_column(DecompressChunkState *chunk_state, DecompressBatchState *batch column_values->arrow_values = arrow->buffers[1]; column_values->arrow_validity = arrow->buffers[0]; - if (column_values->value_bytes == -1) - { - const int maxbytes = - VARHDRSZ + (column_values->arrow->dictionary ? - get_max_element_bytes(column_values->arrow->dictionary) : - get_max_element_bytes(column_values->arrow)); - - const AttrNumber attr = AttrNumberGetAttrOffset(column_values->output_attno); - batch_state->decompressed_scan_slot->tts_values[attr] = - PointerGetDatum(MemoryContextAlloc(batch_state->per_batch_context, maxbytes)); - } - return; } From 1ae010a283bfaf1f62729257547af3ab50fef3f0 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 18 Oct 2023 16:16:54 +0200 Subject: [PATCH 009/249] dead code --- tsl/src/nodes/decompress_chunk/compressed_batch.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 5d7b6e0acc8..1e289303cb1 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -713,18 +713,17 @@ compressed_batch_save_first_tuple(DecompressChunkState *chunk_state, * pass for the entire batch. 
Have to decompress them anyway if we're asked * to save the first tuple. This doesn't actually happen yet, because the * vectorized decompression is disabled with sorted merge, but we might want - * to enable it for some queries. + * to enable it for some queries. For now, just assert that it doesn't + * happen. */ +#ifdef USE_ASSERT_CHECKING const int num_compressed_columns = chunk_state->num_compressed_columns; for (int i = 0; i < num_compressed_columns; i++) { CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; - if (column_values->value_bytes == 0) - { - decompress_column(chunk_state, batch_state, i); - Assert(column_values->value_bytes != 0); - } + Assert(column_values->value_bytes != 0); } +#endif /* Make the first tuple and save it. */ make_next_tuple(chunk_state, batch_state); From 90cffa993814a3486215868816a90628c3b37df8 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 23 Oct 2023 16:59:54 +0200 Subject: [PATCH 010/249] fix the padding --- tsl/src/compression/compression.h | 8 ++++---- tsl/src/compression/simple8b_rle_decompress_all.h | 7 ++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/tsl/src/compression/compression.h b/tsl/src/compression/compression.h index d06ab55953d..751708f1d60 100644 --- a/tsl/src/compression/compression.h +++ b/tsl/src/compression/compression.h @@ -378,10 +378,10 @@ extern RowDecompressor build_decompressor(Relation in_rel, Relation out_rel); #define CDSTR(X) #X #define CDSTR2(X) CDSTR(X) -#define CheckCompressedData Assert -// #define CheckCompressedData(X) \ -// if (unlikely(!(X))) \ -// ereport(ERROR, CORRUPT_DATA_MESSAGE, errdetail(#X)) +//#define CheckCompressedData Assert +#define CheckCompressedData(X) \ + if (unlikely(!(X))) \ + ereport(ERROR, CORRUPT_DATA_MESSAGE, errdetail(#X)) inline static void * consumeCompressedData(StringInfo si, int bytes) diff --git a/tsl/src/compression/simple8b_rle_decompress_all.h b/tsl/src/compression/simple8b_rle_decompress_all.h index 5466d1efc56..874ba71662b 100644 --- a/tsl/src/compression/simple8b_rle_decompress_all.h +++ b/tsl/src/compression/simple8b_rle_decompress_all.h @@ -155,10 +155,11 @@ FUNCTION_NAME(simple8brle_decompress_all, ELEMENT_TYPE)(Simple8bRleSerialized *c Assert(n_total_values <= GLOBAL_MAX_ROWS_PER_COMPRESSION); /* - * We need a significant padding of 64 elements, not bytes, here, because we - * work in Simple8B blocks which can contain up to 64 elements. + * We need a quite significant padding of 63 elements, not bytes, after the + * last element, because we work in Simple8B blocks which can contain up to + * 64 elements. 
*/ - const uint16 n_buffer_elements = ((n_total_values + 63) / 64 + 1) * 64; + const uint16 n_buffer_elements = n_total_values + 63; ELEMENT_TYPE *restrict decompressed_values = palloc(sizeof(ELEMENT_TYPE) * n_buffer_elements); From 5d55403262b6e9144fbc21ae79de204d6360fafd Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 24 Oct 2023 10:33:27 +0200 Subject: [PATCH 011/249] adjust test output --- tsl/test/expected/compression_algos.out | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tsl/test/expected/compression_algos.out b/tsl/test/expected/compression_algos.out index d2a8a6a5622..34d430ecada 100644 --- a/tsl/test/expected/compression_algos.out +++ b/tsl/test/expected/compression_algos.out @@ -1560,8 +1560,8 @@ from ts_read_compressed_data_directory('deltadelta', 'int8', (:'TEST_INPUT_DIR' group by 2 order by 1 desc; count | result -------+-------- - 157 | XX001 - 80 | true + 168 | XX001 + 69 | true 13 | 08P01 1 | false (4 rows) From 3980a830c09c0ab7747e4ac9862d06f83a26a92e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 24 Oct 2023 16:33:05 +0200 Subject: [PATCH 012/249] debug --- tsl/src/compression/dictionary.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tsl/src/compression/dictionary.c b/tsl/src/compression/dictionary.c index 12ea121bf29..8f76226ead8 100644 --- a/tsl/src/compression/dictionary.c +++ b/tsl/src/compression/dictionary.c @@ -336,13 +336,13 @@ dictionary_compressor_finish(DictionaryCompressor *compressor) average_element_size = sizes.dictionary_size / sizes.num_distinct; expected_array_size = average_element_size * sizes.dictionary_compressed_indexes->num_elements; compressed = dictionary_compressed_from_serialization_info(sizes, compressor->type); - fprintf(stderr, - "dict size %ld, distinct %ld, avg element size %ld, easize %ld, totalsize %ld\n", - sizes.dictionary_size, - (uint64) sizes.num_distinct, - average_element_size, - expected_array_size, - sizes.total_size); +// fprintf(stderr, +// "dict size %ld, distinct %ld, avg element size %ld, easize %ld, totalsize %ld\n", +// sizes.dictionary_size, +// (uint64) sizes.num_distinct, +// average_element_size, +// expected_array_size, +// sizes.total_size); if (expected_array_size < sizes.total_size) return dictionary_compressed_to_array_compressed(compressed); From 13ee7dccdf45392048341f2b18bbfd487f2fb41a Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 31 Oct 2023 16:51:52 +0100 Subject: [PATCH 013/249] some tests --- tsl/src/compression/compression.c | 27 +- ...pl.c => decompress_arithmetic_test_impl.c} | 3 +- .../compression/decompress_text_test_impl.c | 235 ++++++++++++++++++ tsl/src/compression/dictionary.c | 14 +- .../decompress_chunk/vector_predicates.c | 6 +- tsl/test/expected/compression_algos.out | 18 ++ .../fuzzing/compression/array-text/array1 | Bin 0 -> 15917 bytes .../fuzzing/compression/dictionary-text/dict1 | Bin 0 -> 56 bytes tsl/test/sql/compression_algos.sql | 12 +- 9 files changed, 300 insertions(+), 15 deletions(-) rename tsl/src/compression/{decompress_test_impl.c => decompress_arithmetic_test_impl.c} (98%) create mode 100644 tsl/src/compression/decompress_text_test_impl.c create mode 100644 tsl/test/fuzzing/compression/array-text/array1 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/dict1 diff --git a/tsl/src/compression/compression.c 
b/tsl/src/compression/compression.c index e197fdc25a3..ee43a4c8a1b 100644 --- a/tsl/src/compression/compression.c +++ b/tsl/src/compression/compression.c @@ -2185,6 +2185,14 @@ get_compression_algorithm(char *name) { return COMPRESSION_ALGORITHM_GORILLA; } + else if (pg_strcasecmp(name, "array") == 0) + { + return COMPRESSION_ALGORITHM_ARRAY; + } + else if (pg_strcasecmp(name, "dictionary") == 0) + { + return COMPRESSION_ALGORITHM_DICTIONARY; + } ereport(ERROR, (errmsg("unknown comrpession algorithm %s", name))); return _INVALID_COMPRESSION_ALGORITHM; @@ -2194,7 +2202,7 @@ get_compression_algorithm(char *name) #define CTYPE float8 #define PGTYPE FLOAT8OID #define DATUM_TO_CTYPE DatumGetFloat8 -#include "decompress_test_impl.c" +#include "decompress_arithmetic_test_impl.c" #undef ALGO #undef CTYPE #undef PGTYPE @@ -2204,12 +2212,14 @@ get_compression_algorithm(char *name) #define CTYPE int64 #define PGTYPE INT8OID #define DATUM_TO_CTYPE DatumGetInt64 -#include "decompress_test_impl.c" +#include "decompress_arithmetic_test_impl.c" #undef ALGO #undef CTYPE #undef PGTYPE #undef DATUM_TO_CTYPE +#include "decompress_text_test_impl.c" + static int (*get_decompress_fn(int algo, Oid type))(const uint8 *Data, size_t Size, bool extra_checks) { @@ -2221,6 +2231,14 @@ static int (*get_decompress_fn(int algo, Oid type))(const uint8 *Data, size_t Si { return decompress_deltadelta_int64; } + else if (algo == COMPRESSION_ALGORITHM_ARRAY && type == TEXTOID) + { + return decompress_array_text; + } + else if (algo == COMPRESSION_ALGORITHM_DICTIONARY && type == TEXTOID) + { + return decompress_dictionary_text; + } elog(ERROR, "no decompression function for compression algorithm %d with element type %d", @@ -2242,7 +2260,8 @@ read_compressed_data_file_impl(int algo, Oid type, const char *path, volatile in if (!f) { - elog(ERROR, "could not open the file '%s'", path); + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FILE), errmsg("could not open the file '%s'", path))); } fseek(f, 0, SEEK_END); @@ -2266,7 +2285,7 @@ read_compressed_data_file_impl(int algo, Oid type, const char *path, volatile in if (elements_read != 1) { - elog(ERROR, "failed to read file '%s'", path); + ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FILE), errmsg("failed to read file '%s'", path))); } fclose(f); diff --git a/tsl/src/compression/decompress_test_impl.c b/tsl/src/compression/decompress_arithmetic_test_impl.c similarity index 98% rename from tsl/src/compression/decompress_test_impl.c rename to tsl/src/compression/decompress_arithmetic_test_impl.c index c452b173345..90d7cc4bf2c 100644 --- a/tsl/src/compression/decompress_test_impl.c +++ b/tsl/src/compression/decompress_arithmetic_test_impl.c @@ -13,7 +13,8 @@ /* * Try to decompress the given compressed data. Used for fuzzing and for checking * the examples found by fuzzing. For fuzzing we do less checks to keep it - * faster and the coverage space smaller. + * faster and the coverage space smaller. This is a generic implementation + * for arithmetic types. */ static int FUNCTION_NAME(ALGO, CTYPE)(const uint8 *Data, size_t Size, bool extra_checks) diff --git a/tsl/src/compression/decompress_text_test_impl.c b/tsl/src/compression/decompress_text_test_impl.c new file mode 100644 index 00000000000..1f44d91d3f0 --- /dev/null +++ b/tsl/src/compression/decompress_text_test_impl.c @@ -0,0 +1,235 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. 
+ */ + +static void +arrow_get_str(ArrowArray *arrow, int arrow_row, const char **str, size_t *len) +{ + if (!arrow->dictionary) + { + const uint32 *offsets = (uint32 *) arrow->buffers[1]; + const char *values = (char *) arrow->buffers[2]; + + const uint32 start = offsets[arrow_row]; + const uint32 end = offsets[arrow_row + 1]; + const uint32 arrow_len = end - start; + + *len = arrow_len; + *str = &values[start]; + return; + } + + const int16 dict_row = ((int16 *) arrow->buffers[1])[arrow_row]; + arrow_get_str(arrow->dictionary, dict_row, str, len); +} + +/* + * Try to decompress the given compressed data. Used for fuzzing and for checking + * the examples found by fuzzing. For fuzzing we do less checks to keep it + * faster and the coverage space smaller. This is a generic implementation + * for arithmetic types. + */ +static int +decompress_generic_text(const uint8 *Data, size_t Size, bool extra_checks, int requested_algo) +{ + StringInfoData si = { .data = (char *) Data, .len = Size }; + + const int data_algo = pq_getmsgbyte(&si); + + CheckCompressedData(data_algo > 0 && data_algo < _END_COMPRESSION_ALGORITHMS); + + if (data_algo != requested_algo) + { + /* + * It's convenient to fuzz only one algorithm at a time. We specialize + * the fuzz target for one algorithm, so that the fuzzer doesn't waste + * time discovering others from scratch. + */ + return -1; + } + + Datum compressed_data = definitions[data_algo].compressed_data_recv(&si); + + if (!extra_checks) + { + /* + * For routine fuzzing, we only run bulk decompression to make it faster + * and the coverage space smaller. + */ + DecompressAllFunction decompress_all = tsl_get_decompress_all_function(data_algo, TEXTOID); + decompress_all(compressed_data, TEXTOID, CurrentMemoryContext); + return 0; + } + + /* + * Test bulk decompression. This might hide some errors in the row-by-row + * decompression, but testing both is significantly more complicated, and + * the row-by-row is old and stable. + */ + ArrowArray *arrow = NULL; + DecompressAllFunction decompress_all = tsl_get_decompress_all_function(data_algo, TEXTOID); + if (decompress_all) + { + arrow = decompress_all(compressed_data, TEXTOID, CurrentMemoryContext); + } + + /* + * Test row-by-row decompression. + */ + DecompressionIterator *iter = + definitions[data_algo].iterator_init_forward(compressed_data, TEXTOID); + DecompressResult results[GLOBAL_MAX_ROWS_PER_COMPRESSION]; + int n = 0; + for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) + { + if (n >= GLOBAL_MAX_ROWS_PER_COMPRESSION) + { + elog(ERROR, "too many compressed rows"); + } + + results[n++] = r; + } + + /* Check that both ways of decompression match. 
*/ + if (arrow) + { + if (n != arrow->length) + { + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("the bulk decompression result does not match"), + errdetail("Expected %d elements, got %d.", n, (int) arrow->length))); + } + + for (int i = 0; i < n; i++) + { + const bool arrow_isnull = !arrow_row_is_valid(arrow->buffers[0], i); + if (arrow_isnull != results[i].is_null) + { + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("the bulk decompression result does not match"), + errdetail("Expected null %d, got %d at row %d.", + results[i].is_null, + arrow_isnull, + i))); + } + + if (!results[i].is_null) + { + const char *arrow_cstring; + size_t arrow_len; + arrow_get_str(arrow, i, &arrow_cstring, &arrow_len); + + const Datum rowbyrow_varlena = results[i].val; + const size_t rowbyrow_len = VARSIZE_ANY_EXHDR(rowbyrow_varlena); + const char *rowbyrow_cstring = VARDATA_ANY(rowbyrow_varlena); + + // fprintf(stderr, "arrow: '%.*s'(%ld), rbr: '%.*s'(%ld)\n", + // (int) arrow_len, arrow_cstring, arrow_len, + // (int) rowbyrow_len, rowbyrow_cstring, rowbyrow_len); + + if (rowbyrow_len != arrow_len) + { + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("the bulk decompression result does not match"), + errdetail("At row %d\n", i))); + } + + if (strncmp(arrow_cstring, rowbyrow_cstring, rowbyrow_len)) + { + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("the bulk decompression result does not match"), + errdetail("At row %d\n", i))); + } + } + } + } + + /* + * Check that the result is still the same after we compress and decompress + * back. + * + * 1) Compress. + */ + Compressor *compressor = definitions[data_algo].compressor_for_type(TEXTOID); + + for (int i = 0; i < n; i++) + { + if (results[i].is_null) + { + compressor->append_null(compressor); + } + else + { + compressor->append_val(compressor, results[i].val); + } + } + + compressed_data = (Datum) compressor->finish(compressor); + if (compressed_data == 0) + { + /* The gorilla compressor returns NULL for all-null input sets. */ + return n; + }; + + /* + * 2) Decompress and check that it's the same. + */ + iter = definitions[data_algo].iterator_init_forward(compressed_data, TEXTOID); + int nn = 0; + for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) + { + if (r.is_null != results[nn].is_null) + { + elog(ERROR, "the repeated decompression result doesn't match"); + } + + if (!r.is_null) + { + const Datum old_value = results[nn].val; + const Datum new_value = r.val; + + /* + * Floats can also be NaN/infinite and the comparison doesn't + * work in that case. 
+ */ + if (VARSIZE_ANY_EXHDR(old_value) != VARSIZE_ANY_EXHDR(new_value)) + { + elog(ERROR, "the repeated decompression result doesn't match"); + } + + if (strncmp(VARDATA_ANY(old_value), + VARDATA_ANY(new_value), + VARSIZE_ANY_EXHDR(new_value))) + { + elog(ERROR, "the repeated decompression result doesn't match"); + } + } + + nn++; + + if (nn > n) + { + elog(ERROR, "the repeated recompression result doesn't match"); + } + } + + return n; +} + +static int +decompress_array_text(const uint8 *Data, size_t Size, bool extra_checks) +{ + return decompress_generic_text(Data, Size, extra_checks, COMPRESSION_ALGORITHM_ARRAY); +} + +static int +decompress_dictionary_text(const uint8 *Data, size_t Size, bool extra_checks) +{ + return decompress_generic_text(Data, Size, extra_checks, COMPRESSION_ALGORITHM_DICTIONARY); +} diff --git a/tsl/src/compression/dictionary.c b/tsl/src/compression/dictionary.c index 8f76226ead8..0551dc89bfe 100644 --- a/tsl/src/compression/dictionary.c +++ b/tsl/src/compression/dictionary.c @@ -336,13 +336,13 @@ dictionary_compressor_finish(DictionaryCompressor *compressor) average_element_size = sizes.dictionary_size / sizes.num_distinct; expected_array_size = average_element_size * sizes.dictionary_compressed_indexes->num_elements; compressed = dictionary_compressed_from_serialization_info(sizes, compressor->type); -// fprintf(stderr, -// "dict size %ld, distinct %ld, avg element size %ld, easize %ld, totalsize %ld\n", -// sizes.dictionary_size, -// (uint64) sizes.num_distinct, -// average_element_size, -// expected_array_size, -// sizes.total_size); + // fprintf(stderr, + // "dict size %ld, distinct %ld, avg element size %ld, easize %ld, totalsize %ld\n", + // sizes.dictionary_size, + // (uint64) sizes.num_distinct, + // average_element_size, + // expected_array_size, + // sizes.total_size); if (expected_array_size < sizes.total_size) return dictionary_compressed_to_array_compressed(compressed); diff --git a/tsl/src/nodes/decompress_chunk/vector_predicates.c b/tsl/src/nodes/decompress_chunk/vector_predicates.c index 981073dd015..2f322eba1f7 100644 --- a/tsl/src/nodes/decompress_chunk/vector_predicates.c +++ b/tsl/src/nodes/decompress_chunk/vector_predicates.c @@ -43,8 +43,10 @@ vector_const_texteq_nodict(const ArrowArray *arrow, const Datum constdatum, uint #define INNER_LOOP \ const uint32 start = offsets[row]; \ const uint32 end = offsets[row + 1]; \ - const uint32 veclen = end - start; \ - bool valid = veclen != textlen ? false : (strncmp((char *) &values[start], (char *) cstring, textlen) == 0); \ + const uint32 veclen = end - start; \ + bool valid = veclen != textlen ? 
\ + false : \ + (strncmp((char *) &values[start], (char *) cstring, textlen) == 0); \ word |= ((uint64) valid) << bit_index; \ // fprintf(stderr, "plain row %ld: valid %d\n", row, valid); diff --git a/tsl/test/expected/compression_algos.out b/tsl/test/expected/compression_algos.out index 34d430ecada..b77d575f156 100644 --- a/tsl/test/expected/compression_algos.out +++ b/tsl/test/expected/compression_algos.out @@ -1568,5 +1568,23 @@ group by 2 order by 1 desc; create or replace function ts_read_compressed_data_file(cstring, regtype, cstring) returns int as :TSL_MODULE_PATHNAME, 'ts_read_compressed_data_file' language c; +\set ON_ERROR_STOP 0 select ts_read_compressed_data_file('gorilla', 'float8', '--nonexistent'); ERROR: could not open the file '--nonexistent' +\set ON_ERROR_STOP 1 +select count(*), coalesce((rows >= 0)::text, sqlstate) result +from ts_read_compressed_data_directory('array', 'text', (:'TEST_INPUT_DIR' || '/fuzzing/compression/array-text')::cstring) +group by 2 order by 1 desc; + count | result +-------+-------- + 1 | true +(1 row) + +select count(*), coalesce((rows >= 0)::text, sqlstate) result +from ts_read_compressed_data_directory('dictionary', 'text', (:'TEST_INPUT_DIR' || '/fuzzing/compression/dictionary-text')::cstring) +group by 2 order by 1 desc; + count | result +-------+-------- + 1 | true +(1 row) + diff --git a/tsl/test/fuzzing/compression/array-text/array1 b/tsl/test/fuzzing/compression/array-text/array1 new file mode 100644 index 0000000000000000000000000000000000000000..50dfba19732b8428b250e63489743522452728c0 GIT binary patch literal 15917 zcmZA8F|IC24S>-*A{$^Y*JZn0cQt4PZ$?N65PAViW*NM{gZaBp=yvkSL7k|7`uO*6 zfBEY#-+%eXzkd7p{_VftKR$l?`1r&BK0ZGF{I|dV`s=sRt zeL>$ReV_Dw()UTLTH`ectK<;kJF=>f+$GectNA;Jv*pxQ)+ST@u{3&s$v%+_ukKT?*W` z&s$vvytfwtxAA$a3xM19d8_OBzipqlx*q@A_Ia!8>A!8Cx4Itw_x5`CZ{zb;*QU!#L+vlyWhyJ#G-s*bhZ`;?c lety5AkLlI)-n|a>o?b=o>2;>}^y+y}uOq#uSIv8h{{tyD3_Sn< literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/dict1 b/tsl/test/fuzzing/compression/dictionary-text/dict1 new file mode 100644 index 0000000000000000000000000000000000000000..08ae62cb6b1d9b958cf4342fe295478b58187fef GIT binary patch literal 56 ycmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEiK7zFqk80;FrERYBYvlJ)hrUC#*m= 0)::text, sqlstate) result +from ts_read_compressed_data_directory('array', 'text', (:'TEST_INPUT_DIR' || '/fuzzing/compression/array-text')::cstring) +group by 2 order by 1 desc; + +select count(*), coalesce((rows >= 0)::text, sqlstate) result +from ts_read_compressed_data_directory('dictionary', 'text', (:'TEST_INPUT_DIR' || '/fuzzing/compression/dictionary-text')::cstring) +group by 2 order by 1 desc; From d64c6851b9578fc947b30ef1c898975cdcae0eec Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 31 Oct 2023 16:58:25 +0100 Subject: [PATCH 014/249] fuzz --- .github/workflows/libfuzzer.yaml | 3 ++- tsl/src/compression/compression.c | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 883d7111371..ed2876c5a78 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -16,7 +16,8 @@ jobs: strategy: fail-fast: false matrix: - case: [ { algo: gorilla, type: float8 }, { algo: deltadelta, type: int8 } ] + case: [ { algo: gorilla, type: float8 }, { algo: deltadelta, type: int8 }, + { algo: array, type: text }, { algo: dictionary, type: text } ] name: Fuzz decompression ${{ matrix.case.algo 
}} ${{ matrix.case.type }} runs-on: ubuntu-22.04 diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c index ee43a4c8a1b..94528673660 100644 --- a/tsl/src/compression/compression.c +++ b/tsl/src/compression/compression.c @@ -2483,6 +2483,16 @@ llvm_fuzz_target_deltadelta_int64(const uint8_t *Data, size_t Size) { return llvm_fuzz_target_generic(decompress_deltadelta_int64, Data, Size); } +static int +llvm_fuzz_target_array_text(const uint8_t *Data, size_t Size) +{ + return llvm_fuzz_target_generic(decompress_array_text, Data, Size); +} +static int +llvm_fuzz_target_dictionary_text(const uint8_t *Data, size_t Size) +{ + return llvm_fuzz_target_generic(decompress_dictionary_text, Data, Size); +} /* * libfuzzer fuzzing driver that we import from LLVM libraries. It will run our @@ -2537,6 +2547,14 @@ ts_fuzz_compression(PG_FUNCTION_ARGS) { target = llvm_fuzz_target_deltadelta_int64; } + else if (algo == COMPRESSION_ALGORITHM_ARRAY && type == TEXTOID) + { + target = llvm_fuzz_target_array_text; + } + else if (algo == COMPRESSION_ALGORITHM_DICTIONARY && type == TEXTOID) + { + target = llvm_fuzz_target_dictionary_text; + } else { elog(ERROR, "no llvm fuzz target for compression algorithm %d and type %d", algo, type); From aa0cd9aee3a09945b2ae215accf073c3b02e7607 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 31 Oct 2023 19:03:38 +0100 Subject: [PATCH 015/249] fuzzer fixes --- tsl/src/compression/array.c | 9 ++++++--- tsl/src/compression/dictionary.c | 31 +++++++++++++++++-------------- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/tsl/src/compression/array.c b/tsl/src/compression/array.c index 9acd8f1a037..b37e06361da 100644 --- a/tsl/src/compression/array.c +++ b/tsl/src/compression/array.c @@ -243,6 +243,7 @@ array_compression_serialization_size(ArrayCompressorSerializationInfo *info) uint32 array_compression_serialization_num_elements(ArrayCompressorSerializationInfo *info) { + CheckCompressedData(info->sizes != NULL); return info->sizes->num_elements; } @@ -732,7 +733,6 @@ array_compressed_data_send(StringInfo buffer, const char *_serialized_data, Size Datum array_compressed_recv(StringInfo buffer) { - ArrayCompressorSerializationInfo *data; uint8 has_nulls; Oid element_type; @@ -741,9 +741,12 @@ array_compressed_recv(StringInfo buffer) element_type = binary_string_get_type(buffer); - data = array_compressed_data_recv(buffer, element_type); + ArrayCompressorSerializationInfo *info = array_compressed_data_recv(buffer, element_type); - PG_RETURN_POINTER(array_compressed_from_serialization_info(data, element_type)); + CheckCompressedData(info->sizes != NULL); + CheckCompressedData(has_nulls == (info->nulls != NULL)); + + PG_RETURN_POINTER(array_compressed_from_serialization_info(info, element_type)); } void diff --git a/tsl/src/compression/dictionary.c b/tsl/src/compression/dictionary.c index 0551dc89bfe..e0218d4f44c 100644 --- a/tsl/src/compression/dictionary.c +++ b/tsl/src/compression/dictionary.c @@ -708,7 +708,7 @@ dictionary_compressed_send(CompressedDataHeader *header, StringInfo buffer) Datum dictionary_compressed_recv(StringInfo buffer) { - DictionaryCompressorSerializationInfo data = { 0 }; + DictionaryCompressorSerializationInfo info = { 0 }; uint8 has_nulls; Oid element_type; @@ -716,27 +716,30 @@ dictionary_compressed_recv(StringInfo buffer) CheckCompressedData(has_nulls == 0 || has_nulls == 1); element_type = binary_string_get_type(buffer); - data.dictionary_compressed_indexes = 
simple8brle_serialized_recv(buffer); - data.bitmaps_size = simple8brle_serialized_total_size(data.dictionary_compressed_indexes); - data.total_size = MAXALIGN(sizeof(DictionaryCompressed)) + data.bitmaps_size; + info.dictionary_compressed_indexes = simple8brle_serialized_recv(buffer); + info.bitmaps_size = simple8brle_serialized_total_size(info.dictionary_compressed_indexes); + info.total_size = MAXALIGN(sizeof(DictionaryCompressed)) + info.bitmaps_size; if (has_nulls) { - data.compressed_nulls = simple8brle_serialized_recv(buffer); - data.nulls_size = simple8brle_serialized_total_size(data.compressed_nulls); - data.total_size += data.nulls_size; + info.compressed_nulls = simple8brle_serialized_recv(buffer); + info.nulls_size = simple8brle_serialized_total_size(info.compressed_nulls); + info.total_size += info.nulls_size; } - data.dictionary_serialization_info = array_compressed_data_recv(buffer, element_type); - data.dictionary_size = array_compression_serialization_size(data.dictionary_serialization_info); - data.total_size += data.dictionary_size; - data.num_distinct = - array_compression_serialization_num_elements(data.dictionary_serialization_info); + info.dictionary_serialization_info = array_compressed_data_recv(buffer, element_type); - if (!AllocSizeIsValid(data.total_size)) + CheckCompressedData(info.dictionary_serialization_info != NULL); + + info.dictionary_size = array_compression_serialization_size(info.dictionary_serialization_info); + info.total_size += info.dictionary_size; + info.num_distinct = + array_compression_serialization_num_elements(info.dictionary_serialization_info); + + if (!AllocSizeIsValid(info.total_size)) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("compressed size exceeds the maximum allowed (%d)", (int) MaxAllocSize))); - return PointerGetDatum(dictionary_compressed_from_serialization_info(data, element_type)); + return PointerGetDatum(dictionary_compressed_from_serialization_info(info, element_type)); } From 7fd33d0fe65b893ebe95808a099a82d2688f5f0d Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 31 Oct 2023 19:41:20 +0100 Subject: [PATCH 016/249] fuzzing fixes --- tsl/src/compression/array.c | 3 +-- ...crash-1ac67d1b8ebedeb93c515b5244f6e60613c1af0b | Bin 0 -> 440 bytes ...crash-b6cfa8632a8bf28e90198ec167f3f63258880f77 | Bin 0 -> 478 bytes ...crash-49789ae0866d7d630f2075dc26812433f4af1db3 | Bin 0 -> 231 bytes ...crash-b0db762535226b28c0b55ffe00d5537fd8ef7e39 | Bin 0 -> 52 bytes ...crash-bd6e8aa1ebeb96cf8aa644c5ef6eb2214dee0ffc | Bin 0 -> 56 bytes 6 files changed, 1 insertion(+), 2 deletions(-) create mode 100644 tsl/test/fuzzing/compression/array-text/crash-1ac67d1b8ebedeb93c515b5244f6e60613c1af0b create mode 100644 tsl/test/fuzzing/compression/array-text/crash-b6cfa8632a8bf28e90198ec167f3f63258880f77 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/crash-49789ae0866d7d630f2075dc26812433f4af1db3 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/crash-b0db762535226b28c0b55ffe00d5537fd8ef7e39 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/crash-bd6e8aa1ebeb96cf8aa644c5ef6eb2214dee0ffc diff --git a/tsl/src/compression/array.c b/tsl/src/compression/array.c index b37e06361da..bbdf4c36908 100644 --- a/tsl/src/compression/array.c +++ b/tsl/src/compression/array.c @@ -514,8 +514,7 @@ text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, for (int i = 0; i < n_notnull; i++) { void *vardata = consumeCompressedData(si, 
sizes[i]); - // CheckCompressedData(VARSIZE_ANY(vardata) == sizes[i]); - // CheckCompressedData(sizes[i] > VARHDRSZ); + CheckCompressedData(VARSIZE_ANY(vardata) == sizes[i]); const int textlen = VARSIZE_ANY_EXHDR(vardata); memcpy(&arrow_bodies[offset], VARDATA_ANY(vardata), textlen); diff --git a/tsl/test/fuzzing/compression/array-text/crash-1ac67d1b8ebedeb93c515b5244f6e60613c1af0b b/tsl/test/fuzzing/compression/array-text/crash-1ac67d1b8ebedeb93c515b5244f6e60613c1af0b new file mode 100644 index 0000000000000000000000000000000000000000..305bfd2a1814bfc7e8effbc8a29c55cdd9daab65 GIT binary patch literal 440 zcmZQ%C`gY_PAo~x$xmk}Nv$Yh0E4u&)S}e95)*S%AeSd46V5k5;~S&$?ahrK0ts*( zhNvpc(fDSTsPbkOXnb=tz8M-H>1J>Gw5>A7vHm#5lpeh4L<`~#gY4G3hk~}5<>m>y}$4l3y z_VySl#aALK$Y!_Ib@kTWroRqDoMQjyO!1yVE!t4d4aN*yd6u(db=bSSoaS>C(mH9} z^&m&!18@xtdIEX^dhGQA2j9nEJ^L{AWAJ0}WANi2zxDEK7RqFiOcuyw&&k9eklu%a literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/crash-49789ae0866d7d630f2075dc26812433f4af1db3 b/tsl/test/fuzzing/compression/dictionary-text/crash-49789ae0866d7d630f2075dc26812433f4af1db3 new file mode 100644 index 0000000000000000000000000000000000000000..cf31c012e9210d5588c44ed71abbebed662689a3 GIT binary patch literal 231 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEiK7zFqk80;Dt8o+EY4V)PIuulNdL^vA& literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/crash-b0db762535226b28c0b55ffe00d5537fd8ef7e39 b/tsl/test/fuzzing/compression/dictionary-text/crash-b0db762535226b28c0b55ffe00d5537fd8ef7e39 new file mode 100644 index 0000000000000000000000000000000000000000..a75b22fb44f6f6f52a8e2183159a8aced13ee384 GIT binary patch literal 52 ucmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEi~7)0_jaN0FMMHq?`b5j8*NC`v$ literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/crash-bd6e8aa1ebeb96cf8aa644c5ef6eb2214dee0ffc b/tsl/test/fuzzing/compression/dictionary-text/crash-bd6e8aa1ebeb96cf8aa644c5ef6eb2214dee0ffc new file mode 100644 index 0000000000000000000000000000000000000000..8262f92c11513080a09cb9e6e0b8b455d952fdcd GIT binary patch literal 56 tcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?YW1SA-N7zFq&AT&aRr8qG+6#zeF2*CgV literal 0 HcmV?d00001 From 94656ea93970c531f482cf02665cb50282f20c8e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 1 Nov 2023 11:31:40 +0100 Subject: [PATCH 017/249] fuzzing fixes --- .github/workflows/libfuzzer.yaml | 12 ++++++++++-- tsl/src/compression/array.c | 2 ++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index ed2876c5a78..57e5d67302a 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -90,6 +90,10 @@ jobs: export LIBFUZZER_PATH=$(dirname "$(find $(llvm-config --libdir) -name libclang_rt.fuzzer_no_main-x86_64.a | head -1)") + # Some pointers for the next time we have linking/undefined symbol problems: + # http://web.archive.org/web/20200926071757/https://github.com/google/sanitizers/issues/111 + # http://web.archive.org/web/20231101091231/https://github.com/cms-sw/cmssw/issues/40680 + cmake -B build -S . 
-DASSERTIONS=ON -DLINTER=OFF -DCMAKE_VERBOSE_MAKEFILE=1 \ -DWARNINGS_AS_ERRORS=1 -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_COMPILER=clang \ -DCMAKE_C_FLAGS="-fsanitize=fuzzer-no-link -lstdc++ -L$LIBFUZZER_PATH -l:libclang_rt.fuzzer_no_main-x86_64.a -static-libsan" \ @@ -144,17 +148,21 @@ jobs: export MODULE_NAME=$(basename $(find $HOME/$PG_INSTALL_DIR -name "timescaledb-tsl-*.so")) psql -a -c "create or replace function fuzz(algo cstring, type regtype, runs int) returns int as '"$MODULE_NAME"', 'ts_fuzz_compression' language c;" + # array has a peculiar recv function that recompresses all input, so + # fuzzing it is much slower. + runs=${{ matrix.case.algo == 'array' && 1000000 || 100000000 }} + # Start more fuzzing processes in the background. We won't even monitor # their progress, because the server will panic if they find an error. for x in {2..$(nproc)} do - psql -v ON_ERROR_STOP=1 -c "select fuzz('${{ matrix.case.algo }}', '${{ matrix.case.type }}', 100000000);" & + psql -v ON_ERROR_STOP=1 -c "select fuzz('${{ matrix.case.algo }}', '${{ matrix.case.type }}', $runs);" & done # Start the one fuzzing process that we will monitor, in foreground. # The LLVM fuzzing driver calls exit(), so we expect to lose the connection. ret=0 - psql -v ON_ERROR_STOP=1 -c "select fuzz('${{ matrix.case.algo }}', '${{ matrix.case.type }}', 100000000);" || ret=$? + psql -v ON_ERROR_STOP=1 -c "select fuzz('${{ matrix.case.algo }}', '${{ matrix.case.type }}', $runs);" || ret=$? if ! [ $ret -eq 2 ] then >&2 echo "Unexpected psql exit code $ret" diff --git a/tsl/src/compression/array.c b/tsl/src/compression/array.c index bbdf4c36908..e46e57c1656 100644 --- a/tsl/src/compression/array.c +++ b/tsl/src/compression/array.c @@ -514,6 +514,8 @@ text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, for (int i = 0; i < n_notnull; i++) { void *vardata = consumeCompressedData(si, sizes[i]); + CheckCompressedData(!VARATT_IS_EXTERNAL(vardata)); + CheckCompressedData(!VARATT_IS_COMPRESSED(vardata)); CheckCompressedData(VARSIZE_ANY(vardata) == sizes[i]); const int textlen = VARSIZE_ANY_EXHDR(vardata); memcpy(&arrow_bodies[offset], VARDATA_ANY(vardata), textlen); From 98cae15cad9a6ed488ffe56756e6c4c74f3e3b4b Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 1 Nov 2023 11:53:20 +0100 Subject: [PATCH 018/249] fixes after merge --- tsl/src/nodes/decompress_chunk/exec.c | 2 +- tsl/src/partialize_agg.c | 2 +- .../crash-707526606a02c72364e1c8ea82357eead6c74f60 | Bin 0 -> 81 bytes 3 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 tsl/test/fuzzing/compression/dictionary-text/crash-707526606a02c72364e1c8ea82357eead6c74f60 diff --git a/tsl/src/nodes/decompress_chunk/exec.c b/tsl/src/nodes/decompress_chunk/exec.c index 143ede755c1..02b5f26f321 100644 --- a/tsl/src/nodes/decompress_chunk/exec.c +++ b/tsl/src/nodes/decompress_chunk/exec.c @@ -623,7 +623,7 @@ perform_vectorized_sum_int4(DecompressChunkState *chunk_state, Aggref *aggref) ArrowArray *arrow = NULL; DecompressAllFunction decompress_all = - tsl_get_decompress_all_function(header->compression_algorithm); + tsl_get_decompress_all_function(header->compression_algorithm, column_description->typid); Assert(decompress_all != NULL); MemoryContext context_before_decompression = diff --git a/tsl/src/partialize_agg.c b/tsl/src/partialize_agg.c index d13a75cd16b..46b429c88a8 100644 --- a/tsl/src/partialize_agg.c +++ b/tsl/src/partialize_agg.c @@ -123,7 +123,7 @@ is_vectorizable_agg_path(PlannerInfo 
*root, AggPath *agg_path, Path *path) if (ci->segmentby_column_index > 0) continue; - bool bulk_decompression_possible = (tsl_get_decompress_all_function(ci->algo_id) != NULL); + bool bulk_decompression_possible = (tsl_get_decompress_all_function(ci->algo_id, var->vartype) != NULL); if (!bulk_decompression_possible) return false; diff --git a/tsl/test/fuzzing/compression/dictionary-text/crash-707526606a02c72364e1c8ea82357eead6c74f60 b/tsl/test/fuzzing/compression/dictionary-text/crash-707526606a02c72364e1c8ea82357eead6c74f60 new file mode 100644 index 0000000000000000000000000000000000000000..2061e2e7aad1a533bf4e22a2ec38d64897318540 GIT binary patch literal 81 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEh17+4q>1R4G_FgS|oFnqIX0LcI`6GMS| W{N#Dvzd>ps%0Z+Z5Hm1vFaQAMFAv-R literal 0 HcmV?d00001 From 7f38d1dabff3da542c2c92ee628865de67850a34 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 1 Nov 2023 15:01:34 +0100 Subject: [PATCH 019/249] testing improvements --- tsl/src/compression/compression.c | 15 +- tsl/src/compression/datum_serialize.c | 4 + .../decompress_arithmetic_test_impl.c | 148 ++++++++------- .../compression/decompress_text_test_impl.c | 169 ++++++++++-------- tsl/src/compression/dictionary.c | 2 +- tsl/src/nodes/decompress_chunk/exec.c | 3 +- tsl/src/partialize_agg.c | 3 +- ...h-707526606a02c72364e1c8ea82357eead6c74f60 | Bin 81 -> 0 bytes tsl/test/sql/compression_algos.sql | 61 +++++-- 9 files changed, 239 insertions(+), 166 deletions(-) delete mode 100644 tsl/test/fuzzing/compression/dictionary-text/crash-707526606a02c72364e1c8ea82357eead6c74f60 diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c index 94528673660..cd3c8453fae 100644 --- a/tsl/src/compression/compression.c +++ b/tsl/src/compression/compression.c @@ -2198,6 +2198,8 @@ get_compression_algorithm(char *name) return _INVALID_COMPRESSION_ALGORITHM; } +typedef enum { DTT_Fuzzing, DTT_RowByRow, DTT_Bulk } DecompressionTestType; + #define ALGO gorilla #define CTYPE float8 #define PGTYPE FLOAT8OID @@ -2221,7 +2223,7 @@ get_compression_algorithm(char *name) #include "decompress_text_test_impl.c" static int (*get_decompress_fn(int algo, Oid type))(const uint8 *Data, size_t Size, - bool extra_checks) + DecompressionTestType test_type) { if (algo == COMPRESSION_ALGORITHM_GORILLA && type == FLOAT8OID) { @@ -2254,7 +2256,7 @@ static int (*get_decompress_fn(int algo, Oid type))(const uint8 *Data, size_t Si * if we error out later. */ static void -read_compressed_data_file_impl(int algo, Oid type, const char *path, volatile int *bytes, int *rows) +read_compressed_data_file_impl(int algo, Oid type, const char *path, bool bulk, volatile int *bytes, int *rows) { FILE *f = fopen(path, "r"); @@ -2292,7 +2294,7 @@ read_compressed_data_file_impl(int algo, Oid type, const char *path, volatile in string[fsize] = 0; - *rows = get_decompress_fn(algo, type)((const uint8 *) string, fsize, /* extra_checks = */ true); + *rows = get_decompress_fn(algo, type)((const uint8 *) string, fsize, /* test_type = */ bulk ? 
DTT_Bulk : DTT_RowByRow); } TS_FUNCTION_INFO_V1(ts_read_compressed_data_file); @@ -2306,6 +2308,7 @@ ts_read_compressed_data_file(PG_FUNCTION_ARGS) read_compressed_data_file_impl(get_compression_algorithm(PG_GETARG_CSTRING(0)), PG_GETARG_OID(1), PG_GETARG_CSTRING(2), + PG_GETARG_BOOL(3), &bytes, &rows); PG_RETURN_INT32(rows); @@ -2407,7 +2410,7 @@ ts_read_compressed_data_directory(PG_FUNCTION_ARGS) volatile int bytes = 0; PG_TRY(); { - read_compressed_data_file_impl(algo, PG_GETARG_OID(1), path, &bytes, &rows); + read_compressed_data_file_impl(algo, PG_GETARG_OID(1), path, PG_GETARG_BOOL(3), &bytes, &rows); values[out_rows] = Int32GetDatum(rows); nulls[out_rows] = false; } @@ -2453,7 +2456,7 @@ ts_read_compressed_data_directory(PG_FUNCTION_ARGS) * has to catch the postgres exceptions normally produced for corrupt data. */ static int -llvm_fuzz_target_generic(int (*target)(const uint8_t *Data, size_t Size, bool extra_checks), +llvm_fuzz_target_generic(int (*target)(const uint8_t *Data, size_t Size, DecompressionTestType test_type), const uint8_t *Data, size_t Size) { MemoryContextReset(CurrentMemoryContext); @@ -2461,7 +2464,7 @@ llvm_fuzz_target_generic(int (*target)(const uint8_t *Data, size_t Size, bool ex PG_TRY(); { CHECK_FOR_INTERRUPTS(); - target(Data, Size, /* extra_checks = */ false); + target(Data, Size, /* test_type = */ DTT_Fuzzing); } PG_CATCH(); { diff --git a/tsl/src/compression/datum_serialize.c b/tsl/src/compression/datum_serialize.c index c2ac0c06bee..4849a4b3bc8 100644 --- a/tsl/src/compression/datum_serialize.c +++ b/tsl/src/compression/datum_serialize.c @@ -20,6 +20,8 @@ #include "datum_serialize.h" #include "compat/compat.h" +#include "compression.h" + typedef struct DatumSerializer { Oid type_oid; @@ -305,6 +307,8 @@ bytes_to_datum_and_advance(DatumDeserializer *deserializer, const char **ptr) *ptr = (Pointer) att_align_pointer(*ptr, deserializer->type_align, deserializer->type_len, *ptr); +// CheckCompressedData(!VARATT_IS_EXTERNAL(*ptr)); +// CheckCompressedData(!VARATT_IS_COMPRESSED(*ptr)); res = fetch_att(*ptr, deserializer->type_by_val, deserializer->type_len); *ptr = att_addlength_pointer(*ptr, deserializer->type_len, *ptr); return res; diff --git a/tsl/src/compression/decompress_arithmetic_test_impl.c b/tsl/src/compression/decompress_arithmetic_test_impl.c index 90d7cc4bf2c..9f06d9ef934 100644 --- a/tsl/src/compression/decompress_arithmetic_test_impl.c +++ b/tsl/src/compression/decompress_arithmetic_test_impl.c @@ -4,12 +4,68 @@ * LICENSE-TIMESCALE for a copy of the license. 
*/ -#define FUNCTION_NAME_HELPER(X, Y) decompress_##X##_##Y -#define FUNCTION_NAME(X, Y) FUNCTION_NAME_HELPER(X, Y) +#define FUNCTION_NAME_HELPER3(X, Y, Z) X##_##Y##_##Z +#define FUNCTION_NAME3(X, Y, Z) FUNCTION_NAME_HELPER3(X, Y, Z) +#define FUNCTION_NAME_HELPER2(X, Y) X##_##Y +#define FUNCTION_NAME2(X, Y) FUNCTION_NAME_HELPER2(X, Y) #define TOSTRING_HELPER(x) #x #define TOSTRING(x) TOSTRING_HELPER(x) + +static void +FUNCTION_NAME2(check_arrow, CTYPE)(ArrowArray *arrow, int error_type, DecompressResult *results, int n) +{ + if (n != arrow->length) + { + ereport(error_type, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("the bulk decompression result does not match"), + errdetail("Expected %d elements, got %d.", n, (int) arrow->length))); + } + + for (int i = 0; i < n; i++) + { + const bool arrow_isnull = !arrow_row_is_valid(arrow->buffers[0], i); + if (arrow_isnull != results[i].is_null) + { + ereport(error_type, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("the bulk decompression result does not match"), + errdetail("Expected null %d, got %d at row %d.", + results[i].is_null, + arrow_isnull, + i))); + } + + if (!results[i].is_null) + { + const CTYPE arrow_value = ((CTYPE *) arrow->buffers[1])[i]; + const CTYPE rowbyrow_value = DATUM_TO_CTYPE(results[i].val); + + /* + * Floats can also be NaN/infinite and the comparison doesn't + * work in that case. + */ + if (isfinite((double) arrow_value) != isfinite((double) rowbyrow_value)) + { + ereport(error_type, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("the bulk decompression result does not match"), + errdetail("At row %d\n", i))); + } + + if (isfinite((double) arrow_value) && arrow_value != rowbyrow_value) + { + ereport(error_type, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("the bulk decompression result does not match"), + errdetail("At row %d\n", i))); + } + } + } +} + /* * Try to decompress the given compressed data. Used for fuzzing and for checking * the examples found by fuzzing. For fuzzing we do less checks to keep it @@ -17,7 +73,7 @@ * for arithmetic types. */ static int -FUNCTION_NAME(ALGO, CTYPE)(const uint8 *Data, size_t Size, bool extra_checks) +FUNCTION_NAME3(decompress, ALGO, CTYPE)(const uint8 *Data, size_t Size, DecompressionTestType test_type) { StringInfoData si = { .data = (char *) Data, .len = Size }; @@ -37,26 +93,25 @@ FUNCTION_NAME(ALGO, CTYPE)(const uint8 *Data, size_t Size, bool extra_checks) Datum compressed_data = definitions[algo].compressed_data_recv(&si); - if (!extra_checks) + DecompressAllFunction decompress_all = tsl_get_decompress_all_function(algo, PGTYPE); + + if (test_type == DTT_Fuzzing) { /* * For routine fuzzing, we only run bulk decompression to make it faster * and the coverage space smaller. */ - DecompressAllFunction decompress_all = tsl_get_decompress_all_function(algo, PGTYPE); decompress_all(compressed_data, PGTYPE, CurrentMemoryContext); return 0; } - /* - * Test bulk decompression. This might hide some errors in the row-by-row - * decompression, but testing both is significantly more complicated, and - * the row-by-row is old and stable. - */ ArrowArray *arrow = NULL; - DecompressAllFunction decompress_all = tsl_get_decompress_all_function(algo, PGTYPE); - if (decompress_all) + if (test_type == DTT_Bulk) { + /* + * Test bulk decompression. Have to do this before row-by-row decompression + * so that the latter doesn't hide the errors. 
+ */ arrow = decompress_all(compressed_data, PGTYPE, CurrentMemoryContext); } @@ -77,61 +132,15 @@ FUNCTION_NAME(ALGO, CTYPE)(const uint8 *Data, size_t Size, bool extra_checks) } /* Check that both ways of decompression match. */ - if (arrow) + if (test_type == DTT_Bulk) { - if (n != arrow->length) - { - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("the bulk decompression result does not match"), - errdetail("Expected %d elements, got %d.", n, (int) arrow->length))); - } - - for (int i = 0; i < n; i++) - { - const bool arrow_isnull = !arrow_row_is_valid(arrow->buffers[0], i); - if (arrow_isnull != results[i].is_null) - { - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("the bulk decompression result does not match"), - errdetail("Expected null %d, got %d at row %d.", - results[i].is_null, - arrow_isnull, - i))); - } - - if (!results[i].is_null) - { - const CTYPE arrow_value = ((CTYPE *) arrow->buffers[1])[i]; - const CTYPE rowbyrow_value = DATUM_TO_CTYPE(results[i].val); - - /* - * Floats can also be NaN/infinite and the comparison doesn't - * work in that case. - */ - if (isfinite((double) arrow_value) != isfinite((double) rowbyrow_value)) - { - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("the bulk decompression result does not match"), - errdetail("At row %d\n", i))); - } - - if (isfinite((double) arrow_value) && arrow_value != rowbyrow_value) - { - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("the bulk decompression result does not match"), - errdetail("At row %d\n", i))); - } - } - } + FUNCTION_NAME2(check_arrow, CTYPE)(arrow, ERROR, results, n); + return n; } /* - * Check that the result is still the same after we compress and decompress - * back. + * For row-by-row decompression, check that the result is still the same + * after we compress and decompress back. * * 1) Compress. */ @@ -195,11 +204,20 @@ FUNCTION_NAME(ALGO, CTYPE)(const uint8 *Data, size_t Size, bool extra_checks) } } + /* + * 3) The bulk decompression must absolutely work on the correct compressed + * data we've just generated. + */ + arrow = decompress_all(compressed_data, PGTYPE, CurrentMemoryContext); + FUNCTION_NAME2(check_arrow, CTYPE)(arrow, PANIC, results, n); + return n; } #undef TOSTRING #undef TOSTRING_HELPER -#undef FUNCTION_NAME -#undef FUNCTION_NAME_HELPER +#undef FUNCTION_NAME3 +#undef FUNCTION_NAME_HELPER3 +#undef FUNCTION_NAME2 +#undef FUNCTION_NAME_HELPER2 diff --git a/tsl/src/compression/decompress_text_test_impl.c b/tsl/src/compression/decompress_text_test_impl.c index 1f44d91d3f0..d28d36a8a55 100644 --- a/tsl/src/compression/decompress_text_test_impl.c +++ b/tsl/src/compression/decompress_text_test_impl.c @@ -25,6 +25,65 @@ arrow_get_str(ArrowArray *arrow, int arrow_row, const char **str, size_t *len) arrow_get_str(arrow->dictionary, dict_row, str, len); } +static void +decompress_generic_text_check_arrow(ArrowArray *arrow, int errorlevel, DecompressResult *results, int n) +{ + /* Check that both ways of decompression match. 
*/ + if (n != arrow->length) + { + ereport(errorlevel, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("the bulk decompression result does not match"), + errdetail("Expected %d elements, got %d.", n, (int) arrow->length))); + } + + for (int i = 0; i < n; i++) + { + const bool arrow_isnull = !arrow_row_is_valid(arrow->buffers[0], i); + if (arrow_isnull != results[i].is_null) + { + ereport(errorlevel, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("the bulk decompression result does not match"), + errdetail("Expected null %d, got %d at row %d.", + results[i].is_null, + arrow_isnull, + i))); + } + + if (!results[i].is_null) + { + const char *arrow_cstring; + size_t arrow_len; + arrow_get_str(arrow, i, &arrow_cstring, &arrow_len); + + const Datum rowbyrow_varlena = results[i].val; + const size_t rowbyrow_len = VARSIZE_ANY_EXHDR(rowbyrow_varlena); + const char *rowbyrow_cstring = VARDATA_ANY(rowbyrow_varlena); + + // fprintf(stderr, "arrow: '%.*s'(%ld), rbr: '%.*s'(%ld)\n", + // (int) arrow_len, arrow_cstring, arrow_len, + // (int) rowbyrow_len, rowbyrow_cstring, rowbyrow_len); + + if (rowbyrow_len != arrow_len) + { + ereport(errorlevel, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("the bulk decompression result does not match"), + errdetail("At row %d\n", i))); + } + + if (strncmp(arrow_cstring, rowbyrow_cstring, rowbyrow_len)) + { + ereport(errorlevel, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("the bulk decompression result does not match"), + errdetail("At row %d\n", i))); + } + } + } +} + /* * Try to decompress the given compressed data. Used for fuzzing and for checking * the examples found by fuzzing. For fuzzing we do less checks to keep it @@ -32,7 +91,7 @@ arrow_get_str(ArrowArray *arrow, int arrow_row, const char **str, size_t *len) * for arithmetic types. */ static int -decompress_generic_text(const uint8 *Data, size_t Size, bool extra_checks, int requested_algo) +decompress_generic_text(const uint8 *Data, size_t Size, DecompressionTestType test_type, int requested_algo) { StringInfoData si = { .data = (char *) Data, .len = Size }; @@ -52,26 +111,25 @@ decompress_generic_text(const uint8 *Data, size_t Size, bool extra_checks, int r Datum compressed_data = definitions[data_algo].compressed_data_recv(&si); - if (!extra_checks) + DecompressAllFunction decompress_all = tsl_get_decompress_all_function(data_algo, TEXTOID); + + if (test_type == DTT_Fuzzing) { /* * For routine fuzzing, we only run bulk decompression to make it faster * and the coverage space smaller. */ - DecompressAllFunction decompress_all = tsl_get_decompress_all_function(data_algo, TEXTOID); decompress_all(compressed_data, TEXTOID, CurrentMemoryContext); return 0; } - /* - * Test bulk decompression. This might hide some errors in the row-by-row - * decompression, but testing both is significantly more complicated, and - * the row-by-row is old and stable. - */ - ArrowArray *arrow = NULL; - DecompressAllFunction decompress_all = tsl_get_decompress_all_function(data_algo, TEXTOID); - if (decompress_all) + ArrowArray * arrow = NULL; + if (test_type == DTT_Bulk) { + /* + * Check that the arrow decompression works. Have to do this before the + * row-by-row decompression so that it doesn't hide the possible errors. + */ arrow = decompress_all(compressed_data, TEXTOID, CurrentMemoryContext); } @@ -92,68 +150,26 @@ decompress_generic_text(const uint8 *Data, size_t Size, bool extra_checks, int r results[n++] = r; } - /* Check that both ways of decompression match. 
*/ - if (arrow) + if (test_type == DTT_Bulk) { - if (n != arrow->length) - { - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("the bulk decompression result does not match"), - errdetail("Expected %d elements, got %d.", n, (int) arrow->length))); - } - - for (int i = 0; i < n; i++) - { - const bool arrow_isnull = !arrow_row_is_valid(arrow->buffers[0], i); - if (arrow_isnull != results[i].is_null) - { - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("the bulk decompression result does not match"), - errdetail("Expected null %d, got %d at row %d.", - results[i].is_null, - arrow_isnull, - i))); - } - - if (!results[i].is_null) - { - const char *arrow_cstring; - size_t arrow_len; - arrow_get_str(arrow, i, &arrow_cstring, &arrow_len); - - const Datum rowbyrow_varlena = results[i].val; - const size_t rowbyrow_len = VARSIZE_ANY_EXHDR(rowbyrow_varlena); - const char *rowbyrow_cstring = VARDATA_ANY(rowbyrow_varlena); - - // fprintf(stderr, "arrow: '%.*s'(%ld), rbr: '%.*s'(%ld)\n", - // (int) arrow_len, arrow_cstring, arrow_len, - // (int) rowbyrow_len, rowbyrow_cstring, rowbyrow_len); - - if (rowbyrow_len != arrow_len) - { - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("the bulk decompression result does not match"), - errdetail("At row %d\n", i))); - } - - if (strncmp(arrow_cstring, rowbyrow_cstring, rowbyrow_len)) - { - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("the bulk decompression result does not match"), - errdetail("At row %d\n", i))); - } - } - } + /* + * Check that the arrow decompression result matches. + */ + decompress_generic_text_check_arrow(arrow, ERROR, results, n); + return n; } /* - * Check that the result is still the same after we compress and decompress - * back. - * + * For row-by-row decompression, check that the result is still the same + * after we compress and decompress back. + * Don't perform this check for other types of tests. + */ + if (test_type != DTT_RowByRow) + { + return n; + } + + /* * 1) Compress. */ Compressor *compressor = definitions[data_algo].compressor_for_type(TEXTOID); @@ -219,17 +235,24 @@ decompress_generic_text(const uint8 *Data, size_t Size, bool extra_checks, int r } } + /* + * 3) The bulk decompression must absolutely work on the correct compressed + * data we've just generated. 
+ */ + arrow = decompress_all(compressed_data, TEXTOID, CurrentMemoryContext); + decompress_generic_text_check_arrow(arrow, PANIC, results, n); + return n; } static int -decompress_array_text(const uint8 *Data, size_t Size, bool extra_checks) +decompress_array_text(const uint8 *Data, size_t Size, DecompressionTestType test_type) { - return decompress_generic_text(Data, Size, extra_checks, COMPRESSION_ALGORITHM_ARRAY); + return decompress_generic_text(Data, Size, test_type, COMPRESSION_ALGORITHM_ARRAY); } static int -decompress_dictionary_text(const uint8 *Data, size_t Size, bool extra_checks) +decompress_dictionary_text(const uint8 *Data, size_t Size, DecompressionTestType test_type) { - return decompress_generic_text(Data, Size, extra_checks, COMPRESSION_ALGORITHM_DICTIONARY); + return decompress_generic_text(Data, Size, test_type, COMPRESSION_ALGORITHM_DICTIONARY); } diff --git a/tsl/src/compression/dictionary.c b/tsl/src/compression/dictionary.c index e0218d4f44c..145762404fc 100644 --- a/tsl/src/compression/dictionary.c +++ b/tsl/src/compression/dictionary.c @@ -562,7 +562,7 @@ dictionary_decompression_iterator_try_next_forward(DecompressionIterator *iter_b .is_done = true, }; - Assert(result.val < iter->compressed->num_distinct); + CheckCompressedData(result.val < iter->compressed->num_distinct); return (DecompressResult){ .val = iter->values[result.val], .is_null = false, diff --git a/tsl/src/nodes/decompress_chunk/exec.c b/tsl/src/nodes/decompress_chunk/exec.c index 02b5f26f321..52de4837fb1 100644 --- a/tsl/src/nodes/decompress_chunk/exec.c +++ b/tsl/src/nodes/decompress_chunk/exec.c @@ -623,7 +623,8 @@ perform_vectorized_sum_int4(DecompressChunkState *chunk_state, Aggref *aggref) ArrowArray *arrow = NULL; DecompressAllFunction decompress_all = - tsl_get_decompress_all_function(header->compression_algorithm, column_description->typid); + tsl_get_decompress_all_function(header->compression_algorithm, + column_description->typid); Assert(decompress_all != NULL); MemoryContext context_before_decompression = diff --git a/tsl/src/partialize_agg.c b/tsl/src/partialize_agg.c index 46b429c88a8..7a8885d3b9e 100644 --- a/tsl/src/partialize_agg.c +++ b/tsl/src/partialize_agg.c @@ -123,7 +123,8 @@ is_vectorizable_agg_path(PlannerInfo *root, AggPath *agg_path, Path *path) if (ci->segmentby_column_index > 0) continue; - bool bulk_decompression_possible = (tsl_get_decompress_all_function(ci->algo_id, var->vartype) != NULL); + bool bulk_decompression_possible = + (tsl_get_decompress_all_function(ci->algo_id, var->vartype) != NULL); if (!bulk_decompression_possible) return false; diff --git a/tsl/test/fuzzing/compression/dictionary-text/crash-707526606a02c72364e1c8ea82357eead6c74f60 b/tsl/test/fuzzing/compression/dictionary-text/crash-707526606a02c72364e1c8ea82357eead6c74f60 deleted file mode 100644 index 2061e2e7aad1a533bf4e22a2ec38d64897318540..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 81 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEh17+4q>1R4G_FgS|oFnqIX0LcI`6GMS| W{N#Dvzd>ps%0Z+Z5Hm1vFaQAMFAv-R diff --git a/tsl/test/sql/compression_algos.sql b/tsl/test/sql/compression_algos.sql index 3a4666944e1..412e3d7a7b3 100644 --- a/tsl/test/sql/compression_algos.sql +++ b/tsl/test/sql/compression_algos.sql @@ -376,29 +376,52 @@ DROP TABLE base_texts; \c :TEST_DBNAME :ROLE_SUPERUSER -create or replace function ts_read_compressed_data_directory(cstring, regtype, cstring) -returns table(path text, bytes int, rows int, sqlstate text, location text) -as :TSL_MODULE_PATHNAME, 
'ts_read_compressed_data_directory' language c; - -select count(*), coalesce((rows >= 0)::text, sqlstate) result -from ts_read_compressed_data_directory('gorilla', 'float8', (:'TEST_INPUT_DIR' || '/fuzzing/compression/gorilla-float8')::cstring) -group by 2 order by 1 desc; - -select count(*), coalesce((rows >= 0)::text, sqlstate) result -from ts_read_compressed_data_directory('deltadelta', 'int8', (:'TEST_INPUT_DIR' || '/fuzzing/compression/deltadelta-int8')::cstring) -group by 2 order by 1 desc; - -create or replace function ts_read_compressed_data_file(cstring, regtype, cstring) returns int +create or replace function ts_read_compressed_data_file(cstring, regtype, cstring, bool = true) returns int as :TSL_MODULE_PATHNAME, 'ts_read_compressed_data_file' language c; \set ON_ERROR_STOP 0 select ts_read_compressed_data_file('gorilla', 'float8', '--nonexistent'); \set ON_ERROR_STOP 1 -select count(*), coalesce((rows >= 0)::text, sqlstate) result -from ts_read_compressed_data_directory('array', 'text', (:'TEST_INPUT_DIR' || '/fuzzing/compression/array-text')::cstring) -group by 2 order by 1 desc; +create or replace function ts_read_compressed_data_directory(cstring, regtype, cstring, bool) +returns table(path text, bytes int, rows int, sqlstate text, location text) +as :TSL_MODULE_PATHNAME, 'ts_read_compressed_data_directory' language c; + +\set fn 'ts_read_compressed_data_directory(:''algo'', :''type'', format(''%s/fuzzing/compression/%s-%s'', :''TEST_INPUT_DIR'', :''algo'', :''type'')::cstring, ' + +\set algo gorilla +\set type float8 +select count(*) + , coalesce((bulk.rows >= 0)::text, bulk.sqlstate) bulk_result + , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result +from :fn true) bulk join :fn false) rowbyrow using (path) +group by 2, 3 order by 1 desc +; + +\set algo deltadelta +\set type int8 +select count(*) + , coalesce((bulk.rows >= 0)::text, bulk.sqlstate) bulk_result + , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result +from :fn true) bulk join :fn false) rowbyrow using (path) +group by 2, 3 order by 1 desc +; + +\set algo array +\set type text +select count(*) + , coalesce((bulk.rows >= 0)::text, bulk.sqlstate) bulk_result + , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result +from :fn true) bulk join :fn false) rowbyrow using (path) +group by 2, 3 order by 1 desc +; + +\set algo dictionary +\set type text +select count(*) + , coalesce((bulk.rows >= 0)::text, bulk.sqlstate) bulk_result + , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result +from :fn true) bulk join :fn false) rowbyrow using (path) +group by 2, 3 order by 1 desc +; -select count(*), coalesce((rows >= 0)::text, sqlstate) result -from ts_read_compressed_data_directory('dictionary', 'text', (:'TEST_INPUT_DIR' || '/fuzzing/compression/dictionary-text')::cstring) -group by 2 order by 1 desc; From 4f5c1a4d1b58508fcfad163d48af3977ac6d3c5d Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 2 Nov 2023 13:08:16 +0100 Subject: [PATCH 020/249] more checks and some cleanup --- tsl/src/compression/compression.c | 24 +++- tsl/src/compression/datum_serialize.c | 11 +- .../decompress_arithmetic_test_impl.c | 13 ++- .../compression/decompress_text_test_impl.c | 21 +++- tsl/test/expected/compression_algos.out | 103 +++++++++++------- ...h-707526606a02c72364e1c8ea82357eead6c74f60 | Bin 0 -> 81 bytes 6 files changed, 116 insertions(+), 56 deletions(-) create mode 100644 
tsl/test/fuzzing/compression/dictionary-text/crash-707526606a02c72364e1c8ea82357eead6c74f60 diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c index cd3c8453fae..39c76f02ec4 100644 --- a/tsl/src/compression/compression.c +++ b/tsl/src/compression/compression.c @@ -2198,7 +2198,12 @@ get_compression_algorithm(char *name) return _INVALID_COMPRESSION_ALGORITHM; } -typedef enum { DTT_Fuzzing, DTT_RowByRow, DTT_Bulk } DecompressionTestType; +typedef enum +{ + DTT_Fuzzing, + DTT_RowByRow, + DTT_Bulk +} DecompressionTestType; #define ALGO gorilla #define CTYPE float8 @@ -2256,7 +2261,8 @@ static int (*get_decompress_fn(int algo, Oid type))(const uint8 *Data, size_t Si * if we error out later. */ static void -read_compressed_data_file_impl(int algo, Oid type, const char *path, bool bulk, volatile int *bytes, int *rows) +read_compressed_data_file_impl(int algo, Oid type, const char *path, bool bulk, volatile int *bytes, + int *rows) { FILE *f = fopen(path, "r"); @@ -2294,7 +2300,9 @@ read_compressed_data_file_impl(int algo, Oid type, const char *path, bool bulk, string[fsize] = 0; - *rows = get_decompress_fn(algo, type)((const uint8 *) string, fsize, /* test_type = */ bulk ? DTT_Bulk : DTT_RowByRow); + *rows = get_decompress_fn(algo, type)((const uint8 *) string, + fsize, + /* test_type = */ bulk ? DTT_Bulk : DTT_RowByRow); } TS_FUNCTION_INFO_V1(ts_read_compressed_data_file); @@ -2410,7 +2418,12 @@ ts_read_compressed_data_directory(PG_FUNCTION_ARGS) volatile int bytes = 0; PG_TRY(); { - read_compressed_data_file_impl(algo, PG_GETARG_OID(1), path, PG_GETARG_BOOL(3), &bytes, &rows); + read_compressed_data_file_impl(algo, + PG_GETARG_OID(1), + path, + PG_GETARG_BOOL(3), + &bytes, + &rows); values[out_rows] = Int32GetDatum(rows); nulls[out_rows] = false; } @@ -2456,7 +2469,8 @@ ts_read_compressed_data_directory(PG_FUNCTION_ARGS) * has to catch the postgres exceptions normally produced for corrupt data. */ static int -llvm_fuzz_target_generic(int (*target)(const uint8_t *Data, size_t Size, DecompressionTestType test_type), +llvm_fuzz_target_generic(int (*target)(const uint8_t *Data, size_t Size, + DecompressionTestType test_type), const uint8_t *Data, size_t Size) { MemoryContextReset(CurrentMemoryContext); diff --git a/tsl/src/compression/datum_serialize.c b/tsl/src/compression/datum_serialize.c index 4849a4b3bc8..bf3a799cf25 100644 --- a/tsl/src/compression/datum_serialize.c +++ b/tsl/src/compression/datum_serialize.c @@ -307,8 +307,15 @@ bytes_to_datum_and_advance(DatumDeserializer *deserializer, const char **ptr) *ptr = (Pointer) att_align_pointer(*ptr, deserializer->type_align, deserializer->type_len, *ptr); -// CheckCompressedData(!VARATT_IS_EXTERNAL(*ptr)); -// CheckCompressedData(!VARATT_IS_COMPRESSED(*ptr)); + if (deserializer->type_len == -1) + { + /* + * Check for potentially corrupt varlena headers since we're reading them + * directly from compressed data. We can only have a plain datum + * with 1-byte or 4-byte header here, no TOAST or compressed data. 
+ */ + CheckCompressedData(VARATT_IS_4B_U(*ptr) || (VARATT_IS_1B(*ptr) && !VARATT_IS_1B_E(*ptr))); + } res = fetch_att(*ptr, deserializer->type_by_val, deserializer->type_len); *ptr = att_addlength_pointer(*ptr, deserializer->type_len, *ptr); return res; diff --git a/tsl/src/compression/decompress_arithmetic_test_impl.c b/tsl/src/compression/decompress_arithmetic_test_impl.c index 9f06d9ef934..b3d2b261149 100644 --- a/tsl/src/compression/decompress_arithmetic_test_impl.c +++ b/tsl/src/compression/decompress_arithmetic_test_impl.c @@ -12,9 +12,9 @@ #define TOSTRING_HELPER(x) #x #define TOSTRING(x) TOSTRING_HELPER(x) - static void -FUNCTION_NAME2(check_arrow, CTYPE)(ArrowArray *arrow, int error_type, DecompressResult *results, int n) +FUNCTION_NAME2(check_arrow, CTYPE)(ArrowArray *arrow, int error_type, DecompressResult *results, + int n) { if (n != arrow->length) { @@ -44,9 +44,9 @@ FUNCTION_NAME2(check_arrow, CTYPE)(ArrowArray *arrow, int error_type, Decompress const CTYPE rowbyrow_value = DATUM_TO_CTYPE(results[i].val); /* - * Floats can also be NaN/infinite and the comparison doesn't - * work in that case. - */ + * Floats can also be NaN/infinite and the comparison doesn't + * work in that case. + */ if (isfinite((double) arrow_value) != isfinite((double) rowbyrow_value)) { ereport(error_type, @@ -73,7 +73,8 @@ FUNCTION_NAME2(check_arrow, CTYPE)(ArrowArray *arrow, int error_type, Decompress * for arithmetic types. */ static int -FUNCTION_NAME3(decompress, ALGO, CTYPE)(const uint8 *Data, size_t Size, DecompressionTestType test_type) +FUNCTION_NAME3(decompress, ALGO, CTYPE)(const uint8 *Data, size_t Size, + DecompressionTestType test_type) { StringInfoData si = { .data = (char *) Data, .len = Size }; diff --git a/tsl/src/compression/decompress_text_test_impl.c b/tsl/src/compression/decompress_text_test_impl.c index d28d36a8a55..0e9a18992e7 100644 --- a/tsl/src/compression/decompress_text_test_impl.c +++ b/tsl/src/compression/decompress_text_test_impl.c @@ -26,7 +26,8 @@ arrow_get_str(ArrowArray *arrow, int arrow_row, const char **str, size_t *len) } static void -decompress_generic_text_check_arrow(ArrowArray *arrow, int errorlevel, DecompressResult *results, int n) +decompress_generic_text_check_arrow(ArrowArray *arrow, int errorlevel, DecompressResult *results, + int n) { /* Check that both ways of decompression match. */ if (n != arrow->length) @@ -91,7 +92,8 @@ decompress_generic_text_check_arrow(ArrowArray *arrow, int errorlevel, Decompres * for arithmetic types. */ static int -decompress_generic_text(const uint8 *Data, size_t Size, DecompressionTestType test_type, int requested_algo) +decompress_generic_text(const uint8 *Data, size_t Size, DecompressionTestType test_type, + int requested_algo) { StringInfoData si = { .data = (char *) Data, .len = Size }; @@ -119,11 +121,22 @@ decompress_generic_text(const uint8 *Data, size_t Size, DecompressionTestType te * For routine fuzzing, we only run bulk decompression to make it faster * and the coverage space smaller. 
*/ + + /* + DecompressionIterator *iter = + definitions[data_algo].iterator_init_forward(compressed_data, TEXTOID); + for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) + { + } + return 0; + /*/ + decompress_all(compressed_data, TEXTOID, CurrentMemoryContext); return 0; + //*/ } - ArrowArray * arrow = NULL; + ArrowArray *arrow = NULL; if (test_type == DTT_Bulk) { /* @@ -169,7 +182,7 @@ decompress_generic_text(const uint8 *Data, size_t Size, DecompressionTestType te return n; } - /* + /* * 1) Compress. */ Compressor *compressor = definitions[data_algo].compressor_for_type(TEXTOID); diff --git a/tsl/test/expected/compression_algos.out b/tsl/test/expected/compression_algos.out index b77d575f156..1ff24380529 100644 --- a/tsl/test/expected/compression_algos.out +++ b/tsl/test/expected/compression_algos.out @@ -1542,49 +1542,74 @@ DROP TABLE base_texts; -- Interesting corrupt data found by fuzzing -- ----------------------------------------------- \c :TEST_DBNAME :ROLE_SUPERUSER -create or replace function ts_read_compressed_data_directory(cstring, regtype, cstring) -returns table(path text, bytes int, rows int, sqlstate text, location text) -as :TSL_MODULE_PATHNAME, 'ts_read_compressed_data_directory' language c; -select count(*), coalesce((rows >= 0)::text, sqlstate) result -from ts_read_compressed_data_directory('gorilla', 'float8', (:'TEST_INPUT_DIR' || '/fuzzing/compression/gorilla-float8')::cstring) -group by 2 order by 1 desc; - count | result --------+-------- - 224 | XX001 - 55 | true - 23 | 08P01 -(3 rows) - -select count(*), coalesce((rows >= 0)::text, sqlstate) result -from ts_read_compressed_data_directory('deltadelta', 'int8', (:'TEST_INPUT_DIR' || '/fuzzing/compression/deltadelta-int8')::cstring) -group by 2 order by 1 desc; - count | result --------+-------- - 168 | XX001 - 69 | true - 13 | 08P01 - 1 | false -(4 rows) - -create or replace function ts_read_compressed_data_file(cstring, regtype, cstring) returns int +create or replace function ts_read_compressed_data_file(cstring, regtype, cstring, bool = true) returns int as :TSL_MODULE_PATHNAME, 'ts_read_compressed_data_file' language c; \set ON_ERROR_STOP 0 select ts_read_compressed_data_file('gorilla', 'float8', '--nonexistent'); ERROR: could not open the file '--nonexistent' \set ON_ERROR_STOP 1 -select count(*), coalesce((rows >= 0)::text, sqlstate) result -from ts_read_compressed_data_directory('array', 'text', (:'TEST_INPUT_DIR' || '/fuzzing/compression/array-text')::cstring) -group by 2 order by 1 desc; - count | result --------+-------- - 1 | true -(1 row) +create or replace function ts_read_compressed_data_directory(cstring, regtype, cstring, bool) +returns table(path text, bytes int, rows int, sqlstate text, location text) +as :TSL_MODULE_PATHNAME, 'ts_read_compressed_data_directory' language c; +\set fn 'ts_read_compressed_data_directory(:''algo'', :''type'', format(''%s/fuzzing/compression/%s-%s'', :''TEST_INPUT_DIR'', :''algo'', :''type'')::cstring, ' +\set algo gorilla +\set type float8 +select count(*) + , coalesce((bulk.rows >= 0)::text, bulk.sqlstate) bulk_result + , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result +from :fn true) bulk join :fn false) rowbyrow using (path) +group by 2, 3 order by 1 desc +; + count | bulk_result | rowbyrow_result +-------+-------------+----------------- + 142 | XX001 | true + 82 | XX001 | XX001 + 55 | true | true + 23 | 08P01 | 08P01 +(4 rows) -select count(*), coalesce((rows >= 0)::text, sqlstate) result -from 
ts_read_compressed_data_directory('dictionary', 'text', (:'TEST_INPUT_DIR' || '/fuzzing/compression/dictionary-text')::cstring) -group by 2 order by 1 desc; - count | result --------+-------- - 1 | true -(1 row) +\set algo deltadelta +\set type int8 +select count(*) + , coalesce((bulk.rows >= 0)::text, bulk.sqlstate) bulk_result + , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result +from :fn true) bulk join :fn false) rowbyrow using (path) +group by 2, 3 order by 1 desc +; + count | bulk_result | rowbyrow_result +-------+-------------+----------------- + 106 | XX001 | XX001 + 69 | true | true + 62 | XX001 | true + 13 | 08P01 | 08P01 + 1 | false | false +(5 rows) + +\set algo array +\set type text +select count(*) + , coalesce((bulk.rows >= 0)::text, bulk.sqlstate) bulk_result + , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result +from :fn true) bulk join :fn false) rowbyrow using (path) +group by 2, 3 order by 1 desc +; + count | bulk_result | rowbyrow_result +-------+-------------+----------------- + 2 | XX001 | XX001 + 1 | true | true +(2 rows) + +\set algo dictionary +\set type text +select count(*) + , coalesce((bulk.rows >= 0)::text, bulk.sqlstate) bulk_result + , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result +from :fn true) bulk join :fn false) rowbyrow using (path) +group by 2, 3 order by 1 desc +; + count | bulk_result | rowbyrow_result +-------+-------------+----------------- + 4 | XX001 | XX001 + 1 | true | true +(2 rows) diff --git a/tsl/test/fuzzing/compression/dictionary-text/crash-707526606a02c72364e1c8ea82357eead6c74f60 b/tsl/test/fuzzing/compression/dictionary-text/crash-707526606a02c72364e1c8ea82357eead6c74f60 new file mode 100644 index 0000000000000000000000000000000000000000..2061e2e7aad1a533bf4e22a2ec38d64897318540 GIT binary patch literal 81 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEh17+4q>1R4G_FgS|oFnqIX0LcI`6GMS| W{N#Dvzd>ps%0Z+Z5Hm1vFaQAMFAv-R literal 0 HcmV?d00001 From c7efe40e063b4629eee60c86a7303899377b9c65 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 2 Nov 2023 17:20:48 +0100 Subject: [PATCH 021/249] typo --- tsl/src/nodes/decompress_chunk/compressed_batch.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 29d29fbbad4..ea615c3422b 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -119,7 +119,8 @@ decompress_column(DecompressChunkState *chunk_state, DecompressBatchState *batch { if (chunk_state->bulk_decompression_context == NULL) { - init_bulk_decompression_mctx(chunk_state, CurrentMemoryContext); + init_bulk_decompression_mctx(chunk_state, + MemoryContextGetParent(batch_state->per_batch_context)); } DecompressAllFunction decompress_all = From 6520bc561baef203a0075c4671de8a9ccedc3ed5 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 1 Nov 2023 14:52:59 +0100 Subject: [PATCH 022/249] Evaluate stable expressions in vectorized filters at run time This allows vectorizing common filters such as ts > now() - interval '1 day'. 
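
As an illustrative sketch (the vectorqual table and the
timescaledb.debug_require_vector_qual GUC are taken from the
decompress_vector_qual test added in this patch), a filter whose
right-hand side is a stable expression is now accepted by the
vectorized path:

    -- Require that all filters are vectorized, then run a query whose
    -- right-hand side is a stable expression. It is constified at
    -- executor startup, so the vectorized "Var op Const" comparison
    -- still applies.
    set timescaledb.debug_require_vector_qual to 'only';
    select count(*) from vectorqual
    where ts > '2021-01-01 00:00:00'::timestamp - interval '1 day';

Comparisons against volatile expressions such as random() still fall
back to the row-by-row filter.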
--- .../nodes/decompress_chunk/compressed_batch.c | 4 +- tsl/src/nodes/decompress_chunk/exec.c | 25 +- tsl/src/nodes/decompress_chunk/exec.h | 7 +- tsl/src/nodes/decompress_chunk/planner.c | 97 +++++- tsl/test/expected/agg_partials_pushdown.out | 16 +- tsl/test/expected/decompress_vector_qual.out | 34 ++ .../expected/transparent_decompression-13.out | 26 +- .../expected/transparent_decompression-14.out | 26 +- .../expected/transparent_decompression-15.out | 26 +- ...sparent_decompression_ordered_index-13.out | 38 +-- ...sparent_decompression_ordered_index-14.out | 38 +-- ...sparent_decompression_ordered_index-15.out | 38 +-- .../constify_timestamptz_op_interval.out | 4 +- .../constraint_exclusion_prepared.out | 290 +++++++++--------- .../shared/expected/ordered_append-13.out | 142 ++++----- .../shared/expected/ordered_append-14.out | 142 ++++----- .../shared/expected/ordered_append-15.out | 142 ++++----- .../expected/ordered_append_join-13.out | 69 +++-- .../expected/ordered_append_join-14.out | 69 +++-- .../expected/ordered_append_join-15.out | 69 +++-- .../transparent_decompress_chunk-13.out | 2 +- .../transparent_decompress_chunk-14.out | 2 +- .../transparent_decompress_chunk-15.out | 2 +- tsl/test/sql/decompress_vector_qual.sql | 18 ++ 24 files changed, 737 insertions(+), 589 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 0d5dcb37e6d..3d63c1f9623 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -81,7 +81,7 @@ make_single_value_arrow(Oid pgtype, Datum datum, bool isnull) static void apply_vector_quals(DecompressChunkState *chunk_state, DecompressBatchState *batch_state) { - if (!chunk_state->vectorized_quals) + if (!chunk_state->vectorized_quals_constified) { return; } @@ -98,7 +98,7 @@ apply_vector_quals(DecompressChunkState *chunk_state, DecompressBatchState *batc * Compute the quals. */ ListCell *lc; - foreach (lc, chunk_state->vectorized_quals) + foreach (lc, chunk_state->vectorized_quals_constified) { /* For now we only support "Var ? Const" predicates. */ OpExpr *oe = castNode(OpExpr, lfirst(lc)); diff --git a/tsl/src/nodes/decompress_chunk/exec.c b/tsl/src/nodes/decompress_chunk/exec.c index f8665a36cdf..3690e47e420 100644 --- a/tsl/src/nodes/decompress_chunk/exec.c +++ b/tsl/src/nodes/decompress_chunk/exec.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -160,7 +161,7 @@ decompress_chunk_state_create(CustomScan *cscan) Assert(IsA(cscan->custom_exprs, List)); Assert(list_length(cscan->custom_exprs) == 1); - chunk_state->vectorized_quals = linitial(cscan->custom_exprs); + chunk_state->vectorized_quals_original = linitial(cscan->custom_exprs); return (Node *) chunk_state; } @@ -475,6 +476,24 @@ decompress_chunk_begin(CustomScanState *node, EState *estate, int eflags) { elog(ERROR, "debug: batch sorted merge is required but not used"); } + + /* Constify stable expressions in vectorized predicates. 
*/ + PlannerGlobal glob = { + .boundParams = node->ss.ps.state->es_param_list_info, + }; + PlannerInfo root = { + .glob = &glob, + }; + ListCell *lc; + foreach (lc, chunk_state->vectorized_quals_original) + { + OpExpr *constified = + castNode(OpExpr, estimate_expression_value(&root, (Node *) lfirst(lc))); + Ensure(IsA(lsecond(constified->args), Const), + "failed to evaluate runtime constant in vectorized filter"); + chunk_state->vectorized_quals_constified = + lappend(chunk_state->vectorized_quals_constified, constified); + } } /* @@ -806,13 +825,13 @@ decompress_chunk_explain(CustomScanState *node, List *ancestors, ExplainState *e { DecompressChunkState *chunk_state = (DecompressChunkState *) node; - ts_show_scan_qual(chunk_state->vectorized_quals, + ts_show_scan_qual(chunk_state->vectorized_quals_original, "Vectorized Filter", &node->ss.ps, ancestors, es); - if (!node->ss.ps.plan->qual && chunk_state->vectorized_quals) + if (!node->ss.ps.plan->qual && chunk_state->vectorized_quals_original) { /* * The normal explain won't show this if there are no normal quals but diff --git a/tsl/src/nodes/decompress_chunk/exec.h b/tsl/src/nodes/decompress_chunk/exec.h index 1d9c189a412..a64dcf8d0f3 100644 --- a/tsl/src/nodes/decompress_chunk/exec.h +++ b/tsl/src/nodes/decompress_chunk/exec.h @@ -97,9 +97,12 @@ typedef struct DecompressChunkState /* * For some predicates, we have more efficient implementation that work on * the entire compressed batch in one go. They go to this list, and the rest - * goes into the usual ss.ps.qual. + * goes into the usual ss.ps.qual. Note that we constify stable functions + * in these predicates at execution time, but have to keep the original + * version for EXPLAIN. */ - List *vectorized_quals; + List *vectorized_quals_original; + List *vectorized_quals_constified; /* * Make non-refcounted copies of the tupdesc for reuse across all batch states diff --git a/tsl/src/nodes/decompress_chunk/planner.c b/tsl/src/nodes/decompress_chunk/planner.c index 89a56f41c53..db2ddf6323b 100644 --- a/tsl/src/nodes/decompress_chunk/planner.c +++ b/tsl/src/nodes/decompress_chunk/planner.c @@ -377,27 +377,84 @@ find_attr_pos_in_tlist(List *targetlist, AttrNumber pos) } static bool -qual_is_vectorizable(DecompressChunkPath *path, Node *qual) +contains_volatile_functions_checker(Oid func_id, void *context) +{ + return (func_volatile(func_id) == PROVOLATILE_VOLATILE); +} + +static bool +is_not_runtime_constant_walker(Node *node, void *context) +{ + switch (nodeTag(node)) + { + case T_Var: + case T_PlaceHolderVar: + case T_Param: + /* + * We might want to support these nodes to have vectorizable + * join clauses (T_Var), join clauses referencing a variable that is + * above outer join (T_PlaceHolderVar) or initplan parameters and + * prepared statement parameters (T_Param). We don't support them at + * the moment. + */ + return true; + default: + if (check_functions_in_node(node, + contains_volatile_functions_checker, + /* context = */ NULL)) + { + return true; + } + return expression_tree_walker(node, + is_not_runtime_constant_walker, + /* context = */ NULL); + } +} + +/* + * Check if the given node is a run-time constant, i.e. it doesn't contain + * volatile functions or variables or parameters. This means we can evaluate + * it at run time, allowing us to apply the vectorized comparison operators + * that have the form "Var op Const". This applies for example to filter + * expressions like `time > now() - interval '1 hour'`. 
+ * Note that we do the same evaluation when doing run time chunk exclusion, but + * there is no good way to pass the evaluated clauses to the underlying nodes + * like this DecompressChunk node. + */ +static bool +is_not_runtime_constant(Node *node) +{ + bool result = is_not_runtime_constant_walker(node, /* context = */ NULL); + return result; +} + +/* + * Try to check if the current qual is vectorizable, and if needed make a + * commuted copy. If not, return NULL. + */ +static Node * +make_vectorized_qual(DecompressChunkPath *path, Node *qual) { /* Only simple "Var op Const" binary predicates for now. */ if (!IsA(qual, OpExpr)) { - return false; + return NULL; } OpExpr *o = castNode(OpExpr, qual); if (list_length(o->args) != 2) { - return false; + return NULL; } - if (IsA(lsecond(o->args), Var) && IsA(linitial(o->args), Const)) + if (IsA(lsecond(o->args), Var)) { /* Try to commute the operator if the constant is on the right. */ Oid commutator_opno = get_commutator(o->opno); if (OidIsValid(commutator_opno)) { + o = (OpExpr *) copyObject(o); o->opno = commutator_opno; /* * opfuncid is a cache, we can set it to InvalidOid like the @@ -408,9 +465,14 @@ qual_is_vectorizable(DecompressChunkPath *path, Node *qual) } } - if (!IsA(linitial(o->args), Var) || !IsA(lsecond(o->args), Const)) + /* + * We can vectorize the operation where the left side is a Var and the right + * side is a constant or can be evaluated to a constant at run time (e.g. + * contains stable functions). + */ + if (!IsA(linitial(o->args), Var) || is_not_runtime_constant(lsecond(o->args))) { - return false; + return NULL; } Var *var = castNode(Var, linitial(o->args)); @@ -424,16 +486,16 @@ qual_is_vectorizable(DecompressChunkPath *path, Node *qual) .bulk_decompression_possible) { /* This column doesn't support bulk decompression. */ - return false; + return NULL; } Oid opcode = get_opcode(o->opno); if (get_vector_const_predicate(opcode)) { - return true; + return (Node *) o; } - return false; + return NULL; } /* @@ -441,15 +503,22 @@ qual_is_vectorizable(DecompressChunkPath *path, Node *qual) * list. */ static void -find_vectorized_quals(DecompressChunkPath *path, List *qual, List **vectorized, +find_vectorized_quals(DecompressChunkPath *path, List *qual_list, List **vectorized, List **nonvectorized) { ListCell *lc; - foreach (lc, qual) + foreach (lc, qual_list) { - Node *node = lfirst(lc); - List **dest = qual_is_vectorizable(path, node) ? 
vectorized : nonvectorized; - *dest = lappend(*dest, node); + Node *source_qual = lfirst(lc); + Node *vectorized_qual = make_vectorized_qual(path, source_qual); + if (vectorized_qual) + { + *vectorized = lappend(*vectorized, vectorized_qual); + } + else + { + *nonvectorized = lappend(*nonvectorized, source_qual); + } } } diff --git a/tsl/test/expected/agg_partials_pushdown.out b/tsl/test/expected/agg_partials_pushdown.out index a2ce5a58e24..7400f4eae72 100644 --- a/tsl/test/expected/agg_partials_pushdown.out +++ b/tsl/test/expected/agg_partials_pushdown.out @@ -127,8 +127,7 @@ SELECT count(*), sum(v0), sum(v1), sum(v2), sum(v3) FROM testtable WHERE time >= Output: PARTIAL count(*), PARTIAL sum(_hyper_1_1_chunk.v0), PARTIAL sum(_hyper_1_1_chunk.v1), PARTIAL sum(_hyper_1_1_chunk.v2), PARTIAL sum(_hyper_1_1_chunk.v3) -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_1_chunk (actual rows=25 loops=1) Output: _hyper_1_1_chunk.v0, _hyper_1_1_chunk.v1, _hyper_1_1_chunk.v2, _hyper_1_1_chunk.v3 - Filter: (_hyper_1_1_chunk."time" >= ('2000-01-01 00:00:00+0'::cstring)::timestamp with time zone) - Vectorized Filter: (_hyper_1_1_chunk."time" <= 'Mon Jan 31 16:00:00 2000 PST'::timestamp with time zone) + Vectorized Filter: ((_hyper_1_1_chunk."time" <= 'Mon Jan 31 16:00:00 2000 PST'::timestamp with time zone) AND (_hyper_1_1_chunk."time" >= ('2000-01-01 00:00:00+0'::cstring)::timestamp with time zone)) Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_2_3_chunk (actual rows=5 loops=1) Output: compress_hyper_2_3_chunk.filter_1, compress_hyper_2_3_chunk.filler_2, compress_hyper_2_3_chunk.filler_3, compress_hyper_2_3_chunk."time", compress_hyper_2_3_chunk.device_id, compress_hyper_2_3_chunk.v0, compress_hyper_2_3_chunk.v1, compress_hyper_2_3_chunk.v2, compress_hyper_2_3_chunk.v3, compress_hyper_2_3_chunk._ts_meta_count, compress_hyper_2_3_chunk._ts_meta_sequence_num, compress_hyper_2_3_chunk._ts_meta_min_1, compress_hyper_2_3_chunk._ts_meta_max_1 @@ -142,8 +141,7 @@ SELECT count(*), sum(v0), sum(v1), sum(v2), sum(v3) FROM testtable WHERE time >= Output: PARTIAL count(*), PARTIAL sum(_hyper_1_2_chunk.v0), PARTIAL sum(_hyper_1_2_chunk.v1), PARTIAL sum(_hyper_1_2_chunk.v2), PARTIAL sum(_hyper_1_2_chunk.v3) -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_2_chunk (actual rows=25 loops=1) Output: _hyper_1_2_chunk.v0, _hyper_1_2_chunk.v1, _hyper_1_2_chunk.v2, _hyper_1_2_chunk.v3 - Filter: (_hyper_1_2_chunk."time" >= ('2000-01-01 00:00:00+0'::cstring)::timestamp with time zone) - Vectorized Filter: (_hyper_1_2_chunk."time" <= 'Mon Jan 31 16:00:00 2000 PST'::timestamp with time zone) + Vectorized Filter: ((_hyper_1_2_chunk."time" <= 'Mon Jan 31 16:00:00 2000 PST'::timestamp with time zone) AND (_hyper_1_2_chunk."time" >= ('2000-01-01 00:00:00+0'::cstring)::timestamp with time zone)) Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_2_4_chunk (actual rows=5 loops=1) Output: compress_hyper_2_4_chunk.filter_1, compress_hyper_2_4_chunk.filler_2, compress_hyper_2_4_chunk.filler_3, compress_hyper_2_4_chunk."time", compress_hyper_2_4_chunk.device_id, compress_hyper_2_4_chunk.v0, compress_hyper_2_4_chunk.v1, compress_hyper_2_4_chunk.v2, compress_hyper_2_4_chunk.v3, compress_hyper_2_4_chunk._ts_meta_count, compress_hyper_2_4_chunk._ts_meta_sequence_num, compress_hyper_2_4_chunk._ts_meta_min_1, compress_hyper_2_4_chunk._ts_meta_max_1 @@ -153,7 +151,7 @@ SELECT count(*), sum(v0), sum(v1), sum(v2), sum(v3) FROM testtable WHERE time >= -> 
Seq Scan on _timescaledb_internal._hyper_1_2_chunk (actual rows=25 loops=1) Output: _hyper_1_2_chunk.v0, _hyper_1_2_chunk.v1, _hyper_1_2_chunk.v2, _hyper_1_2_chunk.v3 Filter: ((_hyper_1_2_chunk."time" <= 'Mon Jan 31 16:00:00 2000 PST'::timestamp with time zone) AND (_hyper_1_2_chunk."time" >= ('2000-01-01 00:00:00+0'::cstring)::timestamp with time zone)) -(37 rows) +(35 rows) -- Force plain / sorted aggregation SET enable_hashagg = OFF; @@ -178,8 +176,7 @@ SELECT count(*), sum(v0), sum(v1), sum(v2), sum(v3) FROM testtable WHERE time >= Output: PARTIAL count(*), PARTIAL sum(_hyper_1_1_chunk.v0), PARTIAL sum(_hyper_1_1_chunk.v1), PARTIAL sum(_hyper_1_1_chunk.v2), PARTIAL sum(_hyper_1_1_chunk.v3) -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_1_chunk (actual rows=25 loops=1) Output: _hyper_1_1_chunk.v0, _hyper_1_1_chunk.v1, _hyper_1_1_chunk.v2, _hyper_1_1_chunk.v3 - Filter: (_hyper_1_1_chunk."time" >= ('2000-01-01 00:00:00+0'::cstring)::timestamp with time zone) - Vectorized Filter: (_hyper_1_1_chunk."time" <= 'Mon Jan 31 16:00:00 2000 PST'::timestamp with time zone) + Vectorized Filter: ((_hyper_1_1_chunk."time" <= 'Mon Jan 31 16:00:00 2000 PST'::timestamp with time zone) AND (_hyper_1_1_chunk."time" >= ('2000-01-01 00:00:00+0'::cstring)::timestamp with time zone)) Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_2_3_chunk (actual rows=5 loops=1) Output: compress_hyper_2_3_chunk.filter_1, compress_hyper_2_3_chunk.filler_2, compress_hyper_2_3_chunk.filler_3, compress_hyper_2_3_chunk."time", compress_hyper_2_3_chunk.device_id, compress_hyper_2_3_chunk.v0, compress_hyper_2_3_chunk.v1, compress_hyper_2_3_chunk.v2, compress_hyper_2_3_chunk.v3, compress_hyper_2_3_chunk._ts_meta_count, compress_hyper_2_3_chunk._ts_meta_sequence_num, compress_hyper_2_3_chunk._ts_meta_min_1, compress_hyper_2_3_chunk._ts_meta_max_1 @@ -193,8 +190,7 @@ SELECT count(*), sum(v0), sum(v1), sum(v2), sum(v3) FROM testtable WHERE time >= Output: PARTIAL count(*), PARTIAL sum(_hyper_1_2_chunk.v0), PARTIAL sum(_hyper_1_2_chunk.v1), PARTIAL sum(_hyper_1_2_chunk.v2), PARTIAL sum(_hyper_1_2_chunk.v3) -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_2_chunk (actual rows=25 loops=1) Output: _hyper_1_2_chunk.v0, _hyper_1_2_chunk.v1, _hyper_1_2_chunk.v2, _hyper_1_2_chunk.v3 - Filter: (_hyper_1_2_chunk."time" >= ('2000-01-01 00:00:00+0'::cstring)::timestamp with time zone) - Vectorized Filter: (_hyper_1_2_chunk."time" <= 'Mon Jan 31 16:00:00 2000 PST'::timestamp with time zone) + Vectorized Filter: ((_hyper_1_2_chunk."time" <= 'Mon Jan 31 16:00:00 2000 PST'::timestamp with time zone) AND (_hyper_1_2_chunk."time" >= ('2000-01-01 00:00:00+0'::cstring)::timestamp with time zone)) Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_2_4_chunk (actual rows=5 loops=1) Output: compress_hyper_2_4_chunk.filter_1, compress_hyper_2_4_chunk.filler_2, compress_hyper_2_4_chunk.filler_3, compress_hyper_2_4_chunk."time", compress_hyper_2_4_chunk.device_id, compress_hyper_2_4_chunk.v0, compress_hyper_2_4_chunk.v1, compress_hyper_2_4_chunk.v2, compress_hyper_2_4_chunk.v3, compress_hyper_2_4_chunk._ts_meta_count, compress_hyper_2_4_chunk._ts_meta_sequence_num, compress_hyper_2_4_chunk._ts_meta_min_1, compress_hyper_2_4_chunk._ts_meta_max_1 @@ -204,7 +200,7 @@ SELECT count(*), sum(v0), sum(v1), sum(v2), sum(v3) FROM testtable WHERE time >= -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk (actual rows=25 loops=1) Output: _hyper_1_2_chunk.v0, _hyper_1_2_chunk.v1, 
_hyper_1_2_chunk.v2, _hyper_1_2_chunk.v3 Filter: ((_hyper_1_2_chunk."time" <= 'Mon Jan 31 16:00:00 2000 PST'::timestamp with time zone) AND (_hyper_1_2_chunk."time" >= ('2000-01-01 00:00:00+0'::cstring)::timestamp with time zone)) -(37 rows) +(35 rows) RESET enable_hashagg; -- Check Append Node under ChunkAppend diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index 9e13d119c2c..070b8c509dd 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -198,6 +198,40 @@ select count(*) from vectorqual where metric4 is not null; 2 (1 row) +-- Vectorized filters also work if we have only stable functions on the right +-- side that can be evaluated to a constant at run time. +set timescaledb.debug_require_vector_qual to 'only'; +select count(*) from vectorqual where ts > '2021-01-01 00:00:00'::timestamptz::timestamp; + count +------- + 3 +(1 row) + +select count(*) from vectorqual where ts > '2021-01-01 00:00:00'::timestamp - interval '1 day'; + count +------- + 4 +(1 row) + +-- This filter is not vectorized because the 'timestamp > timestamptz' +-- operator is stable, not immutable, because it uses the current session +-- timezone. We could transform it to something like +-- 'timestamp > timestamptz::timestamp' to allow our stable function evaluation +-- to handle this case, but we don't do it at the moment. +set timescaledb.debug_require_vector_qual to 'forbid'; +select count(*) from vectorqual where ts > '2021-01-01 00:00:00'::timestamptz; + count +------- + 3 +(1 row) + +-- Can't vectorize comparison with a volatile function. +select count(*) from vectorqual where metric3 > random()::int - 100; + count +------- + 5 +(1 row) + -- Test that the vectorized quals are disabled by disabling the bulk decompression. 
set timescaledb.enable_bulk_decompression to off; set timescaledb.debug_require_vector_qual to 'forbid'; diff --git a/tsl/test/expected/transparent_decompression-13.out b/tsl/test/expected/transparent_decompression-13.out index 7b6a6f7c3a5..389f69bd993 100644 --- a/tsl/test/expected/transparent_decompression-13.out +++ b/tsl/test/expected/transparent_decompression-13.out @@ -1200,7 +1200,7 @@ LIMIT 10; Sort Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device_id Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_1_1_chunk (actual rows=1800 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_5_15_chunk (actual rows=5 loops=1) -> Sort (never executed) Sort Key: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device_id @@ -1209,7 +1209,7 @@ LIMIT 10; -> Sort (never executed) Sort Key: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device_id -> Custom Scan (DecompressChunk) on _hyper_1_3_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_5_16_chunk (never executed) (19 rows) @@ -1643,7 +1643,7 @@ ORDER BY time, Sort Key: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device_id Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_1_3_chunk (actual rows=2520 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_5_16_chunk (actual rows=5 loops=1) Filter: (_ts_meta_max_3 > ('2000-01-08'::cstring)::timestamp with time zone) (15 rows) @@ -5067,19 +5067,19 @@ LIMIT 10; Sort Key: _hyper_2_4_chunk."time", _hyper_2_4_chunk.device_id Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_2_4_chunk (actual rows=360 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_6_17_chunk (actual rows=1 loops=1) -> Sort (actual rows=6 loops=1) Sort Key: _hyper_2_5_chunk."time", _hyper_2_5_chunk.device_id Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_2_5_chunk (actual rows=1080 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_6_18_chunk (actual rows=3 loops=1) -> Sort (actual rows=3 loops=1) Sort Key: _hyper_2_6_chunk."time", _hyper_2_6_chunk.device_id Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_2_6_chunk (actual rows=360 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_6_19_chunk (actual rows=1 loops=1) -> Merge Append (never executed) Sort Key: _hyper_2_7_chunk."time", _hyper_2_7_chunk.device_id @@ -5106,12 +5106,12 @@ LIMIT 10; -> Sort (never executed) Sort Key: _hyper_2_10_chunk."time", _hyper_2_10_chunk.device_id -> Custom Scan (DecompressChunk) on _hyper_2_10_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_6_20_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_2_11_chunk."time", _hyper_2_11_chunk.device_id -> Custom Scan (DecompressChunk) on _hyper_2_11_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_6_21_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_2_12_chunk."time", _hyper_2_12_chunk.device_id @@ -5771,7 +5771,7 @@ ORDER BY time, Sort Key: _hyper_2_4_chunk."time", _hyper_2_4_chunk.device_id Sort Method: quicksort -> Custom Scan 
(DecompressChunk) on _hyper_2_4_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_6_17_chunk (actual rows=0 loops=1) Filter: (_ts_meta_max_3 > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 1 @@ -5779,7 +5779,7 @@ ORDER BY time, Sort Key: _hyper_2_5_chunk."time", _hyper_2_5_chunk.device_id Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_2_5_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_6_18_chunk (actual rows=0 loops=1) Filter: (_ts_meta_max_3 > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 3 @@ -5787,7 +5787,7 @@ ORDER BY time, Sort Key: _hyper_2_6_chunk."time", _hyper_2_6_chunk.device_id Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_2_6_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_6_19_chunk (actual rows=0 loops=1) Filter: (_ts_meta_max_3 > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 1 @@ -5825,14 +5825,14 @@ ORDER BY time, Sort Key: _hyper_2_10_chunk."time", _hyper_2_10_chunk.device_id Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_2_10_chunk (actual rows=504 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_6_20_chunk (actual rows=1 loops=1) Filter: (_ts_meta_max_3 > ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (actual rows=1512 loops=1) Sort Key: _hyper_2_11_chunk."time", _hyper_2_11_chunk.device_id Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_2_11_chunk (actual rows=1512 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_6_21_chunk (actual rows=3 loops=1) Filter: (_ts_meta_max_3 > ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (actual rows=504 loops=1) diff --git a/tsl/test/expected/transparent_decompression-14.out b/tsl/test/expected/transparent_decompression-14.out index 7d2f7af842b..9f6daa018e4 100644 --- a/tsl/test/expected/transparent_decompression-14.out +++ b/tsl/test/expected/transparent_decompression-14.out @@ -1200,7 +1200,7 @@ LIMIT 10; Sort Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device_id Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_1_1_chunk (actual rows=1800 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_5_15_chunk (actual rows=5 loops=1) -> Sort (never executed) Sort Key: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device_id @@ -1209,7 +1209,7 @@ LIMIT 10; -> Sort (never executed) Sort Key: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device_id -> Custom Scan (DecompressChunk) on _hyper_1_3_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_5_16_chunk (never executed) (19 rows) @@ -1643,7 +1643,7 @@ ORDER BY time, Sort Key: _hyper_1_3_chunk."time", 
_hyper_1_3_chunk.device_id Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_1_3_chunk (actual rows=2520 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_5_16_chunk (actual rows=5 loops=1) Filter: (_ts_meta_max_3 > ('2000-01-08'::cstring)::timestamp with time zone) (15 rows) @@ -5067,19 +5067,19 @@ LIMIT 10; Sort Key: _hyper_2_4_chunk."time", _hyper_2_4_chunk.device_id Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_2_4_chunk (actual rows=360 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_6_17_chunk (actual rows=1 loops=1) -> Sort (actual rows=6 loops=1) Sort Key: _hyper_2_5_chunk."time", _hyper_2_5_chunk.device_id Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_2_5_chunk (actual rows=1080 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_6_18_chunk (actual rows=3 loops=1) -> Sort (actual rows=3 loops=1) Sort Key: _hyper_2_6_chunk."time", _hyper_2_6_chunk.device_id Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_2_6_chunk (actual rows=360 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_6_19_chunk (actual rows=1 loops=1) -> Merge Append (never executed) Sort Key: _hyper_2_7_chunk."time", _hyper_2_7_chunk.device_id @@ -5106,12 +5106,12 @@ LIMIT 10; -> Sort (never executed) Sort Key: _hyper_2_10_chunk."time", _hyper_2_10_chunk.device_id -> Custom Scan (DecompressChunk) on _hyper_2_10_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_6_20_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_2_11_chunk."time", _hyper_2_11_chunk.device_id -> Custom Scan (DecompressChunk) on _hyper_2_11_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_6_21_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_2_12_chunk."time", _hyper_2_12_chunk.device_id @@ -5771,7 +5771,7 @@ ORDER BY time, Sort Key: _hyper_2_4_chunk."time", _hyper_2_4_chunk.device_id Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_2_4_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_6_17_chunk (actual rows=0 loops=1) Filter: (_ts_meta_max_3 > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 1 @@ -5779,7 +5779,7 @@ ORDER BY time, Sort Key: _hyper_2_5_chunk."time", _hyper_2_5_chunk.device_id Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_2_5_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_6_18_chunk (actual rows=0 loops=1) Filter: (_ts_meta_max_3 > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 3 @@ -5787,7 +5787,7 @@ ORDER BY time, Sort Key: _hyper_2_6_chunk."time", _hyper_2_6_chunk.device_id Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_2_6_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" 
> ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_6_19_chunk (actual rows=0 loops=1) Filter: (_ts_meta_max_3 > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 1 @@ -5825,14 +5825,14 @@ ORDER BY time, Sort Key: _hyper_2_10_chunk."time", _hyper_2_10_chunk.device_id Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_2_10_chunk (actual rows=504 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_6_20_chunk (actual rows=1 loops=1) Filter: (_ts_meta_max_3 > ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (actual rows=1512 loops=1) Sort Key: _hyper_2_11_chunk."time", _hyper_2_11_chunk.device_id Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_2_11_chunk (actual rows=1512 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_6_21_chunk (actual rows=3 loops=1) Filter: (_ts_meta_max_3 > ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (actual rows=504 loops=1) diff --git a/tsl/test/expected/transparent_decompression-15.out b/tsl/test/expected/transparent_decompression-15.out index cdbe21e2b1f..81e62cccd96 100644 --- a/tsl/test/expected/transparent_decompression-15.out +++ b/tsl/test/expected/transparent_decompression-15.out @@ -1201,7 +1201,7 @@ LIMIT 10; Sort Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device_id Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_1_1_chunk (actual rows=1800 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_5_15_chunk (actual rows=5 loops=1) -> Sort (never executed) Sort Key: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device_id @@ -1210,7 +1210,7 @@ LIMIT 10; -> Sort (never executed) Sort Key: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device_id -> Custom Scan (DecompressChunk) on _hyper_1_3_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_5_16_chunk (never executed) (19 rows) @@ -1644,7 +1644,7 @@ ORDER BY time, Sort Key: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device_id Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_1_3_chunk (actual rows=2520 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_5_16_chunk (actual rows=5 loops=1) Filter: (_ts_meta_max_3 > ('2000-01-08'::cstring)::timestamp with time zone) (15 rows) @@ -5041,19 +5041,19 @@ LIMIT 10; Sort Key: _hyper_2_4_chunk."time", _hyper_2_4_chunk.device_id Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_2_4_chunk (actual rows=360 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_6_17_chunk (actual rows=1 loops=1) -> Sort (actual rows=6 loops=1) Sort Key: _hyper_2_5_chunk."time", _hyper_2_5_chunk.device_id Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_2_5_chunk (actual rows=1080 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_6_18_chunk (actual rows=3 loops=1) -> Sort (actual rows=3 loops=1) Sort Key: _hyper_2_6_chunk."time", _hyper_2_6_chunk.device_id Sort Method: top-N 
heapsort -> Custom Scan (DecompressChunk) on _hyper_2_6_chunk (actual rows=360 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_6_19_chunk (actual rows=1 loops=1) -> Merge Append (never executed) Sort Key: _hyper_2_7_chunk."time", _hyper_2_7_chunk.device_id @@ -5080,12 +5080,12 @@ LIMIT 10; -> Sort (never executed) Sort Key: _hyper_2_10_chunk."time", _hyper_2_10_chunk.device_id -> Custom Scan (DecompressChunk) on _hyper_2_10_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_6_20_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_2_11_chunk."time", _hyper_2_11_chunk.device_id -> Custom Scan (DecompressChunk) on _hyper_2_11_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_6_21_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_2_12_chunk."time", _hyper_2_12_chunk.device_id @@ -5745,7 +5745,7 @@ ORDER BY time, Sort Key: _hyper_2_4_chunk."time", _hyper_2_4_chunk.device_id Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_2_4_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_6_17_chunk (actual rows=0 loops=1) Filter: (_ts_meta_max_3 > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 1 @@ -5753,7 +5753,7 @@ ORDER BY time, Sort Key: _hyper_2_5_chunk."time", _hyper_2_5_chunk.device_id Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_2_5_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_6_18_chunk (actual rows=0 loops=1) Filter: (_ts_meta_max_3 > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 3 @@ -5761,7 +5761,7 @@ ORDER BY time, Sort Key: _hyper_2_6_chunk."time", _hyper_2_6_chunk.device_id Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_2_6_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_6_19_chunk (actual rows=0 loops=1) Filter: (_ts_meta_max_3 > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 1 @@ -5799,14 +5799,14 @@ ORDER BY time, Sort Key: _hyper_2_10_chunk."time", _hyper_2_10_chunk.device_id Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_2_10_chunk (actual rows=504 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_6_20_chunk (actual rows=1 loops=1) Filter: (_ts_meta_max_3 > ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (actual rows=1512 loops=1) Sort Key: _hyper_2_11_chunk."time", _hyper_2_11_chunk.device_id Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_2_11_chunk (actual rows=1512 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_6_21_chunk (actual rows=3 loops=1) Filter: (_ts_meta_max_3 > ('2000-01-08'::cstring)::timestamp with time 
zone) -> Sort (actual rows=504 loops=1) diff --git a/tsl/test/expected/transparent_decompression_ordered_index-13.out b/tsl/test/expected/transparent_decompression_ordered_index-13.out index e18d7d65d62..bde17d1771a 100644 --- a/tsl/test/expected/transparent_decompression_ordered_index-13.out +++ b/tsl/test/expected/transparent_decompression_ordered_index-13.out @@ -830,23 +830,21 @@ ORDER BY 1, -> Merge Append (actual rows=10 loops=1) Sort Key: _hyper_1_4_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_1_4_chunk (actual rows=9 loops=1) - Filter: ("time" < now()) - Vectorized Filter: ("time" > 'Tue Jan 01 00:00:00 2002 PST'::timestamp with time zone) + Vectorized Filter: (("time" > 'Tue Jan 01 00:00:00 2002 PST'::timestamp with time zone) AND ("time" < now())) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_2_9_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_2_9_chunk (actual rows=1 loops=1) Filter: ((_ts_meta_max_1 > 'Tue Jan 01 00:00:00 2002 PST'::timestamp with time zone) AND (device_id = 4) AND (device_id_peer = 5)) -> Custom Scan (DecompressChunk) on _hyper_1_5_chunk (actual rows=1 loops=1) - Filter: ("time" < now()) - Vectorized Filter: ("time" > 'Tue Jan 01 00:00:00 2002 PST'::timestamp with time zone) + Vectorized Filter: (("time" > 'Tue Jan 01 00:00:00 2002 PST'::timestamp with time zone) AND ("time" < now())) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_2_10_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_2_10_chunk (actual rows=1 loops=1) Filter: ((_ts_meta_max_1 > 'Tue Jan 01 00:00:00 2002 PST'::timestamp with time zone) AND (device_id = 4) AND (device_id_peer = 5)) Rows Removed by Filter: 4 -(26 rows) +(24 rows) :PREFIX SELECT m.device_id, @@ -916,23 +914,23 @@ ORDER BY 1, Chunks excluded during startup: 0 -> Append (actual rows=1541 loops=1) -> Custom Scan (DecompressChunk) on _hyper_1_1_chunk d_1 (actual rows=480 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_6_chunk (actual rows=5 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_1_2_chunk d_2 (actual rows=960 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_7_chunk (actual rows=5 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_1_3_chunk d_3 (actual rows=48 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_8_chunk (actual rows=1 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_1_4_chunk d_4 (actual rows=48 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_9_chunk (actual rows=1 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) 
on _hyper_1_5_chunk d_5 (actual rows=5 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_10_chunk (actual rows=5 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Sort (actual rows=7317 loops=1) @@ -943,23 +941,23 @@ ORDER BY 1, Chunks excluded during startup: 0 -> Append (actual rows=1541 loops=1) -> Custom Scan (DecompressChunk) on _hyper_1_1_chunk m_1 (actual rows=480 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_6_chunk compress_hyper_2_6_chunk_1 (actual rows=5 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_1_2_chunk m_2 (actual rows=960 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_7_chunk compress_hyper_2_7_chunk_1 (actual rows=5 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_1_3_chunk m_3 (actual rows=48 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_8_chunk compress_hyper_2_8_chunk_1 (actual rows=1 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_1_4_chunk m_4 (actual rows=48 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_9_chunk compress_hyper_2_9_chunk_1 (actual rows=1 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_1_5_chunk m_5 (actual rows=5 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_10_chunk compress_hyper_2_10_chunk_1 (actual rows=5 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) (62 rows) @@ -982,15 +980,14 @@ ORDER BY m.v0; -> Hash Join (actual rows=0 loops=1) Hash Cond: (m.device_id = d.device_id) -> Custom Scan (DecompressChunk) on _hyper_1_5_chunk m (actual rows=0 loops=1) - Filter: ("time" < ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) - Vectorized Filter: ("time" > 'Tue Jan 01 00:00:00 2019 PST'::timestamp with time zone) + Vectorized Filter: (("time" > 'Tue Jan 01 00:00:00 2019 PST'::timestamp with time zone) AND ("time" < ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone)) -> Seq Scan on compress_hyper_2_10_chunk (actual rows=0 loops=1) Filter: ((_ts_meta_max_1 > 'Tue Jan 01 00:00:00 2019 PST'::timestamp with time zone) AND (_ts_meta_min_1 < ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone)) Rows Removed by Filter: 5 -> Hash (actual rows=7 loops=1) Buckets: 1024 Batches: 1 -> Seq Scan on 
device_tbl d (actual rows=7 loops=1) -(14 rows) +(13 rows) -- no matches in metrics_ordered_idx but one row in device_tbl :PREFIX @@ -1013,12 +1010,11 @@ ORDER BY m.v0; Filter: (device_id = 8) Rows Removed by Filter: 6 -> Custom Scan (DecompressChunk) on _hyper_1_5_chunk m (actual rows=0 loops=1) - Filter: ("time" < ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) - Vectorized Filter: ("time" > 'Tue Jan 01 00:00:00 2019 PST'::timestamp with time zone) + Vectorized Filter: (("time" > 'Tue Jan 01 00:00:00 2019 PST'::timestamp with time zone) AND ("time" < ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone)) -> Seq Scan on compress_hyper_2_10_chunk (actual rows=0 loops=1) Filter: ((_ts_meta_max_1 > 'Tue Jan 01 00:00:00 2019 PST'::timestamp with time zone) AND (device_id = 8) AND (_ts_meta_min_1 < ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone)) Rows Removed by Filter: 5 -(14 rows) +(13 rows) -- no matches in device_tbl but 1 row in metrics_ordered_idx :PREFIX diff --git a/tsl/test/expected/transparent_decompression_ordered_index-14.out b/tsl/test/expected/transparent_decompression_ordered_index-14.out index fcedf0391e3..c8f0b3e7578 100644 --- a/tsl/test/expected/transparent_decompression_ordered_index-14.out +++ b/tsl/test/expected/transparent_decompression_ordered_index-14.out @@ -830,23 +830,21 @@ ORDER BY 1, -> Merge Append (actual rows=10 loops=1) Sort Key: _hyper_1_4_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_1_4_chunk (actual rows=9 loops=1) - Filter: ("time" < now()) - Vectorized Filter: ("time" > 'Tue Jan 01 00:00:00 2002 PST'::timestamp with time zone) + Vectorized Filter: (("time" > 'Tue Jan 01 00:00:00 2002 PST'::timestamp with time zone) AND ("time" < now())) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_2_9_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_2_9_chunk (actual rows=1 loops=1) Filter: ((_ts_meta_max_1 > 'Tue Jan 01 00:00:00 2002 PST'::timestamp with time zone) AND (device_id = 4) AND (device_id_peer = 5)) -> Custom Scan (DecompressChunk) on _hyper_1_5_chunk (actual rows=1 loops=1) - Filter: ("time" < now()) - Vectorized Filter: ("time" > 'Tue Jan 01 00:00:00 2002 PST'::timestamp with time zone) + Vectorized Filter: (("time" > 'Tue Jan 01 00:00:00 2002 PST'::timestamp with time zone) AND ("time" < now())) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_2_10_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_2_10_chunk (actual rows=1 loops=1) Filter: ((_ts_meta_max_1 > 'Tue Jan 01 00:00:00 2002 PST'::timestamp with time zone) AND (device_id = 4) AND (device_id_peer = 5)) Rows Removed by Filter: 4 -(26 rows) +(24 rows) :PREFIX SELECT m.device_id, @@ -916,23 +914,23 @@ ORDER BY 1, Chunks excluded during startup: 0 -> Append (actual rows=1541 loops=1) -> Custom Scan (DecompressChunk) on _hyper_1_1_chunk d_1 (actual rows=480 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_6_chunk (actual rows=5 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_1_2_chunk d_2 (actual rows=960 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on 
compress_hyper_2_7_chunk (actual rows=5 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_1_3_chunk d_3 (actual rows=48 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_8_chunk (actual rows=1 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_1_4_chunk d_4 (actual rows=48 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_9_chunk (actual rows=1 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_1_5_chunk d_5 (actual rows=5 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_10_chunk (actual rows=5 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Sort (actual rows=7317 loops=1) @@ -943,23 +941,23 @@ ORDER BY 1, Chunks excluded during startup: 0 -> Append (actual rows=1541 loops=1) -> Custom Scan (DecompressChunk) on _hyper_1_1_chunk m_1 (actual rows=480 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_6_chunk compress_hyper_2_6_chunk_1 (actual rows=5 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_1_2_chunk m_2 (actual rows=960 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_7_chunk compress_hyper_2_7_chunk_1 (actual rows=5 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_1_3_chunk m_3 (actual rows=48 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_8_chunk compress_hyper_2_8_chunk_1 (actual rows=1 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_1_4_chunk m_4 (actual rows=48 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_9_chunk compress_hyper_2_9_chunk_1 (actual rows=1 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_1_5_chunk m_5 (actual rows=5 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_10_chunk compress_hyper_2_10_chunk_1 
(actual rows=5 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) (62 rows) @@ -982,15 +980,14 @@ ORDER BY m.v0; -> Hash Join (actual rows=0 loops=1) Hash Cond: (m.device_id = d.device_id) -> Custom Scan (DecompressChunk) on _hyper_1_5_chunk m (actual rows=0 loops=1) - Filter: ("time" < ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) - Vectorized Filter: ("time" > 'Tue Jan 01 00:00:00 2019 PST'::timestamp with time zone) + Vectorized Filter: (("time" > 'Tue Jan 01 00:00:00 2019 PST'::timestamp with time zone) AND ("time" < ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone)) -> Seq Scan on compress_hyper_2_10_chunk (actual rows=0 loops=1) Filter: ((_ts_meta_max_1 > 'Tue Jan 01 00:00:00 2019 PST'::timestamp with time zone) AND (_ts_meta_min_1 < ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone)) Rows Removed by Filter: 5 -> Hash (actual rows=7 loops=1) Buckets: 1024 Batches: 1 -> Seq Scan on device_tbl d (actual rows=7 loops=1) -(14 rows) +(13 rows) -- no matches in metrics_ordered_idx but one row in device_tbl :PREFIX @@ -1013,12 +1010,11 @@ ORDER BY m.v0; Filter: (device_id = 8) Rows Removed by Filter: 6 -> Custom Scan (DecompressChunk) on _hyper_1_5_chunk m (actual rows=0 loops=1) - Filter: ("time" < ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) - Vectorized Filter: ("time" > 'Tue Jan 01 00:00:00 2019 PST'::timestamp with time zone) + Vectorized Filter: (("time" > 'Tue Jan 01 00:00:00 2019 PST'::timestamp with time zone) AND ("time" < ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone)) -> Seq Scan on compress_hyper_2_10_chunk (actual rows=0 loops=1) Filter: ((_ts_meta_max_1 > 'Tue Jan 01 00:00:00 2019 PST'::timestamp with time zone) AND (device_id = 8) AND (_ts_meta_min_1 < ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone)) Rows Removed by Filter: 5 -(14 rows) +(13 rows) -- no matches in device_tbl but 1 row in metrics_ordered_idx :PREFIX diff --git a/tsl/test/expected/transparent_decompression_ordered_index-15.out b/tsl/test/expected/transparent_decompression_ordered_index-15.out index 4ce20a80bd0..af5b7e3f35f 100644 --- a/tsl/test/expected/transparent_decompression_ordered_index-15.out +++ b/tsl/test/expected/transparent_decompression_ordered_index-15.out @@ -832,23 +832,21 @@ ORDER BY 1, -> Merge Append (actual rows=10 loops=1) Sort Key: _hyper_1_4_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_1_4_chunk (actual rows=9 loops=1) - Filter: ("time" < now()) - Vectorized Filter: ("time" > 'Tue Jan 01 00:00:00 2002 PST'::timestamp with time zone) + Vectorized Filter: (("time" > 'Tue Jan 01 00:00:00 2002 PST'::timestamp with time zone) AND ("time" < now())) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_2_9_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_2_9_chunk (actual rows=1 loops=1) Filter: ((_ts_meta_max_1 > 'Tue Jan 01 00:00:00 2002 PST'::timestamp with time zone) AND (device_id = 4) AND (device_id_peer = 5)) -> Custom Scan (DecompressChunk) on _hyper_1_5_chunk (actual rows=1 loops=1) - Filter: ("time" < now()) - Vectorized Filter: ("time" > 'Tue Jan 01 00:00:00 2002 PST'::timestamp with time zone) + Vectorized Filter: (("time" > 'Tue Jan 01 00:00:00 2002 PST'::timestamp with time zone) AND ("time" < now())) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_2_10_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_2_10_chunk (actual rows=1 loops=1) Filter: ((_ts_meta_max_1 
> 'Tue Jan 01 00:00:00 2002 PST'::timestamp with time zone) AND (device_id = 4) AND (device_id_peer = 5)) Rows Removed by Filter: 4 -(26 rows) +(24 rows) :PREFIX SELECT m.device_id, @@ -918,23 +916,23 @@ ORDER BY 1, Chunks excluded during startup: 0 -> Append (actual rows=1541 loops=1) -> Custom Scan (DecompressChunk) on _hyper_1_1_chunk d_1 (actual rows=480 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_6_chunk (actual rows=5 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_1_2_chunk d_2 (actual rows=960 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_7_chunk (actual rows=5 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_1_3_chunk d_3 (actual rows=48 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_8_chunk (actual rows=1 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_1_4_chunk d_4 (actual rows=48 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_9_chunk (actual rows=1 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_1_5_chunk d_5 (actual rows=5 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_10_chunk (actual rows=5 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Sort (actual rows=7317 loops=1) @@ -945,23 +943,23 @@ ORDER BY 1, Chunks excluded during startup: 0 -> Append (actual rows=1541 loops=1) -> Custom Scan (DecompressChunk) on _hyper_1_1_chunk m_1 (actual rows=480 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_6_chunk compress_hyper_2_6_chunk_1 (actual rows=5 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_1_2_chunk m_2 (actual rows=960 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_7_chunk compress_hyper_2_7_chunk_1 (actual rows=5 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_1_3_chunk m_3 (actual rows=48 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 
0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_8_chunk compress_hyper_2_8_chunk_1 (actual rows=1 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_1_4_chunk m_4 (actual rows=48 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_9_chunk compress_hyper_2_9_chunk_1 (actual rows=1 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_1_5_chunk m_5 (actual rows=5 loops=1) - Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_2_10_chunk compress_hyper_2_10_chunk_1 (actual rows=5 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) (62 rows) @@ -984,15 +982,14 @@ ORDER BY m.v0; -> Hash Join (actual rows=0 loops=1) Hash Cond: (m.device_id = d.device_id) -> Custom Scan (DecompressChunk) on _hyper_1_5_chunk m (actual rows=0 loops=1) - Filter: ("time" < ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) - Vectorized Filter: ("time" > 'Tue Jan 01 00:00:00 2019 PST'::timestamp with time zone) + Vectorized Filter: (("time" > 'Tue Jan 01 00:00:00 2019 PST'::timestamp with time zone) AND ("time" < ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone)) -> Seq Scan on compress_hyper_2_10_chunk (actual rows=0 loops=1) Filter: ((_ts_meta_max_1 > 'Tue Jan 01 00:00:00 2019 PST'::timestamp with time zone) AND (_ts_meta_min_1 < ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone)) Rows Removed by Filter: 5 -> Hash (actual rows=7 loops=1) Buckets: 1024 Batches: 1 -> Seq Scan on device_tbl d (actual rows=7 loops=1) -(14 rows) +(13 rows) -- no matches in metrics_ordered_idx but one row in device_tbl :PREFIX @@ -1015,12 +1012,11 @@ ORDER BY m.v0; Filter: (device_id = 8) Rows Removed by Filter: 6 -> Custom Scan (DecompressChunk) on _hyper_1_5_chunk m (actual rows=0 loops=1) - Filter: ("time" < ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone) - Vectorized Filter: ("time" > 'Tue Jan 01 00:00:00 2019 PST'::timestamp with time zone) + Vectorized Filter: (("time" > 'Tue Jan 01 00:00:00 2019 PST'::timestamp with time zone) AND ("time" < ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone)) -> Seq Scan on compress_hyper_2_10_chunk (actual rows=0 loops=1) Filter: ((_ts_meta_max_1 > 'Tue Jan 01 00:00:00 2019 PST'::timestamp with time zone) AND (device_id = 8) AND (_ts_meta_min_1 < ('2000-01-01 0:00:00+0'::cstring)::timestamp with time zone)) Rows Removed by Filter: 5 -(14 rows) +(13 rows) -- no matches in device_tbl but 1 row in metrics_ordered_idx :PREFIX diff --git a/tsl/test/shared/expected/constify_timestamptz_op_interval.out b/tsl/test/shared/expected/constify_timestamptz_op_interval.out index 67614b8b88a..4c6eaf82dcd 100644 --- a/tsl/test/shared/expected/constify_timestamptz_op_interval.out +++ b/tsl/test/shared/expected/constify_timestamptz_op_interval.out @@ -122,7 +122,7 @@ FROM metrics_compressed WHERE time < '2000-01-01'::timestamptz - '6h'::interval; QUERY PLAN Custom Scan (DecompressChunk) on _hyper_X_X_chunk - Filter: ("time" < ('Sat Jan 01 00:00:00 2000 PST'::timestamp with time zone - '@ 6 hours'::interval)) + 
Vectorized Filter: ("time" < ('Sat Jan 01 00:00:00 2000 PST'::timestamp with time zone - '@ 6 hours'::interval)) -> Seq Scan on compress_hyper_X_X_chunk Filter: (_ts_meta_min_1 < ('Sat Jan 01 00:00:00 2000 PST'::timestamp with time zone - '@ 6 hours'::interval)) (4 rows) @@ -135,7 +135,7 @@ WHERE time < '2000-01-01'::timestamptz - '6h'::interval AND device_id = 1; QUERY PLAN Custom Scan (DecompressChunk) on _hyper_X_X_chunk - Filter: ("time" < ('Sat Jan 01 00:00:00 2000 PST'::timestamp with time zone - '@ 6 hours'::interval)) + Vectorized Filter: ("time" < ('Sat Jan 01 00:00:00 2000 PST'::timestamp with time zone - '@ 6 hours'::interval)) -> Seq Scan on compress_hyper_X_X_chunk Filter: ((device_id = 1) AND (_ts_meta_min_1 < ('Sat Jan 01 00:00:00 2000 PST'::timestamp with time zone - '@ 6 hours'::interval))) (4 rows) diff --git a/tsl/test/shared/expected/constraint_exclusion_prepared.out b/tsl/test/shared/expected/constraint_exclusion_prepared.out index 10c194b8ba6..c7afcbe3130 100644 --- a/tsl/test/shared/expected/constraint_exclusion_prepared.out +++ b/tsl/test/shared/expected/constraint_exclusion_prepared.out @@ -1482,15 +1482,15 @@ QUERY PLAN Order: metrics_compressed."time" Chunks excluded during startup: 0 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Index Scan Backward using compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk (actual rows=1 loops=1) Index Cond: (device_id = 1) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Index Scan Backward using compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk (never executed) Index Cond: (device_id = 1) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Index Scan Backward using compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk (never executed) Index Cond: (device_id = 1) (16 rows) @@ -1502,15 +1502,15 @@ QUERY PLAN Order: metrics_compressed."time" Chunks excluded during startup: 0 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Index Scan Backward using compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk (actual rows=1 loops=1) Index Cond: (device_id = 1) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Index Scan Backward using compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk (never executed) Index Cond: (device_id = 1) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Index Scan Backward using compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk (never executed) Index Cond: (device_id = 1) (16 rows) @@ -1522,15 +1522,15 @@ QUERY PLAN Order: metrics_compressed."time" Chunks excluded during startup: 0 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Index Scan Backward using compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk 
(actual rows=1 loops=1) Index Cond: (device_id = 1) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Index Scan Backward using compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk (never executed) Index Cond: (device_id = 1) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Index Scan Backward using compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk (never executed) Index Cond: (device_id = 1) (16 rows) @@ -1542,15 +1542,15 @@ QUERY PLAN Order: metrics_compressed."time" Chunks excluded during startup: 0 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Index Scan Backward using compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk (actual rows=1 loops=1) Index Cond: (device_id = 1) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Index Scan Backward using compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk (never executed) Index Cond: (device_id = 1) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Index Scan Backward using compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk (never executed) Index Cond: (device_id = 1) (16 rows) @@ -1562,15 +1562,15 @@ QUERY PLAN Order: metrics_compressed."time" Chunks excluded during startup: 0 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Index Scan Backward using compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk (actual rows=1 loops=1) Index Cond: (device_id = 1) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Index Scan Backward using compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk (never executed) Index Cond: (device_id = 1) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Index Scan Backward using compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk (never executed) Index Cond: (device_id = 1) (16 rows) @@ -1591,12 +1591,12 @@ QUERY PLAN Order: metrics_compressed."time" Chunks excluded during startup: 1 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Index Scan Backward using compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk (actual rows=1 loops=1) Index Cond: (device_id = 1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Index Scan Backward using 
compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk (never executed) Index Cond: (device_id = 1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) @@ -1609,12 +1609,12 @@ QUERY PLAN Order: metrics_compressed."time" Chunks excluded during startup: 1 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Index Scan Backward using compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk (actual rows=1 loops=1) Index Cond: (device_id = 1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Index Scan Backward using compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk (never executed) Index Cond: (device_id = 1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) @@ -1627,12 +1627,12 @@ QUERY PLAN Order: metrics_compressed."time" Chunks excluded during startup: 1 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Index Scan Backward using compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk (actual rows=1 loops=1) Index Cond: (device_id = 1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Index Scan Backward using compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk (never executed) Index Cond: (device_id = 1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) @@ -1645,12 +1645,12 @@ QUERY PLAN Order: metrics_compressed."time" Chunks excluded during startup: 1 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Index Scan Backward using compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk (actual rows=1 loops=1) Index Cond: (device_id = 1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Index Scan Backward using compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk (never executed) Index Cond: (device_id = 1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) @@ -1663,12 +1663,12 @@ QUERY PLAN Order: metrics_compressed."time" Chunks excluded during startup: 1 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: 
("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Index Scan Backward using compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk (actual rows=1 loops=1) Index Cond: (device_id = 1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Index Scan Backward using compress_hyper_X_X_chunk__compressed_hypertable_4_device_id__t on compress_hyper_X_X_chunk (never executed) Index Cond: (device_id = 1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) @@ -1880,13 +1880,13 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=17990 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) (17 rows) @@ -1901,13 +1901,13 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=17990 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) (17 rows) @@ -1922,13 +1922,13 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=17990 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) (17 rows) 
@@ -1943,13 +1943,13 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=17990 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) (17 rows) @@ -1964,13 +1964,13 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=17990 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) (17 rows) @@ -1998,14 +1998,14 @@ QUERY PLAN -> Merge Append (actual rows=100 loops=1) Sort Key: _hyper_X_X_chunk.device_id, _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort @@ -2023,14 +2023,14 @@ QUERY PLAN -> Merge Append (actual rows=100 loops=1) Sort Key: _hyper_X_X_chunk.device_id, _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on 
_hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort @@ -2048,14 +2048,14 @@ QUERY PLAN -> Merge Append (actual rows=100 loops=1) Sort Key: _hyper_X_X_chunk.device_id, _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort @@ -2073,14 +2073,14 @@ QUERY PLAN -> Merge Append (actual rows=100 loops=1) Sort Key: _hyper_X_X_chunk.device_id, _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort @@ -2098,14 +2098,14 @@ QUERY PLAN -> Merge Append (actual rows=100 loops=1) Sort Key: _hyper_X_X_chunk.device_id, _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort @@ -2200,20 +2200,20 @@ QUERY PLAN Order: 
metrics_space_compressed."time" Chunks excluded during startup: 0 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (device_id = 1) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Sort (never executed) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (device_id = 1) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Sort (never executed) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC -> Seq Scan on compress_hyper_X_X_chunk (never executed) @@ -2227,20 +2227,20 @@ QUERY PLAN Order: metrics_space_compressed."time" Chunks excluded during startup: 0 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (device_id = 1) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Sort (never executed) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (device_id = 1) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Sort (never executed) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC -> Seq Scan on compress_hyper_X_X_chunk (never executed) @@ -2254,20 +2254,20 @@ QUERY PLAN Order: metrics_space_compressed."time" Chunks excluded during startup: 0 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (device_id = 1) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Sort (never executed) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (device_id = 1) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Sort (never executed) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC -> Seq Scan on compress_hyper_X_X_chunk (never executed) @@ -2281,20 +2281,20 @@ QUERY PLAN Order: metrics_space_compressed."time" Chunks excluded during startup: 0 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (device_id = 1) -> Custom Scan 
(DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Sort (never executed) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (device_id = 1) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Sort (never executed) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC -> Seq Scan on compress_hyper_X_X_chunk (never executed) @@ -2308,20 +2308,20 @@ QUERY PLAN Order: metrics_space_compressed."time" Chunks excluded during startup: 0 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (device_id = 1) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Sort (never executed) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (device_id = 1) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Sort (never executed) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC -> Seq Scan on compress_hyper_X_X_chunk (never executed) @@ -2344,14 +2344,14 @@ QUERY PLAN Order: metrics_space_compressed."time" Chunks excluded during startup: 1 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: ((device_id = 1) AND (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone)) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC -> Seq Scan on compress_hyper_X_X_chunk (never executed) @@ -2365,14 +2365,14 @@ QUERY PLAN Order: metrics_space_compressed."time" Chunks excluded during startup: 1 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: ((device_id = 1) AND (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone)) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC -> Seq Scan on 
compress_hyper_X_X_chunk (never executed) @@ -2386,14 +2386,14 @@ QUERY PLAN Order: metrics_space_compressed."time" Chunks excluded during startup: 1 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: ((device_id = 1) AND (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone)) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC -> Seq Scan on compress_hyper_X_X_chunk (never executed) @@ -2407,14 +2407,14 @@ QUERY PLAN Order: metrics_space_compressed."time" Chunks excluded during startup: 1 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: ((device_id = 1) AND (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone)) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC -> Seq Scan on compress_hyper_X_X_chunk (never executed) @@ -2428,14 +2428,14 @@ QUERY PLAN Order: metrics_space_compressed."time" Chunks excluded during startup: 1 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: ((device_id = 1) AND (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone)) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: compress_hyper_X_X_chunk._ts_meta_sequence_num DESC -> Seq Scan on compress_hyper_X_X_chunk (never executed) @@ -2769,21 +2769,21 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=60 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: 
top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=10794 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=12 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=21 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Merge Append (never executed) @@ -2791,19 +2791,19 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Merge Append (never executed) @@ -2811,19 +2811,19 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) (66 rows) @@ -2839,21 +2839,21 @@ QUERY PLAN Sort Key: 
_hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=60 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=10794 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=12 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=21 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Merge Append (never executed) @@ -2861,19 +2861,19 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Merge Append (never executed) @@ -2881,19 +2881,19 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < 
('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) (66 rows) @@ -2909,21 +2909,21 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=60 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=10794 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=12 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=21 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Merge Append (never executed) @@ -2931,19 +2931,19 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Merge Append (never executed) @@ -2951,19 +2951,19 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) 
-> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) (66 rows) @@ -2979,21 +2979,21 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=60 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=10794 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=12 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=21 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Merge Append (never executed) @@ -3001,19 +3001,19 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp 
with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Merge Append (never executed) @@ -3021,19 +3021,19 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) (66 rows) @@ -3049,21 +3049,21 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=60 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=10794 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=12 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=21 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Merge Append (never executed) @@ -3071,19 +3071,19 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp 
with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Merge Append (never executed) @@ -3091,19 +3091,19 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) (66 rows) @@ -3131,28 +3131,28 @@ QUERY PLAN -> Merge Append (actual rows=100 loops=1) Sort Key: _hyper_X_X_chunk.device_id, _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=12 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq 
Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort @@ -3160,7 +3160,7 @@ QUERY PLAN Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) Rows Removed by Filter: 1 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort @@ -3168,7 +3168,7 @@ QUERY PLAN Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) Rows Removed by Filter: 3 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort @@ -3186,28 +3186,28 @@ QUERY PLAN -> Merge Append (actual rows=100 loops=1) Sort Key: _hyper_X_X_chunk.device_id, _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=12 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: 
compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort @@ -3215,7 +3215,7 @@ QUERY PLAN Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) Rows Removed by Filter: 1 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort @@ -3223,7 +3223,7 @@ QUERY PLAN Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) Rows Removed by Filter: 3 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort @@ -3241,28 +3241,28 @@ QUERY PLAN -> Merge Append (actual rows=100 loops=1) Sort Key: _hyper_X_X_chunk.device_id, _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=12 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort @@ -3270,7 +3270,7 @@ QUERY PLAN Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) Rows Removed by Filter: 1 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: 
("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort @@ -3278,7 +3278,7 @@ QUERY PLAN Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) Rows Removed by Filter: 3 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort @@ -3296,28 +3296,28 @@ QUERY PLAN -> Merge Append (actual rows=100 loops=1) Sort Key: _hyper_X_X_chunk.device_id, _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=12 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort @@ -3325,7 +3325,7 @@ QUERY PLAN Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) Rows Removed by Filter: 1 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort @@ -3333,7 +3333,7 @@ QUERY PLAN Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) Rows Removed by Filter: 3 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 
loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort @@ -3351,28 +3351,28 @@ QUERY PLAN -> Merge Append (actual rows=100 loops=1) Sort Key: _hyper_X_X_chunk.device_id, _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=100 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=12 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort @@ -3380,7 +3380,7 @@ QUERY PLAN Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) Rows Removed by Filter: 1 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort @@ -3388,7 +3388,7 @@ QUERY PLAN Filter: (_ts_meta_min_1 < ('2000-01-10'::cstring)::timestamp with time zone) Rows Removed by Filter: 3 -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1 loops=1) - Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-10'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk._ts_meta_sequence_num DESC Sort Method: quicksort diff --git a/tsl/test/shared/expected/ordered_append-13.out 
b/tsl/test/shared/expected/ordered_append-13.out index e1c9df98671..4b25227cdd3 100644 --- a/tsl/test/shared/expected/ordered_append-13.out +++ b/tsl/test/shared/expected/ordered_append-13.out @@ -2469,7 +2469,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=16785 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 3215 -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) @@ -2477,7 +2477,7 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) (19 rows) @@ -2497,13 +2497,13 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=17990 loops=1) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) (17 rows) @@ -2525,13 +2525,12 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=7195 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: (("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND ("time" > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 7805 - Vectorized Filter: ("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=15 loops=1) Filter: ((_ts_meta_min_1 < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 15 -(14 rows) +(13 rows) :PREFIX SELECT time @@ -2549,13 +2548,12 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3595 loops=1) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: (("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND ("time" < ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 6405 - Vectorized Filter: ("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=10 loops=1) Filter: ((_ts_meta_max_1 > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND 
(_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 20 -(14 rows) +(13 rows) -- Disable hash aggregation to get a deterministic test output SET enable_hashagg = OFF; @@ -2837,17 +2835,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=25190 loops=1) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=30 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (20 rows) @@ -2870,17 +2868,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=25190 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=30 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (20 rows) @@ -3761,7 +3759,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 4 @@ -3769,7 +3767,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 12 @@ -3777,7 +3775,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 4 @@ -3787,7 +3785,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" 
Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3357 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 643 -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) @@ -3796,7 +3794,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=10071 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 1929 -> Seq Scan on compress_hyper_X_X_chunk (actual rows=12 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) @@ -3805,7 +3803,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3357 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 643 -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) @@ -3815,19 +3813,19 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) (78 rows) @@ -3848,21 +3846,21 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=10794 loops=1) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on 
compress_hyper_X_X_chunk (actual rows=12 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Merge Append (never executed) @@ -3870,19 +3868,19 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Merge Append (never executed) @@ -3890,19 +3888,19 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) (66 rows) @@ -3925,8 +3923,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) - Vectorized Filter: ("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with 
time zone) + Vectorized Filter: (("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND ("time" > ('2000-01-08'::cstring)::timestamp with time zone)) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=1) Filter: ((_ts_meta_min_1 < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 4 @@ -3934,8 +3931,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) - Vectorized Filter: ("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) + Vectorized Filter: (("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND ("time" > ('2000-01-08'::cstring)::timestamp with time zone)) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=1) Filter: ((_ts_meta_min_1 < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 12 @@ -3943,8 +3939,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) - Vectorized Filter: ("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) + Vectorized Filter: (("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND ("time" > ('2000-01-08'::cstring)::timestamp with time zone)) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=1) Filter: ((_ts_meta_min_1 < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 4 @@ -3954,9 +3949,8 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1439 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: (("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND ("time" > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 1561 - Vectorized Filter: ("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=3 loops=1) Filter: ((_ts_meta_min_1 < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 3 @@ -3964,9 +3958,8 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=4317 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: (("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND ("time" > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 4683 - Vectorized Filter: ("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=9 loops=1) Filter: ((_ts_meta_min_1 < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 9 @@ -3974,13 +3967,12 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on 
_hyper_X_X_chunk (actual rows=1439 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: (("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND ("time" > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 1561 - Vectorized Filter: ("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=3 loops=1) Filter: ((_ts_meta_min_1 < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 3 -(64 rows) +(58 rows) :PREFIX SELECT time @@ -3999,9 +3991,8 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=719 loops=1) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: (("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND ("time" < ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 1281 - Vectorized Filter: ("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=2 loops=1) Filter: ((_ts_meta_max_1 > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 4 @@ -4009,9 +4000,8 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=2157 loops=1) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: (("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND ("time" < ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 3843 - Vectorized Filter: ("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) Filter: ((_ts_meta_max_1 > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 12 @@ -4019,9 +4009,8 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=719 loops=1) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: (("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND ("time" < ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 1281 - Vectorized Filter: ("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=2 loops=1) Filter: ((_ts_meta_max_1 > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 4 @@ -4030,25 +4019,22 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) - Vectorized Filter: ("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) + Vectorized Filter: (("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND ("time" < ('2000-01-08'::cstring)::timestamp with time zone)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: ((_ts_meta_max_1 > 'Fri 
Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone)) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) - Vectorized Filter: ("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) + Vectorized Filter: (("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND ("time" < ('2000-01-08'::cstring)::timestamp with time zone)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: ((_ts_meta_max_1 > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone)) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) - Vectorized Filter: ("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) + Vectorized Filter: (("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND ("time" < ('2000-01-08'::cstring)::timestamp with time zone)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: ((_ts_meta_max_1 > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone)) -(58 rows) +(52 rows) -- Disable hash aggregation to get a deterministic test output SET enable_hashagg = OFF; @@ -4569,53 +4555,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=15114 loops=1) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=18 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 
mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (57 rows) @@ -4639,53 +4625,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Sort (actual rows=60 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=15114 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=18 loops=1) -> Sort (actual rows=21 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never 
executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (57 rows) diff --git a/tsl/test/shared/expected/ordered_append-14.out b/tsl/test/shared/expected/ordered_append-14.out index e1c9df98671..4b25227cdd3 100644 --- a/tsl/test/shared/expected/ordered_append-14.out +++ b/tsl/test/shared/expected/ordered_append-14.out @@ -2469,7 +2469,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=16785 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 3215 -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) @@ -2477,7 +2477,7 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) (19 rows) @@ -2497,13 +2497,13 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=17990 loops=1) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) (17 rows) @@ -2525,13 +2525,12 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=7195 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: (("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND ("time" > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 7805 - Vectorized Filter: ("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=15 loops=1) Filter: ((_ts_meta_min_1 < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 15 -(14 rows) +(13 rows) :PREFIX SELECT time @@ -2549,13 +2548,12 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3595 loops=1) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: (("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND ("time" < ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 6405 - Vectorized Filter: ("time" > 'Fri Jan 07 00:00:00 
2000 PST'::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=10 loops=1) Filter: ((_ts_meta_max_1 > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 20 -(14 rows) +(13 rows) -- Disable hash aggregation to get a deterministic test output SET enable_hashagg = OFF; @@ -2837,17 +2835,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=25190 loops=1) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=30 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (20 rows) @@ -2870,17 +2868,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=25190 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=30 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (20 rows) @@ -3761,7 +3759,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 4 @@ -3769,7 +3767,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 12 @@ -3777,7 +3775,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk 
(actual rows=0 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 4 @@ -3787,7 +3785,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3357 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 643 -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) @@ -3796,7 +3794,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=10071 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 1929 -> Seq Scan on compress_hyper_X_X_chunk (actual rows=12 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) @@ -3805,7 +3803,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3357 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 643 -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) @@ -3815,19 +3813,19 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) (78 rows) @@ -3848,21 +3846,21 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=10794 
loops=1) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=12 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Merge Append (never executed) @@ -3870,19 +3868,19 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Merge Append (never executed) @@ -3890,19 +3888,19 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) (66 rows) @@ -3925,8 +3923,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: quicksort -> Custom Scan (DecompressChunk) on 
_hyper_X_X_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) - Vectorized Filter: ("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) + Vectorized Filter: (("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND ("time" > ('2000-01-08'::cstring)::timestamp with time zone)) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=1) Filter: ((_ts_meta_min_1 < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 4 @@ -3934,8 +3931,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) - Vectorized Filter: ("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) + Vectorized Filter: (("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND ("time" > ('2000-01-08'::cstring)::timestamp with time zone)) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=1) Filter: ((_ts_meta_min_1 < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 12 @@ -3943,8 +3939,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) - Vectorized Filter: ("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) + Vectorized Filter: (("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND ("time" > ('2000-01-08'::cstring)::timestamp with time zone)) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=1) Filter: ((_ts_meta_min_1 < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 4 @@ -3954,9 +3949,8 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1439 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: (("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND ("time" > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 1561 - Vectorized Filter: ("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=3 loops=1) Filter: ((_ts_meta_min_1 < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 3 @@ -3964,9 +3958,8 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=4317 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: (("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND ("time" > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 4683 - Vectorized Filter: ("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=9 loops=1) Filter: ((_ts_meta_min_1 < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_max_1 > 
('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 9 @@ -3974,13 +3967,12 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1439 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: (("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND ("time" > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 1561 - Vectorized Filter: ("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=3 loops=1) Filter: ((_ts_meta_min_1 < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 3 -(64 rows) +(58 rows) :PREFIX SELECT time @@ -3999,9 +3991,8 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=719 loops=1) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: (("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND ("time" < ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 1281 - Vectorized Filter: ("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=2 loops=1) Filter: ((_ts_meta_max_1 > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 4 @@ -4009,9 +4000,8 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=2157 loops=1) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: (("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND ("time" < ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 3843 - Vectorized Filter: ("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) Filter: ((_ts_meta_max_1 > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 12 @@ -4019,9 +4009,8 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=719 loops=1) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: (("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND ("time" < ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 1281 - Vectorized Filter: ("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=2 loops=1) Filter: ((_ts_meta_max_1 > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 4 @@ -4030,25 +4019,22 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) - Vectorized Filter: ("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) + Vectorized Filter: (("time" > 
'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND ("time" < ('2000-01-08'::cstring)::timestamp with time zone)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: ((_ts_meta_max_1 > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone)) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) - Vectorized Filter: ("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) + Vectorized Filter: (("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND ("time" < ('2000-01-08'::cstring)::timestamp with time zone)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: ((_ts_meta_max_1 > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone)) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) - Vectorized Filter: ("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) + Vectorized Filter: (("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND ("time" < ('2000-01-08'::cstring)::timestamp with time zone)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: ((_ts_meta_max_1 > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone)) -(58 rows) +(52 rows) -- Disable hash aggregation to get a deterministic test output SET enable_hashagg = OFF; @@ -4569,53 +4555,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=15114 loops=1) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=18 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) 
Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (57 rows) @@ -4639,53 +4625,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Sort (actual rows=60 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=15114 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=18 loops=1) -> Sort (actual rows=21 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: 
("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (57 rows) diff --git a/tsl/test/shared/expected/ordered_append-15.out b/tsl/test/shared/expected/ordered_append-15.out index 9497c149daf..f402524ad6d 100644 --- a/tsl/test/shared/expected/ordered_append-15.out +++ b/tsl/test/shared/expected/ordered_append-15.out @@ -2490,7 +2490,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=16785 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 3215 -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) @@ -2498,7 +2498,7 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) (19 rows) @@ -2518,13 +2518,13 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=17990 loops=1) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) (17 rows) @@ -2546,13 +2546,12 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=7195 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: (("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND ("time" > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 7805 - Vectorized Filter: ("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=15 loops=1) Filter: ((_ts_meta_min_1 < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 15 -(14 rows) +(13 rows) :PREFIX SELECT time @@ -2570,13 +2569,12 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3595 loops=1) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: (("time" > 'Fri Jan 
07 00:00:00 2000 PST'::timestamp with time zone) AND ("time" < ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 6405 - Vectorized Filter: ("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=10 loops=1) Filter: ((_ts_meta_max_1 > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 20 -(14 rows) +(13 rows) -- Disable hash aggregation to get a deterministic test output SET enable_hashagg = OFF; @@ -2861,17 +2859,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=25190 loops=1) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=30 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (20 rows) @@ -2894,17 +2892,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=25190 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=30 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (20 rows) @@ -3788,7 +3786,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 4 @@ -3796,7 +3794,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 12 @@ -3804,7 +3802,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=0 loops=1) - 
Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 4 @@ -3814,7 +3812,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3357 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 643 -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) @@ -3823,7 +3821,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=10071 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 1929 -> Seq Scan on compress_hyper_X_X_chunk (actual rows=12 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) @@ -3832,7 +3830,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3357 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) Rows Removed by Filter: 643 -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) @@ -3842,19 +3840,19 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone) (78 rows) @@ -3875,21 +3873,21 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < 
('2000-01-08'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=10794 loops=1) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=12 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Merge Append (never executed) @@ -3897,19 +3895,19 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Merge Append (never executed) @@ -3917,19 +3915,19 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: 
(_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone) (66 rows) @@ -3952,8 +3950,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) - Vectorized Filter: ("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) + Vectorized Filter: (("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND ("time" > ('2000-01-08'::cstring)::timestamp with time zone)) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=1) Filter: ((_ts_meta_min_1 < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 4 @@ -3961,8 +3958,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) - Vectorized Filter: ("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) + Vectorized Filter: (("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND ("time" > ('2000-01-08'::cstring)::timestamp with time zone)) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=1) Filter: ((_ts_meta_min_1 < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 12 @@ -3970,8 +3966,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=0 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) - Vectorized Filter: ("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) + Vectorized Filter: (("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND ("time" > ('2000-01-08'::cstring)::timestamp with time zone)) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=1) Filter: ((_ts_meta_min_1 < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 4 @@ -3981,9 +3976,8 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1439 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: (("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND ("time" > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 1561 - Vectorized Filter: ("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=3 loops=1) Filter: ((_ts_meta_min_1 < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 3 @@ -3991,9 +3985,8 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=4317 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: (("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND ("time" > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 4683 - Vectorized Filter: ("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with 
time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=9 loops=1) Filter: ((_ts_meta_min_1 < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 9 @@ -4001,13 +3994,12 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=1439 loops=1) - Filter: ("time" > ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: (("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND ("time" > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 1561 - Vectorized Filter: ("time" < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=3 loops=1) Filter: ((_ts_meta_min_1 < 'Mon Jan 10 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_max_1 > ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 3 -(64 rows) +(58 rows) :PREFIX SELECT time @@ -4026,9 +4018,8 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=719 loops=1) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: (("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND ("time" < ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 1281 - Vectorized Filter: ("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=2 loops=1) Filter: ((_ts_meta_max_1 > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 4 @@ -4036,9 +4027,8 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=2157 loops=1) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: (("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND ("time" < ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 3843 - Vectorized Filter: ("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) Filter: ((_ts_meta_max_1 > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 12 @@ -4046,9 +4036,8 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=719 loops=1) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) + Vectorized Filter: (("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND ("time" < ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 1281 - Vectorized Filter: ("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=2 loops=1) Filter: ((_ts_meta_max_1 > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone)) Rows Removed by Filter: 4 @@ -4057,25 +4046,22 @@ QUERY PLAN -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: 
("time" < ('2000-01-08'::cstring)::timestamp with time zone) - Vectorized Filter: ("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) + Vectorized Filter: (("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND ("time" < ('2000-01-08'::cstring)::timestamp with time zone)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: ((_ts_meta_max_1 > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone)) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) - Vectorized Filter: ("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) + Vectorized Filter: (("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND ("time" < ('2000-01-08'::cstring)::timestamp with time zone)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: ((_ts_meta_max_1 > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone)) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < ('2000-01-08'::cstring)::timestamp with time zone) - Vectorized Filter: ("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) + Vectorized Filter: (("time" > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND ("time" < ('2000-01-08'::cstring)::timestamp with time zone)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: ((_ts_meta_max_1 > 'Fri Jan 07 00:00:00 2000 PST'::timestamp with time zone) AND (_ts_meta_min_1 < ('2000-01-08'::cstring)::timestamp with time zone)) -(58 rows) +(52 rows) -- Disable hash aggregation to get a deterministic test output SET enable_hashagg = OFF; @@ -4599,53 +4585,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=15114 loops=1) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=18 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: 
("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < (now() + '@ 1 mon'::interval)) + Vectorized Filter: ("time" < (now() + '@ 1 mon'::interval)) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (57 rows) @@ -4669,53 +4655,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Sort (actual rows=60 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=15114 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=18 loops=1) -> Sort (actual rows=21 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never 
executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (57 rows) diff --git a/tsl/test/shared/expected/ordered_append_join-13.out b/tsl/test/shared/expected/ordered_append_join-13.out index db8902aa68c..23d432b4465 100644 --- a/tsl/test/shared/expected/ordered_append_join-13.out +++ b/tsl/test/shared/expected/ordered_append_join-13.out @@ -2074,25 +2074,28 @@ QUERY PLAN -> Sort (never executed) Sort Key: o_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_1 (never executed) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) -> Sort (never executed) Sort Key: o_2."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_2 (never executed) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) -> Sort (actual rows=1 loops=3) Sort Key: o_3."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_3 (actual rows=3600 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 4063 + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=8 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 12 -(28 rows) +(31 rows) -- test startup and runtime exclusion together -- all chunks should be filtered @@ -3196,7 +3199,8 @@ QUERY PLAN Sort Key: o_1."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_1 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 6 @@ -3204,7 +3208,8 @@ QUERY PLAN Sort Key: o_2."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_2 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 
loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 18 @@ -3212,7 +3217,8 @@ QUERY PLAN Sort Key: o_3."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_3 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 6 @@ -3222,7 +3228,8 @@ QUERY PLAN Sort Key: o_4."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_4 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 6 @@ -3230,7 +3237,8 @@ QUERY PLAN Sort Key: o_5."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_5 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 18 @@ -3238,7 +3246,8 @@ QUERY PLAN Sort Key: o_6."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_6 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 6 @@ -3248,8 +3257,9 @@ QUERY PLAN Sort Key: o_7."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_7 (actual rows=720 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 813 + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=2 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 2 @@ -3257,8 +3267,9 @@ QUERY PLAN Sort Key: o_8."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_8 (actual rows=2160 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 2438 + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=5 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 
< (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 7 @@ -3266,12 +3277,13 @@ QUERY PLAN Sort Key: o_9."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_9 (actual rows=720 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 813 + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=2 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 2 -(86 rows) +(95 rows) -- test startup and runtime exclusion together -- all chunks should be filtered @@ -3299,7 +3311,8 @@ QUERY PLAN Sort Key: o_1."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_1 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 6 @@ -3307,7 +3320,8 @@ QUERY PLAN Sort Key: o_2."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_2 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 18 @@ -3315,7 +3329,8 @@ QUERY PLAN Sort Key: o_3."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_3 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 6 @@ -3325,7 +3340,8 @@ QUERY PLAN Sort Key: o_4."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_4 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 6 @@ -3333,7 +3349,8 @@ QUERY PLAN Sort Key: o_5."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_5 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 
< (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 18 @@ -3341,7 +3358,8 @@ QUERY PLAN Sort Key: o_6."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_6 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 6 @@ -3351,8 +3369,9 @@ QUERY PLAN Sort Key: o_7."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_7 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 1533 + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=2 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 2 @@ -3360,8 +3379,9 @@ QUERY PLAN Sort Key: o_8."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_8 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 4598 + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=5 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 7 @@ -3369,12 +3389,13 @@ QUERY PLAN Sort Key: o_9."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_9 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 1533 + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=2 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 2 -(86 rows) +(95 rows) -- test JOIN -- no exclusion on joined table because quals are not propagated yet diff --git a/tsl/test/shared/expected/ordered_append_join-14.out b/tsl/test/shared/expected/ordered_append_join-14.out index db8902aa68c..23d432b4465 100644 --- a/tsl/test/shared/expected/ordered_append_join-14.out +++ b/tsl/test/shared/expected/ordered_append_join-14.out @@ -2074,25 +2074,28 @@ QUERY PLAN -> Sort (never executed) Sort Key: o_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_1 (never executed) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) -> Sort (never executed) Sort Key: o_2."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_2 (never executed) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND 
("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) -> Sort (actual rows=1 loops=3) Sort Key: o_3."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_3 (actual rows=3600 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 4063 + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=8 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 12 -(28 rows) +(31 rows) -- test startup and runtime exclusion together -- all chunks should be filtered @@ -3196,7 +3199,8 @@ QUERY PLAN Sort Key: o_1."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_1 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 6 @@ -3204,7 +3208,8 @@ QUERY PLAN Sort Key: o_2."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_2 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 18 @@ -3212,7 +3217,8 @@ QUERY PLAN Sort Key: o_3."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_3 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 6 @@ -3222,7 +3228,8 @@ QUERY PLAN Sort Key: o_4."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_4 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 6 @@ -3230,7 +3237,8 @@ QUERY PLAN Sort Key: o_5."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_5 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: 
(("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 18 @@ -3238,7 +3246,8 @@ QUERY PLAN Sort Key: o_6."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_6 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 6 @@ -3248,8 +3257,9 @@ QUERY PLAN Sort Key: o_7."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_7 (actual rows=720 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 813 + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=2 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 2 @@ -3257,8 +3267,9 @@ QUERY PLAN Sort Key: o_8."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_8 (actual rows=2160 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 2438 + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=5 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 7 @@ -3266,12 +3277,13 @@ QUERY PLAN Sort Key: o_9."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_9 (actual rows=720 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 813 + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=2 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 2 -(86 rows) +(95 rows) -- test startup and runtime exclusion together -- all chunks should be filtered @@ -3299,7 +3311,8 @@ QUERY PLAN Sort Key: o_1."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_1 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 6 @@ -3307,7 +3320,8 @@ QUERY PLAN Sort Key: o_2."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_2 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND 
("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 18 @@ -3315,7 +3329,8 @@ QUERY PLAN Sort Key: o_3."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_3 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 6 @@ -3325,7 +3340,8 @@ QUERY PLAN Sort Key: o_4."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_4 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 6 @@ -3333,7 +3349,8 @@ QUERY PLAN Sort Key: o_5."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_5 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 18 @@ -3341,7 +3358,8 @@ QUERY PLAN Sort Key: o_6."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_6 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 6 @@ -3351,8 +3369,9 @@ QUERY PLAN Sort Key: o_7."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_7 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 1533 + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=2 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 2 @@ -3360,8 +3379,9 @@ QUERY PLAN Sort Key: o_8."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_8 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" 
< (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 4598 + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=5 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 7 @@ -3369,12 +3389,13 @@ QUERY PLAN Sort Key: o_9."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_9 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 1533 + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=2 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 2 -(86 rows) +(95 rows) -- test JOIN -- no exclusion on joined table because quals are not propagated yet diff --git a/tsl/test/shared/expected/ordered_append_join-15.out b/tsl/test/shared/expected/ordered_append_join-15.out index 2c511b0f455..34f01051ab2 100644 --- a/tsl/test/shared/expected/ordered_append_join-15.out +++ b/tsl/test/shared/expected/ordered_append_join-15.out @@ -2090,25 +2090,28 @@ QUERY PLAN -> Sort (never executed) Sort Key: o_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_1 (never executed) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) -> Sort (never executed) Sort Key: o_2."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_2 (never executed) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (never executed) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) -> Sort (actual rows=1 loops=3) Sort Key: o_3."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_3 (actual rows=3600 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 4063 + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=8 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 12 -(29 rows) +(32 rows) -- test startup and runtime exclusion together -- all chunks should be filtered @@ -3218,7 +3221,8 @@ QUERY PLAN Sort Key: o_1."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_1 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows 
Removed by Filter: 6 @@ -3226,7 +3230,8 @@ QUERY PLAN Sort Key: o_2."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_2 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 18 @@ -3234,7 +3239,8 @@ QUERY PLAN Sort Key: o_3."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_3 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 6 @@ -3244,7 +3250,8 @@ QUERY PLAN Sort Key: o_4."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_4 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 6 @@ -3252,7 +3259,8 @@ QUERY PLAN Sort Key: o_5."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_5 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 18 @@ -3260,7 +3268,8 @@ QUERY PLAN Sort Key: o_6."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_6 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 6 @@ -3270,8 +3279,9 @@ QUERY PLAN Sort Key: o_7."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_7 (actual rows=720 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 813 + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=2 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 2 @@ -3279,8 +3289,9 @@ QUERY PLAN Sort Key: o_8."time" DESC Sort Method: top-N 
heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_8 (actual rows=2160 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 2438 + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=5 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 7 @@ -3288,12 +3299,13 @@ QUERY PLAN Sort Key: o_9."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_9 (actual rows=720 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" < now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 813 + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=2 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 2 -(87 rows) +(96 rows) -- test startup and runtime exclusion together -- all chunks should be filtered @@ -3322,7 +3334,8 @@ QUERY PLAN Sort Key: o_1."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_1 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 6 @@ -3330,7 +3343,8 @@ QUERY PLAN Sort Key: o_2."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_2 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 18 @@ -3338,7 +3352,8 @@ QUERY PLAN Sort Key: o_3."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_3 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 6 @@ -3348,7 +3363,8 @@ QUERY PLAN Sort Key: o_4."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_4 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 6 @@ -3356,7 +3372,8 @@ QUERY PLAN Sort 
Key: o_5."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_5 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 18 @@ -3364,7 +3381,8 @@ QUERY PLAN Sort Key: o_6."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_6 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=0 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 6 @@ -3374,8 +3392,9 @@ QUERY PLAN Sort Key: o_7."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_7 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 1533 + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=2 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 2 @@ -3383,8 +3402,9 @@ QUERY PLAN Sort Key: o_8."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_8 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 4598 + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=5 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 7 @@ -3392,12 +3412,13 @@ QUERY PLAN Sort Key: o_9."time" DESC Sort Method: quicksort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk o_9 (actual rows=0 loops=3) - Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval)) AND ("time" > now())) + Filter: (("time" >= g."time") AND ("time" < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 1533 + Vectorized Filter: ("time" > now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=2 loops=3) Filter: ((_ts_meta_max_1 >= g."time") AND (_ts_meta_min_1 < (g."time" + '@ 1 day'::interval))) Rows Removed by Filter: 2 -(87 rows) +(96 rows) -- test JOIN -- no exclusion on joined table because quals are not propagated yet diff --git a/tsl/test/shared/expected/transparent_decompress_chunk-13.out b/tsl/test/shared/expected/transparent_decompress_chunk-13.out index 1b9f23fa68a..b6246278b4d 100644 --- a/tsl/test/shared/expected/transparent_decompress_chunk-13.out +++ b/tsl/test/shared/expected/transparent_decompress_chunk-13.out @@ -408,7 +408,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time", _hyper_X_X_chunk.device_id Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=17990 loops=1) - Filter: ("time" < now()) + Vectorized 
Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) (7 rows) diff --git a/tsl/test/shared/expected/transparent_decompress_chunk-14.out b/tsl/test/shared/expected/transparent_decompress_chunk-14.out index 1b9f23fa68a..b6246278b4d 100644 --- a/tsl/test/shared/expected/transparent_decompress_chunk-14.out +++ b/tsl/test/shared/expected/transparent_decompress_chunk-14.out @@ -408,7 +408,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time", _hyper_X_X_chunk.device_id Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=17990 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) (7 rows) diff --git a/tsl/test/shared/expected/transparent_decompress_chunk-15.out b/tsl/test/shared/expected/transparent_decompress_chunk-15.out index 0bb0dd445c7..43271c0509b 100644 --- a/tsl/test/shared/expected/transparent_decompress_chunk-15.out +++ b/tsl/test/shared/expected/transparent_decompress_chunk-15.out @@ -410,7 +410,7 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time", _hyper_X_X_chunk.device_id Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=17990 loops=1) - Filter: ("time" < now()) + Vectorized Filter: ("time" < now()) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) (7 rows) diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index 6168faf8ef9..1a06c81c700 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -73,6 +73,24 @@ select count(*) from vectorqual where metric4 is null; select count(*) from vectorqual where metric4 is not null; +-- Vectorized filters also work if we have only stable functions on the right +-- side that can be evaluated to a constant at run time. +set timescaledb.debug_require_vector_qual to 'only'; +select count(*) from vectorqual where ts > '2021-01-01 00:00:00'::timestamptz::timestamp; +select count(*) from vectorqual where ts > '2021-01-01 00:00:00'::timestamp - interval '1 day'; + +-- This filter is not vectorized because the 'timestamp > timestamptz' +-- operator is stable, not immutable, because it uses the current session +-- timezone. We could transform it to something like +-- 'timestamp > timestamptz::timestamp' to allow our stable function evaluation +-- to handle this case, but we don't do it at the moment. +set timescaledb.debug_require_vector_qual to 'forbid'; +select count(*) from vectorqual where ts > '2021-01-01 00:00:00'::timestamptz; + +-- Can't vectorize comparison with a volatile function. +select count(*) from vectorqual where metric3 > random()::int - 100; + + -- Test that the vectorized quals are disabled by disabling the bulk decompression. 
set timescaledb.enable_bulk_decompression to off; set timescaledb.debug_require_vector_qual to 'forbid'; From ba6732af5c7ac6668131fe5e700e26b16061671d Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 2 Nov 2023 21:50:43 +0100 Subject: [PATCH 023/249] saop --- .../nodes/decompress_chunk/compressed_batch.c | 270 +++++++++++++++++- tsl/src/nodes/decompress_chunk/exec.c | 14 +- tsl/src/nodes/decompress_chunk/planner.c | 97 +++++-- .../pred_vector_const_arithmetic_single.c | 8 - .../decompress_chunk/vector_predicates.c | 60 +--- .../decompress_chunk/vector_predicates.h | 5 +- tsl/test/expected/decompress_vector_qual.out | 42 +++ tsl/test/sql/decompress_vector_qual.sql | 21 ++ 8 files changed, 416 insertions(+), 101 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 70bed149dc3..9ae8c25e7db 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -78,6 +78,156 @@ make_single_value_arrow(Oid pgtype, Datum datum, bool isnull) return arrow; } +static void +translate_from_dictionary(const ArrowArray *arrow, uint64 *restrict dict_result, + uint64 *restrict final_result) +{ + Assert(arrow->dictionary != NULL); + + /* Translate dictionary results to per-value results. */ + const size_t n = arrow->length; + int16 *restrict indices = (int16 *) arrow->buffers[1]; + for (size_t outer = 0; outer < n / 64; outer++) + { + uint64 word = 0; + for (size_t inner = 0; inner < 64; inner++) + { + const size_t row = outer * 64 + inner; + const size_t bit_index = inner; +#define INNER_LOOP \ + const int16 index = indices[row]; \ + const bool valid = arrow_row_is_valid(dict_result, index); \ + word |= ((uint64) valid) << bit_index; + + INNER_LOOP + + // fprintf(stderr, "dict-coded row %ld: index %d, valid %d\n", row, index, + // valid); + } + final_result[outer] &= word; + } + + if (n % 64) + { + uint64 word = 0; + for (size_t row = (n / 64) * 64; row < n; row++) + { + const size_t bit_index = row % 64; + + INNER_LOOP + } + final_result[n / 64] &= word; + } +#undef INNER_LOOP +} + +static inline void +vector_predicate_saop_impl(VectorPredicate *vector_const_predicate, bool is_or, const ArrowArray *vector, Datum array, + uint64 *restrict final_result) +{ + const size_t result_bits = vector->length; + const size_t result_words = (result_bits + 63) / 64; + + uint64 *restrict array_result; + /* + * For OR, we need an intermediate storage to accumulate the results + * from all elements. + * For AND, we can apply predicate for each element to the final result. 
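+ *
+ * As an illustration (example values only, not taken from the tests): for an
+ * ANY/IN qual such as `metric = ANY('{1,2,3}')` with useOr = true, each
+ * element's bitmap is OR-ed into array_result, and that combined bitmap is
+ * AND-ed into final_result once at the end; for an ALL qual with useOr =
+ * false, every element's predicate writes straight into final_result, which
+ * already accumulates with AND.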
+ */ + uint64 array_result_storage[(GLOBAL_MAX_ROWS_PER_COMPRESSION + 63) / 64]; + if (is_or) + { + array_result = array_result_storage; + for (size_t i = 0; i < result_words; i++) + { + array_result_storage[i] = 0; + } + } + else + { + array_result = final_result; + } + + ArrayType *arr = DatumGetArrayTypeP(array); + + int16 typlen; + bool typbyval; + char typalign; + get_typlenbyvalalign(ARR_ELEMTYPE(arr), &typlen, &typbyval, &typalign); + + const char *s = (const char *) ARR_DATA_PTR(arr); + Ensure(ARR_NULLBITMAP(arr) == NULL, "vectorized scalar array ops do not support nullable arrays"); + + const int nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); + + for (int i = 0; i < nitems; i++) + { + Datum constvalue = fetch_att(s, typbyval, typlen); + s = att_addlength_pointer(s, typlen, s); + s = (char *) att_align_nominal(s, typalign); + + /* + * For OR, we also need an intermediate storage for predicate result + * for each array element, since the predicates AND their result. + * + * For AND, we can and apply predicate for each array element to the + * final result. + */ + uint64 single_result_storage[(GLOBAL_MAX_ROWS_PER_COMPRESSION + 63) / 64]; + uint64 *restrict single_result; + if (is_or) + { + single_result = single_result_storage; + for (size_t outer = 0; outer < result_words; outer++) + { + single_result[outer] = -1; + } + } + else + { + single_result = final_result; + } + + vector_const_predicate(vector, constvalue, single_result); + + if (is_or) + { + for (size_t outer = 0; outer < result_words; outer++) + { + array_result[outer] |= single_result[outer]; + } + } + } + + if (is_or) + { + for (size_t outer = 0; outer < result_words; outer++) + { + /* + * The tail bits corresponding to past-the-end rows when n % 64 != 0 + * should be already zeroed out in the final_result. + */ + final_result[outer] &= array_result[outer]; + } + } +} + +static void +vector_predicate_saop_and(VectorPredicate *scalar_predicate, const ArrowArray *vector, Datum array, + uint64 *restrict result) +{ + vector_predicate_saop_impl(scalar_predicate, /* is_or = */ false, + vector, array, result); +} + +static void +vector_predicate_saop_or(VectorPredicate *scalar_predicate, const ArrowArray *vector, Datum array, + uint64 *restrict result) +{ + vector_predicate_saop_impl(scalar_predicate, /* is_or = */ true, + vector, array, result); +} + static void apply_vector_quals(DecompressChunkState *chunk_state, DecompressBatchState *batch_state) { @@ -93,6 +243,16 @@ apply_vector_quals(DecompressChunkState *chunk_state, DecompressBatchState *batc const int bitmap_bytes = sizeof(uint64) * ((batch_state->total_batch_rows + 63) / 64); batch_state->vector_qual_result = palloc(bitmap_bytes); memset(batch_state->vector_qual_result, 0xFF, bitmap_bytes); + if (batch_state->total_batch_rows % 64 != 0) + { + /* + * We have to zero out the bits for past-the-end elements in the last + * bitmap word. Since all predicates are ANDed to the result bitmap, + * we can do it here once instead of doing it in each predicate. + */ + const uint64 mask = ((uint64) -1) >> (64 - batch_state->total_batch_rows % 64); + batch_state->vector_qual_result[batch_state->total_batch_rows / 64] = mask; + } /* * Compute the quals. @@ -100,14 +260,39 @@ apply_vector_quals(DecompressChunkState *chunk_state, DecompressBatchState *batc ListCell *lc; foreach (lc, chunk_state->vectorized_quals_constified) { - /* For now we only support "Var ? Const" predicates. 
*/ - OpExpr *oe = castNode(OpExpr, lfirst(lc)); - Var *var = castNode(Var, linitial(oe->args)); - Const *constnode = castNode(Const, lsecond(oe->args)); + /* + * For now we support "Var ? Const" predicates and + * ScalarArrayOperations. + */ + List *args = NULL; + RegProcedure vector_const_opcode = InvalidOid; + ScalarArrayOpExpr *saop = NULL; + OpExpr *opexpr = NULL; + if (IsA(lfirst(lc), ScalarArrayOpExpr)) + { + saop = castNode(ScalarArrayOpExpr, lfirst(lc)); + args = saop->args; + vector_const_opcode = get_opcode(saop->opno); + } + else + { + opexpr = castNode(OpExpr, lfirst(lc)); + args = opexpr->args; + vector_const_opcode = get_opcode(opexpr->opno); + } + + /* + * Find the vector_const predicate. + */ + VectorPredicate *vector_const_predicate = get_vector_const_predicate(vector_const_opcode); + Ensure(vector_const_predicate != NULL, + "vectorized predicate not found for postgres predicate %d", + vector_const_opcode); /* * Find the compressed column referred to by the Var. */ + Var *var = castNode(Var, linitial(args)); DecompressChunkColumnDescription *column_description = NULL; int column_index = 0; for (; column_index < chunk_state->num_total_columns; column_index++) @@ -170,20 +355,81 @@ apply_vector_quals(DecompressChunkState *chunk_state, DecompressBatchState *batc predicate_result = &default_value_predicate_result; } - /* Find and compute the predicate. */ - void (*predicate)(const ArrowArray *, Datum, uint64 *restrict) = - get_vector_const_predicate(get_opcode(oe->opno)); - Ensure(predicate != NULL, - "vectorized predicate not found for postgres predicate %d", - get_opcode(oe->opno)); - /* * The vectorizable predicates should be STRICT, so we shouldn't see null * constants here. */ + Const *constnode = castNode(Const, lsecond(args)); Ensure(!constnode->constisnull, "vectorized predicate called for a null value"); - predicate(vector, constnode->constvalue, predicate_result); + /* + * If the data is dictionary-encoded, we are going to compute the + * predicate on dictionary and then translate the results. + */ + const ArrowArray *vector_nodict = NULL; + uint64 *restrict predicate_result_nodict = NULL; + uint64 dict_result[(GLOBAL_MAX_ROWS_PER_COMPRESSION + 63) / 64]; + if (vector->dictionary) + { + const size_t dict_rows = vector->dictionary->length; + const size_t dict_result_words = (dict_rows + 63) / 64; + memset(dict_result, 0xFF, dict_result_words * 8); + predicate_result_nodict = dict_result; + vector_nodict = vector->dictionary; + } + else + { + predicate_result_nodict = predicate_result; + vector_nodict = vector; + } + + /* + * At last, compute the predicate. + */ + if (saop) + { + if (saop->useOr) + { + vector_predicate_saop_or(vector_const_predicate, + vector_nodict, + constnode->constvalue, + predicate_result_nodict); + } + else + { + vector_predicate_saop_and(vector_const_predicate, + vector_nodict, + constnode->constvalue, + predicate_result_nodict); + } + } + else + { + vector_const_predicate(vector_nodict, constnode->constvalue, predicate_result_nodict); + } + + /* + * If the vector is dictionary-encoded, we have just computed the + * predicate for dictionary and now have to translate it. + */ + if (vector->dictionary) + { + fprintf(stderr, "dictionary\n"); + translate_from_dictionary(vector, predicate_result_nodict, predicate_result); + } + else + { + fprintf(stderr, "normal\n"); + } + + /* Account for nulls which shouldn't pass the predicate. 
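+ * The Arrow validity bitmap in vector->buffers[0] is AND-ed into the result
+ * below, so a null row can never pass the qual, matching the STRICT operator
+ * assumption made for the constant above.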
*/ + const size_t n = vector->length; + const size_t n_words = (n + 63) / 64; + const uint64 *restrict validity = (uint64 *restrict) vector->buffers[0]; + for (size_t i = 0; i < n_words; i++) + { + predicate_result[i] &= validity[i]; + } /* Process the result. */ if (column_values->arrow == NULL) diff --git a/tsl/src/nodes/decompress_chunk/exec.c b/tsl/src/nodes/decompress_chunk/exec.c index 7b800bc3a5c..1acd65c1d49 100644 --- a/tsl/src/nodes/decompress_chunk/exec.c +++ b/tsl/src/nodes/decompress_chunk/exec.c @@ -493,9 +493,17 @@ decompress_chunk_begin(CustomScanState *node, EState *estate, int eflags) ListCell *lc; foreach (lc, chunk_state->vectorized_quals_original) { - OpExpr *constified = - castNode(OpExpr, estimate_expression_value(&root, (Node *) lfirst(lc))); - Ensure(IsA(lsecond(constified->args), Const), + Node *constified = estimate_expression_value(&root, (Node *) lfirst(lc)); + List *args; + if (IsA(constified, OpExpr)) + { + args = castNode(OpExpr, constified)->args; + } + else + { + args = castNode(ScalarArrayOpExpr, constified)->args; + } + Ensure(IsA(lsecond(args), Const), "failed to evaluate runtime constant in vectorized filter"); chunk_state->vectorized_quals_constified = lappend(chunk_state->vectorized_quals_constified, constified); diff --git a/tsl/src/nodes/decompress_chunk/planner.c b/tsl/src/nodes/decompress_chunk/planner.c index 6b3f9ab4b45..d8a5f5ad1bd 100644 --- a/tsl/src/nodes/decompress_chunk/planner.c +++ b/tsl/src/nodes/decompress_chunk/planner.c @@ -450,34 +450,54 @@ is_not_runtime_constant(Node *node) static Node * make_vectorized_qual(DecompressChunkPath *path, Node *qual) { + //my_print(qual); + /* Only simple "Var op Const" binary predicates for now. */ - if (!IsA(qual, OpExpr)) + if (!IsA(qual, OpExpr) && !IsA(qual, ScalarArrayOpExpr)) { return NULL; } - OpExpr *o = castNode(OpExpr, qual); + List *args = NIL; + OpExpr *opexpr = NULL; + Oid opno = InvalidOid; + ScalarArrayOpExpr *saop = NULL; + if (IsA(qual, OpExpr)) + { + opexpr = castNode(OpExpr, qual); + args = opexpr->args; + opno = opexpr->opno; + } + else + { + saop = castNode(ScalarArrayOpExpr, qual); + args = saop->args; + opno = saop->opno; + } - if (list_length(o->args) != 2) + if (list_length(args) != 2) { return NULL; } - if (IsA(lsecond(o->args), Var)) + if (opexpr && IsA(lsecond(args), Var)) { /* Try to commute the operator if the constant is on the right. */ - Oid commutator_opno = get_commutator(o->opno); - if (OidIsValid(commutator_opno)) + opno = get_commutator(opno); + if (!OidIsValid(opno)) { - o = (OpExpr *) copyObject(o); - o->opno = commutator_opno; - /* - * opfuncid is a cache, we can set it to InvalidOid like the - * CommuteOpExpr() does. - */ - o->opfuncid = InvalidOid; - o->args = list_make2(lsecond(o->args), linitial(o->args)); + return NULL; } + + opexpr = (OpExpr *) copyObject(opexpr); + opexpr->opno = opno; + /* + * opfuncid is a cache, we can set it to InvalidOid like the + * CommuteOpExpr() does. + */ + opexpr->opfuncid = InvalidOid; + args = list_make2(lsecond(args), linitial(args)); + opexpr->args = args; } /* @@ -485,12 +505,12 @@ make_vectorized_qual(DecompressChunkPath *path, Node *qual) * side is a constant or can be evaluated to a constant at run time (e.g. * contains stable functions). 
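 * For instance, "time" < now() qualifies because now() is stable and is folded
 * to a Const at executor startup, while "metric3" > random()::int does not,
 * since random() is volatile (illustrative examples; see the tests in
 * decompress_vector_qual.sql).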
*/ - if (!IsA(linitial(o->args), Var) || is_not_runtime_constant(lsecond(o->args))) + if (!IsA(linitial(args), Var) || is_not_runtime_constant(lsecond(args))) { return NULL; } - Var *var = castNode(Var, linitial(o->args)); + Var *var = castNode(Var, linitial(args)); Assert((Index) var->varno == path->info->chunk_rel->relid); /* @@ -504,13 +524,50 @@ make_vectorized_qual(DecompressChunkPath *path, Node *qual) return NULL; } - Oid opcode = get_opcode(o->opno); - if (get_vector_const_predicate(opcode)) + Oid opcode = get_opcode(opno); + if (!get_vector_const_predicate(opcode)) + { + return NULL; + } + + if (saop) { - return (Node *) o; + if (saop->hashfuncid) + { + /* + * Don't vectorize if the planner decided to build a hash table. + */ + return NULL; + } + + if (!IsA(lsecond(args), Const)) + { + /* + * Vectorizing ScalarArrayOperation requires us to know the type + * of the array elements, and the absence of nulls, at runtime, + * so unfortunately we can't apply it for arrays evaluated at run + * time. + */ + return NULL; + } + Const *constnode = castNode(Const, lsecond(args)); + if (constnode->constisnull) + { + /* + * FIXME what happens for normal operations in this case? + * And if a stable function evaluates to null at run time? + */ + return NULL; + } + ArrayType *arr = DatumGetArrayTypeP(constnode->constvalue); + if (ARR_NULLBITMAP(arr) != NULL) + { + /* We don't have a provision for null elements in arrays yet. */ + return NULL; + } } - return NULL; + return opexpr ? (Node *) opexpr : (Node *) saop; } /* diff --git a/tsl/src/nodes/decompress_chunk/pred_vector_const_arithmetic_single.c b/tsl/src/nodes/decompress_chunk/pred_vector_const_arithmetic_single.c index d6c41c4bfc3..d89f54eebfe 100644 --- a/tsl/src/nodes/decompress_chunk/pred_vector_const_arithmetic_single.c +++ b/tsl/src/nodes/decompress_chunk/pred_vector_const_arithmetic_single.c @@ -27,14 +27,6 @@ FUNCTION_NAME(PREDICATE_NAME, VECTOR_CTYPE, { const size_t n = arrow->length; - /* Account for nulls which shouldn't pass the predicate. */ - const size_t n_words = (n + 63) / 64; - const uint64 *restrict validity = (uint64 *restrict) arrow->buffers[0]; - for (size_t i = 0; i < n_words; i++) - { - result[i] &= validity[i]; - } - /* Now run the predicate itself. */ const CONST_CTYPE constvalue = CONST_CONVERSION(constdatum); const VECTOR_CTYPE *restrict vector = (VECTOR_CTYPE *restrict) arrow->buffers[1]; diff --git a/tsl/src/nodes/decompress_chunk/vector_predicates.c b/tsl/src/nodes/decompress_chunk/vector_predicates.c index dd8f4a67e73..ba985a39bb0 100644 --- a/tsl/src/nodes/decompress_chunk/vector_predicates.c +++ b/tsl/src/nodes/decompress_chunk/vector_predicates.c @@ -73,68 +73,16 @@ vector_const_texteq_nodict(const ArrowArray *arrow, const Datum constdatum, uint static void vector_const_texteq(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result) { - /* Account for nulls which shouldn't pass the predicate. */ - const size_t n = arrow->length; - const size_t n_words = (n + 63) / 64; - const uint64 *restrict validity = (uint64 *restrict) arrow->buffers[0]; - for (size_t i = 0; i < n_words; i++) - { - result[i] &= validity[i]; - } - - if (!arrow->dictionary) - { - vector_const_texteq_nodict(arrow, constdatum, result); - return; - } - - /* Run the predicate on dictionary. 
*/ - uint64 dict_result[(GLOBAL_MAX_ROWS_PER_COMPRESSION + 63) / 64 * 64]; - memset(dict_result, 0xFF, n_words * 8); - vector_const_texteq_nodict(arrow->dictionary, constdatum, dict_result); - - /* Translate dictionary results to per-value results. */ - int16 *restrict indices = (int16 *) arrow->buffers[1]; - for (size_t outer = 0; outer < n / 64; outer++) - { - uint64 word = 0; - for (size_t inner = 0; inner < 64; inner++) - { - const size_t row = outer * 64 + inner; - const size_t bit_index = inner; -#define INNER_LOOP \ - const int16 index = indices[row]; \ - const bool valid = arrow_row_is_valid(dict_result, index); \ - word |= ((uint64) valid) << bit_index; - - INNER_LOOP - - // fprintf(stderr, "dict-coded row %ld: index %d, valid %d\n", row, index, - // valid); - } - result[outer] &= word; - } - - if (n % 64) - { - uint64 word = 0; - for (size_t row = (n / 64) * 64; row < n; row++) - { - const size_t bit_index = row % 64; - - INNER_LOOP - } - result[n / 64] &= word; - } -#undef INNER_LOOP + Assert(!arrow->dictionary); + vector_const_texteq_nodict(arrow, constdatum, result); } /* * Look up the vectorized implementation for a Postgres predicate, specified by * its Oid in pg_proc. Note that this Oid is different from the opcode. */ -void (*get_vector_const_predicate(Oid pg_predicate))(const ArrowArray *, const Datum, - uint64 *restrict) +VectorPredicate * +get_vector_const_predicate(Oid pg_predicate) { switch (pg_predicate) { diff --git a/tsl/src/nodes/decompress_chunk/vector_predicates.h b/tsl/src/nodes/decompress_chunk/vector_predicates.h index f00d72dfe44..ea1534a6ac8 100644 --- a/tsl/src/nodes/decompress_chunk/vector_predicates.h +++ b/tsl/src/nodes/decompress_chunk/vector_predicates.h @@ -10,5 +10,6 @@ #pragma once -void (*get_vector_const_predicate(Oid pg_predicate))(const ArrowArray *, const Datum, - uint64 *restrict); +typedef void(VectorPredicate)(const ArrowArray *, Datum, uint64 *restrict); + +VectorPredicate *get_vector_const_predicate(Oid pg_predicate); diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index 070b8c509dd..5e190da1ff3 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -302,3 +302,45 @@ select * from date_table where ts < '2021-01-02'; 01-01-2021 (1 row) +-- Vectorized comparison for text +create table t(ts timestamp, a text); +select create_hypertable('t', 'ts'); +WARNING: column type "timestamp without time zone" used for "ts" does not follow best practices +NOTICE: adding not-null constraint to column "ts" + create_hypertable +------------------- + (5,public,t,t) +(1 row) + +alter table t set (timescaledb.compress); +insert into t select '2021-01-01 01:01:01'::timestamp + interval '1 second' * x, 'same' +from generate_series(1, 1000) x +; +insert into t select '2021-01-01 02:01:01'::timestamp + interval '1 second' * x, 'different' || x +from generate_series(1, 1000) x +; +select count(compress_chunk(x, true)) from show_chunks('t') x; + count +------- + 1 +(1 row) + +set timescaledb.debug_require_vector_qual to 'only'; +select count(*), min(ts) from t where a = 'same'; + count | min +-------+-------------------------- + 1000 | Fri Jan 01 01:01:02 2021 +(1 row) + +select count(*), min(ts) from t where a = 'different1'; + count | min +-------+-------------------------- + 1 | Fri Jan 01 02:01:02 2021 +(1 row) + +select count(*), min(ts) from t where a = 'different1000'; + count | min +-------+-------------------------- + 1 | Fri Jan 01 02:17:41 
2021 +(1 row) + diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index 1a06c81c700..a6d893971d3 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -121,3 +121,24 @@ select * from date_table where ts >= '2021-01-02'; select * from date_table where ts = '2021-01-02'; select * from date_table where ts <= '2021-01-02'; select * from date_table where ts < '2021-01-02'; + +-- Vectorized comparison for text +create table t(ts timestamp, a text); +select create_hypertable('t', 'ts'); +alter table t set (timescaledb.compress); + +insert into t select '2021-01-01 01:01:01'::timestamp + interval '1 second' * x, 'same' +from generate_series(1, 1000) x +; + +insert into t select '2021-01-01 02:01:01'::timestamp + interval '1 second' * x, 'different' || x +from generate_series(1, 1000) x +; + +select count(compress_chunk(x, true)) from show_chunks('t') x; + +set timescaledb.debug_require_vector_qual to 'only'; + +select count(*), min(ts) from t where a = 'same'; +select count(*), min(ts) from t where a = 'different1'; +select count(*), min(ts) from t where a = 'different1000'; From d073bf6898ea3ba69b205ea02ca8da403bbfcdc9 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 2 Nov 2023 22:21:01 +0100 Subject: [PATCH 024/249] benchmark all text-related stuff together (2) From 98e547ad2a2304bcc7989aae8897b07089eb47b7 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 3 Nov 2023 10:48:32 +0100 Subject: [PATCH 025/249] format --- .../nodes/decompress_chunk/compressed_batch.c | 38 +++++++++---------- tsl/src/nodes/decompress_chunk/planner.c | 2 +- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index bde41a9a68b..7c4da6ef378 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -80,7 +80,7 @@ make_single_value_arrow(Oid pgtype, Datum datum, bool isnull) static void translate_from_dictionary(const ArrowArray *arrow, uint64 *restrict dict_result, - uint64 *restrict final_result) + uint64 *restrict final_result) { Assert(arrow->dictionary != NULL); @@ -122,8 +122,8 @@ translate_from_dictionary(const ArrowArray *arrow, uint64 *restrict dict_result, } static inline void -vector_predicate_saop_impl(VectorPredicate *vector_const_predicate, bool is_or, const ArrowArray *vector, Datum array, - uint64 *restrict final_result) +vector_predicate_saop_impl(VectorPredicate *vector_const_predicate, bool is_or, + const ArrowArray *vector, Datum array, uint64 *restrict final_result) { const size_t result_bits = vector->length; const size_t result_words = (result_bits + 63) / 64; @@ -156,7 +156,8 @@ vector_predicate_saop_impl(VectorPredicate *vector_const_predicate, bool is_or, get_typlenbyvalalign(ARR_ELEMTYPE(arr), &typlen, &typbyval, &typalign); const char *s = (const char *) ARR_DATA_PTR(arr); - Ensure(ARR_NULLBITMAP(arr) == NULL, "vectorized scalar array ops do not support nullable arrays"); + Ensure(ARR_NULLBITMAP(arr) == NULL, + "vectorized scalar array ops do not support nullable arrays"); const int nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); @@ -216,16 +217,14 @@ static void vector_predicate_saop_and(VectorPredicate *scalar_predicate, const ArrowArray *vector, Datum array, uint64 *restrict result) { - 
vector_predicate_saop_impl(scalar_predicate, /* is_or = */ false, - vector, array, result); + vector_predicate_saop_impl(scalar_predicate, /* is_or = */ false, vector, array, result); } static void vector_predicate_saop_or(VectorPredicate *scalar_predicate, const ArrowArray *vector, Datum array, - uint64 *restrict result) + uint64 *restrict result) { - vector_predicate_saop_impl(scalar_predicate, /* is_or = */ true, - vector, array, result); + vector_predicate_saop_impl(scalar_predicate, /* is_or = */ true, vector, array, result); } static int @@ -291,7 +290,8 @@ decompress_column(DecompressChunkState *chunk_state, DecompressBatchState *batch } DecompressAllFunction decompress_all = - tsl_get_decompress_all_function(header->compression_algorithm, column_description->typid); + tsl_get_decompress_all_function(header->compression_algorithm, + column_description->typid); Assert(decompress_all != NULL); MemoryContext context_before_decompression = @@ -324,18 +324,14 @@ decompress_column(DecompressChunkState *chunk_state, DecompressBatchState *batch if (column_values->value_bytes == -1) { const int maxbytes = - VARHDRSZ + - (column_values->arrow->dictionary ? - get_max_element_bytes(column_values->arrow->dictionary) : - get_max_element_bytes(column_values->arrow)); - - const AttrNumber attr = - AttrNumberGetAttrOffset(column_values->output_attno); - batch_state->decompressed_scan_slot->tts_values[attr] = PointerGetDatum( - MemoryContextAlloc(batch_state->per_batch_context, maxbytes)); - } - + VARHDRSZ + (column_values->arrow->dictionary ? + get_max_element_bytes(column_values->arrow->dictionary) : + get_max_element_bytes(column_values->arrow)); + const AttrNumber attr = AttrNumberGetAttrOffset(column_values->output_attno); + batch_state->decompressed_scan_slot->tts_values[attr] = + PointerGetDatum(MemoryContextAlloc(batch_state->per_batch_context, maxbytes)); + } return; } diff --git a/tsl/src/nodes/decompress_chunk/planner.c b/tsl/src/nodes/decompress_chunk/planner.c index d8a5f5ad1bd..829fe2092e9 100644 --- a/tsl/src/nodes/decompress_chunk/planner.c +++ b/tsl/src/nodes/decompress_chunk/planner.c @@ -450,7 +450,7 @@ is_not_runtime_constant(Node *node) static Node * make_vectorized_qual(DecompressChunkPath *path, Node *qual) { - //my_print(qual); + // my_print(qual); /* Only simple "Var op Const" binary predicates for now. 
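 	 * "Simple" means that one side is a Var of the uncompressed chunk and the
 	 * other side is a Const, or at least something that can be evaluated to a
 	 * constant at run time (see is_not_runtime_constant() above).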
*/ if (!IsA(qual, OpExpr) && !IsA(qual, ScalarArrayOpExpr)) From 273759e07457fdc5a7baa580c0f5eccf2c46bf7f Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 3 Nov 2023 13:10:18 +0100 Subject: [PATCH 026/249] fixes --- .../nodes/decompress_chunk/compressed_batch.c | 16 ++++++++++++++-- tsl/src/nodes/decompress_chunk/exec.c | 7 +++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 9bd518d2c7b..9fea84639f4 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -442,6 +442,8 @@ compressed_batch_set_compressed_tuple(DecompressChunkState *chunk_state, */ CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; column_values->value_bytes = 0; + column_values->arrow = NULL; + column_values->iterator = NULL; break; } case SEGMENTBY_COLUMN: @@ -494,7 +496,8 @@ compressed_batch_set_compressed_tuple(DecompressChunkState *chunk_state, } } - if (apply_vector_quals(chunk_state, batch_state)) + const bool have_passing_rows = apply_vector_quals(chunk_state, batch_state); + if (have_passing_rows || chunk_state->batch_sorted_merge) { /* * Have rows that actually pass the vector quals, have to decompress the @@ -513,7 +516,14 @@ compressed_batch_set_compressed_tuple(DecompressChunkState *chunk_state, } else { - // fprintf(stderr, "the entire batch didn't pass!!!\n"); + /* + * The entire batch doesn't pass the vectorized quals, so we might be + * able to avoid reading some columns. + */ + InstrCountTuples2(chunk_state, 1); + InstrCountFiltered1(chunk_state, batch_state->total_batch_rows); + Assert(!chunk_state->batch_sorted_merge); + batch_state->next_batch_row = batch_state->total_batch_rows; } MemoryContextSwitchTo(old_context); @@ -672,6 +682,7 @@ compressed_batch_advance(DecompressChunkState *chunk_state, DecompressBatchState for (int i = 0; i < num_compressed_columns; i++) { CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; + Ensure(column_values->value_bytes != 0, "the column is not decompressed"); if (column_values->iterator) { column_values->iterator->try_next(column_values->iterator); @@ -708,6 +719,7 @@ compressed_batch_advance(DecompressChunkState *chunk_state, DecompressBatchState CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; if (column_values->iterator) { + Assert(column_values->value_bytes != 0); DecompressResult result = column_values->iterator->try_next(column_values->iterator); if (!result.is_done) { diff --git a/tsl/src/nodes/decompress_chunk/exec.c b/tsl/src/nodes/decompress_chunk/exec.c index 89f6bdc31b5..549bd2a53ac 100644 --- a/tsl/src/nodes/decompress_chunk/exec.c +++ b/tsl/src/nodes/decompress_chunk/exec.c @@ -872,6 +872,13 @@ decompress_chunk_explain(CustomScanState *node, List *ancestors, ExplainState *e ts_show_instrumentation_count("Rows Removed by Filter", 1, &node->ss.ps, es); } + if (es->analyze && es->verbose + && (node->ss.ps.instrument->ntuples2 > 0 || es->format != EXPLAIN_FORMAT_TEXT)) + { + ExplainPropertyFloat("Batches Removed by Filter", NULL, + node->ss.ps.instrument->ntuples2, 0, es); + } + if (es->verbose || es->format != EXPLAIN_FORMAT_TEXT) { if (chunk_state->batch_sorted_merge) From 20b416374fc1fe54861c10a166da252ce049f74a Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 3 Nov 2023 13:42:05 
+0100 Subject: [PATCH 027/249] references --- tsl/test/expected/transparent_decompression-15.out | 11 +++++++++-- .../expected/transparent_decompress_chunk-15.out | 3 ++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/tsl/test/expected/transparent_decompression-15.out b/tsl/test/expected/transparent_decompression-15.out index 81e62cccd96..4b3de2914a6 100644 --- a/tsl/test/expected/transparent_decompression-15.out +++ b/tsl/test/expected/transparent_decompression-15.out @@ -473,6 +473,7 @@ ORDER BY time, Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device_id, _hyper_1_1_chunk.device_id_peer, _hyper_1_1_chunk.v0, _hyper_1_1_chunk.v1, _hyper_1_1_chunk.v2, _hyper_1_1_chunk.v3 Vectorized Filter: (_hyper_1_1_chunk.v3 > '10'::double precision) Rows Removed by Filter: 1800 + Batches Removed by Filter: 5 Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_5_15_chunk (actual rows=5 loops=1) Output: compress_hyper_5_15_chunk."time", compress_hyper_5_15_chunk.device_id, compress_hyper_5_15_chunk.device_id_peer, compress_hyper_5_15_chunk.v0, compress_hyper_5_15_chunk.v1, compress_hyper_5_15_chunk.v2, compress_hyper_5_15_chunk.v3, compress_hyper_5_15_chunk._ts_meta_count, compress_hyper_5_15_chunk._ts_meta_sequence_num, compress_hyper_5_15_chunk._ts_meta_min_3, compress_hyper_5_15_chunk._ts_meta_max_3, compress_hyper_5_15_chunk._ts_meta_min_1, compress_hyper_5_15_chunk._ts_meta_max_1, compress_hyper_5_15_chunk._ts_meta_min_2, compress_hyper_5_15_chunk._ts_meta_max_2 @@ -492,10 +493,11 @@ ORDER BY time, Output: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device_id, _hyper_1_3_chunk.device_id_peer, _hyper_1_3_chunk.v0, _hyper_1_3_chunk.v1, _hyper_1_3_chunk.v2, _hyper_1_3_chunk.v3 Vectorized Filter: (_hyper_1_3_chunk.v3 > '10'::double precision) Rows Removed by Filter: 2520 + Batches Removed by Filter: 5 Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_5_16_chunk (actual rows=5 loops=1) Output: compress_hyper_5_16_chunk."time", compress_hyper_5_16_chunk.device_id, compress_hyper_5_16_chunk.device_id_peer, compress_hyper_5_16_chunk.v0, compress_hyper_5_16_chunk.v1, compress_hyper_5_16_chunk.v2, compress_hyper_5_16_chunk.v3, compress_hyper_5_16_chunk._ts_meta_count, compress_hyper_5_16_chunk._ts_meta_sequence_num, compress_hyper_5_16_chunk._ts_meta_min_3, compress_hyper_5_16_chunk._ts_meta_max_3, compress_hyper_5_16_chunk._ts_meta_min_1, compress_hyper_5_16_chunk._ts_meta_max_1, compress_hyper_5_16_chunk._ts_meta_min_2, compress_hyper_5_16_chunk._ts_meta_max_2 -(35 rows) +(37 rows) -- device_id constraint should be pushed down :PREFIX @@ -3519,6 +3521,7 @@ ORDER BY time, Output: _hyper_2_4_chunk."time", _hyper_2_4_chunk.device_id, _hyper_2_4_chunk.device_id_peer, _hyper_2_4_chunk.v0, _hyper_2_4_chunk.v1, _hyper_2_4_chunk.v2, _hyper_2_4_chunk.v3 Vectorized Filter: (_hyper_2_4_chunk.v3 > '10'::double precision) Rows Removed by Filter: 360 + Batches Removed by Filter: 1 Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_6_17_chunk (actual rows=1 loops=1) Output: compress_hyper_6_17_chunk."time", compress_hyper_6_17_chunk.device_id, compress_hyper_6_17_chunk.device_id_peer, compress_hyper_6_17_chunk.v0, compress_hyper_6_17_chunk.v1, compress_hyper_6_17_chunk.v2, compress_hyper_6_17_chunk.v3, compress_hyper_6_17_chunk._ts_meta_count, compress_hyper_6_17_chunk._ts_meta_sequence_num, compress_hyper_6_17_chunk._ts_meta_min_3, compress_hyper_6_17_chunk._ts_meta_max_3, compress_hyper_6_17_chunk._ts_meta_min_1, 
compress_hyper_6_17_chunk._ts_meta_max_1, compress_hyper_6_17_chunk._ts_meta_min_2, compress_hyper_6_17_chunk._ts_meta_max_2 @@ -3526,6 +3529,7 @@ ORDER BY time, Output: _hyper_2_5_chunk."time", _hyper_2_5_chunk.device_id, _hyper_2_5_chunk.device_id_peer, _hyper_2_5_chunk.v0, _hyper_2_5_chunk.v1, _hyper_2_5_chunk.v2, _hyper_2_5_chunk.v3 Vectorized Filter: (_hyper_2_5_chunk.v3 > '10'::double precision) Rows Removed by Filter: 1080 + Batches Removed by Filter: 3 Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_6_18_chunk (actual rows=3 loops=1) Output: compress_hyper_6_18_chunk."time", compress_hyper_6_18_chunk.device_id, compress_hyper_6_18_chunk.device_id_peer, compress_hyper_6_18_chunk.v0, compress_hyper_6_18_chunk.v1, compress_hyper_6_18_chunk.v2, compress_hyper_6_18_chunk.v3, compress_hyper_6_18_chunk._ts_meta_count, compress_hyper_6_18_chunk._ts_meta_sequence_num, compress_hyper_6_18_chunk._ts_meta_min_3, compress_hyper_6_18_chunk._ts_meta_max_3, compress_hyper_6_18_chunk._ts_meta_min_1, compress_hyper_6_18_chunk._ts_meta_max_1, compress_hyper_6_18_chunk._ts_meta_min_2, compress_hyper_6_18_chunk._ts_meta_max_2 @@ -3533,6 +3537,7 @@ ORDER BY time, Output: _hyper_2_6_chunk."time", _hyper_2_6_chunk.device_id, _hyper_2_6_chunk.device_id_peer, _hyper_2_6_chunk.v0, _hyper_2_6_chunk.v1, _hyper_2_6_chunk.v2, _hyper_2_6_chunk.v3 Vectorized Filter: (_hyper_2_6_chunk.v3 > '10'::double precision) Rows Removed by Filter: 360 + Batches Removed by Filter: 1 Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_6_19_chunk (actual rows=1 loops=1) Output: compress_hyper_6_19_chunk."time", compress_hyper_6_19_chunk.device_id, compress_hyper_6_19_chunk.device_id_peer, compress_hyper_6_19_chunk.v0, compress_hyper_6_19_chunk.v1, compress_hyper_6_19_chunk.v2, compress_hyper_6_19_chunk.v3, compress_hyper_6_19_chunk._ts_meta_count, compress_hyper_6_19_chunk._ts_meta_sequence_num, compress_hyper_6_19_chunk._ts_meta_min_3, compress_hyper_6_19_chunk._ts_meta_max_3, compress_hyper_6_19_chunk._ts_meta_min_1, compress_hyper_6_19_chunk._ts_meta_max_1, compress_hyper_6_19_chunk._ts_meta_min_2, compress_hyper_6_19_chunk._ts_meta_max_2 @@ -3552,6 +3557,7 @@ ORDER BY time, Output: _hyper_2_10_chunk."time", _hyper_2_10_chunk.device_id, _hyper_2_10_chunk.device_id_peer, _hyper_2_10_chunk.v0, _hyper_2_10_chunk.v1, _hyper_2_10_chunk.v2, _hyper_2_10_chunk.v3 Vectorized Filter: (_hyper_2_10_chunk.v3 > '10'::double precision) Rows Removed by Filter: 504 + Batches Removed by Filter: 1 Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_6_20_chunk (actual rows=1 loops=1) Output: compress_hyper_6_20_chunk."time", compress_hyper_6_20_chunk.device_id, compress_hyper_6_20_chunk.device_id_peer, compress_hyper_6_20_chunk.v0, compress_hyper_6_20_chunk.v1, compress_hyper_6_20_chunk.v2, compress_hyper_6_20_chunk.v3, compress_hyper_6_20_chunk._ts_meta_count, compress_hyper_6_20_chunk._ts_meta_sequence_num, compress_hyper_6_20_chunk._ts_meta_min_3, compress_hyper_6_20_chunk._ts_meta_max_3, compress_hyper_6_20_chunk._ts_meta_min_1, compress_hyper_6_20_chunk._ts_meta_max_1, compress_hyper_6_20_chunk._ts_meta_min_2, compress_hyper_6_20_chunk._ts_meta_max_2 @@ -3559,6 +3565,7 @@ ORDER BY time, Output: _hyper_2_11_chunk."time", _hyper_2_11_chunk.device_id, _hyper_2_11_chunk.device_id_peer, _hyper_2_11_chunk.v0, _hyper_2_11_chunk.v1, _hyper_2_11_chunk.v2, _hyper_2_11_chunk.v3 Vectorized Filter: (_hyper_2_11_chunk.v3 > '10'::double precision) Rows Removed by Filter: 1512 + 
Batches Removed by Filter: 3 Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_6_21_chunk (actual rows=3 loops=1) Output: compress_hyper_6_21_chunk."time", compress_hyper_6_21_chunk.device_id, compress_hyper_6_21_chunk.device_id_peer, compress_hyper_6_21_chunk.v0, compress_hyper_6_21_chunk.v1, compress_hyper_6_21_chunk.v2, compress_hyper_6_21_chunk.v3, compress_hyper_6_21_chunk._ts_meta_count, compress_hyper_6_21_chunk._ts_meta_sequence_num, compress_hyper_6_21_chunk._ts_meta_min_3, compress_hyper_6_21_chunk._ts_meta_max_3, compress_hyper_6_21_chunk._ts_meta_min_1, compress_hyper_6_21_chunk._ts_meta_max_1, compress_hyper_6_21_chunk._ts_meta_min_2, compress_hyper_6_21_chunk._ts_meta_max_2 @@ -3566,7 +3573,7 @@ ORDER BY time, Output: _hyper_2_12_chunk."time", _hyper_2_12_chunk.device_id, _hyper_2_12_chunk.device_id_peer, _hyper_2_12_chunk.v0, _hyper_2_12_chunk.v1, _hyper_2_12_chunk.v2, _hyper_2_12_chunk.v3 Filter: (_hyper_2_12_chunk.v3 > '10'::double precision) Rows Removed by Filter: 504 -(56 rows) +(61 rows) -- device_id constraint should be pushed down :PREFIX diff --git a/tsl/test/shared/expected/transparent_decompress_chunk-15.out b/tsl/test/shared/expected/transparent_decompress_chunk-15.out index 43271c0509b..7e16e88eff4 100644 --- a/tsl/test/shared/expected/transparent_decompress_chunk-15.out +++ b/tsl/test/shared/expected/transparent_decompress_chunk-15.out @@ -123,10 +123,11 @@ QUERY PLAN Output: _hyper_X_X_chunk."time", _hyper_X_X_chunk.device_id, _hyper_X_X_chunk.v0, _hyper_X_X_chunk.v1, _hyper_X_X_chunk.v2, _hyper_X_X_chunk.v3 Vectorized Filter: (_hyper_X_X_chunk.v3 > '10'::double precision) Rows Removed by Filter: 17990 + Batches Removed by Filter: 20 Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_X_X_chunk (actual rows=20 loops=1) Output: compress_hyper_X_X_chunk."time", compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk.v0, compress_hyper_X_X_chunk.v1, compress_hyper_X_X_chunk.v2, compress_hyper_X_X_chunk.v3, compress_hyper_X_X_chunk._ts_meta_count, compress_hyper_X_X_chunk._ts_meta_sequence_num, compress_hyper_X_X_chunk._ts_meta_min_1, compress_hyper_X_X_chunk._ts_meta_max_1 -(11 rows) +(12 rows) -- device_id constraint should be pushed down :PREFIX SELECT * FROM :TEST_TABLE WHERE device_id = 1 ORDER BY time, device_id LIMIT 10; From 4cb0dc3cb11b799e7e8cce6ed7218200be15555a Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 3 Nov 2023 13:43:55 +0100 Subject: [PATCH 028/249] format --- tsl/src/nodes/decompress_chunk/exec.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/exec.c b/tsl/src/nodes/decompress_chunk/exec.c index 549bd2a53ac..d18f2d8e12d 100644 --- a/tsl/src/nodes/decompress_chunk/exec.c +++ b/tsl/src/nodes/decompress_chunk/exec.c @@ -872,11 +872,14 @@ decompress_chunk_explain(CustomScanState *node, List *ancestors, ExplainState *e ts_show_instrumentation_count("Rows Removed by Filter", 1, &node->ss.ps, es); } - if (es->analyze && es->verbose - && (node->ss.ps.instrument->ntuples2 > 0 || es->format != EXPLAIN_FORMAT_TEXT)) + if (es->analyze && es->verbose && + (node->ss.ps.instrument->ntuples2 > 0 || es->format != EXPLAIN_FORMAT_TEXT)) { - ExplainPropertyFloat("Batches Removed by Filter", NULL, - node->ss.ps.instrument->ntuples2, 0, es); + ExplainPropertyFloat("Batches Removed by Filter", + NULL, + node->ss.ps.instrument->ntuples2, + 0, + es); } if (es->verbose || es->format != 
EXPLAIN_FORMAT_TEXT) From 0f94a1aab6a82bd15ba0cfec658ab013f01ec672 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 3 Nov 2023 13:45:20 +0100 Subject: [PATCH 029/249] reference transparent_decompression-* transparent_decompress_chunk-* --- tsl/test/expected/transparent_decompression-14.out | 11 +++++++++-- .../expected/transparent_decompress_chunk-14.out | 3 ++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/tsl/test/expected/transparent_decompression-14.out b/tsl/test/expected/transparent_decompression-14.out index 9f6daa018e4..461769d8154 100644 --- a/tsl/test/expected/transparent_decompression-14.out +++ b/tsl/test/expected/transparent_decompression-14.out @@ -472,6 +472,7 @@ ORDER BY time, Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device_id, _hyper_1_1_chunk.device_id_peer, _hyper_1_1_chunk.v0, _hyper_1_1_chunk.v1, _hyper_1_1_chunk.v2, _hyper_1_1_chunk.v3 Vectorized Filter: (_hyper_1_1_chunk.v3 > '10'::double precision) Rows Removed by Filter: 1800 + Batches Removed by Filter: 5 Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_5_15_chunk (actual rows=5 loops=1) Output: compress_hyper_5_15_chunk."time", compress_hyper_5_15_chunk.device_id, compress_hyper_5_15_chunk.device_id_peer, compress_hyper_5_15_chunk.v0, compress_hyper_5_15_chunk.v1, compress_hyper_5_15_chunk.v2, compress_hyper_5_15_chunk.v3, compress_hyper_5_15_chunk._ts_meta_count, compress_hyper_5_15_chunk._ts_meta_sequence_num, compress_hyper_5_15_chunk._ts_meta_min_3, compress_hyper_5_15_chunk._ts_meta_max_3, compress_hyper_5_15_chunk._ts_meta_min_1, compress_hyper_5_15_chunk._ts_meta_max_1, compress_hyper_5_15_chunk._ts_meta_min_2, compress_hyper_5_15_chunk._ts_meta_max_2 @@ -491,10 +492,11 @@ ORDER BY time, Output: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device_id, _hyper_1_3_chunk.device_id_peer, _hyper_1_3_chunk.v0, _hyper_1_3_chunk.v1, _hyper_1_3_chunk.v2, _hyper_1_3_chunk.v3 Vectorized Filter: (_hyper_1_3_chunk.v3 > '10'::double precision) Rows Removed by Filter: 2520 + Batches Removed by Filter: 5 Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_5_16_chunk (actual rows=5 loops=1) Output: compress_hyper_5_16_chunk."time", compress_hyper_5_16_chunk.device_id, compress_hyper_5_16_chunk.device_id_peer, compress_hyper_5_16_chunk.v0, compress_hyper_5_16_chunk.v1, compress_hyper_5_16_chunk.v2, compress_hyper_5_16_chunk.v3, compress_hyper_5_16_chunk._ts_meta_count, compress_hyper_5_16_chunk._ts_meta_sequence_num, compress_hyper_5_16_chunk._ts_meta_min_3, compress_hyper_5_16_chunk._ts_meta_max_3, compress_hyper_5_16_chunk._ts_meta_min_1, compress_hyper_5_16_chunk._ts_meta_max_1, compress_hyper_5_16_chunk._ts_meta_min_2, compress_hyper_5_16_chunk._ts_meta_max_2 -(35 rows) +(37 rows) -- device_id constraint should be pushed down :PREFIX @@ -3545,6 +3547,7 @@ ORDER BY time, Output: _hyper_2_4_chunk."time", _hyper_2_4_chunk.device_id, _hyper_2_4_chunk.device_id_peer, _hyper_2_4_chunk.v0, _hyper_2_4_chunk.v1, _hyper_2_4_chunk.v2, _hyper_2_4_chunk.v3 Vectorized Filter: (_hyper_2_4_chunk.v3 > '10'::double precision) Rows Removed by Filter: 360 + Batches Removed by Filter: 1 Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_6_17_chunk (actual rows=1 loops=1) Output: compress_hyper_6_17_chunk."time", compress_hyper_6_17_chunk.device_id, compress_hyper_6_17_chunk.device_id_peer, compress_hyper_6_17_chunk.v0, compress_hyper_6_17_chunk.v1, compress_hyper_6_17_chunk.v2, 
compress_hyper_6_17_chunk.v3, compress_hyper_6_17_chunk._ts_meta_count, compress_hyper_6_17_chunk._ts_meta_sequence_num, compress_hyper_6_17_chunk._ts_meta_min_3, compress_hyper_6_17_chunk._ts_meta_max_3, compress_hyper_6_17_chunk._ts_meta_min_1, compress_hyper_6_17_chunk._ts_meta_max_1, compress_hyper_6_17_chunk._ts_meta_min_2, compress_hyper_6_17_chunk._ts_meta_max_2 @@ -3552,6 +3555,7 @@ ORDER BY time, Output: _hyper_2_5_chunk."time", _hyper_2_5_chunk.device_id, _hyper_2_5_chunk.device_id_peer, _hyper_2_5_chunk.v0, _hyper_2_5_chunk.v1, _hyper_2_5_chunk.v2, _hyper_2_5_chunk.v3 Vectorized Filter: (_hyper_2_5_chunk.v3 > '10'::double precision) Rows Removed by Filter: 1080 + Batches Removed by Filter: 3 Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_6_18_chunk (actual rows=3 loops=1) Output: compress_hyper_6_18_chunk."time", compress_hyper_6_18_chunk.device_id, compress_hyper_6_18_chunk.device_id_peer, compress_hyper_6_18_chunk.v0, compress_hyper_6_18_chunk.v1, compress_hyper_6_18_chunk.v2, compress_hyper_6_18_chunk.v3, compress_hyper_6_18_chunk._ts_meta_count, compress_hyper_6_18_chunk._ts_meta_sequence_num, compress_hyper_6_18_chunk._ts_meta_min_3, compress_hyper_6_18_chunk._ts_meta_max_3, compress_hyper_6_18_chunk._ts_meta_min_1, compress_hyper_6_18_chunk._ts_meta_max_1, compress_hyper_6_18_chunk._ts_meta_min_2, compress_hyper_6_18_chunk._ts_meta_max_2 @@ -3559,6 +3563,7 @@ ORDER BY time, Output: _hyper_2_6_chunk."time", _hyper_2_6_chunk.device_id, _hyper_2_6_chunk.device_id_peer, _hyper_2_6_chunk.v0, _hyper_2_6_chunk.v1, _hyper_2_6_chunk.v2, _hyper_2_6_chunk.v3 Vectorized Filter: (_hyper_2_6_chunk.v3 > '10'::double precision) Rows Removed by Filter: 360 + Batches Removed by Filter: 1 Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_6_19_chunk (actual rows=1 loops=1) Output: compress_hyper_6_19_chunk."time", compress_hyper_6_19_chunk.device_id, compress_hyper_6_19_chunk.device_id_peer, compress_hyper_6_19_chunk.v0, compress_hyper_6_19_chunk.v1, compress_hyper_6_19_chunk.v2, compress_hyper_6_19_chunk.v3, compress_hyper_6_19_chunk._ts_meta_count, compress_hyper_6_19_chunk._ts_meta_sequence_num, compress_hyper_6_19_chunk._ts_meta_min_3, compress_hyper_6_19_chunk._ts_meta_max_3, compress_hyper_6_19_chunk._ts_meta_min_1, compress_hyper_6_19_chunk._ts_meta_max_1, compress_hyper_6_19_chunk._ts_meta_min_2, compress_hyper_6_19_chunk._ts_meta_max_2 @@ -3578,6 +3583,7 @@ ORDER BY time, Output: _hyper_2_10_chunk."time", _hyper_2_10_chunk.device_id, _hyper_2_10_chunk.device_id_peer, _hyper_2_10_chunk.v0, _hyper_2_10_chunk.v1, _hyper_2_10_chunk.v2, _hyper_2_10_chunk.v3 Vectorized Filter: (_hyper_2_10_chunk.v3 > '10'::double precision) Rows Removed by Filter: 504 + Batches Removed by Filter: 1 Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_6_20_chunk (actual rows=1 loops=1) Output: compress_hyper_6_20_chunk."time", compress_hyper_6_20_chunk.device_id, compress_hyper_6_20_chunk.device_id_peer, compress_hyper_6_20_chunk.v0, compress_hyper_6_20_chunk.v1, compress_hyper_6_20_chunk.v2, compress_hyper_6_20_chunk.v3, compress_hyper_6_20_chunk._ts_meta_count, compress_hyper_6_20_chunk._ts_meta_sequence_num, compress_hyper_6_20_chunk._ts_meta_min_3, compress_hyper_6_20_chunk._ts_meta_max_3, compress_hyper_6_20_chunk._ts_meta_min_1, compress_hyper_6_20_chunk._ts_meta_max_1, compress_hyper_6_20_chunk._ts_meta_min_2, compress_hyper_6_20_chunk._ts_meta_max_2 @@ -3585,6 +3591,7 @@ ORDER BY time, Output: _hyper_2_11_chunk."time", 
_hyper_2_11_chunk.device_id, _hyper_2_11_chunk.device_id_peer, _hyper_2_11_chunk.v0, _hyper_2_11_chunk.v1, _hyper_2_11_chunk.v2, _hyper_2_11_chunk.v3 Vectorized Filter: (_hyper_2_11_chunk.v3 > '10'::double precision) Rows Removed by Filter: 1512 + Batches Removed by Filter: 3 Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_6_21_chunk (actual rows=3 loops=1) Output: compress_hyper_6_21_chunk."time", compress_hyper_6_21_chunk.device_id, compress_hyper_6_21_chunk.device_id_peer, compress_hyper_6_21_chunk.v0, compress_hyper_6_21_chunk.v1, compress_hyper_6_21_chunk.v2, compress_hyper_6_21_chunk.v3, compress_hyper_6_21_chunk._ts_meta_count, compress_hyper_6_21_chunk._ts_meta_sequence_num, compress_hyper_6_21_chunk._ts_meta_min_3, compress_hyper_6_21_chunk._ts_meta_max_3, compress_hyper_6_21_chunk._ts_meta_min_1, compress_hyper_6_21_chunk._ts_meta_max_1, compress_hyper_6_21_chunk._ts_meta_min_2, compress_hyper_6_21_chunk._ts_meta_max_2 @@ -3592,7 +3599,7 @@ ORDER BY time, Output: _hyper_2_12_chunk."time", _hyper_2_12_chunk.device_id, _hyper_2_12_chunk.device_id_peer, _hyper_2_12_chunk.v0, _hyper_2_12_chunk.v1, _hyper_2_12_chunk.v2, _hyper_2_12_chunk.v3 Filter: (_hyper_2_12_chunk.v3 > '10'::double precision) Rows Removed by Filter: 504 -(56 rows) +(61 rows) -- device_id constraint should be pushed down :PREFIX diff --git a/tsl/test/shared/expected/transparent_decompress_chunk-14.out b/tsl/test/shared/expected/transparent_decompress_chunk-14.out index b6246278b4d..226edee4e94 100644 --- a/tsl/test/shared/expected/transparent_decompress_chunk-14.out +++ b/tsl/test/shared/expected/transparent_decompress_chunk-14.out @@ -121,10 +121,11 @@ QUERY PLAN Output: _hyper_X_X_chunk."time", _hyper_X_X_chunk.device_id, _hyper_X_X_chunk.v0, _hyper_X_X_chunk.v1, _hyper_X_X_chunk.v2, _hyper_X_X_chunk.v3 Vectorized Filter: (_hyper_X_X_chunk.v3 > '10'::double precision) Rows Removed by Filter: 17990 + Batches Removed by Filter: 20 Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_X_X_chunk (actual rows=20 loops=1) Output: compress_hyper_X_X_chunk."time", compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk.v0, compress_hyper_X_X_chunk.v1, compress_hyper_X_X_chunk.v2, compress_hyper_X_X_chunk.v3, compress_hyper_X_X_chunk._ts_meta_count, compress_hyper_X_X_chunk._ts_meta_sequence_num, compress_hyper_X_X_chunk._ts_meta_min_1, compress_hyper_X_X_chunk._ts_meta_max_1 -(11 rows) +(12 rows) -- device_id constraint should be pushed down :PREFIX SELECT * FROM :TEST_TABLE WHERE device_id = 1 ORDER BY time, device_id LIMIT 10; From 22d2de85e1c19bf761a91ac446634941a641a61b Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 3 Nov 2023 13:47:21 +0100 Subject: [PATCH 030/249] reference transparent_decompression-* transparent_decompress_chunk-* --- tsl/test/expected/transparent_decompression-13.out | 11 +++++++++-- .../expected/transparent_decompress_chunk-13.out | 3 ++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/tsl/test/expected/transparent_decompression-13.out b/tsl/test/expected/transparent_decompression-13.out index 389f69bd993..69f76e5c035 100644 --- a/tsl/test/expected/transparent_decompression-13.out +++ b/tsl/test/expected/transparent_decompression-13.out @@ -472,6 +472,7 @@ ORDER BY time, Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device_id, _hyper_1_1_chunk.device_id_peer, _hyper_1_1_chunk.v0, _hyper_1_1_chunk.v1, _hyper_1_1_chunk.v2, _hyper_1_1_chunk.v3 Vectorized Filter: 
(_hyper_1_1_chunk.v3 > '10'::double precision) Rows Removed by Filter: 1800 + Batches Removed by Filter: 5 Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_5_15_chunk (actual rows=5 loops=1) Output: compress_hyper_5_15_chunk."time", compress_hyper_5_15_chunk.device_id, compress_hyper_5_15_chunk.device_id_peer, compress_hyper_5_15_chunk.v0, compress_hyper_5_15_chunk.v1, compress_hyper_5_15_chunk.v2, compress_hyper_5_15_chunk.v3, compress_hyper_5_15_chunk._ts_meta_count, compress_hyper_5_15_chunk._ts_meta_sequence_num, compress_hyper_5_15_chunk._ts_meta_min_3, compress_hyper_5_15_chunk._ts_meta_max_3, compress_hyper_5_15_chunk._ts_meta_min_1, compress_hyper_5_15_chunk._ts_meta_max_1, compress_hyper_5_15_chunk._ts_meta_min_2, compress_hyper_5_15_chunk._ts_meta_max_2 @@ -491,10 +492,11 @@ ORDER BY time, Output: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device_id, _hyper_1_3_chunk.device_id_peer, _hyper_1_3_chunk.v0, _hyper_1_3_chunk.v1, _hyper_1_3_chunk.v2, _hyper_1_3_chunk.v3 Vectorized Filter: (_hyper_1_3_chunk.v3 > '10'::double precision) Rows Removed by Filter: 2520 + Batches Removed by Filter: 5 Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_5_16_chunk (actual rows=5 loops=1) Output: compress_hyper_5_16_chunk."time", compress_hyper_5_16_chunk.device_id, compress_hyper_5_16_chunk.device_id_peer, compress_hyper_5_16_chunk.v0, compress_hyper_5_16_chunk.v1, compress_hyper_5_16_chunk.v2, compress_hyper_5_16_chunk.v3, compress_hyper_5_16_chunk._ts_meta_count, compress_hyper_5_16_chunk._ts_meta_sequence_num, compress_hyper_5_16_chunk._ts_meta_min_3, compress_hyper_5_16_chunk._ts_meta_max_3, compress_hyper_5_16_chunk._ts_meta_min_1, compress_hyper_5_16_chunk._ts_meta_max_1, compress_hyper_5_16_chunk._ts_meta_min_2, compress_hyper_5_16_chunk._ts_meta_max_2 -(35 rows) +(37 rows) -- device_id constraint should be pushed down :PREFIX @@ -3545,6 +3547,7 @@ ORDER BY time, Output: _hyper_2_4_chunk."time", _hyper_2_4_chunk.device_id, _hyper_2_4_chunk.device_id_peer, _hyper_2_4_chunk.v0, _hyper_2_4_chunk.v1, _hyper_2_4_chunk.v2, _hyper_2_4_chunk.v3 Vectorized Filter: (_hyper_2_4_chunk.v3 > '10'::double precision) Rows Removed by Filter: 360 + Batches Removed by Filter: 1 Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_6_17_chunk (actual rows=1 loops=1) Output: compress_hyper_6_17_chunk."time", compress_hyper_6_17_chunk.device_id, compress_hyper_6_17_chunk.device_id_peer, compress_hyper_6_17_chunk.v0, compress_hyper_6_17_chunk.v1, compress_hyper_6_17_chunk.v2, compress_hyper_6_17_chunk.v3, compress_hyper_6_17_chunk._ts_meta_count, compress_hyper_6_17_chunk._ts_meta_sequence_num, compress_hyper_6_17_chunk._ts_meta_min_3, compress_hyper_6_17_chunk._ts_meta_max_3, compress_hyper_6_17_chunk._ts_meta_min_1, compress_hyper_6_17_chunk._ts_meta_max_1, compress_hyper_6_17_chunk._ts_meta_min_2, compress_hyper_6_17_chunk._ts_meta_max_2 @@ -3552,6 +3555,7 @@ ORDER BY time, Output: _hyper_2_5_chunk."time", _hyper_2_5_chunk.device_id, _hyper_2_5_chunk.device_id_peer, _hyper_2_5_chunk.v0, _hyper_2_5_chunk.v1, _hyper_2_5_chunk.v2, _hyper_2_5_chunk.v3 Vectorized Filter: (_hyper_2_5_chunk.v3 > '10'::double precision) Rows Removed by Filter: 1080 + Batches Removed by Filter: 3 Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_6_18_chunk (actual rows=3 loops=1) Output: compress_hyper_6_18_chunk."time", compress_hyper_6_18_chunk.device_id, compress_hyper_6_18_chunk.device_id_peer, compress_hyper_6_18_chunk.v0, 
compress_hyper_6_18_chunk.v1, compress_hyper_6_18_chunk.v2, compress_hyper_6_18_chunk.v3, compress_hyper_6_18_chunk._ts_meta_count, compress_hyper_6_18_chunk._ts_meta_sequence_num, compress_hyper_6_18_chunk._ts_meta_min_3, compress_hyper_6_18_chunk._ts_meta_max_3, compress_hyper_6_18_chunk._ts_meta_min_1, compress_hyper_6_18_chunk._ts_meta_max_1, compress_hyper_6_18_chunk._ts_meta_min_2, compress_hyper_6_18_chunk._ts_meta_max_2 @@ -3559,6 +3563,7 @@ ORDER BY time, Output: _hyper_2_6_chunk."time", _hyper_2_6_chunk.device_id, _hyper_2_6_chunk.device_id_peer, _hyper_2_6_chunk.v0, _hyper_2_6_chunk.v1, _hyper_2_6_chunk.v2, _hyper_2_6_chunk.v3 Vectorized Filter: (_hyper_2_6_chunk.v3 > '10'::double precision) Rows Removed by Filter: 360 + Batches Removed by Filter: 1 Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_6_19_chunk (actual rows=1 loops=1) Output: compress_hyper_6_19_chunk."time", compress_hyper_6_19_chunk.device_id, compress_hyper_6_19_chunk.device_id_peer, compress_hyper_6_19_chunk.v0, compress_hyper_6_19_chunk.v1, compress_hyper_6_19_chunk.v2, compress_hyper_6_19_chunk.v3, compress_hyper_6_19_chunk._ts_meta_count, compress_hyper_6_19_chunk._ts_meta_sequence_num, compress_hyper_6_19_chunk._ts_meta_min_3, compress_hyper_6_19_chunk._ts_meta_max_3, compress_hyper_6_19_chunk._ts_meta_min_1, compress_hyper_6_19_chunk._ts_meta_max_1, compress_hyper_6_19_chunk._ts_meta_min_2, compress_hyper_6_19_chunk._ts_meta_max_2 @@ -3578,6 +3583,7 @@ ORDER BY time, Output: _hyper_2_10_chunk."time", _hyper_2_10_chunk.device_id, _hyper_2_10_chunk.device_id_peer, _hyper_2_10_chunk.v0, _hyper_2_10_chunk.v1, _hyper_2_10_chunk.v2, _hyper_2_10_chunk.v3 Vectorized Filter: (_hyper_2_10_chunk.v3 > '10'::double precision) Rows Removed by Filter: 504 + Batches Removed by Filter: 1 Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_6_20_chunk (actual rows=1 loops=1) Output: compress_hyper_6_20_chunk."time", compress_hyper_6_20_chunk.device_id, compress_hyper_6_20_chunk.device_id_peer, compress_hyper_6_20_chunk.v0, compress_hyper_6_20_chunk.v1, compress_hyper_6_20_chunk.v2, compress_hyper_6_20_chunk.v3, compress_hyper_6_20_chunk._ts_meta_count, compress_hyper_6_20_chunk._ts_meta_sequence_num, compress_hyper_6_20_chunk._ts_meta_min_3, compress_hyper_6_20_chunk._ts_meta_max_3, compress_hyper_6_20_chunk._ts_meta_min_1, compress_hyper_6_20_chunk._ts_meta_max_1, compress_hyper_6_20_chunk._ts_meta_min_2, compress_hyper_6_20_chunk._ts_meta_max_2 @@ -3585,6 +3591,7 @@ ORDER BY time, Output: _hyper_2_11_chunk."time", _hyper_2_11_chunk.device_id, _hyper_2_11_chunk.device_id_peer, _hyper_2_11_chunk.v0, _hyper_2_11_chunk.v1, _hyper_2_11_chunk.v2, _hyper_2_11_chunk.v3 Vectorized Filter: (_hyper_2_11_chunk.v3 > '10'::double precision) Rows Removed by Filter: 1512 + Batches Removed by Filter: 3 Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_6_21_chunk (actual rows=3 loops=1) Output: compress_hyper_6_21_chunk."time", compress_hyper_6_21_chunk.device_id, compress_hyper_6_21_chunk.device_id_peer, compress_hyper_6_21_chunk.v0, compress_hyper_6_21_chunk.v1, compress_hyper_6_21_chunk.v2, compress_hyper_6_21_chunk.v3, compress_hyper_6_21_chunk._ts_meta_count, compress_hyper_6_21_chunk._ts_meta_sequence_num, compress_hyper_6_21_chunk._ts_meta_min_3, compress_hyper_6_21_chunk._ts_meta_max_3, compress_hyper_6_21_chunk._ts_meta_min_1, compress_hyper_6_21_chunk._ts_meta_max_1, compress_hyper_6_21_chunk._ts_meta_min_2, compress_hyper_6_21_chunk._ts_meta_max_2 @@ 
-3592,7 +3599,7 @@ ORDER BY time, Output: _hyper_2_12_chunk."time", _hyper_2_12_chunk.device_id, _hyper_2_12_chunk.device_id_peer, _hyper_2_12_chunk.v0, _hyper_2_12_chunk.v1, _hyper_2_12_chunk.v2, _hyper_2_12_chunk.v3 Filter: (_hyper_2_12_chunk.v3 > '10'::double precision) Rows Removed by Filter: 504 -(56 rows) +(61 rows) -- device_id constraint should be pushed down :PREFIX diff --git a/tsl/test/shared/expected/transparent_decompress_chunk-13.out b/tsl/test/shared/expected/transparent_decompress_chunk-13.out index b6246278b4d..226edee4e94 100644 --- a/tsl/test/shared/expected/transparent_decompress_chunk-13.out +++ b/tsl/test/shared/expected/transparent_decompress_chunk-13.out @@ -121,10 +121,11 @@ QUERY PLAN Output: _hyper_X_X_chunk."time", _hyper_X_X_chunk.device_id, _hyper_X_X_chunk.v0, _hyper_X_X_chunk.v1, _hyper_X_X_chunk.v2, _hyper_X_X_chunk.v3 Vectorized Filter: (_hyper_X_X_chunk.v3 > '10'::double precision) Rows Removed by Filter: 17990 + Batches Removed by Filter: 20 Bulk Decompression: true -> Seq Scan on _timescaledb_internal.compress_hyper_X_X_chunk (actual rows=20 loops=1) Output: compress_hyper_X_X_chunk."time", compress_hyper_X_X_chunk.device_id, compress_hyper_X_X_chunk.v0, compress_hyper_X_X_chunk.v1, compress_hyper_X_X_chunk.v2, compress_hyper_X_X_chunk.v3, compress_hyper_X_X_chunk._ts_meta_count, compress_hyper_X_X_chunk._ts_meta_sequence_num, compress_hyper_X_X_chunk._ts_meta_min_1, compress_hyper_X_X_chunk._ts_meta_max_1 -(11 rows) +(12 rows) -- device_id constraint should be pushed down :PREFIX SELECT * FROM :TEST_TABLE WHERE device_id = 1 ORDER BY time, device_id LIMIT 10; From 0ea26be9939ee4c3781ab733d1da6d0785df66dc Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 3 Nov 2023 14:14:33 +0100 Subject: [PATCH 031/249] comments and coverage fix --- .../nodes/decompress_chunk/compressed_batch.c | 62 +++++++++---------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 9fea84639f4..73913a12e70 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -141,11 +141,9 @@ decompress_column(DecompressChunkState *chunk_state, DecompressBatchState *batch if (arrow) { - if (batch_state->total_batch_rows == 0) - { - batch_state->total_batch_rows = arrow->length; - } - else if (batch_state->total_batch_rows != arrow->length) + /* Should have been filled from the count metadata column. */ + Assert(batch_state->total_batch_rows != 0); + if (batch_state->total_batch_rows != arrow->length) { elog(ERROR, "compressed column out of sync with batch counter"); } @@ -164,8 +162,13 @@ decompress_column(DecompressChunkState *chunk_state, DecompressBatchState *batch column_description->typid); } +/* + * Compute the vectorized filters. Returns if we have any passing rows. If not, + * it means the entire batch is filtered out, and we use this for further + * optimizations. + */ static bool -apply_vector_quals(DecompressChunkState *chunk_state, DecompressBatchState *batch_state) +compute_vector_quals(DecompressChunkState *chunk_state, DecompressBatchState *batch_state) { if (!chunk_state->vectorized_quals_constified) { @@ -304,8 +307,7 @@ apply_vector_quals(DecompressChunkState *chunk_state, DecompressBatchState *batc } /* - * If we don't have any passing rows, break out early to avoid - * reading and decompressing other columns. 
+ * Have to return whether we have any passing rows. */ bool have_passing_rows = false; for (int i = 0; i < bitmap_bytes / 8; i++) @@ -476,14 +478,8 @@ compressed_batch_set_compressed_tuple(DecompressChunkState *chunk_state, count_value))); } - if (batch_state->total_batch_rows == 0) - { - batch_state->total_batch_rows = count_value; - } - else if (batch_state->total_batch_rows != count_value) - { - elog(ERROR, "compressed column out of sync with batch counter"); - } + Assert(batch_state->total_batch_rows == 0); + batch_state->total_batch_rows = count_value; break; } @@ -496,12 +492,27 @@ compressed_batch_set_compressed_tuple(DecompressChunkState *chunk_state, } } - const bool have_passing_rows = apply_vector_quals(chunk_state, batch_state); - if (have_passing_rows || chunk_state->batch_sorted_merge) + const bool have_passing_rows = compute_vector_quals(chunk_state, batch_state); + if (!have_passing_rows && !chunk_state->batch_sorted_merge) + { + /* + * The entire batch doesn't pass the vectorized quals, so we might be + * able to avoid reading and decompressing other columns. Scroll it to + * the end. + * Note that this optimization doesn't work with "batch sorted merge", + * because the latter always has to read the first row of the batch for + * its sorting needs, so it always has to read and decompress all + * columns. + */ + batch_state->next_batch_row = batch_state->total_batch_rows; + InstrCountTuples2(chunk_state, 1); + InstrCountFiltered1(chunk_state, batch_state->total_batch_rows); + } + else { /* - * Have rows that actually pass the vector quals, have to decompress the - * rest of the compressed columns. + * We have some rows in the batch that pass the vectorized filters, so + * we have to decompress the rest of the compressed columns. */ const int num_compressed_columns = chunk_state->num_compressed_columns; for (int i = 0; i < num_compressed_columns; i++) @@ -514,17 +525,6 @@ compressed_batch_set_compressed_tuple(DecompressChunkState *chunk_state, } } } - else - { - /* - * The entire batch doesn't pass the vectorized quals, so we might be - * able to avoid reading some columns. - */ - InstrCountTuples2(chunk_state, 1); - InstrCountFiltered1(chunk_state, batch_state->total_batch_rows); - Assert(!chunk_state->batch_sorted_merge); - batch_state->next_batch_row = batch_state->total_batch_rows; - } MemoryContextSwitchTo(old_context); } From b7c32eece5d6f066100a46c985ca8f5befae10d6 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 3 Nov 2023 14:47:37 +0100 Subject: [PATCH 032/249] assert --- tsl/src/nodes/decompress_chunk/compressed_batch.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 73913a12e70..f51bfb5b98e 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -499,14 +499,19 @@ compressed_batch_set_compressed_tuple(DecompressChunkState *chunk_state, * The entire batch doesn't pass the vectorized quals, so we might be * able to avoid reading and decompressing other columns. Scroll it to * the end. - * Note that this optimization doesn't work with "batch sorted merge", - * because the latter always has to read the first row of the batch for - * its sorting needs, so it always has to read and decompress all - * columns. 
*/ batch_state->next_batch_row = batch_state->total_batch_rows; InstrCountTuples2(chunk_state, 1); InstrCountFiltered1(chunk_state, batch_state->total_batch_rows); + + /* + * Note that this optimization can't work with "batch sorted merge", + * because the latter always has to read the first row of the batch for + * its sorting needs, so it always has to read and decompress all + * columns. This is not a problem at the moment, because for batch + * sorted merge we disable bulk decompression entirely, at planning time. + */ + Assert(!chunk_state->batch_sorted_merge); } else { From 9dae695d1cd944585087f43271529e2899fde7d3 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 3 Nov 2023 16:34:33 +0100 Subject: [PATCH 033/249] Vectorize filters that use scalar array operations Like `device in (1, 2, 3)`. --- .../nodes/decompress_chunk/compressed_batch.c | 191 ++++++++++++++++-- tsl/src/nodes/decompress_chunk/exec.c | 12 +- tsl/src/nodes/decompress_chunk/planner.c | 97 +++++++-- .../pred_vector_const_arithmetic_single.c | 8 - .../decompress_chunk/vector_predicates.c | 6 +- .../decompress_chunk/vector_predicates.h | 5 +- tsl/test/expected/decompress_vector_qual.out | 41 ++++ tsl/test/sql/decompress_vector_qual.sql | 14 ++ 8 files changed, 328 insertions(+), 46 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 3d63c1f9623..28d0fd613e2 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -78,6 +78,112 @@ make_single_value_arrow(Oid pgtype, Datum datum, bool isnull) return arrow; } +static inline void +vector_predicate_saop_impl(VectorPredicate *vector_const_predicate, bool is_or, + const ArrowArray *vector, Datum array, uint64 *restrict final_result) +{ + const size_t result_bits = vector->length; + const size_t result_words = (result_bits + 63) / 64; + + uint64 *restrict array_result; + /* + * For OR, we need an intermediate storage to accumulate the results + * from all elements. + * For AND, we can apply predicate for each element to the final result. + */ + uint64 array_result_storage[(GLOBAL_MAX_ROWS_PER_COMPRESSION + 63) / 64]; + if (is_or) + { + array_result = array_result_storage; + for (size_t i = 0; i < result_words; i++) + { + array_result_storage[i] = 0; + } + } + else + { + array_result = final_result; + } + + ArrayType *arr = DatumGetArrayTypeP(array); + + int16 typlen; + bool typbyval; + char typalign; + get_typlenbyvalalign(ARR_ELEMTYPE(arr), &typlen, &typbyval, &typalign); + + const char *s = (const char *) ARR_DATA_PTR(arr); + Ensure(ARR_NULLBITMAP(arr) == NULL, + "vectorized scalar array ops do not support nullable arrays"); + + const int nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); + + for (int i = 0; i < nitems; i++) + { + Datum constvalue = fetch_att(s, typbyval, typlen); + s = att_addlength_pointer(s, typlen, s); + s = (char *) att_align_nominal(s, typalign); + + /* + * For OR, we also need an intermediate storage for predicate result + * for each array element, since the predicates AND their result. + * + * For AND, we can and apply predicate for each array element to the + * final result. 
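+		 * In other words, for ANY (OR) semantics each element's result is
+		 * ORed into array_result and only combined into final_result after
+		 * the loop, while for ALL (AND) semantics each element's result is
+		 * ANDed into final_result directly.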
+ */ + uint64 single_result_storage[(GLOBAL_MAX_ROWS_PER_COMPRESSION + 63) / 64]; + uint64 *restrict single_result; + if (is_or) + { + single_result = single_result_storage; + for (size_t outer = 0; outer < result_words; outer++) + { + single_result[outer] = -1; + } + } + else + { + single_result = final_result; + } + + vector_const_predicate(vector, constvalue, single_result); + + if (is_or) + { + for (size_t outer = 0; outer < result_words; outer++) + { + array_result[outer] |= single_result[outer]; + } + } + } + + if (is_or) + { + for (size_t outer = 0; outer < result_words; outer++) + { + /* + * The tail bits corresponding to past-the-end rows when n % 64 != 0 + * should be already zeroed out in the final_result. + */ + final_result[outer] &= array_result[outer]; + } + } +} + +static void +vector_predicate_saop_and(VectorPredicate *scalar_predicate, const ArrowArray *vector, Datum array, + uint64 *restrict result) +{ + vector_predicate_saop_impl(scalar_predicate, /* is_or = */ false, vector, array, result); +} + +static void +vector_predicate_saop_or(VectorPredicate *scalar_predicate, const ArrowArray *vector, Datum array, + uint64 *restrict result) +{ + vector_predicate_saop_impl(scalar_predicate, /* is_or = */ true, vector, array, result); +} + static void apply_vector_quals(DecompressChunkState *chunk_state, DecompressBatchState *batch_state) { @@ -93,6 +199,16 @@ apply_vector_quals(DecompressChunkState *chunk_state, DecompressBatchState *batc const int bitmap_bytes = sizeof(uint64) * ((batch_state->total_batch_rows + 63) / 64); batch_state->vector_qual_result = palloc(bitmap_bytes); memset(batch_state->vector_qual_result, 0xFF, bitmap_bytes); + if (batch_state->total_batch_rows % 64 != 0) + { + /* + * We have to zero out the bits for past-the-end elements in the last + * bitmap word. Since all predicates are ANDed to the result bitmap, + * we can do it here once instead of doing it in each predicate. + */ + const uint64 mask = ((uint64) -1) >> (64 - batch_state->total_batch_rows % 64); + batch_state->vector_qual_result[batch_state->total_batch_rows / 64] = mask; + } /* * Compute the quals. @@ -100,14 +216,39 @@ apply_vector_quals(DecompressChunkState *chunk_state, DecompressBatchState *batc ListCell *lc; foreach (lc, chunk_state->vectorized_quals_constified) { - /* For now we only support "Var ? Const" predicates. */ - OpExpr *oe = castNode(OpExpr, lfirst(lc)); - Var *var = castNode(Var, linitial(oe->args)); - Const *constnode = castNode(Const, lsecond(oe->args)); + /* + * For now we support "Var ? Const" predicates and + * ScalarArrayOperations. + */ + List *args = NULL; + RegProcedure vector_const_opcode = InvalidOid; + ScalarArrayOpExpr *saop = NULL; + OpExpr *opexpr = NULL; + if (IsA(lfirst(lc), ScalarArrayOpExpr)) + { + saop = castNode(ScalarArrayOpExpr, lfirst(lc)); + args = saop->args; + vector_const_opcode = get_opcode(saop->opno); + } + else + { + opexpr = castNode(OpExpr, lfirst(lc)); + args = opexpr->args; + vector_const_opcode = get_opcode(opexpr->opno); + } + + /* + * Find the vector_const predicate. + */ + VectorPredicate *vector_const_predicate = get_vector_const_predicate(vector_const_opcode); + Ensure(vector_const_predicate != NULL, + "vectorized predicate not found for postgres predicate %d", + vector_const_opcode); /* * Find the compressed column referred to by the Var. 
*/ + Var *var = castNode(Var, linitial(args)); DecompressChunkColumnDescription *column_description = NULL; int column_index = 0; for (; column_index < chunk_state->num_total_columns; column_index++) @@ -170,20 +311,46 @@ apply_vector_quals(DecompressChunkState *chunk_state, DecompressBatchState *batc predicate_result = &default_value_predicate_result; } - /* Find and compute the predicate. */ - void (*predicate)(const ArrowArray *, Datum, uint64 *restrict) = - get_vector_const_predicate(get_opcode(oe->opno)); - Ensure(predicate != NULL, - "vectorized predicate not found for postgres predicate %d", - get_opcode(oe->opno)); - /* * The vectorizable predicates should be STRICT, so we shouldn't see null * constants here. */ + Const *constnode = castNode(Const, lsecond(args)); Ensure(!constnode->constisnull, "vectorized predicate called for a null value"); - predicate(vector, constnode->constvalue, predicate_result); + /* + * At last, compute the predicate. + */ + if (saop) + { + if (saop->useOr) + { + vector_predicate_saop_or(vector_const_predicate, + vector, + constnode->constvalue, + predicate_result); + } + else + { + vector_predicate_saop_and(vector_const_predicate, + vector, + constnode->constvalue, + predicate_result); + } + } + else + { + vector_const_predicate(vector, constnode->constvalue, predicate_result); + } + + /* Account for nulls which shouldn't pass the predicate. */ + const size_t n = vector->length; + const size_t n_words = (n + 63) / 64; + const uint64 *restrict validity = (uint64 *restrict) vector->buffers[0]; + for (size_t i = 0; i < n_words; i++) + { + predicate_result[i] &= validity[i]; + } /* Process the result. */ if (column_values->arrow == NULL) diff --git a/tsl/src/nodes/decompress_chunk/exec.c b/tsl/src/nodes/decompress_chunk/exec.c index 89f6bdc31b5..09f7c07e8dd 100644 --- a/tsl/src/nodes/decompress_chunk/exec.c +++ b/tsl/src/nodes/decompress_chunk/exec.c @@ -515,8 +515,16 @@ decompress_chunk_begin(CustomScanState *node, EState *estate, int eflags) } } - OpExpr *opexpr = castNode(OpExpr, constified); - Ensure(IsA(lsecond(opexpr->args), Const), + List *args; + if (IsA(constified, OpExpr)) + { + args = castNode(OpExpr, constified)->args; + } + else + { + args = castNode(ScalarArrayOpExpr, constified)->args; + } + Ensure(IsA(lsecond(args), Const), "failed to evaluate runtime constant in vectorized filter"); chunk_state->vectorized_quals_constified = lappend(chunk_state->vectorized_quals_constified, constified); diff --git a/tsl/src/nodes/decompress_chunk/planner.c b/tsl/src/nodes/decompress_chunk/planner.c index 1353af574be..c2f440ab0a4 100644 --- a/tsl/src/nodes/decompress_chunk/planner.c +++ b/tsl/src/nodes/decompress_chunk/planner.c @@ -440,34 +440,54 @@ is_not_runtime_constant(Node *node) static Node * make_vectorized_qual(DecompressChunkPath *path, Node *qual) { + // my_print(qual); + /* Only simple "Var op Const" binary predicates for now. 
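 	 * ScalarArrayOpExpr quals such as "device in (1, 2, 3)" are also accepted,
 	 * subject to the additional checks below (no hash table, the array is a
 	 * Const, and it has no null elements).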
*/ - if (!IsA(qual, OpExpr)) + if (!IsA(qual, OpExpr) && !IsA(qual, ScalarArrayOpExpr)) { return NULL; } - OpExpr *o = castNode(OpExpr, qual); + List *args = NIL; + OpExpr *opexpr = NULL; + Oid opno = InvalidOid; + ScalarArrayOpExpr *saop = NULL; + if (IsA(qual, OpExpr)) + { + opexpr = castNode(OpExpr, qual); + args = opexpr->args; + opno = opexpr->opno; + } + else + { + saop = castNode(ScalarArrayOpExpr, qual); + args = saop->args; + opno = saop->opno; + } - if (list_length(o->args) != 2) + if (list_length(args) != 2) { return NULL; } - if (IsA(lsecond(o->args), Var)) + if (opexpr && IsA(lsecond(args), Var)) { /* Try to commute the operator if the constant is on the right. */ - Oid commutator_opno = get_commutator(o->opno); - if (OidIsValid(commutator_opno)) + opno = get_commutator(opno); + if (!OidIsValid(opno)) { - o = (OpExpr *) copyObject(o); - o->opno = commutator_opno; - /* - * opfuncid is a cache, we can set it to InvalidOid like the - * CommuteOpExpr() does. - */ - o->opfuncid = InvalidOid; - o->args = list_make2(lsecond(o->args), linitial(o->args)); + return NULL; } + + opexpr = (OpExpr *) copyObject(opexpr); + opexpr->opno = opno; + /* + * opfuncid is a cache, we can set it to InvalidOid like the + * CommuteOpExpr() does. + */ + opexpr->opfuncid = InvalidOid; + args = list_make2(lsecond(args), linitial(args)); + opexpr->args = args; } /* @@ -475,12 +495,12 @@ make_vectorized_qual(DecompressChunkPath *path, Node *qual) * side is a constant or can be evaluated to a constant at run time (e.g. * contains stable functions). */ - if (!IsA(linitial(o->args), Var) || is_not_runtime_constant(lsecond(o->args))) + if (!IsA(linitial(args), Var) || is_not_runtime_constant(lsecond(args))) { return NULL; } - Var *var = castNode(Var, linitial(o->args)); + Var *var = castNode(Var, linitial(args)); Assert((Index) var->varno == path->info->chunk_rel->relid); /* @@ -494,13 +514,50 @@ make_vectorized_qual(DecompressChunkPath *path, Node *qual) return NULL; } - Oid opcode = get_opcode(o->opno); - if (get_vector_const_predicate(opcode)) + Oid opcode = get_opcode(opno); + if (!get_vector_const_predicate(opcode)) + { + return NULL; + } + + if (saop) { - return (Node *) o; + if (saop->hashfuncid) + { + /* + * Don't vectorize if the planner decided to build a hash table. + */ + return NULL; + } + + if (!IsA(lsecond(args), Const)) + { + /* + * Vectorizing ScalarArrayOperation requires us to know the type + * of the array elements, and the absence of nulls, at runtime, + * so unfortunately we can't apply it for arrays evaluated at run + * time. + */ + return NULL; + } + Const *constnode = castNode(Const, lsecond(args)); + if (constnode->constisnull) + { + /* + * FIXME what happens for normal operations in this case? + * And if a stable function evaluates to null at run time? + */ + return NULL; + } + ArrayType *arr = DatumGetArrayTypeP(constnode->constvalue); + if (ARR_NULLBITMAP(arr) != NULL) + { + /* We don't have a provision for null elements in arrays yet. */ + return NULL; + } } - return NULL; + return opexpr ? 
(Node *) opexpr : (Node *) saop; } /* diff --git a/tsl/src/nodes/decompress_chunk/pred_vector_const_arithmetic_single.c b/tsl/src/nodes/decompress_chunk/pred_vector_const_arithmetic_single.c index d6c41c4bfc3..d89f54eebfe 100644 --- a/tsl/src/nodes/decompress_chunk/pred_vector_const_arithmetic_single.c +++ b/tsl/src/nodes/decompress_chunk/pred_vector_const_arithmetic_single.c @@ -27,14 +27,6 @@ FUNCTION_NAME(PREDICATE_NAME, VECTOR_CTYPE, { const size_t n = arrow->length; - /* Account for nulls which shouldn't pass the predicate. */ - const size_t n_words = (n + 63) / 64; - const uint64 *restrict validity = (uint64 *restrict) arrow->buffers[0]; - for (size_t i = 0; i < n_words; i++) - { - result[i] &= validity[i]; - } - /* Now run the predicate itself. */ const CONST_CTYPE constvalue = CONST_CONVERSION(constdatum); const VECTOR_CTYPE *restrict vector = (VECTOR_CTYPE *restrict) arrow->buffers[1]; diff --git a/tsl/src/nodes/decompress_chunk/vector_predicates.c b/tsl/src/nodes/decompress_chunk/vector_predicates.c index 3f91a0a915f..908622b7bb8 100644 --- a/tsl/src/nodes/decompress_chunk/vector_predicates.c +++ b/tsl/src/nodes/decompress_chunk/vector_predicates.c @@ -20,12 +20,14 @@ #include "pred_vector_const_arithmetic_all.c" +#include "compression/compression.h" + /* * Look up the vectorized implementation for a Postgres predicate, specified by * its Oid in pg_proc. Note that this Oid is different from the opcode. */ -void (*get_vector_const_predicate(Oid pg_predicate))(const ArrowArray *, const Datum, - uint64 *restrict) +VectorPredicate * +get_vector_const_predicate(Oid pg_predicate) { switch (pg_predicate) { diff --git a/tsl/src/nodes/decompress_chunk/vector_predicates.h b/tsl/src/nodes/decompress_chunk/vector_predicates.h index f00d72dfe44..ea1534a6ac8 100644 --- a/tsl/src/nodes/decompress_chunk/vector_predicates.h +++ b/tsl/src/nodes/decompress_chunk/vector_predicates.h @@ -10,5 +10,6 @@ #pragma once -void (*get_vector_const_predicate(Oid pg_predicate))(const ArrowArray *, const Datum, - uint64 *restrict); +typedef void(VectorPredicate)(const ArrowArray *, Datum, uint64 *restrict); + +VectorPredicate *get_vector_const_predicate(Oid pg_predicate); diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index 430a67507bc..f67694793a9 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -100,6 +100,34 @@ select count(*) from vectorqual where metric4 >= 0 /* nulls shouldn't pass the q 1 (1 row) +-- Scalar array operations. +select count(*) from vectorqual where metric3 = any(array[777, 888]); /* default value */ + count +------- + 2 +(1 row) + +select count(*) from vectorqual where metric4 = any(array[44, 55]) /* default null */; + count +------- + 1 +(1 row) + +select count(*) from vectorqual where metric2 > any(array[-1, -2, -3]) /* any */; + count +------- + 4 +(1 row) + +select count(*) from vectorqual where metric2 > all(array[-1, -2, -3]) /* all */; + count +------- + 4 +(1 row) + +-- FIXME have to support this because nulls are impossible to prevent in stable expressions. 
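The FIXME above concerns the three-valued logic of ANY/ALL with NULL array elements, which a later patch in this series implements; the commented-out queries that follow are exactly those cases. As a reference point, a scalar sketch of that logic, assuming int32 elements and a filter context where a NULL result behaves like false. The helper name is illustrative and not part of the patch.

#include <stdbool.h>
#include <stdint.h>

/*
 * Scalar reference for "value = ANY(array)" / "value = ALL(array)" with
 * possible NULL elements, along the lines of ExecEvalScalarArrayOp().
 * For ANY: null | true = true, null | false = null.
 * For ALL: null & false = false, null & true = null.
 * A NULL overall result means the row does not pass the filter.
 */
static bool
scalar_saop_filter(int32_t value, const int32_t *elems, const bool *nulls, int nelems, bool is_any)
{
	bool saw_null = false;
	for (int i = 0; i < nelems; i++)
	{
		if (nulls[i])
		{
			saw_null = true; /* undecided, remember it and keep scanning */
			continue;
		}
		const bool match = (value == elems[i]);
		if (is_any && match)
			return true; /* true | anything = true */
		if (!is_any && !match)
			return false; /* false & anything = false */
	}
	/* No decisive element: NULL if a NULL was seen, otherwise false for ANY and true for ALL. */
	return saw_null ? false : !is_any;
}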
+-- select count(*) from vectorqual where metric2 = any(array[22, null]) /* any with null element */; +-- select count(*) from vectorqual where metric2 = all(array[22, null]) /* all with null element */; set timescaledb.debug_require_vector_qual to 'forbid'; select count(*) from vectorqual where device = 1 /* can't apply vector ops to the segmentby column */; count @@ -198,6 +226,19 @@ select count(*) from vectorqual where metric4 is not null; 2 (1 row) +-- Comparison with other column not vectorized. +select count(*) from vectorqual where metric3 = metric4; + count +------- + 0 +(1 row) + +select count(*) from vectorqual where metric3 = any(array[metric4]); + count +------- + 0 +(1 row) + -- Vectorized filters also work if we have only stable functions on the right -- side that can be evaluated to a constant at run time. set timescaledb.debug_require_vector_qual to 'only'; diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index a4317a9a64d..d6a41e6f2bd 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -32,6 +32,15 @@ select count(*) from vectorqual where metric3 = 777 /* default value */; select count(*) from vectorqual where metric4 = 44 /* column with default null */; select count(*) from vectorqual where metric4 >= 0 /* nulls shouldn't pass the qual */; +-- Scalar array operations. +select count(*) from vectorqual where metric3 = any(array[777, 888]); /* default value */ +select count(*) from vectorqual where metric4 = any(array[44, 55]) /* default null */; +select count(*) from vectorqual where metric2 > any(array[-1, -2, -3]) /* any */; +select count(*) from vectorqual where metric2 > all(array[-1, -2, -3]) /* all */; +-- FIXME have to support this because nulls are impossible to prevent in stable expressions. +-- select count(*) from vectorqual where metric2 = any(array[22, null]) /* any with null element */; +-- select count(*) from vectorqual where metric2 = all(array[22, null]) /* all with null element */; + set timescaledb.debug_require_vector_qual to 'forbid'; select count(*) from vectorqual where device = 1 /* can't apply vector ops to the segmentby column */; @@ -73,6 +82,11 @@ select count(*) from vectorqual where metric4 is null; select count(*) from vectorqual where metric4 is not null; +-- Comparison with other column not vectorized. +select count(*) from vectorqual where metric3 = metric4; +select count(*) from vectorqual where metric3 = any(array[metric4]); + + -- Vectorized filters also work if we have only stable functions on the right -- side that can be evaluated to a constant at run time. 
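The stable-function filters mentioned above go through the same bitmap machinery as everything else in this series: filter results are kept as words of 64 bits, one bit per row. The compressed_batch.c hunk earlier in this patch moves the NULL handling out of the per-type predicate templates and instead ANDs the Arrow validity bitmap into the result. A minimal self-contained sketch of that step, assuming the usual Arrow convention that bit i of word i / 64 corresponds to row i.

#include <stddef.h>
#include <stdint.h>

/*
 * Rows whose validity bit is 0 are SQL NULLs and must not pass the filter,
 * so the predicate result is ANDed word by word with the validity bitmap.
 */
static void
mask_out_null_rows(uint64_t *restrict predicate_result, const uint64_t *restrict validity,
				   size_t nrows)
{
	const size_t nwords = (nrows + 63) / 64;
	for (size_t i = 0; i < nwords; i++)
		predicate_result[i] &= validity[i];
}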
set timescaledb.debug_require_vector_qual to 'only'; From 6437831e449154690f1ff9ff4997e1589cde2aa4 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 3 Nov 2023 16:39:34 +0100 Subject: [PATCH 034/249] no hashes in pg13 --- tsl/src/nodes/decompress_chunk/planner.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tsl/src/nodes/decompress_chunk/planner.c b/tsl/src/nodes/decompress_chunk/planner.c index c2f440ab0a4..f9eb456d038 100644 --- a/tsl/src/nodes/decompress_chunk/planner.c +++ b/tsl/src/nodes/decompress_chunk/planner.c @@ -522,6 +522,7 @@ make_vectorized_qual(DecompressChunkPath *path, Node *qual) if (saop) { +#if PG14_GE if (saop->hashfuncid) { /* @@ -529,6 +530,7 @@ make_vectorized_qual(DecompressChunkPath *path, Node *qual) */ return NULL; } +#endif if (!IsA(lsecond(args), Const)) { From b16f7b986bb586a1b41ddfdca304504f3b9b3db2 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 10 Nov 2023 12:35:52 +0100 Subject: [PATCH 035/249] fixes --- .../nodes/decompress_chunk/compressed_batch.c | 125 +-------------- tsl/src/nodes/decompress_chunk/planner.c | 41 ++--- .../decompress_chunk/vector_predicates.c | 148 +++++++++++++++++- .../decompress_chunk/vector_predicates.h | 3 + tsl/test/expected/decompress_vector_qual.out | 123 +++++++++++---- tsl/test/sql/decompress_vector_qual.sql | 41 +++-- 6 files changed, 289 insertions(+), 192 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 28d0fd613e2..f87dbac9a63 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -78,112 +78,6 @@ make_single_value_arrow(Oid pgtype, Datum datum, bool isnull) return arrow; } -static inline void -vector_predicate_saop_impl(VectorPredicate *vector_const_predicate, bool is_or, - const ArrowArray *vector, Datum array, uint64 *restrict final_result) -{ - const size_t result_bits = vector->length; - const size_t result_words = (result_bits + 63) / 64; - - uint64 *restrict array_result; - /* - * For OR, we need an intermediate storage to accumulate the results - * from all elements. - * For AND, we can apply predicate for each element to the final result. - */ - uint64 array_result_storage[(GLOBAL_MAX_ROWS_PER_COMPRESSION + 63) / 64]; - if (is_or) - { - array_result = array_result_storage; - for (size_t i = 0; i < result_words; i++) - { - array_result_storage[i] = 0; - } - } - else - { - array_result = final_result; - } - - ArrayType *arr = DatumGetArrayTypeP(array); - - int16 typlen; - bool typbyval; - char typalign; - get_typlenbyvalalign(ARR_ELEMTYPE(arr), &typlen, &typbyval, &typalign); - - const char *s = (const char *) ARR_DATA_PTR(arr); - Ensure(ARR_NULLBITMAP(arr) == NULL, - "vectorized scalar array ops do not support nullable arrays"); - - const int nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); - - for (int i = 0; i < nitems; i++) - { - Datum constvalue = fetch_att(s, typbyval, typlen); - s = att_addlength_pointer(s, typlen, s); - s = (char *) att_align_nominal(s, typalign); - - /* - * For OR, we also need an intermediate storage for predicate result - * for each array element, since the predicates AND their result. - * - * For AND, we can and apply predicate for each array element to the - * final result. 
- */ - uint64 single_result_storage[(GLOBAL_MAX_ROWS_PER_COMPRESSION + 63) / 64]; - uint64 *restrict single_result; - if (is_or) - { - single_result = single_result_storage; - for (size_t outer = 0; outer < result_words; outer++) - { - single_result[outer] = -1; - } - } - else - { - single_result = final_result; - } - - vector_const_predicate(vector, constvalue, single_result); - - if (is_or) - { - for (size_t outer = 0; outer < result_words; outer++) - { - array_result[outer] |= single_result[outer]; - } - } - } - - if (is_or) - { - for (size_t outer = 0; outer < result_words; outer++) - { - /* - * The tail bits corresponding to past-the-end rows when n % 64 != 0 - * should be already zeroed out in the final_result. - */ - final_result[outer] &= array_result[outer]; - } - } -} - -static void -vector_predicate_saop_and(VectorPredicate *scalar_predicate, const ArrowArray *vector, Datum array, - uint64 *restrict result) -{ - vector_predicate_saop_impl(scalar_predicate, /* is_or = */ false, vector, array, result); -} - -static void -vector_predicate_saop_or(VectorPredicate *scalar_predicate, const ArrowArray *vector, Datum array, - uint64 *restrict result) -{ - vector_predicate_saop_impl(scalar_predicate, /* is_or = */ true, vector, array, result); -} - static void apply_vector_quals(DecompressChunkState *chunk_state, DecompressBatchState *batch_state) { @@ -323,20 +217,11 @@ apply_vector_quals(DecompressChunkState *chunk_state, DecompressBatchState *batc */ if (saop) { - if (saop->useOr) - { - vector_predicate_saop_or(vector_const_predicate, - vector, - constnode->constvalue, - predicate_result); - } - else - { - vector_predicate_saop_and(vector_const_predicate, - vector, - constnode->constvalue, - predicate_result); - } + vector_array_operator(vector_const_predicate, + saop->useOr, + vector, + constnode->constvalue, + predicate_result); } else { diff --git a/tsl/src/nodes/decompress_chunk/planner.c b/tsl/src/nodes/decompress_chunk/planner.c index 3a1e8444c8e..8f38f64ee95 100644 --- a/tsl/src/nodes/decompress_chunk/planner.c +++ b/tsl/src/nodes/decompress_chunk/planner.c @@ -452,9 +452,10 @@ is_not_runtime_constant(Node *node) static Node * make_vectorized_qual(DecompressChunkPath *path, Node *qual) { - // my_print(qual); - - /* Only simple "Var op Const" binary predicates for now. */ + /* + * Currently we vectorize some "Var op Const" binary predicates, + * and scalar array operations with these predicates. + */ if (!IsA(qual, OpExpr) && !IsA(qual, ScalarArrayOpExpr)) { return NULL; @@ -484,7 +485,9 @@ make_vectorized_qual(DecompressChunkPath *path, Node *qual) if (opexpr && IsA(lsecond(args), Var)) { - /* Try to commute the operator if the constant is on the right. */ + /* + * Try to commute the operator if we have Var on the right. + */ opno = get_commutator(opno); if (!OidIsValid(opno)) { @@ -532,9 +535,9 @@ make_vectorized_qual(DecompressChunkPath *path, Node *qual) return NULL; } +#if PG14_GE if (saop) { -#if PG14_GE if (saop->hashfuncid) { /* @@ -542,34 +545,8 @@ make_vectorized_qual(DecompressChunkPath *path, Node *qual) */ return NULL; } -#endif - - if (!IsA(lsecond(args), Const)) - { - /* - * Vectorizing ScalarArrayOperation requires us to know the type - * of the array elements, and the absence of nulls, at runtime, - * so unfortunately we can't apply it for arrays evaluated at run - * time. - */ - return NULL; - } - Const *constnode = castNode(Const, lsecond(args)); - if (constnode->constisnull) - { - /* - * FIXME what happens for normal operations in this case? 
- * And if a stable function evaluates to null at run time? - */ - return NULL; - } - ArrayType *arr = DatumGetArrayTypeP(constnode->constvalue); - if (ARR_NULLBITMAP(arr) != NULL) - { - /* We don't have a provision for null elements in arrays yet. */ - return NULL; - } } +#endif return opexpr ? (Node *) opexpr : (Node *) saop; } diff --git a/tsl/src/nodes/decompress_chunk/vector_predicates.c b/tsl/src/nodes/decompress_chunk/vector_predicates.c index 908622b7bb8..84276f57814 100644 --- a/tsl/src/nodes/decompress_chunk/vector_predicates.c +++ b/tsl/src/nodes/decompress_chunk/vector_predicates.c @@ -13,14 +13,19 @@ #include #include -#include "compat/compat.h" #include "compression/arrow_c_data_interface.h" #include "vector_predicates.h" -#include "pred_vector_const_arithmetic_all.c" - +#include "compat/compat.h" #include "compression/compression.h" +#include "debug_assert.h" + +/* + * We include all implementations of vector-const predicates here. No separate + * declarations for them to reduce the amount of macro template magic. + */ +#include "pred_vector_const_arithmetic_all.c" /* * Look up the vectorized implementation for a Postgres predicate, specified by @@ -37,3 +42,140 @@ get_vector_const_predicate(Oid pg_predicate) } return NULL; } + +/* + * Vectorized implementation of ScalarArrayOpExpr. Applies scalar_predicate for + * vector and each element of array, combines the result according to "is_or" + * flag. Written along the lines of ExecEvalScalarArrayOp(). + */ +static inline void +vector_array_operator_impl(VectorPredicate *vector_const_predicate, bool is_or, + const ArrowArray *vector, Datum array, uint64 *restrict final_result) +{ + const size_t result_bits = vector->length; + const size_t result_words = (result_bits + 63) / 64; + + uint64 *restrict array_result; + /* + * For OR, we need an intermediate storage to accumulate the results + * from all elements. + * For AND, we can apply predicate for each element to the final result. + */ + uint64 array_result_storage[(GLOBAL_MAX_ROWS_PER_COMPRESSION + 63) / 64]; + if (is_or) + { + array_result = array_result_storage; + for (size_t i = 0; i < result_words; i++) + { + array_result_storage[i] = 0; + } + } + else + { + array_result = final_result; + } + + ArrayType *arr = DatumGetArrayTypeP(array); + + int16 typlen; + bool typbyval; + char typalign; + get_typlenbyvalalign(ARR_ELEMTYPE(arr), &typlen, &typbyval, &typalign); + + const char *array_data = (const char *) ARR_DATA_PTR(arr); + const size_t nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); + const uint64 *restrict array_null_bitmap = (uint64 *) ARR_NULLBITMAP(arr); + + for (size_t array_index = 0; array_index < nitems; array_index++) + { + if (array_null_bitmap != NULL && !arrow_row_is_valid(array_null_bitmap, array_index)) + { + /* + * This array element is NULL. We can't avoid NULLS when evaluating + * the stable functions at run time, so we have to support them. + * This is a predicate, not a generic scalar array operation, so + * thankfully we return a non-nullable bool. + * For ANY: null | true = true, null | false = null, so this means + * we can skip the null element and continue evaluation. + * For ALL: null & true = null, null & false = false, so this means + * that for each row the condition goes to false, and we don't have + * to evaluate the next elements. 
+ */ + if (is_or) + { + continue; + } + + for (size_t word = 0; word < result_words; word++) + { + final_result[word] = 0; + } + return; + } + Datum constvalue = fetch_att(array_data, typbyval, typlen); + array_data = att_addlength_pointer(array_data, typlen, array_data); + array_data = (char *) att_align_nominal(array_data, typalign); + + /* + * For OR, we also need an intermediate storage for predicate result + * for each array element, since the predicates AND their result. + * + * For AND, we can and apply predicate for each array element to the + * final result. + */ + uint64 single_result_storage[(GLOBAL_MAX_ROWS_PER_COMPRESSION + 63) / 64]; + uint64 *restrict single_result; + if (is_or) + { + single_result = single_result_storage; + for (size_t outer = 0; outer < result_words; outer++) + { + single_result[outer] = -1; + } + } + else + { + single_result = final_result; + } + + vector_const_predicate(vector, constvalue, single_result); + + if (is_or) + { + for (size_t outer = 0; outer < result_words; outer++) + { + array_result[outer] |= single_result[outer]; + } + } + } + + if (is_or) + { + for (size_t outer = 0; outer < result_words; outer++) + { + /* + * The tail bits corresponding to past-the-end rows when n % 64 != 0 + * should be already zeroed out in the final_result. + */ + final_result[outer] &= array_result[outer]; + } + } +} + +/* + * This is a thin wrapper to nudge the compiler to inline the AND version which + * is much simpler than OR version. + */ +void +vector_array_operator(VectorPredicate *scalar_predicate, bool is_or, const ArrowArray *vector, + Datum array, uint64 *restrict result) +{ + if (is_or) + { + vector_array_operator_impl(scalar_predicate, /* is_or = */ true, vector, array, result); + } + else + { + vector_array_operator_impl(scalar_predicate, /* is_or = */ false, vector, array, result); + } +} diff --git a/tsl/src/nodes/decompress_chunk/vector_predicates.h b/tsl/src/nodes/decompress_chunk/vector_predicates.h index ea1534a6ac8..e026aadd52b 100644 --- a/tsl/src/nodes/decompress_chunk/vector_predicates.h +++ b/tsl/src/nodes/decompress_chunk/vector_predicates.h @@ -13,3 +13,6 @@ typedef void(VectorPredicate)(const ArrowArray *, Datum, uint64 *restrict); VectorPredicate *get_vector_const_predicate(Oid pg_predicate); + +void vector_array_operator(VectorPredicate *scalar_predicate, bool is_or, const ArrowArray *vector, + Datum array, uint64 *restrict result); diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index f67694793a9..35b976cf4b4 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -1,6 +1,7 @@ -- This file and its contents are licensed under the Timescale License. -- Please see the included NOTICE for copyright information and -- LICENSE-TIMESCALE for a copy of the license. +\c :TEST_DBNAME :ROLE_SUPERUSER create table vectorqual(metric1 int8, ts timestamp, metric2 int8, device int8); select create_hypertable('vectorqual', 'ts'); WARNING: column type "timestamp without time zone" used for "ts" does not follow best practices @@ -100,34 +101,6 @@ select count(*) from vectorqual where metric4 >= 0 /* nulls shouldn't pass the q 1 (1 row) --- Scalar array operations. 
-select count(*) from vectorqual where metric3 = any(array[777, 888]); /* default value */ - count -------- - 2 -(1 row) - -select count(*) from vectorqual where metric4 = any(array[44, 55]) /* default null */; - count -------- - 1 -(1 row) - -select count(*) from vectorqual where metric2 > any(array[-1, -2, -3]) /* any */; - count -------- - 4 -(1 row) - -select count(*) from vectorqual where metric2 > all(array[-1, -2, -3]) /* all */; - count -------- - 4 -(1 row) - --- FIXME have to support this because nulls are impossible to prevent in stable expressions. --- select count(*) from vectorqual where metric2 = any(array[22, null]) /* any with null element */; --- select count(*) from vectorqual where metric2 = all(array[22, null]) /* all with null element */; set timescaledb.debug_require_vector_qual to 'forbid'; select count(*) from vectorqual where device = 1 /* can't apply vector ops to the segmentby column */; count @@ -212,6 +185,22 @@ select count(*) from vectorqual where !!metric3; 5 (1 row) +-- Custom operator on column that supports bulk decompression is not vectorized. +set timescaledb.debug_require_vector_qual to 'forbid'; +create function int4eqq(int4, int4) returns bool as 'int4eq' language internal; +create operator === (function = 'int4eqq', rightarg = int4, leftarg = int4); +select count(*) from vectorqual where metric3 === 777; + count +------- + 2 +(1 row) + +select count(*) from vectorqual where metric3 === any(array[777, 888]); + count +------- + 2 +(1 row) + -- NullTest is not vectorized. set timescaledb.debug_require_vector_qual to 'forbid'; select count(*) from vectorqual where metric4 is null; @@ -226,7 +215,85 @@ select count(*) from vectorqual where metric4 is not null; 2 (1 row) +-- Scalar array operators are vectorized if the operator is vectorizable. +set timescaledb.debug_require_vector_qual to 'only'; +select count(*) from vectorqual where metric3 = any(array[777, 888]); /* default value */ + count +------- + 2 +(1 row) + +select count(*) from vectorqual where metric4 = any(array[44, 55]) /* default null */; + count +------- + 1 +(1 row) + +select count(*) from vectorqual where metric2 > any(array[-1, -2, -3]) /* any */; + count +------- + 5 +(1 row) + +select count(*) from vectorqual where metric2 > all(array[-1, -2, -3]) /* all */; + count +------- + 5 +(1 row) + +-- Also have to support null array elements, because they are impossible to +-- prevent in stable expressions. 
+set timescaledb.debug_require_vector_qual to 'only'; +select count(*) from vectorqual where metric2 = any(array[null::int]) /* any with null element */; + count +------- + 0 +(1 row) + +select count(*) from vectorqual where metric2 = any(array[22, null]) /* any with null element */; + count +------- + 1 +(1 row) + +select count(*) from vectorqual where metric2 = any(array[null, 32]) /* any with null element */; + count +------- + 1 +(1 row) + +select count(*) from vectorqual where metric2 = any(array[22, null, 32]) /* any with null element */; + count +------- + 2 +(1 row) + +select count(*) from vectorqual where metric2 = all(array[null::int]) /* all with null element */; + count +------- + 0 +(1 row) + +select count(*) from vectorqual where metric2 = all(array[22, null]) /* all with null element */; + count +------- + 0 +(1 row) + +select count(*) from vectorqual where metric2 = all(array[null, 32]) /* all with null element */; + count +------- + 0 +(1 row) + +select count(*) from vectorqual where metric2 = all(array[22, null, 32]) /* all with null element */; + count +------- + 0 +(1 row) + -- Comparison with other column not vectorized. +set timescaledb.debug_require_vector_qual to 'forbid'; select count(*) from vectorqual where metric3 = metric4; count ------- diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index d6a41e6f2bd..757dde86e08 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -2,6 +2,8 @@ -- Please see the included NOTICE for copyright information and -- LICENSE-TIMESCALE for a copy of the license. +\c :TEST_DBNAME :ROLE_SUPERUSER + create table vectorqual(metric1 int8, ts timestamp, metric2 int8, device int8); select create_hypertable('vectorqual', 'ts'); alter table vectorqual set (timescaledb.compress, timescaledb.compress_segmentby = 'device'); @@ -32,18 +34,10 @@ select count(*) from vectorqual where metric3 = 777 /* default value */; select count(*) from vectorqual where metric4 = 44 /* column with default null */; select count(*) from vectorqual where metric4 >= 0 /* nulls shouldn't pass the qual */; --- Scalar array operations. -select count(*) from vectorqual where metric3 = any(array[777, 888]); /* default value */ -select count(*) from vectorqual where metric4 = any(array[44, 55]) /* default null */; -select count(*) from vectorqual where metric2 > any(array[-1, -2, -3]) /* any */; -select count(*) from vectorqual where metric2 > all(array[-1, -2, -3]) /* all */; --- FIXME have to support this because nulls are impossible to prevent in stable expressions. --- select count(*) from vectorqual where metric2 = any(array[22, null]) /* any with null element */; --- select count(*) from vectorqual where metric2 = all(array[22, null]) /* all with null element */; - set timescaledb.debug_require_vector_qual to 'forbid'; select count(*) from vectorqual where device = 1 /* can't apply vector ops to the segmentby column */; + -- Test columns that don't support bulk decompression. alter table vectorqual add column tag text; insert into vectorqual(ts, device, metric2, metric3, metric4, tag) values ('2025-01-01 00:00:00', 5, 52, 53, 54, 'tag5'); @@ -76,13 +70,42 @@ create operator !! (function = 'bool', rightarg = int4); select count(*) from vectorqual where !!metric3; +-- Custom operator on column that supports bulk decompression is not vectorized. 
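The custom-operator tests that follow create a === operator backed by a new int4eqq() function. Its pg_proc Oid is not known to the switch inside get_vector_const_predicate(), so make_vectorized_qual() refuses to vectorize the qual. A hypothetical condensed form of that decision, assuming PostgreSQL headers; the helper name is illustrative only.

#include <stdbool.h>

/*
 * An operator is vectorizable only if the function implementing it has a
 * known vectorized counterpart. A custom operator such as === resolves to an
 * unknown Oid, so this returns false and the filter stays row-by-row.
 */
static bool
operator_is_vectorizable(Oid opno)
{
	Oid opcode = get_opcode(opno); /* pg_proc function behind the operator */
	return get_vector_const_predicate(opcode) != NULL;
}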
+set timescaledb.debug_require_vector_qual to 'forbid'; +create function int4eqq(int4, int4) returns bool as 'int4eq' language internal; +create operator === (function = 'int4eqq', rightarg = int4, leftarg = int4); +select count(*) from vectorqual where metric3 === 777; +select count(*) from vectorqual where metric3 === any(array[777, 888]); + + -- NullTest is not vectorized. set timescaledb.debug_require_vector_qual to 'forbid'; select count(*) from vectorqual where metric4 is null; select count(*) from vectorqual where metric4 is not null; +-- Scalar array operators are vectorized if the operator is vectorizable. +set timescaledb.debug_require_vector_qual to 'only'; +select count(*) from vectorqual where metric3 = any(array[777, 888]); /* default value */ +select count(*) from vectorqual where metric4 = any(array[44, 55]) /* default null */; +select count(*) from vectorqual where metric2 > any(array[-1, -2, -3]) /* any */; +select count(*) from vectorqual where metric2 > all(array[-1, -2, -3]) /* all */; + +-- Also have to support null array elements, because they are impossible to +-- prevent in stable expressions. +set timescaledb.debug_require_vector_qual to 'only'; +select count(*) from vectorqual where metric2 = any(array[null::int]) /* any with null element */; +select count(*) from vectorqual where metric2 = any(array[22, null]) /* any with null element */; +select count(*) from vectorqual where metric2 = any(array[null, 32]) /* any with null element */; +select count(*) from vectorqual where metric2 = any(array[22, null, 32]) /* any with null element */; +select count(*) from vectorqual where metric2 = all(array[null::int]) /* all with null element */; +select count(*) from vectorqual where metric2 = all(array[22, null]) /* all with null element */; +select count(*) from vectorqual where metric2 = all(array[null, 32]) /* all with null element */; +select count(*) from vectorqual where metric2 = all(array[22, null, 32]) /* all with null element */; + + -- Comparison with other column not vectorized. 
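The column-to-column comparisons exercised below fail a different test than the operator lookup: the right-hand side has to collapse to a constant at run time. Condensed from the shape check in make_vectorized_qual() above, with the surrounding logic elided.

	/*
	 * After trying to commute the operator, the left argument must be a Var
	 * of the decompressed chunk and the right one must be a run-time
	 * constant. Both "metric3 = metric4" and "metric3 = any(array[metric4])"
	 * fail here and remain ordinary executor quals.
	 */
	if (!IsA(linitial(args), Var) || is_not_runtime_constant(lsecond(args)))
	{
		return NULL;
	}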
+set timescaledb.debug_require_vector_qual to 'forbid'; select count(*) from vectorqual where metric3 = metric4; select count(*) from vectorqual where metric3 = any(array[metric4]); From 05fb7d0247b6b9da01752eb9c46cb2df92b249c7 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 10 Nov 2023 14:09:37 +0100 Subject: [PATCH 036/249] add early exit to saop --- tsl/src/nodes/decompress_chunk/planner.c | 12 +- .../decompress_chunk/vector_predicates.c | 60 +++++- tsl/test/expected/decompress_vector_qual.out | 201 +++++++++++++++++- tsl/test/sql/decompress_vector_qual.sql | 45 ++++ 4 files changed, 309 insertions(+), 9 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/planner.c b/tsl/src/nodes/decompress_chunk/planner.c index 8f38f64ee95..b4c7703fe1a 100644 --- a/tsl/src/nodes/decompress_chunk/planner.c +++ b/tsl/src/nodes/decompress_chunk/planner.c @@ -918,10 +918,16 @@ decompress_chunk_plan_create(PlannerInfo *root, RelOptInfo *rel, CustomPath *pat { elog(ERROR, "debug: encountered vector quals when they are disabled"); } - else if (ts_guc_debug_require_vector_qual == RVQ_Only && - list_length(decompress_plan->scan.plan.qual) > 0) + else if (ts_guc_debug_require_vector_qual == RVQ_Only) { - elog(ERROR, "debug: encountered non-vector quals when they are disabled"); + if (list_length(decompress_plan->scan.plan.qual) > 0) + { + elog(ERROR, "debug: encountered non-vector quals when they are disabled"); + } + if (list_length(vectorized_quals) == 0) + { + elog(ERROR, "debug: did not encounter vector quals when they are required"); + } } #endif diff --git a/tsl/src/nodes/decompress_chunk/vector_predicates.c b/tsl/src/nodes/decompress_chunk/vector_predicates.c index 84276f57814..c0b1fb3227a 100644 --- a/tsl/src/nodes/decompress_chunk/vector_predicates.c +++ b/tsl/src/nodes/decompress_chunk/vector_predicates.c @@ -55,7 +55,7 @@ vector_array_operator_impl(VectorPredicate *vector_const_predicate, bool is_or, const size_t result_bits = vector->length; const size_t result_words = (result_bits + 63) / 64; - uint64 *restrict array_result; + uint64 *restrict array_result = NULL; /* * For OR, we need an intermediate storage to accumulate the results * from all elements. @@ -69,10 +69,17 @@ vector_array_operator_impl(VectorPredicate *vector_const_predicate, bool is_or, { array_result_storage[i] = 0; } - } - else - { - array_result = final_result; + + if (vector->length % 64 != 0) + { + /* + * Set the bits for past-the-end elements to 1. This way it's more + * convenient to check for early exit, and the final result should + * have them already set to 0 so it doesn't matter. + */ + const uint64 mask = ((uint64) -1) << (vector->length % 64); + array_result[vector->length / 64] = mask; + } } ArrayType *arr = DatumGetArrayTypeP(array); @@ -147,6 +154,49 @@ vector_array_operator_impl(VectorPredicate *vector_const_predicate, bool is_or, array_result[outer] |= single_result[outer]; } } + + /* + * On big arrays, we want to sometimes check if we can exit early, + * to avoid being slower than the non-vectorized version which exits + * at first possibility. + * In debug mode, do this more frequently to simplify testing. + */ +#ifdef NDEBUG + if (array_index > 0 && array_index % 16 == 0) +#else + if (array_index > 0 && array_index % 3 == 0) +#endif + { + if (is_or) + { + /* + * Note that we have set the bits for past-the-end rows in + * array_result to 1, so we can use simple AND here. 
+ */ + uint64 all_rows_match = -1; + for (size_t word = 0; word < result_words; word++) + { + all_rows_match &= array_result[word]; + } + if (all_rows_match == -1ULL) + { + return; + } + } + else + { + uint64 any_rows_match = 0; + for (size_t word = 0; word < result_words; word++) + { + any_rows_match |= final_result[word]; + } + if (any_rows_match == 0) + { + fprintf(stderr, "early exit at %ld yay!\n", array_index); + return; + } + } + } } if (is_or) diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index 35b976cf4b4..4aed089beb8 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -292,6 +292,205 @@ select count(*) from vectorqual where metric2 = all(array[22, null, 32]) /* all 0 (1 row) +-- Check early exit. +reset timescaledb.debug_require_vector_qual; +create table singlebatch(like vectorqual); +select create_hypertable('singlebatch', 'ts'); +WARNING: column type "timestamp without time zone" used for "ts" does not follow best practices + create_hypertable +-------------------------- + (3,public,singlebatch,t) +(1 row) + +alter table singlebatch set (timescaledb.compress); +insert into singlebatch select '2022-02-02 02:02:02', metric2, device, metric3, metric4, tag from vectorqual; +select count(compress_chunk(x, true)) from show_chunks('singlebatch') x; + count +------- + 1 +(1 row) + +set timescaledb.debug_require_vector_qual to 'only'; +select count(*) from singlebatch where metric2 = any(array[0, 0, 0, 0, 22]); + count +------- + 1 +(1 row) + +select count(*) from singlebatch where metric2 = any(array[0, 22, 0, 0, 0]); + count +------- + 1 +(1 row) + +select count(*) from singlebatch where metric2 = any(array[0, 0, 0, 0, 0]); + count +------- + 0 +(1 row) + +select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 0]); + count +------- + 0 +(1 row) + +select count(*) from singlebatch where metric2 <= all(array[12, 0, 12, 12, 12]); + count +------- + 0 +(1 row) + +select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 12]); + count +------- + 1 +(1 row) + +select count(*) from singlebatch where metric3 = 777 and metric2 = any(array[0, 0, 0, 0, 22]); + count +------- + 1 +(1 row) + +select count(*) from singlebatch where metric3 = 777 and metric2 = any(array[0, 22, 0, 0, 0]); + count +------- + 1 +(1 row) + +select count(*) from singlebatch where metric3 = 777 and metric2 = any(array[0, 0, 0, 0, 0]); + count +------- + 0 +(1 row) + +select count(*) from singlebatch where metric3 = 777 and metric2 <= all(array[12, 12, 12, 12, 0]); + count +------- + 0 +(1 row) + +select count(*) from singlebatch where metric3 = 777 and metric2 <= all(array[12, 0, 12, 12, 12]); + count +------- + 0 +(1 row) + +select count(*) from singlebatch where metric3 = 777 and metric2 <= all(array[12, 12, 12, 12, 12]); + count +------- + 1 +(1 row) + +select count(*) from singlebatch where metric2 = any(array[0, 0, 0, 0, 22]) and metric3 = 777; + count +------- + 1 +(1 row) + +select count(*) from singlebatch where metric2 = any(array[0, 22, 0, 0, 0]) and metric3 = 777; + count +------- + 1 +(1 row) + +select count(*) from singlebatch where metric2 = any(array[0, 0, 0, 0, 0]) and metric3 = 777; + count +------- + 0 +(1 row) + +select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 0]) and metric3 = 777; + count +------- + 0 +(1 row) + +select count(*) from singlebatch where metric2 <= all(array[12, 0, 12, 12, 12]) and metric3 = 777; + count 
+------- + 0 +(1 row) + +select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 12]) and metric3 = 777; + count +------- + 1 +(1 row) + +select count(*) from singlebatch where metric3 != 777 and metric2 = any(array[0, 0, 0, 0, 22]); + count +------- + 0 +(1 row) + +select count(*) from singlebatch where metric3 != 777 and metric2 = any(array[0, 22, 0, 0, 0]); + count +------- + 0 +(1 row) + +select count(*) from singlebatch where metric3 != 777 and metric2 = any(array[0, 0, 0, 0, 0]); + count +------- + 0 +(1 row) + +select count(*) from singlebatch where metric3 != 777 and metric2 <= all(array[12, 12, 12, 12, 0]); + count +------- + 0 +(1 row) + +select count(*) from singlebatch where metric3 != 777 and metric2 <= all(array[12, 0, 12, 12, 12]); + count +------- + 0 +(1 row) + +select count(*) from singlebatch where metric3 != 777 and metric2 <= all(array[12, 12, 12, 12, 12]); + count +------- + 0 +(1 row) + +select count(*) from singlebatch where metric2 = any(array[0, 0, 0, 0, 22]) and metric3 != 777; + count +------- + 0 +(1 row) + +select count(*) from singlebatch where metric2 = any(array[0, 22, 0, 0, 0]) and metric3 != 777; + count +------- + 0 +(1 row) + +select count(*) from singlebatch where metric2 = any(array[0, 0, 0, 0, 0]) and metric3 != 777; + count +------- + 0 +(1 row) + +select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 0]) and metric3 != 777; + count +------- + 0 +(1 row) + +select count(*) from singlebatch where metric2 <= all(array[12, 0, 12, 12, 12]) and metric3 != 777; + count +------- + 0 +(1 row) + +select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 12]) and metric3 != 777; + count +------- + 0 +(1 row) + -- Comparison with other column not vectorized. set timescaledb.debug_require_vector_qual to 'forbid'; select count(*) from vectorqual where metric3 = metric4; @@ -378,7 +577,7 @@ select create_hypertable('date_table', 'ts'); NOTICE: adding not-null constraint to column "ts" create_hypertable ------------------------- - (3,public,date_table,t) + (5,public,date_table,t) (1 row) alter table date_table set (timescaledb.compress); diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index 757dde86e08..b0ef79ff669 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -103,6 +103,51 @@ select count(*) from vectorqual where metric2 = all(array[22, null]) /* all with select count(*) from vectorqual where metric2 = all(array[null, 32]) /* all with null element */; select count(*) from vectorqual where metric2 = all(array[22, null, 32]) /* all with null element */; +-- Check early exit. 
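The singlebatch table built below packs every row into one compressed batch so that the early-exit checks added above are hit deterministically (in debug builds the check runs every few array elements to make that easier). A minimal sketch of both sides of the check, assuming the same one-bit-per-row word layout as before.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* ALL (AND) side: once no row can pass anymore, the remaining array elements
 * are irrelevant and scanning can stop. */
static bool
all_side_done(const uint64_t *final_result, size_t result_words)
{
	uint64_t any_rows_match = 0;
	for (size_t word = 0; word < result_words; word++)
		any_rows_match |= final_result[word];
	return any_rows_match == 0;
}

/* ANY (OR) side: stop once every row already matches. The bits for
 * past-the-end rows in the accumulator must have been pre-set to 1, so a
 * plain all-ones test works when the batch length is not a multiple of 64. */
static bool
any_side_done(const uint64_t *array_result, size_t result_words)
{
	uint64_t all_rows_match = ~UINT64_C(0);
	for (size_t word = 0; word < result_words; word++)
		all_rows_match &= array_result[word];
	return all_rows_match == ~UINT64_C(0);
}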
+reset timescaledb.debug_require_vector_qual; +create table singlebatch(like vectorqual); +select create_hypertable('singlebatch', 'ts'); +alter table singlebatch set (timescaledb.compress); +insert into singlebatch select '2022-02-02 02:02:02', metric2, device, metric3, metric4, tag from vectorqual; +select count(compress_chunk(x, true)) from show_chunks('singlebatch') x; + +set timescaledb.debug_require_vector_qual to 'only'; +select count(*) from singlebatch where metric2 = any(array[0, 0, 0, 0, 22]); +select count(*) from singlebatch where metric2 = any(array[0, 22, 0, 0, 0]); +select count(*) from singlebatch where metric2 = any(array[0, 0, 0, 0, 0]); +select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 0]); +select count(*) from singlebatch where metric2 <= all(array[12, 0, 12, 12, 12]); +select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 12]); + +select count(*) from singlebatch where metric3 = 777 and metric2 = any(array[0, 0, 0, 0, 22]); +select count(*) from singlebatch where metric3 = 777 and metric2 = any(array[0, 22, 0, 0, 0]); +select count(*) from singlebatch where metric3 = 777 and metric2 = any(array[0, 0, 0, 0, 0]); +select count(*) from singlebatch where metric3 = 777 and metric2 <= all(array[12, 12, 12, 12, 0]); +select count(*) from singlebatch where metric3 = 777 and metric2 <= all(array[12, 0, 12, 12, 12]); +select count(*) from singlebatch where metric3 = 777 and metric2 <= all(array[12, 12, 12, 12, 12]); + +select count(*) from singlebatch where metric2 = any(array[0, 0, 0, 0, 22]) and metric3 = 777; +select count(*) from singlebatch where metric2 = any(array[0, 22, 0, 0, 0]) and metric3 = 777; +select count(*) from singlebatch where metric2 = any(array[0, 0, 0, 0, 0]) and metric3 = 777; +select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 0]) and metric3 = 777; +select count(*) from singlebatch where metric2 <= all(array[12, 0, 12, 12, 12]) and metric3 = 777; +select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 12]) and metric3 = 777; + +select count(*) from singlebatch where metric3 != 777 and metric2 = any(array[0, 0, 0, 0, 22]); +select count(*) from singlebatch where metric3 != 777 and metric2 = any(array[0, 22, 0, 0, 0]); +select count(*) from singlebatch where metric3 != 777 and metric2 = any(array[0, 0, 0, 0, 0]); +select count(*) from singlebatch where metric3 != 777 and metric2 <= all(array[12, 12, 12, 12, 0]); +select count(*) from singlebatch where metric3 != 777 and metric2 <= all(array[12, 0, 12, 12, 12]); +select count(*) from singlebatch where metric3 != 777 and metric2 <= all(array[12, 12, 12, 12, 12]); + +select count(*) from singlebatch where metric2 = any(array[0, 0, 0, 0, 22]) and metric3 != 777; +select count(*) from singlebatch where metric2 = any(array[0, 22, 0, 0, 0]) and metric3 != 777; +select count(*) from singlebatch where metric2 = any(array[0, 0, 0, 0, 0]) and metric3 != 777; +select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 0]) and metric3 != 777; +select count(*) from singlebatch where metric2 <= all(array[12, 0, 12, 12, 12]) and metric3 != 777; +select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 12]) and metric3 != 777; + + -- Comparison with other column not vectorized. 
set timescaledb.debug_require_vector_qual to 'forbid'; From 55153cd7bad1448d9e07e84f2db5e1ac5d46bfba Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 10 Nov 2023 15:07:09 +0100 Subject: [PATCH 037/249] early 4 --- tsl/src/nodes/decompress_chunk/vector_predicates.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/vector_predicates.c b/tsl/src/nodes/decompress_chunk/vector_predicates.c index c0b1fb3227a..3bfdcff405f 100644 --- a/tsl/src/nodes/decompress_chunk/vector_predicates.c +++ b/tsl/src/nodes/decompress_chunk/vector_predicates.c @@ -158,11 +158,11 @@ vector_array_operator_impl(VectorPredicate *vector_const_predicate, bool is_or, /* * On big arrays, we want to sometimes check if we can exit early, * to avoid being slower than the non-vectorized version which exits - * at first possibility. + * at first possibility. The frequency is chosen by benchmarking. * In debug mode, do this more frequently to simplify testing. */ #ifdef NDEBUG - if (array_index > 0 && array_index % 16 == 0) + if (array_index > 0 && array_index % 4 == 0) #else if (array_index > 0 && array_index % 3 == 0) #endif From 294632bd9f57c4f5d7f52f38b0ac94d840875250 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 10 Nov 2023 18:45:47 +0100 Subject: [PATCH 038/249] remove debug --- tsl/src/nodes/decompress_chunk/vector_predicates.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tsl/src/nodes/decompress_chunk/vector_predicates.c b/tsl/src/nodes/decompress_chunk/vector_predicates.c index 3bfdcff405f..9006382cd98 100644 --- a/tsl/src/nodes/decompress_chunk/vector_predicates.c +++ b/tsl/src/nodes/decompress_chunk/vector_predicates.c @@ -192,7 +192,6 @@ vector_array_operator_impl(VectorPredicate *vector_const_predicate, bool is_or, } if (any_rows_match == 0) { - fprintf(stderr, "early exit at %ld yay!\n", array_index); return; } } From f6926aefd90864ca45b1f706caf42a515f63cdb2 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Sat, 11 Nov 2023 15:37:22 +0100 Subject: [PATCH 039/249] fixes after merge --- .../nodes/decompress_chunk/compressed_batch.c | 115 ++---------------- tsl/test/expected/decompress_vector_qual.out | 8 +- tsl/test/sql/decompress_vector_qual.sql | 1 + 3 files changed, 17 insertions(+), 107 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 5dc164e5519..55310589622 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -121,112 +121,6 @@ translate_from_dictionary(const ArrowArray *arrow, uint64 *restrict dict_result, #undef INNER_LOOP } -static inline void -vector_predicate_saop_impl(VectorPredicate *vector_const_predicate, bool is_or, - const ArrowArray *vector, Datum array, uint64 *restrict final_result) -{ - const size_t result_bits = vector->length; - const size_t result_words = (result_bits + 63) / 64; - - uint64 *restrict array_result; - /* - * For OR, we need an intermediate storage to accumulate the results - * from all elements. - * For AND, we can apply predicate for each element to the final result. 
- */ - uint64 array_result_storage[(GLOBAL_MAX_ROWS_PER_COMPRESSION + 63) / 64]; - if (is_or) - { - array_result = array_result_storage; - for (size_t i = 0; i < result_words; i++) - { - array_result_storage[i] = 0; - } - } - else - { - array_result = final_result; - } - - ArrayType *arr = DatumGetArrayTypeP(array); - - int16 typlen; - bool typbyval; - char typalign; - get_typlenbyvalalign(ARR_ELEMTYPE(arr), &typlen, &typbyval, &typalign); - - const char *s = (const char *) ARR_DATA_PTR(arr); - Ensure(ARR_NULLBITMAP(arr) == NULL, - "vectorized scalar array ops do not support nullable arrays"); - - const int nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); - - for (int i = 0; i < nitems; i++) - { - Datum constvalue = fetch_att(s, typbyval, typlen); - s = att_addlength_pointer(s, typlen, s); - s = (char *) att_align_nominal(s, typalign); - - /* - * For OR, we also need an intermediate storage for predicate result - * for each array element, since the predicates AND their result. - * - * For AND, we can and apply predicate for each array element to the - * final result. - */ - uint64 single_result_storage[(GLOBAL_MAX_ROWS_PER_COMPRESSION + 63) / 64]; - uint64 *restrict single_result; - if (is_or) - { - single_result = single_result_storage; - for (size_t outer = 0; outer < result_words; outer++) - { - single_result[outer] = -1; - } - } - else - { - single_result = final_result; - } - - vector_const_predicate(vector, constvalue, single_result); - - if (is_or) - { - for (size_t outer = 0; outer < result_words; outer++) - { - array_result[outer] |= single_result[outer]; - } - } - } - - if (is_or) - { - for (size_t outer = 0; outer < result_words; outer++) - { - /* - * The tail bits corresponding to past-the-end rows when n % 64 != 0 - * should be already zeroed out in the final_result. - */ - final_result[outer] &= array_result[outer]; - } - } -} - -static void -vector_predicate_saop_and(VectorPredicate *scalar_predicate, const ArrowArray *vector, Datum array, - uint64 *restrict result) -{ - vector_predicate_saop_impl(scalar_predicate, /* is_or = */ false, vector, array, result); -} - -static void -vector_predicate_saop_or(VectorPredicate *scalar_predicate, const ArrowArray *vector, Datum array, - uint64 *restrict result) -{ - vector_predicate_saop_impl(scalar_predicate, /* is_or = */ true, vector, array, result); -} - static int get_max_element_bytes(ArrowArray *text_array) { @@ -530,6 +424,15 @@ compute_vector_quals(DecompressChunkState *chunk_state, DecompressBatchState *ba vector_const_predicate(vector_nodict, constnode->constvalue, predicate_result_nodict); } + /* + * If the vector is dictionary-encoded, we have just computed the + * predicate for dictionary and now have to translate it. + */ + if (vector->dictionary) + { + translate_from_dictionary(vector, predicate_result_nodict, predicate_result); + } + /* Account for nulls which shouldn't pass the predicate. 
*/ const size_t n = vector->length; const size_t n_words = (n + 63) / 64; diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index 3aa2a647bff..d2578e4d13d 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -629,7 +629,7 @@ WARNING: column type "timestamp without time zone" used for "ts" does not follo NOTICE: adding not-null constraint to column "ts" create_hypertable ------------------- - (5,public,t,t) + (7,public,t,t) (1 row) alter table t set (timescaledb.compress); @@ -664,3 +664,9 @@ select count(*), min(ts) from t where a = 'different1000'; 1 | Fri Jan 01 02:17:41 2021 (1 row) +select count(*), min(ts), max(ts) from t where a in ('same', 'different500'); + count | min | max +-------+--------------------------+-------------------------- + 1001 | Fri Jan 01 01:01:02 2021 | Fri Jan 01 02:09:21 2021 +(1 row) + diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index 431eccdd56a..96852c0be11 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -227,3 +227,4 @@ set timescaledb.debug_require_vector_qual to 'only'; select count(*), min(ts) from t where a = 'same'; select count(*), min(ts) from t where a = 'different1'; select count(*), min(ts) from t where a = 'different1000'; +select count(*), min(ts), max(ts) from t where a in ('same', 'different500'); From a7b7aef4411e7199c04f83911b9690131f4f8ed3 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 20 Nov 2023 13:25:43 +0100 Subject: [PATCH 040/249] 16 --- tsl/src/nodes/decompress_chunk/vector_predicates.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tsl/src/nodes/decompress_chunk/vector_predicates.c b/tsl/src/nodes/decompress_chunk/vector_predicates.c index 9006382cd98..18110aca551 100644 --- a/tsl/src/nodes/decompress_chunk/vector_predicates.c +++ b/tsl/src/nodes/decompress_chunk/vector_predicates.c @@ -162,7 +162,7 @@ vector_array_operator_impl(VectorPredicate *vector_const_predicate, bool is_or, * In debug mode, do this more frequently to simplify testing. 
*/ #ifdef NDEBUG - if (array_index > 0 && array_index % 4 == 0) + if (array_index > 0 && array_index % 16 == 0) #else if (array_index > 0 && array_index % 3 == 0) #endif From f1448e179d9962aeff6a5b70ccf9e854dce86923 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 6 Dec 2023 17:39:17 +0100 Subject: [PATCH 041/249] cleanup --- tsl/src/nodes/decompress_chunk/CMakeLists.txt | 1 + .../decompress_chunk/pred_vector_array.c | 207 ++++++++++++++++++ .../decompress_chunk/vector_predicates.c | 186 ---------------- tsl/test/expected/decompress_vector_qual.out | 42 ++++ tsl/test/sql/decompress_vector_qual.sql | 14 ++ 5 files changed, 264 insertions(+), 186 deletions(-) create mode 100644 tsl/src/nodes/decompress_chunk/pred_vector_array.c diff --git a/tsl/src/nodes/decompress_chunk/CMakeLists.txt b/tsl/src/nodes/decompress_chunk/CMakeLists.txt index 0ef8c79a67a..ab122f1dfd6 100644 --- a/tsl/src/nodes/decompress_chunk/CMakeLists.txt +++ b/tsl/src/nodes/decompress_chunk/CMakeLists.txt @@ -6,6 +6,7 @@ set(SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/decompress_chunk.c ${CMAKE_CURRENT_SOURCE_DIR}/exec.c ${CMAKE_CURRENT_SOURCE_DIR}/planner.c + ${CMAKE_CURRENT_SOURCE_DIR}/pred_vector_array.c ${CMAKE_CURRENT_SOURCE_DIR}/qual_pushdown.c ${CMAKE_CURRENT_SOURCE_DIR}/vector_predicates.c) target_sources(${TSL_LIBRARY_NAME} PRIVATE ${SOURCES}) diff --git a/tsl/src/nodes/decompress_chunk/pred_vector_array.c b/tsl/src/nodes/decompress_chunk/pred_vector_array.c new file mode 100644 index 00000000000..3e483fa79ca --- /dev/null +++ b/tsl/src/nodes/decompress_chunk/pred_vector_array.c @@ -0,0 +1,207 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. + */ + +#include + +#include "compression/arrow_c_data_interface.h" + +#include "vector_predicates.h" + +#include "compression/compression.h" + +/* + * Vectorized implementation of ScalarArrayOpExpr. Applies scalar_predicate for + * vector and each element of array, combines the result according to "is_or" + * flag. Written along the lines of ExecEvalScalarArrayOp(). + */ +static inline void +vector_array_operator_impl(VectorPredicate *vector_const_predicate, bool is_or, + const ArrowArray *vector, Datum array, uint64 *restrict final_result) +{ + const size_t result_bits = vector->length; + const size_t result_words = (result_bits + 63) / 64; + + uint64 *restrict array_result = NULL; + /* + * For OR, we need an intermediate storage to accumulate the results + * from all elements. + * For AND, we can apply predicate for each element to the final result. + */ + uint64 array_result_storage[(GLOBAL_MAX_ROWS_PER_COMPRESSION + 63) / 64]; + if (is_or) + { + array_result = array_result_storage; + for (size_t i = 0; i < result_words; i++) + { + array_result_storage[i] = 0; + } + + if (vector->length % 64 != 0) + { + /* + * Set the bits for past-the-end elements to 1. This way it's more + * convenient to check for early exit, and the final result should + * have them already set to 0 so it doesn't matter. 
+ */ + const uint64 mask = ((uint64) -1) << (vector->length % 64); + array_result[vector->length / 64] = mask; + } + } + + ArrayType *arr = DatumGetArrayTypeP(array); + + int16 typlen; + bool typbyval; + char typalign; + get_typlenbyvalalign(ARR_ELEMTYPE(arr), &typlen, &typbyval, &typalign); + + const char *array_data = (const char *) ARR_DATA_PTR(arr); + const size_t nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); + const uint64 *restrict array_null_bitmap = (uint64 *) ARR_NULLBITMAP(arr); + + for (size_t array_index = 0; array_index < nitems; array_index++) + { + if (array_null_bitmap != NULL && !arrow_row_is_valid(array_null_bitmap, array_index)) + { + /* + * This array element is NULL. We can't avoid NULLS when evaluating + * the stable functions at run time, so we have to support them. + * This is a predicate, not a generic scalar array operation, so + * thankfully we return a non-nullable bool. + * For ANY: null | true = true, null | false = null, so this means + * we can skip the null element and continue evaluation. + * For ALL: null & true = null, null & false = false, so this means + * that for each row the condition goes to false, and we don't have + * to evaluate the next elements. + */ + if (is_or) + { + continue; + } + + for (size_t word = 0; word < result_words; word++) + { + final_result[word] = 0; + } + return; + } + Datum constvalue = fetch_att(array_data, typbyval, typlen); + array_data = att_addlength_pointer(array_data, typlen, array_data); + array_data = (char *) att_align_nominal(array_data, typalign); + + /* + * For OR, we also need an intermediate storage for predicate result + * for each array element, since the predicates AND their result. + * + * For AND, we can and apply predicate for each array element to the + * final result. + */ + uint64 single_result_storage[(GLOBAL_MAX_ROWS_PER_COMPRESSION + 63) / 64]; + uint64 *restrict single_result; + if (is_or) + { + single_result = single_result_storage; + for (size_t outer = 0; outer < result_words; outer++) + { + single_result[outer] = -1; + } + } + else + { + single_result = final_result; + } + + vector_const_predicate(vector, constvalue, single_result); + + if (is_or) + { + for (size_t outer = 0; outer < result_words; outer++) + { + array_result[outer] |= single_result[outer]; + } + } + + /* + * On big arrays, we want to sometimes check if we can exit early, + * to avoid being slower than the non-vectorized version which exits + * at first possibility. The frequency is chosen by benchmarking. + * In debug mode, do this more frequently to simplify testing. + */ +#ifdef NDEBUG + if (array_index > 0 && array_index % 16 == 0) +#else + if (array_index > 0 && array_index % 3 == 0) +#endif + { + if (is_or) + { + /* + * Note that we have set the bits for past-the-end rows in + * array_result to 1, so we can use simple AND here. + */ + uint64 all_rows_match = -1; + for (size_t word = 0; word < result_words; word++) + { + all_rows_match &= array_result[word]; + } + if (all_rows_match == -1ULL) + { + fprintf(stderr, "early exit -- all rows match!\n"); + return; + } + } + else + { + uint64 any_rows_match = 0; + for (size_t word = 0; word < result_words; word++) + { + any_rows_match |= final_result[word]; + } + if (any_rows_match == 0) + { + return; + } + } + } + } + + if (is_or) + { + for (size_t outer = 0; outer < result_words; outer++) + { + /* + * The tail bits corresponding to past-the-end rows when n % 64 != 0 + * should be already zeroed out in the final_result. 
+ */ + final_result[outer] &= array_result[outer]; + } + } +} + +/* + * This is a thin wrapper to nudge the compiler to specialize the AND version + * which is much simpler than the OR version. + */ +static pg_noinline void +vector_array_operator_and(VectorPredicate *scalar_predicate, const ArrowArray *vector, Datum array, + uint64 *restrict result) +{ + return vector_array_operator_impl(scalar_predicate, /* is_or = */ false, vector, array, result); +} + +void +vector_array_operator(VectorPredicate *scalar_predicate, bool is_or, const ArrowArray *vector, + Datum array, uint64 *restrict result) +{ + if (is_or) + { + vector_array_operator_impl(scalar_predicate, /* is_or = */ true, vector, array, result); + } + else + { + vector_array_operator_and(scalar_predicate, vector, array, result); + } +} diff --git a/tsl/src/nodes/decompress_chunk/vector_predicates.c b/tsl/src/nodes/decompress_chunk/vector_predicates.c index 18110aca551..f225383ec3f 100644 --- a/tsl/src/nodes/decompress_chunk/vector_predicates.c +++ b/tsl/src/nodes/decompress_chunk/vector_predicates.c @@ -42,189 +42,3 @@ get_vector_const_predicate(Oid pg_predicate) } return NULL; } - -/* - * Vectorized implementation of ScalarArrayOpExpr. Applies scalar_predicate for - * vector and each element of array, combines the result according to "is_or" - * flag. Written along the lines of ExecEvalScalarArrayOp(). - */ -static inline void -vector_array_operator_impl(VectorPredicate *vector_const_predicate, bool is_or, - const ArrowArray *vector, Datum array, uint64 *restrict final_result) -{ - const size_t result_bits = vector->length; - const size_t result_words = (result_bits + 63) / 64; - - uint64 *restrict array_result = NULL; - /* - * For OR, we need an intermediate storage to accumulate the results - * from all elements. - * For AND, we can apply predicate for each element to the final result. - */ - uint64 array_result_storage[(GLOBAL_MAX_ROWS_PER_COMPRESSION + 63) / 64]; - if (is_or) - { - array_result = array_result_storage; - for (size_t i = 0; i < result_words; i++) - { - array_result_storage[i] = 0; - } - - if (vector->length % 64 != 0) - { - /* - * Set the bits for past-the-end elements to 1. This way it's more - * convenient to check for early exit, and the final result should - * have them already set to 0 so it doesn't matter. - */ - const uint64 mask = ((uint64) -1) << (vector->length % 64); - array_result[vector->length / 64] = mask; - } - } - - ArrayType *arr = DatumGetArrayTypeP(array); - - int16 typlen; - bool typbyval; - char typalign; - get_typlenbyvalalign(ARR_ELEMTYPE(arr), &typlen, &typbyval, &typalign); - - const char *array_data = (const char *) ARR_DATA_PTR(arr); - const size_t nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); - const uint64 *restrict array_null_bitmap = (uint64 *) ARR_NULLBITMAP(arr); - - for (size_t array_index = 0; array_index < nitems; array_index++) - { - if (array_null_bitmap != NULL && !arrow_row_is_valid(array_null_bitmap, array_index)) - { - /* - * This array element is NULL. We can't avoid NULLS when evaluating - * the stable functions at run time, so we have to support them. - * This is a predicate, not a generic scalar array operation, so - * thankfully we return a non-nullable bool. - * For ANY: null | true = true, null | false = null, so this means - * we can skip the null element and continue evaluation. - * For ALL: null & true = null, null & false = false, so this means - * that for each row the condition goes to false, and we don't have - * to evaluate the next elements. 
- */ - if (is_or) - { - continue; - } - - for (size_t word = 0; word < result_words; word++) - { - final_result[word] = 0; - } - return; - } - Datum constvalue = fetch_att(array_data, typbyval, typlen); - array_data = att_addlength_pointer(array_data, typlen, array_data); - array_data = (char *) att_align_nominal(array_data, typalign); - - /* - * For OR, we also need an intermediate storage for predicate result - * for each array element, since the predicates AND their result. - * - * For AND, we can and apply predicate for each array element to the - * final result. - */ - uint64 single_result_storage[(GLOBAL_MAX_ROWS_PER_COMPRESSION + 63) / 64]; - uint64 *restrict single_result; - if (is_or) - { - single_result = single_result_storage; - for (size_t outer = 0; outer < result_words; outer++) - { - single_result[outer] = -1; - } - } - else - { - single_result = final_result; - } - - vector_const_predicate(vector, constvalue, single_result); - - if (is_or) - { - for (size_t outer = 0; outer < result_words; outer++) - { - array_result[outer] |= single_result[outer]; - } - } - - /* - * On big arrays, we want to sometimes check if we can exit early, - * to avoid being slower than the non-vectorized version which exits - * at first possibility. The frequency is chosen by benchmarking. - * In debug mode, do this more frequently to simplify testing. - */ -#ifdef NDEBUG - if (array_index > 0 && array_index % 16 == 0) -#else - if (array_index > 0 && array_index % 3 == 0) -#endif - { - if (is_or) - { - /* - * Note that we have set the bits for past-the-end rows in - * array_result to 1, so we can use simple AND here. - */ - uint64 all_rows_match = -1; - for (size_t word = 0; word < result_words; word++) - { - all_rows_match &= array_result[word]; - } - if (all_rows_match == -1ULL) - { - return; - } - } - else - { - uint64 any_rows_match = 0; - for (size_t word = 0; word < result_words; word++) - { - any_rows_match |= final_result[word]; - } - if (any_rows_match == 0) - { - return; - } - } - } - } - - if (is_or) - { - for (size_t outer = 0; outer < result_words; outer++) - { - /* - * The tail bits corresponding to past-the-end rows when n % 64 != 0 - * should be already zeroed out in the final_result. - */ - final_result[outer] &= array_result[outer]; - } - } -} - -/* - * This is a thin wrapper to nudge the compiler to inline the AND version which - * is much simpler than OR version. - */ -void -vector_array_operator(VectorPredicate *scalar_predicate, bool is_or, const ArrowArray *vector, - Datum array, uint64 *restrict result) -{ - if (is_or) - { - vector_array_operator_impl(scalar_predicate, /* is_or = */ true, vector, array, result); - } - else - { - vector_array_operator_impl(scalar_predicate, /* is_or = */ false, vector, array, result); - } -} diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index 4aed089beb8..4ccf378e15a 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -201,6 +201,13 @@ select count(*) from vectorqual where metric3 === any(array[777, 888]); 2 (1 row) +-- It also doesn't have a commutator. +select count(*) from vectorqual where 777 === metric3; + count +------- + 2 +(1 row) + -- NullTest is not vectorized. 
set timescaledb.debug_require_vector_qual to 'forbid'; select count(*) from vectorqual where metric4 is null; @@ -311,6 +318,9 @@ select count(compress_chunk(x, true)) from show_chunks('singlebatch') x; (1 row) set timescaledb.debug_require_vector_qual to 'only'; +-- Uncomment to generate the test reference w/o the vector optimizations. +-- set timescaledb.enable_bulk_decompression to off; +-- set timescaledb.debug_require_vector_qual to 'forbid'; select count(*) from singlebatch where metric2 = any(array[0, 0, 0, 0, 22]); count ------- @@ -329,6 +339,12 @@ select count(*) from singlebatch where metric2 = any(array[0, 0, 0, 0, 0]); 0 (1 row) +select count(*) from singlebatch where metric2 != any(array[0, 0, 0, 0, 0]); + count +------- + 5 +(1 row) + select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 0]); count ------- @@ -365,6 +381,12 @@ select count(*) from singlebatch where metric3 = 777 and metric2 = any(array[0, 0 (1 row) +select count(*) from singlebatch where metric3 = 777 and metric2 != any(array[0, 0, 0, 0, 0]); + count +------- + 2 +(1 row) + select count(*) from singlebatch where metric3 = 777 and metric2 <= all(array[12, 12, 12, 12, 0]); count ------- @@ -401,6 +423,12 @@ select count(*) from singlebatch where metric2 = any(array[0, 0, 0, 0, 0]) and m 0 (1 row) +select count(*) from singlebatch where metric2 != any(array[0, 0, 0, 0, 0]) and metric3 = 777; + count +------- + 2 +(1 row) + select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 0]) and metric3 = 777; count ------- @@ -437,6 +465,12 @@ select count(*) from singlebatch where metric3 != 777 and metric2 = any(array[0, 0 (1 row) +select count(*) from singlebatch where metric3 != 777 and metric2 != any(array[0, 0, 0, 0, 0]); + count +------- + 3 +(1 row) + select count(*) from singlebatch where metric3 != 777 and metric2 <= all(array[12, 12, 12, 12, 0]); count ------- @@ -473,6 +507,12 @@ select count(*) from singlebatch where metric2 = any(array[0, 0, 0, 0, 0]) and m 0 (1 row) +select count(*) from singlebatch where metric2 != any(array[0, 0, 0, 0, 0]) and metric3 != 777; + count +------- + 3 +(1 row) + select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 0]) and metric3 != 777; count ------- @@ -491,6 +531,8 @@ select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 12]) 0 (1 row) +reset timescaledb.enable_bulk_decompression; +reset timescaledb.debug_require_vector_qual; -- Comparison with other column not vectorized. set timescaledb.debug_require_vector_qual to 'forbid'; select count(*) from vectorqual where metric3 = metric4; diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index b0ef79ff669..9cee66531a0 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -77,6 +77,9 @@ create operator === (function = 'int4eqq', rightarg = int4, leftarg = int4); select count(*) from vectorqual where metric3 === 777; select count(*) from vectorqual where metric3 === any(array[777, 888]); +-- It also doesn't have a commutator. +select count(*) from vectorqual where 777 === metric3; + -- NullTest is not vectorized. 
set timescaledb.debug_require_vector_qual to 'forbid'; @@ -112,9 +115,14 @@ insert into singlebatch select '2022-02-02 02:02:02', metric2, device, metric3, select count(compress_chunk(x, true)) from show_chunks('singlebatch') x; set timescaledb.debug_require_vector_qual to 'only'; +-- Uncomment to generate the test reference w/o the vector optimizations. +-- set timescaledb.enable_bulk_decompression to off; +-- set timescaledb.debug_require_vector_qual to 'forbid'; + select count(*) from singlebatch where metric2 = any(array[0, 0, 0, 0, 22]); select count(*) from singlebatch where metric2 = any(array[0, 22, 0, 0, 0]); select count(*) from singlebatch where metric2 = any(array[0, 0, 0, 0, 0]); +select count(*) from singlebatch where metric2 != any(array[0, 0, 0, 0, 0]); select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 0]); select count(*) from singlebatch where metric2 <= all(array[12, 0, 12, 12, 12]); select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 12]); @@ -122,6 +130,7 @@ select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 12]) select count(*) from singlebatch where metric3 = 777 and metric2 = any(array[0, 0, 0, 0, 22]); select count(*) from singlebatch where metric3 = 777 and metric2 = any(array[0, 22, 0, 0, 0]); select count(*) from singlebatch where metric3 = 777 and metric2 = any(array[0, 0, 0, 0, 0]); +select count(*) from singlebatch where metric3 = 777 and metric2 != any(array[0, 0, 0, 0, 0]); select count(*) from singlebatch where metric3 = 777 and metric2 <= all(array[12, 12, 12, 12, 0]); select count(*) from singlebatch where metric3 = 777 and metric2 <= all(array[12, 0, 12, 12, 12]); select count(*) from singlebatch where metric3 = 777 and metric2 <= all(array[12, 12, 12, 12, 12]); @@ -129,6 +138,7 @@ select count(*) from singlebatch where metric3 = 777 and metric2 <= all(array[12 select count(*) from singlebatch where metric2 = any(array[0, 0, 0, 0, 22]) and metric3 = 777; select count(*) from singlebatch where metric2 = any(array[0, 22, 0, 0, 0]) and metric3 = 777; select count(*) from singlebatch where metric2 = any(array[0, 0, 0, 0, 0]) and metric3 = 777; +select count(*) from singlebatch where metric2 != any(array[0, 0, 0, 0, 0]) and metric3 = 777; select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 0]) and metric3 = 777; select count(*) from singlebatch where metric2 <= all(array[12, 0, 12, 12, 12]) and metric3 = 777; select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 12]) and metric3 = 777; @@ -136,6 +146,7 @@ select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 12]) select count(*) from singlebatch where metric3 != 777 and metric2 = any(array[0, 0, 0, 0, 22]); select count(*) from singlebatch where metric3 != 777 and metric2 = any(array[0, 22, 0, 0, 0]); select count(*) from singlebatch where metric3 != 777 and metric2 = any(array[0, 0, 0, 0, 0]); +select count(*) from singlebatch where metric3 != 777 and metric2 != any(array[0, 0, 0, 0, 0]); select count(*) from singlebatch where metric3 != 777 and metric2 <= all(array[12, 12, 12, 12, 0]); select count(*) from singlebatch where metric3 != 777 and metric2 <= all(array[12, 0, 12, 12, 12]); select count(*) from singlebatch where metric3 != 777 and metric2 <= all(array[12, 12, 12, 12, 12]); @@ -143,10 +154,13 @@ select count(*) from singlebatch where metric3 != 777 and metric2 <= all(array[1 select count(*) from singlebatch where metric2 = any(array[0, 0, 0, 0, 22]) and 
metric3 != 777; select count(*) from singlebatch where metric2 = any(array[0, 22, 0, 0, 0]) and metric3 != 777; select count(*) from singlebatch where metric2 = any(array[0, 0, 0, 0, 0]) and metric3 != 777; +select count(*) from singlebatch where metric2 != any(array[0, 0, 0, 0, 0]) and metric3 != 777; select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 0]) and metric3 != 777; select count(*) from singlebatch where metric2 <= all(array[12, 0, 12, 12, 12]) and metric3 != 777; select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 12]) and metric3 != 777; +reset timescaledb.enable_bulk_decompression; +reset timescaledb.debug_require_vector_qual; -- Comparison with other column not vectorized. From 3c25ceeb2b39521a0bd623f73a3f7d51e21265d8 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 6 Dec 2023 17:40:32 +0100 Subject: [PATCH 042/249] assert --- tsl/src/nodes/decompress_chunk/compressed_batch.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 328aafe2def..9705058c88c 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -225,9 +225,7 @@ compute_vector_quals(DecompressChunkState *chunk_state, DecompressBatchState *ba * Find the vector_const predicate. */ VectorPredicate *vector_const_predicate = get_vector_const_predicate(vector_const_opcode); - Ensure(vector_const_predicate != NULL, - "vectorized predicate not found for postgres predicate %d", - vector_const_opcode); + Assert(vector_const_predicate != NULL); /* * Find the compressed column referred to by the Var. 
From 2a6c6d6185446d6814923a886e1f6554838b841a Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 6 Dec 2023 17:46:19 +0100 Subject: [PATCH 043/249] return --- tsl/src/nodes/decompress_chunk/pred_vector_array.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tsl/src/nodes/decompress_chunk/pred_vector_array.c b/tsl/src/nodes/decompress_chunk/pred_vector_array.c index 3e483fa79ca..4c4e19739d2 100644 --- a/tsl/src/nodes/decompress_chunk/pred_vector_array.c +++ b/tsl/src/nodes/decompress_chunk/pred_vector_array.c @@ -189,7 +189,7 @@ static pg_noinline void vector_array_operator_and(VectorPredicate *scalar_predicate, const ArrowArray *vector, Datum array, uint64 *restrict result) { - return vector_array_operator_impl(scalar_predicate, /* is_or = */ false, vector, array, result); + vector_array_operator_impl(scalar_predicate, /* is_or = */ false, vector, array, result); } void From 85c0f10ce4dcd00fa204d8a2ac8edf5b1309c7c9 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 6 Dec 2023 20:33:42 +0100 Subject: [PATCH 044/249] benchmark bulk text (2023-12-06 #0) From 0c679f9d8f42653a3d1a5854e892540caba0cfa5 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 6 Dec 2023 20:36:27 +0100 Subject: [PATCH 045/249] format --- tsl/src/nodes/decompress_chunk/compressed_batch.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index d364fa59b4a..ca23cc59b72 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -413,10 +413,10 @@ compute_vector_quals(DecompressChunkState *chunk_state, DecompressBatchState *ba if (saop) { vector_array_predicate(vector_const_predicate, - saop->useOr, - vector_nodict, - constnode->constvalue, - predicate_result_nodict); + saop->useOr, + vector_nodict, + constnode->constvalue, + predicate_result_nodict); } else { From 8292dfacd086e7784843ed2efc25d94b1e39c0d6 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 7 Dec 2023 10:52:56 +0100 Subject: [PATCH 046/249] remove unneeded inline --- .../decompress_chunk/pred_vector_array.c | 35 +++---------------- 1 file changed, 5 insertions(+), 30 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/pred_vector_array.c b/tsl/src/nodes/decompress_chunk/pred_vector_array.c index da154644d9b..d9a497ed64a 100644 --- a/tsl/src/nodes/decompress_chunk/pred_vector_array.c +++ b/tsl/src/nodes/decompress_chunk/pred_vector_array.c @@ -17,9 +17,9 @@ * vector and each element of array, combines the result according to "is_or" * flag. Written along the lines of ExecEvalScalarArrayOp(). 
*/ -static inline void -vector_array_predicate_impl(VectorPredicate *vector_const_predicate, bool is_or, - const ArrowArray *vector, Datum array, uint64 *restrict final_result) +void +vector_array_predicate(VectorPredicate *vector_const_predicate, bool is_or, + const ArrowArray *vector, Datum array, uint64 *restrict final_result) { const size_t result_bits = vector->length; const size_t result_words = (result_bits + 63) / 64; @@ -58,7 +58,7 @@ vector_array_predicate_impl(VectorPredicate *vector_const_predicate, bool is_or, char typalign; get_typlenbyvalalign(ARR_ELEMTYPE(arr), &typlen, &typbyval, &typalign); - const char *array_data = (const char *) ARR_DATA_PTR(arr); + const char *restrict array_data = (const char *) ARR_DATA_PTR(arr); const size_t nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); const uint64 *restrict array_null_bitmap = (uint64 *) ARR_NULLBITMAP(arr); @@ -90,7 +90,7 @@ vector_array_predicate_impl(VectorPredicate *vector_const_predicate, bool is_or, } Datum constvalue = fetch_att(array_data, typbyval, typlen); array_data = att_addlength_pointer(array_data, typlen, array_data); - array_data = (char *) att_align_nominal(array_data, typalign); + array_data = (char * restrict) att_align_nominal(array_data, typalign); /* * For OR, we also need an intermediate storage for predicate result @@ -179,28 +179,3 @@ vector_array_predicate_impl(VectorPredicate *vector_const_predicate, bool is_or, } } } - -/* - * This is a thin wrapper to nudge the compiler to specialize the AND version - * which is much simpler than the OR version. - */ -static pg_noinline void -vector_array_predicate_and(VectorPredicate *scalar_predicate, const ArrowArray *vector, Datum array, - uint64 *restrict result) -{ - vector_array_predicate_impl(scalar_predicate, /* is_or = */ false, vector, array, result); -} - -void -vector_array_predicate(VectorPredicate *scalar_predicate, bool is_or, const ArrowArray *vector, - Datum array, uint64 *restrict result) -{ - if (is_or) - { - vector_array_predicate_impl(scalar_predicate, /* is_or = */ true, vector, array, result); - } - else - { - vector_array_predicate_and(scalar_predicate, vector, array, result); - } -} From c56920007d63b68d48ac331dc08be945cf1302d9 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 7 Dec 2023 12:58:06 +0100 Subject: [PATCH 047/249] Use a stateful detoaster for compressed data The normal Postgres detoasting code locks and opens toast tables and indexes for each toast value, which can add up to 30% CPU time on simple queries. Since in decompression we're working with one table at a time, the toast table and index are the same for every datum, so we don't have to redo this work. 
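
To make the intent concrete, here is a rough usage sketch of the lifecycle, not
code from this patch: the example_scan() wrapper and its argument list are made
up for illustration, while Detoaster, DecompressContext, ts_detoast_attr() and
detoaster_close() are the pieces added in the diff below. Locking details,
snapshot setup, error handling and PG version differences are omitted.

    /*
     * Hypothetical driver loop showing how an executor node is expected to
     * use the stateful detoaster.
     */
    static void
    example_scan(DecompressContext *dcontext, List *compressed_datums)
    {
        ListCell *lc;

        /* Lazily initialized: the toast relation is opened on first use. */
        dcontext->detoaster.mctx = CurrentMemoryContext;
        dcontext->detoaster.toastrel = NULL;

        foreach (lc, compressed_datums)
        {
            Datum value = PointerGetDatum(lfirst(lc));

            /*
             * The first call opens the toast table and its index and starts an
             * ordered index scan; subsequent calls only swap the scan key and
             * rescan, avoiding the per-datum table_open()/index_open().
             */
            struct varlena *detoasted =
                ts_detoast_attr((struct varlena *) DatumGetPointer(value),
                                &dcontext->detoaster);
            /* ... decompress the batch from 'detoasted' ... */
        }

        /* Close the toast relation, index and scan once per executor node. */
        detoaster_close(&dcontext->detoaster);
    }

The point is that the toast relation, index, scan key and snapshot live across
datums inside the Detoaster, so only the index rescan is paid per value.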
--- tsl/src/nodes/decompress_chunk/CMakeLists.txt | 1 + .../nodes/decompress_chunk/compressed_batch.c | 9 +- tsl/src/nodes/decompress_chunk/detoaster.c | 390 ++++++++++++++++++ tsl/src/nodes/decompress_chunk/detoaster.h | 31 ++ tsl/src/nodes/decompress_chunk/exec.c | 5 + tsl/src/nodes/decompress_chunk/exec.h | 3 + 6 files changed, 438 insertions(+), 1 deletion(-) create mode 100644 tsl/src/nodes/decompress_chunk/detoaster.c create mode 100644 tsl/src/nodes/decompress_chunk/detoaster.h diff --git a/tsl/src/nodes/decompress_chunk/CMakeLists.txt b/tsl/src/nodes/decompress_chunk/CMakeLists.txt index ab122f1dfd6..8de31a7adb8 100644 --- a/tsl/src/nodes/decompress_chunk/CMakeLists.txt +++ b/tsl/src/nodes/decompress_chunk/CMakeLists.txt @@ -4,6 +4,7 @@ set(SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/batch_queue_heap.c ${CMAKE_CURRENT_SOURCE_DIR}/compressed_batch.c ${CMAKE_CURRENT_SOURCE_DIR}/decompress_chunk.c + ${CMAKE_CURRENT_SOURCE_DIR}/detoaster.c ${CMAKE_CURRENT_SOURCE_DIR}/exec.c ${CMAKE_CURRENT_SOURCE_DIR}/planner.c ${CMAKE_CURRENT_SOURCE_DIR}/pred_vector_array.c diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index b3f1825cb5a..19e830871bd 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -113,8 +113,15 @@ decompress_column(DecompressChunkState *chunk_state, DecompressBatchState *batch return; } + /* Detoast the compressed datum. */ + if (VARATT_IS_EXTENDED(value)) + { + value = PointerGetDatum( + ts_detoast_attr((struct varlena *) DatumGetPointer(value), &dcontext->detoaster)); + } + /* Decompress the entire batch if it is supported. */ - CompressedDataHeader *header = (CompressedDataHeader *) PG_DETOAST_DATUM(value); + CompressedDataHeader *header = (CompressedDataHeader *) value; ArrowArray *arrow = NULL; if (dcontext->enable_bulk_decompression && column_description->bulk_decompression_supported) { diff --git a/tsl/src/nodes/decompress_chunk/detoaster.c b/tsl/src/nodes/decompress_chunk/detoaster.c new file mode 100644 index 00000000000..d5becca46ed --- /dev/null +++ b/tsl/src/nodes/decompress_chunk/detoaster.c @@ -0,0 +1,390 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. + */ + +#include "detoaster.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "debug_assert.h" + +/* We redefine this postgres macro to fix a warning about signed integer comparison. */ +#define TS_VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) \ + (((int32) VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer)) < (toast_pointer).va_rawsize - VARHDRSZ) + +/* + * Fetch a TOAST slice from a heap table. + * + * toastrel is the relation from which chunks are to be fetched. + * valueid identifies the TOAST value from which chunks are being fetched. + * attrsize is the total size of the TOAST value. + * result is the varlena into which the results should be written. + * + * Modified from heap_fetch_toast_slice(). 
+ */ +static void +ts_fetch_toast(Detoaster *detoaster, struct varatt_external *toast_pointer, struct varlena *result) +{ + const Oid valueid = toast_pointer->va_valueid; + + /* + * Open the toast relation and its indexes + */ + MemoryContext old_mctx = MemoryContextSwitchTo(detoaster->mctx); + if (detoaster->toastrel == NULL) + { + detoaster->toastrel = table_open(toast_pointer->va_toastrelid, AccessShareLock); + + int num_indexes; + Relation *toastidxs; + /* Look for the valid index of toast relation */ + const int validIndex = + toast_open_indexes(detoaster->toastrel, AccessShareLock, &toastidxs, &num_indexes); + detoaster->index = toastidxs[validIndex]; + for (int i = 0; i < num_indexes; i++) + { + if (i != validIndex) + { + index_close(toastidxs[i], AccessShareLock); + } + } + + /* Set up a scan key to fetch from the index. */ + ScanKeyInit(&detoaster->toastkey, + (AttrNumber) 1, + BTEqualStrategyNumber, + F_OIDEQ, + ObjectIdGetDatum(valueid)); + + /* Prepare for scan */ + init_toast_snapshot(&detoaster->SnapshotToast); + detoaster->toastscan = systable_beginscan_ordered(detoaster->toastrel, + detoaster->index, + &detoaster->SnapshotToast, + 1, + &detoaster->toastkey); + } + else + { + Ensure(detoaster->toastrel->rd_id == toast_pointer->va_toastrelid, + "unexpected toast pointer relid %d, expected %d", + toast_pointer->va_toastrelid, + detoaster->toastrel->rd_id); + detoaster->toastkey.sk_argument = ObjectIdGetDatum(valueid); + index_rescan(detoaster->toastscan->iscan, &detoaster->toastkey, 1, NULL, 0); + } + MemoryContextSwitchTo(old_mctx); + + TupleDesc toasttupDesc = detoaster->toastrel->rd_att; + + /////////////////////////////////////////////// + + /* + * Read the chunks by index + * + * The index is on (valueid, chunkidx) so they will come in order + */ + const int32 attrsize = VARATT_EXTERNAL_GET_EXTSIZE(*toast_pointer); + const int32 totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1; + const int startchunk = 0; + const int endchunk = (attrsize - 1) / TOAST_MAX_CHUNK_SIZE; + Assert(endchunk <= totalchunks); + HeapTuple ttup; + int32 expectedchunk = startchunk; + while ((ttup = systable_getnext_ordered(detoaster->toastscan, ForwardScanDirection)) != NULL) + { + int32 curchunk; + Pointer chunk; + bool isnull; + char *chunkdata; + int32 chunksize; + int32 expected_size; + int32 chcpystrt; + int32 chcpyend; + + /* + * Have a chunk, extract the sequence number and the data + */ + curchunk = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull)); + Assert(!isnull); + chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull)); + Assert(!isnull); + if (!VARATT_IS_EXTENDED(chunk)) + { + chunksize = VARSIZE(chunk) - VARHDRSZ; + chunkdata = VARDATA(chunk); + } + else if (VARATT_IS_SHORT(chunk)) + { + /* could happen due to heap_form_tuple doing its thing */ + chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT; + chunkdata = VARDATA_SHORT(chunk); + } + else + { + /* should never happen */ + elog(ERROR, + "found toasted toast chunk for toast value %u in %s", + valueid, + RelationGetRelationName(detoaster->toastrel)); + chunksize = 0; /* keep compiler quiet */ + chunkdata = NULL; + } + + /* + * Some checks on the data we've found + */ + if (curchunk != expectedchunk) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("unexpected chunk number %d (expected %d) for toast value %u " + "in %s", + curchunk, + expectedchunk, + valueid, + RelationGetRelationName(detoaster->toastrel)))); + if (curchunk > endchunk) + ereport(ERROR, + 
(errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("unexpected chunk number %d (out of range %d..%d) for toast " + "value %u in %s", + curchunk, + startchunk, + endchunk, + valueid, + RelationGetRelationName(detoaster->toastrel)))); + expected_size = curchunk < totalchunks - 1 ? + TOAST_MAX_CHUNK_SIZE : + attrsize - ((totalchunks - 1) * TOAST_MAX_CHUNK_SIZE); + if (chunksize != expected_size) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("unexpected chunk size %d (expected %d) in chunk %d of %d for " + "toast value %u in %s", + chunksize, + expected_size, + curchunk, + totalchunks, + valueid, + RelationGetRelationName(detoaster->toastrel)))); + + /* + * Copy the data into proper place in our result + */ + chcpystrt = 0; + chcpyend = chunksize - 1; + if (curchunk == startchunk) + chcpystrt = 0; + if (curchunk == endchunk) + chcpyend = (attrsize - 1) % TOAST_MAX_CHUNK_SIZE; + + memcpy(VARDATA(result) + (curchunk * TOAST_MAX_CHUNK_SIZE) + chcpystrt, + chunkdata + chcpystrt, + (chcpyend - chcpystrt) + 1); + + expectedchunk++; + } + + /* + * Final checks that we successfully fetched the datum + */ + if (expectedchunk != (endchunk + 1)) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("missing chunk number %d for toast value %u in %s", + expectedchunk, + valueid, + RelationGetRelationName(detoaster->toastrel)))); +} + +void +detoaster_close(Detoaster *detoaster) +{ + /* Close toast table */ + if (detoaster->toastrel) + { + systable_endscan_ordered(detoaster->toastscan); + table_close(detoaster->toastrel, AccessShareLock); + index_close(detoaster->index, AccessShareLock); + detoaster->toastrel = NULL; + detoaster->index = NULL; + } +} + +/* ---------- + * toast_fetch_datum - + * + * Reconstruct an in memory Datum from the chunks saved + * in the toast relation + * ---------- + */ +static struct varlena * +ts_toast_fetch_datum(struct varlena *attr, Detoaster *detoaster) +{ + struct varlena *result; + struct varatt_external toast_pointer; + int32 attrsize; + + if (!VARATT_IS_EXTERNAL_ONDISK(attr)) + elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums"); + + /* Must copy to access aligned fields */ + VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); + + attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer); + + result = (struct varlena *) palloc(attrsize + VARHDRSZ); + + if (TS_VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) + SET_VARSIZE_COMPRESSED(result, attrsize + VARHDRSZ); + else + SET_VARSIZE(result, attrsize + VARHDRSZ); + + if (attrsize == 0) + return result; /* Probably shouldn't happen, but just in + * case. */ + + /* Fetch all chunks */ + ts_fetch_toast(detoaster, &toast_pointer, result); + + return result; +} + +/* ---------- + * toast_decompress_datum - + * + * Decompress a compressed version of a varlena datum + */ +static struct varlena * +ts_toast_decompress_datum(struct varlena *attr) +{ + ToastCompressionId cmid; + + Assert(VARATT_IS_COMPRESSED(attr)); + + /* + * Fetch the compression method id stored in the compression header and + * decompress the data using the appropriate decompression routine. 
+ */ + cmid = TOAST_COMPRESS_METHOD(attr); + switch (cmid) + { + case TOAST_PGLZ_COMPRESSION_ID: + return pglz_decompress_datum(attr); + case TOAST_LZ4_COMPRESSION_ID: + return lz4_decompress_datum(attr); + default: + elog(ERROR, "invalid compression method id %d", cmid); + return NULL; /* keep compiler quiet */ + } +} + +/* ---------- + * detoast_attr - + * + * Public entry point to get back a toasted value from compression + * or external storage. The result is always non-extended varlena form. + * + * Note some callers assume that if the input is an EXTERNAL or COMPRESSED + * datum, the result will be a pfree'able chunk. + * ---------- + */ +struct varlena * +ts_detoast_attr(struct varlena *attr, Detoaster *detoaster) +{ + if (VARATT_IS_EXTERNAL_ONDISK(attr)) + { + /* + * This is an externally stored datum --- fetch it back from there + */ + attr = ts_toast_fetch_datum(attr, detoaster); + /* If it's compressed, decompress it */ + if (VARATT_IS_COMPRESSED(attr)) + { + struct varlena *tmp = attr; + + attr = ts_toast_decompress_datum(tmp); + pfree(tmp); + } + } + else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) + { + /* + * This is an indirect pointer --- dereference it + */ + struct varatt_indirect redirect; + + VARATT_EXTERNAL_GET_POINTER(redirect, attr); + attr = (struct varlena *) redirect.pointer; + + /* nested indirect Datums aren't allowed */ + Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr)); + + /* recurse in case value is still extended in some other way */ + attr = ts_detoast_attr(attr, detoaster); + + /* if it isn't, we'd better copy it */ + if (attr == (struct varlena *) redirect.pointer) + { + struct varlena *result; + + result = (struct varlena *) palloc(VARSIZE_ANY(attr)); + memcpy(result, attr, VARSIZE_ANY(attr)); + attr = result; + } + } + else if (VARATT_IS_EXTERNAL_EXPANDED(attr)) + { + /* + * This is an expanded-object pointer --- get flat format + */ + ExpandedObjectHeader *eoh; + Size resultsize; + + eoh = DatumGetEOHP(PointerGetDatum(attr)); + resultsize = EOH_get_flat_size(eoh); + attr = (struct varlena *) palloc(resultsize); + EOH_flatten_into(eoh, (void *) attr, resultsize); + + /* flatteners are not allowed to produce compressed/short output */ + Assert(!VARATT_IS_EXTENDED(attr)); + } + else if (VARATT_IS_COMPRESSED(attr)) + { + /* + * This is a compressed value inside of the main tuple + */ + attr = ts_toast_decompress_datum(attr); + } + else if (VARATT_IS_SHORT(attr)) + { + /* + * This is a short-header varlena --- convert to 4-byte header format + */ + Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT; + Size new_size = data_size + VARHDRSZ; + struct varlena *new_attr; + + new_attr = (struct varlena *) palloc(new_size); + SET_VARSIZE(new_attr, new_size); + memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size); + attr = new_attr; + } + + return attr; +} diff --git a/tsl/src/nodes/decompress_chunk/detoaster.h b/tsl/src/nodes/decompress_chunk/detoaster.h new file mode 100644 index 00000000000..763cf2103bb --- /dev/null +++ b/tsl/src/nodes/decompress_chunk/detoaster.h @@ -0,0 +1,31 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. 
+ */ + +#pragma once + +#include + +#include +#include +#include +#include + +typedef struct RelationData *Relation; + +typedef struct Detoaster +{ + MemoryContext mctx; + Oid toastrelid; + Relation toastrel; + Relation index; + SnapshotData SnapshotToast; + ScanKeyData toastkey; + SysScanDesc toastscan; +} Detoaster; + +struct varlena *ts_detoast_attr(struct varlena *attr, Detoaster *detoaster); + +void detoaster_close(Detoaster *detoaster); diff --git a/tsl/src/nodes/decompress_chunk/exec.c b/tsl/src/nodes/decompress_chunk/exec.c index 28a54b47faf..cb8ef482a0d 100644 --- a/tsl/src/nodes/decompress_chunk/exec.c +++ b/tsl/src/nodes/decompress_chunk/exec.c @@ -546,6 +546,9 @@ decompress_chunk_begin(CustomScanState *node, EState *estate, int eflags) chunk_state->vectorized_quals_constified = lappend(chunk_state->vectorized_quals_constified, constified); } + + dcontext->detoaster.toastrel = NULL; + dcontext->detoaster.mctx = CurrentMemoryContext; } /* @@ -861,6 +864,8 @@ decompress_chunk_end(CustomScanState *node) chunk_state->batch_queue->free(chunk_state); ExecEndNode(linitial(node->custom_ps)); + + detoaster_close(&chunk_state->decompress_context.detoaster); } /* diff --git a/tsl/src/nodes/decompress_chunk/exec.h b/tsl/src/nodes/decompress_chunk/exec.h index 1610c6b8e86..1c817f39c16 100644 --- a/tsl/src/nodes/decompress_chunk/exec.h +++ b/tsl/src/nodes/decompress_chunk/exec.h @@ -11,6 +11,7 @@ #include #include "batch_array.h" +#include "detoaster.h" #define DECOMPRESS_CHUNK_COUNT_ID -9 #define DECOMPRESS_CHUNK_SEQUENCE_NUM_ID -10 @@ -57,6 +58,8 @@ typedef struct DecompressContext * data. */ MemoryContext bulk_decompression_context; + + Detoaster detoaster; } DecompressContext; typedef struct DecompressChunkState From 1085d66b2318160cdec6e875e6a0262f990fdb61 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 7 Dec 2023 14:19:55 +0100 Subject: [PATCH 048/249] fixes --- .../nodes/decompress_chunk/compressed_batch.c | 7 +- tsl/src/nodes/decompress_chunk/detoaster.c | 133 +++++++++--------- tsl/src/nodes/decompress_chunk/exec.c | 5 +- 3 files changed, 76 insertions(+), 69 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 19e830871bd..dd8036133d2 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -114,11 +114,8 @@ decompress_column(DecompressChunkState *chunk_state, DecompressBatchState *batch } /* Detoast the compressed datum. */ - if (VARATT_IS_EXTENDED(value)) - { - value = PointerGetDatum( - ts_detoast_attr((struct varlena *) DatumGetPointer(value), &dcontext->detoaster)); - } + value = PointerGetDatum( + ts_detoast_attr((struct varlena *) DatumGetPointer(value), &dcontext->detoaster)); /* Decompress the entire batch if it is supported. 
*/ CompressedDataHeader *header = (CompressedDataHeader *) value; diff --git a/tsl/src/nodes/decompress_chunk/detoaster.c b/tsl/src/nodes/decompress_chunk/detoaster.c index d5becca46ed..792b03bac07 100644 --- a/tsl/src/nodes/decompress_chunk/detoaster.c +++ b/tsl/src/nodes/decompress_chunk/detoaster.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -265,11 +264,40 @@ ts_toast_fetch_datum(struct varlena *attr, Detoaster *detoaster) return result; } -/* ---------- - * toast_decompress_datum - - * - * Decompress a compressed version of a varlena datum +/* + * Copy of Postgres' toast_decompress_datum(): Decompress a compressed version + * of a varlena datum + * The decompression functions have changed since PG13, so we have to keep two + * implementations. */ +#if PG14_LT + +#include + +static struct varlena * +ts_toast_decompress_datum(struct varlena *attr) +{ + struct varlena *result; + + Assert(VARATT_IS_COMPRESSED(attr)); + + result = (struct varlena *) palloc(TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ); + SET_VARSIZE(result, TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ); + + if (pglz_decompress(TOAST_COMPRESS_RAWDATA(attr), + TOAST_COMPRESS_SIZE(attr), + VARDATA(result), + TOAST_COMPRESS_RAWSIZE(attr), + true) < 0) + elog(ERROR, "compressed data is corrupted"); + + return result; +} + +#else + +#include + static struct varlena * ts_toast_decompress_datum(struct varlena *attr) { @@ -293,6 +321,7 @@ ts_toast_decompress_datum(struct varlena *attr) return NULL; /* keep compiler quiet */ } } +#endif /* ---------- * detoast_attr - @@ -307,10 +336,16 @@ ts_toast_decompress_datum(struct varlena *attr) struct varlena * ts_detoast_attr(struct varlena *attr, Detoaster *detoaster) { + if (!VARATT_IS_EXTENDED(attr)) + { + /* Nothing to do here. */ + return attr; + } + if (VARATT_IS_EXTERNAL_ONDISK(attr)) { /* - * This is an externally stored datum --- fetch it back from there + * This is an externally stored datum --- fetch it back from there. */ attr = ts_toast_fetch_datum(attr, detoaster); /* If it's compressed, decompress it */ @@ -321,70 +356,42 @@ ts_detoast_attr(struct varlena *attr, Detoaster *detoaster) attr = ts_toast_decompress_datum(tmp); pfree(tmp); } - } - else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) - { - /* - * This is an indirect pointer --- dereference it - */ - struct varatt_indirect redirect; - VARATT_EXTERNAL_GET_POINTER(redirect, attr); - attr = (struct varlena *) redirect.pointer; + return attr; + } - /* nested indirect Datums aren't allowed */ - Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr)); + /* + * Can't get indirect TOAST here (out-of-line Datum that's stored in memory), + * because we're reading from the compressed chunk table. + */ + Ensure(!VARATT_IS_EXTERNAL_INDIRECT(attr), "got indirect TOAST for compressed data"); - /* recurse in case value is still extended in some other way */ - attr = ts_detoast_attr(attr, detoaster); + /* + * Compressed data doesn't have an expanded representation. + */ + Ensure(!VARATT_IS_EXTERNAL_EXPANDED(attr), "got expanded TOAST for compressed data"); - /* if it isn't, we'd better copy it */ - if (attr == (struct varlena *) redirect.pointer) - { - struct varlena *result; + /* + * This would be a compressed value inside of the main tuple. We can't have + * it for compressed columns because they have either external or + * extended storage. 
+ */ + Ensure(!VARATT_IS_COMPRESSED(attr), "got inline compressed TOAST for compressed data"); - result = (struct varlena *) palloc(VARSIZE_ANY(attr)); - memcpy(result, attr, VARSIZE_ANY(attr)); - attr = result; - } - } - else if (VARATT_IS_EXTERNAL_EXPANDED(attr)) - { - /* - * This is an expanded-object pointer --- get flat format - */ - ExpandedObjectHeader *eoh; - Size resultsize; + /* + * The only option left is a short-header varlena --- convert to 4-byte + * header format. + */ + Ensure(VARATT_IS_SHORT(attr), "got unexpected TOAST type for compressed data"); - eoh = DatumGetEOHP(PointerGetDatum(attr)); - resultsize = EOH_get_flat_size(eoh); - attr = (struct varlena *) palloc(resultsize); - EOH_flatten_into(eoh, (void *) attr, resultsize); + Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT; + Size new_size = data_size + VARHDRSZ; + struct varlena *new_attr; - /* flatteners are not allowed to produce compressed/short output */ - Assert(!VARATT_IS_EXTENDED(attr)); - } - else if (VARATT_IS_COMPRESSED(attr)) - { - /* - * This is a compressed value inside of the main tuple - */ - attr = ts_toast_decompress_datum(attr); - } - else if (VARATT_IS_SHORT(attr)) - { - /* - * This is a short-header varlena --- convert to 4-byte header format - */ - Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT; - Size new_size = data_size + VARHDRSZ; - struct varlena *new_attr; - - new_attr = (struct varlena *) palloc(new_size); - SET_VARSIZE(new_attr, new_size); - memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size); - attr = new_attr; - } + new_attr = (struct varlena *) palloc(new_size); + SET_VARSIZE(new_attr, new_size); + memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size); + attr = new_attr; return attr; } diff --git a/tsl/src/nodes/decompress_chunk/exec.c b/tsl/src/nodes/decompress_chunk/exec.c index cb8ef482a0d..ce050800dee 100644 --- a/tsl/src/nodes/decompress_chunk/exec.c +++ b/tsl/src/nodes/decompress_chunk/exec.c @@ -686,7 +686,10 @@ perform_vectorized_sum_int4(DecompressChunkState *chunk_state, Aggref *aggref) /* We have at least one value */ decompressed_scan_slot->tts_isnull[0] = false; - CompressedDataHeader *header = (CompressedDataHeader *) PG_DETOAST_DATUM(value); + CompressedDataHeader *header = + (CompressedDataHeader *) ts_detoast_attr((struct varlena *) DatumGetPointer(value), + &dcontext->detoaster); + ArrowArray *arrow = NULL; DecompressAllFunction decompress_all = From 17d7feb7e92f30d4dfcdb1e00020992dccbdd352 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 7 Dec 2023 14:38:57 +0100 Subject: [PATCH 049/249] compat --- tsl/src/nodes/decompress_chunk/detoaster.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tsl/src/nodes/decompress_chunk/detoaster.c b/tsl/src/nodes/decompress_chunk/detoaster.c index 792b03bac07..133ae605710 100644 --- a/tsl/src/nodes/decompress_chunk/detoaster.c +++ b/tsl/src/nodes/decompress_chunk/detoaster.c @@ -20,6 +20,7 @@ #include #include +#include #include "debug_assert.h" /* We redefine this postgres macro to fix a warning about signed integer comparison. 
*/ From 84e6a0f1947e4c3fe475e7952d21a83a6da216ba Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 7 Dec 2023 14:51:09 +0100 Subject: [PATCH 050/249] pg13 --- tsl/src/nodes/decompress_chunk/detoaster.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tsl/src/nodes/decompress_chunk/detoaster.c b/tsl/src/nodes/decompress_chunk/detoaster.c index 133ae605710..5291dc01b9f 100644 --- a/tsl/src/nodes/decompress_chunk/detoaster.c +++ b/tsl/src/nodes/decompress_chunk/detoaster.c @@ -246,7 +246,11 @@ ts_toast_fetch_datum(struct varlena *attr, Detoaster *detoaster) /* Must copy to access aligned fields */ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); +#if PG14_LT + attrsize = toast_pointer.va_extsize; +#else attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer); +#endif result = (struct varlena *) palloc(attrsize + VARHDRSZ); From 52a27675057d5a445fc23cda1fc196650d052307 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 7 Dec 2023 14:53:57 +0100 Subject: [PATCH 051/249] pg13 --- tsl/src/nodes/decompress_chunk/detoaster.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/detoaster.c b/tsl/src/nodes/decompress_chunk/detoaster.c index 5291dc01b9f..57496210711 100644 --- a/tsl/src/nodes/decompress_chunk/detoaster.c +++ b/tsl/src/nodes/decompress_chunk/detoaster.c @@ -23,10 +23,15 @@ #include #include "debug_assert.h" +#if PG14_LT +#define VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) (toast_pointer).va_extsize +#endif + /* We redefine this postgres macro to fix a warning about signed integer comparison. */ #define TS_VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) \ (((int32) VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer)) < (toast_pointer).va_rawsize - VARHDRSZ) + /* * Fetch a TOAST slice from a heap table. * @@ -246,11 +251,7 @@ ts_toast_fetch_datum(struct varlena *attr, Detoaster *detoaster) /* Must copy to access aligned fields */ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); -#if PG14_LT - attrsize = toast_pointer.va_extsize; -#else attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer); -#endif result = (struct varlena *) palloc(attrsize + VARHDRSZ); From 006cf24105f752370eed485ab3adc9459e31f1fc Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 7 Dec 2023 14:55:25 +0100 Subject: [PATCH 052/249] format --- tsl/src/nodes/decompress_chunk/detoaster.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tsl/src/nodes/decompress_chunk/detoaster.c b/tsl/src/nodes/decompress_chunk/detoaster.c index 57496210711..6347fed7320 100644 --- a/tsl/src/nodes/decompress_chunk/detoaster.c +++ b/tsl/src/nodes/decompress_chunk/detoaster.c @@ -31,7 +31,6 @@ #define TS_VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) \ (((int32) VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer)) < (toast_pointer).va_rawsize - VARHDRSZ) - /* * Fetch a TOAST slice from a heap table. 
* From 4e79287618e45aa3f2063ebef21d96584b3dde8c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 8 Dec 2023 20:23:18 +0100 Subject: [PATCH 053/249] fix for inline compression --- .../nodes/decompress_chunk/compressed_batch.c | 2 +- tsl/src/nodes/decompress_chunk/detoaster.c | 17 +++++---- tsl/src/nodes/decompress_chunk/detoaster.h | 2 +- tsl/src/nodes/decompress_chunk/exec.c | 2 +- tsl/test/expected/compressed_detoaster.out | 38 +++++++++++++++++++ tsl/test/sql/CMakeLists.txt | 1 + tsl/test/sql/compressed_detoaster.sql | 33 ++++++++++++++++ 7 files changed, 85 insertions(+), 10 deletions(-) create mode 100644 tsl/test/expected/compressed_detoaster.out create mode 100644 tsl/test/sql/compressed_detoaster.sql diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index dd8036133d2..87015107f7e 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -115,7 +115,7 @@ decompress_column(DecompressChunkState *chunk_state, DecompressBatchState *batch /* Detoast the compressed datum. */ value = PointerGetDatum( - ts_detoast_attr((struct varlena *) DatumGetPointer(value), &dcontext->detoaster)); + detoaster_detoast_attr((struct varlena *) DatumGetPointer(value), &dcontext->detoaster)); /* Decompress the entire batch if it is supported. */ CompressedDataHeader *header = (CompressedDataHeader *) value; diff --git a/tsl/src/nodes/decompress_chunk/detoaster.c b/tsl/src/nodes/decompress_chunk/detoaster.c index 6347fed7320..4d0d8ecd7e2 100644 --- a/tsl/src/nodes/decompress_chunk/detoaster.c +++ b/tsl/src/nodes/decompress_chunk/detoaster.c @@ -339,7 +339,7 @@ ts_toast_decompress_datum(struct varlena *attr) * ---------- */ struct varlena * -ts_detoast_attr(struct varlena *attr, Detoaster *detoaster) +detoaster_detoast_attr(struct varlena *attr, Detoaster *detoaster) { if (!VARATT_IS_EXTENDED(attr)) { @@ -376,12 +376,15 @@ ts_detoast_attr(struct varlena *attr, Detoaster *detoaster) */ Ensure(!VARATT_IS_EXTERNAL_EXPANDED(attr), "got expanded TOAST for compressed data"); - /* - * This would be a compressed value inside of the main tuple. We can't have - * it for compressed columns because they have either external or - * extended storage. - */ - Ensure(!VARATT_IS_COMPRESSED(attr), "got inline compressed TOAST for compressed data"); + if(VARATT_IS_COMPRESSED(attr)) + { + /* + * This is a compressed value stored inline in the main tuple. It rarely + * occurs in practice, because we set a low toast_tuple_target = 128 + * for the compressed chunks, but is still technically possible. 
+ */ + return ts_toast_decompress_datum(attr); + } /* * The only option left is a short-header varlena --- convert to 4-byte diff --git a/tsl/src/nodes/decompress_chunk/detoaster.h b/tsl/src/nodes/decompress_chunk/detoaster.h index 763cf2103bb..c5992b118bf 100644 --- a/tsl/src/nodes/decompress_chunk/detoaster.h +++ b/tsl/src/nodes/decompress_chunk/detoaster.h @@ -26,6 +26,6 @@ typedef struct Detoaster SysScanDesc toastscan; } Detoaster; -struct varlena *ts_detoast_attr(struct varlena *attr, Detoaster *detoaster); +struct varlena *detoaster_detoast_attr(struct varlena *attr, Detoaster *detoaster); void detoaster_close(Detoaster *detoaster); diff --git a/tsl/src/nodes/decompress_chunk/exec.c b/tsl/src/nodes/decompress_chunk/exec.c index ce050800dee..4cada3b0723 100644 --- a/tsl/src/nodes/decompress_chunk/exec.c +++ b/tsl/src/nodes/decompress_chunk/exec.c @@ -687,7 +687,7 @@ perform_vectorized_sum_int4(DecompressChunkState *chunk_state, Aggref *aggref) decompressed_scan_slot->tts_isnull[0] = false; CompressedDataHeader *header = - (CompressedDataHeader *) ts_detoast_attr((struct varlena *) DatumGetPointer(value), + (CompressedDataHeader *) detoaster_detoast_attr((struct varlena *) DatumGetPointer(value), &dcontext->detoaster); ArrowArray *arrow = NULL; diff --git a/tsl/test/expected/compressed_detoaster.out b/tsl/test/expected/compressed_detoaster.out new file mode 100644 index 00000000000..e20ff9c0e45 --- /dev/null +++ b/tsl/test/expected/compressed_detoaster.out @@ -0,0 +1,38 @@ +-- This file and its contents are licensed under the Timescale License. +-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. +-- Make a compressed table with a compressed string of varying length, to test +-- the various ways the compressed data can be toasted. +create table longstr(ts int default 1, s1 text); +select create_hypertable('longstr', 'ts'); +NOTICE: adding not-null constraint to column "ts" + create_hypertable +---------------------- + (1,public,longstr,t) +(1 row) + +alter table longstr set (timescaledb.compress); +-- We want to test the case for inline compression which. It is technically +-- possible, but very hard to hit with the usual toast_tuple_target = 128 on +-- compressed chunks. So here we increase the toast_tuple_target to simplify +-- testing. +select format('%I.%I', schema_name, table_name) compressed_table +from _timescaledb_catalog.hypertable +where id = (select compressed_hypertable_id from _timescaledb_catalog.hypertable + where table_name = 'longstr') +\gset +alter table :compressed_table set (toast_tuple_target = 512); +-- Now, test compression and decompression with various string lengths. 
+create function test(repeats int) returns table(ns bigint) as $$ begin + raise log 'repeats %', repeats; + truncate longstr; + insert into longstr(s1) select repeat('aaaa', repeats); + perform count(compress_chunk(x, true)) from show_chunks('longstr') x; + return query select sum(length(s1)) from longstr; +end; $$ language plpgsql volatile; +select sum(t) from generate_series(1, 30) x, lateral test(x * x * x) t; + sum +-------- + 864900 +(1 row) + diff --git a/tsl/test/sql/CMakeLists.txt b/tsl/test/sql/CMakeLists.txt index db32cfbceb3..aea07651844 100644 --- a/tsl/test/sql/CMakeLists.txt +++ b/tsl/test/sql/CMakeLists.txt @@ -14,6 +14,7 @@ set(TEST_FILES cagg_utils.sql cagg_watermark.sql compress_default.sql + compressed_detoaster.sql compressed_collation.sql compression_create_compressed_table.sql compression_conflicts.sql diff --git a/tsl/test/sql/compressed_detoaster.sql b/tsl/test/sql/compressed_detoaster.sql new file mode 100644 index 00000000000..085ed82d82a --- /dev/null +++ b/tsl/test/sql/compressed_detoaster.sql @@ -0,0 +1,33 @@ +-- This file and its contents are licensed under the Timescale License. +-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. + +-- Make a compressed table with a compressed string of varying length, to test +-- the various ways the compressed data can be toasted. +create table longstr(ts int default 1, s1 text); +select create_hypertable('longstr', 'ts'); +alter table longstr set (timescaledb.compress); + + +-- We want to test the case for inline compression which. It is technically +-- possible, but very hard to hit with the usual toast_tuple_target = 128 on +-- compressed chunks. So here we increase the toast_tuple_target to simplify +-- testing. +select format('%I.%I', schema_name, table_name) compressed_table +from _timescaledb_catalog.hypertable +where id = (select compressed_hypertable_id from _timescaledb_catalog.hypertable + where table_name = 'longstr') +\gset +alter table :compressed_table set (toast_tuple_target = 512); + + +-- Now, test compression and decompression with various string lengths. +create function test(repeats int) returns table(ns bigint) as $$ begin + raise log 'repeats %', repeats; + truncate longstr; + insert into longstr(s1) select repeat('aaaa', repeats); + perform count(compress_chunk(x, true)) from show_chunks('longstr') x; + return query select sum(length(s1)) from longstr; +end; $$ language plpgsql volatile; + +select sum(t) from generate_series(1, 30) x, lateral test(x * x * x) t; From 09f4f675ec8a634188fa26e2e90e3c9b8aa77307 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 8 Dec 2023 20:33:56 +0100 Subject: [PATCH 054/249] Disable vectorized sum for expressions This is not supported at the moment, we can only aggregate bare columns, but we forgot to check this at the planning stage, so it fails. --- tsl/src/partialize_agg.c | 7 ++ tsl/test/expected/vectorized_aggregation.out | 83 ++++++++++++++++++++ tsl/test/sql/vectorized_aggregation.sql | 8 ++ 3 files changed, 98 insertions(+) diff --git a/tsl/src/partialize_agg.c b/tsl/src/partialize_agg.c index 9e49aaa6476..869e8dacb6e 100644 --- a/tsl/src/partialize_agg.c +++ b/tsl/src/partialize_agg.c @@ -77,6 +77,13 @@ is_vectorizable_agg_path(PlannerInfo *root, AggPath *agg_path, Path *path) if (aggref->aggfnoid != F_SUM_INT4) return false; + /* Can aggregate only a bare decompressed column, not an expression. 
*/ + TargetEntry *argument = castNode(TargetEntry, linitial(aggref->args)); + if (!IsA(argument->expr, Var)) + { + return false; + } + return true; } diff --git a/tsl/test/expected/vectorized_aggregation.out b/tsl/test/expected/vectorized_aggregation.out index c42ae22d5bb..4dc44ec41a7 100644 --- a/tsl/test/expected/vectorized_aggregation.out +++ b/tsl/test/expected/vectorized_aggregation.out @@ -1063,6 +1063,89 @@ SELECT sum(int_value) FROM testtable; Output: compress_hyper_2_20_chunk."time", compress_hyper_2_20_chunk.segment_by_value, compress_hyper_2_20_chunk.int_value, compress_hyper_2_20_chunk.float_value, compress_hyper_2_20_chunk._ts_meta_count, compress_hyper_2_20_chunk._ts_meta_sequence_num, compress_hyper_2_20_chunk._ts_meta_min_1, compress_hyper_2_20_chunk._ts_meta_max_1 (57 rows) +--Vectorized aggregation not possible for expression +SELECT sum(abs(int_value)) FROM testtable; + sum +-------- + 311405 +(1 row) + +:EXPLAIN +SELECT sum(abs(int_value)) FROM testtable; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: sum(abs(_hyper_1_1_chunk.int_value)) + -> Gather + Output: (PARTIAL sum(abs(_hyper_1_1_chunk.int_value))) + Workers Planned: 2 + -> Parallel Append + -> Partial Aggregate + Output: PARTIAL sum(abs(_hyper_1_1_chunk.int_value)) + -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk.int_value + -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_11_chunk + Output: compress_hyper_2_11_chunk."time", compress_hyper_2_11_chunk.segment_by_value, compress_hyper_2_11_chunk.int_value, compress_hyper_2_11_chunk.float_value, compress_hyper_2_11_chunk._ts_meta_count, compress_hyper_2_11_chunk._ts_meta_sequence_num, compress_hyper_2_11_chunk._ts_meta_min_1, compress_hyper_2_11_chunk._ts_meta_max_1 + -> Partial Aggregate + Output: PARTIAL sum(abs(_hyper_1_2_chunk.int_value)) + -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk.int_value + -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_12_chunk + Output: compress_hyper_2_12_chunk."time", compress_hyper_2_12_chunk.segment_by_value, compress_hyper_2_12_chunk.int_value, compress_hyper_2_12_chunk.float_value, compress_hyper_2_12_chunk._ts_meta_count, compress_hyper_2_12_chunk._ts_meta_sequence_num, compress_hyper_2_12_chunk._ts_meta_min_1, compress_hyper_2_12_chunk._ts_meta_max_1 + -> Partial Aggregate + Output: PARTIAL sum(abs(_hyper_1_3_chunk.int_value)) + -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_3_chunk + Output: _hyper_1_3_chunk.int_value + -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_13_chunk + Output: compress_hyper_2_13_chunk."time", compress_hyper_2_13_chunk.segment_by_value, compress_hyper_2_13_chunk.int_value, compress_hyper_2_13_chunk.float_value, compress_hyper_2_13_chunk._ts_meta_count, compress_hyper_2_13_chunk._ts_meta_sequence_num, compress_hyper_2_13_chunk._ts_meta_min_1, compress_hyper_2_13_chunk._ts_meta_max_1 + -> Partial Aggregate + Output: PARTIAL sum(abs(_hyper_1_4_chunk.int_value)) + -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_4_chunk + Output: 
_hyper_1_4_chunk.int_value + -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_14_chunk + Output: compress_hyper_2_14_chunk."time", compress_hyper_2_14_chunk.segment_by_value, compress_hyper_2_14_chunk.int_value, compress_hyper_2_14_chunk.float_value, compress_hyper_2_14_chunk._ts_meta_count, compress_hyper_2_14_chunk._ts_meta_sequence_num, compress_hyper_2_14_chunk._ts_meta_min_1, compress_hyper_2_14_chunk._ts_meta_max_1 + -> Partial Aggregate + Output: PARTIAL sum(abs(_hyper_1_5_chunk.int_value)) + -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_5_chunk + Output: _hyper_1_5_chunk.int_value + -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_15_chunk + Output: compress_hyper_2_15_chunk."time", compress_hyper_2_15_chunk.segment_by_value, compress_hyper_2_15_chunk.int_value, compress_hyper_2_15_chunk.float_value, compress_hyper_2_15_chunk._ts_meta_count, compress_hyper_2_15_chunk._ts_meta_sequence_num, compress_hyper_2_15_chunk._ts_meta_min_1, compress_hyper_2_15_chunk._ts_meta_max_1 + -> Partial Aggregate + Output: PARTIAL sum(abs(_hyper_1_6_chunk.int_value)) + -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_6_chunk + Output: _hyper_1_6_chunk.int_value + -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_16_chunk + Output: compress_hyper_2_16_chunk."time", compress_hyper_2_16_chunk.segment_by_value, compress_hyper_2_16_chunk.int_value, compress_hyper_2_16_chunk.float_value, compress_hyper_2_16_chunk._ts_meta_count, compress_hyper_2_16_chunk._ts_meta_sequence_num, compress_hyper_2_16_chunk._ts_meta_min_1, compress_hyper_2_16_chunk._ts_meta_max_1 + -> Partial Aggregate + Output: PARTIAL sum(abs(_hyper_1_7_chunk.int_value)) + -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_7_chunk + Output: _hyper_1_7_chunk.int_value + -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_17_chunk + Output: compress_hyper_2_17_chunk."time", compress_hyper_2_17_chunk.segment_by_value, compress_hyper_2_17_chunk.int_value, compress_hyper_2_17_chunk.float_value, compress_hyper_2_17_chunk._ts_meta_count, compress_hyper_2_17_chunk._ts_meta_sequence_num, compress_hyper_2_17_chunk._ts_meta_min_1, compress_hyper_2_17_chunk._ts_meta_max_1 + -> Partial Aggregate + Output: PARTIAL sum(abs(_hyper_1_8_chunk.int_value)) + -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_8_chunk + Output: _hyper_1_8_chunk.int_value + -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_18_chunk + Output: compress_hyper_2_18_chunk."time", compress_hyper_2_18_chunk.segment_by_value, compress_hyper_2_18_chunk.int_value, compress_hyper_2_18_chunk.float_value, compress_hyper_2_18_chunk._ts_meta_count, compress_hyper_2_18_chunk._ts_meta_sequence_num, compress_hyper_2_18_chunk._ts_meta_min_1, compress_hyper_2_18_chunk._ts_meta_max_1 + -> Partial Aggregate + Output: PARTIAL sum(abs(_hyper_1_9_chunk.int_value)) + -> Custom Scan (DecompressChunk) on _timescaledb_internal._hyper_1_9_chunk + Output: _hyper_1_9_chunk.int_value + -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_19_chunk + Output: compress_hyper_2_19_chunk."time", compress_hyper_2_19_chunk.segment_by_value, compress_hyper_2_19_chunk.int_value, compress_hyper_2_19_chunk.float_value, compress_hyper_2_19_chunk._ts_meta_count, compress_hyper_2_19_chunk._ts_meta_sequence_num, compress_hyper_2_19_chunk._ts_meta_min_1, compress_hyper_2_19_chunk._ts_meta_max_1 + -> Partial Aggregate + Output: PARTIAL sum(abs(_hyper_1_10_chunk.int_value)) + -> Custom Scan 
(DecompressChunk) on _timescaledb_internal._hyper_1_10_chunk + Output: _hyper_1_10_chunk.int_value + -> Parallel Seq Scan on _timescaledb_internal.compress_hyper_2_20_chunk + Output: compress_hyper_2_20_chunk."time", compress_hyper_2_20_chunk.segment_by_value, compress_hyper_2_20_chunk.int_value, compress_hyper_2_20_chunk.float_value, compress_hyper_2_20_chunk._ts_meta_count, compress_hyper_2_20_chunk._ts_meta_sequence_num, compress_hyper_2_20_chunk._ts_meta_min_1, compress_hyper_2_20_chunk._ts_meta_max_1 + -> Partial Aggregate + Output: PARTIAL sum(abs(_hyper_1_1_chunk.int_value)) + -> Parallel Seq Scan on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk.int_value +(70 rows) + -- Vectorized aggregation NOT possible SET timescaledb.enable_vectorized_aggregation = OFF; :EXPLAIN diff --git a/tsl/test/sql/vectorized_aggregation.sql b/tsl/test/sql/vectorized_aggregation.sql index 4c427a2dfab..b8cc7847f41 100644 --- a/tsl/test/sql/vectorized_aggregation.sql +++ b/tsl/test/sql/vectorized_aggregation.sql @@ -116,6 +116,14 @@ SELECT sum(int_value) FROM testtable; :EXPLAIN SELECT sum(int_value) FROM testtable; + +--Vectorized aggregation not possible for expression +SELECT sum(abs(int_value)) FROM testtable; + +:EXPLAIN +SELECT sum(abs(int_value)) FROM testtable; + + -- Vectorized aggregation NOT possible SET timescaledb.enable_vectorized_aggregation = OFF; From fa95a9053cd89850b21fe359db3acfbaa3edb5bb Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 8 Dec 2023 20:37:04 +0100 Subject: [PATCH 055/249] changelog --- .unreleased/fix_6393 | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .unreleased/fix_6393 diff --git a/.unreleased/fix_6393 b/.unreleased/fix_6393 new file mode 100644 index 00000000000..6de765163b8 --- /dev/null +++ b/.unreleased/fix_6393 @@ -0,0 +1,2 @@ +Fixes: #6393 Disable vectorized sum for expressions. 
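(Illustrative sketch, not part of the patch: the test and changelog entry above exercise the distinction between aggregating a bare compressed column and aggregating an expression, using the testtable hypertable defined earlier in vectorized_aggregation.sql. An aggregate over a plain column reference can take the vectorized path, while an aggregate over an expression cannot and falls back to the Partial Aggregate over DecompressChunk plan shown in the EXPLAIN output above.

-- eligible for vectorized aggregation: the argument is a plain column reference
SELECT sum(int_value) FROM testtable;

-- not eligible: the argument is an expression, so the planner keeps the
-- regular row-by-row aggregation over DecompressChunk
SELECT sum(abs(int_value)) FROM testtable;)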
+ From e985d25fc146d6d8f3d896ce7d8c052320843c99 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 8 Dec 2023 20:42:01 +0100 Subject: [PATCH 056/249] fixup --- tsl/src/nodes/decompress_chunk/decompress_context.h | 3 +++ tsl/src/nodes/decompress_chunk/exec.h | 5 ----- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/decompress_context.h b/tsl/src/nodes/decompress_chunk/decompress_context.h index 1fee8cadb71..c20456a6d8d 100644 --- a/tsl/src/nodes/decompress_chunk/decompress_context.h +++ b/tsl/src/nodes/decompress_chunk/decompress_context.h @@ -13,6 +13,7 @@ #include #include "batch_array.h" +#include "detoaster.h" typedef enum CompressionColumnType { @@ -74,6 +75,8 @@ typedef struct DecompressContext TupleDesc compressed_slot_tdesc; PlanState *ps; /* Set for filtering and instrumentation */ + + Detoaster detoaster; } DecompressContext; #endif /* TIMESCALEDB_DECOMPRESS_CONTEXT_H */ diff --git a/tsl/src/nodes/decompress_chunk/exec.h b/tsl/src/nodes/decompress_chunk/exec.h index 816589bbafb..0543f732541 100644 --- a/tsl/src/nodes/decompress_chunk/exec.h +++ b/tsl/src/nodes/decompress_chunk/exec.h @@ -10,13 +10,8 @@ #include #include -<<<<<<< HEAD -#include "batch_array.h" -#include "detoaster.h" -======= #include "batch_queue.h" #include "decompress_context.h" ->>>>>>> origin/main #define DECOMPRESS_CHUNK_COUNT_ID -9 #define DECOMPRESS_CHUNK_SEQUENCE_NUM_ID -10 From efa33e45c59cb16e547c1b8503a48bad84b03a9d Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 8 Dec 2023 20:54:25 +0100 Subject: [PATCH 057/249] format --- tsl/src/nodes/decompress_chunk/detoaster.c | 2 +- tsl/src/nodes/decompress_chunk/exec.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/detoaster.c b/tsl/src/nodes/decompress_chunk/detoaster.c index 4d0d8ecd7e2..dc9751bf4eb 100644 --- a/tsl/src/nodes/decompress_chunk/detoaster.c +++ b/tsl/src/nodes/decompress_chunk/detoaster.c @@ -376,7 +376,7 @@ detoaster_detoast_attr(struct varlena *attr, Detoaster *detoaster) */ Ensure(!VARATT_IS_EXTERNAL_EXPANDED(attr), "got expanded TOAST for compressed data"); - if(VARATT_IS_COMPRESSED(attr)) + if (VARATT_IS_COMPRESSED(attr)) { /* * This is a compressed value stored inline in the main tuple. 
It rarely diff --git a/tsl/src/nodes/decompress_chunk/exec.c b/tsl/src/nodes/decompress_chunk/exec.c index ceca0e8d0da..95b956d0adb 100644 --- a/tsl/src/nodes/decompress_chunk/exec.c +++ b/tsl/src/nodes/decompress_chunk/exec.c @@ -610,8 +610,9 @@ perform_vectorized_sum_int4(DecompressChunkState *chunk_state, Aggref *aggref) decompressed_scan_slot->tts_isnull[0] = false; CompressedDataHeader *header = - (CompressedDataHeader *) detoaster_detoast_attr((struct varlena *) DatumGetPointer(value), - &dcontext->detoaster); + (CompressedDataHeader *) detoaster_detoast_attr((struct varlena *) DatumGetPointer( + value), + &dcontext->detoaster); ArrowArray *arrow = NULL; From 1f77008bb341d83749324b85e4d5b3405c628eb3 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Sat, 9 Dec 2023 14:21:29 +0100 Subject: [PATCH 058/249] use detoaster in row decompressor --- tsl/src/compression/api.c | 1 + tsl/src/compression/compression.c | 19 +++++++++++++++++-- tsl/src/compression/compression.h | 6 ++++++ tsl/src/nodes/decompress_chunk/detoaster.c | 2 +- tsl/src/nodes/decompress_chunk/detoaster.h | 1 - 5 files changed, 25 insertions(+), 4 deletions(-) diff --git a/tsl/src/compression/api.c b/tsl/src/compression/api.c index ab47b17e2ea..84f9dfa623f 100644 --- a/tsl/src/compression/api.c +++ b/tsl/src/compression/api.c @@ -1594,6 +1594,7 @@ tsl_recompress_chunk_segmentwise(PG_FUNCTION_ARGS) index_endscan(index_scan); UnregisterSnapshot(snapshot); index_close(index_rel, AccessExclusiveLock); + row_decompressor_close(&decompressor); #if PG14_LT int options = 0; diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c index 8e934127f63..3b474b96927 100644 --- a/tsl/src/compression/compression.c +++ b/tsl/src/compression/compression.c @@ -1425,9 +1425,18 @@ build_decompressor(Relation in_rel, Relation out_rel) */ memset(decompressor.decompressed_is_nulls, true, out_desc->natts); + decompressor.detoaster.mctx = CurrentMemoryContext; + decompressor.detoaster.toastrel = NULL; + return decompressor; } +void +row_decompressor_close(RowDecompressor *decompressor) +{ + detoaster_close(&decompressor->detoaster); +} + void decompress_chunk(Oid in_table, Oid out_table) { @@ -1469,6 +1478,7 @@ decompress_chunk(Oid in_table, Oid out_table) MemoryContextDelete(decompressor.per_compressed_row_ctx); ts_catalog_close_indexes(decompressor.indexstate); FreeExecutorState(decompressor.estate); + row_decompressor_close(&decompressor); table_close(out_rel, NoLock); table_close(in_rel, NoLock); @@ -1582,8 +1592,11 @@ decompress_batch(RowDecompressor *decompressor) } /* Normal compressed column. 
*/ - CompressedDataHeader *header = - get_compressed_data_header(decompressor->compressed_datums[input_column]); + Datum compressed_datum = PointerGetDatum( + detoaster_detoast_attr((struct varlena *) DatumGetPointer( + decompressor->compressed_datums[input_column]), + &decompressor->detoaster)); + CompressedDataHeader *header = get_compressed_data_header(compressed_datum); column_info->iterator = definitions[header->compression_algorithm] .iterator_init_forward(PointerGetDatum(header), column_info->decompressed_type); @@ -2225,6 +2238,7 @@ decompress_batches_for_insert(ChunkInsertState *cis, Chunk *chunk, TupleTableSlo ts_catalog_close_indexes(decompressor.indexstate); FreeExecutorState(decompressor.estate); FreeBulkInsertState(decompressor.bistate); + row_decompressor_close(&decompressor); CommandCounterIncrement(); table_close(in_rel, NoLock); @@ -3402,6 +3416,7 @@ decompress_batches_for_update_delete(HypertableModifyState *ht_state, Chunk *chu ts_catalog_close_indexes(decompressor.indexstate); FreeExecutorState(decompressor.estate); FreeBulkInsertState(decompressor.bistate); + row_decompressor_close(&decompressor); table_close(chunk_rel, NoLock); table_close(comp_chunk_rel, NoLock); diff --git a/tsl/src/compression/compression.h b/tsl/src/compression/compression.h index fba64bced6e..0f8c40e160a 100644 --- a/tsl/src/compression/compression.h +++ b/tsl/src/compression/compression.h @@ -17,6 +17,8 @@ typedef struct BulkInsertStateData *BulkInsertState; #include #include "compat/compat.h" + +#include "nodes/decompress_chunk/detoaster.h" #include "segment_meta.h" /* @@ -151,6 +153,8 @@ typedef struct RowDecompressor int64 tuples_decompressed; TupleTableSlot **decompressed_slots; + + Detoaster detoaster; } RowDecompressor; /* @@ -367,6 +371,8 @@ extern void segment_info_update(SegmentInfo *segment_info, Datum val, bool is_nu extern RowDecompressor build_decompressor(Relation in_rel, Relation out_rel); +extern void row_decompressor_close(RowDecompressor *decompressor); + /* * A convenience macro to throw an error about the corrupted compressed data, if * the argument is false. 
When fuzzing is enabled, we don't show the message not diff --git a/tsl/src/nodes/decompress_chunk/detoaster.c b/tsl/src/nodes/decompress_chunk/detoaster.c index dc9751bf4eb..2ed174198ff 100644 --- a/tsl/src/nodes/decompress_chunk/detoaster.c +++ b/tsl/src/nodes/decompress_chunk/detoaster.c @@ -220,7 +220,7 @@ void detoaster_close(Detoaster *detoaster) { /* Close toast table */ - if (detoaster->toastrel) + if (detoaster->toastrel != NULL) { systable_endscan_ordered(detoaster->toastscan); table_close(detoaster->toastrel, AccessShareLock); diff --git a/tsl/src/nodes/decompress_chunk/detoaster.h b/tsl/src/nodes/decompress_chunk/detoaster.h index c5992b118bf..bcef7afa844 100644 --- a/tsl/src/nodes/decompress_chunk/detoaster.h +++ b/tsl/src/nodes/decompress_chunk/detoaster.h @@ -18,7 +18,6 @@ typedef struct RelationData *Relation; typedef struct Detoaster { MemoryContext mctx; - Oid toastrelid; Relation toastrel; Relation index; SnapshotData SnapshotToast; From e20f68f0339d95cfadd34148f69c86117ad43be3 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 11 Dec 2023 12:01:25 +0100 Subject: [PATCH 059/249] fix memory contexts --- tsl/src/nodes/decompress_chunk/detoaster.c | 4 ++-- tsl/src/nodes/decompress_chunk/exec.c | 13 ++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/detoaster.c b/tsl/src/nodes/decompress_chunk/detoaster.c index 2ed174198ff..1d050b5b495 100644 --- a/tsl/src/nodes/decompress_chunk/detoaster.c +++ b/tsl/src/nodes/decompress_chunk/detoaster.c @@ -49,9 +49,9 @@ ts_fetch_toast(Detoaster *detoaster, struct varatt_external *toast_pointer, stru /* * Open the toast relation and its indexes */ - MemoryContext old_mctx = MemoryContextSwitchTo(detoaster->mctx); if (detoaster->toastrel == NULL) { + MemoryContext old_mctx = MemoryContextSwitchTo(detoaster->mctx); detoaster->toastrel = table_open(toast_pointer->va_toastrelid, AccessShareLock); int num_indexes; @@ -82,6 +82,7 @@ ts_fetch_toast(Detoaster *detoaster, struct varatt_external *toast_pointer, stru &detoaster->SnapshotToast, 1, &detoaster->toastkey); + MemoryContextSwitchTo(old_mctx); } else { @@ -92,7 +93,6 @@ ts_fetch_toast(Detoaster *detoaster, struct varatt_external *toast_pointer, stru detoaster->toastkey.sk_argument = ObjectIdGetDatum(valueid); index_rescan(detoaster->toastscan->iscan, &detoaster->toastkey, 1, NULL, 0); } - MemoryContextSwitchTo(old_mctx); TupleDesc toasttupDesc = detoaster->toastrel->rd_att; diff --git a/tsl/src/nodes/decompress_chunk/exec.c b/tsl/src/nodes/decompress_chunk/exec.c index 95b956d0adb..625db01b3c0 100644 --- a/tsl/src/nodes/decompress_chunk/exec.c +++ b/tsl/src/nodes/decompress_chunk/exec.c @@ -545,6 +545,9 @@ perform_vectorized_sum_int4(DecompressChunkState *chunk_state, Aggref *aggref) break; } + MemoryContext old_mctx = MemoryContextSwitchTo(batch_state->per_batch_context); + MemoryContextReset(batch_state->per_batch_context); + bool isnull_value, isnull_elements; Datum value = slot_getattr(compressed_slot, column_description->compressed_scan_attno, @@ -581,6 +584,7 @@ perform_vectorized_sum_int4(DecompressChunkState *chunk_state, Aggref *aggref) (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("bigint out of range"))); } + MemoryContextSwitchTo(old_mctx); } } else if (column_description->type == COMPRESSED_COLUMN) @@ -599,6 +603,9 @@ perform_vectorized_sum_int4(DecompressChunkState *chunk_state, Aggref *aggref) break; } + MemoryContext old_mctx = 
MemoryContextSwitchTo(batch_state->per_batch_context); + MemoryContextReset(batch_state->per_batch_context); + /* Decompress data */ bool isnull; Datum value = @@ -620,8 +627,7 @@ perform_vectorized_sum_int4(DecompressChunkState *chunk_state, Aggref *aggref) tsl_get_decompress_all_function(header->compression_algorithm); Assert(decompress_all != NULL); - MemoryContext context_before_decompression = - MemoryContextSwitchTo(dcontext->bulk_decompression_context); + MemoryContextSwitchTo(dcontext->bulk_decompression_context); arrow = decompress_all(PointerGetDatum(header), column_description->typid, @@ -630,7 +636,7 @@ perform_vectorized_sum_int4(DecompressChunkState *chunk_state, Aggref *aggref) Assert(arrow != NULL); MemoryContextReset(dcontext->bulk_decompression_context); - MemoryContextSwitchTo(context_before_decompression); + MemoryContextSwitchTo(batch_state->per_batch_context); /* A compressed batch consists of 1000 tuples (see MAX_ROWS_PER_COMPRESSION). The * attribute value is a int32 with a max value of 2^32. Even if all tuples have the max @@ -657,6 +663,7 @@ perform_vectorized_sum_int4(DecompressChunkState *chunk_state, Aggref *aggref) ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("bigint out of range"))); + MemoryContextSwitchTo(old_mctx); } } else From 848b7f808b756cad311a92463db8abb4e13b90d5 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 11 Dec 2023 12:01:41 +0100 Subject: [PATCH 060/249] benchmark detoaster (2023-12-11 #1) From caac7ffed65afa42b2208faa1bdb468cd0cd15be Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 11 Dec 2023 16:04:25 +0100 Subject: [PATCH 061/249] cleanup --- tsl/src/compression/compression.c | 3 +-- tsl/src/nodes/decompress_chunk/detoaster.c | 17 ++++++++++++++++- tsl/src/nodes/decompress_chunk/detoaster.h | 4 ++-- tsl/src/nodes/decompress_chunk/exec.c | 5 ++--- 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c index 00e9ee6fa7e..a10406f9d0d 100644 --- a/tsl/src/compression/compression.c +++ b/tsl/src/compression/compression.c @@ -1425,8 +1425,7 @@ build_decompressor(Relation in_rel, Relation out_rel) */ memset(decompressor.decompressed_is_nulls, true, out_desc->natts); - decompressor.detoaster.mctx = CurrentMemoryContext; - decompressor.detoaster.toastrel = NULL; + detoaster_init(&decompressor.detoaster, CurrentMemoryContext); return decompressor; } diff --git a/tsl/src/nodes/decompress_chunk/detoaster.c b/tsl/src/nodes/decompress_chunk/detoaster.c index 1d050b5b495..127ff0d2fec 100644 --- a/tsl/src/nodes/decompress_chunk/detoaster.c +++ b/tsl/src/nodes/decompress_chunk/detoaster.c @@ -39,7 +39,10 @@ * attrsize is the total size of the TOAST value. * result is the varlena into which the results should be written. * - * Modified from heap_fetch_toast_slice(). + * This function is a modified copy of heap_fetch_toast_slice(). The difference + * is that it holds the open toast relation, index and other intermediate data + * for detoasting in the Detoaster struct, to allow them to be reused over many + * input tuples. 
*/ static void ts_fetch_toast(Detoaster *detoaster, struct varatt_external *toast_pointer, struct varlena *result) @@ -216,6 +219,18 @@ ts_fetch_toast(Detoaster *detoaster, struct varatt_external *toast_pointer, stru RelationGetRelationName(detoaster->toastrel)))); } +/* + * The memory context is used to store intermediate data, and is supposed to + * live over the calls to detoaster_detoast_attr(). + * That function itself can be called in a short-lived memory context. + */ +void +detoaster_init(Detoaster *detoaster, MemoryContext mctx) +{ + detoaster->toastrel = NULL; + detoaster->mctx = mctx; +} + void detoaster_close(Detoaster *detoaster) { diff --git a/tsl/src/nodes/decompress_chunk/detoaster.h b/tsl/src/nodes/decompress_chunk/detoaster.h index bcef7afa844..50eb443e4ac 100644 --- a/tsl/src/nodes/decompress_chunk/detoaster.h +++ b/tsl/src/nodes/decompress_chunk/detoaster.h @@ -25,6 +25,6 @@ typedef struct Detoaster SysScanDesc toastscan; } Detoaster; -struct varlena *detoaster_detoast_attr(struct varlena *attr, Detoaster *detoaster); - +void detoaster_init(Detoaster *detoaster, MemoryContext mctx); void detoaster_close(Detoaster *detoaster); +struct varlena *detoaster_detoast_attr(struct varlena *attr, Detoaster *detoaster); diff --git a/tsl/src/nodes/decompress_chunk/exec.c b/tsl/src/nodes/decompress_chunk/exec.c index 625db01b3c0..6ff3df95a1a 100644 --- a/tsl/src/nodes/decompress_chunk/exec.c +++ b/tsl/src/nodes/decompress_chunk/exec.c @@ -469,8 +469,7 @@ decompress_chunk_begin(CustomScanState *node, EState *estate, int eflags) lappend(dcontext->vectorized_quals_constified, constified); } - dcontext->detoaster.toastrel = NULL; - dcontext->detoaster.mctx = CurrentMemoryContext; + detoaster_init(&dcontext->detoaster, CurrentMemoryContext); } /* @@ -627,7 +626,7 @@ perform_vectorized_sum_int4(DecompressChunkState *chunk_state, Aggref *aggref) tsl_get_decompress_all_function(header->compression_algorithm); Assert(decompress_all != NULL); - MemoryContextSwitchTo(dcontext->bulk_decompression_context); + MemoryContextSwitchTo(dcontext->bulk_decompression_context); arrow = decompress_all(PointerGetDatum(header), column_description->typid, From ffb8eb852acb5117ab6c9440ffd7414f20738f47 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 11 Dec 2023 17:12:10 +0100 Subject: [PATCH 062/249] cleanup --- tsl/src/nodes/decompress_chunk/detoaster.c | 26 +++++----------------- tsl/test/expected/compressed_detoaster.out | 6 ++--- tsl/test/sql/compressed_detoaster.sql | 6 ++--- 3 files changed, 12 insertions(+), 26 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/detoaster.c b/tsl/src/nodes/decompress_chunk/detoaster.c index 127ff0d2fec..1ea6a244c83 100644 --- a/tsl/src/nodes/decompress_chunk/detoaster.c +++ b/tsl/src/nodes/decompress_chunk/detoaster.c @@ -34,11 +34,6 @@ /* * Fetch a TOAST slice from a heap table. * - * toastrel is the relation from which chunks are to be fetched. - * valueid identifies the TOAST value from which chunks are being fetched. - * attrsize is the total size of the TOAST value. - * result is the varlena into which the results should be written. - * * This function is a modified copy of heap_fetch_toast_slice(). 
The difference * is that it holds the open toast relation, index and other intermediate data * for detoasting in the Detoaster struct, to allow them to be reused over many @@ -245,12 +240,9 @@ detoaster_close(Detoaster *detoaster) } } -/* ---------- - * toast_fetch_datum - - * - * Reconstruct an in memory Datum from the chunks saved - * in the toast relation - * ---------- +/* + * Copy of Postgres' toast_fetch_datum(): Reconstruct an in memory Datum from + * the chunks saved in the toast relation. */ static struct varlena * ts_toast_fetch_datum(struct varlena *attr, Detoaster *detoaster) @@ -343,15 +335,9 @@ ts_toast_decompress_datum(struct varlena *attr) } #endif -/* ---------- - * detoast_attr - - * - * Public entry point to get back a toasted value from compression - * or external storage. The result is always non-extended varlena form. - * - * Note some callers assume that if the input is an EXTERNAL or COMPRESSED - * datum, the result will be a pfree'able chunk. - * ---------- +/* + * Modification of Postgres' detoast_attr() where we use the stateful Detoaster + * and skip some cases that don't occur for the toasted compressed data. */ struct varlena * detoaster_detoast_attr(struct varlena *attr, Detoaster *detoaster) diff --git a/tsl/test/expected/compressed_detoaster.out b/tsl/test/expected/compressed_detoaster.out index e20ff9c0e45..d2476ed857e 100644 --- a/tsl/test/expected/compressed_detoaster.out +++ b/tsl/test/expected/compressed_detoaster.out @@ -12,9 +12,9 @@ NOTICE: adding not-null constraint to column "ts" (1 row) alter table longstr set (timescaledb.compress); --- We want to test the case for inline compression which. It is technically --- possible, but very hard to hit with the usual toast_tuple_target = 128 on --- compressed chunks. So here we increase the toast_tuple_target to simplify +-- We want to test the case for inline compression. It is technically possible, +-- but very hard to hit with the usual toast_tuple_target = 128 on compressed +-- chunks. So here we increase the toast_tuple_target to simplify -- testing. select format('%I.%I', schema_name, table_name) compressed_table from _timescaledb_catalog.hypertable diff --git a/tsl/test/sql/compressed_detoaster.sql b/tsl/test/sql/compressed_detoaster.sql index 085ed82d82a..4ebac311bdc 100644 --- a/tsl/test/sql/compressed_detoaster.sql +++ b/tsl/test/sql/compressed_detoaster.sql @@ -9,9 +9,9 @@ select create_hypertable('longstr', 'ts'); alter table longstr set (timescaledb.compress); --- We want to test the case for inline compression which. It is technically --- possible, but very hard to hit with the usual toast_tuple_target = 128 on --- compressed chunks. So here we increase the toast_tuple_target to simplify +-- We want to test the case for inline compression. It is technically possible, +-- but very hard to hit with the usual toast_tuple_target = 128 on compressed +-- chunks. So here we increase the toast_tuple_target to simplify -- testing. select format('%I.%I', schema_name, table_name) compressed_table from _timescaledb_catalog.hypertable From 90abbbd2f0bc72b28e1f68ed0f337a377e7dae8f Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 12 Dec 2023 17:15:52 +0100 Subject: [PATCH 063/249] More tests for decompression Add separate testing for bulk and row-by-row decompression, so that the errors in one don't mask the errors in the other. Also add fuzzing for row-by-row decompression, for text columns as well. 
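(Illustrative sketch, not part of the patch: a minimal example of how the reworked fuzzing entry point is driven from SQL once the diff below is applied, assuming a build with TS_COMPRESSION_FUZZING defined. The 'rowbyrow' kind, the algorithm/type pair, and the run count are just examples mirroring the libfuzzer.yaml changes later in this patch; :TSL_MODULE_PATHNAME is the test suite's psql variable for the tsl module path.

-- register the C entry point with its new four-argument signature
create or replace function fuzz(algo cstring, type regtype, kind cstring, runs int)
returns int as :TSL_MODULE_PATHNAME, 'ts_fuzz_compression' language c;

-- fuzz row-by-row decompression of dictionary-compressed text values
select fuzz('dictionary', 'text', 'rowbyrow', 1000000);)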
--- .github/workflows/libfuzzer.yaml | 17 +- tsl/src/compression/array.c | 9 +- tsl/src/compression/compression.c | 152 ++++++++++----- tsl/src/compression/compression.h | 2 +- tsl/src/compression/datum_serialize.c | 11 ++ ...pl.c => decompress_arithmetic_test_impl.c} | 174 ++++++++++-------- .../compression/decompress_text_test_impl.c | 151 +++++++++++++++ tsl/src/compression/dictionary.c | 33 ++-- .../compression/simple8b_rle_decompress_all.h | 9 +- tsl/test/expected/compression_algos.out | 89 ++++++--- .../fuzzing/compression/array-text/array1 | Bin 0 -> 15917 bytes ...h-1ac67d1b8ebedeb93c515b5244f6e60613c1af0b | Bin 0 -> 440 bytes ...h-b6cfa8632a8bf28e90198ec167f3f63258880f77 | Bin 0 -> 478 bytes ...h-49789ae0866d7d630f2075dc26812433f4af1db3 | Bin 0 -> 231 bytes ...h-707526606a02c72364e1c8ea82357eead6c74f60 | Bin 0 -> 81 bytes ...h-b0db762535226b28c0b55ffe00d5537fd8ef7e39 | Bin 0 -> 52 bytes ...h-bd6e8aa1ebeb96cf8aa644c5ef6eb2214dee0ffc | Bin 0 -> 56 bytes .../fuzzing/compression/dictionary-text/dict1 | Bin 0 -> 56 bytes tsl/test/sql/compression_algos.sql | 55 ++++-- 19 files changed, 515 insertions(+), 187 deletions(-) rename tsl/src/compression/{decompress_test_impl.c => decompress_arithmetic_test_impl.c} (51%) create mode 100644 tsl/src/compression/decompress_text_test_impl.c create mode 100644 tsl/test/fuzzing/compression/array-text/array1 create mode 100644 tsl/test/fuzzing/compression/array-text/crash-1ac67d1b8ebedeb93c515b5244f6e60613c1af0b create mode 100644 tsl/test/fuzzing/compression/array-text/crash-b6cfa8632a8bf28e90198ec167f3f63258880f77 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/crash-49789ae0866d7d630f2075dc26812433f4af1db3 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/crash-707526606a02c72364e1c8ea82357eead6c74f60 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/crash-b0db762535226b28c0b55ffe00d5537fd8ef7e39 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/crash-bd6e8aa1ebeb96cf8aa644c5ef6eb2214dee0ffc create mode 100644 tsl/test/fuzzing/compression/dictionary-text/dict1 diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 883d7111371..06e649c5032 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -16,7 +16,8 @@ jobs: strategy: fail-fast: false matrix: - case: [ { algo: gorilla, type: float8 }, { algo: deltadelta, type: int8 } ] + case: [ { algo: gorilla, type: float8 }, { algo: deltadelta, type: int8 }, + { algo: array, type: text }, { algo: dictionary, type: text } ] name: Fuzz decompression ${{ matrix.case.algo }} ${{ matrix.case.type }} runs-on: ubuntu-22.04 @@ -89,6 +90,10 @@ jobs: export LIBFUZZER_PATH=$(dirname "$(find $(llvm-config --libdir) -name libclang_rt.fuzzer_no_main-x86_64.a | head -1)") + # Some pointers for the next time we have linking/undefined symbol problems: + # http://web.archive.org/web/20200926071757/https://github.com/google/sanitizers/issues/111 + # http://web.archive.org/web/20231101091231/https://github.com/cms-sw/cmssw/issues/40680 + cmake -B build -S . 
-DASSERTIONS=ON -DLINTER=OFF -DCMAKE_VERBOSE_MAKEFILE=1 \ -DWARNINGS_AS_ERRORS=1 -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_COMPILER=clang \ -DCMAKE_C_FLAGS="-fsanitize=fuzzer-no-link -lstdc++ -L$LIBFUZZER_PATH -l:libclang_rt.fuzzer_no_main-x86_64.a -static-libsan" \ @@ -141,19 +146,23 @@ jobs: # Create the fuzzing function export MODULE_NAME=$(basename $(find $HOME/$PG_INSTALL_DIR -name "timescaledb-tsl-*.so")) - psql -a -c "create or replace function fuzz(algo cstring, type regtype, runs int) returns int as '"$MODULE_NAME"', 'ts_fuzz_compression' language c;" + psql -a -c "create or replace function fuzz(algo cstring, type regtype, kind cstring, runs int) returns int as '"$MODULE_NAME"', 'ts_fuzz_compression' language c;" + + # array has a peculiar recv function that recompresses all input, so + # fuzzing it is much slower. + runs=${{ matrix.case.algo == 'array' && 1000000 || 100000000 }} # Start more fuzzing processes in the background. We won't even monitor # their progress, because the server will panic if they find an error. for x in {2..$(nproc)} do - psql -v ON_ERROR_STOP=1 -c "select fuzz('${{ matrix.case.algo }}', '${{ matrix.case.type }}', 100000000);" & + psql -v ON_ERROR_STOP=1 -c "select fuzz('${{ matrix.case.algo }}', '${{ matrix.case.type }}', 'rowbyrow', $runs);" & done # Start the one fuzzing process that we will monitor, in foreground. # The LLVM fuzzing driver calls exit(), so we expect to lose the connection. ret=0 - psql -v ON_ERROR_STOP=1 -c "select fuzz('${{ matrix.case.algo }}', '${{ matrix.case.type }}', 100000000);" || ret=$? + psql -v ON_ERROR_STOP=1 -c "select fuzz('${{ matrix.case.algo }}', '${{ matrix.case.type }}', 'rowbyrow', $runs);" || ret=$? if ! [ $ret -eq 2 ] then >&2 echo "Unexpected psql exit code $ret" diff --git a/tsl/src/compression/array.c b/tsl/src/compression/array.c index 58535be92e6..c5068d95cb0 100644 --- a/tsl/src/compression/array.c +++ b/tsl/src/compression/array.c @@ -240,6 +240,7 @@ array_compression_serialization_size(ArrayCompressorSerializationInfo *info) uint32 array_compression_serialization_num_elements(ArrayCompressorSerializationInfo *info) { + CheckCompressedData(info->sizes != NULL); return info->sizes->num_elements; } @@ -602,7 +603,6 @@ array_compressed_data_send(StringInfo buffer, const char *_serialized_data, Size Datum array_compressed_recv(StringInfo buffer) { - ArrayCompressorSerializationInfo *data; uint8 has_nulls; Oid element_type; @@ -611,9 +611,12 @@ array_compressed_recv(StringInfo buffer) element_type = binary_string_get_type(buffer); - data = array_compressed_data_recv(buffer, element_type); + ArrayCompressorSerializationInfo *info = array_compressed_data_recv(buffer, element_type); - PG_RETURN_POINTER(array_compressed_from_serialization_info(data, element_type)); + CheckCompressedData(info->sizes != NULL); + CheckCompressedData(has_nulls == (info->nulls != NULL)); + + PG_RETURN_POINTER(array_compressed_from_serialization_info(info, element_type)); } void diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c index fdd7f4c05f0..df09ee0fcf7 100644 --- a/tsl/src/compression/compression.c +++ b/tsl/src/compression/compression.c @@ -2244,48 +2244,72 @@ get_compression_algorithm(char *name) { return COMPRESSION_ALGORITHM_GORILLA; } + else if (pg_strcasecmp(name, "array") == 0) + { + return COMPRESSION_ALGORITHM_ARRAY; + } + else if (pg_strcasecmp(name, "dictionary") == 0) + { + return COMPRESSION_ALGORITHM_DICTIONARY; + } ereport(ERROR, (errmsg("unknown comrpession algorithm %s", name))); return 
_INVALID_COMPRESSION_ALGORITHM; } -#define ALGO gorilla +typedef enum +{ + DTT_BulkFuzzing, + DTT_RowByRowFuzzing, + DTT_RowByRow, + DTT_Bulk +} DecompressionTestType; + +#define ALGO GORILLA #define CTYPE float8 -#define PGTYPE FLOAT8OID +#define PG_TYPE_PREFIX FLOAT8 #define DATUM_TO_CTYPE DatumGetFloat8 -#include "decompress_test_impl.c" +#include "decompress_arithmetic_test_impl.c" #undef ALGO #undef CTYPE -#undef PGTYPE +#undef PG_TYPE_PREFIX #undef DATUM_TO_CTYPE -#define ALGO deltadelta +#define ALGO DELTADELTA #define CTYPE int64 -#define PGTYPE INT8OID +#define PG_TYPE_PREFIX INT8 #define DATUM_TO_CTYPE DatumGetInt64 -#include "decompress_test_impl.c" +#include "decompress_arithmetic_test_impl.c" #undef ALGO #undef CTYPE -#undef PGTYPE +#undef PG_TYPE_PREFIX #undef DATUM_TO_CTYPE +#include "decompress_text_test_impl.c" + +#define APPLY_FOR_TYPES(X) \ + X(GORILLA, FLOAT8, RowByRow) \ + X(GORILLA, FLOAT8, Bulk) \ + X(DELTADELTA, INT8, RowByRow) \ + X(DELTADELTA, INT8, Bulk) \ + X(ARRAY, TEXT, RowByRow) \ + X(DICTIONARY, TEXT, RowByRow) \ + static int (*get_decompress_fn(int algo, Oid type))(const uint8 *Data, size_t Size, - bool extra_checks) + DecompressionTestType test_type) { - if (algo == COMPRESSION_ALGORITHM_GORILLA && type == FLOAT8OID) - { - return decompress_gorilla_float8; - } - else if (algo == COMPRESSION_ALGORITHM_DELTADELTA && type == INT8OID) - { - return decompress_deltadelta_int64; - } +#define DISPATCH(ALGO, PGTYPE, KIND) \ + if (algo == COMPRESSION_ALGORITHM_##ALGO && type == PGTYPE##OID) \ + { return decompress_##ALGO##_##PGTYPE; } + + APPLY_FOR_TYPES(DISPATCH) elog(ERROR, "no decompression function for compression algorithm %d with element type %d", algo, type); pg_unreachable(); +#undef DISPATCH } /* @@ -2295,13 +2319,15 @@ static int (*get_decompress_fn(int algo, Oid type))(const uint8 *Data, size_t Si * if we error out later. */ static void -read_compressed_data_file_impl(int algo, Oid type, const char *path, volatile int *bytes, int *rows) +read_compressed_data_file_impl(int algo, Oid type, const char *path, bool bulk, volatile int *bytes, + int *rows) { FILE *f = fopen(path, "r"); if (!f) { - elog(ERROR, "could not open the file '%s'", path); + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FILE), errmsg("could not open the file '%s'", path))); } fseek(f, 0, SEEK_END); @@ -2325,14 +2351,16 @@ read_compressed_data_file_impl(int algo, Oid type, const char *path, volatile in if (elements_read != 1) { - elog(ERROR, "failed to read file '%s'", path); + ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FILE), errmsg("failed to read file '%s'", path))); } fclose(f); string[fsize] = 0; - *rows = get_decompress_fn(algo, type)((const uint8 *) string, fsize, /* extra_checks = */ true); + *rows = get_decompress_fn(algo, type)((const uint8 *) string, + fsize, + /* test_type = */ bulk ? 
DTT_Bulk : DTT_RowByRow); } TS_FUNCTION_INFO_V1(ts_read_compressed_data_file); @@ -2346,6 +2374,7 @@ ts_read_compressed_data_file(PG_FUNCTION_ARGS) read_compressed_data_file_impl(get_compression_algorithm(PG_GETARG_CSTRING(0)), PG_GETARG_OID(1), PG_GETARG_CSTRING(2), + PG_GETARG_BOOL(3), &bytes, &rows); PG_RETURN_INT32(rows); @@ -2447,7 +2476,12 @@ ts_read_compressed_data_directory(PG_FUNCTION_ARGS) volatile int bytes = 0; PG_TRY(); { - read_compressed_data_file_impl(algo, PG_GETARG_OID(1), path, &bytes, &rows); + read_compressed_data_file_impl(algo, + PG_GETARG_OID(1), + path, + PG_GETARG_BOOL(3), + &bytes, + &rows); values[out_rows] = Int32GetDatum(rows); nulls[out_rows] = false; } @@ -2488,41 +2522,61 @@ ts_read_compressed_data_directory(PG_FUNCTION_ARGS) #ifdef TS_COMPRESSION_FUZZING +static DecompressionTestType +get_fuzzing_kind(const char* s) +{ + if (strcmp(s, "bulk") == 0) + { + return DTT_BulkFuzzing; + } + else if (strcmp(s, "rowbyrow") == 0) + { + return DTT_RowByRowFuzzing; + } + else + { + elog(ERROR, "unknown fuzzing type '%s'", s); + } +} + /* * This is our test function that will be called by the libfuzzer driver. It * has to catch the postgres exceptions normally produced for corrupt data. */ static int -llvm_fuzz_target_generic(int (*target)(const uint8_t *Data, size_t Size, bool extra_checks), - const uint8_t *Data, size_t Size) +target_generic(int (*test_fn)(const uint8_t *, size_t, DecompressionTestType), + const uint8_t *Data, size_t Size, DecompressionTestType test_type) { MemoryContextReset(CurrentMemoryContext); + int res = 0; PG_TRY(); { CHECK_FOR_INTERRUPTS(); - target(Data, Size, /* extra_checks = */ false); + res = test_fn(Data, Size, test_type); } PG_CATCH(); { + /* EmitErrorReport(); */ FlushErrorState(); } PG_END_TRY(); - /* We always return 0, and -1 would mean "don't include it into corpus". */ - return 0; + /* + * -1 means "don't include it into corpus", return it if the test function + * says so, otherwise return 0. The test function also returns the number + * of rows for the correct data, the fuzzer doesn't understand these values. + */ + return res == -1 ? -1 : 0; } -static int -llvm_fuzz_target_gorilla_float8(const uint8_t *Data, size_t Size) -{ - return llvm_fuzz_target_generic(decompress_gorilla_float8, Data, Size); -} -static int -llvm_fuzz_target_deltadelta_int64(const uint8_t *Data, size_t Size) -{ - return llvm_fuzz_target_generic(decompress_deltadelta_int64, Data, Size); -} +#define DECLARE_TARGET(ALGO, PGTYPE, KIND) \ +static int target_##ALGO##_##PGTYPE##_##KIND (const uint8_t *D, size_t S) { return target_generic(decompress_##ALGO##_##PGTYPE, D, S, DTT_##KIND##Fuzzing); } + +APPLY_FOR_TYPES(DECLARE_TARGET) + +#undef DECLARE_TARGET + /* * libfuzzer fuzzing driver that we import from LLVM libraries. 
It will run our @@ -2560,7 +2614,7 @@ ts_fuzz_compression(PG_FUNCTION_ARGS) //"-print_full_coverage=1", //"-print_final_stats=1", //"-help=1", - psprintf("-runs=%d", PG_GETARG_INT32(2)), + psprintf("-runs=%d", PG_GETARG_INT32(3)), "corpus" /* in the database directory */, NULL }; char **argv = argvdata; @@ -2568,16 +2622,18 @@ ts_fuzz_compression(PG_FUNCTION_ARGS) int algo = get_compression_algorithm(PG_GETARG_CSTRING(0)); Oid type = PG_GETARG_OID(1); - int (*target)(const uint8_t *, size_t); - if (algo == COMPRESSION_ALGORITHM_GORILLA && type == FLOAT8OID) - { - target = llvm_fuzz_target_gorilla_float8; - } - else if (algo == COMPRESSION_ALGORITHM_DELTADELTA && type == INT8OID) - { - target = llvm_fuzz_target_deltadelta_int64; - } - else + int kind = get_fuzzing_kind(PG_GETARG_CSTRING(2)); + + int (*target)(const uint8_t *, size_t) = NULL; + +#define DISPATCH(ALGO, PGTYPE, KIND) \ +if (algo == COMPRESSION_ALGORITHM_##ALGO && type == PGTYPE##OID && kind == DTT_##KIND##Fuzzing) \ +{ target = target_##ALGO##_##PGTYPE##_##KIND ; } + + +APPLY_FOR_TYPES(DISPATCH) + + if (target == NULL) { elog(ERROR, "no llvm fuzz target for compression algorithm %d and type %d", algo, type); } diff --git a/tsl/src/compression/compression.h b/tsl/src/compression/compression.h index 3d34ea95ca4..7aafc9fbb22 100644 --- a/tsl/src/compression/compression.h +++ b/tsl/src/compression/compression.h @@ -380,7 +380,7 @@ extern RowDecompressor build_decompressor(Relation in_rel, Relation out_rel); #define CheckCompressedData(X) \ if (unlikely(!(X))) \ - ereport(ERROR, CORRUPT_DATA_MESSAGE) + ereport(ERROR, CORRUPT_DATA_MESSAGE, errdetail(#X)) inline static void * consumeCompressedData(StringInfo si, int bytes) diff --git a/tsl/src/compression/datum_serialize.c b/tsl/src/compression/datum_serialize.c index c2ac0c06bee..bf3a799cf25 100644 --- a/tsl/src/compression/datum_serialize.c +++ b/tsl/src/compression/datum_serialize.c @@ -20,6 +20,8 @@ #include "datum_serialize.h" #include "compat/compat.h" +#include "compression.h" + typedef struct DatumSerializer { Oid type_oid; @@ -305,6 +307,15 @@ bytes_to_datum_and_advance(DatumDeserializer *deserializer, const char **ptr) *ptr = (Pointer) att_align_pointer(*ptr, deserializer->type_align, deserializer->type_len, *ptr); + if (deserializer->type_len == -1) + { + /* + * Check for potentially corrupt varlena headers since we're reading them + * directly from compressed data. We can only have a plain datum + * with 1-byte or 4-byte header here, no TOAST or compressed data. + */ + CheckCompressedData(VARATT_IS_4B_U(*ptr) || (VARATT_IS_1B(*ptr) && !VARATT_IS_1B_E(*ptr))); + } res = fetch_att(*ptr, deserializer->type_by_val, deserializer->type_len); *ptr = att_addlength_pointer(*ptr, deserializer->type_len, *ptr); return res; diff --git a/tsl/src/compression/decompress_test_impl.c b/tsl/src/compression/decompress_arithmetic_test_impl.c similarity index 51% rename from tsl/src/compression/decompress_test_impl.c rename to tsl/src/compression/decompress_arithmetic_test_impl.c index 69897d45b99..963a963aa72 100644 --- a/tsl/src/compression/decompress_test_impl.c +++ b/tsl/src/compression/decompress_arithmetic_test_impl.c @@ -4,19 +4,78 @@ * LICENSE-TIMESCALE for a copy of the license. 
*/ -#define FUNCTION_NAME_HELPER(X, Y) decompress_##X##_##Y -#define FUNCTION_NAME(X, Y) FUNCTION_NAME_HELPER(X, Y) +#define FUNCTION_NAME_HELPER3(X, Y, Z) X##_##Y##_##Z +#define FUNCTION_NAME3(X, Y, Z) FUNCTION_NAME_HELPER3(X, Y, Z) +#define FUNCTION_NAME_HELPER2(X, Y) X##_##Y +#define FUNCTION_NAME2(X, Y) FUNCTION_NAME_HELPER2(X, Y) + +#define PG_TYPE_OID_HELPER(X) X##OID +#define PG_TYPE_OID_HELPER2(X) PG_TYPE_OID_HELPER(X) +#define PG_TYPE_OID PG_TYPE_OID_HELPER2(PG_TYPE_PREFIX) + +static void +FUNCTION_NAME2(check_arrow, CTYPE)(ArrowArray *arrow, int error_type, DecompressResult *results, + int n) +{ + if (n != arrow->length) + { + ereport(error_type, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("the bulk decompression result does not match"), + errdetail("Expected %d elements, got %d.", n, (int) arrow->length))); + } + + for (int i = 0; i < n; i++) + { + const bool arrow_isnull = !arrow_row_is_valid(arrow->buffers[0], i); + if (arrow_isnull != results[i].is_null) + { + ereport(error_type, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("the bulk decompression result does not match"), + errdetail("Expected null %d, got %d at row %d.", + results[i].is_null, + arrow_isnull, + i))); + } -#define TOSTRING_HELPER(x) #x -#define TOSTRING(x) TOSTRING_HELPER(x) + if (!results[i].is_null) + { + const CTYPE arrow_value = ((CTYPE *) arrow->buffers[1])[i]; + const CTYPE rowbyrow_value = DATUM_TO_CTYPE(results[i].val); + + /* + * Floats can also be NaN/infinite and the comparison doesn't + * work in that case. + */ + if (isfinite((double) arrow_value) != isfinite((double) rowbyrow_value)) + { + ereport(error_type, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("the bulk decompression result does not match"), + errdetail("At row %d\n", i))); + } + + if (isfinite((double) arrow_value) && arrow_value != rowbyrow_value) + { + ereport(error_type, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("the bulk decompression result does not match"), + errdetail("At row %d\n", i))); + } + } + } +} /* * Try to decompress the given compressed data. Used for fuzzing and for checking * the examples found by fuzzing. For fuzzing we do less checks to keep it - * faster and the coverage space smaller. + * faster and the coverage space smaller. This is a generic implementation + * for arithmetic types. */ static int -FUNCTION_NAME(ALGO, CTYPE)(const uint8 *Data, size_t Size, bool extra_checks) +FUNCTION_NAME3(decompress, ALGO, PG_TYPE_PREFIX)(const uint8 *Data, size_t Size, + DecompressionTestType test_type) { StringInfoData si = { .data = (char *) Data, .len = Size }; @@ -24,7 +83,7 @@ FUNCTION_NAME(ALGO, CTYPE)(const uint8 *Data, size_t Size, bool extra_checks) CheckCompressedData(algo > 0 && algo < _END_COMPRESSION_ALGORITHMS); - if (algo != get_compression_algorithm(TOSTRING(ALGO))) + if (algo != FUNCTION_NAME2(COMPRESSION_ALGORITHM, ALGO)) { /* * It's convenient to fuzz only one algorithm at a time. We specialize @@ -36,33 +95,32 @@ FUNCTION_NAME(ALGO, CTYPE)(const uint8 *Data, size_t Size, bool extra_checks) Datum compressed_data = definitions[algo].compressed_data_recv(&si); - if (!extra_checks) + DecompressAllFunction decompress_all = tsl_get_decompress_all_function(algo); + + if (test_type == DTT_BulkFuzzing) { /* * For routine fuzzing, we only run bulk decompression to make it faster * and the coverage space smaller. 
*/ - DecompressAllFunction decompress_all = tsl_get_decompress_all_function(algo); - decompress_all(compressed_data, PGTYPE, CurrentMemoryContext); + decompress_all(compressed_data, PG_TYPE_OID, CurrentMemoryContext); return 0; } - /* - * Test bulk decompression. This might hide some errors in the row-by-row - * decompression, but testing both is significantly more complicated, and - * the row-by-row is old and stable. - */ ArrowArray *arrow = NULL; - DecompressAllFunction decompress_all = tsl_get_decompress_all_function(algo); - if (decompress_all) + if (test_type == DTT_Bulk) { - arrow = decompress_all(compressed_data, PGTYPE, CurrentMemoryContext); + /* + * Test bulk decompression. Have to do this before row-by-row decompression + * so that the latter doesn't hide the errors. + */ + arrow = decompress_all(compressed_data, PG_TYPE_OID, CurrentMemoryContext); } /* * Test row-by-row decompression. */ - DecompressionIterator *iter = definitions[algo].iterator_init_forward(compressed_data, PGTYPE); + DecompressionIterator *iter = definitions[algo].iterator_init_forward(compressed_data, PG_TYPE_OID); DecompressResult results[GLOBAL_MAX_ROWS_PER_COMPRESSION]; int n = 0; for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) @@ -76,65 +134,19 @@ FUNCTION_NAME(ALGO, CTYPE)(const uint8 *Data, size_t Size, bool extra_checks) } /* Check that both ways of decompression match. */ - if (arrow) + if (test_type == DTT_Bulk) { - if (n != arrow->length) - { - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("the bulk decompression result does not match"), - errdetail("Expected %d elements, got %d.", n, (int) arrow->length))); - } - - for (int i = 0; i < n; i++) - { - const bool arrow_isnull = !arrow_row_is_valid(arrow->buffers[0], i); - if (arrow_isnull != results[i].is_null) - { - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("the bulk decompression result does not match"), - errdetail("Expected null %d, got %d at row %d.", - results[i].is_null, - arrow_isnull, - i))); - } - - if (!results[i].is_null) - { - const CTYPE arrow_value = ((CTYPE *) arrow->buffers[1])[i]; - const CTYPE rowbyrow_value = DATUM_TO_CTYPE(results[i].val); - - /* - * Floats can also be NaN/infinite and the comparison doesn't - * work in that case. - */ - if (isfinite((double) arrow_value) != isfinite((double) rowbyrow_value)) - { - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("the bulk decompression result does not match"), - errdetail("At row %d\n", i))); - } - - if (isfinite((double) arrow_value) && arrow_value != rowbyrow_value) - { - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("the bulk decompression result does not match"), - errdetail("At row %d\n", i))); - } - } - } + FUNCTION_NAME2(check_arrow, CTYPE)(arrow, ERROR, results, n); + return n; } /* - * Check that the result is still the same after we compress and decompress - * back. + * For row-by-row decompression, check that the result is still the same + * after we compress and decompress back. * * 1) Compress. */ - Compressor *compressor = definitions[algo].compressor_for_type(PGTYPE); + Compressor *compressor = definitions[algo].compressor_for_type(PG_TYPE_OID); for (int i = 0; i < n; i++) { @@ -158,7 +170,7 @@ FUNCTION_NAME(ALGO, CTYPE)(const uint8 *Data, size_t Size, bool extra_checks) /* * 2) Decompress and check that it's the same. 
*/ - iter = definitions[algo].iterator_init_forward(compressed_data, PGTYPE); + iter = definitions[algo].iterator_init_forward(compressed_data, PG_TYPE_OID); int nn = 0; for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) { @@ -194,11 +206,21 @@ FUNCTION_NAME(ALGO, CTYPE)(const uint8 *Data, size_t Size, bool extra_checks) } } + /* + * 3) The bulk decompression must absolutely work on the correct compressed + * data we've just generated. + */ + arrow = decompress_all(compressed_data, PG_TYPE_OID, CurrentMemoryContext); + FUNCTION_NAME2(check_arrow, CTYPE)(arrow, PANIC, results, n); + return n; } -#undef TOSTRING -#undef TOSTRING_HELPER +#undef FUNCTION_NAME3 +#undef FUNCTION_NAME_HELPER3 +#undef FUNCTION_NAME2 +#undef FUNCTION_NAME_HELPER2 -#undef FUNCTION_NAME -#undef FUNCTION_NAME_HELPER +#undef PG_TYPE_OID +#undef PG_TYPE_OID_HELPER +#undef PG_TYPE_OID_HELPER2 diff --git a/tsl/src/compression/decompress_text_test_impl.c b/tsl/src/compression/decompress_text_test_impl.c new file mode 100644 index 00000000000..cd9fc243572 --- /dev/null +++ b/tsl/src/compression/decompress_text_test_impl.c @@ -0,0 +1,151 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. + */ + +/* + * Try to decompress the given compressed data. Used for fuzzing and for checking + * the examples found by fuzzing. For fuzzing we do less checks to keep it + * faster and the coverage space smaller. This is a generic implementation + * for arithmetic types. + */ +static int +decompress_generic_text(const uint8 *Data, size_t Size, DecompressionTestType test_type, + int requested_algo) +{ + Assert(test_type == DTT_RowByRow || test_type == DTT_RowByRowFuzzing); + + StringInfoData si = { .data = (char *) Data, .len = Size }; + + const int data_algo = pq_getmsgbyte(&si); + + CheckCompressedData(data_algo > 0 && data_algo < _END_COMPRESSION_ALGORITHMS); + + if (data_algo != requested_algo) + { + /* + * It's convenient to fuzz only one algorithm at a time. We specialize + * the fuzz target for one algorithm, so that the fuzzer doesn't waste + * time discovering others from scratch. + */ + return -1; + } + + Datum compressed_data = definitions[data_algo].compressed_data_recv(&si); + + if (test_type == DTT_RowByRowFuzzing) + { + DecompressionIterator *iter = + definitions[data_algo].iterator_init_forward(compressed_data, TEXTOID); + for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)); + return 0; + } + + /* + * Test row-by-row decompression. + */ + DecompressionIterator *iter = + definitions[data_algo].iterator_init_forward(compressed_data, TEXTOID); + DecompressResult results[GLOBAL_MAX_ROWS_PER_COMPRESSION]; + int n = 0; + for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) + { + if (n >= GLOBAL_MAX_ROWS_PER_COMPRESSION) + { + elog(ERROR, "too many compressed rows"); + } + + results[n++] = r; + } + + /* + * For row-by-row decompression, check that the result is still the same + * after we compress and decompress back. + * Don't perform this check for other types of tests. + */ + if (test_type != DTT_RowByRow) + { + return n; + } + + /* + * 1) Compress. 
+ */ + Compressor *compressor = definitions[data_algo].compressor_for_type(TEXTOID); + + for (int i = 0; i < n; i++) + { + if (results[i].is_null) + { + compressor->append_null(compressor); + } + else + { + compressor->append_val(compressor, results[i].val); + } + } + + compressed_data = (Datum) compressor->finish(compressor); + if (compressed_data == 0) + { + /* The gorilla compressor returns NULL for all-null input sets. */ + return n; + }; + + /* + * 2) Decompress and check that it's the same. + */ + iter = definitions[data_algo].iterator_init_forward(compressed_data, TEXTOID); + int nn = 0; + for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) + { + if (r.is_null != results[nn].is_null) + { + elog(ERROR, "the repeated decompression result doesn't match"); + } + + if (!r.is_null) + { + const Datum old_value = results[nn].val; + const Datum new_value = r.val; + + /* + * Floats can also be NaN/infinite and the comparison doesn't + * work in that case. + */ + if (VARSIZE_ANY_EXHDR(old_value) != VARSIZE_ANY_EXHDR(new_value)) + { + elog(ERROR, "the repeated decompression result doesn't match"); + } + + if (strncmp(VARDATA_ANY(old_value), + VARDATA_ANY(new_value), + VARSIZE_ANY_EXHDR(new_value))) + { + elog(ERROR, "the repeated decompression result doesn't match"); + } + } + + nn++; + + if (nn > n) + { + elog(ERROR, "the repeated recompression result doesn't match"); + } + } + + return n; +} + +static int +decompress_ARRAY_TEXT(const uint8 *Data, size_t Size, DecompressionTestType test_type) +{ + return decompress_generic_text(Data, Size, test_type, COMPRESSION_ALGORITHM_ARRAY); +} + +static int +decompress_DICTIONARY_TEXT(const uint8 *Data, size_t Size, DecompressionTestType test_type) +{ + return decompress_generic_text(Data, Size, test_type, COMPRESSION_ALGORITHM_DICTIONARY); +} diff --git a/tsl/src/compression/dictionary.c b/tsl/src/compression/dictionary.c index 06fa9c6d5ec..0ad9b600d89 100644 --- a/tsl/src/compression/dictionary.c +++ b/tsl/src/compression/dictionary.c @@ -450,7 +450,7 @@ dictionary_decompression_iterator_try_next_forward(DecompressionIterator *iter_b .is_done = true, }; - Assert(result.val < iter->compressed->num_distinct); + CheckCompressedData(result.val < iter->compressed->num_distinct); return (DecompressResult){ .val = iter->values[result.val], .is_null = false, @@ -596,7 +596,7 @@ dictionary_compressed_send(CompressedDataHeader *header, StringInfo buffer) Datum dictionary_compressed_recv(StringInfo buffer) { - DictionaryCompressorSerializationInfo data = { 0 }; + DictionaryCompressorSerializationInfo info = { 0 }; uint8 has_nulls; Oid element_type; @@ -604,27 +604,30 @@ dictionary_compressed_recv(StringInfo buffer) CheckCompressedData(has_nulls == 0 || has_nulls == 1); element_type = binary_string_get_type(buffer); - data.dictionary_compressed_indexes = simple8brle_serialized_recv(buffer); - data.bitmaps_size = simple8brle_serialized_total_size(data.dictionary_compressed_indexes); - data.total_size = MAXALIGN(sizeof(DictionaryCompressed)) + data.bitmaps_size; + info.dictionary_compressed_indexes = simple8brle_serialized_recv(buffer); + info.bitmaps_size = simple8brle_serialized_total_size(info.dictionary_compressed_indexes); + info.total_size = MAXALIGN(sizeof(DictionaryCompressed)) + info.bitmaps_size; if (has_nulls) { - data.compressed_nulls = simple8brle_serialized_recv(buffer); - data.nulls_size = simple8brle_serialized_total_size(data.compressed_nulls); - data.total_size += data.nulls_size; + info.compressed_nulls = 
simple8brle_serialized_recv(buffer); + info.nulls_size = simple8brle_serialized_total_size(info.compressed_nulls); + info.total_size += info.nulls_size; } - data.dictionary_serialization_info = array_compressed_data_recv(buffer, element_type); - data.dictionary_size = array_compression_serialization_size(data.dictionary_serialization_info); - data.total_size += data.dictionary_size; - data.num_distinct = - array_compression_serialization_num_elements(data.dictionary_serialization_info); + info.dictionary_serialization_info = array_compressed_data_recv(buffer, element_type); + + CheckCompressedData(info.dictionary_serialization_info != NULL); + + info.dictionary_size = array_compression_serialization_size(info.dictionary_serialization_info); + info.total_size += info.dictionary_size; + info.num_distinct = + array_compression_serialization_num_elements(info.dictionary_serialization_info); - if (!AllocSizeIsValid(data.total_size)) + if (!AllocSizeIsValid(info.total_size)) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("compressed size exceeds the maximum allowed (%d)", (int) MaxAllocSize))); - return PointerGetDatum(dictionary_compressed_from_serialization_info(data, element_type)); + return PointerGetDatum(dictionary_compressed_from_serialization_info(info, element_type)); } diff --git a/tsl/src/compression/simple8b_rle_decompress_all.h b/tsl/src/compression/simple8b_rle_decompress_all.h index 48a168fb581..874ba71662b 100644 --- a/tsl/src/compression/simple8b_rle_decompress_all.h +++ b/tsl/src/compression/simple8b_rle_decompress_all.h @@ -86,7 +86,7 @@ FUNCTION_NAME(simple8brle_decompress_all_buf, * might be incorrect. \ */ \ const uint16 n_block_values = SIMPLE8B_NUM_ELEMENTS[X]; \ - CheckCompressedData(decompressed_index + n_block_values < n_buffer_elements); \ + CheckCompressedData(decompressed_index + n_block_values <= n_buffer_elements); \ \ const uint64 bitmask = simple8brle_selector_get_bitmask(X); \ \ @@ -155,10 +155,11 @@ FUNCTION_NAME(simple8brle_decompress_all, ELEMENT_TYPE)(Simple8bRleSerialized *c Assert(n_total_values <= GLOBAL_MAX_ROWS_PER_COMPRESSION); /* - * We need a significant padding of 64 elements, not bytes, here, because we - * work in Simple8B blocks which can contain up to 64 elements. + * We need a quite significant padding of 63 elements, not bytes, after the + * last element, because we work in Simple8B blocks which can contain up to + * 64 elements. 
*/ - const uint16 n_buffer_elements = ((n_total_values + 63) / 64 + 1) * 64; + const uint16 n_buffer_elements = n_total_values + 63; ELEMENT_TYPE *restrict decompressed_values = palloc(sizeof(ELEMENT_TYPE) * n_buffer_elements); diff --git a/tsl/test/expected/compression_algos.out b/tsl/test/expected/compression_algos.out index d2a8a6a5622..e089b96756d 100644 --- a/tsl/test/expected/compression_algos.out +++ b/tsl/test/expected/compression_algos.out @@ -1542,31 +1542,72 @@ DROP TABLE base_texts; -- Interesting corrupt data found by fuzzing -- ----------------------------------------------- \c :TEST_DBNAME :ROLE_SUPERUSER -create or replace function ts_read_compressed_data_directory(cstring, regtype, cstring) +create or replace function ts_read_compressed_data_file(cstring, regtype, cstring, bool = true) returns int +as :TSL_MODULE_PATHNAME, 'ts_read_compressed_data_file' language c; +\set ON_ERROR_STOP 0 +select ts_read_compressed_data_file('gorilla', 'float8', '--nonexistent'); +ERROR: could not open the file '--nonexistent' +\set ON_ERROR_STOP 1 +create or replace function ts_read_compressed_data_directory(cstring, regtype, cstring, bool) returns table(path text, bytes int, rows int, sqlstate text, location text) as :TSL_MODULE_PATHNAME, 'ts_read_compressed_data_directory' language c; -select count(*), coalesce((rows >= 0)::text, sqlstate) result -from ts_read_compressed_data_directory('gorilla', 'float8', (:'TEST_INPUT_DIR' || '/fuzzing/compression/gorilla-float8')::cstring) -group by 2 order by 1 desc; - count | result --------+-------- - 224 | XX001 - 55 | true - 23 | 08P01 -(3 rows) - -select count(*), coalesce((rows >= 0)::text, sqlstate) result -from ts_read_compressed_data_directory('deltadelta', 'int8', (:'TEST_INPUT_DIR' || '/fuzzing/compression/deltadelta-int8')::cstring) -group by 2 order by 1 desc; - count | result --------+-------- - 157 | XX001 - 80 | true - 13 | 08P01 - 1 | false +\set fn 'ts_read_compressed_data_directory(:''algo'', :''type'', format(''%s/fuzzing/compression/%s-%s'', :''TEST_INPUT_DIR'', :''algo'', :''type'')::cstring, ' +\set algo gorilla +\set type float8 +select count(*) + , coalesce((bulk.rows >= 0)::text, bulk.sqlstate) bulk_result + , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result +from :fn true) bulk join :fn false) rowbyrow using (path) +group by 2, 3 order by 1 desc +; + count | bulk_result | rowbyrow_result +-------+-------------+----------------- + 142 | XX001 | true + 82 | XX001 | XX001 + 55 | true | true + 23 | 08P01 | 08P01 (4 rows) -create or replace function ts_read_compressed_data_file(cstring, regtype, cstring) returns int -as :TSL_MODULE_PATHNAME, 'ts_read_compressed_data_file' language c; -select ts_read_compressed_data_file('gorilla', 'float8', '--nonexistent'); -ERROR: could not open the file '--nonexistent' +\set algo deltadelta +\set type int8 +select count(*) + , coalesce((bulk.rows >= 0)::text, bulk.sqlstate) bulk_result + , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result +from :fn true) bulk join :fn false) rowbyrow using (path) +group by 2, 3 order by 1 desc +; + count | bulk_result | rowbyrow_result +-------+-------------+----------------- + 106 | XX001 | XX001 + 69 | true | true + 62 | XX001 | true + 13 | 08P01 | 08P01 + 1 | false | false +(5 rows) + +\set algo array +\set type text +select count(*) + , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result +from :fn false) rowbyrow +group by 2 order by 1 desc +; + count | rowbyrow_result 
+-------+----------------- + 2 | true + 1 | XX001 +(2 rows) + +\set algo dictionary +\set type text +select count(*) + , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result +from :fn false) rowbyrow +group by 2 order by 1 desc +; + count | rowbyrow_result +-------+----------------- + 4 | XX001 + 1 | true +(2 rows) + diff --git a/tsl/test/fuzzing/compression/array-text/array1 b/tsl/test/fuzzing/compression/array-text/array1 new file mode 100644 index 0000000000000000000000000000000000000000..50dfba19732b8428b250e63489743522452728c0 GIT binary patch literal 15917 zcmZA8F|IC24S>-*A{$^Y*JZn0cQt4PZ$?N65PAViW*NM{gZaBp=yvkSL7k|7`uO*6 zfBEY#-+%eXzkd7p{_VftKR$l?`1r&BK0ZGF{I|dV`s=sRt zeL>$ReV_Dw()UTLTH`ectK<;kJF=>f+$GectNA;Jv*pxQ)+ST@u{3&s$v%+_ukKT?*W` z&s$vvytfwtxAA$a3xM19d8_OBzipqlx*q@A_Ia!8>A!8Cx4Itw_x5`CZ{zb;*QU!#L+vlyWhyJ#G-s*bhZ`;?c lety5AkLlI)-n|a>o?b=o>2;>}^y+y}uOq#uSIv8h{{tyD3_Sn< literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/array-text/crash-1ac67d1b8ebedeb93c515b5244f6e60613c1af0b b/tsl/test/fuzzing/compression/array-text/crash-1ac67d1b8ebedeb93c515b5244f6e60613c1af0b new file mode 100644 index 0000000000000000000000000000000000000000..305bfd2a1814bfc7e8effbc8a29c55cdd9daab65 GIT binary patch literal 440 zcmZQ%C`gY_PAo~x$xmk}Nv$Yh0E4u&)S}e95)*S%AeSd46V5k5;~S&$?ahrK0ts*( zhNvpc(fDSTsPbkOXnb=tz8M-H>1J>Gw5>A7vHm#5lpeh4L<`~#gY4G3hk~}5<>m>y}$4l3y z_VySl#aALK$Y!_Ib@kTWroRqDoMQjyO!1yVE!t4d4aN*yd6u(db=bSSoaS>C(mH9} z^&m&!18@xtdIEX^dhGQA2j9nEJ^L{AWAJ0}WANi2zxDEK7RqFiOcuyw&&k9eklu%a literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/crash-49789ae0866d7d630f2075dc26812433f4af1db3 b/tsl/test/fuzzing/compression/dictionary-text/crash-49789ae0866d7d630f2075dc26812433f4af1db3 new file mode 100644 index 0000000000000000000000000000000000000000..cf31c012e9210d5588c44ed71abbebed662689a3 GIT binary patch literal 231 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEiK7zFqk80;Dt8o+EY4V)PIuulNdL^vA& literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/crash-707526606a02c72364e1c8ea82357eead6c74f60 b/tsl/test/fuzzing/compression/dictionary-text/crash-707526606a02c72364e1c8ea82357eead6c74f60 new file mode 100644 index 0000000000000000000000000000000000000000..2061e2e7aad1a533bf4e22a2ec38d64897318540 GIT binary patch literal 81 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEh17+4q>1R4G_FgS|oFnqIX0LcI`6GMS| W{N#Dvzd>ps%0Z+Z5Hm1vFaQAMFAv-R literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/crash-b0db762535226b28c0b55ffe00d5537fd8ef7e39 b/tsl/test/fuzzing/compression/dictionary-text/crash-b0db762535226b28c0b55ffe00d5537fd8ef7e39 new file mode 100644 index 0000000000000000000000000000000000000000..a75b22fb44f6f6f52a8e2183159a8aced13ee384 GIT binary patch literal 52 ucmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEi~7)0_jaN0FMMHq?`b5j8*NC`v$ literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/crash-bd6e8aa1ebeb96cf8aa644c5ef6eb2214dee0ffc b/tsl/test/fuzzing/compression/dictionary-text/crash-bd6e8aa1ebeb96cf8aa644c5ef6eb2214dee0ffc new file mode 100644 index 0000000000000000000000000000000000000000..8262f92c11513080a09cb9e6e0b8b455d952fdcd GIT binary patch literal 56 tcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?YW1SA-N7zFq&AT&aRr8qG+6#zeF2*CgV literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/dict1 b/tsl/test/fuzzing/compression/dictionary-text/dict1 new file mode 100644 index 0000000000000000000000000000000000000000..08ae62cb6b1d9b958cf4342fe295478b58187fef GIT binary patch literal 56 
ycmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEiK7zFqk80;FrERYBYvlJ)hrUC#*m= 0)::text, sqlstate) result -from ts_read_compressed_data_directory('gorilla', 'float8', (:'TEST_INPUT_DIR' || '/fuzzing/compression/gorilla-float8')::cstring) -group by 2 order by 1 desc; +\set ON_ERROR_STOP 0 +select ts_read_compressed_data_file('gorilla', 'float8', '--nonexistent'); +\set ON_ERROR_STOP 1 -select count(*), coalesce((rows >= 0)::text, sqlstate) result -from ts_read_compressed_data_directory('deltadelta', 'int8', (:'TEST_INPUT_DIR' || '/fuzzing/compression/deltadelta-int8')::cstring) -group by 2 order by 1 desc; +create or replace function ts_read_compressed_data_directory(cstring, regtype, cstring, bool) +returns table(path text, bytes int, rows int, sqlstate text, location text) +as :TSL_MODULE_PATHNAME, 'ts_read_compressed_data_directory' language c; -create or replace function ts_read_compressed_data_file(cstring, regtype, cstring) returns int -as :TSL_MODULE_PATHNAME, 'ts_read_compressed_data_file' language c; +\set fn 'ts_read_compressed_data_directory(:''algo'', :''type'', format(''%s/fuzzing/compression/%s-%s'', :''TEST_INPUT_DIR'', :''algo'', :''type'')::cstring, ' + +\set algo gorilla +\set type float8 +select count(*) + , coalesce((bulk.rows >= 0)::text, bulk.sqlstate) bulk_result + , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result +from :fn true) bulk join :fn false) rowbyrow using (path) +group by 2, 3 order by 1 desc +; + +\set algo deltadelta +\set type int8 +select count(*) + , coalesce((bulk.rows >= 0)::text, bulk.sqlstate) bulk_result + , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result +from :fn true) bulk join :fn false) rowbyrow using (path) +group by 2, 3 order by 1 desc +; + +\set algo array +\set type text +select count(*) + , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result +from :fn false) rowbyrow +group by 2 order by 1 desc +; + +\set algo dictionary +\set type text +select count(*) + , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result +from :fn false) rowbyrow +group by 2 order by 1 desc +; -select ts_read_compressed_data_file('gorilla', 'float8', '--nonexistent'); \ No newline at end of file From a4f5fc9bd00f5a646f4b24261cc0359682f3319f Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 12 Dec 2023 18:25:44 +0100 Subject: [PATCH 064/249] cleanup --- .github/workflows/libfuzzer.yaml | 28 +++++++---- tsl/src/compression/array.c | 4 +- tsl/src/compression/compression.c | 48 +++++++++++-------- .../decompress_arithmetic_test_impl.c | 5 +- .../compression/decompress_text_test_impl.c | 8 +++- 5 files changed, 56 insertions(+), 37 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 06e649c5032..c8d1da77878 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -16,10 +16,16 @@ jobs: strategy: fail-fast: false matrix: - case: [ { algo: gorilla, type: float8 }, { algo: deltadelta, type: int8 }, - { algo: array, type: text }, { algo: dictionary, type: text } ] - - name: Fuzz decompression ${{ matrix.case.algo }} ${{ matrix.case.type }} + case: [ + { algo: gorilla , pgtype: float8, kind: rowbyrow }, + { algo: deltadelta, pgtype: int8 , kind: rowbyrow }, + { algo: gorilla , pgtype: float8, kind: bulk }, + { algo: deltadelta, pgtype: int8 , kind: bulk }, + { algo: array , pgtype: text , kind: rowbyrow }, + { algo: dictionary, pgtype: text , kind: rowbyrow }, + ] + + name: 
Fuzz decompression ${{ matrix.case.algo }} ${{ matrix.case.pgtype }} ${{ matrix.case.kind }} runs-on: ubuntu-22.04 env: PG_SRC_DIR: pgbuild @@ -120,8 +126,8 @@ jobs: path: db/corpus # If the initial corpus changes, probably it was updated by hand with # some important examples, and it makes sense to start anew from it. - key: "libfuzzer-corpus-2-${{ matrix.case.algo }}-${{ matrix.case.type }}-\ - ${{ hashFiles(format('tsl/test/fuzzing/compression/{0}-{1}', matrix.case.algo, matrix.case.type)) }}" + key: "libfuzzer-corpus-2-${{ matrix.case.algo }}-${{ matrix.case.pgtype }}-\ + ${{ hashFiles(format('tsl/test/fuzzing/compression/{0}-{1}', matrix.case.algo, matrix.case.pgtype)) }}" - name: Initialize the fuzzing corpus # cache-hit is only true for exact key matches, and we use prefix matches. @@ -130,7 +136,7 @@ jobs: # Copy the intial corpus files from the repository. The github actions # cache doesn't follow symlinks. mkdir -p db/corpus - find "tsl/test/fuzzing/compression/${{ matrix.case.algo }}-${{ matrix.case.type }}" -type f -exec cp -t db/corpus {} + + find "tsl/test/fuzzing/compression/${{ matrix.case.algo }}-${{ matrix.case.pgtype }}" -type f -exec cp -t db/corpus {} + - name: Run libfuzzer for compression run: | @@ -146,7 +152,7 @@ jobs: # Create the fuzzing function export MODULE_NAME=$(basename $(find $HOME/$PG_INSTALL_DIR -name "timescaledb-tsl-*.so")) - psql -a -c "create or replace function fuzz(algo cstring, type regtype, kind cstring, runs int) returns int as '"$MODULE_NAME"', 'ts_fuzz_compression' language c;" + psql -a -c "create or replace function fuzz(algo cstring, pgtype regtype, kind cstring, runs int) returns int as '"$MODULE_NAME"', 'ts_fuzz_compression' language c;" # array has a peculiar recv function that recompresses all input, so # fuzzing it is much slower. @@ -156,13 +162,15 @@ jobs: # their progress, because the server will panic if they find an error. for x in {2..$(nproc)} do - psql -v ON_ERROR_STOP=1 -c "select fuzz('${{ matrix.case.algo }}', '${{ matrix.case.type }}', 'rowbyrow', $runs);" & + psql -v ON_ERROR_STOP=1 -c "select fuzz('${{ matrix.case.algo }}', + '${{ matrix.case.pgtype }}', '${{ matrix.case.kind }}', $runs);" & done # Start the one fuzzing process that we will monitor, in foreground. # The LLVM fuzzing driver calls exit(), so we expect to lose the connection. ret=0 - psql -v ON_ERROR_STOP=1 -c "select fuzz('${{ matrix.case.algo }}', '${{ matrix.case.type }}', 'rowbyrow', $runs);" || ret=$? + psql -v ON_ERROR_STOP=1 -c "select fuzz('${{ matrix.case.algo }}', + '${{ matrix.case.pgtype }}', '${{ matrix.case.kind }}', $runs);" || ret=$? if ! 
[ $ret -eq 2 ] then >&2 echo "Unexpected psql exit code $ret" diff --git a/tsl/src/compression/array.c b/tsl/src/compression/array.c index c5068d95cb0..febd6937f1d 100644 --- a/tsl/src/compression/array.c +++ b/tsl/src/compression/array.c @@ -406,12 +406,12 @@ array_decompression_iterator_try_next_forward(DecompressionIterator *general_ite .is_done = true, }; - Assert(iter->data_offset + datum_size.val <= iter->num_data_bytes); + CheckCompressedData(iter->data_offset + datum_size.val <= iter->num_data_bytes); start_pointer = iter->data + iter->data_offset; val = bytes_to_datum_and_advance(iter->deserializer, &start_pointer); iter->data_offset += datum_size.val; - Assert(iter->data + iter->data_offset == start_pointer); + CheckCompressedData(iter->data + iter->data_offset == start_pointer); return (DecompressResult){ .val = val, diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c index df09ee0fcf7..6e252746f35 100644 --- a/tsl/src/compression/compression.c +++ b/tsl/src/compression/compression.c @@ -2287,20 +2287,22 @@ typedef enum #include "decompress_text_test_impl.c" -#define APPLY_FOR_TYPES(X) \ - X(GORILLA, FLOAT8, RowByRow) \ - X(GORILLA, FLOAT8, Bulk) \ - X(DELTADELTA, INT8, RowByRow) \ - X(DELTADELTA, INT8, Bulk) \ - X(ARRAY, TEXT, RowByRow) \ - X(DICTIONARY, TEXT, RowByRow) \ +#define APPLY_FOR_TYPES(X) \ + X(GORILLA, FLOAT8, RowByRow) \ + X(GORILLA, FLOAT8, Bulk) \ + X(DELTADELTA, INT8, RowByRow) \ + X(DELTADELTA, INT8, Bulk) \ + X(ARRAY, TEXT, RowByRow) \ + X(DICTIONARY, TEXT, RowByRow) static int (*get_decompress_fn(int algo, Oid type))(const uint8 *Data, size_t Size, DecompressionTestType test_type) { -#define DISPATCH(ALGO, PGTYPE, KIND) \ - if (algo == COMPRESSION_ALGORITHM_##ALGO && type == PGTYPE##OID) \ - { return decompress_##ALGO##_##PGTYPE; } +#define DISPATCH(ALGO, PGTYPE, KIND) \ + if (algo == COMPRESSION_ALGORITHM_##ALGO && type == PGTYPE##OID) \ + { \ + return decompress_##ALGO##_##PGTYPE; \ + } APPLY_FOR_TYPES(DISPATCH) @@ -2523,7 +2525,7 @@ ts_read_compressed_data_directory(PG_FUNCTION_ARGS) #ifdef TS_COMPRESSION_FUZZING static DecompressionTestType -get_fuzzing_kind(const char* s) +get_fuzzing_kind(const char *s) { if (strcmp(s, "bulk") == 0) { @@ -2544,8 +2546,8 @@ get_fuzzing_kind(const char* s) * has to catch the postgres exceptions normally produced for corrupt data. */ static int -target_generic(int (*test_fn)(const uint8_t *, size_t, DecompressionTestType), - const uint8_t *Data, size_t Size, DecompressionTestType test_type) +target_generic(int (*test_fn)(const uint8_t *, size_t, DecompressionTestType), const uint8_t *Data, + size_t Size, DecompressionTestType test_type) { MemoryContextReset(CurrentMemoryContext); @@ -2570,14 +2572,16 @@ target_generic(int (*test_fn)(const uint8_t *, size_t, DecompressionTestType), return res == -1 ? -1 : 0; } -#define DECLARE_TARGET(ALGO, PGTYPE, KIND) \ -static int target_##ALGO##_##PGTYPE##_##KIND (const uint8_t *D, size_t S) { return target_generic(decompress_##ALGO##_##PGTYPE, D, S, DTT_##KIND##Fuzzing); } +#define DECLARE_TARGET(ALGO, PGTYPE, KIND) \ + static int target_##ALGO##_##PGTYPE##_##KIND(const uint8_t *D, size_t S) \ + { \ + return target_generic(decompress_##ALGO##_##PGTYPE, D, S, DTT_##KIND##Fuzzing); \ + } APPLY_FOR_TYPES(DECLARE_TARGET) #undef DECLARE_TARGET - /* * libfuzzer fuzzing driver that we import from LLVM libraries. It will run our * test functions with random inputs. 
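For context, the driver mentioned in the comment above is the in-process entry point exported by the libFuzzer runtime. Below is a minimal sketch of how such a callback is typically wired up; it assumes the extension is linked against the libFuzzer runtime, and example_target plus the argument values are illustrative only, not taken from this patch.

    #include <stddef.h>
    #include <stdint.h>

    /* Documented libFuzzer entry point: repeatedly invokes the callback with
     * mutated inputs and terminates the process with exit() when finished. */
    extern int LLVMFuzzerRunDriver(int *argc, char ***argv,
                                   int (*callback)(const uint8_t *data, size_t size));

    /* Hypothetical callback: return 0 to keep the input, -1 to drop it from the corpus. */
    static int
    example_target(const uint8_t *data, size_t size)
    {
        return 0;
    }

    static void
    run_fuzzer(void)
    {
        char *argv_storage[] = { "fuzzer", "corpus", "-runs=1000000", NULL };
        char **argv = argv_storage;
        int argc = 3;
        /* Does not return normally: libFuzzer calls exit() when the run completes. */
        LLVMFuzzerRunDriver(&argc, &argv, example_target);
    }
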
@@ -2626,12 +2630,14 @@ ts_fuzz_compression(PG_FUNCTION_ARGS) int (*target)(const uint8_t *, size_t) = NULL; -#define DISPATCH(ALGO, PGTYPE, KIND) \ -if (algo == COMPRESSION_ALGORITHM_##ALGO && type == PGTYPE##OID && kind == DTT_##KIND##Fuzzing) \ -{ target = target_##ALGO##_##PGTYPE##_##KIND ; } - +#define DISPATCH(ALGO, PGTYPE, KIND) \ + if (algo == COMPRESSION_ALGORITHM_##ALGO && type == PGTYPE##OID && \ + kind == DTT_##KIND##Fuzzing) \ + { \ + target = target_##ALGO##_##PGTYPE##_##KIND; \ + } -APPLY_FOR_TYPES(DISPATCH) + APPLY_FOR_TYPES(DISPATCH) if (target == NULL) { diff --git a/tsl/src/compression/decompress_arithmetic_test_impl.c b/tsl/src/compression/decompress_arithmetic_test_impl.c index 963a963aa72..9437561ee51 100644 --- a/tsl/src/compression/decompress_arithmetic_test_impl.c +++ b/tsl/src/compression/decompress_arithmetic_test_impl.c @@ -75,7 +75,7 @@ FUNCTION_NAME2(check_arrow, CTYPE)(ArrowArray *arrow, int error_type, Decompress */ static int FUNCTION_NAME3(decompress, ALGO, PG_TYPE_PREFIX)(const uint8 *Data, size_t Size, - DecompressionTestType test_type) + DecompressionTestType test_type) { StringInfoData si = { .data = (char *) Data, .len = Size }; @@ -120,7 +120,8 @@ FUNCTION_NAME3(decompress, ALGO, PG_TYPE_PREFIX)(const uint8 *Data, size_t Size, /* * Test row-by-row decompression. */ - DecompressionIterator *iter = definitions[algo].iterator_init_forward(compressed_data, PG_TYPE_OID); + DecompressionIterator *iter = + definitions[algo].iterator_init_forward(compressed_data, PG_TYPE_OID); DecompressResult results[GLOBAL_MAX_ROWS_PER_COMPRESSION]; int n = 0; for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) diff --git a/tsl/src/compression/decompress_text_test_impl.c b/tsl/src/compression/decompress_text_test_impl.c index cd9fc243572..7872429f081 100644 --- a/tsl/src/compression/decompress_text_test_impl.c +++ b/tsl/src/compression/decompress_text_test_impl.c @@ -14,7 +14,10 @@ static int decompress_generic_text(const uint8 *Data, size_t Size, DecompressionTestType test_type, int requested_algo) { - Assert(test_type == DTT_RowByRow || test_type == DTT_RowByRowFuzzing); + if (!(test_type == DTT_RowByRow || test_type == DTT_RowByRowFuzzing)) + { + elog(ERROR, "decompression test type %d not supported for text", test_type); + } StringInfoData si = { .data = (char *) Data, .len = Size }; @@ -38,7 +41,8 @@ decompress_generic_text(const uint8 *Data, size_t Size, DecompressionTestType te { DecompressionIterator *iter = definitions[data_algo].iterator_init_forward(compressed_data, TEXTOID); - for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)); + for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) + ; return 0; } From 47b0160632f3f1af0ea13ff61e27aa2741f56090 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 12 Dec 2023 18:25:54 +0100 Subject: [PATCH 065/249] crash example --- ...crash-5eeac6ca5053992914dfb318e02e4c657a65c7cf | Bin 0 -> 174 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tsl/test/fuzzing/compression/dictionary-text/crash-5eeac6ca5053992914dfb318e02e4c657a65c7cf diff --git a/tsl/test/fuzzing/compression/dictionary-text/crash-5eeac6ca5053992914dfb318e02e4c657a65c7cf b/tsl/test/fuzzing/compression/dictionary-text/crash-5eeac6ca5053992914dfb318e02e4c657a65c7cf new file mode 100644 index 0000000000000000000000000000000000000000..8c347f8548e293f9c51b3e3213fa7f2f02863ae8 GIT binary patch 
literal 174 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEh17+C&80Hc@=!#AADGhr%V0O^A0 K1u|#`5=#JO!5Urw literal 0 HcmV?d00001 From 66aa3fbd6d828029a5910710cc7b6ba8ee41230b Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 12 Dec 2023 18:33:50 +0100 Subject: [PATCH 066/249] Make the bulk decompression function depend on PG type This is a refactoring to enable bulk decompression of array and dictionary compressed text columns, but not other types. Currently has no effect. --- tsl/src/compression/compression.c | 2 +- tsl/src/compression/compression.h | 3 ++- tsl/src/compression/decompress_test_impl.c | 4 ++-- .../nodes/decompress_chunk/compressed_batch.c | 3 ++- tsl/src/nodes/decompress_chunk/exec.c | 3 ++- tsl/src/nodes/decompress_chunk/planner.c | 21 ++++++++++++++++--- 6 files changed, 27 insertions(+), 9 deletions(-) diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c index fdd7f4c05f0..a380f270f05 100644 --- a/tsl/src/compression/compression.c +++ b/tsl/src/compression/compression.c @@ -129,7 +129,7 @@ DecompressionIterator *(*tsl_get_decompression_iterator_init(CompressionAlgorith } DecompressAllFunction -tsl_get_decompress_all_function(CompressionAlgorithm algorithm) +tsl_get_decompress_all_function(CompressionAlgorithm algorithm, Oid type) { if (algorithm >= _END_COMPRESSION_ALGORITHMS) elog(ERROR, "invalid compression algorithm %d", algorithm); diff --git a/tsl/src/compression/compression.h b/tsl/src/compression/compression.h index 3d34ea95ca4..335e54d067e 100644 --- a/tsl/src/compression/compression.h +++ b/tsl/src/compression/compression.h @@ -323,7 +323,8 @@ extern void decompress_chunk(Oid in_table, Oid out_table); extern DecompressionIterator *(*tsl_get_decompression_iterator_init( CompressionAlgorithm algorithm, bool reverse))(Datum, Oid element_type); -extern DecompressAllFunction tsl_get_decompress_all_function(CompressionAlgorithm algorithm); +extern DecompressAllFunction tsl_get_decompress_all_function(CompressionAlgorithm algorithm, + Oid type); typedef struct Chunk Chunk; typedef struct ChunkInsertState ChunkInsertState; diff --git a/tsl/src/compression/decompress_test_impl.c b/tsl/src/compression/decompress_test_impl.c index 69897d45b99..c452b173345 100644 --- a/tsl/src/compression/decompress_test_impl.c +++ b/tsl/src/compression/decompress_test_impl.c @@ -42,7 +42,7 @@ FUNCTION_NAME(ALGO, CTYPE)(const uint8 *Data, size_t Size, bool extra_checks) * For routine fuzzing, we only run bulk decompression to make it faster * and the coverage space smaller. */ - DecompressAllFunction decompress_all = tsl_get_decompress_all_function(algo); + DecompressAllFunction decompress_all = tsl_get_decompress_all_function(algo, PGTYPE); decompress_all(compressed_data, PGTYPE, CurrentMemoryContext); return 0; } @@ -53,7 +53,7 @@ FUNCTION_NAME(ALGO, CTYPE)(const uint8 *Data, size_t Size, bool extra_checks) * the row-by-row is old and stable. 
*/ ArrowArray *arrow = NULL; - DecompressAllFunction decompress_all = tsl_get_decompress_all_function(algo); + DecompressAllFunction decompress_all = tsl_get_decompress_all_function(algo, PGTYPE); if (decompress_all) { arrow = decompress_all(compressed_data, PGTYPE, CurrentMemoryContext); diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 5313d821f14..e0cb3073a25 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -123,7 +123,8 @@ decompress_column(DecompressContext *dcontext, DecompressBatchState *batch_state } DecompressAllFunction decompress_all = - tsl_get_decompress_all_function(header->compression_algorithm); + tsl_get_decompress_all_function(header->compression_algorithm, + column_description->typid); Assert(decompress_all != NULL); MemoryContext context_before_decompression = diff --git a/tsl/src/nodes/decompress_chunk/exec.c b/tsl/src/nodes/decompress_chunk/exec.c index 8fbeb78e410..868c995d36a 100644 --- a/tsl/src/nodes/decompress_chunk/exec.c +++ b/tsl/src/nodes/decompress_chunk/exec.c @@ -610,7 +610,8 @@ perform_vectorized_sum_int4(DecompressChunkState *chunk_state, Aggref *aggref) ArrowArray *arrow = NULL; DecompressAllFunction decompress_all = - tsl_get_decompress_all_function(header->compression_algorithm); + tsl_get_decompress_all_function(header->compression_algorithm, + column_description->typid); Assert(decompress_all != NULL); MemoryContext context_before_decompression = diff --git a/tsl/src/nodes/decompress_chunk/planner.c b/tsl/src/nodes/decompress_chunk/planner.c index f159d0d0893..d7bb17e643a 100644 --- a/tsl/src/nodes/decompress_chunk/planner.c +++ b/tsl/src/nodes/decompress_chunk/planner.c @@ -233,13 +233,28 @@ build_decompression_map(PlannerInfo *root, DecompressChunkPath *path, List *scan lappend_int(path->is_segmentby_column, compression_info && compression_info->segmentby_column_index != 0); - const bool bulk_decompression_possible = - destination_attno_in_uncompressed_chunk > 0 && compression_info && - tsl_get_decompress_all_function(compression_info->algo_id) != NULL; + /* + * Determine if we can use bulk decompression for this column. + */ + bool bulk_decompression_possible = false; + if (destination_attno_in_uncompressed_chunk > 0 && compression_info) + { + Oid typid = + get_atttype(path->info->chunk_rte->relid, destination_attno_in_uncompressed_chunk); + Assert(OidIsValid(typid)); + if (tsl_get_decompress_all_function(compression_info->algo_id, typid) != NULL) + { + bulk_decompression_possible = true; + } + } path->have_bulk_decompression_columns |= bulk_decompression_possible; path->bulk_decompression_column = lappend_int(path->bulk_decompression_column, bulk_decompression_possible); + /* + * Save information about decompressed columns in uncompressed chunk + * for planning of vectorized filters. 
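+ * The map goes from the column's attribute number in the uncompressed chunk
+ * to the corresponding compression info.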
+ */ if (destination_attno_in_uncompressed_chunk > 0) { path->uncompressed_chunk_attno_to_compression_info From b3318b1a18f6943ca11e1607c7f8babac2135e5e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 12 Dec 2023 18:39:18 +0100 Subject: [PATCH 067/249] reference --- tsl/test/expected/compression_algos.out | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tsl/test/expected/compression_algos.out b/tsl/test/expected/compression_algos.out index e089b96756d..cca0600eb4a 100644 --- a/tsl/test/expected/compression_algos.out +++ b/tsl/test/expected/compression_algos.out @@ -1607,7 +1607,7 @@ group by 2 order by 1 desc ; count | rowbyrow_result -------+----------------- - 4 | XX001 + 5 | XX001 1 | true (2 rows) From 768daed97e70931f98f2b839f5ee5c1e49c65cb3 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 12 Dec 2023 18:41:12 +0100 Subject: [PATCH 068/249] fixup --- .github/workflows/libfuzzer.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index c8d1da77878..7766bc75331 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -155,8 +155,8 @@ jobs: psql -a -c "create or replace function fuzz(algo cstring, pgtype regtype, kind cstring, runs int) returns int as '"$MODULE_NAME"', 'ts_fuzz_compression' language c;" # array has a peculiar recv function that recompresses all input, so - # fuzzing it is much slower. - runs=${{ matrix.case.algo == 'array' && 1000000 || 100000000 }} + # fuzzing it is much slower. dictionary recv also uses it. + runs=${{ (matrix.case.algo == 'array' || matrix.case.algo == 'dictionary') && 1000000 || 100000000 }} # Start more fuzzing processes in the background. We won't even monitor # their progress, because the server will panic if they find an error. From 7618723f5972c1bb821f495b249f014be8169e4d Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 12 Dec 2023 20:35:13 +0100 Subject: [PATCH 069/249] fix --- .../decompress_arithmetic_test_impl.c | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/tsl/src/compression/decompress_arithmetic_test_impl.c b/tsl/src/compression/decompress_arithmetic_test_impl.c index 9437561ee51..e2a6769c8f5 100644 --- a/tsl/src/compression/decompress_arithmetic_test_impl.c +++ b/tsl/src/compression/decompress_arithmetic_test_impl.c @@ -79,11 +79,11 @@ FUNCTION_NAME3(decompress, ALGO, PG_TYPE_PREFIX)(const uint8 *Data, size_t Size, { StringInfoData si = { .data = (char *) Data, .len = Size }; - const int algo = pq_getmsgbyte(&si); + const int data_algo = pq_getmsgbyte(&si); - CheckCompressedData(algo > 0 && algo < _END_COMPRESSION_ALGORITHMS); + CheckCompressedData(data_algo > 0 && data_algo < _END_COMPRESSION_ALGORITHMS); - if (algo != FUNCTION_NAME2(COMPRESSION_ALGORITHM, ALGO)) + if (data_algo != FUNCTION_NAME2(COMPRESSION_ALGORITHM, ALGO)) { /* * It's convenient to fuzz only one algorithm at a time. 
We specialize @@ -93,9 +93,18 @@ FUNCTION_NAME3(decompress, ALGO, PG_TYPE_PREFIX)(const uint8 *Data, size_t Size, return -1; } - Datum compressed_data = definitions[algo].compressed_data_recv(&si); + Datum compressed_data = definitions[data_algo].compressed_data_recv(&si); - DecompressAllFunction decompress_all = tsl_get_decompress_all_function(algo); + if (test_type == DTT_RowByRowFuzzing) + { + DecompressionIterator *iter = + definitions[data_algo].iterator_init_forward(compressed_data, PG_TYPE_OID); + for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) + ; + return 0; + } + + DecompressAllFunction decompress_all = tsl_get_decompress_all_function(data_algo); if (test_type == DTT_BulkFuzzing) { @@ -121,7 +130,7 @@ FUNCTION_NAME3(decompress, ALGO, PG_TYPE_PREFIX)(const uint8 *Data, size_t Size, * Test row-by-row decompression. */ DecompressionIterator *iter = - definitions[algo].iterator_init_forward(compressed_data, PG_TYPE_OID); + definitions[data_algo].iterator_init_forward(compressed_data, PG_TYPE_OID); DecompressResult results[GLOBAL_MAX_ROWS_PER_COMPRESSION]; int n = 0; for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) @@ -147,7 +156,7 @@ FUNCTION_NAME3(decompress, ALGO, PG_TYPE_PREFIX)(const uint8 *Data, size_t Size, * * 1) Compress. */ - Compressor *compressor = definitions[algo].compressor_for_type(PG_TYPE_OID); + Compressor *compressor = definitions[data_algo].compressor_for_type(PG_TYPE_OID); for (int i = 0; i < n; i++) { @@ -171,7 +180,7 @@ FUNCTION_NAME3(decompress, ALGO, PG_TYPE_PREFIX)(const uint8 *Data, size_t Size, /* * 2) Decompress and check that it's the same. */ - iter = definitions[algo].iterator_init_forward(compressed_data, PG_TYPE_OID); + iter = definitions[data_algo].iterator_init_forward(compressed_data, PG_TYPE_OID); int nn = 0; for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) { From 2c43267375eb4b4c4f9948e3ddc1039a9c1e3122 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 12 Dec 2023 22:50:19 +0100 Subject: [PATCH 070/249] remove unused variable --- tsl/src/compression/compression.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c index f779cbcca4a..d26a8347b55 100644 --- a/tsl/src/compression/compression.c +++ b/tsl/src/compression/compression.c @@ -876,7 +876,6 @@ row_compressor_init(CompressionSettings *settings, RowCompressor *row_compressor memset(row_compressor->compressed_is_null, 1, sizeof(bool) * num_columns_in_compressed_table); - int col = 0; for (int i = 0; i < uncompressed_tuple_desc->natts; i++) { Form_pg_attribute attr = TupleDescAttr(uncompressed_tuple_desc, i); @@ -944,7 +943,6 @@ row_compressor_init(CompressionSettings *settings, RowCompressor *row_compressor .max_metadata_attr_offset = -1, }; } - col++; } row_compressor->index_oid = From 2268052689d1c295323ea60896db67f7d7d8a70e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 12 Dec 2023 22:59:54 +0100 Subject: [PATCH 071/249] cleanup --- .github/workflows/libfuzzer.yaml | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 7766bc75331..8ca75f70dfa 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -17,12 +17,14 @@ jobs: fail-fast: false matrix: case: [ - { 
algo: gorilla , pgtype: float8, kind: rowbyrow }, - { algo: deltadelta, pgtype: int8 , kind: rowbyrow }, - { algo: gorilla , pgtype: float8, kind: bulk }, - { algo: deltadelta, pgtype: int8 , kind: bulk }, - { algo: array , pgtype: text , kind: rowbyrow }, - { algo: dictionary, pgtype: text , kind: rowbyrow }, + { algo: gorilla , pgtype: float8, kind: rowbyrow, runs: 500000000 }, + { algo: deltadelta, pgtype: int8 , kind: rowbyrow, runs: 500000000 }, + { algo: gorilla , pgtype: float8, kind: bulk , runs: 1000000000 }, + { algo: deltadelta, pgtype: int8 , kind: bulk , runs: 1000000000 }, + # array has a peculiar recv function that recompresses all input, so + # fuzzing it is much slower. The dictionary recv also uses it. + { algo: array , pgtype: text , kind: rowbyrow, runs: 10000000 }, + { algo: dictionary, pgtype: text , kind: rowbyrow, runs: 10000000 }, ] name: Fuzz decompression ${{ matrix.case.algo }} ${{ matrix.case.pgtype }} ${{ matrix.case.kind }} @@ -154,23 +156,20 @@ jobs: export MODULE_NAME=$(basename $(find $HOME/$PG_INSTALL_DIR -name "timescaledb-tsl-*.so")) psql -a -c "create or replace function fuzz(algo cstring, pgtype regtype, kind cstring, runs int) returns int as '"$MODULE_NAME"', 'ts_fuzz_compression' language c;" - # array has a peculiar recv function that recompresses all input, so - # fuzzing it is much slower. dictionary recv also uses it. - runs=${{ (matrix.case.algo == 'array' || matrix.case.algo == 'dictionary') && 1000000 || 100000000 }} # Start more fuzzing processes in the background. We won't even monitor # their progress, because the server will panic if they find an error. for x in {2..$(nproc)} do psql -v ON_ERROR_STOP=1 -c "select fuzz('${{ matrix.case.algo }}', - '${{ matrix.case.pgtype }}', '${{ matrix.case.kind }}', $runs);" & + '${{ matrix.case.pgtype }}', '${{ matrix.case.kind }}', ${{ matrix.case.runs }});" & done # Start the one fuzzing process that we will monitor, in foreground. # The LLVM fuzzing driver calls exit(), so we expect to lose the connection. ret=0 psql -v ON_ERROR_STOP=1 -c "select fuzz('${{ matrix.case.algo }}', - '${{ matrix.case.pgtype }}', '${{ matrix.case.kind }}', $runs);" || ret=$? + '${{ matrix.case.pgtype }}', '${{ matrix.case.kind }}', ${{ matrix.case.runs }});" || ret=$? if ! 
[ $ret -eq 2 ] then >&2 echo "Unexpected psql exit code $ret" @@ -195,14 +194,14 @@ jobs: if: always() uses: actions/upload-artifact@v3 with: - name: PostgreSQL log for ${{ matrix.case.algo }} ${{ matrix.case.type }} + name: PostgreSQL log for ${{ matrix.case }} path: postgres.log - name: Save fuzzer-generated crash cases if: always() uses: actions/upload-artifact@v3 with: - name: Crash cases for ${{ matrix.case.algo }} ${{ matrix.case.type }} + name: Crash cases for ${{ matrix.case }} path: db/crash-* # We use separate restore/save actions, because the default action won't @@ -241,5 +240,5 @@ jobs: if: always() && steps.collectlogs.outputs.coredumps == 'true' uses: actions/upload-artifact@v3 with: - name: Coredumps for ${{ matrix.case.algo }} ${{ matrix.case.type }} + name: Coredumps for ${{ matrix.case }} path: coredumps From 1bbc455613385fededc8a3976223efbebcc89f71 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 12 Dec 2023 23:15:38 +0100 Subject: [PATCH 072/249] format --- tsl/src/nodes/decompress_chunk/planner.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tsl/src/nodes/decompress_chunk/planner.c b/tsl/src/nodes/decompress_chunk/planner.c index 11507224cc4..8a4faffe0ca 100644 --- a/tsl/src/nodes/decompress_chunk/planner.c +++ b/tsl/src/nodes/decompress_chunk/planner.c @@ -217,7 +217,8 @@ build_decompression_map(PlannerInfo *root, DecompressChunkPath *path, List *scan Oid typoid = get_atttype(path->info->chunk_rte->relid, chunk_attno); const bool bulk_decompression_possible = !is_segment && destination_attno_in_uncompressed_chunk > 0 && - tsl_get_decompress_all_function(compression_get_default_algorithm(typoid), typoid) != NULL; + tsl_get_decompress_all_function(compression_get_default_algorithm(typoid), typoid) != + NULL; path->have_bulk_decompression_columns |= bulk_decompression_possible; path->bulk_decompression_column = lappend_int(path->bulk_decompression_column, bulk_decompression_possible); From 41fb3601a4f5b159aabe05deaf183628a417303b Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 13 Dec 2023 01:45:05 +0100 Subject: [PATCH 073/249] tojson --- .github/workflows/libfuzzer.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 8ca75f70dfa..f027d5e87c4 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -194,14 +194,14 @@ jobs: if: always() uses: actions/upload-artifact@v3 with: - name: PostgreSQL log for ${{ matrix.case }} + name: PostgreSQL log for ${{ toJSON(matrix.case) }} path: postgres.log - name: Save fuzzer-generated crash cases if: always() uses: actions/upload-artifact@v3 with: - name: Crash cases for ${{ matrix.case }} + name: Crash cases for ${{ toJSON(matrix.case) }} path: db/crash-* # We use separate restore/save actions, because the default action won't @@ -240,5 +240,5 @@ jobs: if: always() && steps.collectlogs.outputs.coredumps == 'true' uses: actions/upload-artifact@v3 with: - name: Coredumps for ${{ matrix.case }} + name: Coredumps for ${{ toJSON(matrix.case) }} path: coredumps From e3e6a1b4e8b15d4bb99c32e155e94d8c271492f5 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 13 Dec 2023 13:25:30 +0100 Subject: [PATCH 074/249] cleanups --- .github/workflows/libfuzzer.yaml | 64 +++++++++++++------ tsl/src/compression/compression.c | 52 +++++++++++++-- 
.../decompress_arithmetic_test_impl.c | 19 ------ .../compression/decompress_text_test_impl.c | 11 +--- 4 files changed, 90 insertions(+), 56 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index f027d5e87c4..06153277714 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -12,27 +12,12 @@ name: Libfuzzer paths: .github/workflows/libfuzzer.yaml jobs: - fuzz: - strategy: - fail-fast: false - matrix: - case: [ - { algo: gorilla , pgtype: float8, kind: rowbyrow, runs: 500000000 }, - { algo: deltadelta, pgtype: int8 , kind: rowbyrow, runs: 500000000 }, - { algo: gorilla , pgtype: float8, kind: bulk , runs: 1000000000 }, - { algo: deltadelta, pgtype: int8 , kind: bulk , runs: 1000000000 }, - # array has a peculiar recv function that recompresses all input, so - # fuzzing it is much slower. The dictionary recv also uses it. - { algo: array , pgtype: text , kind: rowbyrow, runs: 10000000 }, - { algo: dictionary, pgtype: text , kind: rowbyrow, runs: 10000000 }, - ] - - name: Fuzz decompression ${{ matrix.case.algo }} ${{ matrix.case.pgtype }} ${{ matrix.case.kind }} + build: runs-on: ubuntu-22.04 + name: Build PostgreSQL and TimescaleDB env: PG_SRC_DIR: pgbuild PG_INSTALL_DIR: postgresql - steps: - name: Install Linux Dependencies run: | @@ -109,6 +94,45 @@ jobs: make -C build -j$(nproc) install + - name: Save the build + uses: actions/upload-artifact@v2 + with: + name: fuzzing-install-dir + path: $HOME/$PG_INSTALL_DIR + retention-days: 1 + + fuzz: + needs: build + strategy: + fail-fast: false + matrix: + case: [ + { algo: gorilla , pgtype: float8, kind: rowbyrow, runs: 500000000 }, + { algo: deltadelta, pgtype: int8 , kind: rowbyrow, runs: 500000000 }, + { algo: gorilla , pgtype: float8, kind: bulk , runs: 1000000000 }, + { algo: deltadelta, pgtype: int8 , kind: bulk , runs: 1000000000 }, + # array has a peculiar recv function that recompresses all input, so + # fuzzing it is much slower. The dictionary recv also uses it. + { algo: array , pgtype: text , kind: rowbyrow, runs: 10000000 }, + { algo: dictionary, pgtype: text , kind: rowbyrow, runs: 10000000 }, + ] + + name: Fuzz decompression ${{ matrix.case.algo }} ${{ matrix.case.pgtype }} ${{ matrix.case.kind }} + runs-on: ubuntu-22.04 + env: + PG_SRC_DIR: pgbuild + PG_INSTALL_DIR: postgresql + + steps: + - name: Checkout TimescaleDB + uses: actions/checkout@v3 + + - name: Download the installation directory + uses: actions/download-artifact@v2 + with: + name: fuzzing-install-dir + path: $HOME/$PG_INSTALL_DIR + - name: initdb run: | # Have to do this before initializing the corpus, or initdb will complain. 
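          # (initdb requires its data directory to be empty, and the fuzzing
          # corpus is restored into db/corpus by the later steps.)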
@@ -194,14 +218,14 @@ jobs: if: always() uses: actions/upload-artifact@v3 with: - name: PostgreSQL log for ${{ toJSON(matrix.case) }} + name: PostgreSQL log for ${{ matrix.case.algo }} ${{ matrix.case.type }} ${{ matrix.case.kind }} path: postgres.log - name: Save fuzzer-generated crash cases if: always() uses: actions/upload-artifact@v3 with: - name: Crash cases for ${{ toJSON(matrix.case) }} + name: Crash cases for ${{ matrix.case.algo }} ${{ matrix.case.type }} ${{ matrix.case.kind }} path: db/crash-* # We use separate restore/save actions, because the default action won't @@ -240,5 +264,5 @@ jobs: if: always() && steps.collectlogs.outputs.coredumps == 'true' uses: actions/upload-artifact@v3 with: - name: Coredumps for ${{ toJSON(matrix.case) }} + name: Coredumps for ${{ matrix.case.algo }} ${{ matrix.case.type }} ${{ matrix.case.kind }} path: coredumps diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c index 7860aba1dd6..0572045cc00 100644 --- a/tsl/src/compression/compression.c +++ b/tsl/src/compression/compression.c @@ -876,7 +876,6 @@ row_compressor_init(CompressionSettings *settings, RowCompressor *row_compressor memset(row_compressor->compressed_is_null, 1, sizeof(bool) * num_columns_in_compressed_table); - int col = 0; for (int i = 0; i < uncompressed_tuple_desc->natts; i++) { Form_pg_attribute attr = TupleDescAttr(uncompressed_tuple_desc, i); @@ -944,7 +943,6 @@ row_compressor_init(CompressionSettings *settings, RowCompressor *row_compressor .max_metadata_attr_offset = -1, }; } - col++; } row_compressor->index_oid = @@ -2521,13 +2519,53 @@ get_fuzzing_kind(const char *s) } } + +static int +target(const uint8 *Data, size_t Size, CompressionAlgorithm requested_algo, + Oid pg_type, DecompressionTestType test_type) +{ + StringInfoData si = { .data = (char *) Data, .len = Size }; + + const CompressionAlgorithm data_algo = pq_getmsgbyte(&si); + + CheckCompressedData(data_algo > 0 && data_algo < _END_COMPRESSION_ALGORITHMS); + + if (data_algo != requested_algo) + { + /* + * It's convenient to fuzz only one algorithm at a time. We specialize + * the fuzz target for one algorithm, so that the fuzzer doesn't waste + * time discovering others from scratch. + */ + return -1; + } + + Datum compressed_data = definitions[data_algo].compressed_data_recv(&si); + + if (test_type == DTT_RowByRowFuzzing) + { + DecompressionIterator *iter = + definitions[data_algo].iterator_init_forward(compressed_data, pg_type); + for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) + ; + return 0; + } + + Assert(test_type == DTT_BulkFuzzing); + DecompressAllFunction decompress_all = tsl_get_decompress_all_function(data_algo); + decompress_all(compressed_data, pg_type, CurrentMemoryContext); + return 0; +} + + /* * This is our test function that will be called by the libfuzzer driver. It * has to catch the postgres exceptions normally produced for corrupt data. 
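 * Errors raised for corrupt input are caught with PG_TRY/PG_CATCH so that the
 * fuzzing process keeps going; a return value of -1 tells the fuzzer not to
 * add the given input to its corpus.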
*/ static int -target_generic(int (*test_fn)(const uint8_t *, size_t, DecompressionTestType), const uint8_t *Data, - size_t Size, DecompressionTestType test_type) +target_wrapper( + const uint8_t *Data, size_t Size, CompressionAlgorithm requested_algo, + Oid pg_type, DecompressionTestType test_type) { MemoryContextReset(CurrentMemoryContext); @@ -2535,7 +2573,7 @@ target_generic(int (*test_fn)(const uint8_t *, size_t, DecompressionTestType), c PG_TRY(); { CHECK_FOR_INTERRUPTS(); - res = test_fn(Data, Size, test_type); + res = target(Data, Size, requested_algo, pg_type, test_type); } PG_CATCH(); { @@ -2546,7 +2584,7 @@ target_generic(int (*test_fn)(const uint8_t *, size_t, DecompressionTestType), c /* * -1 means "don't include it into corpus", return it if the test function - * says so, otherwise return 0. The test function also returns the number + * says so, otherwise return 0. Some test functions also returns the number * of rows for the correct data, the fuzzer doesn't understand these values. */ return res == -1 ? -1 : 0; @@ -2555,7 +2593,7 @@ target_generic(int (*test_fn)(const uint8_t *, size_t, DecompressionTestType), c #define DECLARE_TARGET(ALGO, PGTYPE, KIND) \ static int target_##ALGO##_##PGTYPE##_##KIND(const uint8_t *D, size_t S) \ { \ - return target_generic(decompress_##ALGO##_##PGTYPE, D, S, DTT_##KIND##Fuzzing); \ + return target_wrapper(D, S, COMPRESSION_ALGORITHM_##ALGO, PGTYPE##OID, DTT_##KIND##Fuzzing); \ } APPLY_FOR_TYPES(DECLARE_TARGET) diff --git a/tsl/src/compression/decompress_arithmetic_test_impl.c b/tsl/src/compression/decompress_arithmetic_test_impl.c index e2a6769c8f5..7799759b531 100644 --- a/tsl/src/compression/decompress_arithmetic_test_impl.c +++ b/tsl/src/compression/decompress_arithmetic_test_impl.c @@ -95,27 +95,8 @@ FUNCTION_NAME3(decompress, ALGO, PG_TYPE_PREFIX)(const uint8 *Data, size_t Size, Datum compressed_data = definitions[data_algo].compressed_data_recv(&si); - if (test_type == DTT_RowByRowFuzzing) - { - DecompressionIterator *iter = - definitions[data_algo].iterator_init_forward(compressed_data, PG_TYPE_OID); - for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) - ; - return 0; - } - DecompressAllFunction decompress_all = tsl_get_decompress_all_function(data_algo); - if (test_type == DTT_BulkFuzzing) - { - /* - * For routine fuzzing, we only run bulk decompression to make it faster - * and the coverage space smaller. 
- */ - decompress_all(compressed_data, PG_TYPE_OID, CurrentMemoryContext); - return 0; - } - ArrowArray *arrow = NULL; if (test_type == DTT_Bulk) { diff --git a/tsl/src/compression/decompress_text_test_impl.c b/tsl/src/compression/decompress_text_test_impl.c index 7872429f081..be8d0923d96 100644 --- a/tsl/src/compression/decompress_text_test_impl.c +++ b/tsl/src/compression/decompress_text_test_impl.c @@ -14,7 +14,7 @@ static int decompress_generic_text(const uint8 *Data, size_t Size, DecompressionTestType test_type, int requested_algo) { - if (!(test_type == DTT_RowByRow || test_type == DTT_RowByRowFuzzing)) + if (!(test_type == DTT_RowByRow)) { elog(ERROR, "decompression test type %d not supported for text", test_type); } @@ -37,15 +37,6 @@ decompress_generic_text(const uint8 *Data, size_t Size, DecompressionTestType te Datum compressed_data = definitions[data_algo].compressed_data_recv(&si); - if (test_type == DTT_RowByRowFuzzing) - { - DecompressionIterator *iter = - definitions[data_algo].iterator_init_forward(compressed_data, TEXTOID); - for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) - ; - return 0; - } - /* * Test row-by-row decompression. */ From 20d85612d4e96991293a36801e0d73541dc39cb7 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 13 Dec 2023 13:35:49 +0100 Subject: [PATCH 075/249] fix? --- .github/workflows/libfuzzer.yaml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 06153277714..9d76c9eb604 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -18,6 +18,7 @@ jobs: env: PG_SRC_DIR: pgbuild PG_INSTALL_DIR: postgresql + steps: - name: Install Linux Dependencies run: | @@ -94,11 +95,11 @@ jobs: make -C build -j$(nproc) install - - name: Save the build - uses: actions/upload-artifact@v2 + - name: Save the installation directory + uses: actions/upload-artifact@v3 with: name: fuzzing-install-dir - path: $HOME/$PG_INSTALL_DIR + path: $HOME/$PG_INSTALL_DIR/ retention-days: 1 fuzz: @@ -128,7 +129,7 @@ jobs: uses: actions/checkout@v3 - name: Download the installation directory - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: fuzzing-install-dir path: $HOME/$PG_INSTALL_DIR From 44de281d845f88a4d9d43fc6e9fb16c608952d8c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 13 Dec 2023 13:47:17 +0100 Subject: [PATCH 076/249] fix --- .github/workflows/libfuzzer.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 9d76c9eb604..7a96dd21101 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -99,7 +99,8 @@ jobs: uses: actions/upload-artifact@v3 with: name: fuzzing-install-dir - path: $HOME/$PG_INSTALL_DIR/ + path: ~/$PG_INSTALL_DIR/ + if-no-files-found: error retention-days: 1 fuzz: @@ -132,7 +133,7 @@ jobs: uses: actions/download-artifact@v3 with: name: fuzzing-install-dir - path: $HOME/$PG_INSTALL_DIR + path: ~/$PG_INSTALL_DIR - name: initdb run: | From e091b35702637ac461aa4202b8763c96e808b015 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 13 Dec 2023 13:53:45 +0100 Subject: [PATCH 077/249] fix --- .github/workflows/libfuzzer.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff 
--git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 7a96dd21101..5930bcee0cd 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -99,7 +99,7 @@ jobs: uses: actions/upload-artifact@v3 with: name: fuzzing-install-dir - path: ~/$PG_INSTALL_DIR/ + path: ~/${{ env.PG_INSTALL_DIR }}/ if-no-files-found: error retention-days: 1 @@ -133,7 +133,7 @@ jobs: uses: actions/download-artifact@v3 with: name: fuzzing-install-dir - path: ~/$PG_INSTALL_DIR + path: ~/${{ env.PG_INSTALL_DIR }}/ - name: initdb run: | From 2422e82c98528e3e50cb73bf239ab5470e2a7abe Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 13 Dec 2023 14:10:44 +0100 Subject: [PATCH 078/249] this is so tiresome --- .github/workflows/libfuzzer.yaml | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 5930bcee0cd..0176766a1f0 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -25,7 +25,7 @@ jobs: # Don't add ddebs here because the ddebs mirror is always 503 Service Unavailable. # If needed, install them before opening the core dump. sudo apt-get update - sudo apt-get install clang lld llvm flex bison lcov systemd-coredump gdb libipc-run-perl \ + sudo apt-get install 7zip clang lld llvm flex bison lcov systemd-coredump gdb libipc-run-perl \ libtest-most-perl tree - name: Checkout TimescaleDB @@ -95,11 +95,17 @@ jobs: make -C build -j$(nproc) install + # Incredibly, the upload-artifact action can't preserve executable permissions: + # https://github.com/actions/upload-artifact/issues/38 + # It's also extremely slow. + - name: Compress the installation directory + run: 7z a install.7z $HOME/$PG_INSTALL_DIR + - name: Save the installation directory uses: actions/upload-artifact@v3 with: name: fuzzing-install-dir - path: ~/${{ env.PG_INSTALL_DIR }}/ + path: install.7z if-no-files-found: error retention-days: 1 @@ -126,6 +132,9 @@ jobs: PG_INSTALL_DIR: postgresql steps: + - name: Install Linux dependencies + run: sudo apt install 7zip + - name: Checkout TimescaleDB uses: actions/checkout@v3 @@ -133,7 +142,9 @@ jobs: uses: actions/download-artifact@v3 with: name: fuzzing-install-dir - path: ~/${{ env.PG_INSTALL_DIR }}/ + + - name: Unpack the installation directory + run: 7z x install.7z - name: initdb run: | From d5df5300913c96273548beff334a6610bae58413 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 13 Dec 2023 14:28:34 +0100 Subject: [PATCH 079/249] directory --- .github/workflows/libfuzzer.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 0176766a1f0..afc3e25a0b2 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -144,7 +144,7 @@ jobs: name: fuzzing-install-dir - name: Unpack the installation directory - run: 7z x install.7z + run: 7z x install.7z $HOME/$PG_INSTALL_DIR - name: initdb run: | From 2d7a60c6f0e911e2dc377553cc0eb0125e0087e1 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 13 Dec 2023 14:46:12 +0100 Subject: [PATCH 080/249] path --- .github/workflows/libfuzzer.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index afc3e25a0b2..0948cf4f11c 100644 --- 
a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -144,7 +144,7 @@ jobs: name: fuzzing-install-dir - name: Unpack the installation directory - run: 7z x install.7z $HOME/$PG_INSTALL_DIR + run: 7z x -o $HOME install.7z - name: initdb run: | From 7b0d8783d62e263ae10a220457f0edc7eee46445 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 13 Dec 2023 14:56:35 +0100 Subject: [PATCH 081/249] switch --- .github/workflows/libfuzzer.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 0948cf4f11c..17f8d9e353c 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -144,7 +144,7 @@ jobs: name: fuzzing-install-dir - name: Unpack the installation directory - run: 7z x -o $HOME install.7z + run: 7z x -o$HOME install.7z - name: initdb run: | From f996f80da0247f2e8338a89539b26aa63036fe00 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 13 Dec 2023 15:29:03 +0100 Subject: [PATCH 082/249] split out to files --- tsl/src/compression/CMakeLists.txt | 2 + tsl/src/compression/compression.c | 464 +---------------- tsl/src/compression/compression_test.c | 467 ++++++++++++++++++ tsl/src/compression/compression_test.h | 22 + .../decompress_arithmetic_test_impl.c | 10 +- .../compression/decompress_text_test_impl.c | 21 +- 6 files changed, 515 insertions(+), 471 deletions(-) create mode 100644 tsl/src/compression/compression_test.c create mode 100644 tsl/src/compression/compression_test.h diff --git a/tsl/src/compression/CMakeLists.txt b/tsl/src/compression/CMakeLists.txt index 9e8e9724872..647b7884ae8 100644 --- a/tsl/src/compression/CMakeLists.txt +++ b/tsl/src/compression/CMakeLists.txt @@ -2,6 +2,8 @@ set(SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/api.c ${CMAKE_CURRENT_SOURCE_DIR}/array.c ${CMAKE_CURRENT_SOURCE_DIR}/compression.c + ${CMAKE_CURRENT_SOURCE_DIR}/compression_test.c + ${CMAKE_CURRENT_SOURCE_DIR}/decompress_text_test_impl.c ${CMAKE_CURRENT_SOURCE_DIR}/create.c ${CMAKE_CURRENT_SOURCE_DIR}/datum_serialize.c ${CMAKE_CURRENT_SOURCE_DIR}/deltadelta.c diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c index 0572045cc00..57b1e40b762 100644 --- a/tsl/src/compression/compression.c +++ b/tsl/src/compression/compression.c @@ -49,6 +49,7 @@ #include "array.h" #include "chunk.h" #include "compression.h" +#include "compression_test.h" #include "create.h" #include "custom_type_cache.h" #include "arrow_c_data_interface.h" @@ -2209,468 +2210,13 @@ decompress_batches_for_insert(ChunkInsertState *cis, Chunk *chunk, TupleTableSlo table_close(in_rel, NoLock); } -#if !defined(NDEBUG) || defined(TS_COMPRESSION_FUZZING) - -static int -get_compression_algorithm(char *name) -{ - if (pg_strcasecmp(name, "deltadelta") == 0) - { - return COMPRESSION_ALGORITHM_DELTADELTA; - } - else if (pg_strcasecmp(name, "gorilla") == 0) - { - return COMPRESSION_ALGORITHM_GORILLA; - } - else if (pg_strcasecmp(name, "array") == 0) - { - return COMPRESSION_ALGORITHM_ARRAY; - } - else if (pg_strcasecmp(name, "dictionary") == 0) - { - return COMPRESSION_ALGORITHM_DICTIONARY; - } - - ereport(ERROR, (errmsg("unknown comrpession algorithm %s", name))); - return _INVALID_COMPRESSION_ALGORITHM; -} - -typedef enum -{ - DTT_BulkFuzzing, - DTT_RowByRowFuzzing, - DTT_RowByRow, - DTT_Bulk -} DecompressionTestType; - -#define ALGO GORILLA -#define CTYPE float8 -#define 
PG_TYPE_PREFIX FLOAT8 -#define DATUM_TO_CTYPE DatumGetFloat8 -#include "decompress_arithmetic_test_impl.c" -#undef ALGO -#undef CTYPE -#undef PG_TYPE_PREFIX -#undef DATUM_TO_CTYPE - -#define ALGO DELTADELTA -#define CTYPE int64 -#define PG_TYPE_PREFIX INT8 -#define DATUM_TO_CTYPE DatumGetInt64 -#include "decompress_arithmetic_test_impl.c" -#undef ALGO -#undef CTYPE -#undef PG_TYPE_PREFIX -#undef DATUM_TO_CTYPE - -#include "decompress_text_test_impl.c" - -#define APPLY_FOR_TYPES(X) \ - X(GORILLA, FLOAT8, RowByRow) \ - X(GORILLA, FLOAT8, Bulk) \ - X(DELTADELTA, INT8, RowByRow) \ - X(DELTADELTA, INT8, Bulk) \ - X(ARRAY, TEXT, RowByRow) \ - X(DICTIONARY, TEXT, RowByRow) - -static int (*get_decompress_fn(int algo, Oid type))(const uint8 *Data, size_t Size, - DecompressionTestType test_type) -{ -#define DISPATCH(ALGO, PGTYPE, KIND) \ - if (algo == COMPRESSION_ALGORITHM_##ALGO && type == PGTYPE##OID) \ - { \ - return decompress_##ALGO##_##PGTYPE; \ - } - - APPLY_FOR_TYPES(DISPATCH) - - elog(ERROR, - "no decompression function for compression algorithm %d with element type %d", - algo, - type); - pg_unreachable(); -#undef DISPATCH -} - -/* - * Read and decompress compressed data from file. Useful for debugging the - * results of fuzzing. - * The out parameter bytes is volatile because we want to fill it even - * if we error out later. - */ -static void -read_compressed_data_file_impl(int algo, Oid type, const char *path, bool bulk, volatile int *bytes, - int *rows) -{ - FILE *f = fopen(path, "r"); - - if (!f) - { - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_FILE), errmsg("could not open the file '%s'", path))); - } - - fseek(f, 0, SEEK_END); - const size_t fsize = ftell(f); - fseek(f, 0, SEEK_SET); /* same as rewind(f); */ - - *rows = 0; - *bytes = fsize; - - if (fsize == 0) - { - /* - * Skip empty data, because we'll just get "no data left in message" - * right away. - */ - return; - } - - char *string = palloc(fsize + 1); - size_t elements_read = fread(string, fsize, 1, f); - - if (elements_read != 1) - { - ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FILE), errmsg("failed to read file '%s'", path))); - } - - fclose(f); - - string[fsize] = 0; - - *rows = get_decompress_fn(algo, type)((const uint8 *) string, - fsize, - /* test_type = */ bulk ? DTT_Bulk : DTT_RowByRow); -} - -TS_FUNCTION_INFO_V1(ts_read_compressed_data_file); - -/* Read and decompress compressed data from file -- SQL-callable wrapper. */ -Datum -ts_read_compressed_data_file(PG_FUNCTION_ARGS) +const CompressionAlgorithmDefinition * +algorithm_definition(CompressionAlgorithm algo) { - int rows; - int bytes; - read_compressed_data_file_impl(get_compression_algorithm(PG_GETARG_CSTRING(0)), - PG_GETARG_OID(1), - PG_GETARG_CSTRING(2), - PG_GETARG_BOOL(3), - &bytes, - &rows); - PG_RETURN_INT32(rows); + Assert(algo > 0 && algo < _END_COMPRESSION_ALGORITHMS); + return &definitions[algo]; } -TS_FUNCTION_INFO_V1(ts_read_compressed_data_directory); - -/* - * Read and decomrpess all compressed data files from directory. Useful for - * checking the fuzzing corpuses in the regression tests. - */ -Datum -ts_read_compressed_data_directory(PG_FUNCTION_ARGS) -{ - /* Output columns of this function. */ - enum - { - out_path = 0, - out_bytes, - out_rows, - out_sqlstate, - out_location, - _out_columns - }; - - /* Cross-call context for this set-returning function. 
*/ - struct user_context - { - DIR *dp; - struct dirent *ep; - }; - - char *name = PG_GETARG_CSTRING(2); - const int algo = get_compression_algorithm(PG_GETARG_CSTRING(0)); - - FuncCallContext *funcctx; - struct user_context *c; - MemoryContext call_memory_context = CurrentMemoryContext; - - /* stuff done only on the first call of the function */ - if (SRF_IS_FIRSTCALL()) - { - /* create a function context for cross-call persistence */ - funcctx = SRF_FIRSTCALL_INIT(); - - /* switch to memory context appropriate for multiple function calls */ - MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); - - /* Build a tuple descriptor for our result type */ - if (get_call_result_type(fcinfo, NULL, &funcctx->tuple_desc) != TYPEFUNC_COMPOSITE) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("function returning record called in context " - "that cannot accept type record"))); - - /* - * generate attribute metadata needed later to produce tuples from raw - * C strings - */ - funcctx->attinmeta = TupleDescGetAttInMetadata(funcctx->tuple_desc); - - funcctx->user_fctx = palloc(sizeof(struct user_context)); - c = funcctx->user_fctx; - - c->dp = opendir(name); - - if (!c->dp) - { - elog(ERROR, "could not open directory '%s'", name); - } - - MemoryContextSwitchTo(call_memory_context); - } - - funcctx = SRF_PERCALL_SETUP(); - c = (struct user_context *) funcctx->user_fctx; - - Datum values[_out_columns] = { 0 }; - bool nulls[_out_columns] = { 0 }; - for (int i = 0; i < _out_columns; i++) - { - nulls[i] = true; - } - - while ((c->ep = readdir(c->dp))) - { - if (c->ep->d_name[0] == '.') - { - continue; - } - - char *path = psprintf("%s/%s", name, c->ep->d_name); - - /* The return values are: path, ret, sqlstate, status, location. */ - values[out_path] = PointerGetDatum(cstring_to_text(path)); - nulls[out_path] = false; - - int rows; - volatile int bytes = 0; - PG_TRY(); - { - read_compressed_data_file_impl(algo, - PG_GETARG_OID(1), - path, - PG_GETARG_BOOL(3), - &bytes, - &rows); - values[out_rows] = Int32GetDatum(rows); - nulls[out_rows] = false; - } - PG_CATCH(); - { - MemoryContextSwitchTo(call_memory_context); - - ErrorData *error = CopyErrorData(); - - values[out_sqlstate] = - PointerGetDatum(cstring_to_text(unpack_sql_state(error->sqlerrcode))); - nulls[out_sqlstate] = false; - - if (error->filename) - { - values[out_location] = PointerGetDatum( - cstring_to_text(psprintf("%s:%d", error->filename, error->lineno))); - nulls[out_location] = false; - } - - FlushErrorState(); - } - PG_END_TRY(); - - values[out_bytes] = Int32GetDatum(bytes); - nulls[out_bytes] = false; - - SRF_RETURN_NEXT(funcctx, - HeapTupleGetDatum(heap_form_tuple(funcctx->tuple_desc, values, nulls))); - } - - (void) closedir(c->dp); - - SRF_RETURN_DONE(funcctx); -} - -#endif - -#ifdef TS_COMPRESSION_FUZZING - -static DecompressionTestType -get_fuzzing_kind(const char *s) -{ - if (strcmp(s, "bulk") == 0) - { - return DTT_BulkFuzzing; - } - else if (strcmp(s, "rowbyrow") == 0) - { - return DTT_RowByRowFuzzing; - } - else - { - elog(ERROR, "unknown fuzzing type '%s'", s); - } -} - - -static int -target(const uint8 *Data, size_t Size, CompressionAlgorithm requested_algo, - Oid pg_type, DecompressionTestType test_type) -{ - StringInfoData si = { .data = (char *) Data, .len = Size }; - - const CompressionAlgorithm data_algo = pq_getmsgbyte(&si); - - CheckCompressedData(data_algo > 0 && data_algo < _END_COMPRESSION_ALGORITHMS); - - if (data_algo != requested_algo) - { - /* - * It's convenient to fuzz only one algorithm at 
a time. We specialize - * the fuzz target for one algorithm, so that the fuzzer doesn't waste - * time discovering others from scratch. - */ - return -1; - } - - Datum compressed_data = definitions[data_algo].compressed_data_recv(&si); - - if (test_type == DTT_RowByRowFuzzing) - { - DecompressionIterator *iter = - definitions[data_algo].iterator_init_forward(compressed_data, pg_type); - for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) - ; - return 0; - } - - Assert(test_type == DTT_BulkFuzzing); - DecompressAllFunction decompress_all = tsl_get_decompress_all_function(data_algo); - decompress_all(compressed_data, pg_type, CurrentMemoryContext); - return 0; -} - - -/* - * This is our test function that will be called by the libfuzzer driver. It - * has to catch the postgres exceptions normally produced for corrupt data. - */ -static int -target_wrapper( - const uint8_t *Data, size_t Size, CompressionAlgorithm requested_algo, - Oid pg_type, DecompressionTestType test_type) -{ - MemoryContextReset(CurrentMemoryContext); - - int res = 0; - PG_TRY(); - { - CHECK_FOR_INTERRUPTS(); - res = target(Data, Size, requested_algo, pg_type, test_type); - } - PG_CATCH(); - { - /* EmitErrorReport(); */ - FlushErrorState(); - } - PG_END_TRY(); - - /* - * -1 means "don't include it into corpus", return it if the test function - * says so, otherwise return 0. Some test functions also returns the number - * of rows for the correct data, the fuzzer doesn't understand these values. - */ - return res == -1 ? -1 : 0; -} - -#define DECLARE_TARGET(ALGO, PGTYPE, KIND) \ - static int target_##ALGO##_##PGTYPE##_##KIND(const uint8_t *D, size_t S) \ - { \ - return target_wrapper(D, S, COMPRESSION_ALGORITHM_##ALGO, PGTYPE##OID, DTT_##KIND##Fuzzing); \ - } - -APPLY_FOR_TYPES(DECLARE_TARGET) - -#undef DECLARE_TARGET - -/* - * libfuzzer fuzzing driver that we import from LLVM libraries. It will run our - * test functions with random inputs. - */ -extern int LLVMFuzzerRunDriver(int *argc, char ***argv, - int (*UserCb)(const uint8_t *Data, size_t Size)); - -/* - * The SQL function to perform fuzzing. - */ -TS_FUNCTION_INFO_V1(ts_fuzz_compression); - -Datum -ts_fuzz_compression(PG_FUNCTION_ARGS) -{ - /* - * We use the memory context size larger than default here, so that all data - * allocated by fuzzing fit into the first chunk. The first chunk is not - * deallocated when the memory context is reset, so this reduces overhead - * caused by repeated reallocations. - * The particular value of 8MB is somewhat arbitrary and large. In practice, - * we have inputs of 1k rows max here, which decompress to 8 kB max. 
- */ - MemoryContext fuzzing_context = - AllocSetContextCreate(CurrentMemoryContext, "fuzzing", 0, 8 * 1024 * 1024, 8 * 1024 * 1024); - MemoryContext old_context = MemoryContextSwitchTo(fuzzing_context); - - char *argvdata[] = { "PostgresFuzzer", - "-timeout=1", - "-report_slow_units=1", - // "-use_value_profile=1", - "-reload=1", - //"-print_coverage=1", - //"-print_full_coverage=1", - //"-print_final_stats=1", - //"-help=1", - psprintf("-runs=%d", PG_GETARG_INT32(3)), - "corpus" /* in the database directory */, - NULL }; - char **argv = argvdata; - int argc = sizeof(argvdata) / sizeof(*argvdata) - 1; - - int algo = get_compression_algorithm(PG_GETARG_CSTRING(0)); - Oid type = PG_GETARG_OID(1); - int kind = get_fuzzing_kind(PG_GETARG_CSTRING(2)); - - int (*target)(const uint8_t *, size_t) = NULL; - -#define DISPATCH(ALGO, PGTYPE, KIND) \ - if (algo == COMPRESSION_ALGORITHM_##ALGO && type == PGTYPE##OID && \ - kind == DTT_##KIND##Fuzzing) \ - { \ - target = target_##ALGO##_##PGTYPE##_##KIND; \ - } - - APPLY_FOR_TYPES(DISPATCH) - - if (target == NULL) - { - elog(ERROR, "no llvm fuzz target for compression algorithm %d and type %d", algo, type); - } - - int res = LLVMFuzzerRunDriver(&argc, &argv, target); - - MemoryContextSwitchTo(old_context); - - PG_RETURN_INT32(res); -} - -#endif - #if PG14_GE static BatchFilter * make_batchfilter(char *column_name, StrategyNumber strategy, Const *value, bool is_null_check) diff --git a/tsl/src/compression/compression_test.c b/tsl/src/compression/compression_test.c new file mode 100644 index 00000000000..ad4fde6d12a --- /dev/null +++ b/tsl/src/compression/compression_test.c @@ -0,0 +1,467 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. 
+ */ +#include + +#include +#include + +#include "compression_test.h" + +#include "compression.h" +#include "arrow_c_data_interface.h" + +#if !defined(NDEBUG) || defined(TS_COMPRESSION_FUZZING) + +static int +get_compression_algorithm(char *name) +{ + if (pg_strcasecmp(name, "deltadelta") == 0) + { + return COMPRESSION_ALGORITHM_DELTADELTA; + } + else if (pg_strcasecmp(name, "gorilla") == 0) + { + return COMPRESSION_ALGORITHM_GORILLA; + } + else if (pg_strcasecmp(name, "array") == 0) + { + return COMPRESSION_ALGORITHM_ARRAY; + } + else if (pg_strcasecmp(name, "dictionary") == 0) + { + return COMPRESSION_ALGORITHM_DICTIONARY; + } + + ereport(ERROR, (errmsg("unknown comrpession algorithm %s", name))); + return _INVALID_COMPRESSION_ALGORITHM; +} + +#define ALGO GORILLA +#define CTYPE float8 +#define PG_TYPE_PREFIX FLOAT8 +#define DATUM_TO_CTYPE DatumGetFloat8 +#include "decompress_arithmetic_test_impl.c" +#undef ALGO +#undef CTYPE +#undef PG_TYPE_PREFIX +#undef DATUM_TO_CTYPE + +#define ALGO DELTADELTA +#define CTYPE int64 +#define PG_TYPE_PREFIX INT8 +#define DATUM_TO_CTYPE DatumGetInt64 +#include "decompress_arithmetic_test_impl.c" +#undef ALGO +#undef CTYPE +#undef PG_TYPE_PREFIX +#undef DATUM_TO_CTYPE + +#define APPLY_FOR_TYPES(X) \ + X(GORILLA, FLOAT8, RowByRow) \ + X(GORILLA, FLOAT8, Bulk) \ + X(DELTADELTA, INT8, RowByRow) \ + X(DELTADELTA, INT8, Bulk) \ + X(ARRAY, TEXT, RowByRow) \ + X(DICTIONARY, TEXT, RowByRow) + +static int (*get_decompress_fn(int algo, Oid type))(const uint8 *Data, size_t Size, + DecompressionTestType test_type) +{ +#define DISPATCH(ALGO, PGTYPE, KIND) \ + if (algo == COMPRESSION_ALGORITHM_##ALGO && type == PGTYPE##OID) \ + { \ + return decompress_##ALGO##_##PGTYPE; \ + } + + APPLY_FOR_TYPES(DISPATCH) + + elog(ERROR, + "no decompression function for compression algorithm %d with element type %d", + algo, + type); + pg_unreachable(); +#undef DISPATCH +} + +/* + * Read and decompress compressed data from file. Useful for debugging the + * results of fuzzing. + * The out parameter bytes is volatile because we want to fill it even + * if we error out later. + */ +static void +read_compressed_data_file_impl(int algo, Oid type, const char *path, bool bulk, volatile int *bytes, + int *rows) +{ + FILE *f = fopen(path, "r"); + + if (!f) + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FILE), errmsg("could not open the file '%s'", path))); + } + + fseek(f, 0, SEEK_END); + const size_t fsize = ftell(f); + fseek(f, 0, SEEK_SET); /* same as rewind(f); */ + + *rows = 0; + *bytes = fsize; + + if (fsize == 0) + { + /* + * Skip empty data, because we'll just get "no data left in message" + * right away. + */ + return; + } + + char *string = palloc(fsize + 1); + size_t elements_read = fread(string, fsize, 1, f); + + if (elements_read != 1) + { + ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FILE), errmsg("failed to read file '%s'", path))); + } + + fclose(f); + + string[fsize] = 0; + + *rows = get_decompress_fn(algo, type)((const uint8 *) string, + fsize, + /* test_type = */ bulk ? DTT_Bulk : DTT_RowByRow); +} + +TS_FUNCTION_INFO_V1(ts_read_compressed_data_file); + +/* Read and decompress compressed data from file -- SQL-callable wrapper. 
*/ +Datum +ts_read_compressed_data_file(PG_FUNCTION_ARGS) +{ + int rows; + int bytes; + read_compressed_data_file_impl(get_compression_algorithm(PG_GETARG_CSTRING(0)), + PG_GETARG_OID(1), + PG_GETARG_CSTRING(2), + PG_GETARG_BOOL(3), + &bytes, + &rows); + PG_RETURN_INT32(rows); +} + +TS_FUNCTION_INFO_V1(ts_read_compressed_data_directory); + +/* + * Read and decomrpess all compressed data files from directory. Useful for + * checking the fuzzing corpuses in the regression tests. + */ +Datum +ts_read_compressed_data_directory(PG_FUNCTION_ARGS) +{ + /* Output columns of this function. */ + enum + { + out_path = 0, + out_bytes, + out_rows, + out_sqlstate, + out_location, + _out_columns + }; + + /* Cross-call context for this set-returning function. */ + struct user_context + { + DIR *dp; + struct dirent *ep; + }; + + char *name = PG_GETARG_CSTRING(2); + const int algo = get_compression_algorithm(PG_GETARG_CSTRING(0)); + + FuncCallContext *funcctx; + struct user_context *c; + MemoryContext call_memory_context = CurrentMemoryContext; + + /* stuff done only on the first call of the function */ + if (SRF_IS_FIRSTCALL()) + { + /* create a function context for cross-call persistence */ + funcctx = SRF_FIRSTCALL_INIT(); + + /* switch to memory context appropriate for multiple function calls */ + MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &funcctx->tuple_desc) != TYPEFUNC_COMPOSITE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("function returning record called in context " + "that cannot accept type record"))); + + /* + * generate attribute metadata needed later to produce tuples from raw + * C strings + */ + funcctx->attinmeta = TupleDescGetAttInMetadata(funcctx->tuple_desc); + + funcctx->user_fctx = palloc(sizeof(struct user_context)); + c = funcctx->user_fctx; + + c->dp = opendir(name); + + if (!c->dp) + { + elog(ERROR, "could not open directory '%s'", name); + } + + MemoryContextSwitchTo(call_memory_context); + } + + funcctx = SRF_PERCALL_SETUP(); + c = (struct user_context *) funcctx->user_fctx; + + Datum values[_out_columns] = { 0 }; + bool nulls[_out_columns] = { 0 }; + for (int i = 0; i < _out_columns; i++) + { + nulls[i] = true; + } + + while ((c->ep = readdir(c->dp))) + { + if (c->ep->d_name[0] == '.') + { + continue; + } + + char *path = psprintf("%s/%s", name, c->ep->d_name); + + /* The return values are: path, ret, sqlstate, status, location. 
*/ + values[out_path] = PointerGetDatum(cstring_to_text(path)); + nulls[out_path] = false; + + int rows; + volatile int bytes = 0; + PG_TRY(); + { + read_compressed_data_file_impl(algo, + PG_GETARG_OID(1), + path, + PG_GETARG_BOOL(3), + &bytes, + &rows); + values[out_rows] = Int32GetDatum(rows); + nulls[out_rows] = false; + } + PG_CATCH(); + { + MemoryContextSwitchTo(call_memory_context); + + ErrorData *error = CopyErrorData(); + + values[out_sqlstate] = + PointerGetDatum(cstring_to_text(unpack_sql_state(error->sqlerrcode))); + nulls[out_sqlstate] = false; + + if (error->filename) + { + values[out_location] = PointerGetDatum( + cstring_to_text(psprintf("%s:%d", error->filename, error->lineno))); + nulls[out_location] = false; + } + + FlushErrorState(); + } + PG_END_TRY(); + + values[out_bytes] = Int32GetDatum(bytes); + nulls[out_bytes] = false; + + SRF_RETURN_NEXT(funcctx, + HeapTupleGetDatum(heap_form_tuple(funcctx->tuple_desc, values, nulls))); + } + + (void) closedir(c->dp); + + SRF_RETURN_DONE(funcctx); +} + +#endif + +#ifdef TS_COMPRESSION_FUZZING + +static DecompressionTestType +get_fuzzing_kind(const char *s) +{ + if (strcmp(s, "bulk") == 0) + { + return DTT_BulkFuzzing; + } + else if (strcmp(s, "rowbyrow") == 0) + { + return DTT_RowByRowFuzzing; + } + else + { + elog(ERROR, "unknown fuzzing type '%s'", s); + } +} + +static int +target(const uint8 *Data, size_t Size, CompressionAlgorithm requested_algo, Oid pg_type, + DecompressionTestType test_type) +{ + StringInfoData si = { .data = (char *) Data, .len = Size }; + + const CompressionAlgorithm data_algo = pq_getmsgbyte(&si); + + CheckCompressedData(data_algo > 0 && data_algo < _END_COMPRESSION_ALGORITHMS); + + if (data_algo != requested_algo) + { + /* + * It's convenient to fuzz only one algorithm at a time. We specialize + * the fuzz target for one algorithm, so that the fuzzer doesn't waste + * time discovering others from scratch. + */ + return -1; + } + + Datum compressed_data = definitions[data_algo].compressed_data_recv(&si); + + if (test_type == DTT_RowByRowFuzzing) + { + DecompressionIterator *iter = + definitions[data_algo].iterator_init_forward(compressed_data, pg_type); + for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) + ; + return 0; + } + + Assert(test_type == DTT_BulkFuzzing); + DecompressAllFunction decompress_all = tsl_get_decompress_all_function(data_algo); + decompress_all(compressed_data, pg_type, CurrentMemoryContext); + return 0; +} + +/* + * This is our test function that will be called by the libfuzzer driver. It + * has to catch the postgres exceptions normally produced for corrupt data. + */ +static int +target_wrapper(const uint8_t *Data, size_t Size, CompressionAlgorithm requested_algo, Oid pg_type, + DecompressionTestType test_type) +{ + MemoryContextReset(CurrentMemoryContext); + + int res = 0; + PG_TRY(); + { + CHECK_FOR_INTERRUPTS(); + res = target(Data, Size, requested_algo, pg_type, test_type); + } + PG_CATCH(); + { + /* EmitErrorReport(); */ + FlushErrorState(); + } + PG_END_TRY(); + + /* + * -1 means "don't include it into corpus", return it if the test function + * says so, otherwise return 0. Some test functions also returns the number + * of rows for the correct data, the fuzzer doesn't understand these values. + */ + return res == -1 ? 
-1 : 0; +} + +#define DECLARE_TARGET(ALGO, PGTYPE, KIND) \ + static int target_##ALGO##_##PGTYPE##_##KIND(const uint8_t *D, size_t S) \ + { \ + return target_wrapper(D, \ + S, \ + COMPRESSION_ALGORITHM_##ALGO, \ + PGTYPE##OID, \ + DTT_##KIND##Fuzzing); \ + } + +APPLY_FOR_TYPES(DECLARE_TARGET) + +#undef DECLARE_TARGET + +/* + * libfuzzer fuzzing driver that we import from LLVM libraries. It will run our + * test functions with random inputs. + */ +extern int LLVMFuzzerRunDriver(int *argc, char ***argv, + int (*UserCb)(const uint8_t *Data, size_t Size)); + +/* + * The SQL function to perform fuzzing. + */ +TS_FUNCTION_INFO_V1(ts_fuzz_compression); + +Datum +ts_fuzz_compression(PG_FUNCTION_ARGS) +{ + /* + * We use the memory context size larger than default here, so that all data + * allocated by fuzzing fit into the first chunk. The first chunk is not + * deallocated when the memory context is reset, so this reduces overhead + * caused by repeated reallocations. + * The particular value of 8MB is somewhat arbitrary and large. In practice, + * we have inputs of 1k rows max here, which decompress to 8 kB max. + */ + MemoryContext fuzzing_context = + AllocSetContextCreate(CurrentMemoryContext, "fuzzing", 0, 8 * 1024 * 1024, 8 * 1024 * 1024); + MemoryContext old_context = MemoryContextSwitchTo(fuzzing_context); + + char *argvdata[] = { "PostgresFuzzer", + "-timeout=1", + "-report_slow_units=1", + // "-use_value_profile=1", + "-reload=1", + //"-print_coverage=1", + //"-print_full_coverage=1", + //"-print_final_stats=1", + //"-help=1", + psprintf("-runs=%d", PG_GETARG_INT32(3)), + "corpus" /* in the database directory */, + NULL }; + char **argv = argvdata; + int argc = sizeof(argvdata) / sizeof(*argvdata) - 1; + + int algo = get_compression_algorithm(PG_GETARG_CSTRING(0)); + Oid type = PG_GETARG_OID(1); + int kind = get_fuzzing_kind(PG_GETARG_CSTRING(2)); + + int (*target)(const uint8_t *, size_t) = NULL; + +#define DISPATCH(ALGO, PGTYPE, KIND) \ + if (algo == COMPRESSION_ALGORITHM_##ALGO && type == PGTYPE##OID && \ + kind == DTT_##KIND##Fuzzing) \ + { \ + target = target_##ALGO##_##PGTYPE##_##KIND; \ + } + + APPLY_FOR_TYPES(DISPATCH) + + if (target == NULL) + { + elog(ERROR, "no llvm fuzz target for compression algorithm %d and type %d", algo, type); + } + + int res = LLVMFuzzerRunDriver(&argc, &argv, target); + + MemoryContextSwitchTo(old_context); + + PG_RETURN_INT32(res); +} + +#endif diff --git a/tsl/src/compression/compression_test.h b/tsl/src/compression/compression_test.h new file mode 100644 index 00000000000..e2efb736989 --- /dev/null +++ b/tsl/src/compression/compression_test.h @@ -0,0 +1,22 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. 
+ */ +#pragma once + +#include "compression.h" + +typedef enum +{ + DTT_BulkFuzzing, + DTT_RowByRowFuzzing, + DTT_RowByRow, + DTT_Bulk +} DecompressionTestType; + +int decompress_ARRAY_TEXT(const uint8 *Data, size_t Size, DecompressionTestType test_type); + +int decompress_DICTIONARY_TEXT(const uint8 *Data, size_t Size, DecompressionTestType test_type); + +const CompressionAlgorithmDefinition *algorithm_definition(CompressionAlgorithm algo); diff --git a/tsl/src/compression/decompress_arithmetic_test_impl.c b/tsl/src/compression/decompress_arithmetic_test_impl.c index 7799759b531..4aec38ee4a2 100644 --- a/tsl/src/compression/decompress_arithmetic_test_impl.c +++ b/tsl/src/compression/decompress_arithmetic_test_impl.c @@ -93,7 +93,8 @@ FUNCTION_NAME3(decompress, ALGO, PG_TYPE_PREFIX)(const uint8 *Data, size_t Size, return -1; } - Datum compressed_data = definitions[data_algo].compressed_data_recv(&si); + const CompressionAlgorithmDefinition *def = algorithm_definition(data_algo); + Datum compressed_data = def->compressed_data_recv(&si); DecompressAllFunction decompress_all = tsl_get_decompress_all_function(data_algo); @@ -110,8 +111,7 @@ FUNCTION_NAME3(decompress, ALGO, PG_TYPE_PREFIX)(const uint8 *Data, size_t Size, /* * Test row-by-row decompression. */ - DecompressionIterator *iter = - definitions[data_algo].iterator_init_forward(compressed_data, PG_TYPE_OID); + DecompressionIterator *iter = def->iterator_init_forward(compressed_data, PG_TYPE_OID); DecompressResult results[GLOBAL_MAX_ROWS_PER_COMPRESSION]; int n = 0; for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) @@ -137,7 +137,7 @@ FUNCTION_NAME3(decompress, ALGO, PG_TYPE_PREFIX)(const uint8 *Data, size_t Size, * * 1) Compress. */ - Compressor *compressor = definitions[data_algo].compressor_for_type(PG_TYPE_OID); + Compressor *compressor = def->compressor_for_type(PG_TYPE_OID); for (int i = 0; i < n; i++) { @@ -161,7 +161,7 @@ FUNCTION_NAME3(decompress, ALGO, PG_TYPE_PREFIX)(const uint8 *Data, size_t Size, /* * 2) Decompress and check that it's the same. */ - iter = definitions[data_algo].iterator_init_forward(compressed_data, PG_TYPE_OID); + iter = def->iterator_init_forward(compressed_data, PG_TYPE_OID); int nn = 0; for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) { diff --git a/tsl/src/compression/decompress_text_test_impl.c b/tsl/src/compression/decompress_text_test_impl.c index be8d0923d96..9f47108db49 100644 --- a/tsl/src/compression/decompress_text_test_impl.c +++ b/tsl/src/compression/decompress_text_test_impl.c @@ -3,6 +3,13 @@ * Please see the included NOTICE for copyright information and * LICENSE-TIMESCALE for a copy of the license. */ +#include + +#include + +#include "compression.h" + +#include "compression_test.h" /* * Try to decompress the given compressed data. Used for fuzzing and for checking @@ -35,13 +42,13 @@ decompress_generic_text(const uint8 *Data, size_t Size, DecompressionTestType te return -1; } - Datum compressed_data = definitions[data_algo].compressed_data_recv(&si); + const CompressionAlgorithmDefinition *def = algorithm_definition(data_algo); + Datum compressed_data = def->compressed_data_recv(&si); /* * Test row-by-row decompression. 
*/ - DecompressionIterator *iter = - definitions[data_algo].iterator_init_forward(compressed_data, TEXTOID); + DecompressionIterator *iter = def->iterator_init_forward(compressed_data, TEXTOID); DecompressResult results[GLOBAL_MAX_ROWS_PER_COMPRESSION]; int n = 0; for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) @@ -67,7 +74,7 @@ decompress_generic_text(const uint8 *Data, size_t Size, DecompressionTestType te /* * 1) Compress. */ - Compressor *compressor = definitions[data_algo].compressor_for_type(TEXTOID); + Compressor *compressor = def->compressor_for_type(TEXTOID); for (int i = 0; i < n; i++) { @@ -91,7 +98,7 @@ decompress_generic_text(const uint8 *Data, size_t Size, DecompressionTestType te /* * 2) Decompress and check that it's the same. */ - iter = definitions[data_algo].iterator_init_forward(compressed_data, TEXTOID); + iter = def->iterator_init_forward(compressed_data, TEXTOID); int nn = 0; for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) { @@ -133,13 +140,13 @@ decompress_generic_text(const uint8 *Data, size_t Size, DecompressionTestType te return n; } -static int +int decompress_ARRAY_TEXT(const uint8 *Data, size_t Size, DecompressionTestType test_type) { return decompress_generic_text(Data, Size, test_type, COMPRESSION_ALGORITHM_ARRAY); } -static int +int decompress_DICTIONARY_TEXT(const uint8 *Data, size_t Size, DecompressionTestType test_type) { return decompress_generic_text(Data, Size, test_type, COMPRESSION_ALGORITHM_DICTIONARY); From df52a4d2e9e98a526172e833a757550718c430b4 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 13 Dec 2023 15:48:29 +0100 Subject: [PATCH 083/249] fix --- tsl/src/compression/compression_test.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tsl/src/compression/compression_test.c b/tsl/src/compression/compression_test.c index ad4fde6d12a..4bd9adbc6bd 100644 --- a/tsl/src/compression/compression_test.c +++ b/tsl/src/compression/compression_test.c @@ -333,12 +333,13 @@ target(const uint8 *Data, size_t Size, CompressionAlgorithm requested_algo, Oid return -1; } - Datum compressed_data = definitions[data_algo].compressed_data_recv(&si); + const CompressionAlgorithmDefinition *def = algorithm_definition(data_algo); + Datum compressed_data = def->compressed_data_recv(&si); if (test_type == DTT_RowByRowFuzzing) { DecompressionIterator *iter = - definitions[data_algo].iterator_init_forward(compressed_data, pg_type); + def->iterator_init_forward(compressed_data, pg_type); for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) ; return 0; From 5d4130827727a5aae721f95866a3c5debc6b5222 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 13 Dec 2023 16:14:58 +0100 Subject: [PATCH 084/249] headers --- tsl/src/compression/compression.c | 3 --- tsl/src/compression/compression_test.c | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c index 57b1e40b762..18a691883ce 100644 --- a/tsl/src/compression/compression.c +++ b/tsl/src/compression/compression.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -42,7 +41,6 @@ #include #include #include -#include #include "compat/compat.h" @@ -52,7 +50,6 @@ #include "compression_test.h" #include "create.h" #include "custom_type_cache.h" -#include "arrow_c_data_interface.h" 
#include "debug_assert.h" #include "debug_point.h" #include "deltadelta.h" diff --git a/tsl/src/compression/compression_test.c b/tsl/src/compression/compression_test.c index 4bd9adbc6bd..7ca9199b6e2 100644 --- a/tsl/src/compression/compression_test.c +++ b/tsl/src/compression/compression_test.c @@ -3,6 +3,8 @@ * Please see the included NOTICE for copyright information and * LICENSE-TIMESCALE for a copy of the license. */ +#include + #include #include From af61eaa0ee5b212e0689bbd37aa910d172d1b144 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 13 Dec 2023 16:18:30 +0100 Subject: [PATCH 085/249] headers? --- tsl/src/compression/compression.c | 1 - tsl/src/compression/compression_test.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c index 18a691883ce..38d827a6fc6 100644 --- a/tsl/src/compression/compression.c +++ b/tsl/src/compression/compression.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/tsl/src/compression/compression_test.c b/tsl/src/compression/compression_test.c index 7ca9199b6e2..2fc47aac75d 100644 --- a/tsl/src/compression/compression_test.c +++ b/tsl/src/compression/compression_test.c @@ -9,6 +9,7 @@ #include #include +#include #include "compression_test.h" From 5281c9ca0d580ce130a40a555d523f182b6093c3 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 13 Dec 2023 16:38:37 +0100 Subject: [PATCH 086/249] headers... --- tsl/src/compression/compression.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c index 38d827a6fc6..18a691883ce 100644 --- a/tsl/src/compression/compression.c +++ b/tsl/src/compression/compression.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include From 2cafdff244bdc3fd30e5fe2cf6b58233a26a0679 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 13 Dec 2023 16:40:22 +0100 Subject: [PATCH 087/249] ts format --- tsl/src/compression/compression_test.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tsl/src/compression/compression_test.c b/tsl/src/compression/compression_test.c index 2fc47aac75d..bfe7949fe6f 100644 --- a/tsl/src/compression/compression_test.c +++ b/tsl/src/compression/compression_test.c @@ -341,8 +341,7 @@ target(const uint8 *Data, size_t Size, CompressionAlgorithm requested_algo, Oid if (test_type == DTT_RowByRowFuzzing) { - DecompressionIterator *iter = - def->iterator_init_forward(compressed_data, pg_type); + DecompressionIterator *iter = def->iterator_init_forward(compressed_data, pg_type); for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) ; return 0; From b1e5dea7f3c6d825f31d345e7b1eaab3cedd3b18 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 13 Dec 2023 17:40:51 +0100 Subject: [PATCH 088/249] cleanup --- tsl/src/compression/compression.h | 8 +++--- tsl/src/compression/compression_test.c | 25 ++++++++++++++---- .../compression/decompress_text_test_impl.c | 4 +-- tsl/test/expected/compression_algos.out | 4 +-- .../fuzzing/compression/array-text/with-nulls | Bin 0 -> 13613 bytes .../compression/dictionary-text/with-nulls | Bin 0 -> 211 bytes 6 files changed, 28 insertions(+), 13 deletions(-) create mode 100644 
tsl/test/fuzzing/compression/array-text/with-nulls create mode 100644 tsl/test/fuzzing/compression/dictionary-text/with-nulls diff --git a/tsl/src/compression/compression.h b/tsl/src/compression/compression.h index ade20c9da1e..954acb151a4 100644 --- a/tsl/src/compression/compression.h +++ b/tsl/src/compression/compression.h @@ -366,15 +366,15 @@ extern enum CompressionAlgorithms compress_get_default_algorithm(Oid typeoid); * to pollute the logs. */ #ifndef TS_COMPRESSION_FUZZING -#define CORRUPT_DATA_MESSAGE \ - (errmsg("the compressed data is corrupt"), errcode(ERRCODE_DATA_CORRUPTED)) +#define CORRUPT_DATA_MESSAGE(X) \ + (errmsg("the compressed data is corrupt"), errdetail(X), errcode(ERRCODE_DATA_CORRUPTED)) #else -#define CORRUPT_DATA_MESSAGE (errcode(ERRCODE_DATA_CORRUPTED)) +#define CORRUPT_DATA_MESSAGE(X) (errcode(ERRCODE_DATA_CORRUPTED)) #endif #define CheckCompressedData(X) \ if (unlikely(!(X))) \ - ereport(ERROR, CORRUPT_DATA_MESSAGE, errdetail(#X)) + ereport(ERROR, CORRUPT_DATA_MESSAGE(#X)) inline static void * consumeCompressedData(StringInfo si, int bytes) diff --git a/tsl/src/compression/compression_test.c b/tsl/src/compression/compression_test.c index bfe7949fe6f..35c92a79f56 100644 --- a/tsl/src/compression/compression_test.c +++ b/tsl/src/compression/compression_test.c @@ -42,6 +42,9 @@ get_compression_algorithm(char *name) return _INVALID_COMPRESSION_ALGORITHM; } +/* + * Specializations of test functions for arithmetic types. + */ #define ALGO GORILLA #define CTYPE float8 #define PG_TYPE_PREFIX FLOAT8 @@ -62,6 +65,10 @@ get_compression_algorithm(char *name) #undef PG_TYPE_PREFIX #undef DATUM_TO_CTYPE +/* + * The table of the supported testing configurations. We use it to generate + * dispatch tables and specializations of test functions. + */ #define APPLY_FOR_TYPES(X) \ X(GORILLA, FLOAT8, RowByRow) \ X(GORILLA, FLOAT8, Bulk) \ @@ -316,9 +323,12 @@ get_fuzzing_kind(const char *s) } } +/* + * Fuzzing target for all supported types. + */ static int -target(const uint8 *Data, size_t Size, CompressionAlgorithm requested_algo, Oid pg_type, - DecompressionTestType test_type) +target_generic(const uint8 *Data, size_t Size, CompressionAlgorithm requested_algo, Oid pg_type, + DecompressionTestType test_type) { StringInfoData si = { .data = (char *) Data, .len = Size }; @@ -354,8 +364,8 @@ target(const uint8 *Data, size_t Size, CompressionAlgorithm requested_algo, Oid } /* - * This is our test function that will be called by the libfuzzer driver. It - * has to catch the postgres exceptions normally produced for corrupt data. + * This is a wrapper for fuzzing target. It will called by the libfuzzer driver. + * It has to catch the postgres exceptions normally produced for corrupt data. */ static int target_wrapper(const uint8_t *Data, size_t Size, CompressionAlgorithm requested_algo, Oid pg_type, @@ -367,7 +377,7 @@ target_wrapper(const uint8_t *Data, size_t Size, CompressionAlgorithm requested_ PG_TRY(); { CHECK_FOR_INTERRUPTS(); - res = target(Data, Size, requested_algo, pg_type, test_type); + res = target_generic(Data, Size, requested_algo, pg_type, test_type); } PG_CATCH(); { @@ -384,6 +394,10 @@ target_wrapper(const uint8_t *Data, size_t Size, CompressionAlgorithm requested_ return res == -1 ? -1 : 0; } +/* + * Specializations of fuzzing targets for supported types that will be directly + * called by the fuzzing driver. 
+ */ #define DECLARE_TARGET(ALGO, PGTYPE, KIND) \ static int target_##ALGO##_##PGTYPE##_##KIND(const uint8_t *D, size_t S) \ { \ @@ -454,6 +468,7 @@ ts_fuzz_compression(PG_FUNCTION_ARGS) } APPLY_FOR_TYPES(DISPATCH) +#undef DISPATCH if (target == NULL) { diff --git a/tsl/src/compression/decompress_text_test_impl.c b/tsl/src/compression/decompress_text_test_impl.c index 9f47108db49..a993ba9583f 100644 --- a/tsl/src/compression/decompress_text_test_impl.c +++ b/tsl/src/compression/decompress_text_test_impl.c @@ -91,9 +91,9 @@ decompress_generic_text(const uint8 *Data, size_t Size, DecompressionTestType te compressed_data = (Datum) compressor->finish(compressor); if (compressed_data == 0) { - /* The gorilla compressor returns NULL for all-null input sets. */ + /* Some compressors return NULL when all rows are null. */ return n; - }; + } /* * 2) Decompress and check that it's the same. diff --git a/tsl/test/expected/compression_algos.out b/tsl/test/expected/compression_algos.out index cca0600eb4a..db1e90777ae 100644 --- a/tsl/test/expected/compression_algos.out +++ b/tsl/test/expected/compression_algos.out @@ -1594,7 +1594,7 @@ group by 2 order by 1 desc ; count | rowbyrow_result -------+----------------- - 2 | true + 3 | true 1 | XX001 (2 rows) @@ -1608,6 +1608,6 @@ group by 2 order by 1 desc count | rowbyrow_result -------+----------------- 5 | XX001 - 1 | true + 2 | true (2 rows) diff --git a/tsl/test/fuzzing/compression/array-text/with-nulls b/tsl/test/fuzzing/compression/array-text/with-nulls new file mode 100644 index 0000000000000000000000000000000000000000..a006fd327dc1ee650c6b65005bfdb88563f10a54 GIT binary patch literal 13613 zcmc)ML29N&5P;!7cmZ9grx-}Oy1Kd!;#vmHj00gtG@?iL03N{njmz&@n_VcXzn26a z>fzznk3atO{`LLKmyfsC=f7TW54YP_f8TDmufO^4=ZF94=dIy<`7P+-pSNGW{rS^} z56{0ne|~-U=cm_S-hKY#<>mJ$4MKy|pfu>W!FQJ*zkmIDa{v7GU! zmNO2S*IUO-jGS@Q#FjITo7YcoT2ICtmumK7aVbez)@4?5!|*{w4&3BPAfXC=(M8KicTv!t?0C((~3?jI<4rmqSJ~_D>|*{ zw4&3BPAfXC=(M8KicTv!t?0C((~3?jI<4rmqSJ~_D>|*{w4&3BPAfXC=(M8KicTv! zt!P@&w4!N6(~71QO)HvKG_7b_(X^szMbnC=6-_IeRy3_>TG6zkX+_hDrWH*qnpQNe zXj;* zTG6zk%Ze^5x~%B3qRWadE4r-cvZBk1E-SjM=(3{AiY_a0x~=H8 zqT7mYE4r=dwxZjLZY#R2=(eKUif${qt?0I*+lp>0x~=H8qT7mYE4r=dwxZjLZY#R2 z=(eKUif${qt?0I*+lp>0x~=H8qT7nL6>TfpR? 
zqHRUninbMPE814Ht!P`(wxVrC+lsaoZ7bSVw5@1c(YB&(McazD6>TfpR?qHRUninbN)?tcMV3?K{u literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/with-nulls b/tsl/test/fuzzing/compression/dictionary-text/with-nulls new file mode 100644 index 0000000000000000000000000000000000000000..b4f27907fedad2f810c87a949dc1423900d0a152 GIT binary patch literal 211 zcmZQ#EJ%+}PAo~x$xmk}Nv$YhU|?YU0wRDI1o#;k2)2Idz)T0jsARuKt8!ypPn t(ZH%zOTi*U8nX(dI18v4=mxMpunR%1 Date: Thu, 14 Dec 2023 14:10:45 +0100 Subject: [PATCH 089/249] cleanup --- .github/workflows/libfuzzer.yaml | 66 +++++++++++----- tsl/src/compression/compression_test.c | 78 +++++++------------ tsl/src/compression/compression_test.h | 12 +-- .../decompress_arithmetic_test_impl.c | 7 +- .../compression/decompress_text_test_impl.c | 17 ++-- 5 files changed, 85 insertions(+), 95 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 17f8d9e353c..afc9f4879d4 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -115,17 +115,17 @@ jobs: fail-fast: false matrix: case: [ - { algo: gorilla , pgtype: float8, kind: rowbyrow, runs: 500000000 }, - { algo: deltadelta, pgtype: int8 , kind: rowbyrow, runs: 500000000 }, - { algo: gorilla , pgtype: float8, kind: bulk , runs: 1000000000 }, - { algo: deltadelta, pgtype: int8 , kind: bulk , runs: 1000000000 }, + { algo: gorilla , pgtype: float8, bulk: false, runs: 500000000 }, + { algo: deltadelta, pgtype: int8 , bulk: false, runs: 500000000 }, + { algo: gorilla , pgtype: float8, bulk: true , runs: 1000000000 }, + { algo: deltadelta, pgtype: int8 , bulk: true , runs: 1000000000 }, # array has a peculiar recv function that recompresses all input, so # fuzzing it is much slower. The dictionary recv also uses it. - { algo: array , pgtype: text , kind: rowbyrow, runs: 10000000 }, - { algo: dictionary, pgtype: text , kind: rowbyrow, runs: 10000000 }, + { algo: array , pgtype: text , bulk: false, runs: 10000000 }, + { algo: dictionary, pgtype: text , bulk: false, runs: 10000000 }, ] - name: Fuzz decompression ${{ matrix.case.algo }} ${{ matrix.case.pgtype }} ${{ matrix.case.kind }} + name: Fuzz decompression ${{ matrix.case.algo }} ${{ matrix.case.pgtype }} ${{ matrix.case.bulk }} runs-on: ubuntu-22.04 env: PG_SRC_DIR: pgbuild @@ -158,25 +158,44 @@ jobs: initdb echo "shared_preload_libraries = 'timescaledb'" >> $PGDATA/postgresql.conf - - name: Restore the cached fuzzing corpus - id: restore-corpus-cache + - name: Restore the cached fuzzing corpus (bulk) + id: restore-corpus-cache-bulk uses: actions/cache/restore@v3 with: - path: db/corpus + path: db/corpus-bulk # If the initial corpus changes, probably it was updated by hand with # some important examples, and it makes sense to start anew from it. - key: "libfuzzer-corpus-2-${{ matrix.case.algo }}-${{ matrix.case.pgtype }}-\ + key: "libfuzzer-corpus-2-${{ matrix.case.algo }}-${{ matrix.case.pgtype }}-true\ + ${{ hashFiles(format('tsl/test/fuzzing/compression/{0}-{1}', matrix.case.algo, matrix.case.pgtype)) }}" + + # We save the row-by-row corpus separately from the bulk corpus, so that + # they don't overwrite each other. Now we are going to combine them. 
+ - name: Restore the cached fuzzing corpus (rowbyrow) + id: restore-corpus-cache-rowbyrow + uses: actions/cache/restore@v3 + with: + path: db/corpus-rowbyrow + key: "libfuzzer-corpus-2-${{ matrix.case.algo }}-${{ matrix.case.pgtype }}-false\ ${{ hashFiles(format('tsl/test/fuzzing/compression/{0}-{1}', matrix.case.algo, matrix.case.pgtype)) }}" - - name: Initialize the fuzzing corpus + - name: Initialize the fuzzing corpus from repository # cache-hit is only true for exact key matches, and we use prefix matches. - if: steps.restore-corpus-cache.outputs.cache-matched-key == '' + if: steps.restore-corpus-cache-bulk.outputs.cache-matched-key == '' && steps.restore-corpus-cache-rowbyrow.outputs.cache-matched-key == '' run: | # Copy the intial corpus files from the repository. The github actions # cache doesn't follow symlinks. mkdir -p db/corpus find "tsl/test/fuzzing/compression/${{ matrix.case.algo }}-${{ matrix.case.pgtype }}" -type f -exec cp -t db/corpus {} + + - name: Initialize the fuzzing corpus from cache + if: !(steps.restore-corpus-cache-bulk.outputs.cache-matched-key == '' && steps.restore-corpus-cache-rowbyrow.outputs.cache-matched-key == '') + run: | + # Combine the corpus from rowbyrow and bulk fuzzing. + mkdir -p db/corpus{,-rowbyrow,-bulk} + find "db/corpus-rowbyrow" -type f -exec cp -t db/corpus {} + + find "db/corpus-bulk" -type f -exec cp -t db/corpus {} + + ls db/corpus | wc -l + - name: Run libfuzzer for compression run: | set -xeu @@ -191,7 +210,7 @@ jobs: # Create the fuzzing function export MODULE_NAME=$(basename $(find $HOME/$PG_INSTALL_DIR -name "timescaledb-tsl-*.so")) - psql -a -c "create or replace function fuzz(algo cstring, pgtype regtype, kind cstring, runs int) returns int as '"$MODULE_NAME"', 'ts_fuzz_compression' language c;" + psql -a -c "create or replace function fuzz(algo cstring, pgtype regtype, bulk bool, runs int) returns int as '"$MODULE_NAME"', 'ts_fuzz_compression' language c;" # Start more fuzzing processes in the background. We won't even monitor @@ -199,14 +218,14 @@ jobs: for x in {2..$(nproc)} do psql -v ON_ERROR_STOP=1 -c "select fuzz('${{ matrix.case.algo }}', - '${{ matrix.case.pgtype }}', '${{ matrix.case.kind }}', ${{ matrix.case.runs }});" & + '${{ matrix.case.pgtype }}', '${{ matrix.case.bulk }}', ${{ matrix.case.runs }});" & done # Start the one fuzzing process that we will monitor, in foreground. # The LLVM fuzzing driver calls exit(), so we expect to lose the connection. ret=0 psql -v ON_ERROR_STOP=1 -c "select fuzz('${{ matrix.case.algo }}', - '${{ matrix.case.pgtype }}', '${{ matrix.case.kind }}', ${{ matrix.case.runs }});" || ret=$? + '${{ matrix.case.pgtype }}', '${{ matrix.case.bulk }}', ${{ matrix.case.runs }});" || ret=$? if ! [ $ret -eq 2 ] then >&2 echo "Unexpected psql exit code $ret" @@ -216,6 +235,8 @@ jobs: # Check that the server is still alive. 
psql -c "select 1" + ls db/corpus | wc -l + - name: Collect the logs if: always() id: collectlogs @@ -231,26 +252,31 @@ jobs: if: always() uses: actions/upload-artifact@v3 with: - name: PostgreSQL log for ${{ matrix.case.algo }} ${{ matrix.case.type }} ${{ matrix.case.kind }} + name: PostgreSQL log for ${{ matrix.case.algo }} ${{ matrix.case.type }} ${{ matrix.case.bulk }} path: postgres.log - name: Save fuzzer-generated crash cases if: always() uses: actions/upload-artifact@v3 with: - name: Crash cases for ${{ matrix.case.algo }} ${{ matrix.case.type }} ${{ matrix.case.kind }} + name: Crash cases for ${{ matrix.case.algo }} ${{ matrix.case.type }} ${{ matrix.case.bulk }} path: db/crash-* # We use separate restore/save actions, because the default action won't # save the updated folder after the cache hit. We also can't overwrite the # existing cache, so we add a unique suffix. The cache is matched by key # prefix, not exact key, and picks the newest matching item, so this works. + # The caches for rowbyrow and bulk fuzzing are saved separately, otherwise + # the slower job would always overwrite the cache from the faster one. We + # want to combine corpuses from bulk and rowbyrow fuzzing for better + # coverage. - name: Save fuzzer corpus uses: actions/cache/save@v3 with: path: db/corpus - key: "${{ format('{0}-{1}-{2}', + key: "${{ format('{0}-{1}-{2}-{3}', steps.restore-corpus-cache.outputs.cache-primary-key, + matrix.case.bulk, github.run_id, github.run_attempt) }}" - name: Stack trace @@ -277,5 +303,5 @@ jobs: if: always() && steps.collectlogs.outputs.coredumps == 'true' uses: actions/upload-artifact@v3 with: - name: Coredumps for ${{ matrix.case.algo }} ${{ matrix.case.type }} ${{ matrix.case.kind }} + name: Coredumps for ${{ matrix.case.algo }} ${{ matrix.case.type }} ${{ matrix.case.bulk }} path: coredumps diff --git a/tsl/src/compression/compression_test.c b/tsl/src/compression/compression_test.c index 35c92a79f56..fa29cbd9284 100644 --- a/tsl/src/compression/compression_test.c +++ b/tsl/src/compression/compression_test.c @@ -70,17 +70,16 @@ get_compression_algorithm(char *name) * dispatch tables and specializations of test functions. */ #define APPLY_FOR_TYPES(X) \ - X(GORILLA, FLOAT8, RowByRow) \ - X(GORILLA, FLOAT8, Bulk) \ - X(DELTADELTA, INT8, RowByRow) \ - X(DELTADELTA, INT8, Bulk) \ - X(ARRAY, TEXT, RowByRow) \ - X(DICTIONARY, TEXT, RowByRow) - -static int (*get_decompress_fn(int algo, Oid type))(const uint8 *Data, size_t Size, - DecompressionTestType test_type) + X(GORILLA, FLOAT8, true) \ + X(GORILLA, FLOAT8, false) \ + X(DELTADELTA, INT8, true) \ + X(DELTADELTA, INT8, false) \ + X(ARRAY, TEXT, false) \ + X(DICTIONARY, TEXT, false) + +static int (*get_decompress_fn(int algo, Oid type))(const uint8 *Data, size_t Size, bool bulk) { -#define DISPATCH(ALGO, PGTYPE, KIND) \ +#define DISPATCH(ALGO, PGTYPE, BULK) \ if (algo == COMPRESSION_ALGORITHM_##ALGO && type == PGTYPE##OID) \ { \ return decompress_##ALGO##_##PGTYPE; \ @@ -142,9 +141,7 @@ read_compressed_data_file_impl(int algo, Oid type, const char *path, bool bulk, string[fsize] = 0; - *rows = get_decompress_fn(algo, type)((const uint8 *) string, - fsize, - /* test_type = */ bulk ? 
DTT_Bulk : DTT_RowByRow); + *rows = get_decompress_fn(algo, type)((const uint8 *) string, fsize, bulk); } TS_FUNCTION_INFO_V1(ts_read_compressed_data_file); @@ -306,29 +303,12 @@ ts_read_compressed_data_directory(PG_FUNCTION_ARGS) #ifdef TS_COMPRESSION_FUZZING -static DecompressionTestType -get_fuzzing_kind(const char *s) -{ - if (strcmp(s, "bulk") == 0) - { - return DTT_BulkFuzzing; - } - else if (strcmp(s, "rowbyrow") == 0) - { - return DTT_RowByRowFuzzing; - } - else - { - elog(ERROR, "unknown fuzzing type '%s'", s); - } -} - /* * Fuzzing target for all supported types. */ static int target_generic(const uint8 *Data, size_t Size, CompressionAlgorithm requested_algo, Oid pg_type, - DecompressionTestType test_type) + bool bulk) { StringInfoData si = { .data = (char *) Data, .len = Size }; @@ -349,17 +329,16 @@ target_generic(const uint8 *Data, size_t Size, CompressionAlgorithm requested_al const CompressionAlgorithmDefinition *def = algorithm_definition(data_algo); Datum compressed_data = def->compressed_data_recv(&si); - if (test_type == DTT_RowByRowFuzzing) + if (bulk) { - DecompressionIterator *iter = def->iterator_init_forward(compressed_data, pg_type); - for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) - ; + DecompressAllFunction decompress_all = tsl_get_decompress_all_function(data_algo); + decompress_all(compressed_data, pg_type, CurrentMemoryContext); return 0; } - Assert(test_type == DTT_BulkFuzzing); - DecompressAllFunction decompress_all = tsl_get_decompress_all_function(data_algo); - decompress_all(compressed_data, pg_type, CurrentMemoryContext); + DecompressionIterator *iter = def->iterator_init_forward(compressed_data, pg_type); + for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) + ; return 0; } @@ -369,7 +348,7 @@ target_generic(const uint8 *Data, size_t Size, CompressionAlgorithm requested_al */ static int target_wrapper(const uint8_t *Data, size_t Size, CompressionAlgorithm requested_algo, Oid pg_type, - DecompressionTestType test_type) + bool bulk) { MemoryContextReset(CurrentMemoryContext); @@ -377,7 +356,7 @@ target_wrapper(const uint8_t *Data, size_t Size, CompressionAlgorithm requested_ PG_TRY(); { CHECK_FOR_INTERRUPTS(); - res = target_generic(Data, Size, requested_algo, pg_type, test_type); + res = target_generic(Data, Size, requested_algo, pg_type, bulk); } PG_CATCH(); { @@ -398,14 +377,10 @@ target_wrapper(const uint8_t *Data, size_t Size, CompressionAlgorithm requested_ * Specializations of fuzzing targets for supported types that will be directly * called by the fuzzing driver. 
*/ -#define DECLARE_TARGET(ALGO, PGTYPE, KIND) \ - static int target_##ALGO##_##PGTYPE##_##KIND(const uint8_t *D, size_t S) \ +#define DECLARE_TARGET(ALGO, PGTYPE, BULK) \ + static int target_##ALGO##_##PGTYPE##_##BULK(const uint8_t *D, size_t S) \ { \ - return target_wrapper(D, \ - S, \ - COMPRESSION_ALGORITHM_##ALGO, \ - PGTYPE##OID, \ - DTT_##KIND##Fuzzing); \ + return target_wrapper(D, S, COMPRESSION_ALGORITHM_##ALGO, PGTYPE##OID, BULK); \ } APPLY_FOR_TYPES(DECLARE_TARGET) @@ -456,15 +431,14 @@ ts_fuzz_compression(PG_FUNCTION_ARGS) int algo = get_compression_algorithm(PG_GETARG_CSTRING(0)); Oid type = PG_GETARG_OID(1); - int kind = get_fuzzing_kind(PG_GETARG_CSTRING(2)); + bool bulk = PG_GETARG_BOOL(2); int (*target)(const uint8_t *, size_t) = NULL; -#define DISPATCH(ALGO, PGTYPE, KIND) \ - if (algo == COMPRESSION_ALGORITHM_##ALGO && type == PGTYPE##OID && \ - kind == DTT_##KIND##Fuzzing) \ +#define DISPATCH(ALGO, PGTYPE, BULK) \ + if (algo == COMPRESSION_ALGORITHM_##ALGO && type == PGTYPE##OID && bulk == BULK) \ { \ - target = target_##ALGO##_##PGTYPE##_##KIND; \ + target = target_##ALGO##_##PGTYPE##_##BULK; \ } APPLY_FOR_TYPES(DISPATCH) diff --git a/tsl/src/compression/compression_test.h b/tsl/src/compression/compression_test.h index e2efb736989..f10402eee52 100644 --- a/tsl/src/compression/compression_test.h +++ b/tsl/src/compression/compression_test.h @@ -7,16 +7,8 @@ #include "compression.h" -typedef enum -{ - DTT_BulkFuzzing, - DTT_RowByRowFuzzing, - DTT_RowByRow, - DTT_Bulk -} DecompressionTestType; +int decompress_ARRAY_TEXT(const uint8 *Data, size_t Size, bool bulk); -int decompress_ARRAY_TEXT(const uint8 *Data, size_t Size, DecompressionTestType test_type); - -int decompress_DICTIONARY_TEXT(const uint8 *Data, size_t Size, DecompressionTestType test_type); +int decompress_DICTIONARY_TEXT(const uint8 *Data, size_t Size, bool bulk); const CompressionAlgorithmDefinition *algorithm_definition(CompressionAlgorithm algo); diff --git a/tsl/src/compression/decompress_arithmetic_test_impl.c b/tsl/src/compression/decompress_arithmetic_test_impl.c index 4aec38ee4a2..60f5b7ea88d 100644 --- a/tsl/src/compression/decompress_arithmetic_test_impl.c +++ b/tsl/src/compression/decompress_arithmetic_test_impl.c @@ -74,8 +74,7 @@ FUNCTION_NAME2(check_arrow, CTYPE)(ArrowArray *arrow, int error_type, Decompress * for arithmetic types. */ static int -FUNCTION_NAME3(decompress, ALGO, PG_TYPE_PREFIX)(const uint8 *Data, size_t Size, - DecompressionTestType test_type) +FUNCTION_NAME3(decompress, ALGO, PG_TYPE_PREFIX)(const uint8 *Data, size_t Size, bool bulk) { StringInfoData si = { .data = (char *) Data, .len = Size }; @@ -99,7 +98,7 @@ FUNCTION_NAME3(decompress, ALGO, PG_TYPE_PREFIX)(const uint8 *Data, size_t Size, DecompressAllFunction decompress_all = tsl_get_decompress_all_function(data_algo); ArrowArray *arrow = NULL; - if (test_type == DTT_Bulk) + if (bulk) { /* * Test bulk decompression. Have to do this before row-by-row decompression @@ -125,7 +124,7 @@ FUNCTION_NAME3(decompress, ALGO, PG_TYPE_PREFIX)(const uint8 *Data, size_t Size, } /* Check that both ways of decompression match. */ - if (test_type == DTT_Bulk) + if (bulk) { FUNCTION_NAME2(check_arrow, CTYPE)(arrow, ERROR, results, n); return n; diff --git a/tsl/src/compression/decompress_text_test_impl.c b/tsl/src/compression/decompress_text_test_impl.c index a993ba9583f..d90ed30cebe 100644 --- a/tsl/src/compression/decompress_text_test_impl.c +++ b/tsl/src/compression/decompress_text_test_impl.c @@ -18,12 +18,11 @@ * for arithmetic types. 
*/ static int -decompress_generic_text(const uint8 *Data, size_t Size, DecompressionTestType test_type, - int requested_algo) +decompress_generic_text(const uint8 *Data, size_t Size, bool bulk, int requested_algo) { - if (!(test_type == DTT_RowByRow)) + if (bulk) { - elog(ERROR, "decompression test type %d not supported for text", test_type); + elog(ERROR, "bulk decompression not supported for text"); } StringInfoData si = { .data = (char *) Data, .len = Size }; @@ -66,7 +65,7 @@ decompress_generic_text(const uint8 *Data, size_t Size, DecompressionTestType te * after we compress and decompress back. * Don't perform this check for other types of tests. */ - if (test_type != DTT_RowByRow) + if (bulk) { return n; } @@ -141,13 +140,13 @@ decompress_generic_text(const uint8 *Data, size_t Size, DecompressionTestType te } int -decompress_ARRAY_TEXT(const uint8 *Data, size_t Size, DecompressionTestType test_type) +decompress_ARRAY_TEXT(const uint8 *Data, size_t Size, bool bulk) { - return decompress_generic_text(Data, Size, test_type, COMPRESSION_ALGORITHM_ARRAY); + return decompress_generic_text(Data, Size, bulk, COMPRESSION_ALGORITHM_ARRAY); } int -decompress_DICTIONARY_TEXT(const uint8 *Data, size_t Size, DecompressionTestType test_type) +decompress_DICTIONARY_TEXT(const uint8 *Data, size_t Size, bool bulk) { - return decompress_generic_text(Data, Size, test_type, COMPRESSION_ALGORITHM_DICTIONARY); + return decompress_generic_text(Data, Size, bulk, COMPRESSION_ALGORITHM_DICTIONARY); } From 4bdd452612358cce0a99e015490844e3805b2392 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 14 Dec 2023 14:15:02 +0100 Subject: [PATCH 090/249] yaml --- .github/workflows/libfuzzer.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index afc9f4879d4..b552924569a 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -188,7 +188,7 @@ jobs: find "tsl/test/fuzzing/compression/${{ matrix.case.algo }}-${{ matrix.case.pgtype }}" -type f -exec cp -t db/corpus {} + - name: Initialize the fuzzing corpus from cache - if: !(steps.restore-corpus-cache-bulk.outputs.cache-matched-key == '' && steps.restore-corpus-cache-rowbyrow.outputs.cache-matched-key == '') + if: ${{ !(steps.restore-corpus-cache-bulk.outputs.cache-matched-key == '' && steps.restore-corpus-cache-rowbyrow.outputs.cache-matched-key == '') }} run: | # Combine the corpus from rowbyrow and bulk fuzzing. mkdir -p db/corpus{,-rowbyrow,-bulk} From 610c6c2ae421d23551e32e52ce74244a70da1921 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 14 Dec 2023 14:33:33 +0100 Subject: [PATCH 091/249] dash --- .github/workflows/libfuzzer.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index b552924569a..bfee920d129 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -165,7 +165,7 @@ jobs: path: db/corpus-bulk # If the initial corpus changes, probably it was updated by hand with # some important examples, and it makes sense to start anew from it. 
- key: "libfuzzer-corpus-2-${{ matrix.case.algo }}-${{ matrix.case.pgtype }}-true\ + key: "libfuzzer-corpus-2-${{ matrix.case.algo }}-${{ matrix.case.pgtype }}-true-\ ${{ hashFiles(format('tsl/test/fuzzing/compression/{0}-{1}', matrix.case.algo, matrix.case.pgtype)) }}" # We save the row-by-row corpus separately from the bulk corpus, so that @@ -175,7 +175,7 @@ jobs: uses: actions/cache/restore@v3 with: path: db/corpus-rowbyrow - key: "libfuzzer-corpus-2-${{ matrix.case.algo }}-${{ matrix.case.pgtype }}-false\ + key: "libfuzzer-corpus-2-${{ matrix.case.algo }}-${{ matrix.case.pgtype }}-false-\ ${{ hashFiles(format('tsl/test/fuzzing/compression/{0}-{1}', matrix.case.algo, matrix.case.pgtype)) }}" - name: Initialize the fuzzing corpus from repository From 67e9fe1bc27d744070be1e3ecae85e9b46ab5f83 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 14 Dec 2023 15:43:01 +0100 Subject: [PATCH 092/249] job fixes --- .github/workflows/libfuzzer.yaml | 40 +++++++++++++------------------- 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index bfee920d129..eaae5664ed6 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -158,15 +158,19 @@ jobs: initdb echo "shared_preload_libraries = 'timescaledb'" >> $PGDATA/postgresql.conf + - name: Set configuration + id: config + run: | + set -x + echo "cache_prefix=${{ format('libfuzzer-2-{0}-{1}', matrix.case.algo, matrix.case.pgtype) }}" >> $GITHUB_OUTPUT + echo "name='${{ matrix.case.algo }} ${{ matrix.case.pgtype }} ${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }}'" >> $GITHUB_OUTPUT + - name: Restore the cached fuzzing corpus (bulk) id: restore-corpus-cache-bulk uses: actions/cache/restore@v3 with: path: db/corpus-bulk - # If the initial corpus changes, probably it was updated by hand with - # some important examples, and it makes sense to start anew from it. - key: "libfuzzer-corpus-2-${{ matrix.case.algo }}-${{ matrix.case.pgtype }}-true-\ - ${{ hashFiles(format('tsl/test/fuzzing/compression/{0}-{1}', matrix.case.algo, matrix.case.pgtype)) }}" + key: "${{ steps.config.outputs.cache_prefix }}-bulk" # We save the row-by-row corpus separately from the bulk corpus, so that # they don't overwrite each other. Now we are going to combine them. @@ -175,25 +179,16 @@ jobs: uses: actions/cache/restore@v3 with: path: db/corpus-rowbyrow - key: "libfuzzer-corpus-2-${{ matrix.case.algo }}-${{ matrix.case.pgtype }}-false-\ - ${{ hashFiles(format('tsl/test/fuzzing/compression/{0}-{1}', matrix.case.algo, matrix.case.pgtype)) }}" + key: "${{ steps.config.outputs.cache_prefix }}-rowbyrow" - - name: Initialize the fuzzing corpus from repository - # cache-hit is only true for exact key matches, and we use prefix matches. - if: steps.restore-corpus-cache-bulk.outputs.cache-matched-key == '' && steps.restore-corpus-cache-rowbyrow.outputs.cache-matched-key == '' - run: | - # Copy the intial corpus files from the repository. The github actions - # cache doesn't follow symlinks. 
- mkdir -p db/corpus - find "tsl/test/fuzzing/compression/${{ matrix.case.algo }}-${{ matrix.case.pgtype }}" -type f -exec cp -t db/corpus {} + - - - name: Initialize the fuzzing corpus from cache + - name: Initialize the fuzzing corpus if: ${{ !(steps.restore-corpus-cache-bulk.outputs.cache-matched-key == '' && steps.restore-corpus-cache-rowbyrow.outputs.cache-matched-key == '') }} run: | - # Combine the corpus from rowbyrow and bulk fuzzing. + # Combine the cached corpus from rowbyrow and bulk fuzzing, and from repository. mkdir -p db/corpus{,-rowbyrow,-bulk} find "db/corpus-rowbyrow" -type f -exec cp -t db/corpus {} + find "db/corpus-bulk" -type f -exec cp -t db/corpus {} + + find "tsl/test/fuzzing/compression/${{ matrix.case.algo }}-${{ matrix.case.pgtype }}" -type f -exec cp -t db/corpus {} + ls db/corpus | wc -l - name: Run libfuzzer for compression @@ -252,14 +247,14 @@ jobs: if: always() uses: actions/upload-artifact@v3 with: - name: PostgreSQL log for ${{ matrix.case.algo }} ${{ matrix.case.type }} ${{ matrix.case.bulk }} + name: PostgreSQL log for ${{ steps.config.outputs.name }} path: postgres.log - name: Save fuzzer-generated crash cases if: always() uses: actions/upload-artifact@v3 with: - name: Crash cases for ${{ matrix.case.algo }} ${{ matrix.case.type }} ${{ matrix.case.bulk }} + name: Crash cases for ${{ steps.config.outputs.name }} path: db/crash-* # We use separate restore/save actions, because the default action won't @@ -274,10 +269,7 @@ jobs: uses: actions/cache/save@v3 with: path: db/corpus - key: "${{ format('{0}-{1}-{2}-{3}', - steps.restore-corpus-cache.outputs.cache-primary-key, - matrix.case.bulk, - github.run_id, github.run_attempt) }}" + key: "${{ steps.config.outputs.cache_prefix }}-${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }}" - name: Stack trace if: always() && steps.collectlogs.outputs.coredumps == 'true' @@ -303,5 +295,5 @@ jobs: if: always() && steps.collectlogs.outputs.coredumps == 'true' uses: actions/upload-artifact@v3 with: - name: Coredumps for ${{ matrix.case.algo }} ${{ matrix.case.type }} ${{ matrix.case.bulk }} + name: Coredumps for ${{ steps.config.outputs.name }} path: coredumps From 196dd9ff794eb6dd6cea683ff98f5caec3b1fafd Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 14 Dec 2023 15:50:29 +0100 Subject: [PATCH 093/249] fix --- .github/workflows/libfuzzer.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index eaae5664ed6..27007ab8e26 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -182,7 +182,6 @@ jobs: key: "${{ steps.config.outputs.cache_prefix }}-rowbyrow" - name: Initialize the fuzzing corpus - if: ${{ !(steps.restore-corpus-cache-bulk.outputs.cache-matched-key == '' && steps.restore-corpus-cache-rowbyrow.outputs.cache-matched-key == '') }} run: | # Combine the cached corpus from rowbyrow and bulk fuzzing, and from repository. 
mkdir -p db/corpus{,-rowbyrow,-bulk} From 219f5e0b5435bbdcba3817a3f554dc5a614fcba0 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 14 Dec 2023 16:16:50 +0100 Subject: [PATCH 094/249] cleanup --- .github/workflows/libfuzzer.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 27007ab8e26..263f545ca1c 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -125,7 +125,7 @@ jobs: { algo: dictionary, pgtype: text , bulk: false, runs: 10000000 }, ] - name: Fuzz decompression ${{ matrix.case.algo }} ${{ matrix.case.pgtype }} ${{ matrix.case.bulk }} + name: Fuzz decompression ${{ matrix.case.algo }} ${{ matrix.case.pgtype }} ${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }} runs-on: ubuntu-22.04 env: PG_SRC_DIR: pgbuild @@ -163,7 +163,7 @@ jobs: run: | set -x echo "cache_prefix=${{ format('libfuzzer-2-{0}-{1}', matrix.case.algo, matrix.case.pgtype) }}" >> $GITHUB_OUTPUT - echo "name='${{ matrix.case.algo }} ${{ matrix.case.pgtype }} ${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }}'" >> $GITHUB_OUTPUT + echo "name=${{ matrix.case.algo }} ${{ matrix.case.pgtype }} ${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }}" >> $GITHUB_OUTPUT - name: Restore the cached fuzzing corpus (bulk) id: restore-corpus-cache-bulk From 99b1dc6b00c22fcded2dfa4c3679fee223c6e439 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 14 Dec 2023 16:39:36 +0100 Subject: [PATCH 095/249] fix the saved cache key --- .github/workflows/libfuzzer.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 263f545ca1c..f1fd8d6fee2 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -268,7 +268,10 @@ jobs: uses: actions/cache/save@v3 with: path: db/corpus - key: "${{ steps.config.outputs.cache_prefix }}-${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }}" + key: "${{ format('{0}-{1}-{2}-{3}', + steps.config.outputs.cache_prefix, + matrix.case.bulk && 'bulk' || 'rowbyrow', + github.run_id, github.run_attempt) }}" - name: Stack trace if: always() && steps.collectlogs.outputs.coredumps == 'true' From 52033927b289531273c958e5eba09f138ff456d2 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 14 Dec 2023 16:43:56 +0100 Subject: [PATCH 096/249] add another path --- .github/workflows/libfuzzer.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index f1fd8d6fee2..458af23a104 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -9,7 +9,9 @@ name: Libfuzzer - prerelease_test - trigger/libfuzzer pull_request: - paths: .github/workflows/libfuzzer.yaml + paths: + - .github/workflows/libfuzzer.yaml + - 'tsl/test/fuzzing/compression/**' jobs: build: From 3f0103eadea731b1d612e9704cf61ca105cbf04f Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 14 Dec 2023 17:21:01 +0100 Subject: [PATCH 097/249] tests for nulls --- tsl/src/compression/array.c | 19 ++-- tsl/src/compression/dictionary.c | 1 + tsl/test/expected/decompress_vector_qual.out | 97 +++++++++++++++----- tsl/test/sql/decompress_vector_qual.sql | 40 +++++--- 4 files changed, 113 insertions(+), 44 deletions(-) diff --git 
a/tsl/src/compression/array.c b/tsl/src/compression/array.c index e46e57c1656..7aa60857ed6 100644 --- a/tsl/src/compression/array.c +++ b/tsl/src/compression/array.c @@ -547,6 +547,19 @@ text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, { Assert(i >= current_notnull_element); + /* + * The index of the corresponding offset is higher by one than + * the index of the element. The offset[0] is never affected by + * this shuffling and is always 0. + * Note that unlike the usual null reshuffling in other algorithms, + * for offsets, even if all elements are null, the starting offset + * is well-defined and we can do this assignment. This case is only + * accessible through fuzzing. Through SQL, all-null batches result + * in a null compressed value. + */ + Assert(current_notnull_element + 1 >= 0); + offsets[i + 1] = offsets[current_notnull_element + 1]; + if (simple8brle_bitmap_get_at(&nulls, i)) { arrow_set_row_validity(validity_bitmap, i, false); @@ -554,12 +567,6 @@ text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, else { Assert(current_notnull_element >= 0); - /* - * The index of the corresponding offset is higher by one than - * the index of the element. The offset[0] is never affected by - * this shuffling and is always 0. - */ - offsets[i + 1] = offsets[current_notnull_element + 1]; current_notnull_element--; } } diff --git a/tsl/src/compression/dictionary.c b/tsl/src/compression/dictionary.c index 145762404fc..0386d411345 100644 --- a/tsl/src/compression/dictionary.c +++ b/tsl/src/compression/dictionary.c @@ -471,6 +471,7 @@ tsl_text_dictionary_decompress_all(Datum compressed, Oid element_type, MemoryCon if (simple8brle_bitmap_get_at(&nulls, i)) { arrow_set_row_validity(validity_bitmap, i, false); + indices[i] = 0; } else { diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index b5781bf8637..22433e1f65c 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -665,22 +665,20 @@ select * from date_table where ts < '2021-01-02'; (1 row) -- Vectorized comparison for text -create table t(ts timestamp, a text); +create table t(ts int, d int, a text); select create_hypertable('t', 'ts'); -WARNING: column type "timestamp without time zone" used for "ts" does not follow best practices NOTICE: adding not-null constraint to column "ts" create_hypertable ------------------- (7,public,t,t) (1 row) -alter table t set (timescaledb.compress); -insert into t select '2021-01-01 01:01:01'::timestamp + interval '1 second' * x, 'same' -from generate_series(1, 1000) x -; -insert into t select '2021-01-01 02:01:01'::timestamp + interval '1 second' * x, 'different' || x -from generate_series(1, 1000) x -; +alter table t set (timescaledb.compress, timescaledb.compress_segmentby = 'd'); +insert into t select x, 1, '' from generate_series(1, 1000) x; +insert into t select x, 2, 'same' from generate_series(1, 1000) x; +insert into t select x, 3, 'different' || x from generate_series(1, 1000) x; +insert into t select x, 4, case when x % 2 = 0 then null else 'same-with-nulls' end from generate_series(1, 1000) x; +insert into t select x, 5, case when x % 2 = 0 then null else 'different-with-nulls' || x end from generate_series(1, 1000) x; select count(compress_chunk(x, true)) from show_chunks('t') x; count ------- @@ -688,27 +686,76 @@ select count(compress_chunk(x, true)) from show_chunks('t') x; (1 row) set timescaledb.debug_require_vector_qual to 
'only'; -select count(*), min(ts) from t where a = 'same'; - count | min --------+-------------------------- - 1000 | Fri Jan 01 01:01:02 2021 +-- Uncomment to generate the test reference w/o the vector optimizations. +-- set timescaledb.enable_bulk_decompression to off; +-- set timescaledb.debug_require_vector_qual to 'forbid'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a = ''; + count | min | max | min | max +-------+-----+------+-----+----- + 1000 | 1 | 1000 | 1 | 1 +(1 row) + +select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'same'; + count | min | max | min | max +-------+-----+------+-----+----- + 1000 | 1 | 1000 | 2 | 2 (1 row) -select count(*), min(ts) from t where a = 'different1'; - count | min --------+-------------------------- - 1 | Fri Jan 01 02:01:02 2021 +select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'same-with-nulls'; + count | min | max | min | max +-------+-----+-----+-----+----- + 500 | 1 | 999 | 4 | 4 (1 row) -select count(*), min(ts) from t where a = 'different1000'; - count | min --------+-------------------------- - 1 | Fri Jan 01 02:17:41 2021 +select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different1'; + count | min | max | min | max +-------+-----+-----+-----+----- + 1 | 1 | 1 | 3 | 3 (1 row) -select count(*), min(ts), max(ts) from t where a in ('same', 'different500'); - count | min | max --------+--------------------------+-------------------------- - 1001 | Fri Jan 01 01:01:02 2021 | Fri Jan 01 02:09:21 2021 +select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different-with-nulls1'; + count | min | max | min | max +-------+-----+-----+-----+----- + 1 | 1 | 1 | 5 | 5 (1 row) +select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different1000'; + count | min | max | min | max +-------+------+------+-----+----- + 1 | 1000 | 1000 | 3 | 3 +(1 row) + +select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different-with-nulls999'; + count | min | max | min | max +-------+-----+-----+-----+----- + 1 | 999 | 999 | 5 | 5 +(1 row) + +select count(*), min(ts), max(ts), min(d), max(d) from t where a in ('same', 'different500'); + count | min | max | min | max +-------+-----+------+-----+----- + 1001 | 1 | 1000 | 2 | 3 +(1 row) + +select count(*), min(ts), max(ts), min(d), max(d) from t where a in ('same-with-nulls', 'different-with-nulls499'); + count | min | max | min | max +-------+-----+-----+-----+----- + 501 | 1 | 999 | 4 | 5 +(1 row) + +-- Null tests are not vectorized yet. 
+reset timescaledb.debug_require_vector_qual; +select count(*), min(ts), max(ts), min(d), max(d) from t where a is null; + count | min | max | min | max +-------+-----+------+-----+----- + 1000 | 2 | 1000 | 4 | 5 +(1 row) + +select count(*), min(ts), max(ts), min(d), max(d) from t where a is not null; + count | min | max | min | max +-------+-----+------+-----+----- + 4000 | 1 | 1000 | 1 | 5 +(1 row) + +reset timescaledb.debug_require_vector_qual; +reset timescaledb.enable_bulk_decompression; diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index 8602717c7d9..d1044ef8098 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -222,23 +222,37 @@ select * from date_table where ts <= '2021-01-02'; select * from date_table where ts < '2021-01-02'; -- Vectorized comparison for text -create table t(ts timestamp, a text); +create table t(ts int, d int, a text); select create_hypertable('t', 'ts'); -alter table t set (timescaledb.compress); +alter table t set (timescaledb.compress, timescaledb.compress_segmentby = 'd'); -insert into t select '2021-01-01 01:01:01'::timestamp + interval '1 second' * x, 'same' -from generate_series(1, 1000) x -; - -insert into t select '2021-01-01 02:01:01'::timestamp + interval '1 second' * x, 'different' || x -from generate_series(1, 1000) x -; +insert into t select x, 1, '' from generate_series(1, 1000) x; +insert into t select x, 2, 'same' from generate_series(1, 1000) x; +insert into t select x, 3, 'different' || x from generate_series(1, 1000) x; +insert into t select x, 4, case when x % 2 = 0 then null else 'same-with-nulls' end from generate_series(1, 1000) x; +insert into t select x, 5, case when x % 2 = 0 then null else 'different-with-nulls' || x end from generate_series(1, 1000) x; select count(compress_chunk(x, true)) from show_chunks('t') x; set timescaledb.debug_require_vector_qual to 'only'; +-- Uncomment to generate the test reference w/o the vector optimizations. +-- set timescaledb.enable_bulk_decompression to off; +-- set timescaledb.debug_require_vector_qual to 'forbid'; -select count(*), min(ts) from t where a = 'same'; -select count(*), min(ts) from t where a = 'different1'; -select count(*), min(ts) from t where a = 'different1000'; -select count(*), min(ts), max(ts) from t where a in ('same', 'different500'); +select count(*), min(ts), max(ts), min(d), max(d) from t where a = ''; +select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'same'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'same-with-nulls'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different1'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different-with-nulls1'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different1000'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different-with-nulls999'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a in ('same', 'different500'); +select count(*), min(ts), max(ts), min(d), max(d) from t where a in ('same-with-nulls', 'different-with-nulls499'); + +-- Null tests are not vectorized yet. 
+reset timescaledb.debug_require_vector_qual; +select count(*), min(ts), max(ts), min(d), max(d) from t where a is null; +select count(*), min(ts), max(ts), min(d), max(d) from t where a is not null; + +reset timescaledb.debug_require_vector_qual; +reset timescaledb.enable_bulk_decompression; From 0be0a8af43a1c0bd9a2754833728b4d611aaf29c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 14 Dec 2023 17:21:25 +0100 Subject: [PATCH 098/249] format --- tsl/src/nodes/decompress_chunk/exec.c | 5 ++--- tsl/src/nodes/decompress_chunk/pred_vector_array.c | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/exec.c b/tsl/src/nodes/decompress_chunk/exec.c index f1052b61f31..f299754bc04 100644 --- a/tsl/src/nodes/decompress_chunk/exec.c +++ b/tsl/src/nodes/decompress_chunk/exec.c @@ -368,9 +368,8 @@ decompress_chunk_begin(CustomScanState *node, EState *estate, int eflags) * For variable-length types (we only have text) we can't * estimate the width currently. */ - batch_memory_context_bytes += - (GLOBAL_MAX_ROWS_PER_COMPRESSION + 64) * - (column->value_bytes > 0 ? column->value_bytes : 16); + batch_memory_context_bytes += (GLOBAL_MAX_ROWS_PER_COMPRESSION + 64) * + (column->value_bytes > 0 ? column->value_bytes : 16); /* Also nulls bitmap. */ batch_memory_context_bytes += GLOBAL_MAX_ROWS_PER_COMPRESSION / (64 * sizeof(uint64)); diff --git a/tsl/src/nodes/decompress_chunk/pred_vector_array.c b/tsl/src/nodes/decompress_chunk/pred_vector_array.c index d9a497ed64a..0a61c58aba4 100644 --- a/tsl/src/nodes/decompress_chunk/pred_vector_array.c +++ b/tsl/src/nodes/decompress_chunk/pred_vector_array.c @@ -90,7 +90,7 @@ vector_array_predicate(VectorPredicate *vector_const_predicate, bool is_or, } Datum constvalue = fetch_att(array_data, typbyval, typlen); array_data = att_addlength_pointer(array_data, typlen, array_data); - array_data = (char * restrict) att_align_nominal(array_data, typalign); + array_data = (char *restrict) att_align_nominal(array_data, typalign); /* * For OR, we also need an intermediate storage for predicate result From b8aa1d7d646bcde8de2e7b0221fbfbf588d451f9 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 14 Dec 2023 17:37:49 +0100 Subject: [PATCH 099/249] harmonize the varlena check with rowbyrow --- tsl/src/compression/array.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tsl/src/compression/array.c b/tsl/src/compression/array.c index 7aa60857ed6..4a664e0f281 100644 --- a/tsl/src/compression/array.c +++ b/tsl/src/compression/array.c @@ -514,8 +514,8 @@ text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, for (int i = 0; i < n_notnull; i++) { void *vardata = consumeCompressedData(si, sizes[i]); - CheckCompressedData(!VARATT_IS_EXTERNAL(vardata)); - CheckCompressedData(!VARATT_IS_COMPRESSED(vardata)); + CheckCompressedData(VARATT_IS_4B_U(vardata) || + (VARATT_IS_1B(vardata) && !VARATT_IS_1B_E(vardata))); CheckCompressedData(VARSIZE_ANY(vardata) == sizes[i]); const int textlen = VARSIZE_ANY_EXHDR(vardata); memcpy(&arrow_bodies[offset], VARDATA_ANY(vardata), textlen); From cb383a065284770ee8ac8ac87525801847edb3b9 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 14 Dec 2023 17:38:06 +0100 Subject: [PATCH 100/249] test case --- ...rash-e5143387e8896dcfb0f95f8111538502cee38ce0 | Bin 0 -> 1122 bytes 1 file changed, 0 insertions(+), 0 
deletions(-) create mode 100644 tsl/test/fuzzing/compression/dictionary-text/crash-e5143387e8896dcfb0f95f8111538502cee38ce0 diff --git a/tsl/test/fuzzing/compression/dictionary-text/crash-e5143387e8896dcfb0f95f8111538502cee38ce0 b/tsl/test/fuzzing/compression/dictionary-text/crash-e5143387e8896dcfb0f95f8111538502cee38ce0 new file mode 100644 index 0000000000000000000000000000000000000000..280479e0535b53c2f790952f051970b8fb859a69 GIT binary patch literal 1122 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEiK7zFqk80;Dt8WaHCPEs` z1Xk|App8u%L=q??12-KmgiSqA3=)N)sMH5B;jxWFIamWwE)=IcszegVP=9||Kp;tC z6GM`QGo|1xbcP}W&|oYkBh;+{0tn9@A&ANXxew~V91=hs`5+HjYe`9okT1+dK#mm< yV+Sum8InOnLrK?W_b5>@~dPBS|I literal 0 HcmV?d00001 From 8ba3add0475ed07c3c4ccc3bccedd10a6fc7d6a7 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 14 Dec 2023 18:23:56 +0100 Subject: [PATCH 101/249] fixes --- tsl/src/compression/array.c | 30 ++++++++++++++++++++----- tsl/src/compression/compression.h | 3 +-- tsl/src/compression/datum_serialize.c | 7 ++++++ tsl/src/compression/dictionary.c | 12 ++++++++++ tsl/test/expected/compression_algos.out | 2 +- 5 files changed, 46 insertions(+), 8 deletions(-) diff --git a/tsl/src/compression/array.c b/tsl/src/compression/array.c index 4a664e0f281..e16de683633 100644 --- a/tsl/src/compression/array.c +++ b/tsl/src/compression/array.c @@ -510,21 +510,41 @@ text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, (uint32 *) MemoryContextAllocZero(dest_mctx, pad64(sizeof(*offsets) * (n_total + 1))); uint8 *arrow_bodies = (uint8 *) MemoryContextAllocZero(dest_mctx, pad64(si->len - si->cursor)); - int offset = 0; + uint32 offset = 0; for (int i = 0; i < n_notnull; i++) { void *vardata = consumeCompressedData(si, sizes[i]); + /* + * Check for potentially corrupt varlena headers since we're reading them + * directly from compressed data. We can only have a plain datum + * with 1-byte or 4-byte header here, no TOAST or compressed data. + */ CheckCompressedData(VARATT_IS_4B_U(vardata) || (VARATT_IS_1B(vardata) && !VARATT_IS_1B_E(vardata))); + /* + * Full varsize must be larger or equal than the header size so that the + * calculation of size without header doesn't overflow. + */ + CheckCompressedData((VARATT_IS_1B(vardata) && VARSIZE_1B(vardata) >= VARHDRSZ_SHORT) || + (VARSIZE_4B(vardata) > VARHDRSZ)); + /* Varsize must match the size stored in the sizes array for this element. */ CheckCompressedData(VARSIZE_ANY(vardata) == sizes[i]); - const int textlen = VARSIZE_ANY_EXHDR(vardata); + + const uint16 textlen = VARSIZE_ANY_EXHDR(vardata); memcpy(&arrow_bodies[offset], VARDATA_ANY(vardata), textlen); - // fprintf(stderr, "%d: copied: '%s' len %d varsize %d result %.*s\n", - // i, text_to_cstring(vardata), textlen, (int) VARSIZE_ANY(vardata), textlen, - //&arrow_bodies[offset]); + // fprintf(stderr, + // "%d: copied: '%s' len %d varsize %d result %.*s\n", + // i, + // text_to_cstring(vardata), + // textlen, + // (int) VARSIZE_ANY(vardata), + // textlen, + // &arrow_bodies[offset]); offsets[i] = offset; + + CheckCompressedData(offset <= offset + textlen); /* Check for overflow. 
*/ offset += textlen; } offsets[n_notnull] = offset; diff --git a/tsl/src/compression/compression.h b/tsl/src/compression/compression.h index 017cfa622a2..f716ba017e5 100644 --- a/tsl/src/compression/compression.h +++ b/tsl/src/compression/compression.h @@ -385,8 +385,7 @@ inline static void * consumeCompressedData(StringInfo si, int bytes) { CheckCompressedData(bytes >= 0); - CheckCompressedData(bytes < PG_INT32_MAX / 2); - CheckCompressedData(si->cursor + bytes >= 0); + CheckCompressedData(si->cursor + bytes >= si->cursor); /* Check for overflow. */ CheckCompressedData(si->cursor + bytes <= si->len); void *result = si->data + si->cursor; diff --git a/tsl/src/compression/datum_serialize.c b/tsl/src/compression/datum_serialize.c index bf3a799cf25..d1d38c09f2c 100644 --- a/tsl/src/compression/datum_serialize.c +++ b/tsl/src/compression/datum_serialize.c @@ -315,6 +315,13 @@ bytes_to_datum_and_advance(DatumDeserializer *deserializer, const char **ptr) * with 1-byte or 4-byte header here, no TOAST or compressed data. */ CheckCompressedData(VARATT_IS_4B_U(*ptr) || (VARATT_IS_1B(*ptr) && !VARATT_IS_1B_E(*ptr))); + + /* + * Full varsize must be larger or equal than the header size so that the + * calculation of size without header doesn't overflow. + */ + CheckCompressedData((VARATT_IS_1B(*ptr) && VARSIZE_1B(*ptr) >= VARHDRSZ_SHORT) || + (VARSIZE_4B(*ptr) > VARHDRSZ)); } res = fetch_att(*ptr, deserializer->type_by_val, deserializer->type_len); *ptr = att_addlength_pointer(*ptr, deserializer->type_len, *ptr); diff --git a/tsl/src/compression/dictionary.c b/tsl/src/compression/dictionary.c index 0386d411345..8f048a6a695 100644 --- a/tsl/src/compression/dictionary.c +++ b/tsl/src/compression/dictionary.c @@ -446,10 +446,22 @@ tsl_text_dictionary_decompress_all(Datum compressed, Oid element_type, MemoryCon simple8brle_decompress_all_buf_int16(indices_serialized, indices, n_padded); CheckCompressedData(n_decompressed == n_notnull); + /* Check that the dictionary indices that we've just read are not out of bounds. */ + CheckCompressedData(header->num_distinct <= GLOBAL_MAX_ROWS_PER_COMPRESSION); + CheckCompressedData(header->num_distinct <= INT16_MAX); + bool have_incorrect_index = false; + for (int i = 0; i < n_notnull; i++) + { + have_incorrect_index |= indices[i] >= (int16) header->num_distinct; + } + CheckCompressedData(!have_incorrect_index); + + /* Decompress the actual values in the dictionary. */ ArrowArray *dict = text_array_decompress_all_serialized_no_header(&si, /* has_nulls = */ false, dest_mctx); CheckCompressedData(header->num_distinct == dict->length); + /* Fill validity and indices of the array elements, reshuffling for nulls if needed. 
*/ const int validity_bitmap_bytes = sizeof(uint64) * pad64(n_total); uint64 *restrict validity_bitmap = MemoryContextAlloc(dest_mctx, validity_bitmap_bytes); memset(validity_bitmap, 0xFF, validity_bitmap_bytes); diff --git a/tsl/test/expected/compression_algos.out b/tsl/test/expected/compression_algos.out index 1ff24380529..078650bd7b7 100644 --- a/tsl/test/expected/compression_algos.out +++ b/tsl/test/expected/compression_algos.out @@ -1609,7 +1609,7 @@ group by 2, 3 order by 1 desc ; count | bulk_result | rowbyrow_result -------+-------------+----------------- - 4 | XX001 | XX001 + 5 | XX001 | XX001 1 | true | true (2 rows) From f5602953d11c4de2c2d1280345cd64cc2b36aa6e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 14 Dec 2023 18:25:25 +0100 Subject: [PATCH 102/249] test the cache (2023-12-14 #2) From 59a5a6c0ceaed3695e0362421f39798289c31cc6 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 14 Dec 2023 18:52:15 +0100 Subject: [PATCH 103/249] try to get the prefix match back --- .github/workflows/libfuzzer.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 458af23a104..6f0cc0b9b17 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -172,7 +172,7 @@ jobs: uses: actions/cache/restore@v3 with: path: db/corpus-bulk - key: "${{ steps.config.outputs.cache_prefix }}-bulk" + restore-keys: "${{ steps.config.outputs.cache_prefix }}-bulk" # We save the row-by-row corpus separately from the bulk corpus, so that # they don't overwrite each other. Now we are going to combine them. @@ -181,7 +181,7 @@ jobs: uses: actions/cache/restore@v3 with: path: db/corpus-rowbyrow - key: "${{ steps.config.outputs.cache_prefix }}-rowbyrow" + restore-keys: "${{ steps.config.outputs.cache_prefix }}-rowbyrow" - name: Initialize the fuzzing corpus run: | From 53ac9fc4eaa046a3cb6a610c35a95a0a93ea5f66 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 14 Dec 2023 19:11:25 +0100 Subject: [PATCH 104/249] key is required --- .github/workflows/libfuzzer.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 6f0cc0b9b17..67d368908a6 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -172,6 +172,7 @@ jobs: uses: actions/cache/restore@v3 with: path: db/corpus-bulk + key: "${{ steps.config.outputs.cache_prefix }}-bulk" restore-keys: "${{ steps.config.outputs.cache_prefix }}-bulk" # We save the row-by-row corpus separately from the bulk corpus, so that @@ -181,6 +182,7 @@ jobs: uses: actions/cache/restore@v3 with: path: db/corpus-rowbyrow + key: "${{ steps.config.outputs.cache_prefix }}-rowbyrow" restore-keys: "${{ steps.config.outputs.cache_prefix }}-rowbyrow" - name: Initialize the fuzzing corpus From f961cbb0356907caa17a957a355639150b177d79 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 14 Dec 2023 19:48:03 +0100 Subject: [PATCH 105/249] I hate github actions so much --- .github/workflows/libfuzzer.yaml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 67d368908a6..2b5cae363e7 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ 
-173,7 +173,6 @@ jobs: with: path: db/corpus-bulk key: "${{ steps.config.outputs.cache_prefix }}-bulk" - restore-keys: "${{ steps.config.outputs.cache_prefix }}-bulk" # We save the row-by-row corpus separately from the bulk corpus, so that # they don't overwrite each other. Now we are going to combine them. @@ -183,7 +182,6 @@ jobs: with: path: db/corpus-rowbyrow key: "${{ steps.config.outputs.cache_prefix }}-rowbyrow" - restore-keys: "${{ steps.config.outputs.cache_prefix }}-rowbyrow" - name: Initialize the fuzzing corpus run: | @@ -267,11 +265,17 @@ jobs: # The caches for rowbyrow and bulk fuzzing are saved separately, otherwise # the slower job would always overwrite the cache from the faster one. We # want to combine corpuses from bulk and rowbyrow fuzzing for better - # coverage. + # coverage. Note that the cache action cannot be restored on a path + # different from the one it was saved from. To make our lives more + # interesting, it is not directly documented anywhere, but we can deduce it + # from path influencing the version. + - name: Copy the fuzzer corpus to please the actions/cache + run: | + find "db/corpus" -type f -exec cp -t db/corpus-{{ matrix.case.bulk && 'bulk' || 'rowbyrow' }} {} + - name: Save fuzzer corpus uses: actions/cache/save@v3 with: - path: db/corpus + path: db/corpus-{{ matrix.case.bulk && 'bulk' || 'rowbyrow' }} key: "${{ format('{0}-{1}-{2}-{3}', steps.config.outputs.cache_prefix, matrix.case.bulk && 'bulk' || 'rowbyrow', From cb2f03e16197787a29a59a378258c281b4b40b79 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 14 Dec 2023 20:06:01 +0100 Subject: [PATCH 106/249] dollar --- .github/workflows/libfuzzer.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 2b5cae363e7..3a85ef32e25 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -271,7 +271,7 @@ jobs: # from path influencing the version. - name: Copy the fuzzer corpus to please the actions/cache run: | - find "db/corpus" -type f -exec cp -t db/corpus-{{ matrix.case.bulk && 'bulk' || 'rowbyrow' }} {} + + find "db/corpus" -type f -exec cp -t db/corpus-${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }} {} + - name: Save fuzzer corpus uses: actions/cache/save@v3 with: From 489827c66a45c641520834ab844817d5445f7801 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 14 Dec 2023 20:23:32 +0100 Subject: [PATCH 107/249] test the cache (2023-12-14 no. 
3) From 9546a3c2526965158b2fabfc0a9f89142046d834 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 14 Dec 2023 21:26:07 +0100 Subject: [PATCH 108/249] dollar --- .github/workflows/libfuzzer.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 3a85ef32e25..c82842bc996 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -275,7 +275,7 @@ jobs: - name: Save fuzzer corpus uses: actions/cache/save@v3 with: - path: db/corpus-{{ matrix.case.bulk && 'bulk' || 'rowbyrow' }} + path: db/corpus-${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }} key: "${{ format('{0}-{1}-{2}-{3}', steps.config.outputs.cache_prefix, matrix.case.bulk && 'bulk' || 'rowbyrow', From bc450d78c4ace202e51028e32714c3c807f58b20 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 14 Dec 2023 21:58:47 +0100 Subject: [PATCH 109/249] test the cache (2023-12-14 no. 4) From 0c6a6891af9bb316b8df8d2995ecd22c6cf19376 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 15 Dec 2023 11:06:50 +0100 Subject: [PATCH 110/249] test detoasting in decompression as well --- tsl/test/expected/compressed_detoaster.out | 14 ++++++++++++-- tsl/test/sql/compressed_detoaster.sql | 10 ++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/tsl/test/expected/compressed_detoaster.out b/tsl/test/expected/compressed_detoaster.out index d2476ed857e..40a26039014 100644 --- a/tsl/test/expected/compressed_detoaster.out +++ b/tsl/test/expected/compressed_detoaster.out @@ -23,14 +23,24 @@ where id = (select compressed_hypertable_id from _timescaledb_catalog.hypertable \gset alter table :compressed_table set (toast_tuple_target = 512); -- Now, test compression and decompression with various string lengths. -create function test(repeats int) returns table(ns bigint) as $$ begin +create function test(repeats int, decompress bool) returns table(ns bigint) as $$ begin raise log 'repeats %', repeats; truncate longstr; insert into longstr(s1) select repeat('aaaa', repeats); perform count(compress_chunk(x, true)) from show_chunks('longstr') x; + if decompress then + perform decompress_chunk(x) from show_chunks('longstr') x; + end if; return query select sum(length(s1)) from longstr; end; $$ language plpgsql volatile; -select sum(t) from generate_series(1, 30) x, lateral test(x * x * x) t; +select sum(t) from generate_series(1, 30) x, lateral test(x * x * x, false) t; + sum +-------- + 864900 +(1 row) + +-- Also test decompression which uses the detoaster as well. +select sum(t) from generate_series(1, 30) x, lateral test(x * x * x, true) t; sum -------- 864900 diff --git a/tsl/test/sql/compressed_detoaster.sql b/tsl/test/sql/compressed_detoaster.sql index 4ebac311bdc..af3e295e1f9 100644 --- a/tsl/test/sql/compressed_detoaster.sql +++ b/tsl/test/sql/compressed_detoaster.sql @@ -22,12 +22,18 @@ alter table :compressed_table set (toast_tuple_target = 512); -- Now, test compression and decompression with various string lengths. 
-create function test(repeats int) returns table(ns bigint) as $$ begin +create function test(repeats int, decompress bool) returns table(ns bigint) as $$ begin raise log 'repeats %', repeats; truncate longstr; insert into longstr(s1) select repeat('aaaa', repeats); perform count(compress_chunk(x, true)) from show_chunks('longstr') x; + if decompress then + perform decompress_chunk(x) from show_chunks('longstr') x; + end if; return query select sum(length(s1)) from longstr; end; $$ language plpgsql volatile; -select sum(t) from generate_series(1, 30) x, lateral test(x * x * x) t; +select sum(t) from generate_series(1, 30) x, lateral test(x * x * x, false) t; + +-- Also test decompression which uses the detoaster as well. +select sum(t) from generate_series(1, 30) x, lateral test(x * x * x, true) t; From b142652e693ae99e93ab9d5b9c2744087d9573bf Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 15 Dec 2023 11:11:53 +0100 Subject: [PATCH 111/249] do more things in row_decompressor_close --- tsl/src/compression/api.c | 4 ++-- tsl/src/compression/compression.c | 18 ++++++------------ tsl/src/compression/compression.h | 2 +- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/tsl/src/compression/api.c b/tsl/src/compression/api.c index 96e474e15ca..4d3da13a76b 100644 --- a/tsl/src/compression/api.c +++ b/tsl/src/compression/api.c @@ -943,7 +943,7 @@ tsl_get_compressed_chunk_index_for_recompression(PG_FUNCTION_ARGS) table_close(compressed_chunk_rel, NoLock); table_close(uncompressed_chunk_rel, NoLock); - row_compressor_finish(&row_compressor); + row_compressor_close(&row_compressor); if (OidIsValid(row_compressor.index_oid)) { @@ -1392,7 +1392,7 @@ tsl_recompress_chunk_segmentwise(PG_FUNCTION_ARGS) row_compressor.rowcnt_pre_compression, row_compressor.num_compressed_rows); - row_compressor_finish(&row_compressor); + row_compressor_close(&row_compressor); FreeBulkInsertState(decompressor.bistate); ExecDropSingleTupleTableSlot(slot); index_endscan(index_scan); diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c index eb73901149c..8bf00443fbe 100644 --- a/tsl/src/compression/compression.c +++ b/tsl/src/compression/compression.c @@ -447,7 +447,7 @@ compress_chunk(Hypertable *ht, Oid in_table, Oid out_table, int insert_options) tuplesort_end(sorted_rel); } - row_compressor_finish(&row_compressor); + row_compressor_close(&row_compressor); DEBUG_WAITPOINT("compression_done_before_truncate_uncompressed"); truncate_relation(in_table); @@ -1269,7 +1269,7 @@ row_compressor_reset(RowCompressor *row_compressor) } void -row_compressor_finish(RowCompressor *row_compressor) +row_compressor_close(RowCompressor *row_compressor) { if (row_compressor->bistate) FreeBulkInsertState(row_compressor->bistate); @@ -1403,6 +1403,10 @@ build_decompressor(Relation in_rel, Relation out_rel) void row_decompressor_close(RowDecompressor *decompressor) { + FreeBulkInsertState(decompressor->bistate); + MemoryContextDelete(decompressor->per_compressed_row_ctx); + ts_catalog_close_indexes(decompressor->indexstate); + FreeExecutorState(decompressor->estate); detoaster_close(&decompressor->detoaster); } @@ -1446,10 +1450,6 @@ decompress_chunk(Oid in_table, Oid out_table) table_endscan(scan); ExecDropSingleTupleTableSlot(slot); - FreeBulkInsertState(decompressor.bistate); - MemoryContextDelete(decompressor.per_compressed_row_ctx); - ts_catalog_close_indexes(decompressor.indexstate); - FreeExecutorState(decompressor.estate); 
row_decompressor_close(&decompressor); table_close(out_rel, NoLock); @@ -2213,9 +2213,6 @@ decompress_batches_for_insert(ChunkInsertState *cis, Chunk *chunk, TupleTableSlo table_endscan(scan); ExecDropSingleTupleTableSlot(compressed_slot); - ts_catalog_close_indexes(decompressor.indexstate); - FreeExecutorState(decompressor.estate); - FreeBulkInsertState(decompressor.bistate); row_decompressor_close(&decompressor); CommandCounterIncrement(); @@ -3407,9 +3404,6 @@ decompress_batches_for_update_delete(HypertableModifyState *ht_state, Chunk *chu if (chunk_status_changed == true) ts_chunk_set_partial(chunk); - ts_catalog_close_indexes(decompressor.indexstate); - FreeExecutorState(decompressor.estate); - FreeBulkInsertState(decompressor.bistate); row_decompressor_close(&decompressor); table_close(chunk_rel, NoLock); diff --git a/tsl/src/compression/compression.h b/tsl/src/compression/compression.h index fc2e25b1efd..719eae49310 100644 --- a/tsl/src/compression/compression.h +++ b/tsl/src/compression/compression.h @@ -355,7 +355,7 @@ extern void row_compressor_init(CompressionSettings *settings, RowCompressor *ro int16 num_columns_in_compressed_table, bool need_bistate, bool reset_sequence, int insert_options); extern void row_compressor_reset(RowCompressor *row_compressor); -extern void row_compressor_finish(RowCompressor *row_compressor); +extern void row_compressor_close(RowCompressor *row_compressor); extern void row_compressor_append_sorted_rows(RowCompressor *row_compressor, Tuplesortstate *sorted_rel, TupleDesc sorted_desc); extern void segment_info_update(SegmentInfo *segment_info, Datum val, bool is_null); From 11897bbb554ca0973155b86cdbe2d87b2c37976b Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 15 Dec 2023 15:43:17 +0100 Subject: [PATCH 112/249] use the old cache key --- .github/workflows/libfuzzer.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index c82842bc996..792f1b2777e 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -164,7 +164,7 @@ jobs: id: config run: | set -x - echo "cache_prefix=${{ format('libfuzzer-2-{0}-{1}', matrix.case.algo, matrix.case.pgtype) }}" >> $GITHUB_OUTPUT + echo "cache_prefix=${{ format('libfuzzer-corpus-2-{0}-{1}', matrix.case.algo, matrix.case.pgtype) }}" >> $GITHUB_OUTPUT echo "name=${{ matrix.case.algo }} ${{ matrix.case.pgtype }} ${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }}" >> $GITHUB_OUTPUT - name: Restore the cached fuzzing corpus (bulk) From e8d5940123e6aad0f48f6115b2b208861d1e9ff8 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 15 Dec 2023 15:48:47 +0100 Subject: [PATCH 113/249] use "name" as column type w/o bulk decompression --- tsl/test/expected/decompress_vector_qual.out | 2 +- tsl/test/sql/decompress_vector_qual.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index 22433e1f65c..eb3a1df2ed9 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -109,7 +109,7 @@ select count(*) from vectorqual where device = 1 /* can't apply vector ops to th (1 row) -- Test columns that don't support bulk decompression. 
-alter table vectorqual add column tag text; +alter table vectorqual add column tag name; insert into vectorqual(ts, device, metric2, metric3, metric4, tag) values ('2025-01-01 00:00:00', 5, 52, 53, 54, 'tag5'); select count(compress_chunk(x, true)) from show_chunks('vectorqual') x; NOTICE: chunk "_hyper_1_1_chunk" is already compressed diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index d1044ef8098..a13a8009dbb 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -39,7 +39,7 @@ select count(*) from vectorqual where device = 1 /* can't apply vector ops to th -- Test columns that don't support bulk decompression. -alter table vectorqual add column tag text; +alter table vectorqual add column tag name; insert into vectorqual(ts, device, metric2, metric3, metric4, tag) values ('2025-01-01 00:00:00', 5, 52, 53, 54, 'tag5'); select count(compress_chunk(x, true)) from show_chunks('vectorqual') x; From 93b39a9a7e4c8feb116f7d70f75bfe171491db28 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 15 Dec 2023 16:57:50 +0100 Subject: [PATCH 114/249] tmp --- tsl/src/compression/compression_test.c | 4 +- .../compression/decompress_text_test_impl.c | 2 +- tsl/src/nodes/decompress_chunk/batch_array.c | 2 +- .../nodes/decompress_chunk/compressed_batch.c | 193 +++++++++++------- .../nodes/decompress_chunk/compressed_batch.h | 25 ++- 5 files changed, 144 insertions(+), 82 deletions(-) diff --git a/tsl/src/compression/compression_test.c b/tsl/src/compression/compression_test.c index 52f1d0f901e..6f66db196e9 100644 --- a/tsl/src/compression/compression_test.c +++ b/tsl/src/compression/compression_test.c @@ -75,8 +75,8 @@ get_compression_algorithm(char *name) X(DELTADELTA, INT8, true) \ X(DELTADELTA, INT8, false) \ X(ARRAY, TEXT, false) \ - X(ARRAY, TEXT, true) \ - X(DICTIONARY, TEXT, false) \ + X(ARRAY, TEXT, true) \ + X(DICTIONARY, TEXT, false) \ X(DICTIONARY, TEXT, true) static int (*get_decompress_fn(int algo, Oid type))(const uint8 *Data, size_t Size, bool bulk) diff --git a/tsl/src/compression/decompress_text_test_impl.c b/tsl/src/compression/decompress_text_test_impl.c index 3bddd0d7e15..ec089e5bff4 100644 --- a/tsl/src/compression/decompress_text_test_impl.c +++ b/tsl/src/compression/decompress_text_test_impl.c @@ -161,7 +161,7 @@ decompress_generic_text(const uint8 *Data, size_t Size, bool bulk, int requested * For row-by-row decompression, check that the result is still the same * after we compress and decompress back. * Don't perform this check for other types of tests. - * + * * 1) Compress. 
*/ Compressor *compressor = def->compressor_for_type(TEXTOID); diff --git a/tsl/src/nodes/decompress_chunk/batch_array.c b/tsl/src/nodes/decompress_chunk/batch_array.c index 43ad9af7be0..fd081922d35 100644 --- a/tsl/src/nodes/decompress_chunk/batch_array.c +++ b/tsl/src/nodes/decompress_chunk/batch_array.c @@ -21,7 +21,7 @@ batch_array_init(BatchArray *array, int nbatches, int ncolumns_per_batch, array->unused_batch_states = bms_add_range(NULL, 0, nbatches - 1); array->batch_memory_context_bytes = memory_context_block_size_bytes; array->n_batch_state_bytes = - sizeof(DecompressBatchState) + sizeof(CompressedColumnValues) * ncolumns_per_batch; + sizeof(DecompressBatchState) + sizeof(CompressedColumnValues2) * ncolumns_per_batch; array->batch_states = palloc0(array->n_batch_state_bytes * nbatches); Assert(bms_num_members(array->unused_batch_states) == array->n_batch_states); } diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 10fcd498b8d..6ca29b797ad 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -141,7 +141,7 @@ static void decompress_column(DecompressContext *dcontext, DecompressBatchState *batch_state, int i) { CompressionColumnDescription *column_description = &dcontext->template_columns[i]; - CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; + CompressedColumnValues *column_values = &batch_state->compressed_columns_wide[i]; column_values->iterator = NULL; column_values->arrow = NULL; column_values->arrow_values = NULL; @@ -321,7 +321,7 @@ compute_vector_quals(DecompressContext *dcontext, DecompressBatchState *batch_st "only compressed columns are supported in vectorized quals"); Assert(column_index < dcontext->num_compressed_columns); - CompressedColumnValues *column_values = &batch_state->compressed_columns[column_index]; + CompressedColumnValues *column_values = &batch_state->compressed_columns_wide[column_index]; if (column_values->value_bytes == 0) { @@ -519,6 +519,9 @@ compressed_batch_set_compressed_tuple(DecompressContext *dcontext, batch_state->decompressed_scan_slot = MakeSingleTupleTableSlot(dcontext->decompressed_slot_scan_tdesc, slot->tts_ops); + + batch_state->compressed_columns_wide = + palloc0(sizeof(CompressedColumnValues) * dcontext->num_compressed_columns); } else { @@ -555,7 +558,7 @@ compressed_batch_set_compressed_tuple(DecompressContext *dcontext, * skip decompressing some columns if the entire batch doesn't pass * the quals. Skip them for now. 
*/ - CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; + CompressedColumnValues *column_values = &batch_state->compressed_columns_wide[i]; column_values->value_bytes = 0; column_values->arrow = NULL; column_values->iterator = NULL; @@ -636,12 +639,53 @@ compressed_batch_set_compressed_tuple(DecompressContext *dcontext, const int num_compressed_columns = dcontext->num_compressed_columns; for (int i = 0; i < num_compressed_columns; i++) { - CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; - if (column_values->value_bytes == 0) + CompressionColumnDescription *desc = &dcontext->template_columns[i]; + CompressedColumnValues *wide = &batch_state->compressed_columns_wide[i]; + if (wide->value_bytes == 0) { decompress_column(dcontext, batch_state, i); - Assert(column_values->value_bytes != 0); + Assert(wide->value_bytes != 0); + } + + CompressedColumnValues2 *packed = &batch_state->compressed_columns_packed[i]; + packed->output_attno = desc->output_attno; + if (wide->iterator) + { + packed->decompression_type = DT_Iterator; + packed->buffers[0] = wide->iterator; + continue; + } + + if (wide->arrow == NULL) + { + packed->decompression_type = DT_Default; + continue; + } + + if (wide->value_bytes > 0) + { + packed->decompression_type = wide->value_bytes; + packed->buffers[0] = wide->arrow->buffers[0]; + packed->buffers[1] = wide->arrow->buffers[1]; + continue; + } + + Assert(wide->value_bytes == -1); + + if (wide->arrow->dictionary == NULL) + { + packed->decompression_type = DT_ArrowText; + packed->buffers[0] = wide->arrow->buffers[0]; + packed->buffers[1] = wide->arrow->buffers[1]; + packed->buffers[2] = wide->arrow->buffers[2]; + continue; } + + packed->decompression_type = DT_ArrowTextDict; + packed->buffers[0] = wide->arrow->buffers[0]; + packed->buffers[1] = wide->arrow->dictionary->buffers[1]; + packed->buffers[2] = wide->arrow->dictionary->buffers[2]; + packed->buffers[3] = wide->arrow->buffers[1]; } } @@ -649,17 +693,16 @@ compressed_batch_set_compressed_tuple(DecompressContext *dcontext, } static void -store_text_datum(ArrowArray *arrow, int arrow_row, Datum *dest) +store_text_datum2(CompressedColumnValues2 *packed, int arrow_row, Datum *dest) { - Assert(arrow->dictionary == NULL); - const uint32 start = ((uint32 *) arrow->buffers[1])[arrow_row]; - const int32 value_bytes = ((uint32 *) arrow->buffers[1])[arrow_row + 1] - start; + const uint32 start = ((uint32 *) packed->buffers[1])[arrow_row]; + const int32 value_bytes = ((uint32 *) packed->buffers[1])[arrow_row + 1] - start; Assert(value_bytes >= 0); const int total_bytes = value_bytes + VARHDRSZ; Assert(DatumGetPointer(*dest) != NULL); SET_VARSIZE(*dest, total_bytes); - memcpy(VARDATA(*dest), &((uint8 *) arrow->buffers[2])[start], value_bytes); + memcpy(VARDATA(*dest), &((uint8 *) packed->buffers[2])[start], value_bytes); } /* @@ -682,79 +725,75 @@ make_next_tuple(DecompressContext *dcontext, DecompressBatchState *batch_state) const int num_compressed_columns = dcontext->num_compressed_columns; for (int i = 0; i < num_compressed_columns; i++) { - CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; - Ensure(column_values->value_bytes != 0, "the column is not decompressed"); - if (column_values->iterator != NULL) + CompressedColumnValues2 *packed = &batch_state->compressed_columns_packed[i]; + const AttrNumber attr = AttrNumberGetAttrOffset(packed->output_attno); + if (packed->decompression_type == DT_Default) + { + /* Do nothing. 
*/ + } + else if (packed->decompression_type == DT_Iterator) { - DecompressResult result = column_values->iterator->try_next(column_values->iterator); + DecompressionIterator *iterator = (DecompressionIterator *) packed->buffers[0]; + DecompressResult result = iterator->try_next(iterator); if (result.is_done) { elog(ERROR, "compressed column out of sync with batch counter"); } - const AttrNumber attr = AttrNumberGetAttrOffset(column_values->output_attno); + const AttrNumber attr = AttrNumberGetAttrOffset(packed->output_attno); decompressed_scan_slot->tts_isnull[attr] = result.is_null; decompressed_scan_slot->tts_values[attr] = result.val; } - else if (column_values->arrow_values != NULL) + else if (packed->decompression_type == DT_ArrowText) { - Assert(column_values->value_bytes != 0); - const AttrNumber attr = AttrNumberGetAttrOffset(column_values->output_attno); - if (column_values->value_bytes == -1) - { - if (column_values->arrow->dictionary == NULL) - { - store_text_datum(column_values->arrow, - arrow_row, - &decompressed_scan_slot->tts_values[attr]); - } - else - { - const int16 index = ((int16 *) column_values->arrow->buffers[1])[arrow_row]; - store_text_datum(column_values->arrow->dictionary, - index, - &decompressed_scan_slot->tts_values[attr]); - } - - decompressed_scan_slot->tts_isnull[attr] = - !arrow_row_is_valid(column_values->arrow->buffers[0], arrow_row); - } - else - { - Assert(column_values->value_bytes > 0); - const char *restrict src = column_values->arrow_values; + store_text_datum2(packed, arrow_row, &decompressed_scan_slot->tts_values[attr]); + decompressed_scan_slot->tts_isnull[attr] = + !arrow_row_is_valid(packed->buffers[0], arrow_row); + } + else if (packed->decompression_type == DT_ArrowTextDict) + { + const int16 index = ((int16 *) packed->buffers[3])[arrow_row]; + store_text_datum2(packed, index, &decompressed_scan_slot->tts_values[attr]); + decompressed_scan_slot->tts_isnull[attr] = + !arrow_row_is_valid(packed->buffers[0], arrow_row); + } + else + { + const int value_bytes = packed->decompression_type; + Assert(value_bytes > 0); + Assert(value_bytes <= 8); + const char *restrict src = packed->buffers[1]; - /* - * The conversion of Datum to more narrow types will truncate - * the higher bytes, so we don't care if we read some garbage - * into them, and can always read 8 bytes. These are unaligned - * reads, so technically we have to do memcpy. - */ - uint64 value; - memcpy(&value, &src[column_values->value_bytes * arrow_row], 8); + /* + * The conversion of Datum to more narrow types will truncate + * the higher bytes, so we don't care if we read some garbage + * into them, and can always read 8 bytes. These are unaligned + * reads, so technically we have to do memcpy. + */ + uint64 value; + memcpy(&value, &src[value_bytes * arrow_row], 8); #ifdef USE_FLOAT8_BYVAL - Datum datum = Int64GetDatum(value); + Datum datum = Int64GetDatum(value); #else - /* - * On 32-bit systems, the data larger than 4 bytes go by - * reference, so we have to jump through these hoops. - */ - Datum datum; - if (column_values->value_bytes <= 4) - { - datum = Int32GetDatum((uint32) value); - } - else - { - datum = Int64GetDatum(value); - } -#endif - decompressed_scan_slot->tts_values[attr] = datum; - decompressed_scan_slot->tts_isnull[attr] = - !arrow_row_is_valid(column_values->arrow_validity, arrow_row); + /* + * On 32-bit systems, the data larger than 4 bytes go by + * reference, so we have to jump through these hoops. 
+ */ + Datum datum; + if (value_bytes <= 4) + { + datum = Int32GetDatum((uint32) value); + } + else + { + datum = Int64GetDatum(value); } +#endif + decompressed_scan_slot->tts_values[attr] = datum; + decompressed_scan_slot->tts_isnull[attr] = + !arrow_row_is_valid(packed->buffers[0], arrow_row); } } @@ -835,11 +874,11 @@ compressed_batch_advance(DecompressContext *dcontext, DecompressBatchState *batc */ for (int i = 0; i < num_compressed_columns; i++) { - CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; - Ensure(column_values->value_bytes != 0, "the column is not decompressed"); - if (column_values->iterator) + CompressedColumnValues2 *packed = &batch_state->compressed_columns_packed[i]; + if (packed->decompression_type == DT_Iterator) { - column_values->iterator->try_next(column_values->iterator); + DecompressionIterator *iterator = (DecompressionIterator *) packed->buffers[0]; + iterator->try_next(iterator); } } @@ -871,11 +910,11 @@ compressed_batch_advance(DecompressContext *dcontext, DecompressBatchState *batc Assert(batch_state->next_batch_row == batch_state->total_batch_rows); for (int i = 0; i < num_compressed_columns; i++) { - CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; - if (column_values->iterator) + CompressedColumnValues2 *packed = &batch_state->compressed_columns_packed[i]; + if (packed->decompression_type == DT_Iterator) { - Assert(column_values->value_bytes != 0); - DecompressResult result = column_values->iterator->try_next(column_values->iterator); + DecompressionIterator *iterator = (DecompressionIterator *) packed->buffers[0]; + DecompressResult result = iterator->try_next(iterator); if (!result.is_done) { elog(ERROR, "compressed column out of sync with batch counter"); @@ -912,7 +951,7 @@ compressed_batch_save_first_tuple(DecompressContext *dcontext, DecompressBatchSt const int num_compressed_columns = dcontext->num_compressed_columns; for (int i = 0; i < num_compressed_columns; i++) { - CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; + CompressedColumnValues *column_values = &batch_state->compressed_columns_wide[i]; Assert(column_values->value_bytes != 0); } #endif diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.h b/tsl/src/nodes/decompress_chunk/compressed_batch.h index 1bd247cd6b4..e672679b752 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.h +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.h @@ -35,6 +35,27 @@ typedef struct CompressedColumnValues int8 value_bytes; } CompressedColumnValues; +typedef enum +{ + DT_ArrowTextDict = -4, + DT_ArrowText = -3, + DT_Default = -2, + DT_Iterator = -1, + Invalid = 0, +} DecompressionType; + +typedef struct CompressedColumnValues2 +{ + DecompressionType decompression_type; + AttrNumber output_attno; + // The buffers are as following: + // iterator: iterator + // arrow fixed: validity, value + // arrow text: validity, uint32* offsets, void* bodies + // arrow dict text: validity, uint32* dict offsets, void* dict bodies, int16* indices + const void *restrict buffers[4]; +} CompressedColumnValues2; + /* * All the information needed to decompress a batch. 
*/ @@ -59,7 +80,9 @@ typedef struct DecompressBatchState */ uint64 *vector_qual_result; - CompressedColumnValues compressed_columns[FLEXIBLE_ARRAY_MEMBER]; + CompressedColumnValues *compressed_columns_wide; + + CompressedColumnValues2 compressed_columns_packed[FLEXIBLE_ARRAY_MEMBER]; } DecompressBatchState; extern void compressed_batch_set_compressed_tuple(DecompressContext *dcontext, From 93f2aaf0084a559eefe51d8e1ade35f22705d01c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 15 Dec 2023 17:07:36 +0100 Subject: [PATCH 115/249] micro-optimizations? --- .../nodes/decompress_chunk/compressed_batch.c | 27 +++++++++++-------- .../nodes/decompress_chunk/compressed_batch.h | 4 +-- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 6ca29b797ad..ee33796d833 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -251,7 +251,7 @@ compute_vector_quals(DecompressContext *dcontext, DecompressBatchState *batch_st * Allocate the bitmap that will hold the vectorized qual results. We will * initialize it to all ones and AND the individual quals to it. */ - const int bitmap_bytes = sizeof(uint64) * ((batch_state->total_batch_rows + 63) / 64); + const int bitmap_bytes = sizeof(uint64) * (((uint64) batch_state->total_batch_rows + 63) / 64); batch_state->vector_qual_result = palloc(bitmap_bytes); memset(batch_state->vector_qual_result, 0xFF, bitmap_bytes); if (batch_state->total_batch_rows % 64 != 0) @@ -595,6 +595,7 @@ compressed_batch_set_compressed_tuple(DecompressContext *dcontext, } Assert(batch_state->total_batch_rows == 0); + CheckCompressedData(count_value <= UINT16_MAX); batch_state->total_batch_rows = count_value; break; @@ -708,9 +709,13 @@ store_text_datum2(CompressedColumnValues2 *packed, int arrow_row, Datum *dest) /* * Construct the next tuple in the decompressed scan slot. * Doesn't check the quals. + * + * It takes "reverse" and "num_compressed_columns" by value to avoid accessing + * the DecompressContext, which would prevent the compiler to combine it with + * vector_qual(). */ static void -make_next_tuple(DecompressContext *dcontext, DecompressBatchState *batch_state) +make_next_tuple(DecompressBatchState *batch_state, bool reverse, int num_compressed_columns) { TupleTableSlot *decompressed_scan_slot = batch_state->decompressed_scan_slot; Assert(decompressed_scan_slot != NULL); @@ -718,11 +723,10 @@ make_next_tuple(DecompressContext *dcontext, DecompressBatchState *batch_state) Assert(batch_state->total_batch_rows > 0); Assert(batch_state->next_batch_row < batch_state->total_batch_rows); - const int output_row = batch_state->next_batch_row; - const size_t arrow_row = - unlikely(dcontext->reverse) ? batch_state->total_batch_rows - 1 - output_row : output_row; + const uint16 output_row = batch_state->next_batch_row; + const uint16 arrow_row = + unlikely(reverse) ? 
batch_state->total_batch_rows - 1 - output_row : output_row; - const int num_compressed_columns = dcontext->num_compressed_columns; for (int i = 0; i < num_compressed_columns; i++) { CompressedColumnValues2 *packed = &batch_state->compressed_columns_packed[i]; @@ -819,8 +823,8 @@ vector_qual(DecompressBatchState *batch_state, bool reverse) Assert(batch_state->total_batch_rows > 0); Assert(batch_state->next_batch_row < batch_state->total_batch_rows); - const int output_row = batch_state->next_batch_row; - const size_t arrow_row = reverse ? batch_state->total_batch_rows - 1 - output_row : output_row; + const uint16 output_row = batch_state->next_batch_row; + const uint16 arrow_row = unlikely(reverse) ? batch_state->total_batch_rows - 1 - output_row : output_row; if (!batch_state->vector_qual_result) { @@ -861,12 +865,13 @@ compressed_batch_advance(DecompressContext *dcontext, DecompressBatchState *batc TupleTableSlot *decompressed_scan_slot = batch_state->decompressed_scan_slot; Assert(decompressed_scan_slot != NULL); + const bool reverse = dcontext->reverse; const int num_compressed_columns = dcontext->num_compressed_columns; for (; batch_state->next_batch_row < batch_state->total_batch_rows; batch_state->next_batch_row++) { - if (!vector_qual(batch_state, dcontext->reverse)) + if (!vector_qual(batch_state, reverse)) { /* * This row doesn't pass the vectorized quals. Advance the iterated @@ -886,7 +891,7 @@ compressed_batch_advance(DecompressContext *dcontext, DecompressBatchState *batc continue; } - make_next_tuple(dcontext, batch_state); + make_next_tuple(batch_state, reverse, num_compressed_columns); if (!postgres_qual(dcontext, batch_state)) { @@ -957,7 +962,7 @@ compressed_batch_save_first_tuple(DecompressContext *dcontext, DecompressBatchSt #endif /* Make the first tuple and save it. */ - make_next_tuple(dcontext, batch_state); + make_next_tuple(batch_state, dcontext->reverse, dcontext->num_compressed_columns); ExecCopySlot(first_tuple_slot, batch_state->decompressed_scan_slot); /* diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.h b/tsl/src/nodes/decompress_chunk/compressed_batch.h index e672679b752..d6c5d3a2864 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.h +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.h @@ -68,8 +68,8 @@ typedef struct DecompressBatchState * original tuple, and a batch outlives its source tuple. 
*/ TupleTableSlot *compressed_slot; - int total_batch_rows; - int next_batch_row; + uint16 total_batch_rows; + uint16 next_batch_row; Size block_size_bytes; /* Block size to use for memory context */ MemoryContext per_batch_context; From 39ff1338f125a9a3020a9c1b7310fcb49a134452 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 15 Dec 2023 17:22:45 +0100 Subject: [PATCH 116/249] double free --- tsl/src/compression/api.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tsl/src/compression/api.c b/tsl/src/compression/api.c index 4d3da13a76b..18fd1ed6fc6 100644 --- a/tsl/src/compression/api.c +++ b/tsl/src/compression/api.c @@ -1393,7 +1393,6 @@ tsl_recompress_chunk_segmentwise(PG_FUNCTION_ARGS) row_compressor.num_compressed_rows); row_compressor_close(&row_compressor); - FreeBulkInsertState(decompressor.bistate); ExecDropSingleTupleTableSlot(slot); index_endscan(index_scan); UnregisterSnapshot(snapshot); From 51626f7d9254fe6bc0ac55595eaba1e4b4fa4857 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 15 Dec 2023 17:26:40 +0100 Subject: [PATCH 117/249] use after free 2 --- tsl/src/compression/api.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tsl/src/compression/api.c b/tsl/src/compression/api.c index 18fd1ed6fc6..412715718df 100644 --- a/tsl/src/compression/api.c +++ b/tsl/src/compression/api.c @@ -1194,10 +1194,6 @@ tsl_recompress_chunk_segmentwise(PG_FUNCTION_ARGS) /******************** row decompressor **************/ RowDecompressor decompressor = build_decompressor(compressed_chunk_rel, uncompressed_chunk_rel); - /* do not need the indexes on the uncompressed chunk as we do not write to it anymore */ - ts_catalog_close_indexes(decompressor.indexstate); - /* also do not need estate because we don't insert into indexes */ - FreeExecutorState(decompressor.estate); /********** row compressor *******************/ RowCompressor row_compressor; row_compressor_init(settings, From 3c94a676ec8ae5dd25cff4ecace01e5d0de34788 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 15 Dec 2023 17:39:11 +0100 Subject: [PATCH 118/249] even more straightforward data layout --- .../nodes/decompress_chunk/compressed_batch.c | 42 +++++++++---------- .../nodes/decompress_chunk/compressed_batch.h | 3 +- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 6ce81703d81..9dce13ea702 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -653,7 +653,9 @@ compressed_batch_set_compressed_tuple(DecompressContext *dcontext, } CompressedColumnValues2 *packed = &batch_state->compressed_columns_packed[i]; - packed->output_attno = desc->output_attno; + const AttrNumber attr = AttrNumberGetAttrOffset(desc->output_attno); + packed->output_value = &batch_state->decompressed_scan_slot->tts_values[attr]; + packed->output_isnull = &batch_state->decompressed_scan_slot->tts_isnull[attr]; if (wide->iterator) { packed->decompression_type = DT_Iterator; @@ -698,16 +700,16 @@ compressed_batch_set_compressed_tuple(DecompressContext *dcontext, } static void -store_text_datum2(CompressedColumnValues2 *packed, int arrow_row, Datum *dest) +store_text_datum2(CompressedColumnValues2 *packed, int arrow_row) { const uint32 start = ((uint32 *) packed->buffers[1])[arrow_row]; const int32 value_bytes = 
((uint32 *) packed->buffers[1])[arrow_row + 1] - start; Assert(value_bytes >= 0); const int total_bytes = value_bytes + VARHDRSZ; - Assert(DatumGetPointer(*dest) != NULL); - SET_VARSIZE(*dest, total_bytes); - memcpy(VARDATA(*dest), &((uint8 *) packed->buffers[2])[start], value_bytes); + Assert(DatumGetPointer(*packed->output_value) != NULL); + SET_VARSIZE(*packed->output_value, total_bytes); + memcpy(VARDATA(*packed->output_value), &((uint8 *) packed->buffers[2])[start], value_bytes); } /* @@ -734,7 +736,6 @@ make_next_tuple(DecompressBatchState *batch_state, bool reverse, int num_compres for (int i = 0; i < num_compressed_columns; i++) { CompressedColumnValues2 *packed = &batch_state->compressed_columns_packed[i]; - const AttrNumber attr = AttrNumberGetAttrOffset(packed->output_attno); if (packed->decompression_type == DT_Default) { /* Do nothing. */ @@ -749,28 +750,25 @@ make_next_tuple(DecompressBatchState *batch_state, bool reverse, int num_compres elog(ERROR, "compressed column out of sync with batch counter"); } - const AttrNumber attr = AttrNumberGetAttrOffset(packed->output_attno); - decompressed_scan_slot->tts_isnull[attr] = result.is_null; - decompressed_scan_slot->tts_values[attr] = result.val; + *packed->output_isnull = result.is_null; + *packed->output_value = result.val; } else if (packed->decompression_type == DT_ArrowText) { - store_text_datum2(packed, arrow_row, &decompressed_scan_slot->tts_values[attr]); - decompressed_scan_slot->tts_isnull[attr] = - !arrow_row_is_valid(packed->buffers[0], arrow_row); + store_text_datum2(packed, arrow_row); + *packed->output_isnull = !arrow_row_is_valid(packed->buffers[0], arrow_row); } else if (packed->decompression_type == DT_ArrowTextDict) { const int16 index = ((int16 *) packed->buffers[3])[arrow_row]; - store_text_datum2(packed, index, &decompressed_scan_slot->tts_values[attr]); - decompressed_scan_slot->tts_isnull[attr] = - !arrow_row_is_valid(packed->buffers[0], arrow_row); + store_text_datum2(packed, index); + *packed->output_isnull = !arrow_row_is_valid(packed->buffers[0], arrow_row); } else { - const int value_bytes = packed->decompression_type; - Assert(value_bytes > 0); - Assert(value_bytes <= 8); + Assert(packed->decompression_type > 0); + Assert(packed->decompression_type <= 8); + const uint8 value_bytes = packed->decompression_type; const char *restrict src = packed->buffers[1]; /* @@ -799,9 +797,8 @@ make_next_tuple(DecompressBatchState *batch_state, bool reverse, int num_compres datum = Int64GetDatum(value); } #endif - decompressed_scan_slot->tts_values[attr] = datum; - decompressed_scan_slot->tts_isnull[attr] = - !arrow_row_is_valid(packed->buffers[0], arrow_row); + *packed->output_value = datum; + *packed->output_isnull = !arrow_row_is_valid(packed->buffers[0], arrow_row); } } @@ -828,7 +825,8 @@ vector_qual(DecompressBatchState *batch_state, bool reverse) Assert(batch_state->next_batch_row < batch_state->total_batch_rows); const uint16 output_row = batch_state->next_batch_row; - const uint16 arrow_row = unlikely(reverse) ? batch_state->total_batch_rows - 1 - output_row : output_row; + const uint16 arrow_row = + unlikely(reverse) ? 
batch_state->total_batch_rows - 1 - output_row : output_row; if (!batch_state->vector_qual_result) { diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.h b/tsl/src/nodes/decompress_chunk/compressed_batch.h index d6c5d3a2864..c661216e906 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.h +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.h @@ -47,7 +47,8 @@ typedef enum typedef struct CompressedColumnValues2 { DecompressionType decompression_type; - AttrNumber output_attno; + Datum *output_value; + bool *output_isnull; // The buffers are as following: // iterator: iterator // arrow fixed: validity, value From 5de7e1e743bf014949c5e730fde0318ac629a7d7 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 15 Dec 2023 17:50:50 +0100 Subject: [PATCH 119/249] make arrow_row index a parameter --- .../nodes/decompress_chunk/compressed_batch.c | 23 ++++++------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 9dce13ea702..5d504b12bbd 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -715,13 +715,9 @@ store_text_datum2(CompressedColumnValues2 *packed, int arrow_row) /* * Construct the next tuple in the decompressed scan slot. * Doesn't check the quals. - * - * It takes "reverse" and "num_compressed_columns" by value to avoid accessing - * the DecompressContext, which would prevent the compiler to combine it with - * vector_qual(). */ static void -make_next_tuple(DecompressBatchState *batch_state, bool reverse, int num_compressed_columns) +make_next_tuple(DecompressBatchState *batch_state, uint16 arrow_row, int num_compressed_columns) { TupleTableSlot *decompressed_scan_slot = batch_state->decompressed_scan_slot; Assert(decompressed_scan_slot != NULL); @@ -729,10 +725,6 @@ make_next_tuple(DecompressBatchState *batch_state, bool reverse, int num_compres Assert(batch_state->total_batch_rows > 0); Assert(batch_state->next_batch_row < batch_state->total_batch_rows); - const uint16 output_row = batch_state->next_batch_row; - const uint16 arrow_row = - unlikely(reverse) ? batch_state->total_batch_rows - 1 - output_row : output_row; - for (int i = 0; i < num_compressed_columns; i++) { CompressedColumnValues2 *packed = &batch_state->compressed_columns_packed[i]; @@ -819,15 +811,11 @@ make_next_tuple(DecompressBatchState *batch_state, bool reverse, int num_compres } static bool -vector_qual(DecompressBatchState *batch_state, bool reverse) +vector_qual(DecompressBatchState *batch_state, uint16 arrow_row) { Assert(batch_state->total_batch_rows > 0); Assert(batch_state->next_batch_row < batch_state->total_batch_rows); - const uint16 output_row = batch_state->next_batch_row; - const uint16 arrow_row = - unlikely(reverse) ? batch_state->total_batch_rows - 1 - output_row : output_row; - if (!batch_state->vector_qual_result) { return true; @@ -868,12 +856,15 @@ compressed_batch_advance(DecompressContext *dcontext, DecompressBatchState *batc Assert(decompressed_scan_slot != NULL); const bool reverse = dcontext->reverse; + const uint16 output_row = batch_state->next_batch_row; + const uint16 arrow_row = + unlikely(reverse) ? 
batch_state->total_batch_rows - 1 - output_row : output_row; const int num_compressed_columns = dcontext->num_compressed_columns; for (; batch_state->next_batch_row < batch_state->total_batch_rows; batch_state->next_batch_row++) { - if (!vector_qual(batch_state, reverse)) + if (!vector_qual(batch_state, arrow_row)) { /* * This row doesn't pass the vectorized quals. Advance the iterated @@ -893,7 +884,7 @@ compressed_batch_advance(DecompressContext *dcontext, DecompressBatchState *batc continue; } - make_next_tuple(batch_state, reverse, num_compressed_columns); + make_next_tuple(batch_state, arrow_row, num_compressed_columns); if (!postgres_qual(dcontext, batch_state)) { From 5d2feef0d066c794ed43f6cf32801f1e790e902f Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 15 Dec 2023 17:22:45 +0100 Subject: [PATCH 120/249] double free --- tsl/src/compression/api.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tsl/src/compression/api.c b/tsl/src/compression/api.c index 4d3da13a76b..18fd1ed6fc6 100644 --- a/tsl/src/compression/api.c +++ b/tsl/src/compression/api.c @@ -1393,7 +1393,6 @@ tsl_recompress_chunk_segmentwise(PG_FUNCTION_ARGS) row_compressor.num_compressed_rows); row_compressor_close(&row_compressor); - FreeBulkInsertState(decompressor.bistate); ExecDropSingleTupleTableSlot(slot); index_endscan(index_scan); UnregisterSnapshot(snapshot); From f6386ec6bbfb5707ec7667d45d7f73dcf3541137 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 15 Dec 2023 17:26:40 +0100 Subject: [PATCH 121/249] use after free 2 --- tsl/src/compression/api.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tsl/src/compression/api.c b/tsl/src/compression/api.c index 18fd1ed6fc6..412715718df 100644 --- a/tsl/src/compression/api.c +++ b/tsl/src/compression/api.c @@ -1194,10 +1194,6 @@ tsl_recompress_chunk_segmentwise(PG_FUNCTION_ARGS) /******************** row decompressor **************/ RowDecompressor decompressor = build_decompressor(compressed_chunk_rel, uncompressed_chunk_rel); - /* do not need the indexes on the uncompressed chunk as we do not write to it anymore */ - ts_catalog_close_indexes(decompressor.indexstate); - /* also do not need estate because we don't insert into indexes */ - FreeExecutorState(decompressor.estate); /********** row compressor *******************/ RowCompressor row_compressor; row_compressor_init(settings, From 7244a99707a126b7c234a543c1a51af0d4c8a3d5 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 15 Dec 2023 18:06:57 +0100 Subject: [PATCH 122/249] benchmark bulk text (2023-12-15 #5) From f1223d65456e7689702eac75541c4168d9df5546 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 15 Dec 2023 18:11:01 +0100 Subject: [PATCH 123/249] fix --- tsl/src/nodes/decompress_chunk/compressed_batch.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 5d504b12bbd..fb4a6caa909 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -856,14 +856,15 @@ compressed_batch_advance(DecompressContext *dcontext, DecompressBatchState *batc Assert(decompressed_scan_slot != NULL); const bool reverse = dcontext->reverse; - const uint16 output_row = batch_state->next_batch_row; - const uint16 arrow_row = - 
unlikely(reverse) ? batch_state->total_batch_rows - 1 - output_row : output_row; const int num_compressed_columns = dcontext->num_compressed_columns; for (; batch_state->next_batch_row < batch_state->total_batch_rows; batch_state->next_batch_row++) { + const uint16 output_row = batch_state->next_batch_row; + const uint16 arrow_row = + unlikely(reverse) ? batch_state->total_batch_rows - 1 - output_row : output_row; + if (!vector_qual(batch_state, arrow_row)) { /* From dace562d3fab2aafd60bd5a97ac78e1fb766daea Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 15 Dec 2023 18:11:09 +0100 Subject: [PATCH 124/249] benchmark bulk text (2023-12-15 #6) From 3985663e50e53cf5c544ddc32df306f5cee2959b Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 15 Dec 2023 18:23:47 +0100 Subject: [PATCH 125/249] benchmark detoaster (2023-12-15 no. 8) From e6b87d7532e110599797426dc41730423988b20b Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 15 Dec 2023 19:13:15 +0100 Subject: [PATCH 126/249] cleanup --- .../nodes/decompress_chunk/compressed_batch.c | 61 ++++++++++--------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index fb4a6caa909..2ab7d2da35b 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -728,37 +728,8 @@ make_next_tuple(DecompressBatchState *batch_state, uint16 arrow_row, int num_com for (int i = 0; i < num_compressed_columns; i++) { CompressedColumnValues2 *packed = &batch_state->compressed_columns_packed[i]; - if (packed->decompression_type == DT_Default) + if (packed->decompression_type > 0) { - /* Do nothing. 
*/ - } - else if (packed->decompression_type == DT_Iterator) - { - DecompressionIterator *iterator = (DecompressionIterator *) packed->buffers[0]; - DecompressResult result = iterator->try_next(iterator); - - if (result.is_done) - { - elog(ERROR, "compressed column out of sync with batch counter"); - } - - *packed->output_isnull = result.is_null; - *packed->output_value = result.val; - } - else if (packed->decompression_type == DT_ArrowText) - { - store_text_datum2(packed, arrow_row); - *packed->output_isnull = !arrow_row_is_valid(packed->buffers[0], arrow_row); - } - else if (packed->decompression_type == DT_ArrowTextDict) - { - const int16 index = ((int16 *) packed->buffers[3])[arrow_row]; - store_text_datum2(packed, index); - *packed->output_isnull = !arrow_row_is_valid(packed->buffers[0], arrow_row); - } - else - { - Assert(packed->decompression_type > 0); Assert(packed->decompression_type <= 8); const uint8 value_bytes = packed->decompression_type; const char *restrict src = packed->buffers[1]; @@ -792,6 +763,36 @@ make_next_tuple(DecompressBatchState *batch_state, uint16 arrow_row, int num_com *packed->output_value = datum; *packed->output_isnull = !arrow_row_is_valid(packed->buffers[0], arrow_row); } + else if (packed->decompression_type == DT_Iterator) + { + DecompressionIterator *iterator = (DecompressionIterator *) packed->buffers[0]; + DecompressResult result = iterator->try_next(iterator); + + if (result.is_done) + { + elog(ERROR, "compressed column out of sync with batch counter"); + } + + *packed->output_isnull = result.is_null; + *packed->output_value = result.val; + } + else if (packed->decompression_type == DT_ArrowText) + { + store_text_datum2(packed, arrow_row); + *packed->output_isnull = !arrow_row_is_valid(packed->buffers[0], arrow_row); + } + else if (packed->decompression_type == DT_ArrowTextDict) + { + const int16 index = ((int16 *) packed->buffers[3])[arrow_row]; + store_text_datum2(packed, index); + *packed->output_isnull = !arrow_row_is_valid(packed->buffers[0], arrow_row); + } + else + { + /* A compressed column with default value, do nothing. */ + Assert(packed->decompression_type == DT_Default); + pg_unreachable(); + } } /* From 0e219702b5e3d50b0a007be9ac8e9003f8d96193 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 15 Dec 2023 19:20:33 +0100 Subject: [PATCH 127/249] fix --- tsl/src/nodes/decompress_chunk/compressed_batch.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 2ab7d2da35b..4eedf9423e7 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -791,7 +791,6 @@ make_next_tuple(DecompressBatchState *batch_state, uint16 arrow_row, int num_com { /* A compressed column with default value, do nothing. 
*/ Assert(packed->decompression_type == DT_Default); - pg_unreachable(); } } From 46ccad2a1e316ee1791b2f9049a9ef3547d6a531 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 18 Dec 2023 12:40:12 +0100 Subject: [PATCH 128/249] micro-optimizations --- src/adts/bit_array_impl.h | 6 ++---- tsl/src/compression/deltadelta_impl.c | 12 ++++++++++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/adts/bit_array_impl.h b/src/adts/bit_array_impl.h index c9555613d61..8de4c7e844f 100644 --- a/src/adts/bit_array_impl.h +++ b/src/adts/bit_array_impl.h @@ -347,8 +347,6 @@ bit_array_append_bucket(BitArray *array, uint8 bits_used, uint64 bucket) static uint64 bit_array_low_bits_mask(uint8 bits_used) { - if (bits_used >= 64) - return PG_UINT64_MAX; - else - return (UINT64CONST(1) << bits_used) - UINT64CONST(1); + Assert(bits_used > 0); + return -1ULL >> (64 - bits_used); } diff --git a/tsl/src/compression/deltadelta_impl.c b/tsl/src/compression/deltadelta_impl.c index 48562aaec23..f51ff9804d4 100644 --- a/tsl/src/compression/deltadelta_impl.c +++ b/tsl/src/compression/deltadelta_impl.c @@ -31,7 +31,7 @@ FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, Memory * test_delta4(). */ uint16 num_deltas; - const uint64 *restrict deltas_zigzag = + uint64 *restrict deltas_zigzag = simple8brle_decompress_all_uint64(deltas_compressed, &num_deltas); Simple8bRleBitmap nulls = { 0 }; @@ -82,7 +82,15 @@ FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, Memory { for (uint16 inner = 0; inner < INNER_LOOP_SIZE; inner++) { - current_delta += zig_zag_decode(deltas_zigzag[outer + inner]); + deltas_zigzag[outer + inner] = zig_zag_decode(deltas_zigzag[outer + inner]); + } + } + + for (uint16 outer = 0; outer < n_notnull_padded; outer += INNER_LOOP_SIZE) + { + for (uint16 inner = 0; inner < INNER_LOOP_SIZE; inner++) + { + current_delta += deltas_zigzag[outer + inner]; current_element += current_delta; decompressed_values[outer + inner] = current_element; } From 0f91ded7091566592639b601ac79ec084be0d612 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 18 Dec 2023 12:41:37 +0100 Subject: [PATCH 129/249] fix --- tsl/src/compression/compression_test.c | 2 +- tsl/src/compression/decompress_arithmetic_test_impl.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tsl/src/compression/compression_test.c b/tsl/src/compression/compression_test.c index fa29cbd9284..55fbc5b03e9 100644 --- a/tsl/src/compression/compression_test.c +++ b/tsl/src/compression/compression_test.c @@ -331,7 +331,7 @@ target_generic(const uint8 *Data, size_t Size, CompressionAlgorithm requested_al if (bulk) { - DecompressAllFunction decompress_all = tsl_get_decompress_all_function(data_algo); + DecompressAllFunction decompress_all = tsl_get_decompress_all_function(data_algo, pg_type); decompress_all(compressed_data, pg_type, CurrentMemoryContext); return 0; } diff --git a/tsl/src/compression/decompress_arithmetic_test_impl.c b/tsl/src/compression/decompress_arithmetic_test_impl.c index 60f5b7ea88d..c9a207d143c 100644 --- a/tsl/src/compression/decompress_arithmetic_test_impl.c +++ b/tsl/src/compression/decompress_arithmetic_test_impl.c @@ -95,7 +95,7 @@ FUNCTION_NAME3(decompress, ALGO, PG_TYPE_PREFIX)(const uint8 *Data, size_t Size, const CompressionAlgorithmDefinition *def = algorithm_definition(data_algo); Datum compressed_data = def->compressed_data_recv(&si); - 
DecompressAllFunction decompress_all = tsl_get_decompress_all_function(data_algo); + DecompressAllFunction decompress_all = tsl_get_decompress_all_function(data_algo, PG_TYPE_OID); ArrowArray *arrow = NULL; if (bulk) From 4ed61234a72fb799c0673168948e6cc897102053 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 19 Dec 2023 12:54:24 +0100 Subject: [PATCH 130/249] show the statistics about corpus --- .github/workflows/libfuzzer.yaml | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 792f1b2777e..f71ffcacdea 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -173,6 +173,9 @@ jobs: with: path: db/corpus-bulk key: "${{ steps.config.outputs.cache_prefix }}-bulk" + # I think the key used to match by prefix, but somehow it doesn't match + # anymore, so add "restore-keys" as well to be absolutely sure. + restore-keys: "${{ steps.config.outputs.cache_prefix }}-bulk" # We save the row-by-row corpus separately from the bulk corpus, so that # they don't overwrite each other. Now we are going to combine them. @@ -182,6 +185,7 @@ jobs: with: path: db/corpus-rowbyrow key: "${{ steps.config.outputs.cache_prefix }}-rowbyrow" + restore-keys: "${{ steps.config.outputs.cache_prefix }}-rowbyrow" - name: Initialize the fuzzing corpus run: | @@ -204,10 +208,18 @@ jobs: psql -c "create extension timescaledb;" - # Create the fuzzing function + # Create the fuzzing functions export MODULE_NAME=$(basename $(find $HOME/$PG_INSTALL_DIR -name "timescaledb-tsl-*.so")) - psql -a -c "create or replace function fuzz(algo cstring, pgtype regtype, bulk bool, runs int) returns int as '"$MODULE_NAME"', 'ts_fuzz_compression' language c;" + psql -a -c "create or replace function fuzz(algo cstring, pgtype regtype, + bulk bool, runs int) + returns int as '"$MODULE_NAME"', 'ts_fuzz_compression' language c; + create or replace function ts_read_compressed_data_directory(algo cstring, + pgtype regtype, path cstring, bulk bool) + returns table(path text, bytes int, rows int, sqlstate text, location text) + as '"$MODULE_NAME"', 'ts_read_compressed_data_directory' language c; + + " # Start more fuzzing processes in the background. We won't even monitor # their progress, because the server will panic if they find an error. 
@@ -233,6 +245,20 @@ jobs: ls db/corpus | wc -l + # Show the statistics about fuzzing corpus + psql -c "select count(*), location from ts_read_compressed_data_directory('${{ matrix.case.algo }}', + '${{ matrix.case.pgtype }}', 'corpus', '${{ matirx.case.bulk }}') + group by location order by count(*) desc + " + + # Check that we don't have any internal errors + errors = $(psql -qtAX --set=ON_ERROR_STOP=1 -c "select count(*) + from ts_read_compressed_data_directory('${{ matrix.case.algo }}', + '${{ matrix.case.pgtype }}', 'corpus', '${{ matirx.case.bulk }}') + where sqlstate = 'XX000'") + echo "Internal program errors: $errors" + [ $errors -eq 0 ] || exit 1 + - name: Collect the logs if: always() id: collectlogs From 0ad7981ce2cd718f4e44293677a900f585bf36df Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 19 Dec 2023 13:01:18 +0100 Subject: [PATCH 131/249] typo --- .github/workflows/libfuzzer.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index f71ffcacdea..b241a0b1f37 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -247,14 +247,14 @@ jobs: # Show the statistics about fuzzing corpus psql -c "select count(*), location from ts_read_compressed_data_directory('${{ matrix.case.algo }}', - '${{ matrix.case.pgtype }}', 'corpus', '${{ matirx.case.bulk }}') + '${{ matrix.case.pgtype }}', 'corpus', '${{ matrix.case.bulk }}') group by location order by count(*) desc " # Check that we don't have any internal errors errors = $(psql -qtAX --set=ON_ERROR_STOP=1 -c "select count(*) from ts_read_compressed_data_directory('${{ matrix.case.algo }}', - '${{ matrix.case.pgtype }}', 'corpus', '${{ matirx.case.bulk }}') + '${{ matrix.case.pgtype }}', 'corpus', '${{ matrix.case.bulk }}') where sqlstate = 'XX000'") echo "Internal program errors: $errors" [ $errors -eq 0 ] || exit 1 From 5e3e15728b912e8baec4124bb56a68752368cd5a Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 19 Dec 2023 13:53:38 +0100 Subject: [PATCH 132/249] fix --- .github/workflows/libfuzzer.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index b241a0b1f37..24aa731e548 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -246,13 +246,14 @@ jobs: ls db/corpus | wc -l # Show the statistics about fuzzing corpus - psql -c "select count(*), location from ts_read_compressed_data_directory('${{ matrix.case.algo }}', - '${{ matrix.case.pgtype }}', 'corpus', '${{ matrix.case.bulk }}') + psql -c "select count(*), location, min(sqlstate), min(path) + from ts_read_compressed_data_directory('${{ matrix.case.algo }}', + '${{ matrix.case.pgtype }}', 'corpus', '${{ matrix.case.bulk }}') group by location order by count(*) desc " # Check that we don't have any internal errors - errors = $(psql -qtAX --set=ON_ERROR_STOP=1 -c "select count(*) + errors=$(psql -qtAX --set=ON_ERROR_STOP=1 -c "select count(*) from ts_read_compressed_data_directory('${{ matrix.case.algo }}', '${{ matrix.case.pgtype }}', 'corpus', '${{ matrix.case.bulk }}') where sqlstate = 'XX000'") From c651eb7cdf17c3a59e11273639e921311e355da3 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 19 Dec 2023 13:59:37 +0100 Subject: [PATCH 133/249] proper error code --- 
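Note on the error code: the corpus check added to .github/workflows/libfuzzer.yaml a
few patches above fails the run whenever a corpus file produces sqlstate XX000, which
is what a bare elog(ERROR, ...) reports. Validation of untrusted compressed input
therefore has to go through CheckCompressedData() instead. Its definition is not shown
in this series; a minimal sketch of the assumed behaviour, using the stock PostgreSQL
error-reporting facilities and a data-corruption error code (both the exact expansion
and the error code are assumptions here), would be:

    /* Hypothetical sketch only; see the real definition in the compression headers. */
    #include "postgres.h"

    #define CheckCompressedData(X)                                                \
        do                                                                        \
        {                                                                         \
            if (unlikely(!(X)))                                                   \
                ereport(ERROR,                                                    \
                        (errcode(ERRCODE_DATA_CORRUPTED),                         \
                         errmsg("the compressed data is corrupt")));              \
        } while (0)

    /*
     * Effect in binary_string_get_type() below:
     *   before: elog(ERROR, "could not find type %s.%s", ...)  -> sqlstate XX000,
     *           counted as an internal error by the workflow check;
     *   after:  CheckCompressedData(OidIsValid(type_oid))      -> reported as bad
     *           input data, which the XX000 check tolerates.
     */
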
tsl/src/compression/datum_serialize.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tsl/src/compression/datum_serialize.c b/tsl/src/compression/datum_serialize.c index bf3a799cf25..de0dc716511 100644 --- a/tsl/src/compression/datum_serialize.c +++ b/tsl/src/compression/datum_serialize.c @@ -354,8 +354,7 @@ binary_string_get_type(StringInfo buffer) Anum_pg_type_oid, PointerGetDatum(element_type_name), ObjectIdGetDatum(namespace_oid)); - if (!OidIsValid(type_oid)) - elog(ERROR, "could not find type %s.%s", element_type_namespace, element_type_name); + CheckCompressedData(OidIsValid(type_oid)); return type_oid; } From 993698d2185235b3806e3c9b14ef1439d5c53578 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 19 Dec 2023 14:07:06 +0100 Subject: [PATCH 134/249] fixes --- .github/workflows/libfuzzer.yaml | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 24aa731e548..f75271d6a59 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -286,20 +286,27 @@ jobs: path: db/crash-* # We use separate restore/save actions, because the default action won't - # save the updated folder after the cache hit. We also can't overwrite the - # existing cache, so we add a unique suffix. The cache is matched by key - # prefix, not exact key, and picks the newest matching item, so this works. + # save the updated folder after the cache hit. We also want to save the + # cache after fuzzing errors, and the default action doesn't save after + # errors. + # We can't overwrite the existing cache, so we add a unique suffix. The + # cache is matched by key prefix, not exact key, and picks the newest + # matching item, so this works. # The caches for rowbyrow and bulk fuzzing are saved separately, otherwise # the slower job would always overwrite the cache from the faster one. We # want to combine corpuses from bulk and rowbyrow fuzzing for better - # coverage. Note that the cache action cannot be restored on a path - # different from the one it was saved from. To make our lives more - # interesting, it is not directly documented anywhere, but we can deduce it - # from path influencing the version. - - name: Copy the fuzzer corpus to please the actions/cache + # coverage. + # Note that the cache action cannot be restored on a path different from the + # one it was saved from. To make our lives more interesting, it is not + # directly documented anywhere, but we can deduce it from path influencing + # the version. 
+ - name: Change corpus path to please the 'actions/cache' GitHub Action + if: always() run: | - find "db/corpus" -type f -exec cp -t db/corpus-${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }} {} + + mv -f db/corpus{,-${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }}} + - name: Save fuzzer corpus + if: always() uses: actions/cache/save@v3 with: path: db/corpus-${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }} From fab16cada97e976f4be39ba7d0b5ee6c6f94dd87 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 19 Dec 2023 15:34:47 +0100 Subject: [PATCH 135/249] finish refactoring of column values --- tsl/src/nodes/decompress_chunk/batch_array.c | 2 +- .../nodes/decompress_chunk/compressed_batch.c | 248 ++++++++---------- .../nodes/decompress_chunk/compressed_batch.h | 64 ++--- 3 files changed, 141 insertions(+), 173 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/batch_array.c b/tsl/src/nodes/decompress_chunk/batch_array.c index fd081922d35..43ad9af7be0 100644 --- a/tsl/src/nodes/decompress_chunk/batch_array.c +++ b/tsl/src/nodes/decompress_chunk/batch_array.c @@ -21,7 +21,7 @@ batch_array_init(BatchArray *array, int nbatches, int ncolumns_per_batch, array->unused_batch_states = bms_add_range(NULL, 0, nbatches - 1); array->batch_memory_context_bytes = memory_context_block_size_bytes; array->n_batch_state_bytes = - sizeof(DecompressBatchState) + sizeof(CompressedColumnValues2) * ncolumns_per_batch; + sizeof(DecompressBatchState) + sizeof(CompressedColumnValues) * ncolumns_per_batch; array->batch_states = palloc0(array->n_batch_state_bytes * nbatches); Assert(bms_num_members(array->unused_batch_states) == array->n_batch_states); } diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 4eedf9423e7..48cdf9e2b04 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -141,14 +141,13 @@ static void decompress_column(DecompressContext *dcontext, DecompressBatchState *batch_state, int i) { CompressionColumnDescription *column_description = &dcontext->template_columns[i]; - CompressedColumnValues *column_values = &batch_state->compressed_columns_wide[i]; - column_values->iterator = NULL; + CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; column_values->arrow = NULL; - column_values->arrow_values = NULL; - column_values->arrow_validity = NULL; - column_values->output_attno = column_description->output_attno; - column_values->value_bytes = get_typlen(column_description->typid); - Assert(column_values->value_bytes != 0); + const AttrNumber attr = AttrNumberGetAttrOffset(column_description->output_attno); + column_values->output_value = &batch_state->decompressed_scan_slot->tts_values[attr]; + column_values->output_isnull = &batch_state->decompressed_scan_slot->tts_isnull[attr]; + const int value_bytes = get_typlen(column_description->typid); + Assert(value_bytes != 0); bool isnull; Datum value = slot_getattr(batch_state->compressed_slot, @@ -161,8 +160,7 @@ decompress_column(DecompressContext *dcontext, DecompressBatchState *batch_state * The column will have a default value for the entire batch, * set it now. 
*/ - column_values->iterator = NULL; - AttrNumber attr = AttrNumberGetAttrOffset(column_description->output_attno); + column_values->decompression_type = DT_Default; batch_state->decompressed_scan_slot->tts_values[attr] = getmissingattr(batch_state->decompressed_scan_slot->tts_tupleDescriptor, @@ -203,39 +201,65 @@ decompress_column(DecompressContext *dcontext, DecompressBatchState *batch_state MemoryContextSwitchTo(context_before_decompression); } - if (arrow) + if (arrow == NULL) { - /* Should have been filled from the count metadata column. */ - Assert(batch_state->total_batch_rows != 0); - if (batch_state->total_batch_rows != arrow->length) - { - elog(ERROR, "compressed column out of sync with batch counter"); - } + /* As a fallback, decompress row-by-row. */ + column_values->decompression_type = DT_Iterator; + column_values->buffers[0] = + tsl_get_decompression_iterator_init(header->compression_algorithm, + dcontext->reverse)(PointerGetDatum(header), + column_description->typid); + return; + } - column_values->arrow = arrow; - column_values->arrow_values = arrow->buffers[1]; - column_values->arrow_validity = arrow->buffers[0]; + /* Should have been filled from the count metadata column. */ + Assert(batch_state->total_batch_rows != 0); + if (batch_state->total_batch_rows != arrow->length) + { + elog(ERROR, "compressed column out of sync with batch counter"); + } - if (column_values->value_bytes == -1) - { - const int maxbytes = - VARHDRSZ + (column_values->arrow->dictionary ? - get_max_element_bytes(column_values->arrow->dictionary) : - get_max_element_bytes(column_values->arrow)); - - const AttrNumber attr = AttrNumberGetAttrOffset(column_values->output_attno); - batch_state->decompressed_scan_slot->tts_values[attr] = - PointerGetDatum(MemoryContextAlloc(batch_state->per_batch_context, maxbytes)); - } + column_values->arrow = arrow; - return; + if (value_bytes > 0) + { + /* Fixed-width column. */ + column_values->decompression_type = value_bytes; + column_values->buffers[0] = arrow->buffers[0]; + column_values->buffers[1] = arrow->buffers[1]; } + else + { + /* + * Text column. Pre-allocate memory for its text Datum in the + * decompressed scan slot. + */ + const int maxbytes = + VARHDRSZ + (arrow->dictionary ? get_max_element_bytes(arrow->dictionary) : + get_max_element_bytes(arrow)); + + *column_values->output_value = + PointerGetDatum(MemoryContextAlloc(batch_state->per_batch_context, maxbytes)); - /* As a fallback, decompress row-by-row. */ - column_values->iterator = - tsl_get_decompression_iterator_init(header->compression_algorithm, - dcontext->reverse)(PointerGetDatum(header), - column_description->typid); + /* + * Set up the datum conversion based on whether we use the dictionary. 
+ */ + if (arrow->dictionary == NULL) + { + column_values->decompression_type = DT_ArrowText; + column_values->buffers[0] = arrow->buffers[0]; + column_values->buffers[1] = arrow->buffers[1]; + column_values->buffers[2] = arrow->buffers[2]; + } + else + { + column_values->decompression_type = DT_ArrowTextDict; + column_values->buffers[0] = arrow->buffers[0]; + column_values->buffers[1] = arrow->dictionary->buffers[1]; + column_values->buffers[2] = arrow->dictionary->buffers[2]; + column_values->buffers[3] = arrow->buffers[1]; + } + } } /* @@ -325,9 +349,9 @@ compute_vector_quals(DecompressContext *dcontext, DecompressBatchState *batch_st "only compressed columns are supported in vectorized quals"); Assert(column_index < dcontext->num_compressed_columns); - CompressedColumnValues *column_values = &batch_state->compressed_columns_wide[column_index]; + CompressedColumnValues *column_values = &batch_state->compressed_columns[column_index]; - if (column_values->value_bytes == 0) + if (column_values->decompression_type == DT_Invalid) { /* * We decompress the compressed columns on demand, so that we can @@ -335,11 +359,10 @@ compute_vector_quals(DecompressContext *dcontext, DecompressBatchState *batch_st * the quals. */ decompress_column(dcontext, batch_state, column_index); - Assert(column_values->value_bytes != 0); + Assert(column_values->decompression_type != DT_Invalid); } - Ensure(column_values->iterator == NULL, - "only arrow columns are supported in vectorized quals"); + Assert(column_values->decompression_type != DT_Iterator); /* * Prepare to compute the vector predicate. We have to handle the @@ -357,21 +380,15 @@ compute_vector_quals(DecompressContext *dcontext, DecompressBatchState *batch_st * with this default value, check if it passes the predicate, and apply * it to the entire batch. */ - AttrNumber attr = AttrNumberGetAttrOffset(column_description->output_attno); - - Ensure(column_values->iterator == NULL, - "ArrowArray expected for column %s", - NameStr( - TupleDescAttr(batch_state->decompressed_scan_slot->tts_tupleDescriptor, attr) - ->attname)); + Assert(column_values->decompression_type == DT_Default); /* * We saved the actual default value into the decompressed scan slot * above, so pull it from there. */ vector = make_single_value_arrow(column_description->typid, - batch_state->decompressed_scan_slot->tts_values[attr], - batch_state->decompressed_scan_slot->tts_isnull[attr]); + *column_values->output_value, + *column_values->output_isnull); /* * We start from an all-valid bitmap, because the predicate is @@ -447,7 +464,7 @@ compute_vector_quals(DecompressContext *dcontext, DecompressBatchState *batch_st if (column_values->arrow == NULL) { /* The column had a default value. */ - Assert(column_values->iterator == NULL); + Assert(column_values->decompression_type == DT_Default); if (!(default_value_predicate_result & 1)) { @@ -523,9 +540,6 @@ compressed_batch_set_compressed_tuple(DecompressContext *dcontext, batch_state->decompressed_scan_slot = MakeSingleTupleTableSlot(dcontext->decompressed_slot_scan_tdesc, slot->tts_ops); - - batch_state->compressed_columns_wide = - palloc0(sizeof(CompressedColumnValues) * dcontext->num_compressed_columns); } else { @@ -562,10 +576,9 @@ compressed_batch_set_compressed_tuple(DecompressContext *dcontext, * skip decompressing some columns if the entire batch doesn't pass * the quals. Skip them for now. 
*/ - CompressedColumnValues *column_values = &batch_state->compressed_columns_wide[i]; - column_values->value_bytes = 0; + CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; + column_values->decompression_type = DT_Invalid; column_values->arrow = NULL; - column_values->iterator = NULL; break; } case SEGMENTBY_COLUMN: @@ -644,55 +657,12 @@ compressed_batch_set_compressed_tuple(DecompressContext *dcontext, const int num_compressed_columns = dcontext->num_compressed_columns; for (int i = 0; i < num_compressed_columns; i++) { - CompressionColumnDescription *desc = &dcontext->template_columns[i]; - CompressedColumnValues *wide = &batch_state->compressed_columns_wide[i]; - if (wide->value_bytes == 0) + CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; + if (column_values->decompression_type == DT_Invalid) { decompress_column(dcontext, batch_state, i); - Assert(wide->value_bytes != 0); + Assert(column_values->decompression_type != DT_Invalid); } - - CompressedColumnValues2 *packed = &batch_state->compressed_columns_packed[i]; - const AttrNumber attr = AttrNumberGetAttrOffset(desc->output_attno); - packed->output_value = &batch_state->decompressed_scan_slot->tts_values[attr]; - packed->output_isnull = &batch_state->decompressed_scan_slot->tts_isnull[attr]; - if (wide->iterator) - { - packed->decompression_type = DT_Iterator; - packed->buffers[0] = wide->iterator; - continue; - } - - if (wide->arrow == NULL) - { - packed->decompression_type = DT_Default; - continue; - } - - if (wide->value_bytes > 0) - { - packed->decompression_type = wide->value_bytes; - packed->buffers[0] = wide->arrow->buffers[0]; - packed->buffers[1] = wide->arrow->buffers[1]; - continue; - } - - Assert(wide->value_bytes == -1); - - if (wide->arrow->dictionary == NULL) - { - packed->decompression_type = DT_ArrowText; - packed->buffers[0] = wide->arrow->buffers[0]; - packed->buffers[1] = wide->arrow->buffers[1]; - packed->buffers[2] = wide->arrow->buffers[2]; - continue; - } - - packed->decompression_type = DT_ArrowTextDict; - packed->buffers[0] = wide->arrow->buffers[0]; - packed->buffers[1] = wide->arrow->dictionary->buffers[1]; - packed->buffers[2] = wide->arrow->dictionary->buffers[2]; - packed->buffers[3] = wide->arrow->buffers[1]; } } @@ -700,16 +670,18 @@ compressed_batch_set_compressed_tuple(DecompressContext *dcontext, } static void -store_text_datum2(CompressedColumnValues2 *packed, int arrow_row) +store_text_datum2(CompressedColumnValues *column_values, int arrow_row) { - const uint32 start = ((uint32 *) packed->buffers[1])[arrow_row]; - const int32 value_bytes = ((uint32 *) packed->buffers[1])[arrow_row + 1] - start; + const uint32 start = ((uint32 *) column_values->buffers[1])[arrow_row]; + const int32 value_bytes = ((uint32 *) column_values->buffers[1])[arrow_row + 1] - start; Assert(value_bytes >= 0); const int total_bytes = value_bytes + VARHDRSZ; - Assert(DatumGetPointer(*packed->output_value) != NULL); - SET_VARSIZE(*packed->output_value, total_bytes); - memcpy(VARDATA(*packed->output_value), &((uint8 *) packed->buffers[2])[start], value_bytes); + Assert(DatumGetPointer(*column_values->output_value) != NULL); + SET_VARSIZE(*column_values->output_value, total_bytes); + memcpy(VARDATA(*column_values->output_value), + &((uint8 *) column_values->buffers[2])[start], + value_bytes); } /* @@ -727,12 +699,12 @@ make_next_tuple(DecompressBatchState *batch_state, uint16 arrow_row, int num_com for (int i = 0; i < num_compressed_columns; i++) { - CompressedColumnValues2 
*packed = &batch_state->compressed_columns_packed[i]; - if (packed->decompression_type > 0) + CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; + if (column_values->decompression_type > 0) { - Assert(packed->decompression_type <= 8); - const uint8 value_bytes = packed->decompression_type; - const char *restrict src = packed->buffers[1]; + Assert(column_values->decompression_type <= 8); + const uint8 value_bytes = column_values->decompression_type; + const char *restrict src = column_values->buffers[1]; /* * The conversion of Datum to more narrow types will truncate @@ -760,12 +732,13 @@ make_next_tuple(DecompressBatchState *batch_state, uint16 arrow_row, int num_com datum = Int64GetDatum(value); } #endif - *packed->output_value = datum; - *packed->output_isnull = !arrow_row_is_valid(packed->buffers[0], arrow_row); + *column_values->output_value = datum; + *column_values->output_isnull = + !arrow_row_is_valid(column_values->buffers[0], arrow_row); } - else if (packed->decompression_type == DT_Iterator) + else if (column_values->decompression_type == DT_Iterator) { - DecompressionIterator *iterator = (DecompressionIterator *) packed->buffers[0]; + DecompressionIterator *iterator = (DecompressionIterator *) column_values->buffers[0]; DecompressResult result = iterator->try_next(iterator); if (result.is_done) @@ -773,24 +746,26 @@ make_next_tuple(DecompressBatchState *batch_state, uint16 arrow_row, int num_com elog(ERROR, "compressed column out of sync with batch counter"); } - *packed->output_isnull = result.is_null; - *packed->output_value = result.val; + *column_values->output_isnull = result.is_null; + *column_values->output_value = result.val; } - else if (packed->decompression_type == DT_ArrowText) + else if (column_values->decompression_type == DT_ArrowText) { - store_text_datum2(packed, arrow_row); - *packed->output_isnull = !arrow_row_is_valid(packed->buffers[0], arrow_row); + store_text_datum2(column_values, arrow_row); + *column_values->output_isnull = + !arrow_row_is_valid(column_values->buffers[0], arrow_row); } - else if (packed->decompression_type == DT_ArrowTextDict) + else if (column_values->decompression_type == DT_ArrowTextDict) { - const int16 index = ((int16 *) packed->buffers[3])[arrow_row]; - store_text_datum2(packed, index); - *packed->output_isnull = !arrow_row_is_valid(packed->buffers[0], arrow_row); + const int16 index = ((int16 *) column_values->buffers[3])[arrow_row]; + store_text_datum2(column_values, index); + *column_values->output_isnull = + !arrow_row_is_valid(column_values->buffers[0], arrow_row); } else { /* A compressed column with default value, do nothing. 
*/ - Assert(packed->decompression_type == DT_Default); + Assert(column_values->decompression_type == DT_Default); } } @@ -873,10 +848,11 @@ compressed_batch_advance(DecompressContext *dcontext, DecompressBatchState *batc */ for (int i = 0; i < num_compressed_columns; i++) { - CompressedColumnValues2 *packed = &batch_state->compressed_columns_packed[i]; - if (packed->decompression_type == DT_Iterator) + CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; + if (column_values->decompression_type == DT_Iterator) { - DecompressionIterator *iterator = (DecompressionIterator *) packed->buffers[0]; + DecompressionIterator *iterator = + (DecompressionIterator *) column_values->buffers[0]; iterator->try_next(iterator); } } @@ -909,10 +885,10 @@ compressed_batch_advance(DecompressContext *dcontext, DecompressBatchState *batc Assert(batch_state->next_batch_row == batch_state->total_batch_rows); for (int i = 0; i < num_compressed_columns; i++) { - CompressedColumnValues2 *packed = &batch_state->compressed_columns_packed[i]; - if (packed->decompression_type == DT_Iterator) + CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; + if (column_values->decompression_type == DT_Iterator) { - DecompressionIterator *iterator = (DecompressionIterator *) packed->buffers[0]; + DecompressionIterator *iterator = (DecompressionIterator *) column_values->buffers[0]; DecompressResult result = iterator->try_next(iterator); if (!result.is_done) { @@ -950,8 +926,8 @@ compressed_batch_save_first_tuple(DecompressContext *dcontext, DecompressBatchSt const int num_compressed_columns = dcontext->num_compressed_columns; for (int i = 0; i < num_compressed_columns; i++) { - CompressedColumnValues *column_values = &batch_state->compressed_columns_wide[i]; - Assert(column_values->value_bytes != 0); + CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; + Assert(column_values->decompression_type != DT_Invalid); } #endif diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.h b/tsl/src/nodes/decompress_chunk/compressed_batch.h index c661216e906..eb53d0ac08d 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.h +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.h @@ -10,52 +10,46 @@ typedef struct ArrowArray ArrowArray; -typedef struct CompressedColumnValues -{ - /* For row-by-row decompression. */ - DecompressionIterator *iterator; - - /* - * For bulk decompression and vectorized filters, mutually exclusive - * with the above. - */ - ArrowArray *arrow; - - /* - * These are the arrow buffers cached here to reduce the amount of - * indirections (we have about three there, so it matters). - */ - const void *arrow_validity; - const void *arrow_values; - - /* - * The following fields are copied here for better data locality. - */ - AttrNumber output_attno; - int8 value_bytes; -} CompressedColumnValues; - +/* How to obtain the decompressed datum for individual row. */ typedef enum { DT_ArrowTextDict = -4, DT_ArrowText = -3, DT_Default = -2, DT_Iterator = -1, - Invalid = 0, + DT_Invalid = 0, + /* + * Any positive number is also valid for the decompression type. It means + * arrow array of a fixed-size by-value type, with size given by the number. + */ } DecompressionType; -typedef struct CompressedColumnValues2 +typedef struct CompressedColumnValues { + /* How to obtain the decompressed datum for individual row. */ DecompressionType decompression_type; + + /* Where to put the decompressed datum. 
*/ Datum *output_value; bool *output_isnull; - // The buffers are as following: - // iterator: iterator - // arrow fixed: validity, value - // arrow text: validity, uint32* offsets, void* bodies - // arrow dict text: validity, uint32* dict offsets, void* dict bodies, int16* indices + + /* + * The flattened source buffers for getting the decompressed datum. + * Depending on decompression type, they are as follows: + * iterator: iterator + * arrow fixed: validity, value + * arrow text: validity, uint32* offsets, void* bodies + * arrow dict text: validity, uint32* dict offsets, void* dict bodies, int16* indices + */ const void *restrict buffers[4]; -} CompressedColumnValues2; + + /* + * The source arrow array, if any. We don't use it for building the + * individual rows, and use the flattened buffers instead to lessen the + * amount of indirections. However, it is used for vectorized filters. + */ + ArrowArray *arrow; +} CompressedColumnValues; /* * All the information needed to decompress a batch. @@ -81,9 +75,7 @@ typedef struct DecompressBatchState */ uint64 *vector_qual_result; - CompressedColumnValues *compressed_columns_wide; - - CompressedColumnValues2 compressed_columns_packed[FLEXIBLE_ARRAY_MEMBER]; + CompressedColumnValues compressed_columns[FLEXIBLE_ARRAY_MEMBER]; } DecompressBatchState; extern void compressed_batch_set_compressed_tuple(DecompressContext *dcontext, From 70b19e9da8785957e3aae601e2fee5b16762d410 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 19 Dec 2023 15:43:18 +0100 Subject: [PATCH 136/249] swap branches for less diff with main --- .../nodes/decompress_chunk/compressed_batch.c | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 48cdf9e2b04..a1bedac875c 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -700,7 +700,20 @@ make_next_tuple(DecompressBatchState *batch_state, uint16 arrow_row, int num_com for (int i = 0; i < num_compressed_columns; i++) { CompressedColumnValues *column_values = &batch_state->compressed_columns[i]; - if (column_values->decompression_type > 0) + if (column_values->decompression_type == DT_Iterator) + { + DecompressionIterator *iterator = (DecompressionIterator *) column_values->buffers[0]; + DecompressResult result = iterator->try_next(iterator); + + if (result.is_done) + { + elog(ERROR, "compressed column out of sync with batch counter"); + } + + *column_values->output_isnull = result.is_null; + *column_values->output_value = result.val; + } + else if (column_values->decompression_type > 0) { Assert(column_values->decompression_type <= 8); const uint8 value_bytes = column_values->decompression_type; @@ -736,19 +749,6 @@ make_next_tuple(DecompressBatchState *batch_state, uint16 arrow_row, int num_com *column_values->output_isnull = !arrow_row_is_valid(column_values->buffers[0], arrow_row); } - else if (column_values->decompression_type == DT_Iterator) - { - DecompressionIterator *iterator = (DecompressionIterator *) column_values->buffers[0]; - DecompressResult result = iterator->try_next(iterator); - - if (result.is_done) - { - elog(ERROR, "compressed column out of sync with batch counter"); - } - - *column_values->output_isnull = result.is_null; - *column_values->output_value = result.val; - } else if (column_values->decompression_type == DT_ArrowText) { 
store_text_datum2(column_values, arrow_row); From 787efb7c532237e7204d4195bb226efc61aec552 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 19 Dec 2023 15:43:42 +0100 Subject: [PATCH 137/249] test the cache (2023-12-19 no. 9) From 1d7714e786d95802899cf127e0896ee60480eb3b Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 19 Dec 2023 15:56:10 +0100 Subject: [PATCH 138/249] maybe it works w/o restore-keys? --- .github/workflows/libfuzzer.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index f75271d6a59..0f01473ad1f 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -173,9 +173,6 @@ jobs: with: path: db/corpus-bulk key: "${{ steps.config.outputs.cache_prefix }}-bulk" - # I think the key used to match by prefix, but somehow it doesn't match - # anymore, so add "restore-keys" as well to be absolutely sure. - restore-keys: "${{ steps.config.outputs.cache_prefix }}-bulk" # We save the row-by-row corpus separately from the bulk corpus, so that # they don't overwrite each other. Now we are going to combine them. @@ -185,7 +182,6 @@ jobs: with: path: db/corpus-rowbyrow key: "${{ steps.config.outputs.cache_prefix }}-rowbyrow" - restore-keys: "${{ steps.config.outputs.cache_prefix }}-rowbyrow" - name: Initialize the fuzzing corpus run: | From 5569d853aec807b727cbbee33a03ae4ba0e11cbf Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 19 Dec 2023 15:57:15 +0100 Subject: [PATCH 139/249] no clobber --- .github/workflows/libfuzzer.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 0f01473ad1f..c898343f9cb 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -187,9 +187,9 @@ jobs: run: | # Combine the cached corpus from rowbyrow and bulk fuzzing, and from repository. 
mkdir -p db/corpus{,-rowbyrow,-bulk} - find "db/corpus-rowbyrow" -type f -exec cp -t db/corpus {} + - find "db/corpus-bulk" -type f -exec cp -t db/corpus {} + - find "tsl/test/fuzzing/compression/${{ matrix.case.algo }}-${{ matrix.case.pgtype }}" -type f -exec cp -t db/corpus {} + + find "tsl/test/fuzzing/compression/${{ matrix.case.algo }}-${{ matrix.case.pgtype }}" -type f -exec cp -n -t db/corpus {} + + find "db/corpus-rowbyrow" -type f -exec cp -n -t db/corpus {} + + find "db/corpus-bulk" -type f -exec cp -n -t db/corpus {} + ls db/corpus | wc -l - name: Run libfuzzer for compression From dbfd14f4caee0fe72b880a78ff26bc4bc168178e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 19 Dec 2023 16:12:39 +0100 Subject: [PATCH 140/249] proper size and checks --- tsl/src/compression/array.c | 8 ++++---- tsl/src/compression/simple8b_rle_decompress_all.h | 9 ++++++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/tsl/src/compression/array.c b/tsl/src/compression/array.c index 409b0e864ae..f68f3abd6d1 100644 --- a/tsl/src/compression/array.c +++ b/tsl/src/compression/array.c @@ -469,7 +469,7 @@ pad64(uint64 value) return ((value + 63) / 64) * 64; } -#define ELEMENT_TYPE uint16 +#define ELEMENT_TYPE uint32 #include "simple8b_rle_decompress_all.h" #undef ELEMENT_TYPE @@ -499,9 +499,9 @@ text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, Simple8bRleSerialized *sizes_serialized = bytes_deserialize_simple8b_and_advance(si); - uint16 sizes[GLOBAL_MAX_ROWS_PER_COMPRESSION]; + uint32 sizes[GLOBAL_MAX_ROWS_PER_COMPRESSION]; const uint16 n_notnull = - simple8brle_decompress_all_buf_uint16(sizes_serialized, + simple8brle_decompress_all_buf_uint32(sizes_serialized, sizes, sizeof(sizes) / sizeof(sizes[0])); const int n_total = has_nulls ? nulls_serialized->num_elements : n_notnull; @@ -530,7 +530,7 @@ text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, /* Varsize must match the size stored in the sizes array for this element. */ CheckCompressedData(VARSIZE_ANY(vardata) == sizes[i]); - const uint16 textlen = VARSIZE_ANY_EXHDR(vardata); + const uint32 textlen = VARSIZE_ANY_EXHDR(vardata); memcpy(&arrow_bodies[offset], VARDATA_ANY(vardata), textlen); // fprintf(stderr, diff --git a/tsl/src/compression/simple8b_rle_decompress_all.h b/tsl/src/compression/simple8b_rle_decompress_all.h index 874ba71662b..e20c4363928 100644 --- a/tsl/src/compression/simple8b_rle_decompress_all.h +++ b/tsl/src/compression/simple8b_rle_decompress_all.h @@ -56,10 +56,13 @@ FUNCTION_NAME(simple8brle_decompress_all_buf, const uint16 n_block_values = simple8brle_rledata_repeatcount(block_data); CheckCompressedData(decompressed_index + n_block_values <= n_buffer_elements); - const ELEMENT_TYPE repeated_value = simple8brle_rledata_value(block_data); + const uint64 repeated_value_raw = simple8brle_rledata_value(block_data); + const ELEMENT_TYPE repeated_value_converted = repeated_value_raw; + CheckCompressedData(repeated_value_raw == (uint64) repeated_value_converted); + for (uint16 i = 0; i < n_block_values; i++) { - decompressed_values[decompressed_index + i] = repeated_value; + decompressed_values[decompressed_index + i] = repeated_value_converted; } decompressed_index += n_block_values; @@ -77,7 +80,7 @@ FUNCTION_NAME(simple8brle_decompress_all_buf, * produces, which is easier for testing. 
\ */ \ const uint8 bits_per_value = SIMPLE8B_BIT_LENGTH[X]; \ - CheckCompressedData(bits_per_value / 8 <= sizeof(ELEMENT_TYPE)); \ + CheckCompressedData(bits_per_value <= sizeof(ELEMENT_TYPE) * 8); \ \ /* \ * The last block might have less values than normal, but we have \ From a69b25db166adcaac868c8345bf94aad07fab380 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 19 Dec 2023 16:17:39 +0100 Subject: [PATCH 141/249] more fixes from the bulk text PR --- src/adts/bit_array_impl.h | 6 ++---- tsl/src/compression/compression.h | 3 +-- tsl/src/compression/datum_serialize.c | 7 +++++++ tsl/src/compression/deltadelta_impl.c | 12 ++++++++++-- tsl/src/compression/simple8b_rle_decompress_all.h | 9 ++++++--- 5 files changed, 26 insertions(+), 11 deletions(-) diff --git a/src/adts/bit_array_impl.h b/src/adts/bit_array_impl.h index c9555613d61..8de4c7e844f 100644 --- a/src/adts/bit_array_impl.h +++ b/src/adts/bit_array_impl.h @@ -347,8 +347,6 @@ bit_array_append_bucket(BitArray *array, uint8 bits_used, uint64 bucket) static uint64 bit_array_low_bits_mask(uint8 bits_used) { - if (bits_used >= 64) - return PG_UINT64_MAX; - else - return (UINT64CONST(1) << bits_used) - UINT64CONST(1); + Assert(bits_used > 0); + return -1ULL >> (64 - bits_used); } diff --git a/tsl/src/compression/compression.h b/tsl/src/compression/compression.h index 41ab38d2d81..cc60a8823a3 100644 --- a/tsl/src/compression/compression.h +++ b/tsl/src/compression/compression.h @@ -390,8 +390,7 @@ inline static void * consumeCompressedData(StringInfo si, int bytes) { CheckCompressedData(bytes >= 0); - CheckCompressedData(bytes < PG_INT32_MAX / 2); - CheckCompressedData(si->cursor + bytes >= 0); + CheckCompressedData(si->cursor + bytes >= si->cursor); /* Check for overflow. */ CheckCompressedData(si->cursor + bytes <= si->len); void *result = si->data + si->cursor; diff --git a/tsl/src/compression/datum_serialize.c b/tsl/src/compression/datum_serialize.c index de0dc716511..d22ca1c0dd9 100644 --- a/tsl/src/compression/datum_serialize.c +++ b/tsl/src/compression/datum_serialize.c @@ -315,6 +315,13 @@ bytes_to_datum_and_advance(DatumDeserializer *deserializer, const char **ptr) * with 1-byte or 4-byte header here, no TOAST or compressed data. */ CheckCompressedData(VARATT_IS_4B_U(*ptr) || (VARATT_IS_1B(*ptr) && !VARATT_IS_1B_E(*ptr))); + + /* + * Full varsize must be larger or equal than the header size so that the + * calculation of size without header doesn't overflow. + */ + CheckCompressedData((VARATT_IS_1B(*ptr) && VARSIZE_1B(*ptr) >= VARHDRSZ_SHORT) || + (VARSIZE_4B(*ptr) > VARHDRSZ)); } res = fetch_att(*ptr, deserializer->type_by_val, deserializer->type_len); *ptr = att_addlength_pointer(*ptr, deserializer->type_len, *ptr); diff --git a/tsl/src/compression/deltadelta_impl.c b/tsl/src/compression/deltadelta_impl.c index 48562aaec23..f51ff9804d4 100644 --- a/tsl/src/compression/deltadelta_impl.c +++ b/tsl/src/compression/deltadelta_impl.c @@ -31,7 +31,7 @@ FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, Memory * test_delta4(). 
*/ uint16 num_deltas; - const uint64 *restrict deltas_zigzag = + uint64 *restrict deltas_zigzag = simple8brle_decompress_all_uint64(deltas_compressed, &num_deltas); Simple8bRleBitmap nulls = { 0 }; @@ -82,7 +82,15 @@ FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, Memory { for (uint16 inner = 0; inner < INNER_LOOP_SIZE; inner++) { - current_delta += zig_zag_decode(deltas_zigzag[outer + inner]); + deltas_zigzag[outer + inner] = zig_zag_decode(deltas_zigzag[outer + inner]); + } + } + + for (uint16 outer = 0; outer < n_notnull_padded; outer += INNER_LOOP_SIZE) + { + for (uint16 inner = 0; inner < INNER_LOOP_SIZE; inner++) + { + current_delta += deltas_zigzag[outer + inner]; current_element += current_delta; decompressed_values[outer + inner] = current_element; } diff --git a/tsl/src/compression/simple8b_rle_decompress_all.h b/tsl/src/compression/simple8b_rle_decompress_all.h index 874ba71662b..e20c4363928 100644 --- a/tsl/src/compression/simple8b_rle_decompress_all.h +++ b/tsl/src/compression/simple8b_rle_decompress_all.h @@ -56,10 +56,13 @@ FUNCTION_NAME(simple8brle_decompress_all_buf, const uint16 n_block_values = simple8brle_rledata_repeatcount(block_data); CheckCompressedData(decompressed_index + n_block_values <= n_buffer_elements); - const ELEMENT_TYPE repeated_value = simple8brle_rledata_value(block_data); + const uint64 repeated_value_raw = simple8brle_rledata_value(block_data); + const ELEMENT_TYPE repeated_value_converted = repeated_value_raw; + CheckCompressedData(repeated_value_raw == (uint64) repeated_value_converted); + for (uint16 i = 0; i < n_block_values; i++) { - decompressed_values[decompressed_index + i] = repeated_value; + decompressed_values[decompressed_index + i] = repeated_value_converted; } decompressed_index += n_block_values; @@ -77,7 +80,7 @@ FUNCTION_NAME(simple8brle_decompress_all_buf, * produces, which is easier for testing. 
\ */ \ const uint8 bits_per_value = SIMPLE8B_BIT_LENGTH[X]; \ - CheckCompressedData(bits_per_value / 8 <= sizeof(ELEMENT_TYPE)); \ + CheckCompressedData(bits_per_value <= sizeof(ELEMENT_TYPE) * 8); \ \ /* \ * The last block might have less values than normal, but we have \ From d8869139d19286563813a5a50d89bf95a101ef71 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 19 Dec 2023 16:34:51 +0100 Subject: [PATCH 142/249] directory woes --- .github/workflows/libfuzzer.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index c898343f9cb..64f4ed6c025 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -299,7 +299,10 @@ jobs: - name: Change corpus path to please the 'actions/cache' GitHub Action if: always() run: | - mv -f db/corpus{,-${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }}} + tree db/corpus + tree db/corpus-bulk + tree db/corpus-rowbyrow + mv -fT db/corpus{,-${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }}} - name: Save fuzzer corpus if: always() From 39e07b017957b7bf7eebc13f0007039134a425ea Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 19 Dec 2023 16:50:41 +0100 Subject: [PATCH 143/249] rm --- .github/workflows/libfuzzer.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 64f4ed6c025..8a7b1c2d61e 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -299,9 +299,7 @@ jobs: - name: Change corpus path to please the 'actions/cache' GitHub Action if: always() run: | - tree db/corpus - tree db/corpus-bulk - tree db/corpus-rowbyrow + rm -rf db/corpus-{bulk,rowbyrow} ||: mv -fT db/corpus{,-${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }}} - name: Save fuzzer corpus From 00c9fdc59cd1e872d61d8f866b5e2a503c68292c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 19 Dec 2023 17:09:46 +0100 Subject: [PATCH 144/249] try release build --- .github/workflows/libfuzzer.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 8a7b1c2d61e..bfad24238a2 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -91,7 +91,7 @@ jobs: # http://web.archive.org/web/20231101091231/https://github.com/cms-sw/cmssw/issues/40680 cmake -B build -S . 
-DASSERTIONS=ON -DLINTER=OFF -DCMAKE_VERBOSE_MAKEFILE=1 \ - -DWARNINGS_AS_ERRORS=1 -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_COMPILER=clang \ + -DWARNINGS_AS_ERRORS=1 -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_COMPILER=clang \ -DCMAKE_C_FLAGS="-fsanitize=fuzzer-no-link -lstdc++ -L$LIBFUZZER_PATH -l:libclang_rt.fuzzer_no_main-x86_64.a -static-libsan" \ -DPG_PATH=$HOME/$PG_INSTALL_DIR From e42618e5c9de5021465518b0bbcb50710622968b Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 19 Dec 2023 17:19:43 +0100 Subject: [PATCH 145/249] cleanup --- .github/workflows/libfuzzer.yaml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index bfad24238a2..1209601c1c2 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -65,7 +65,7 @@ jobs: CC=clang ./configure --prefix=$HOME/$PG_INSTALL_DIR --with-openssl \ --without-readline --without-zlib --without-libxml --enable-cassert \ --enable-debug CC=clang \ - CFLAGS="-DTS_COMPRESSION_FUZZING=1 -fuse-ld=lld -ggdb3 -Og -fno-omit-frame-pointer" + CFLAGS="-DTS_COMPRESSION_FUZZING=1 -fuse-ld=lld -ggdb3 -O2 -fno-omit-frame-pointer" make -j$(nproc) - name: Install PostgreSQL @@ -200,7 +200,7 @@ jobs: export PGPORT=5432 export PGDATABASE=postgres export PATH=$HOME/$PG_INSTALL_DIR/bin:$PATH - pg_ctl -l postmaster.log start + pg_ctl -l postgres.log start psql -c "create extension timescaledb;" @@ -256,11 +256,14 @@ jobs: echo "Internal program errors: $errors" [ $errors -eq 0 ] || exit 1 + + # Shouldn't have any WARNINGS in the log. + ! grep -F "] WARNING: " postgres.log + - name: Collect the logs if: always() id: collectlogs run: | - find . -name postmaster.log -exec cat {} + > postgres.log # wait in case there are in-progress coredumps sleep 10 if coredumpctl -q list >/dev/null; then echo "coredumps=true" >>$GITHUB_OUTPUT; fi From c9b50eb1dee20e5c5bc999651373245a1dd8a619 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 19 Dec 2023 17:19:48 +0100 Subject: [PATCH 146/249] Display the welcome message as NOTICE Now we display it as WARNING and it makes it harder to grep the logs for failures such as broken memory contexts or tupdesc reference leaks, which are also reported as warnings. 
--- sql/notice.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/notice.sql b/sql/notice.sql index e05000baf73..74466c73ebe 100644 --- a/sql/notice.sql +++ b/sql/notice.sql @@ -17,7 +17,7 @@ BEGIN telemetry_string = E''; END CASE; - RAISE WARNING E'%\n%\n', + RAISE NOTICE E'%\n%\n', E'\nWELCOME TO\n' || E' _____ _ _ ____________ \n' || E'|_ _(_) | | | _ \\ ___ \\ \n' || From 3f115784e2ad2657ebdfeecac1d67b66b6cc1e18 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 19 Dec 2023 20:15:47 +0100 Subject: [PATCH 147/249] save interesting cases --- .github/workflows/libfuzzer.yaml | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 1209601c1c2..1f56a4d0ce9 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -241,17 +241,24 @@ jobs: ls db/corpus | wc -l + fn="ts_read_compressed_data_directory('${{ matrix.case.algo }}', + '${{ matrix.case.pgtype }}', 'corpus', '${{ matrix.case.bulk }}')" + # Show the statistics about fuzzing corpus psql -c "select count(*), location, min(sqlstate), min(path) - from ts_read_compressed_data_directory('${{ matrix.case.algo }}', - '${{ matrix.case.pgtype }}', 'corpus', '${{ matrix.case.bulk }}') + from $fn group by location order by count(*) desc " + # Save interesting cases because the caches are not available for download from UI + mkdir -p interesting + psql -qtAX "select distinct on (location) 'db/corpus/' || path from $fn + order by location, bytes desc + " | xargs cp -t interesting + # Check that we don't have any internal errors errors=$(psql -qtAX --set=ON_ERROR_STOP=1 -c "select count(*) - from ts_read_compressed_data_directory('${{ matrix.case.algo }}', - '${{ matrix.case.pgtype }}', 'corpus', '${{ matrix.case.bulk }}') + from $fn where sqlstate = 'XX000'") echo "Internal program errors: $errors" [ $errors -eq 0 ] || exit 1 @@ -284,6 +291,13 @@ jobs: name: Crash cases for ${{ steps.config.outputs.name }} path: db/crash-* + - name: Save interesting cases + if: always() + uses: actions/upload-artifact@v3 + with: + name: Interesting cases for ${{ steps.config.outputs.name }} + path: interesting/ + # We use separate restore/save actions, because the default action won't # save the updated folder after the cache hit. 
We also want to save the # cache after fuzzing errors, and the default action doesn't save after From 0270cfa38b0122f56eee7d6d3de5b1a69ba33dae Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 19 Dec 2023 20:42:37 +0100 Subject: [PATCH 148/249] forgotten switch --- .github/workflows/libfuzzer.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 1f56a4d0ce9..786f7851622 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -252,7 +252,7 @@ jobs: # Save interesting cases because the caches are not available for download from UI mkdir -p interesting - psql -qtAX "select distinct on (location) 'db/corpus/' || path from $fn + psql -qtAX -c "select distinct on (location) 'db/corpus/' || path from $fn order by location, bytes desc " | xargs cp -t interesting From a399ee8ffe48cb0c950166043a942ffe3519222c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 19 Dec 2023 20:56:16 +0100 Subject: [PATCH 149/249] fixes --- .github/workflows/libfuzzer.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 786f7851622..c6530a45e13 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -27,7 +27,7 @@ jobs: # Don't add ddebs here because the ddebs mirror is always 503 Service Unavailable. # If needed, install them before opening the core dump. sudo apt-get update - sudo apt-get install 7zip clang lld llvm flex bison lcov systemd-coredump gdb libipc-run-perl \ + sudo apt-get install 7zip clang lld llvm flex bison libipc-run-perl \ libtest-most-perl tree - name: Checkout TimescaleDB @@ -135,7 +135,7 @@ jobs: steps: - name: Install Linux dependencies - run: sudo apt install 7zip + run: sudo apt install 7zip systemd-coredump gdb - name: Checkout TimescaleDB uses: actions/checkout@v3 @@ -252,8 +252,8 @@ jobs: # Save interesting cases because the caches are not available for download from UI mkdir -p interesting - psql -qtAX -c "select distinct on (location) 'db/corpus/' || path from $fn - order by location, bytes desc + psql -qtAX -c "select distinct on (location) 'db/' || path from $fn + order by location, bytes " | xargs cp -t interesting # Check that we don't have any internal errors From 9a37838970f519fd3da0632267866549a18af95c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 19 Dec 2023 22:15:42 +0100 Subject: [PATCH 150/249] add more interesting files --- tsl/test/expected/compression_algos.out | 16 ++++++++++++---- .../0d699bc41031c7525fa65c0ab267f34f608eef6a | Bin 0 -> 24 bytes .../0dbf553220bcd27478f10999d679d564a11632a1 | 1 + .../13f402104e20e5a38290bdc5fec85a46ae36bd73 | Bin 0 -> 170 bytes .../1641a06baa3defcf9a1b704cb94ea3387f40f2ad | Bin 0 -> 27 bytes .../22e70b0d023eac54b28a067aac0ab8e4eb75887b | Bin 0 -> 25 bytes .../3862930f38ef2ac7387e3e47191234094aee7c0a | Bin 0 -> 41 bytes .../428361124252a8847f1182747c936696bc43543b | Bin 0 -> 85 bytes .../4cd1b3841a01a3abc7f1cec6325130fd109dee84 | Bin 0 -> 24 bytes .../58420143cbcd2fe40fd1409948b6a78d3bf14a32 | Bin 0 -> 14 bytes .../592e2bafa4637d9786e9d14c5f1ca512e0076940 | Bin 0 -> 27 bytes .../5ba93c9db0cff93f52b521d7420e43f6eda2784f | Bin 0 -> 1 bytes .../76023b236d960f02d7fb41c7a1fa4d28dafa7c2d | Bin 0 -> 1051 bytes 
.../9159cb8bcee7fcb95582f140960cdae72788d326 | 1 + .../a3d453f14af5370aae60089101d659fb12c3aff4 | Bin 0 -> 14 bytes .../a42c6cf1de3abfdea9b95f34687cbbe92b9a7383 | 1 + .../c13eaced24e2a5039d3fbeef655fc3cf827a2be7 | Bin 0 -> 32 bytes .../c88b988789743b6aad8ef68278fc383847a37ddf | Bin 0 -> 23 bytes .../f2f014ef49bdaf4ff29d4a7116feff81e7015283 | Bin 0 -> 48 bytes .../f41f46df995dd4c7690f27978152ead25ccd5c75 | Bin 0 -> 48 bytes .../127d1cd7df6314984a355234cad8daee2a0a6f85 | Bin 0 -> 89 bytes .../1a155dbc0885a4ce404a03ccad4f90e8dfb6838b | Bin 0 -> 22 bytes .../1bdd8892fa9cd727ebf1b97101a04456b8ba7bc2 | Bin 0 -> 14 bytes .../27de45122a7e9da969165f24f6942fc131cb17df | Bin 0 -> 56 bytes .../2a0fa91e546f986d25159ed1e7507ec4793df3a4 | Bin 0 -> 50 bytes .../2d79b560f5c1dde8b7841ae6d77d616f26f5b3ab | Bin 0 -> 145 bytes .../2e10aad1b62e9ad833ea94af977cd498ba7da057 | Bin 0 -> 49 bytes .../4e8af02cd72c9425df8c3164b3a14bc1b70c6498 | Bin 0 -> 26 bytes .../559b65125ca556ff1a57f82f9ae55a86b71c6296 | 1 + .../5ba93c9db0cff93f52b521d7420e43f6eda2784f | Bin 0 -> 1 bytes .../5c9409528b92b40afa79d037eadcb73b859e94e6 | Bin 0 -> 49 bytes .../62ca0c60044ab7f33aa33d10a4ae715ee06b7748 | Bin 0 -> 51 bytes .../687464af84370f5d43d25acba442acc7fd14beec | Bin 0 -> 50 bytes .../8c92cd8b3e908dad0b490baa09ee984fdf224b21 | Bin 0 -> 84 bytes .../95f1a48e7e1cbe4b91461f1251416245497ff131 | Bin 0 -> 8 bytes .../994cc577406fe37f59e27ea1028a9d0a814af721 | Bin 0 -> 42 bytes .../a42f35cc555c689a38ef471b21fad93692f36a9c | Bin 0 -> 48 bytes .../ba200d8a4886abcdba4174f4b131db56e9128785 | Bin 0 -> 88 bytes .../bf8b4530d8d246dd74ac53a13471bba17941dff7 | 1 + .../c92920944247d80c842eaa65fd01efec1c84c342 | 1 + .../d0f63f55c89c3220cd326e9395c12e2f4bd57942 | Bin 0 -> 1353 bytes .../ecbd22c462813a437898cfe2848a46e5d6a460c5 | Bin 0 -> 49 bytes .../eddf750270b16df6744f3bbfa6ee82271961f573 | Bin 0 -> 26 bytes .../efb13296f8f471aadfdf8083380d1e7ac9a6bbc5 | Bin 0 -> 49 bytes 44 files changed, 18 insertions(+), 4 deletions(-) create mode 100644 tsl/test/fuzzing/compression/array-text/0d699bc41031c7525fa65c0ab267f34f608eef6a create mode 100644 tsl/test/fuzzing/compression/array-text/0dbf553220bcd27478f10999d679d564a11632a1 create mode 100644 tsl/test/fuzzing/compression/array-text/13f402104e20e5a38290bdc5fec85a46ae36bd73 create mode 100644 tsl/test/fuzzing/compression/array-text/1641a06baa3defcf9a1b704cb94ea3387f40f2ad create mode 100644 tsl/test/fuzzing/compression/array-text/22e70b0d023eac54b28a067aac0ab8e4eb75887b create mode 100644 tsl/test/fuzzing/compression/array-text/3862930f38ef2ac7387e3e47191234094aee7c0a create mode 100644 tsl/test/fuzzing/compression/array-text/428361124252a8847f1182747c936696bc43543b create mode 100644 tsl/test/fuzzing/compression/array-text/4cd1b3841a01a3abc7f1cec6325130fd109dee84 create mode 100644 tsl/test/fuzzing/compression/array-text/58420143cbcd2fe40fd1409948b6a78d3bf14a32 create mode 100644 tsl/test/fuzzing/compression/array-text/592e2bafa4637d9786e9d14c5f1ca512e0076940 create mode 100644 tsl/test/fuzzing/compression/array-text/5ba93c9db0cff93f52b521d7420e43f6eda2784f create mode 100644 tsl/test/fuzzing/compression/array-text/76023b236d960f02d7fb41c7a1fa4d28dafa7c2d create mode 100644 tsl/test/fuzzing/compression/array-text/9159cb8bcee7fcb95582f140960cdae72788d326 create mode 100644 tsl/test/fuzzing/compression/array-text/a3d453f14af5370aae60089101d659fb12c3aff4 create mode 100644 tsl/test/fuzzing/compression/array-text/a42c6cf1de3abfdea9b95f34687cbbe92b9a7383 create mode 100644 
tsl/test/fuzzing/compression/array-text/c13eaced24e2a5039d3fbeef655fc3cf827a2be7 create mode 100644 tsl/test/fuzzing/compression/array-text/c88b988789743b6aad8ef68278fc383847a37ddf create mode 100644 tsl/test/fuzzing/compression/array-text/f2f014ef49bdaf4ff29d4a7116feff81e7015283 create mode 100644 tsl/test/fuzzing/compression/array-text/f41f46df995dd4c7690f27978152ead25ccd5c75 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/127d1cd7df6314984a355234cad8daee2a0a6f85 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/1a155dbc0885a4ce404a03ccad4f90e8dfb6838b create mode 100644 tsl/test/fuzzing/compression/dictionary-text/1bdd8892fa9cd727ebf1b97101a04456b8ba7bc2 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/27de45122a7e9da969165f24f6942fc131cb17df create mode 100644 tsl/test/fuzzing/compression/dictionary-text/2a0fa91e546f986d25159ed1e7507ec4793df3a4 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/2d79b560f5c1dde8b7841ae6d77d616f26f5b3ab create mode 100644 tsl/test/fuzzing/compression/dictionary-text/2e10aad1b62e9ad833ea94af977cd498ba7da057 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/4e8af02cd72c9425df8c3164b3a14bc1b70c6498 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/559b65125ca556ff1a57f82f9ae55a86b71c6296 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/5ba93c9db0cff93f52b521d7420e43f6eda2784f create mode 100644 tsl/test/fuzzing/compression/dictionary-text/5c9409528b92b40afa79d037eadcb73b859e94e6 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/62ca0c60044ab7f33aa33d10a4ae715ee06b7748 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/687464af84370f5d43d25acba442acc7fd14beec create mode 100644 tsl/test/fuzzing/compression/dictionary-text/8c92cd8b3e908dad0b490baa09ee984fdf224b21 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/95f1a48e7e1cbe4b91461f1251416245497ff131 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/994cc577406fe37f59e27ea1028a9d0a814af721 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/a42f35cc555c689a38ef471b21fad93692f36a9c create mode 100644 tsl/test/fuzzing/compression/dictionary-text/ba200d8a4886abcdba4174f4b131db56e9128785 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/bf8b4530d8d246dd74ac53a13471bba17941dff7 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/c92920944247d80c842eaa65fd01efec1c84c342 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/d0f63f55c89c3220cd326e9395c12e2f4bd57942 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/ecbd22c462813a437898cfe2848a46e5d6a460c5 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/eddf750270b16df6744f3bbfa6ee82271961f573 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/efb13296f8f471aadfdf8083380d1e7ac9a6bbc5 diff --git a/tsl/test/expected/compression_algos.out b/tsl/test/expected/compression_algos.out index db1e90777ae..4335e7913d6 100644 --- a/tsl/test/expected/compression_algos.out +++ b/tsl/test/expected/compression_algos.out @@ -1594,9 +1594,13 @@ group by 2 order by 1 desc ; count | rowbyrow_result -------+----------------- + 13 | XX001 + 4 | 08P01 3 | true - 1 | XX001 -(2 rows) + 1 | false + 1 | 22021 + 1 | 3F000 +(6 rows) \set algo dictionary \set type text @@ -1607,7 +1611,11 @@ group by 2 order by 1 desc ; count | rowbyrow_result -------+----------------- - 5 | XX001 + 22 | XX001 + 4 | 08P01 2 | true -(2 rows) + 1 | false + 1 | 22021 + 1 | 
3F000 +(6 rows) diff --git a/tsl/test/fuzzing/compression/array-text/0d699bc41031c7525fa65c0ab267f34f608eef6a b/tsl/test/fuzzing/compression/array-text/0d699bc41031c7525fa65c0ab267f34f608eef6a new file mode 100644 index 0000000000000000000000000000000000000000..10d1dbde63f8f5ea6d02dea14051cfefe4fc4fca GIT binary patch literal 24 dcmZQ%C`gY_PAo~x$xmk}Nv$YhU|?hb0svSF1=#=q literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/array-text/0dbf553220bcd27478f10999d679d564a11632a1 b/tsl/test/fuzzing/compression/array-text/0dbf553220bcd27478f10999d679d564a11632a1 new file mode 100644 index 00000000000..2791843a64e --- /dev/null +++ b/tsl/test/fuzzing/compression/array-text/0dbf553220bcd27478f10999d679d564a11632a1 @@ -0,0 +1 @@ +e \ No newline at end of file diff --git a/tsl/test/fuzzing/compression/array-text/13f402104e20e5a38290bdc5fec85a46ae36bd73 b/tsl/test/fuzzing/compression/array-text/13f402104e20e5a38290bdc5fec85a46ae36bd73 new file mode 100644 index 0000000000000000000000000000000000000000..1dc4c2a2edb10da51f8f08994c496e2d50de44cd GIT binary patch literal 170 zcmZQ%EJ%+}PAo~x$xmk}Nv$YhU}Rume!;-NAb>2)laiU1mRgjW_q4oWGej2v5FQCi literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/array-text/f41f46df995dd4c7690f27978152ead25ccd5c75 b/tsl/test/fuzzing/compression/array-text/f41f46df995dd4c7690f27978152ead25ccd5c75 new file mode 100644 index 0000000000000000000000000000000000000000..637bb34ce31ae11fa3ff0de4683a7536d889122c GIT binary patch literal 48 xcmZQ%EJ%+}PAo~x$xmk}Nv$YhU}Rume!;*1qV*UA{{R2aP!Hq*!FQk#3jjE?3w!_o literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/127d1cd7df6314984a355234cad8daee2a0a6f85 b/tsl/test/fuzzing/compression/dictionary-text/127d1cd7df6314984a355234cad8daee2a0a6f85 new file mode 100644 index 0000000000000000000000000000000000000000..db0d03a4eaad2f45bd3f283d42065695d8e53b3a GIT binary patch literal 89 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEh17+C&80Hc@=!#AAR literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/1a155dbc0885a4ce404a03ccad4f90e8dfb6838b b/tsl/test/fuzzing/compression/dictionary-text/1a155dbc0885a4ce404a03ccad4f90e8dfb6838b new file mode 100644 index 0000000000000000000000000000000000000000..d22df394c03b0fa0a9a43a4db406f6654341bbfd GIT binary patch literal 22 dcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?im002yG1=;`r literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/1bdd8892fa9cd727ebf1b97101a04456b8ba7bc2 b/tsl/test/fuzzing/compression/dictionary-text/1bdd8892fa9cd727ebf1b97101a04456b8ba7bc2 new file mode 100644 index 0000000000000000000000000000000000000000..c54e63b4f91b0f8f63a1572583df40e7ffeec09d GIT binary patch literal 14 VcmZQ#C`gY_PAo~x$xmls001731QY-O literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/27de45122a7e9da969165f24f6942fc131cb17df b/tsl/test/fuzzing/compression/dictionary-text/27de45122a7e9da969165f24f6942fc131cb17df new file mode 100644 index 0000000000000000000000000000000000000000..a0938d30ec118bd2414f44b579b243a6d8e3caf5 GIT binary patch literal 56 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEh1AyHUIzs literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/2d79b560f5c1dde8b7841ae6d77d616f26f5b3ab b/tsl/test/fuzzing/compression/dictionary-text/2d79b560f5c1dde8b7841ae6d77d616f26f5b3ab new file mode 100644 index 0000000000000000000000000000000000000000..674c892c76c9286bff06e619e40f7c514ee70d32 GIT binary patch literal 145 zcmZQ#EJ%+}PAo~x$xmk}Nv$YhU|?YU0wRDI1o#;k2)1_n+b&BegL#JCCu1Q{V( 
oVP*hzSwYPKF@fqK<^WC6aA1A`l?S~5 literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/4e8af02cd72c9425df8c3164b3a14bc1b70c6498 b/tsl/test/fuzzing/compression/dictionary-text/4e8af02cd72c9425df8c3164b3a14bc1b70c6498 new file mode 100644 index 0000000000000000000000000000000000000000..4a9f44897c69d60cd01eaadccd2a0caeb8273333 GIT binary patch literal 26 fcmZQ#EJ%+}PAo~x$xmk}Nv$YhU|?YU0wfp!XtD;+ literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/559b65125ca556ff1a57f82f9ae55a86b71c6296 b/tsl/test/fuzzing/compression/dictionary-text/559b65125ca556ff1a57f82f9ae55a86b71c6296 new file mode 100644 index 00000000000..d0cd44bd336 --- /dev/null +++ b/tsl/test/fuzzing/compression/dictionary-text/559b65125ca556ff1a57f82f9ae55a86b71c6296 @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/tsl/test/fuzzing/compression/dictionary-text/5ba93c9db0cff93f52b521d7420e43f6eda2784f b/tsl/test/fuzzing/compression/dictionary-text/5ba93c9db0cff93f52b521d7420e43f6eda2784f new file mode 100644 index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d GIT binary patch literal 1 IcmZPo000310RR91 literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/5c9409528b92b40afa79d037eadcb73b859e94e6 b/tsl/test/fuzzing/compression/dictionary-text/5c9409528b92b40afa79d037eadcb73b859e94e6 new file mode 100644 index 0000000000000000000000000000000000000000..591d905c1da3e3f4ba69bb004b2c456996c57235 GIT binary patch literal 49 tcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEiK7zFqk4D1@z8o+FjC;%6R2x$NS literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/62ca0c60044ab7f33aa33d10a4ae715ee06b7748 b/tsl/test/fuzzing/compression/dictionary-text/62ca0c60044ab7f33aa33d10a4ae715ee06b7748 new file mode 100644 index 0000000000000000000000000000000000000000..403168ba05e9b360d62e79fc2c311290886340b5 GIT binary patch literal 51 tcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEh1{zCvKkOKt#4qysM005_w4i*3a literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/687464af84370f5d43d25acba442acc7fd14beec b/tsl/test/fuzzing/compression/dictionary-text/687464af84370f5d43d25acba442acc7fd14beec new file mode 100644 index 0000000000000000000000000000000000000000..225c0114040ff659cf0f67624e2ef7226720f32e GIT binary patch literal 50 rcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEiK7zBVcD})6inHU%V3r+^j literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/8c92cd8b3e908dad0b490baa09ee984fdf224b21 b/tsl/test/fuzzing/compression/dictionary-text/8c92cd8b3e908dad0b490baa09ee984fdf224b21 new file mode 100644 index 0000000000000000000000000000000000000000..51520764e495d3caaf82e904d1f4bd0fef12bcc5 GIT binary patch literal 84 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEh17+C%T0powM|NnIuzUedo1q~q*%upJr M!0P{hpcpCu01WRGNB{r; literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/95f1a48e7e1cbe4b91461f1251416245497ff131 b/tsl/test/fuzzing/compression/dictionary-text/95f1a48e7e1cbe4b91461f1251416245497ff131 new file mode 100644 index 0000000000000000000000000000000000000000..aeef6e030439354e9b41baf40c724cd8a9126630 GIT binary patch literal 8 PcmZQ#OvvG8U|;|M1g8Mo literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/994cc577406fe37f59e27ea1028a9d0a814af721 b/tsl/test/fuzzing/compression/dictionary-text/994cc577406fe37f59e27ea1028a9d0a814af721 new file mode 100644 index 0000000000000000000000000000000000000000..511199a8bb6f937c28b86eaa98fae7102bbb2594 GIT binary patch literal 42 
scmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VKE?7+79_cy>U{z`(%(0O-^RbpQYW literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/a42f35cc555c689a38ef471b21fad93692f36a9c b/tsl/test/fuzzing/compression/dictionary-text/a42f35cc555c689a38ef471b21fad93692f36a9c new file mode 100644 index 0000000000000000000000000000000000000000..ac547d9c5236d04fc071c167226b3868cf26bec6 GIT binary patch literal 48 tcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEi~7)0_jaN0EhX(J$J000J;2hRWi literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/ba200d8a4886abcdba4174f4b131db56e9128785 b/tsl/test/fuzzing/compression/dictionary-text/ba200d8a4886abcdba4174f4b131db56e9128785 new file mode 100644 index 0000000000000000000000000000000000000000..e28432a6e458707f35b9cb5cc2b8752035b268ed GIT binary patch literal 88 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=03;Y07+C&80Hc@=!#AAH!SFv+qyzv7?i%s{ literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/bf8b4530d8d246dd74ac53a13471bba17941dff7 b/tsl/test/fuzzing/compression/dictionary-text/bf8b4530d8d246dd74ac53a13471bba17941dff7 new file mode 100644 index 00000000000..6b2aaa76407 --- /dev/null +++ b/tsl/test/fuzzing/compression/dictionary-text/bf8b4530d8d246dd74ac53a13471bba17941dff7 @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/tsl/test/fuzzing/compression/dictionary-text/c92920944247d80c842eaa65fd01efec1c84c342 b/tsl/test/fuzzing/compression/dictionary-text/c92920944247d80c842eaa65fd01efec1c84c342 new file mode 100644 index 00000000000..03afaa5de0a --- /dev/null +++ b/tsl/test/fuzzing/compression/dictionary-text/c92920944247d80c842eaa65fd01efec1c84c342 @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/tsl/test/fuzzing/compression/dictionary-text/d0f63f55c89c3220cd326e9395c12e2f4bd57942 b/tsl/test/fuzzing/compression/dictionary-text/d0f63f55c89c3220cd326e9395c12e2f4bd57942 new file mode 100644 index 0000000000000000000000000000000000000000..4932a19bda51d3a96bf3134b4b892973d12fba66 GIT binary patch literal 1353 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEWG_!c9OfC7F721W)BGcz+iAO-@MIG7ed z;W2Qcav{7r28KWsagYsW1_qYq7Ut##=7t79^Fas%0+1|*h+rXE;GlqH0Mv@bM3RN5 mV+zUE4!8!qfu}Lx&Kq^WXb6xN0tOh$IWR;J0uBf!oCN?mJtTYp literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/ecbd22c462813a437898cfe2848a46e5d6a460c5 b/tsl/test/fuzzing/compression/dictionary-text/ecbd22c462813a437898cfe2848a46e5d6a460c5 new file mode 100644 index 0000000000000000000000000000000000000000..35533760aece0a52b22a5a8f1cd2c0e539845dde GIT binary patch literal 49 lcmZQ#C`gY_PAo~x$xmk}Nv$YhfC5GahCm<_NFf781_1FM1}y*p literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/eddf750270b16df6744f3bbfa6ee82271961f573 b/tsl/test/fuzzing/compression/dictionary-text/eddf750270b16df6744f3bbfa6ee82271961f573 new file mode 100644 index 0000000000000000000000000000000000000000..ced40bbf3273047305dfb6edb0df3ecca4683b29 GIT binary patch literal 26 hcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=@&7*q0|0Mg2*LmW literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/efb13296f8f471aadfdf8083380d1e7ac9a6bbc5 b/tsl/test/fuzzing/compression/dictionary-text/efb13296f8f471aadfdf8083380d1e7ac9a6bbc5 new file mode 100644 index 0000000000000000000000000000000000000000..7fd7fa3f27c6bfa6f020fe6b032503728dbacd13 GIT binary patch literal 49 ucmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEiI|Ns9VNWWkN5 Date: Tue, 19 Dec 2023 22:17:37 +0100 Subject: [PATCH 151/249] more iterations for dictionary --- .github/workflows/libfuzzer.yaml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index c6530a45e13..35f9d87ce68 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -124,7 +124,7 @@ jobs: # array has a peculiar recv function that recompresses all input, so # fuzzing it is much slower. The dictionary recv also uses it. { algo: array , pgtype: text , bulk: false, runs: 10000000 }, - { algo: dictionary, pgtype: text , bulk: false, runs: 10000000 }, + { algo: dictionary, pgtype: text , bulk: false, runs: 100000000 }, ] name: Fuzz decompression ${{ matrix.case.algo }} ${{ matrix.case.pgtype }} ${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }} From 9a07952cf4d7f0a8e034bbcb6618532b7ca83ced Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 19 Dec 2023 22:19:34 +0100 Subject: [PATCH 152/249] more iterations --- .github/workflows/libfuzzer.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 67093af5cc0..912a5287086 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -123,10 +123,10 @@ jobs: { algo: deltadelta, pgtype: int8 , bulk: true , runs: 1000000000 }, # array has a peculiar recv function that recompresses all input, so # fuzzing it is much slower. The dictionary recv also uses it. - { algo: array , pgtype: text , bulk: true , runs: 10000000 }, - { algo: dictionary, pgtype: text , bulk: true , runs: 10000000 }, { algo: array , pgtype: text , bulk: false, runs: 10000000 }, + { algo: array , pgtype: text , bulk: true , runs: 10000000 }, { algo: dictionary, pgtype: text , bulk: false, runs: 100000000 }, + { algo: dictionary, pgtype: text , bulk: true , runs: 100000000 }, ] name: Fuzz decompression ${{ matrix.case.algo }} ${{ matrix.case.pgtype }} ${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }} From 5b06b362e0bf1d4a7662e8536e4162ed0e0fe891 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 19 Dec 2023 22:20:28 +0100 Subject: [PATCH 153/249] benchmark decompression cleanup (2023-12-19 no. 11) From 7ce1a55cc0c058d932eaff237f732bad06651f5e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 20 Dec 2023 11:37:54 +0100 Subject: [PATCH 154/249] fix --- tsl/src/compression/deltadelta_impl.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/tsl/src/compression/deltadelta_impl.c b/tsl/src/compression/deltadelta_impl.c index f51ff9804d4..3c307bb7451 100644 --- a/tsl/src/compression/deltadelta_impl.c +++ b/tsl/src/compression/deltadelta_impl.c @@ -75,6 +75,9 @@ FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, Memory * * Also tried using SIMD prefix sum from here twice: * https://en.algorithmica.org/hpc/algorithms/prefix/, it's slower. + * + * Also tried zig-zag decoding in a separate loop, seems to be slightly + * slower, around the noise threshold. 
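+	 *
+	 * For reference, zig-zag coding maps small signed deltas to small
+	 * unsigned values (0, -1, 1, -2, ... -> 0, 1, 2, 3, ...). A sketch of
+	 * the conventional decode step (illustrative only, it may differ in
+	 * detail from the zig_zag_decode() used here):
+	 *
+	 *   static inline uint64 zig_zag_decode_sketch(uint64 u)
+	 *   {
+	 *       return (u >> 1) ^ (~(u & 1) + 1);
+	 *   }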
*/ #define INNER_LOOP_SIZE 8 Assert(n_notnull_padded % INNER_LOOP_SIZE == 0); @@ -82,15 +85,7 @@ FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, Memory { for (uint16 inner = 0; inner < INNER_LOOP_SIZE; inner++) { - deltas_zigzag[outer + inner] = zig_zag_decode(deltas_zigzag[outer + inner]); - } - } - - for (uint16 outer = 0; outer < n_notnull_padded; outer += INNER_LOOP_SIZE) - { - for (uint16 inner = 0; inner < INNER_LOOP_SIZE; inner++) - { - current_delta += deltas_zigzag[outer + inner]; + current_delta += zig_zag_decode(deltas_zigzag[outer + inner]); current_element += current_delta; decompressed_values[outer + inner] = current_element; } From b3ed50b2c8625f48a76761d0d00fd60e312f5f7b Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 21 Dec 2023 15:02:10 +0100 Subject: [PATCH 155/249] Bulk decompression of text columns Implement bulk decompression for text columns. This will allow us to use them in the vectorized computation pipeline. --- .github/workflows/libfuzzer.yaml | 2 + tsl/src/compression/array.c | 153 ++++++++++++++++++ tsl/src/compression/array.h | 7 + tsl/src/compression/arrow_c_data_interface.h | 7 + tsl/src/compression/compression.c | 7 + tsl/src/compression/compression_test.c | 4 +- .../compression/decompress_text_test_impl.c | 123 ++++++++++++-- tsl/src/compression/dictionary.c | 119 ++++++++++++++ tsl/src/compression/dictionary.h | 7 + .../compression/simple8b_rle_decompress_all.h | 3 + .../nodes/decompress_chunk/compressed_batch.c | 78 ++++++++- .../nodes/decompress_chunk/compressed_batch.h | 6 +- tsl/src/nodes/decompress_chunk/exec.c | 11 +- tsl/test/expected/compression_algos.out | 42 ++--- tsl/test/expected/decompress_vector_qual.out | 2 +- ...h-e5143387e8896dcfb0f95f8111538502cee38ce0 | Bin 0 -> 1122 bytes tsl/test/sql/compression_algos.sql | 10 +- tsl/test/sql/decompress_vector_qual.sql | 2 +- 18 files changed, 535 insertions(+), 48 deletions(-) create mode 100644 tsl/test/fuzzing/compression/dictionary-text/crash-e5143387e8896dcfb0f95f8111538502cee38ce0 diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 35f9d87ce68..912a5287086 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -124,7 +124,9 @@ jobs: # array has a peculiar recv function that recompresses all input, so # fuzzing it is much slower. The dictionary recv also uses it. 
{ algo: array , pgtype: text , bulk: false, runs: 10000000 }, + { algo: array , pgtype: text , bulk: true , runs: 10000000 }, { algo: dictionary, pgtype: text , bulk: false, runs: 100000000 }, + { algo: dictionary, pgtype: text , bulk: true , runs: 100000000 }, ] name: Fuzz decompression ${{ matrix.case.algo }} ${{ matrix.case.pgtype }} ${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }} diff --git a/tsl/src/compression/array.c b/tsl/src/compression/array.c index febd6937f1d..d240ba7bef7 100644 --- a/tsl/src/compression/array.c +++ b/tsl/src/compression/array.c @@ -17,8 +17,11 @@ #include "compression/array.h" #include "compression/compression.h" #include "compression/simple8b_rle.h" +#include "compression/simple8b_rle_bitmap.h" #include "datum_serialize.h" +#include "compression/arrow_c_data_interface.h" + /* A "compressed" array * uint8 has_nulls: 1 iff this has a nulls bitmap stored before the data * Oid element_type: the element stored by this array @@ -460,6 +463,156 @@ tsl_array_decompression_iterator_from_datum_reverse(Datum compressed_array, Oid return &iterator->base; } + +#define ELEMENT_TYPE uint32 +#include "simple8b_rle_decompress_all.h" +#undef ELEMENT_TYPE + +ArrowArray * +tsl_text_array_decompress_all(Datum compressed_array, Oid element_type, MemoryContext dest_mctx) +{ + Assert(element_type == TEXTOID); + void *compressed_data = PG_DETOAST_DATUM(compressed_array); + StringInfoData si = { .data = compressed_data, .len = VARSIZE(compressed_data) }; + ArrayCompressed *header = consumeCompressedData(&si, sizeof(ArrayCompressed)); + + Assert(header->compression_algorithm == COMPRESSION_ALGORITHM_ARRAY); + CheckCompressedData(header->element_type == TEXTOID); + + return text_array_decompress_all_serialized_no_header(&si, header->has_nulls, dest_mctx); +} + +ArrowArray * +text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, + MemoryContext dest_mctx) +{ + Simple8bRleSerialized *nulls_serialized = NULL; + if (has_nulls) + { + nulls_serialized = bytes_deserialize_simple8b_and_advance(si); + } + + Simple8bRleSerialized *sizes_serialized = bytes_deserialize_simple8b_and_advance(si); + + uint32 sizes[GLOBAL_MAX_ROWS_PER_COMPRESSION]; + const uint16 n_notnull = + simple8brle_decompress_all_buf_uint32(sizes_serialized, + sizes, + sizeof(sizes) / sizeof(sizes[0])); + const int n_total = has_nulls ? nulls_serialized->num_elements : n_notnull; + + uint32 *offsets = + (uint32 *) MemoryContextAllocZero(dest_mctx, pad_to_multiple(64, sizeof(*offsets) * (n_total + 1))); + uint8 *arrow_bodies = (uint8 *) MemoryContextAllocZero(dest_mctx, pad_to_multiple(64, si->len - si->cursor)); + + uint32 offset = 0; + for (int i = 0; i < n_notnull; i++) + { + void *vardata = consumeCompressedData(si, sizes[i]); + /* + * Check for potentially corrupt varlena headers since we're reading them + * directly from compressed data. We can only have a plain datum + * with 1-byte or 4-byte header here, no TOAST or compressed data. + */ + CheckCompressedData(VARATT_IS_4B_U(vardata) || + (VARATT_IS_1B(vardata) && !VARATT_IS_1B_E(vardata))); + /* + * Full varsize must be larger or equal than the header size so that the + * calculation of size without header doesn't overflow. + */ + CheckCompressedData((VARATT_IS_1B(vardata) && VARSIZE_1B(vardata) >= VARHDRSZ_SHORT) || + (VARSIZE_4B(vardata) >= VARHDRSZ)); + /* Varsize must match the size stored in the sizes array for this element. 
*/ + CheckCompressedData(VARSIZE_ANY(vardata) == sizes[i]); + + const uint32 textlen = VARSIZE_ANY_EXHDR(vardata); + memcpy(&arrow_bodies[offset], VARDATA_ANY(vardata), textlen); + + // fprintf(stderr, + // "%d: copied: '%s' len %d varsize %d result %.*s\n", + // i, + // text_to_cstring(vardata), + // textlen, + // (int) VARSIZE_ANY(vardata), + // textlen, + // &arrow_bodies[offset]); + + offsets[i] = offset; + + CheckCompressedData(offset <= offset + textlen); /* Check for overflow. */ + offset += textlen; + } + offsets[n_notnull] = offset; + + const int validity_bitmap_bytes = sizeof(uint64) * (pad_to_multiple(64, n_total) / 64); + uint64 *restrict validity_bitmap = MemoryContextAlloc(dest_mctx, validity_bitmap_bytes); + memset(validity_bitmap, 0xFF, validity_bitmap_bytes); + + if (has_nulls) + { + /* + * We have decompressed the data with nulls skipped, reshuffle it + * according to the nulls bitmap. + */ + Simple8bRleBitmap nulls = simple8brle_bitmap_decompress(nulls_serialized); + CheckCompressedData(n_notnull + simple8brle_bitmap_num_ones(&nulls) == n_total); + + int current_notnull_element = n_notnull - 1; + for (int i = n_total - 1; i >= 0; i--) + { + Assert(i >= current_notnull_element); + + /* + * The index of the corresponding offset is higher by one than + * the index of the element. The offset[0] is never affected by + * this shuffling and is always 0. + * Note that unlike the usual null reshuffling in other algorithms, + * for offsets, even if all elements are null, the starting offset + * is well-defined and we can do this assignment. This case is only + * accessible through fuzzing. Through SQL, all-null batches result + * in a null compressed value. + */ + Assert(current_notnull_element + 1 >= 0); + offsets[i + 1] = offsets[current_notnull_element + 1]; + + if (simple8brle_bitmap_get_at(&nulls, i)) + { + arrow_set_row_validity(validity_bitmap, i, false); + } + else + { + Assert(current_notnull_element >= 0); + current_notnull_element--; + } + } + + Assert(current_notnull_element == -1); + } + else + { + /* + * The validity bitmap size is a multiple of 64 bits. Fill the tail bits + * with zeros, because the corresponding elements are not valid. 
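+	 *
+	 * Worked example (hypothetical numbers): for n_total = 70, the tail
+	 * mask computed below is -1ULL >> (64 - 70 % 64) = -1ULL >> 58 = 0x3F,
+	 * so only the low 6 validity bits of the last bitmap word remain set.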
+ */ + if (n_total % 64) + { + const uint64 tail_mask = -1ULL >> (64 - n_total % 64); + validity_bitmap[n_total / 64] &= tail_mask; + } + } + + ArrowArray *result = MemoryContextAllocZero(dest_mctx, sizeof(ArrowArray) + sizeof(void *) * 3); + const void **buffers = (const void **) &result[1]; + buffers[0] = validity_bitmap; + buffers[1] = offsets; + buffers[2] = arrow_bodies; + result->n_buffers = 3; + result->buffers = buffers; + result->length = n_total; + result->null_count = n_total - n_notnull; + return result; +} + DecompressResult array_decompression_iterator_try_next_reverse(DecompressionIterator *base_iter) { diff --git a/tsl/src/compression/array.h b/tsl/src/compression/array.h index 25421fd85c7..d2cdec905f2 100644 --- a/tsl/src/compression/array.h +++ b/tsl/src/compression/array.h @@ -64,6 +64,12 @@ extern void array_compressed_send(CompressedDataHeader *header, StringInfo buffe extern Datum tsl_array_compressor_append(PG_FUNCTION_ARGS); extern Datum tsl_array_compressor_finish(PG_FUNCTION_ARGS); +ArrowArray *tsl_text_array_decompress_all(Datum compressed_array, Oid element_type, + MemoryContext dest_mctx); + +ArrowArray *text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, + MemoryContext dest_mctx); + #define ARRAY_ALGORITHM_DEFINITION \ { \ .iterator_init_forward = tsl_array_decompression_iterator_from_datum_forward, \ @@ -72,4 +78,5 @@ extern Datum tsl_array_compressor_finish(PG_FUNCTION_ARGS); .compressed_data_recv = array_compressed_recv, \ .compressor_for_type = array_compressor_for_type, \ .compressed_data_storage = TOAST_STORAGE_EXTENDED, \ + .decompress_all = tsl_text_array_decompress_all, \ } diff --git a/tsl/src/compression/arrow_c_data_interface.h b/tsl/src/compression/arrow_c_data_interface.h index 186b4b68b35..64393c91b60 100644 --- a/tsl/src/compression/arrow_c_data_interface.h +++ b/tsl/src/compression/arrow_c_data_interface.h @@ -153,3 +153,10 @@ arrow_set_row_validity(uint64 *bitmap, size_t row_number, bool value) Assert(arrow_row_is_valid(bitmap, row_number) == value); } + +/* Increase the `source_value` to be an even multiple of `pad_to`. */ +static inline uint64 +pad_to_multiple(uint64 pad_to, uint64 source_value) +{ + return ((source_value + pad_to - 1) / pad_to) * pad_to; +} diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c index 0e0c239dbc4..07aac35133d 100644 --- a/tsl/src/compression/compression.c +++ b/tsl/src/compression/compression.c @@ -133,6 +133,13 @@ tsl_get_decompress_all_function(CompressionAlgorithm algorithm, Oid type) if (algorithm >= _END_COMPRESSION_ALGORITHMS) elog(ERROR, "invalid compression algorithm %d", algorithm); + if (type != TEXTOID && + (algorithm == COMPRESSION_ALGORITHM_DICTIONARY || algorithm == COMPRESSION_ALGORITHM_ARRAY)) + { + /* Bulk decompression of array and dictionary is only supported for text. 
*/ + return NULL; + } + return definitions[algorithm].decompress_all; } diff --git a/tsl/src/compression/compression_test.c b/tsl/src/compression/compression_test.c index 55fbc5b03e9..6f66db196e9 100644 --- a/tsl/src/compression/compression_test.c +++ b/tsl/src/compression/compression_test.c @@ -75,7 +75,9 @@ get_compression_algorithm(char *name) X(DELTADELTA, INT8, true) \ X(DELTADELTA, INT8, false) \ X(ARRAY, TEXT, false) \ - X(DICTIONARY, TEXT, false) + X(ARRAY, TEXT, true) \ + X(DICTIONARY, TEXT, false) \ + X(DICTIONARY, TEXT, true) static int (*get_decompress_fn(int algo, Oid type))(const uint8 *Data, size_t Size, bool bulk) { diff --git a/tsl/src/compression/decompress_text_test_impl.c b/tsl/src/compression/decompress_text_test_impl.c index d90ed30cebe..ddb5e5a89ad 100644 --- a/tsl/src/compression/decompress_text_test_impl.c +++ b/tsl/src/compression/decompress_text_test_impl.c @@ -11,20 +11,93 @@ #include "compression_test.h" +#include "arrow_c_data_interface.h" + +static uint32 +arrow_get_str(ArrowArray *arrow, int arrow_row, const char **str) +{ + if (!arrow->dictionary) + { + const uint32 *offsets = (uint32 *) arrow->buffers[1]; + const char *values = (char *) arrow->buffers[2]; + + const uint32 start = offsets[arrow_row]; + const uint32 end = offsets[arrow_row + 1]; + const uint32 arrow_len = end - start; + + *str = &values[start]; + return arrow_len; + } + + const int16 dict_row = ((int16 *) arrow->buffers[1])[arrow_row]; + return arrow_get_str(arrow->dictionary, dict_row, str); +} + +static void +decompress_generic_text_check_arrow(ArrowArray *arrow, int errorlevel, DecompressResult *results, + int n) +{ + /* Check that both ways of decompression match. */ + if (n != arrow->length) + { + ereport(errorlevel, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("the bulk decompression result does not match"), + errdetail("Expected %d elements, got %d.", n, (int) arrow->length))); + } + + for (int i = 0; i < n; i++) + { + const bool arrow_isnull = !arrow_row_is_valid(arrow->buffers[0], i); + if (arrow_isnull != results[i].is_null) + { + ereport(errorlevel, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("the bulk decompression result does not match"), + errdetail("Expected null %d, got %d at row %d.", + results[i].is_null, + arrow_isnull, + i))); + } + + if (!results[i].is_null) + { + const char *arrow_cstring; + size_t arrow_len = arrow_get_str(arrow, i, &arrow_cstring); + + const Datum rowbyrow_varlena = results[i].val; + const size_t rowbyrow_len = VARSIZE_ANY_EXHDR(rowbyrow_varlena); + const char *rowbyrow_cstring = VARDATA_ANY(rowbyrow_varlena); + + // fprintf(stderr, "arrow: '%.*s'(%ld), rbr: '%.*s'(%ld)\n", + // (int) arrow_len, arrow_cstring, arrow_len, + // (int) rowbyrow_len, rowbyrow_cstring, rowbyrow_len); + + if (rowbyrow_len != arrow_len) + { + ereport(errorlevel, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("the bulk decompression result does not match"), + errdetail("At row %d\n", i))); + } + + if (strncmp(arrow_cstring, rowbyrow_cstring, rowbyrow_len)) + { + ereport(errorlevel, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("the bulk decompression result does not match"), + errdetail("At row %d\n", i))); + } + } + } +} + /* - * Try to decompress the given compressed data. Used for fuzzing and for checking - * the examples found by fuzzing. For fuzzing we do less checks to keep it - * faster and the coverage space smaller. This is a generic implementation - * for arithmetic types. + * Try to decompress the given compressed data. 
*/ static int decompress_generic_text(const uint8 *Data, size_t Size, bool bulk, int requested_algo) { - if (bulk) - { - elog(ERROR, "bulk decompression not supported for text"); - } - StringInfoData si = { .data = (char *) Data, .len = Size }; const int data_algo = pq_getmsgbyte(&si); @@ -40,9 +113,19 @@ decompress_generic_text(const uint8 *Data, size_t Size, bool bulk, int requested */ return -1; } - const CompressionAlgorithmDefinition *def = algorithm_definition(data_algo); Datum compressed_data = def->compressed_data_recv(&si); + DecompressAllFunction decompress_all = tsl_get_decompress_all_function(data_algo, TEXTOID); + + ArrowArray *arrow = NULL; + if (bulk) + { + /* + * Check that the arrow decompression works. Have to do this before the + * row-by-row decompression so that it doesn't hide the possible errors. + */ + arrow = decompress_all(compressed_data, TEXTOID, CurrentMemoryContext); + } /* * Test row-by-row decompression. @@ -60,17 +143,20 @@ decompress_generic_text(const uint8 *Data, size_t Size, bool bulk, int requested results[n++] = r; } - /* - * For row-by-row decompression, check that the result is still the same - * after we compress and decompress back. - * Don't perform this check for other types of tests. - */ if (bulk) { + /* + * Check that the arrow decompression result matches. + */ + decompress_generic_text_check_arrow(arrow, ERROR, results, n); return n; } /* + * For row-by-row decompression, check that the result is still the same + * after we compress and decompress back. + * Don't perform this check for other types of tests. + * * 1) Compress. */ Compressor *compressor = def->compressor_for_type(TEXTOID); @@ -136,6 +222,13 @@ decompress_generic_text(const uint8 *Data, size_t Size, bool bulk, int requested } } + /* + * 3) The bulk decompression must absolutely work on the correct compressed + * data we've just generated. 
+ */ + arrow = decompress_all(compressed_data, TEXTOID, CurrentMemoryContext); + decompress_generic_text_check_arrow(arrow, PANIC, results, n); + return n; } diff --git a/tsl/src/compression/dictionary.c b/tsl/src/compression/dictionary.c index 0ad9b600d89..d4976d1f766 100644 --- a/tsl/src/compression/dictionary.c +++ b/tsl/src/compression/dictionary.c @@ -22,9 +22,11 @@ #include "compression/compression.h" #include "compression/dictionary.h" #include "compression/simple8b_rle.h" +#include "compression/simple8b_rle_bitmap.h" #include "compression/array.h" #include "compression/dictionary_hash.h" #include "compression/datum_serialize.h" +#include "compression/arrow_c_data_interface.h" /* * A compression bitmap is stored as @@ -334,6 +336,13 @@ dictionary_compressor_finish(DictionaryCompressor *compressor) average_element_size = sizes.dictionary_size / sizes.num_distinct; expected_array_size = average_element_size * sizes.dictionary_compressed_indexes->num_elements; compressed = dictionary_compressed_from_serialization_info(sizes, compressor->type); + // fprintf(stderr, + // "dict size %ld, distinct %ld, avg element size %ld, easize %ld, totalsize %ld\n", + // sizes.dictionary_size, + // (uint64) sizes.num_distinct, + // average_element_size, + // expected_array_size, + // sizes.total_size); if (expected_array_size < sizes.total_size) return dictionary_compressed_to_array_compressed(compressed); @@ -395,6 +404,116 @@ dictionary_decompression_iterator_init(DictionaryDecompressionIterator *iter, co } Assert(array_decompression_iterator_try_next_forward(dictionary_iterator).is_done); } + +#define ELEMENT_TYPE int16 +#include "simple8b_rle_decompress_all.h" +#undef ELEMENT_TYPE + +ArrowArray * +tsl_text_dictionary_decompress_all(Datum compressed, Oid element_type, MemoryContext dest_mctx) +{ + Assert(element_type == TEXTOID); + + compressed = PointerGetDatum(PG_DETOAST_DATUM(compressed)); + + StringInfoData si = { .data = DatumGetPointer(compressed), .len = VARSIZE(compressed) }; + + const DictionaryCompressed *header = consumeCompressedData(&si, sizeof(DictionaryCompressed)); + + Assert(header->compression_algorithm == COMPRESSION_ALGORITHM_DICTIONARY); + CheckCompressedData(header->element_type == TEXTOID); + + Simple8bRleSerialized *indices_serialized = bytes_deserialize_simple8b_and_advance(&si); + + Simple8bRleSerialized *nulls_serialized = NULL; + if (header->has_nulls) + { + nulls_serialized = bytes_deserialize_simple8b_and_advance(&si); + } + + const uint16 n_notnull = indices_serialized->num_elements; + const uint16 n_total = header->has_nulls ? nulls_serialized->num_elements : n_notnull; + const uint16 n_padded = n_total + 63; /* This is the padding requirement of simple8brle_decompress_all. */ + int16 *restrict indices = MemoryContextAlloc(dest_mctx, sizeof(int16) * n_padded); + + const uint16 n_decompressed = + simple8brle_decompress_all_buf_int16(indices_serialized, indices, n_padded); + CheckCompressedData(n_decompressed == n_notnull); + + /* Check that the dictionary indices that we've just read are not out of bounds. */ + CheckCompressedData(header->num_distinct <= GLOBAL_MAX_ROWS_PER_COMPRESSION); + CheckCompressedData(header->num_distinct <= INT16_MAX); + bool have_incorrect_index = false; + for (int i = 0; i < n_notnull; i++) + { + have_incorrect_index |= indices[i] >= (int16) header->num_distinct; + } + CheckCompressedData(!have_incorrect_index); + + /* Decompress the actual values in the dictionary. 
*/ + ArrowArray *dict = + text_array_decompress_all_serialized_no_header(&si, /* has_nulls = */ false, dest_mctx); + CheckCompressedData(header->num_distinct == dict->length); + + /* Fill validity and indices of the array elements, reshuffling for nulls if needed. */ + const int validity_bitmap_bytes = sizeof(uint64) * pad_to_multiple(64, n_total) / 64; + uint64 *restrict validity_bitmap = MemoryContextAlloc(dest_mctx, validity_bitmap_bytes); + memset(validity_bitmap, 0xFF, validity_bitmap_bytes); + + if (header->has_nulls) + { + /* + * We have decompressed the data with nulls skipped, reshuffle it + * according to the nulls bitmap. + */ + Simple8bRleBitmap nulls = simple8brle_bitmap_decompress(nulls_serialized); + CheckCompressedData(n_notnull + simple8brle_bitmap_num_ones(&nulls) == n_total); + + int current_notnull_element = n_notnull - 1; + for (int i = n_total - 1; i >= 0; i--) + { + Assert(i >= current_notnull_element); + + if (simple8brle_bitmap_get_at(&nulls, i)) + { + arrow_set_row_validity(validity_bitmap, i, false); + indices[i] = 0; + } + else + { + Assert(current_notnull_element >= 0); + indices[i] = indices[current_notnull_element]; + current_notnull_element--; + } + } + + Assert(current_notnull_element == -1); + } + else + { + /* + * The validity bitmap size is a multiple of 64 bits. Fill the tail bits + * with zeros, because the corresponding elements are not valid. + */ + if (n_total % 64) + { + const uint64 tail_mask = -1ULL >> (64 - n_total % 64); + validity_bitmap[n_total / 64] &= tail_mask; + } + } + + ArrowArray *result = MemoryContextAllocZero(dest_mctx, sizeof(ArrowArray) + sizeof(void *) * 2); + const void **buffers = (const void **) &result[1]; + buffers[0] = validity_bitmap; + buffers[1] = indices; + result->n_buffers = 2; + result->buffers = buffers; + result->length = n_total; + result->null_count = n_total - n_notnull; + result->dictionary = dict; + return result; +} + DecompressionIterator * tsl_dictionary_decompression_iterator_from_datum_forward(Datum dictionary_compressed, Oid element_type) diff --git a/tsl/src/compression/dictionary.h b/tsl/src/compression/dictionary.h index 06260d97d86..c9fef856f9d 100644 --- a/tsl/src/compression/dictionary.h +++ b/tsl/src/compression/dictionary.h @@ -47,6 +47,12 @@ extern Datum dictionary_compressed_recv(StringInfo buf); extern Datum tsl_dictionary_compressor_append(PG_FUNCTION_ARGS); extern Datum tsl_dictionary_compressor_finish(PG_FUNCTION_ARGS); +ArrowArray *tsl_text_array_decompress_all(Datum compressed_array, Oid element_type, + MemoryContext dest_mctx); + +ArrowArray *tsl_text_dictionary_decompress_all(Datum compressed, Oid element_type, + MemoryContext dest_mctx); + #define DICTIONARY_ALGORITHM_DEFINITION \ { \ .iterator_init_forward = tsl_dictionary_decompression_iterator_from_datum_forward, \ @@ -55,4 +61,5 @@ extern Datum tsl_dictionary_compressor_finish(PG_FUNCTION_ARGS); .compressed_data_recv = dictionary_compressed_recv, \ .compressor_for_type = dictionary_compressor_for_type, \ .compressed_data_storage = TOAST_STORAGE_EXTENDED, \ + .decompress_all = tsl_text_dictionary_decompress_all, \ } diff --git a/tsl/src/compression/simple8b_rle_decompress_all.h b/tsl/src/compression/simple8b_rle_decompress_all.h index e20c4363928..52f8de8499b 100644 --- a/tsl/src/compression/simple8b_rle_decompress_all.h +++ b/tsl/src/compression/simple8b_rle_decompress_all.h @@ -10,6 +10,9 @@ /* * Specialization of bulk simple8brle decompression for a data type specified by * ELEMENT_TYPE macro. 
+ * + * The buffer must have a padding of 63 elements after the last one, because + * decompression is performed always in full blocks. */ static uint16 FUNCTION_NAME(simple8brle_decompress_all_buf, diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index efe39b82157..3504a79ea80 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -77,6 +77,23 @@ make_single_value_arrow(Oid pgtype, Datum datum, bool isnull) return arrow; } +static int +get_max_text_datum_size(ArrowArray *text_array) +{ + int maxbytes = 0; + uint32 *offsets = (uint32 *) text_array->buffers[1]; + for (int i = 0; i < text_array->length; i++) + { + const int curbytes = offsets[i + 1] - offsets[i]; + if (curbytes > maxbytes) + { + maxbytes = curbytes; + } + } + + return maxbytes; +} + static void decompress_column(DecompressContext *dcontext, DecompressBatchState *batch_state, int i) { @@ -170,8 +187,37 @@ decompress_column(DecompressContext *dcontext, DecompressBatchState *batch_state } else { - /* No variable-width columns support bulk decompression. */ - Assert(false); + /* + * Text column. Pre-allocate memory for its text Datum in the + * decompressed scan slot. We can't put direct references to Arrow + * memory there, because it doesn't have the varlena headers that + * Postgres expects for text. + */ + const int maxbytes = + VARHDRSZ + (arrow->dictionary ? get_max_text_datum_size(arrow->dictionary) : + get_max_text_datum_size(arrow)); + + *column_values->output_value = + PointerGetDatum(MemoryContextAlloc(batch_state->per_batch_context, maxbytes)); + + /* + * Set up the datum conversion based on whether we use the dictionary. + */ + if (arrow->dictionary == NULL) + { + column_values->decompression_type = DT_ArrowText; + column_values->buffers[0] = arrow->buffers[0]; + column_values->buffers[1] = arrow->buffers[1]; + column_values->buffers[2] = arrow->buffers[2]; + } + else + { + column_values->decompression_type = DT_ArrowTextDict; + column_values->buffers[0] = arrow->buffers[0]; + column_values->buffers[1] = arrow->dictionary->buffers[1]; + column_values->buffers[2] = arrow->dictionary->buffers[2]; + column_values->buffers[3] = arrow->buffers[1]; + } } } @@ -552,6 +598,21 @@ compressed_batch_set_compressed_tuple(DecompressContext *dcontext, MemoryContextSwitchTo(old_context); } +static void +store_text_datum(CompressedColumnValues *column_values, int arrow_row) +{ + const uint32 start = ((uint32 *) column_values->buffers[1])[arrow_row]; + const int32 value_bytes = ((uint32 *) column_values->buffers[1])[arrow_row + 1] - start; + Assert(value_bytes >= 0); + + const int total_bytes = value_bytes + VARHDRSZ; + Assert(DatumGetPointer(*column_values->output_value) != NULL); + SET_VARSIZE(*column_values->output_value, total_bytes); + memcpy(VARDATA(*column_values->output_value), + &((uint8 *) column_values->buffers[2])[start], + value_bytes); +} + /* * Construct the next tuple in the decompressed scan slot. * Doesn't check the quals. 
@@ -617,6 +678,19 @@ make_next_tuple(DecompressBatchState *batch_state, uint16 arrow_row, int num_com *column_values->output_isnull = !arrow_row_is_valid(column_values->buffers[0], arrow_row); } + else if (column_values->decompression_type == DT_ArrowText) + { + store_text_datum(column_values, arrow_row); + *column_values->output_isnull = + !arrow_row_is_valid(column_values->buffers[0], arrow_row); + } + else if (column_values->decompression_type == DT_ArrowTextDict) + { + const int16 index = ((int16 *) column_values->buffers[3])[arrow_row]; + store_text_datum(column_values, index); + *column_values->output_isnull = + !arrow_row_is_valid(column_values->buffers[0], arrow_row); + } else { /* A compressed column with default value, do nothing. */ diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.h b/tsl/src/nodes/decompress_chunk/compressed_batch.h index a20f961b127..eb53d0ac08d 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.h +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.h @@ -13,6 +13,8 @@ typedef struct ArrowArray ArrowArray; /* How to obtain the decompressed datum for individual row. */ typedef enum { + DT_ArrowTextDict = -4, + DT_ArrowText = -3, DT_Default = -2, DT_Iterator = -1, DT_Invalid = 0, @@ -36,8 +38,10 @@ typedef struct CompressedColumnValues * Depending on decompression type, they are as follows: * iterator: iterator * arrow fixed: validity, value + * arrow text: validity, uint32* offsets, void* bodies + * arrow dict text: validity, uint32* dict offsets, void* dict bodies, int16* indices */ - const void *restrict buffers[2]; + const void *restrict buffers[4]; /* * The source arrow array, if any. We don't use it for building the diff --git a/tsl/src/nodes/decompress_chunk/exec.c b/tsl/src/nodes/decompress_chunk/exec.c index 9fe7b766741..89f3e95c6ea 100644 --- a/tsl/src/nodes/decompress_chunk/exec.c +++ b/tsl/src/nodes/decompress_chunk/exec.c @@ -362,9 +362,14 @@ decompress_chunk_begin(CustomScanState *node, EState *estate, int eflags) CompressionColumnDescription *column = &dcontext->template_columns[i]; if (column->bulk_decompression_supported) { - /* Values array, with 64 element padding (actually we have less). */ - batch_memory_context_bytes += - (GLOBAL_MAX_ROWS_PER_COMPRESSION + 64) * column->value_bytes; + /* + * Values array, with 64 element padding (actually we have less). + * + * For variable-length types (we only have text) we can't + * estimate the width currently. + */ + batch_memory_context_bytes += (GLOBAL_MAX_ROWS_PER_COMPRESSION + 64) * + (column->value_bytes > 0 ? column->value_bytes : 16); /* Also nulls bitmap. 
*/ batch_memory_context_bytes += GLOBAL_MAX_ROWS_PER_COMPRESSION / (64 * sizeof(uint64)); diff --git a/tsl/test/expected/compression_algos.out b/tsl/test/expected/compression_algos.out index 4335e7913d6..b7cc5e3e409 100644 --- a/tsl/test/expected/compression_algos.out +++ b/tsl/test/expected/compression_algos.out @@ -1588,34 +1588,36 @@ group by 2, 3 order by 1 desc \set algo array \set type text select count(*) + , coalesce((bulk.rows >= 0)::text, bulk.sqlstate) bulk_result , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result -from :fn false) rowbyrow -group by 2 order by 1 desc +from :fn true) bulk join :fn false) rowbyrow using (path) +group by 2, 3 order by 1 desc ; - count | rowbyrow_result --------+----------------- - 13 | XX001 - 4 | 08P01 - 3 | true - 1 | false - 1 | 22021 - 1 | 3F000 + count | bulk_result | rowbyrow_result +-------+-------------+----------------- + 14 | XX001 | XX001 + 4 | 08P01 | 08P01 + 2 | true | true + 1 | 22021 | 22021 + 1 | 3F000 | 3F000 + 1 | false | false (6 rows) \set algo dictionary \set type text select count(*) + , coalesce((bulk.rows >= 0)::text, bulk.sqlstate) bulk_result , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result -from :fn false) rowbyrow -group by 2 order by 1 desc +from :fn true) bulk join :fn false) rowbyrow using (path) +group by 2, 3 order by 1 desc ; - count | rowbyrow_result --------+----------------- - 22 | XX001 - 4 | 08P01 - 2 | true - 1 | false - 1 | 22021 - 1 | 3F000 + count | bulk_result | rowbyrow_result +-------+-------------+----------------- + 23 | XX001 | XX001 + 4 | 08P01 | 08P01 + 2 | true | true + 1 | 22021 | 22021 + 1 | 3F000 | 3F000 + 1 | false | false (6 rows) diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index 596245cba0a..66813af305f 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -109,7 +109,7 @@ select count(*) from vectorqual where device = 1 /* can't apply vector ops to th (1 row) -- Test columns that don't support bulk decompression. 
-alter table vectorqual add column tag text; +alter table vectorqual add column tag name; insert into vectorqual(ts, device, metric2, metric3, metric4, tag) values ('2025-01-01 00:00:00', 5, 52, 53, 54, 'tag5'); select count(compress_chunk(x, true)) from show_chunks('vectorqual') x; NOTICE: chunk "_hyper_1_1_chunk" is already compressed diff --git a/tsl/test/fuzzing/compression/dictionary-text/crash-e5143387e8896dcfb0f95f8111538502cee38ce0 b/tsl/test/fuzzing/compression/dictionary-text/crash-e5143387e8896dcfb0f95f8111538502cee38ce0 new file mode 100644 index 0000000000000000000000000000000000000000..280479e0535b53c2f790952f051970b8fb859a69 GIT binary patch literal 1122 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEiK7zFqk80;Dt8WaHCPEs` z1Xk|App8u%L=q??12-KmgiSqA3=)N)sMH5B;jxWFIamWwE)=IcszegVP=9||Kp;tC z6GM`QGo|1xbcP}W&|oYkBh;+{0tn9@A&ANXxew~V91=hs`5+HjYe`9okT1+dK#mm< yV+Sum8InOnLrK?W_b5>@~dPBS|I literal 0 HcmV?d00001 diff --git a/tsl/test/sql/compression_algos.sql b/tsl/test/sql/compression_algos.sql index e6a386acda6..412e3d7a7b3 100644 --- a/tsl/test/sql/compression_algos.sql +++ b/tsl/test/sql/compression_algos.sql @@ -410,16 +410,18 @@ group by 2, 3 order by 1 desc \set algo array \set type text select count(*) + , coalesce((bulk.rows >= 0)::text, bulk.sqlstate) bulk_result , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result -from :fn false) rowbyrow -group by 2 order by 1 desc +from :fn true) bulk join :fn false) rowbyrow using (path) +group by 2, 3 order by 1 desc ; \set algo dictionary \set type text select count(*) + , coalesce((bulk.rows >= 0)::text, bulk.sqlstate) bulk_result , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result -from :fn false) rowbyrow -group by 2 order by 1 desc +from :fn true) bulk join :fn false) rowbyrow using (path) +group by 2, 3 order by 1 desc ; diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index 0b783303c5e..053c885a3a2 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -39,7 +39,7 @@ select count(*) from vectorqual where device = 1 /* can't apply vector ops to th -- Test columns that don't support bulk decompression. -alter table vectorqual add column tag text; +alter table vectorqual add column tag name; insert into vectorqual(ts, device, metric2, metric3, metric4, tag) values ('2025-01-01 00:00:00', 5, 52, 53, 54, 'tag5'); select count(compress_chunk(x, true)) from show_chunks('vectorqual') x; From deee56c8422b924029c700fb5fa8fd245476358e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 21 Dec 2023 19:27:02 +0100 Subject: [PATCH 156/249] Vectorize NullTest For checks like `variable IS (NOT) NULL`. 
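The test can be computed in bulk directly from the Arrow validity bitmap of the
decompressed column, without calling a per-row comparison function. A minimal
sketch of the idea, assuming the validity buffer is present (the patch adds this
as vector_nulltest() in vector_predicates.c):

    /* AND the qual result bitmap with the column's validity bitmap, word by word. */
    const uint64 *validity = (const uint64 *) arrow->buffers[0];
    const int n_words = (arrow->length + 63) / 64;
    for (int i = 0; i < n_words; i++)
        result[i] &= (test_type == IS_NULL) ? ~validity[i] : validity[i];

For IS NULL the result is masked with the complement of the validity words, for
IS NOT NULL with the validity words themselves.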
--- .../nodes/decompress_chunk/compressed_batch.c | 78 +++++++----- tsl/src/nodes/decompress_chunk/exec.c | 13 +- tsl/src/nodes/decompress_chunk/planner.c | 102 +++++++++++---- .../decompress_chunk/vector_predicates.c | 20 +++ .../decompress_chunk/vector_predicates.h | 2 + tsl/test/expected/decompress_vector_qual.out | 14 +- .../shared/expected/ordered_append-15.out | 120 +++++++++--------- tsl/test/sql/decompress_vector_qual.sql | 11 +- 8 files changed, 235 insertions(+), 125 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index efe39b82157..974cacbc3c1 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -220,7 +220,13 @@ compute_vector_quals(DecompressContext *dcontext, DecompressBatchState *batch_st RegProcedure vector_const_opcode = InvalidOid; ScalarArrayOpExpr *saop = NULL; OpExpr *opexpr = NULL; - if (IsA(lfirst(lc), ScalarArrayOpExpr)) + NullTest *nulltest = NULL; + if (IsA(lfirst(lc), NullTest)) + { + nulltest = castNode(NullTest, lfirst(lc)); + args = list_make1(nulltest->arg); + } + else if (IsA(lfirst(lc), ScalarArrayOpExpr)) { saop = castNode(ScalarArrayOpExpr, lfirst(lc)); args = saop->args; @@ -233,12 +239,6 @@ compute_vector_quals(DecompressContext *dcontext, DecompressBatchState *batch_st vector_const_opcode = get_opcode(opexpr->opno); } - /* - * Find the vector_const predicate. - */ - VectorPredicate *vector_const_predicate = get_vector_const_predicate(vector_const_opcode); - Assert(vector_const_predicate != NULL); - /* * Find the compressed column referred to by the Var. */ @@ -311,36 +311,50 @@ compute_vector_quals(DecompressContext *dcontext, DecompressBatchState *batch_st predicate_result = &default_value_predicate_result; } - /* - * The vectorizable predicates should be STRICT, so we shouldn't see null - * constants here. - */ - Const *constnode = castNode(Const, lsecond(args)); - Ensure(!constnode->constisnull, "vectorized predicate called for a null value"); - - /* - * At last, compute the predicate. - */ - if (saop) + if (nulltest) { - vector_array_predicate(vector_const_predicate, - saop->useOr, - vector, - constnode->constvalue, - predicate_result); + vector_nulltest(vector, nulltest->nulltesttype, predicate_result); } else { - vector_const_predicate(vector, constnode->constvalue, predicate_result); - } + /* + * Find the vector_const predicate. + */ + VectorPredicate *vector_const_predicate = + get_vector_const_predicate(vector_const_opcode); + Assert(vector_const_predicate != NULL); - /* Account for nulls which shouldn't pass the predicate. */ - const size_t n = vector->length; - const size_t n_words = (n + 63) / 64; - const uint64 *restrict validity = (uint64 *restrict) vector->buffers[0]; - for (size_t i = 0; i < n_words; i++) - { - predicate_result[i] &= validity[i]; + /* + * The vectorizable predicates should be STRICT, so we shouldn't see null + * constants here. + */ + Const *constnode = castNode(Const, lsecond(args)); + Ensure(!constnode->constisnull, "vectorized predicate called for a null value"); + + /* + * At last, compute the predicate. + */ + if (saop) + { + vector_array_predicate(vector_const_predicate, + saop->useOr, + vector, + constnode->constvalue, + predicate_result); + } + else + { + vector_const_predicate(vector, constnode->constvalue, predicate_result); + } + + /* Account for nulls which shouldn't pass the predicate. 
*/ + const size_t n = vector->length; + const size_t n_words = (n + 63) / 64; + const uint64 *restrict validity = (uint64 *restrict) vector->buffers[0]; + for (size_t i = 0; i < n_words; i++) + { + predicate_result[i] &= validity[i]; + } } /* Process the result. */ diff --git a/tsl/src/nodes/decompress_chunk/exec.c b/tsl/src/nodes/decompress_chunk/exec.c index 9fe7b766741..fa0aae42e4b 100644 --- a/tsl/src/nodes/decompress_chunk/exec.c +++ b/tsl/src/nodes/decompress_chunk/exec.c @@ -453,17 +453,22 @@ decompress_chunk_begin(CustomScanState *node, EState *estate, int eflags) } } - List *args; + List *args = NIL; if (IsA(constified, OpExpr)) { args = castNode(OpExpr, constified)->args; } - else + else if (IsA(constified, ScalarArrayOpExpr)) { args = castNode(ScalarArrayOpExpr, constified)->args; } - Ensure(IsA(lsecond(args), Const), - "failed to evaluate runtime constant in vectorized filter"); + + if (args) + { + Ensure(IsA(lsecond(args), Const), + "failed to evaluate runtime constant in vectorized filter"); + } + dcontext->vectorized_quals_constified = lappend(dcontext->vectorized_quals_constified, constified); } diff --git a/tsl/src/nodes/decompress_chunk/planner.c b/tsl/src/nodes/decompress_chunk/planner.c index 8a4faffe0ca..1aaf7f37a6b 100644 --- a/tsl/src/nodes/decompress_chunk/planner.c +++ b/tsl/src/nodes/decompress_chunk/planner.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -420,36 +421,47 @@ make_vectorized_qual(DecompressChunkPath *path, Node *qual) { /* * Currently we vectorize some "Var op Const" binary predicates, - * and scalar array operations with these predicates. + * scalar array operations with these predicates, and null test. */ - if (!IsA(qual, OpExpr) && !IsA(qual, ScalarArrayOpExpr)) - { - return NULL; - } - - List *args = NIL; + NullTest *nulltest = NULL; OpExpr *opexpr = NULL; - Oid opno = InvalidOid; ScalarArrayOpExpr *saop = NULL; + Node *arg1 = NULL; + Node *arg2 = NULL; + Oid opno = InvalidOid; if (IsA(qual, OpExpr)) { opexpr = castNode(OpExpr, qual); - args = opexpr->args; opno = opexpr->opno; + if (list_length(opexpr->args) != 2) + { + return NULL; + } + arg1 = (Node *) linitial(opexpr->args); + arg2 = (Node *) lsecond(opexpr->args); } - else + else if (IsA(qual, ScalarArrayOpExpr)) { saop = castNode(ScalarArrayOpExpr, qual); - args = saop->args; opno = saop->opno; + if (list_length(saop->args) != 2) + { + return NULL; + } + arg1 = (Node *) linitial(saop->args); + arg2 = (Node *) lsecond(saop->args); } - - if (list_length(args) != 2) + else if (IsA(qual, NullTest)) + { + nulltest = castNode(NullTest, qual); + arg1 = (Node *) nulltest->arg; + } + else { return NULL; } - if (opexpr && IsA(lsecond(args), Var)) + if (opexpr && IsA(arg2, Var)) { /* * Try to commute the operator if we have Var on the right. @@ -467,22 +479,37 @@ make_vectorized_qual(DecompressChunkPath *path, Node *qual) * CommuteOpExpr() does. */ opexpr->opfuncid = InvalidOid; - args = list_make2(lsecond(args), linitial(args)); - opexpr->args = args; + opexpr->args = list_make2(arg2, arg1); + Node *tmp = arg1; + arg1 = arg2; + arg2 = tmp; } /* - * We can vectorize the operation where the left side is a Var and the right - * side is a constant or can be evaluated to a constant at run time (e.g. - * contains stable functions). + * We can vectorize the operation where the left side is a Var. 
*/ - if (!IsA(linitial(args), Var) || is_not_runtime_constant(lsecond(args))) + if (!IsA(arg1, Var)) { return NULL; } - Var *var = castNode(Var, linitial(args)); - Assert((Index) var->varno == path->info->chunk_rel->relid); + Var *var = castNode(Var, arg1); + if((Index) var->varno != path->info->chunk_rel->relid) + { + /* + * We have a Var from other relation (join clause), can't vectorize it + * at the moment. + */ + return NULL; + } + + if (var->varattno <= 0) + { + /* + * Can't vectorize operators with special variables such as whole-row var. + */ + return NULL; + } /* * ExecQual is performed before ExecProject and operates on the decompressed @@ -495,12 +522,40 @@ make_vectorized_qual(DecompressChunkPath *path, Node *qual) return NULL; } + if (nulltest) + { + /* + * The checks we've done to this point is all that is required for null + * test. + */ + return (Node *) nulltest; + } + + /* + * We can vectorize the opreration where the right side is a constant or can + * be evaluated to a constant at run time (e.g. contains stable functions). + */ + Assert(arg2); + if (is_not_runtime_constant(arg2)) + { + return NULL; + } + Oid opcode = get_opcode(opno); if (!get_vector_const_predicate(opcode)) { return NULL; } + if (opexpr) + { + /* + * The checks we've done to this point is all that is required for + * OpExpr. + */ + return (Node *) opexpr; + } + #if PG14_GE if (saop) { @@ -514,7 +569,8 @@ make_vectorized_qual(DecompressChunkPath *path, Node *qual) } #endif - return opexpr ? (Node *) opexpr : (Node *) saop; + Assert(saop); + return (Node *) saop; } /* diff --git a/tsl/src/nodes/decompress_chunk/vector_predicates.c b/tsl/src/nodes/decompress_chunk/vector_predicates.c index f225383ec3f..ae1ce9a4e0a 100644 --- a/tsl/src/nodes/decompress_chunk/vector_predicates.c +++ b/tsl/src/nodes/decompress_chunk/vector_predicates.c @@ -42,3 +42,23 @@ get_vector_const_predicate(Oid pg_predicate) } return NULL; } + +void +vector_nulltest(const ArrowArray *arrow, int test_type, uint64 *restrict result) +{ + const bool should_be_null = test_type == IS_NULL; + + const uint16 bitmap_words = (arrow->length + 63) / 64; + const uint64 *restrict validity = (const uint64 *) arrow->buffers[0]; + for (uint16 i = 0; i < bitmap_words; i++) + { + if (should_be_null) + { + result[i] &= ~validity[i]; + } + else + { + result[i] &= validity[i]; + } + } +} diff --git a/tsl/src/nodes/decompress_chunk/vector_predicates.h b/tsl/src/nodes/decompress_chunk/vector_predicates.h index c33d7993c4f..06a4b40435b 100644 --- a/tsl/src/nodes/decompress_chunk/vector_predicates.h +++ b/tsl/src/nodes/decompress_chunk/vector_predicates.h @@ -15,3 +15,5 @@ VectorPredicate *get_vector_const_predicate(Oid pg_predicate); void vector_array_predicate(VectorPredicate *scalar_predicate, bool is_or, const ArrowArray *vector, Datum array, uint64 *restrict result); + +void vector_nulltest(const ArrowArray *arrow, int test_type, uint64 *restrict result); diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index 596245cba0a..88b3a0db2a9 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -208,8 +208,8 @@ select count(*) from vectorqual where 777 === metric3; 2 (1 row) --- NullTest is not vectorized. -set timescaledb.debug_require_vector_qual to 'forbid'; +-- NullTest is vectorized. 
+set timescaledb.debug_require_vector_qual to 'only'; select count(*) from vectorqual where metric4 is null; count ------- @@ -222,6 +222,14 @@ select count(*) from vectorqual where metric4 is not null; 2 (1 row) +-- Can't vectorize conditions on whole row variable. +set timescaledb.debug_require_vector_qual to 'forbid'; +select count(*) from vectorqual where vectorqual is null; + count +------- + 0 +(1 row) + -- Scalar array operators are vectorized if the operator is vectorizable. set timescaledb.debug_require_vector_qual to 'only'; select count(*) from vectorqual where metric3 = any(array[777, 888]); /* default value */ @@ -616,7 +624,7 @@ set timescaledb.debug_require_vector_qual to 'forbid'; select count(*) from vectorqual where metric4 > 4; ERROR: debug: encountered vector quals when they are disabled set timescaledb.debug_require_vector_qual to 'only'; -select count(*) from vectorqual where metric4 is null; +select count(*) from vectorqual where metric3 === 4; ERROR: debug: encountered non-vector quals when they are disabled \set ON_ERROR_STOP 1 -- Date columns diff --git a/tsl/test/shared/expected/ordered_append-15.out b/tsl/test/shared/expected/ordered_append-15.out index f402524ad6d..43b879aaf99 100644 --- a/tsl/test/shared/expected/ordered_append-15.out +++ b/tsl/test/shared/expected/ordered_append-15.out @@ -2592,17 +2592,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=25190 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=30 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (21 rows) @@ -2619,17 +2619,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=17990 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (21 rows) @@ -2648,17 +2648,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=17990 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT 
NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (22 rows) @@ -2676,17 +2676,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=25190 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=30 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (22 rows) @@ -2959,17 +2959,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk_1."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (actual rows=25190 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (actual rows=30 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) Filter: ("time" = $1) @@ -4081,53 +4081,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=15114 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=18 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never 
executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (59 rows) @@ -4146,53 +4146,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=10794 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=12 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: 
_hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (59 rows) @@ -4213,53 +4213,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=10794 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=12 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (60 rows) @@ -4279,53 +4279,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> 
Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=15114 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=18 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (60 rows) @@ -4759,53 +4759,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk_1."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (actual rows=6 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk_1."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (actual rows=15114 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (actual rows=18 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk_1."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (actual rows=6 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on 
_hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Merge Append (actual rows=0 loops=1) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=0 loops=1) diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index 0b783303c5e..7b1eee3819c 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -81,12 +81,17 @@ select count(*) from vectorqual where metric3 === any(array[777, 888]); select count(*) from vectorqual where 777 === metric3; --- NullTest is not vectorized. -set timescaledb.debug_require_vector_qual to 'forbid'; +-- NullTest is vectorized. +set timescaledb.debug_require_vector_qual to 'only'; select count(*) from vectorqual where metric4 is null; select count(*) from vectorqual where metric4 is not null; +-- Can't vectorize conditions on whole row variable. +set timescaledb.debug_require_vector_qual to 'forbid'; +select count(*) from vectorqual where vectorqual is null; + + -- Scalar array operators are vectorized if the operator is vectorizable. 
set timescaledb.debug_require_vector_qual to 'only'; select count(*) from vectorqual where metric3 = any(array[777, 888]); /* default value */ @@ -203,7 +208,7 @@ set timescaledb.enable_bulk_decompression to on; set timescaledb.debug_require_vector_qual to 'forbid'; select count(*) from vectorqual where metric4 > 4; set timescaledb.debug_require_vector_qual to 'only'; -select count(*) from vectorqual where metric4 is null; +select count(*) from vectorqual where metric3 === 4; \set ON_ERROR_STOP 1 From 443fa1ffcc0f17d0fe395ab057e091a52c84e388 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 21 Dec 2023 20:14:08 +0100 Subject: [PATCH 157/249] ref --- .../expected/ordered_append_join-15.out | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tsl/test/shared/expected/ordered_append_join-15.out b/tsl/test/shared/expected/ordered_append_join-15.out index 34f01051ab2..2e5dcf4f357 100644 --- a/tsl/test/shared/expected/ordered_append_join-15.out +++ b/tsl/test/shared/expected/ordered_append_join-15.out @@ -2282,17 +2282,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=25190 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (actual rows=30 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) (37 rows) @@ -3629,53 +3629,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=15114 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=18 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (actual rows=6 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS 
NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) (75 rows) From d9a8106fdaee0994f827111ec5ff061b6a386c07 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 21 Dec 2023 21:10:32 +0100 Subject: [PATCH 158/249] tests --- tsl/src/nodes/decompress_chunk/planner.c | 7 ++----- tsl/src/nodes/decompress_chunk/qual_pushdown.c | 8 ++++++++ tsl/test/expected/decompress_vector_qual.out | 18 ++++++++++++++++-- tsl/test/sql/decompress_vector_qual.sql | 8 ++++++-- 4 files changed, 32 insertions(+), 9 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/planner.c b/tsl/src/nodes/decompress_chunk/planner.c index 1aaf7f37a6b..f2374b33655 100644 --- a/tsl/src/nodes/decompress_chunk/planner.c +++ b/tsl/src/nodes/decompress_chunk/planner.c @@ -444,10 +444,7 @@ make_vectorized_qual(DecompressChunkPath *path, Node *qual) { saop = castNode(ScalarArrayOpExpr, qual); opno = saop->opno; - if (list_length(saop->args) != 2) - { - return NULL; - } + Assert(list_length(saop->args) == 2); arg1 = (Node *) linitial(saop->args); arg2 = (Node *) lsecond(saop->args); } @@ -494,7 +491,7 @@ make_vectorized_qual(DecompressChunkPath *path, Node *qual) } Var *var = castNode(Var, arg1); - if((Index) var->varno != path->info->chunk_rel->relid) + if ((Index) var->varno != path->info->chunk_rel->relid) { /* * We have a Var from other relation (join clause), can't vectorize it diff --git a/tsl/src/nodes/decompress_chunk/qual_pushdown.c b/tsl/src/nodes/decompress_chunk/qual_pushdown.c index 005e8fcf835..b3c1fd30e89 100644 --- a/tsl/src/nodes/decompress_chunk/qual_pushdown.c +++ b/tsl/src/nodes/decompress_chunk/qual_pushdown.c @@ -360,6 +360,14 @@ modify_expression(Node *node, QualPushdownContext *context) { Var *var = castNode(Var, node); Assert((Index) var->varno == context->chunk_rel->relid); + + if (var->varattno <= 0) + { + /* Can't do this for system columns such as whole-row var. 
*/ + context->can_pushdown = false; + return NULL; + } + char *attname = get_attname(context->chunk_rte->relid, var->varattno, false); /* we can only push down quals for segmentby columns */ if (!ts_array_is_member(context->settings->fd.segmentby, attname)) diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index 88b3a0db2a9..57423179010 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -222,9 +222,23 @@ select count(*) from vectorqual where metric4 is not null; 2 (1 row) --- Can't vectorize conditions on whole row variable. +-- Can't vectorize conditions on system columns. Have to check this on a single +-- chunk, otherwise the whole-row var will be masked by ConvertRowType. +select show_chunks('vectorqual') chunk1 limit 1 \gset set timescaledb.debug_require_vector_qual to 'forbid'; -select count(*) from vectorqual where vectorqual is null; +select count(*) from :chunk1 t where t is null; + count +------- + 0 +(1 row) + +select count(*) from :chunk1 t where t.* is null; + count +------- + 0 +(1 row) + +select count(*) from :chunk1 t where tableoid is null; count ------- 0 diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index 7b1eee3819c..04a5da1c39a 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -87,9 +87,13 @@ select count(*) from vectorqual where metric4 is null; select count(*) from vectorqual where metric4 is not null; --- Can't vectorize conditions on whole row variable. +-- Can't vectorize conditions on system columns. Have to check this on a single +-- chunk, otherwise the whole-row var will be masked by ConvertRowType. +select show_chunks('vectorqual') chunk1 limit 1 \gset set timescaledb.debug_require_vector_qual to 'forbid'; -select count(*) from vectorqual where vectorqual is null; +select count(*) from :chunk1 t where t is null; +select count(*) from :chunk1 t where t.* is null; +select count(*) from :chunk1 t where tableoid is null; -- Scalar array operators are vectorized if the operator is vectorizable. 
From a002fb2fe31bf81700da64d69f2d9aa61419eeaa Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 3 Jan 2024 12:32:24 +0100 Subject: [PATCH 159/249] reference ordered_append-* ordered_append_join-* --- .../shared/expected/ordered_append-16.out | 120 +++++++++--------- .../expected/ordered_append_join-16.out | 24 ++-- 2 files changed, 72 insertions(+), 72 deletions(-) diff --git a/tsl/test/shared/expected/ordered_append-16.out b/tsl/test/shared/expected/ordered_append-16.out index f402524ad6d..43b879aaf99 100644 --- a/tsl/test/shared/expected/ordered_append-16.out +++ b/tsl/test/shared/expected/ordered_append-16.out @@ -2592,17 +2592,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=25190 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=30 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (21 rows) @@ -2619,17 +2619,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=17990 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (21 rows) @@ -2648,17 +2648,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=17990 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (22 rows) @@ -2676,17 +2676,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=25190 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=30 loops=1) -> Sort 
(never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (22 rows) @@ -2959,17 +2959,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk_1."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (actual rows=25190 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (actual rows=30 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) Filter: ("time" = $1) @@ -4081,53 +4081,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=15114 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=18 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: 
_hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (59 rows) @@ -4146,53 +4146,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=10794 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=12 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (59 rows) @@ -4213,53 +4213,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk 
(actual rows=3598 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=10794 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=12 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (60 rows) @@ -4279,53 +4279,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=15114 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=18 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: 
_hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (60 rows) @@ -4759,53 +4759,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk_1."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (actual rows=6 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk_1."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (actual rows=15114 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (actual rows=18 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk_1."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (actual rows=6 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk 
_hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Merge Append (actual rows=0 loops=1) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=0 loops=1) diff --git a/tsl/test/shared/expected/ordered_append_join-16.out b/tsl/test/shared/expected/ordered_append_join-16.out index 1387ef968d3..47ee1c37f1e 100644 --- a/tsl/test/shared/expected/ordered_append_join-16.out +++ b/tsl/test/shared/expected/ordered_append_join-16.out @@ -2282,17 +2282,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=25190 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (actual rows=30 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) (37 rows) @@ -3629,53 +3629,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=15114 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=18 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (actual rows=6 loops=1) -> Merge Append (never executed) Sort 
Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) (75 rows) From 23385e3e046fe27f823cb2c156d667e8ce7723e4 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 3 Jan 2024 12:33:29 +0100 Subject: [PATCH 160/249] reference ordered_append-* ordered_append_join-* --- .../shared/expected/ordered_append-14.out | 120 +++++++++--------- .../expected/ordered_append_join-14.out | 24 ++-- 2 files changed, 72 insertions(+), 72 deletions(-) diff --git a/tsl/test/shared/expected/ordered_append-14.out b/tsl/test/shared/expected/ordered_append-14.out index 4b25227cdd3..18a25d26ef5 100644 --- a/tsl/test/shared/expected/ordered_append-14.out +++ b/tsl/test/shared/expected/ordered_append-14.out @@ -2571,17 +2571,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=25190 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=30 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (21 rows) @@ -2598,17 +2598,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=17990 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on 
compress_hyper_X_X_chunk (actual rows=20 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (21 rows) @@ -2626,17 +2626,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=17990 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (21 rows) @@ -2653,17 +2653,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=25190 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=30 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (21 rows) @@ -2935,17 +2935,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk_1."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (actual rows=25190 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (actual rows=30 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) Filter: ("time" = $1) @@ -4054,53 +4054,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - 
Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=15114 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=18 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (59 rows) @@ -4119,53 +4119,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=10794 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=12 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" -> Sort (never executed) Sort Key: 
_hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (59 rows) @@ -4185,53 +4185,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=10794 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=12 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on 
_hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (59 rows) @@ -4250,53 +4250,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=15114 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=18 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (59 rows) @@ -4729,53 +4729,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk_1."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk 
_hyper_X_X_chunk_1 (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (actual rows=6 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk_1."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (actual rows=15114 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (actual rows=18 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk_1."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (actual rows=6 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Merge Append (actual rows=0 loops=1) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=0 loops=1) diff --git a/tsl/test/shared/expected/ordered_append_join-14.out b/tsl/test/shared/expected/ordered_append_join-14.out index 23d432b4465..dcf6d20d941 100644 --- a/tsl/test/shared/expected/ordered_append_join-14.out +++ b/tsl/test/shared/expected/ordered_append_join-14.out @@ -2264,17 +2264,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=25190 loops=1) - Filter: ("time" IS 
NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (actual rows=30 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) (37 rows) @@ -3605,53 +3605,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=15114 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=18 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (actual rows=6 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) (75 rows) From eea90cae230bc0a08cd012d0a69544789f464ca6 
Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 3 Jan 2024 12:34:14 +0100 Subject: [PATCH 161/249] reference ordered_append-* ordered_append_join-* --- .../shared/expected/ordered_append-13.out | 120 +++++++++--------- .../expected/ordered_append_join-13.out | 24 ++-- 2 files changed, 72 insertions(+), 72 deletions(-) diff --git a/tsl/test/shared/expected/ordered_append-13.out b/tsl/test/shared/expected/ordered_append-13.out index 4b25227cdd3..18a25d26ef5 100644 --- a/tsl/test/shared/expected/ordered_append-13.out +++ b/tsl/test/shared/expected/ordered_append-13.out @@ -2571,17 +2571,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=25190 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=30 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (21 rows) @@ -2598,17 +2598,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=17990 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (21 rows) @@ -2626,17 +2626,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=17990 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=20 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (21 rows) @@ -2653,17 +2653,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=25190 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=30 loops=1) -> Sort (never executed) Sort Key: 
_hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (21 rows) @@ -2935,17 +2935,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk_1."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (actual rows=25190 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (actual rows=30 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) Filter: ("time" = $1) @@ -4054,53 +4054,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=15114 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=18 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom 
Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (59 rows) @@ -4119,53 +4119,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=10794 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=12 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (59 rows) @@ -4185,53 +4185,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: 
("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=10794 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=12 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=3598 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=4 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (59 rows) @@ -4250,53 +4250,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=15114 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=18 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan 
(DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) (59 rows) @@ -4729,53 +4729,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk_1."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (actual rows=6 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk_1."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (actual rows=15114 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (actual rows=18 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk_1."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (actual rows=6 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" 
IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk_1."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk _hyper_X_X_chunk_1 (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Merge Append (actual rows=0 loops=1) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=0 loops=1) diff --git a/tsl/test/shared/expected/ordered_append_join-13.out b/tsl/test/shared/expected/ordered_append_join-13.out index 23d432b4465..dcf6d20d941 100644 --- a/tsl/test/shared/expected/ordered_append_join-13.out +++ b/tsl/test/shared/expected/ordered_append_join-13.out @@ -2264,17 +2264,17 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=25190 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (actual rows=30 loops=1) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) (37 rows) @@ -3605,53 +3605,53 @@ QUERY PLAN Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=6 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=15114 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (actual rows=18 loops=1) -> Sort (actual rows=1 loops=1) Sort Key: _hyper_X_X_chunk."time" DESC Sort Method: top-N heapsort -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=5038 loops=1) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (actual rows=6 loops=1) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never 
executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) -> Merge Append (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk (never executed) -> Sort (never executed) Sort Key: _hyper_X_X_chunk."time" DESC -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (never executed) - Filter: ("time" IS NOT NULL) + Vectorized Filter: ("time" IS NOT NULL) -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_1 (never executed) (75 rows) From b5709d8b17d11d57a0df3f3ad9d7552114002cdc Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 3 Jan 2024 14:12:20 +0100 Subject: [PATCH 162/249] avoid warnings --- .github/workflows/libfuzzer.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 912a5287086..8e93d690f07 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -288,10 +288,11 @@ jobs: - name: Save fuzzer-generated crash cases if: always() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: Crash cases for ${{ steps.config.outputs.name }} path: db/crash-* + if-no-files-found: ignore - name: Save interesting cases if: always() From ab5af985138647c69f92c58b58b63c089d63061b Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 3 Jan 2024 14:46:26 +0100 Subject: [PATCH 163/249] cleanup --- tsl/src/compression/array.c | 16 ++++------------ tsl/src/compression/decompress_text_test_impl.c | 4 ---- tsl/src/compression/dictionary.c | 10 ++-------- tsl/test/expected/compression_algos.out | 13 +++++++------ .../0e356ba505631fbf715758bed27d503f8b260e3a | Bin 0 -> 2 bytes .../0f873e20f9cc905c940207795842a8f89598bb78 | Bin 0 -> 1123 bytes .../143a44ebe20d00b1c6bdf12487758974467b504f | Bin 0 -> 1156 bytes .../2123898ce1d45564480b9ff51cf391b87dcc5a07 | Bin 0 -> 5 bytes .../513033f491e3f9ae4cf779c239158c9063f2af4d | Bin 0 -> 769 bytes .../6c0295b5f6b25ca492bafd609ba5c9494785651f | Bin 0 -> 1048 bytes .../98e49024fd7e15859ec345ee83a01fec0656ad94 | Bin 0 -> 194 bytes .../a24f8ad32bdadaee87b839765599bc63dfcbd62a | Bin 0 -> 48 bytes .../b6f695dd09d681d144c71e52ebe565a2567a23f9 | Bin 0 -> 52 bytes 
.../b71fa13e7c2fee50d39a87fc927c31256f8c4af3 | Bin 0 -> 1057 bytes .../06a9bb98f465ce2136ba9c5c3b15912de9101d7b | Bin 0 -> 49 bytes .../20294feb1598e5893bda9b1fe7b9568ea0af237c | Bin 0 -> 168 bytes .../29e8abf085d862cb208f5e476f628644de1c22a0 | Bin 0 -> 113 bytes .../2ee77e4ad0a5c13eb219c48f0e8964d9f6124737 | Bin 0 -> 106 bytes .../3716f49a4dc3a527cc3682d04ae2036204c406ce | Bin 0 -> 73 bytes .../3e8223a2a8a5034ca9935ccb1a9c8f41f4dfd782 | Bin 0 -> 137 bytes .../3f1762bf4bdff8f21a3de2f04afe2cc9bfd538a5 | Bin 0 -> 169 bytes .../44991dd092a92994af67db1b51c9ca42261c27d3 | Bin 0 -> 73 bytes .../44dae141798a56015e84bb90b8d47f2d1d9db66e | Bin 0 -> 630 bytes .../4e4ec17ed15eab3b2aaee34c46ca44e72789f384 | Bin 0 -> 73 bytes .../55b0c7dbbd7470a644c43aaf8aaaa520631e3bb5 | Bin 0 -> 169 bytes .../57a99548ae911ad4a20406a03b0a0ac7a9adc63a | Bin 0 -> 73 bytes .../5a72bac420b736c0d530a9d4c861a374ad32f5a5 | Bin 0 -> 635 bytes .../5c99325fac6e6a77d673cf223fb3b3e62fb1e07e | Bin 0 -> 1235 bytes .../6407ff9cce2be245bacff6693615b8e382ba2a96 | Bin 0 -> 80 bytes .../664e56319f5a1ffc9bd3e9554f2358ace5a739ca | Bin 0 -> 193 bytes .../6af6e86bfe31a3941d3085227f051c05777657de | Bin 0 -> 321 bytes .../6f04561347c9100edce326d87e065789d2d56185 | Bin 0 -> 72 bytes .../726be829733ebbca258b51fc29a79e543de46677 | Bin 0 -> 193 bytes .../76bbc1ead78624711303acd22377969f0962736b | Bin 0 -> 170 bytes .../8127c19b14b9a5750a4731aef2f900a72ec6d802 | Bin 0 -> 321 bytes .../85e53271e14006f0265921d02d4d736cdc580b0b | 1 + .../8f1eab4f75b343ac81f12c926a077aaa572cd002 | Bin 0 -> 49 bytes .../9a88a0ae40cf185ed2c9bf4ebde71b048030211d | Bin 0 -> 82 bytes .../a707473dd0d734a745a15b98f20645839d69a660 | Bin 0 -> 321 bytes .../af33a704edf520f6ccc1c6c51b06d39b5a7e82f8 | Bin 0 -> 625 bytes .../b1eb62fe7596e0f62ef933c269b429851f73853b | Bin 0 -> 50 bytes .../c78643e37119bb0f817531ba2ff265d6ef53c64e | Bin 0 -> 621 bytes .../d421e94ef02d0e45a2f783b63f3fe6622b6776cd | Bin 0 -> 72 bytes .../e0485f22a1d04b0df70035eafa33f1278a52b8a6 | Bin 0 -> 170 bytes .../e5c4a84e1935991b3103fecf70bf563eb82f7936 | Bin 0 -> 540 bytes .../eb02ce7f9339084b7dfa707b412ee1b1f7046885 | Bin 0 -> 56 bytes .../fe56cff03603408c02ef6579df1958ba3cdbdd48 | Bin 0 -> 193 bytes 47 files changed, 14 insertions(+), 30 deletions(-) create mode 100644 tsl/test/fuzzing/compression/array-text/0e356ba505631fbf715758bed27d503f8b260e3a create mode 100644 tsl/test/fuzzing/compression/array-text/0f873e20f9cc905c940207795842a8f89598bb78 create mode 100644 tsl/test/fuzzing/compression/array-text/143a44ebe20d00b1c6bdf12487758974467b504f create mode 100644 tsl/test/fuzzing/compression/array-text/2123898ce1d45564480b9ff51cf391b87dcc5a07 create mode 100644 tsl/test/fuzzing/compression/array-text/513033f491e3f9ae4cf779c239158c9063f2af4d create mode 100644 tsl/test/fuzzing/compression/array-text/6c0295b5f6b25ca492bafd609ba5c9494785651f create mode 100644 tsl/test/fuzzing/compression/array-text/98e49024fd7e15859ec345ee83a01fec0656ad94 create mode 100644 tsl/test/fuzzing/compression/array-text/a24f8ad32bdadaee87b839765599bc63dfcbd62a create mode 100644 tsl/test/fuzzing/compression/array-text/b6f695dd09d681d144c71e52ebe565a2567a23f9 create mode 100644 tsl/test/fuzzing/compression/array-text/b71fa13e7c2fee50d39a87fc927c31256f8c4af3 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/06a9bb98f465ce2136ba9c5c3b15912de9101d7b create mode 100644 tsl/test/fuzzing/compression/dictionary-text/20294feb1598e5893bda9b1fe7b9568ea0af237c create mode 100644 
tsl/test/fuzzing/compression/dictionary-text/29e8abf085d862cb208f5e476f628644de1c22a0 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/2ee77e4ad0a5c13eb219c48f0e8964d9f6124737 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/3716f49a4dc3a527cc3682d04ae2036204c406ce create mode 100644 tsl/test/fuzzing/compression/dictionary-text/3e8223a2a8a5034ca9935ccb1a9c8f41f4dfd782 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/3f1762bf4bdff8f21a3de2f04afe2cc9bfd538a5 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/44991dd092a92994af67db1b51c9ca42261c27d3 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/44dae141798a56015e84bb90b8d47f2d1d9db66e create mode 100644 tsl/test/fuzzing/compression/dictionary-text/4e4ec17ed15eab3b2aaee34c46ca44e72789f384 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/55b0c7dbbd7470a644c43aaf8aaaa520631e3bb5 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/57a99548ae911ad4a20406a03b0a0ac7a9adc63a create mode 100644 tsl/test/fuzzing/compression/dictionary-text/5a72bac420b736c0d530a9d4c861a374ad32f5a5 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/5c99325fac6e6a77d673cf223fb3b3e62fb1e07e create mode 100644 tsl/test/fuzzing/compression/dictionary-text/6407ff9cce2be245bacff6693615b8e382ba2a96 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/664e56319f5a1ffc9bd3e9554f2358ace5a739ca create mode 100644 tsl/test/fuzzing/compression/dictionary-text/6af6e86bfe31a3941d3085227f051c05777657de create mode 100644 tsl/test/fuzzing/compression/dictionary-text/6f04561347c9100edce326d87e065789d2d56185 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/726be829733ebbca258b51fc29a79e543de46677 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/76bbc1ead78624711303acd22377969f0962736b create mode 100644 tsl/test/fuzzing/compression/dictionary-text/8127c19b14b9a5750a4731aef2f900a72ec6d802 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/85e53271e14006f0265921d02d4d736cdc580b0b create mode 100644 tsl/test/fuzzing/compression/dictionary-text/8f1eab4f75b343ac81f12c926a077aaa572cd002 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/9a88a0ae40cf185ed2c9bf4ebde71b048030211d create mode 100644 tsl/test/fuzzing/compression/dictionary-text/a707473dd0d734a745a15b98f20645839d69a660 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/af33a704edf520f6ccc1c6c51b06d39b5a7e82f8 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/b1eb62fe7596e0f62ef933c269b429851f73853b create mode 100644 tsl/test/fuzzing/compression/dictionary-text/c78643e37119bb0f817531ba2ff265d6ef53c64e create mode 100644 tsl/test/fuzzing/compression/dictionary-text/d421e94ef02d0e45a2f783b63f3fe6622b6776cd create mode 100644 tsl/test/fuzzing/compression/dictionary-text/e0485f22a1d04b0df70035eafa33f1278a52b8a6 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/e5c4a84e1935991b3103fecf70bf563eb82f7936 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/eb02ce7f9339084b7dfa707b412ee1b1f7046885 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/fe56cff03603408c02ef6579df1958ba3cdbdd48 diff --git a/tsl/src/compression/array.c b/tsl/src/compression/array.c index d240ba7bef7..26352df7278 100644 --- a/tsl/src/compression/array.c +++ b/tsl/src/compression/array.c @@ -463,7 +463,6 @@ tsl_array_decompression_iterator_from_datum_reverse(Datum compressed_array, Oid return &iterator->base; } - #define 
ELEMENT_TYPE uint32 #include "simple8b_rle_decompress_all.h" #undef ELEMENT_TYPE @@ -502,8 +501,10 @@ text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, const int n_total = has_nulls ? nulls_serialized->num_elements : n_notnull; uint32 *offsets = - (uint32 *) MemoryContextAllocZero(dest_mctx, pad_to_multiple(64, sizeof(*offsets) * (n_total + 1))); - uint8 *arrow_bodies = (uint8 *) MemoryContextAllocZero(dest_mctx, pad_to_multiple(64, si->len - si->cursor)); + (uint32 *) MemoryContextAllocZero(dest_mctx, + pad_to_multiple(64, sizeof(*offsets) * (n_total + 1))); + uint8 *arrow_bodies = + (uint8 *) MemoryContextAllocZero(dest_mctx, pad_to_multiple(64, si->len - si->cursor)); uint32 offset = 0; for (int i = 0; i < n_notnull; i++) @@ -528,15 +529,6 @@ text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, const uint32 textlen = VARSIZE_ANY_EXHDR(vardata); memcpy(&arrow_bodies[offset], VARDATA_ANY(vardata), textlen); - // fprintf(stderr, - // "%d: copied: '%s' len %d varsize %d result %.*s\n", - // i, - // text_to_cstring(vardata), - // textlen, - // (int) VARSIZE_ANY(vardata), - // textlen, - // &arrow_bodies[offset]); - offsets[i] = offset; CheckCompressedData(offset <= offset + textlen); /* Check for overflow. */ diff --git a/tsl/src/compression/decompress_text_test_impl.c b/tsl/src/compression/decompress_text_test_impl.c index ddb5e5a89ad..a521e710ce5 100644 --- a/tsl/src/compression/decompress_text_test_impl.c +++ b/tsl/src/compression/decompress_text_test_impl.c @@ -69,10 +69,6 @@ decompress_generic_text_check_arrow(ArrowArray *arrow, int errorlevel, Decompres const size_t rowbyrow_len = VARSIZE_ANY_EXHDR(rowbyrow_varlena); const char *rowbyrow_cstring = VARDATA_ANY(rowbyrow_varlena); - // fprintf(stderr, "arrow: '%.*s'(%ld), rbr: '%.*s'(%ld)\n", - // (int) arrow_len, arrow_cstring, arrow_len, - // (int) rowbyrow_len, rowbyrow_cstring, rowbyrow_len); - if (rowbyrow_len != arrow_len) { ereport(errorlevel, diff --git a/tsl/src/compression/dictionary.c b/tsl/src/compression/dictionary.c index d4976d1f766..7d2978f8e85 100644 --- a/tsl/src/compression/dictionary.c +++ b/tsl/src/compression/dictionary.c @@ -336,13 +336,6 @@ dictionary_compressor_finish(DictionaryCompressor *compressor) average_element_size = sizes.dictionary_size / sizes.num_distinct; expected_array_size = average_element_size * sizes.dictionary_compressed_indexes->num_elements; compressed = dictionary_compressed_from_serialization_info(sizes, compressor->type); - // fprintf(stderr, - // "dict size %ld, distinct %ld, avg element size %ld, easize %ld, totalsize %ld\n", - // sizes.dictionary_size, - // (uint64) sizes.num_distinct, - // average_element_size, - // expected_array_size, - // sizes.total_size); if (expected_array_size < sizes.total_size) return dictionary_compressed_to_array_compressed(compressed); @@ -433,7 +426,8 @@ tsl_text_dictionary_decompress_all(Datum compressed, Oid element_type, MemoryCon const uint16 n_notnull = indices_serialized->num_elements; const uint16 n_total = header->has_nulls ? nulls_serialized->num_elements : n_notnull; - const uint16 n_padded = n_total + 63; /* This is the padding requirement of simple8brle_decompress_all. */ + const uint16 n_padded = + n_total + 63; /* This is the padding requirement of simple8brle_decompress_all. 
*/ int16 *restrict indices = MemoryContextAlloc(dest_mctx, sizeof(int16) * n_padded); const uint16 n_decompressed = diff --git a/tsl/test/expected/compression_algos.out b/tsl/test/expected/compression_algos.out index b7cc5e3e409..fef1cb4444a 100644 --- a/tsl/test/expected/compression_algos.out +++ b/tsl/test/expected/compression_algos.out @@ -1595,11 +1595,11 @@ group by 2, 3 order by 1 desc ; count | bulk_result | rowbyrow_result -------+-------------+----------------- - 14 | XX001 | XX001 - 4 | 08P01 | 08P01 + 21 | XX001 | XX001 + 6 | 08P01 | 08P01 + 2 | 3F000 | 3F000 2 | true | true 1 | 22021 | 22021 - 1 | 3F000 | 3F000 1 | false | false (6 rows) @@ -1613,11 +1613,12 @@ group by 2, 3 order by 1 desc ; count | bulk_result | rowbyrow_result -------+-------------+----------------- - 23 | XX001 | XX001 + 51 | XX001 | XX001 4 | 08P01 | 08P01 + 4 | XX001 | true 2 | true | true - 1 | 22021 | 22021 + 2 | 22021 | 22021 1 | 3F000 | 3F000 1 | false | false -(6 rows) +(7 rows) diff --git a/tsl/test/fuzzing/compression/array-text/0e356ba505631fbf715758bed27d503f8b260e3a b/tsl/test/fuzzing/compression/array-text/0e356ba505631fbf715758bed27d503f8b260e3a new file mode 100644 index 0000000000000000000000000000000000000000..35a038769b15c0935bb3cd038f5cc1de7579f128 GIT binary patch literal 2 JcmZQ%0000400IC2 literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/array-text/0f873e20f9cc905c940207795842a8f89598bb78 b/tsl/test/fuzzing/compression/array-text/0f873e20f9cc905c940207795842a8f89598bb78 new file mode 100644 index 0000000000000000000000000000000000000000..5fad9d829fd414cf3dfee6b7af086a5510972c92 GIT binary patch literal 1123 zcmZQ%C`gY_PAo~x$xmk}Nv$Yh00HI~K!P_VGc7H(C^fId(7?a|D8K_2NM^|^v9z>= z^FazMEiFKNAReM14awa`5H>TC5EYn)44UMd!h)$4BhIj>!PG^*2u1EE%UasU2^r3^ oCqoaimI3z^nW-=V<_QiM14eT&z={bI17o0IUI{1zaUlv10H{eMv;Y7A literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/array-text/143a44ebe20d00b1c6bdf12487758974467b504f b/tsl/test/fuzzing/compression/array-text/143a44ebe20d00b1c6bdf12487758974467b504f new file mode 100644 index 0000000000000000000000000000000000000000..39ca829eeac6152d85c98908762df3c5d9f46da4 GIT binary patch literal 1156 zcmZQ%EJ%+}PAo~x$xmk}Nv$YhU}Rume!;-NARq_@p`oEbE`H#SE?oi^2w^Zrm!JV( y6WL6l^C<%YlxZK;Jq$wtC52J!KNQm_POro}hl6;fSX8388jCzuA$F{yXo3LIs0w2M literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/array-text/2123898ce1d45564480b9ff51cf391b87dcc5a07 b/tsl/test/fuzzing/compression/array-text/2123898ce1d45564480b9ff51cf391b87dcc5a07 new file mode 100644 index 0000000000000000000000000000000000000000..16dc76fb2ddb6422d26ef9f71bcf86f90018bc14 GIT binary patch literal 5 McmZQ%C}3a!00CM6asU7T literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/array-text/513033f491e3f9ae4cf779c239158c9063f2af4d b/tsl/test/fuzzing/compression/array-text/513033f491e3f9ae4cf779c239158c9063f2af4d new file mode 100644 index 0000000000000000000000000000000000000000..76c6d5d66fa630ad0cbd09849425d14e2f22a024 GIT binary patch literal 769 zcmZQ%EJ%+}PAo~x$xmk}Nv$YhU}Rum@C9NkK`8hS150rOsFED^|3IA#3}9|PBQ~>{ zB7yP(f&zk1LB@j>)NFe5HjXynh-K3=BRpL2Frqhq^vB&8B`*;v$}uH@nsEZ2 Rl*}|Fr)UA$98i8*2>_T(7MuV8 literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/array-text/a24f8ad32bdadaee87b839765599bc63dfcbd62a b/tsl/test/fuzzing/compression/array-text/a24f8ad32bdadaee87b839765599bc63dfcbd62a new file mode 100644 index 0000000000000000000000000000000000000000..7327d7cb7507c5f987f8937d5a9ed23d6f931b59 GIT binary patch literal 48 
ycmZQ%EJ%+}PAo~x$xmk}Nv$YhU}Rume!;*1q8S(p7#Q}0Xa;KrhM6HVLv#TbKMFSh literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/array-text/b6f695dd09d681d144c71e52ebe565a2567a23f9 b/tsl/test/fuzzing/compression/array-text/b6f695dd09d681d144c71e52ebe565a2567a23f9 new file mode 100644 index 0000000000000000000000000000000000000000..9f05f1e2871c50379c6237f4cc81186e4ad27ae9 GIT binary patch literal 52 ycmZQ%EJ%+}PAo~x$xmk}Nv$YhU}Rume!;-Nzz8A)pu#+x^mT!tfXSYLK^FjU6b#S+ literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/array-text/b71fa13e7c2fee50d39a87fc927c31256f8c4af3 b/tsl/test/fuzzing/compression/array-text/b71fa13e7c2fee50d39a87fc927c31256f8c4af3 new file mode 100644 index 0000000000000000000000000000000000000000..53292ea2a4330d7b9efcb078c211ca0f477e678b GIT binary patch literal 1057 zcmZQ%C`gY_PAo~x$xmk}Nv$Yh00HI~Us3Vs}oDR>|f8O3n*bZ0;vSYiaUnJXnTEiJVuHLt`F06tm`-v9sr literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/06a9bb98f465ce2136ba9c5c3b15912de9101d7b b/tsl/test/fuzzing/compression/dictionary-text/06a9bb98f465ce2136ba9c5c3b15912de9101d7b new file mode 100644 index 0000000000000000000000000000000000000000..5e4def2134bc12c6096cbc3e36d8bfecb4b515be GIT binary patch literal 49 vcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=03;Y07+C%T!5;>O-#{)DFfsrDY1a(3 literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/20294feb1598e5893bda9b1fe7b9568ea0af237c b/tsl/test/fuzzing/compression/dictionary-text/20294feb1598e5893bda9b1fe7b9568ea0af237c new file mode 100644 index 0000000000000000000000000000000000000000..c1e41d196b7632bc80c136d5b8cc73549ef278bf GIT binary patch literal 168 zcmZQ#EJ%+}PAo~x$xmk}Nv$YhU|?YU0wRDI1o{~m{Tq7Eym02XCr006b>61D&U literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/2ee77e4ad0a5c13eb219c48f0e8964d9f6124737 b/tsl/test/fuzzing/compression/dictionary-text/2ee77e4ad0a5c13eb219c48f0e8964d9f6124737 new file mode 100644 index 0000000000000000000000000000000000000000..4005fcc6932d191ad48b575897334b43ed6e3b42 GIT binary patch literal 106 zcmZQ#EJ%+}PAo~x$xmk}Nv$YhU|?YU0wRDI1o#;k2)2Ik8^8Ys^49|9P~bQr$r hGynxzfS8G)LHz6g|6)4-*;)THFeot?0!aiY1^_Qz7?=P6 literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/3716f49a4dc3a527cc3682d04ae2036204c406ce b/tsl/test/fuzzing/compression/dictionary-text/3716f49a4dc3a527cc3682d04ae2036204c406ce new file mode 100644 index 0000000000000000000000000000000000000000..1b3f32bd3aec5348d5ccc26041ffc15bea3ee77a GIT binary patch literal 73 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEh17+4q>1R4G_Ff0?(Vfbd(0Fr@7u*OfG R*Zmu$<^@Q-9RmXg0|0EH4>te+ literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/3e8223a2a8a5034ca9935ccb1a9c8f41f4dfd782 b/tsl/test/fuzzing/compression/dictionary-text/3e8223a2a8a5034ca9935ccb1a9c8f41f4dfd782 new file mode 100644 index 0000000000000000000000000000000000000000..0231b3be12633af897ae0cbb127238a6b2a41336 GIT binary patch literal 137 zcmZQ#C`gY_PAo~x$xmk}Nv$Yh00I!f!ypa>Y9K<~08BD5{09SuTZQ=$Eg-cZ@PZLS JlTR`-003`O67>K8 literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/3f1762bf4bdff8f21a3de2f04afe2cc9bfd538a5 b/tsl/test/fuzzing/compression/dictionary-text/3f1762bf4bdff8f21a3de2f04afe2cc9bfd538a5 new file mode 100644 index 0000000000000000000000000000000000000000..50d3676bfc8d046ecab918d1d96cd0da0d5509aa GIT binary patch literal 169 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?Vt1QG%-1O){}fq2!bRnGVT6Ci_@% literal 0 HcmV?d00001 diff --git 
a/tsl/test/fuzzing/compression/dictionary-text/44991dd092a92994af67db1b51c9ca42261c27d3 b/tsl/test/fuzzing/compression/dictionary-text/44991dd092a92994af67db1b51c9ca42261c27d3 new file mode 100644 index 0000000000000000000000000000000000000000..590dd6b67f451fa5d357b596139f51f89e711bce GIT binary patch literal 73 zcmZQ#C`gY_PAo~x$xmk}Nv$Yh0D@E?X8F#*aEp;qTM;Ax0{jdNj0_xRW@dUo3l3Z4J} literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/44dae141798a56015e84bb90b8d47f2d1d9db66e b/tsl/test/fuzzing/compression/dictionary-text/44dae141798a56015e84bb90b8d47f2d1d9db66e new file mode 100644 index 0000000000000000000000000000000000000000..aeb1b586c9cd9c39bec350ce2bc5b4a4efcf3437 GIT binary patch literal 630 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W83?y8^VAC!L0C9E_5Bw1z2PLLb)5=Ta$1P~-JFz`cZodzJo0EmH7%!UkLt&G1xd{c%mKneob8UBM9O#lBg z96|*Q1wg>U!U9!@j{+(JS`q+t0}ho4|A0I}l1sqhlfutn1qS}$xNyP_5TOcZVTLY5 z>uDz>e}jd*kbtHaIOIT{Vah>D01)2^Kqwf=xdy}n`4B`yqX!(YP!1zp5GDzxL8_qu O7kO4Hs8nuxo;dgGB+a CoCqKQ literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/55b0c7dbbd7470a644c43aaf8aaaa520631e3bb5 b/tsl/test/fuzzing/compression/dictionary-text/55b0c7dbbd7470a644c43aaf8aaaa520631e3bb5 new file mode 100644 index 0000000000000000000000000000000000000000..8a8ca48a70e7d2dff97eabf492004c8ae5b0df50 GIT binary patch literal 169 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?Vt1QG&Lf`WpgK)hxW&k*tq9Y@&%nUQz+q-)rU%4801+2}N;5J501D+Bi~s-t literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/57a99548ae911ad4a20406a03b0a0ac7a9adc63a b/tsl/test/fuzzing/compression/dictionary-text/57a99548ae911ad4a20406a03b0a0ac7a9adc63a new file mode 100644 index 0000000000000000000000000000000000000000..69a45149e90dfff3d50c4a515d75729f959c63ea GIT binary patch literal 73 zcmZQ#EJ%+}PAo~x$xmk}Nv$Yh0D?v!W&~m;Am(RakaqymK*0P0NC3sz7z6~t0LYC7 HNq|HF#%lP6AZ3$WT@@K`0W~22KZFUS7$Gb_ zGaxWUC;+kqP?G^}tmxnW|MIA+8GuoOCIaFzF%&SQgBbAWV*CQ+!Q2T9aQOs~G&C9oKoTJTii5=v z1WW@$1d|1{3o~&%U|`?^YGwf{Xa4`^Kf)X_9fof@4M1U#6ca0OOC+ s_(FgQpCnOo`OIKtz^LIMO8uZvBrqskF*qENKRBF0Emr^&q7sH#0EUh|zyJUM literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/6407ff9cce2be245bacff6693615b8e382ba2a96 b/tsl/test/fuzzing/compression/dictionary-text/6407ff9cce2be245bacff6693615b8e382ba2a96 new file mode 100644 index 0000000000000000000000000000000000000000..2c7b25db1a824ee8f848f6f30b17eadfdb4c60a6 GIT binary patch literal 80 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=03;Y07+C)Q|Nj?=8O3xMzUedo1zCWYiJ@Vu VI8gN8e;~&Zh@pVl@IQl22>=g%7ySSL literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/664e56319f5a1ffc9bd3e9554f2358ace5a739ca b/tsl/test/fuzzing/compression/dictionary-text/664e56319f5a1ffc9bd3e9554f2358ace5a739ca new file mode 100644 index 0000000000000000000000000000000000000000..dd5e2c4e0f4c0146a602d7bfa0c9c82d83933491 GIT binary patch literal 193 zcmZQ#EJ%+}PAo~x$xmk}Nv$YhU|?YU0wRDI1o#;kfYk#{ z2m~@1nHVswU`PeZuzY7=xWx#ysGNZzvqTG`QCksA0|}t*j0_xRW@dV38M;8q%nTwU J09C-q008vMC4>L~ literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/6af6e86bfe31a3941d3085227f051c05777657de b/tsl/test/fuzzing/compression/dictionary-text/6af6e86bfe31a3941d3085227f051c05777657de new file mode 100644 index 0000000000000000000000000000000000000000..bf477cc4ee2cfddffde3c6e70a93b9a62c6764a2 GIT binary patch literal 321 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEU|m>3I^3D>o9!NX#mO? 
n0x{4?W&{8+ literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/6f04561347c9100edce326d87e065789d2d56185 b/tsl/test/fuzzing/compression/dictionary-text/6f04561347c9100edce326d87e065789d2d56185 new file mode 100644 index 0000000000000000000000000000000000000000..ebfa07015f2d0d08433f8f219517ed6102203218 GIT binary patch literal 72 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEh17+C&80Hc@=!?(x=pr8?^gi$_Fj3E*L DZR;38 literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/726be829733ebbca258b51fc29a79e543de46677 b/tsl/test/fuzzing/compression/dictionary-text/726be829733ebbca258b51fc29a79e543de46677 new file mode 100644 index 0000000000000000000000000000000000000000..06f5cccc2eb746c2e3de2a1d58fc3e7a33393895 GIT binary patch literal 193 zcmZQ#EJ%+}PAo~x$xmk}Nv$YhU|?YU03v`G1o#;k2)2Idz)T0jsCg#U}LA`XOx hLDYq!fmN%Pf<=fkWz{N>;w+$Ipc_JSV6J0i002G1P*MN@ literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/76bbc1ead78624711303acd22377969f0962736b b/tsl/test/fuzzing/compression/dictionary-text/76bbc1ead78624711303acd22377969f0962736b new file mode 100644 index 0000000000000000000000000000000000000000..9f41c3ead3b87ce2cae195bc341f9dacdcea3bab GIT binary patch literal 170 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?Vt1QG%-1R-G6s#VU!01aXkp{N0xi~@kR0)YdV JL?f6O7yv3YD&_zH literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/8127c19b14b9a5750a4731aef2f900a72ec6d802 b/tsl/test/fuzzing/compression/dictionary-text/8127c19b14b9a5750a4731aef2f900a72ec6d802 new file mode 100644 index 0000000000000000000000000000000000000000..245275c6f57679ce93ee4c580ada314b34633525 GIT binary patch literal 321 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEU|m>5A)|6x)L3?&r|j9-9Kj6e(mjt~ls zJVX*;C;$Q$78W!^uyKKkfHnufY{#aAfq@B&f50jsx>^1M0powM|NnIuzUedoWekBB kXe6^C#M_L&4S;+@29Q4?fSut#h{5z9M7@C74y73x0PEIC>;M1& literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/85e53271e14006f0265921d02d4d736cdc580b0b b/tsl/test/fuzzing/compression/dictionary-text/85e53271e14006f0265921d02d4d736cdc580b0b new file mode 100644 index 00000000000..ce542efaa51 --- /dev/null +++ b/tsl/test/fuzzing/compression/dictionary-text/85e53271e14006f0265921d02d4d736cdc580b0b @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/tsl/test/fuzzing/compression/dictionary-text/8f1eab4f75b343ac81f12c926a077aaa572cd002 b/tsl/test/fuzzing/compression/dictionary-text/8f1eab4f75b343ac81f12c926a077aaa572cd002 new file mode 100644 index 0000000000000000000000000000000000000000..dc71afbe1171f18ff619bb35380860f4ef8880f4 GIT binary patch literal 49 tcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEiK7z7yE80;Dt3?Uq#C;$(r2fP3P literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/9a88a0ae40cf185ed2c9bf4ebde71b048030211d b/tsl/test/fuzzing/compression/dictionary-text/9a88a0ae40cf185ed2c9bf4ebde71b048030211d new file mode 100644 index 0000000000000000000000000000000000000000..e4448119f7ea432c5201e5c01a5240ab8aa06558 GIT binary patch literal 82 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEh184Q6G14BQfyu1UDwq;;oQUcOIf?H5f aaMh|+Ku!Y?pE`AF)qfxWG8q{DGXMa?4iiZL literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/a707473dd0d734a745a15b98f20645839d69a660 b/tsl/test/fuzzing/compression/dictionary-text/a707473dd0d734a745a15b98f20645839d69a660 new file mode 100644 index 0000000000000000000000000000000000000000..33274c91e52ac2b4ac5b5b6b5ab2858c3b661a03 GIT binary patch literal 321 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEU|m>5A)|AA5r=_L&L45=j*3}!$XIB0fdK#u;|A3L literal 0 HcmV?d00001 
diff --git a/tsl/test/fuzzing/compression/dictionary-text/c78643e37119bb0f817531ba2ff265d6ef53c64e b/tsl/test/fuzzing/compression/dictionary-text/c78643e37119bb0f817531ba2ff265d6ef53c64e new file mode 100644 index 0000000000000000000000000000000000000000..3abd3cbdca9474ab42b8dc67bd7796638202a7ee GIT binary patch literal 621 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VD(jpQdq!$1_mHP0xUpK00c-v5T+SIq!-Qv8IBF8U=t)x%*n|Kt3rPw08lR!R8^H% r;RU)d*>W`CgvKX@3o{c$gB${JvYS6v4fY_?xzq( literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/e0485f22a1d04b0df70035eafa33f1278a52b8a6 b/tsl/test/fuzzing/compression/dictionary-text/e0485f22a1d04b0df70035eafa33f1278a52b8a6 new file mode 100644 index 0000000000000000000000000000000000000000..a2c9c112a2fab5de0cf5b0a1167a426dc0184e2c GIT binary patch literal 170 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VD+8zkByu5C~Rb1q=*~UleCKq6U&`_{?kRAp!dqH9_NBsZ)KL}zN69WSP)SNCD literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/e5c4a84e1935991b3103fecf70bf563eb82f7936 b/tsl/test/fuzzing/compression/dictionary-text/e5c4a84e1935991b3103fecf70bf563eb82f7936 new file mode 100644 index 0000000000000000000000000000000000000000..4c9b444ffd54a8954c60e3a53e7ee9855d4cd8da GIT binary patch literal 540 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?Xf0ut7)_yAA=11Cr+7VXRrfD()h3@kv3`Tzg_ zjAA+r-*g&)Jdg+zL&H~bpyIfD5GkYM}`bSIF00rU)r3noDnimy3f682zuP#Ed}4R{LH!NKByFBn~+W-`DP tDKg%ISOX>b;r9BOfdGmmFl>2)2F4daT0jsAR>1%RLm*5s yL>&W628j+0TeS)(3la$pjSYo@Sda#|W+ny{w*bY#&H)>m3giVMD+Y@K^#TCNNGk6D literal 0 HcmV?d00001 From 8e4a6cea2075371267bb3d2473c27b15fe0420b6 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 3 Jan 2024 16:29:52 +0100 Subject: [PATCH 164/249] Vectorized boolean operators Implement vectorized computation of AND, OR and NOT operators in WHERE clause. --- .../nodes/decompress_chunk/compressed_batch.c | 426 +++++++++++------- .../nodes/decompress_chunk/compressed_batch.h | 2 +- tsl/src/nodes/decompress_chunk/exec.c | 16 - tsl/src/nodes/decompress_chunk/planner.c | 36 ++ tsl/test/expected/decompress_vector_qual.out | 65 ++- tsl/test/sql/decompress_vector_qual.sql | 31 +- 6 files changed, 388 insertions(+), 188 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 974cacbc3c1..4c8585995ae 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -175,224 +175,316 @@ decompress_column(DecompressContext *dcontext, DecompressBatchState *batch_state } } -/* - * Compute the vectorized filters. Returns true if we have any passing rows. If not, - * it means the entire batch is filtered out, and we use this for further - * optimizations. - */ static bool -compute_vector_quals(DecompressContext *dcontext, DecompressBatchState *batch_state) +compute_simple_qual(DecompressContext *dcontext, DecompressBatchState *batch_state, Node *qual, + uint64 *restrict result, int n_result_words) { - if (!dcontext->vectorized_quals_constified) + /* + * For now we support "Var ? Const" predicates and + * ScalarArrayOperations. 
+ */ + List *args = NULL; + RegProcedure vector_const_opcode = InvalidOid; + ScalarArrayOpExpr *saop = NULL; + OpExpr *opexpr = NULL; + NullTest *nulltest = NULL; + if (IsA(qual, NullTest)) { - return true; + nulltest = castNode(NullTest, qual); + args = list_make1(nulltest->arg); + } + else if (IsA(qual, ScalarArrayOpExpr)) + { + saop = castNode(ScalarArrayOpExpr, qual); + args = saop->args; + vector_const_opcode = get_opcode(saop->opno); + } + else + { + opexpr = castNode(OpExpr, qual); + args = opexpr->args; + vector_const_opcode = get_opcode(opexpr->opno); } /* - * Allocate the bitmap that will hold the vectorized qual results. We will - * initialize it to all ones and AND the individual quals to it. + * Find the compressed column referred to by the Var. */ - const int bitmap_bytes = sizeof(uint64) * (((uint64) batch_state->total_batch_rows + 63) / 64); - batch_state->vector_qual_result = palloc(bitmap_bytes); - memset(batch_state->vector_qual_result, 0xFF, bitmap_bytes); - if (batch_state->total_batch_rows % 64 != 0) + Var *var = castNode(Var, linitial(args)); + CompressionColumnDescription *column_description = NULL; + int column_index = 0; + for (; column_index < dcontext->num_total_columns; column_index++) + { + column_description = &dcontext->template_columns[column_index]; + if (column_description->output_attno == var->varattno) + { + break; + } + } + Ensure(column_index < dcontext->num_total_columns, + "decompressed column %d not found in batch", + var->varattno); + Assert(column_description != NULL); + Assert(column_description->typid == var->vartype); + Ensure(column_description->type == COMPRESSED_COLUMN, + "only compressed columns are supported in vectorized quals"); + Assert(column_index < dcontext->num_compressed_columns); + + CompressedColumnValues *column_values = &batch_state->compressed_columns[column_index]; + + if (column_values->decompression_type == DT_Invalid) { /* - * We have to zero out the bits for past-the-end elements in the last - * bitmap word. Since all predicates are ANDed to the result bitmap, - * we can do it here once instead of doing it in each predicate. + * We decompress the compressed columns on demand, so that we can + * skip decompressing some columns if the entire batch doesn't pass + * the quals. */ - const uint64 mask = ((uint64) -1) >> (64 - batch_state->total_batch_rows % 64); - batch_state->vector_qual_result[batch_state->total_batch_rows / 64] = mask; + decompress_column(dcontext, batch_state, column_index); + Assert(column_values->decompression_type != DT_Invalid); } + Assert(column_values->decompression_type != DT_Iterator); + /* - * Compute the quals. + * Prepare to compute the vector predicate. We have to handle the + * default values in a special way because they don't produce the usual + * decompressed ArrowArrays. */ - ListCell *lc; - foreach (lc, dcontext->vectorized_quals_constified) + uint64 default_value_predicate_result; + uint64 *predicate_result = result; + const ArrowArray *vector = column_values->arrow; + if (column_values->arrow == NULL) { /* - * For now we support "Var ? Const" predicates and - * ScalarArrayOperations. + * The compressed column had a default value. We can't fall back to + * the non-vectorized quals now, so build a single-value ArrowArray + * with this default value, check if it passes the predicate, and apply + * it to the entire batch. 
*/ - List *args = NULL; - RegProcedure vector_const_opcode = InvalidOid; - ScalarArrayOpExpr *saop = NULL; - OpExpr *opexpr = NULL; - NullTest *nulltest = NULL; - if (IsA(lfirst(lc), NullTest)) - { - nulltest = castNode(NullTest, lfirst(lc)); - args = list_make1(nulltest->arg); - } - else if (IsA(lfirst(lc), ScalarArrayOpExpr)) - { - saop = castNode(ScalarArrayOpExpr, lfirst(lc)); - args = saop->args; - vector_const_opcode = get_opcode(saop->opno); - } - else - { - opexpr = castNode(OpExpr, lfirst(lc)); - args = opexpr->args; - vector_const_opcode = get_opcode(opexpr->opno); - } + Assert(column_values->decompression_type == DT_Default); /* - * Find the compressed column referred to by the Var. + * We saved the actual default value into the decompressed scan slot + * above, so pull it from there. */ - Var *var = castNode(Var, linitial(args)); - CompressionColumnDescription *column_description = NULL; - int column_index = 0; - for (; column_index < dcontext->num_total_columns; column_index++) - { - column_description = &dcontext->template_columns[column_index]; - if (column_description->output_attno == var->varattno) - { - break; - } - } - Ensure(column_index < dcontext->num_total_columns, - "decompressed column %d not found in batch", - var->varattno); - Assert(column_description != NULL); - Assert(column_description->typid == var->vartype); - Ensure(column_description->type == COMPRESSED_COLUMN, - "only compressed columns are supported in vectorized quals"); - Assert(column_index < dcontext->num_compressed_columns); - - CompressedColumnValues *column_values = &batch_state->compressed_columns[column_index]; - - if (column_values->decompression_type == DT_Invalid) - { - /* - * We decompress the compressed columns on demand, so that we can - * skip decompressing some columns if the entire batch doesn't pass - * the quals. - */ - decompress_column(dcontext, batch_state, column_index); - Assert(column_values->decompression_type != DT_Invalid); - } + vector = make_single_value_arrow(column_description->typid, + *column_values->output_value, + *column_values->output_isnull); - Assert(column_values->decompression_type != DT_Iterator); + /* + * We start from an all-valid bitmap, because the predicate is + * AND-ed to it. + */ + default_value_predicate_result = 1; + predicate_result = &default_value_predicate_result; + } + if (nulltest) + { + vector_nulltest(vector, nulltest->nulltesttype, predicate_result); + } + else + { /* - * Prepare to compute the vector predicate. We have to handle the - * default values in a special way because they don't produce the usual - * decompressed ArrowArrays. + * Find the vector_const predicate. */ - uint64 default_value_predicate_result; - uint64 *predicate_result = batch_state->vector_qual_result; - const ArrowArray *vector = column_values->arrow; - if (column_values->arrow == NULL) - { - /* - * The compressed column had a default value. We can't fall back to - * the non-vectorized quals now, so build a single-value ArrowArray - * with this default value, check if it passes the predicate, and apply - * it to the entire batch. - */ - Assert(column_values->decompression_type == DT_Default); + VectorPredicate *vector_const_predicate = get_vector_const_predicate(vector_const_opcode); + Assert(vector_const_predicate != NULL); - /* - * We saved the actual default value into the decompressed scan slot - * above, so pull it from there. 
- */ - vector = make_single_value_arrow(column_description->typid, - *column_values->output_value, - *column_values->output_isnull); + Ensure(IsA(lsecond(args), Const), + "failed to evaluate runtime constant in vectorized filter"); - /* - * We start from an all-valid bitmap, because the predicate is - * AND-ed to it. - */ - default_value_predicate_result = 1; - predicate_result = &default_value_predicate_result; - } + /* + * The vectorizable predicates should be STRICT, so we shouldn't see null + * constants here. + */ + Const *constnode = castNode(Const, lsecond(args)); + Ensure(!constnode->constisnull, "vectorized predicate called for a null value"); - if (nulltest) + /* + * At last, compute the predicate. + */ + if (saop) { - vector_nulltest(vector, nulltest->nulltesttype, predicate_result); + vector_array_predicate(vector_const_predicate, + saop->useOr, + vector, + constnode->constvalue, + predicate_result); } else { - /* - * Find the vector_const predicate. - */ - VectorPredicate *vector_const_predicate = - get_vector_const_predicate(vector_const_opcode); - Assert(vector_const_predicate != NULL); + vector_const_predicate(vector, constnode->constvalue, predicate_result); + } - /* - * The vectorizable predicates should be STRICT, so we shouldn't see null - * constants here. - */ - Const *constnode = castNode(Const, lsecond(args)); - Ensure(!constnode->constisnull, "vectorized predicate called for a null value"); + /* Account for nulls which shouldn't pass the predicate. */ + const size_t n = vector->length; + const size_t n_words = (n + 63) / 64; + const uint64 *restrict validity = (uint64 *restrict) vector->buffers[0]; + for (size_t i = 0; i < n_words; i++) + { + predicate_result[i] &= validity[i]; + } + } + + /* Process the result. */ + if (column_values->arrow == NULL) + { + /* The column had a default value. */ + Assert(column_values->decompression_type == DT_Default); + if (!(default_value_predicate_result & 1)) + { /* - * At last, compute the predicate. + * We had a default value for the compressed column, and it + * didn't pass the predicate, so the entire batch didn't pass. */ - if (saop) - { - vector_array_predicate(vector_const_predicate, - saop->useOr, - vector, - constnode->constvalue, - predicate_result); - } - else - { - vector_const_predicate(vector, constnode->constvalue, predicate_result); - } - - /* Account for nulls which shouldn't pass the predicate. */ - const size_t n = vector->length; - const size_t n_words = (n + 63) / 64; - const uint64 *restrict validity = (uint64 *restrict) vector->buffers[0]; - for (size_t i = 0; i < n_words; i++) + for (int i = 0; i < n_result_words; i++) { - predicate_result[i] &= validity[i]; + result[i] = 0; } } + } - /* Process the result. */ - if (column_values->arrow == NULL) - { - /* The column had a default value. */ - Assert(column_values->decompression_type == DT_Default); + /* + * Have to return whether we have any passing rows. + */ + bool have_passing_rows = false; + for (int i = 0; i < n_result_words; i++) + { + have_passing_rows |= result[i]; + } - if (!(default_value_predicate_result & 1)) - { - /* - * We had a default value for the compressed column, and it - * didn't pass the predicate, so the entire batch didn't pass. - */ - for (int i = 0; i < bitmap_bytes / 8; i++) - { - batch_state->vector_qual_result[i] = 0; - } - } - } + return have_passing_rows; +} - /* - * Have to return whether we have any passing rows. 
- */ +static bool compute_qual_conjunction(DecompressContext *dcontext, DecompressBatchState *batch_state, + List *quals, uint64 *restrict result, int n_result_words); + +static bool +compute_compound_qual(DecompressContext *dcontext, DecompressBatchState *batch_state, Node *qual, + uint64 *restrict result, int n_result_words) +{ + if (!IsA(qual, BoolExpr)) + { + return compute_simple_qual(dcontext, batch_state, qual, result, n_result_words); + } + + BoolExpr *boolexpr = castNode(BoolExpr, qual); + + if (boolexpr->boolop == AND_EXPR) + { + return compute_qual_conjunction(dcontext, + batch_state, + boolexpr->args, + result, + n_result_words); + } + + if (boolexpr->boolop == NOT_EXPR) + { + Assert(list_length(boolexpr->args) == 1); + uint64 *tmp_result = palloc(sizeof(uint64) * n_result_words); + compute_compound_qual(dcontext, + batch_state, + linitial(boolexpr->args), + tmp_result, + n_result_words); bool have_passing_rows = false; - for (int i = 0; i < bitmap_bytes / 8; i++) + for (int i = 0; i < n_result_words; i++) { - have_passing_rows |= batch_state->vector_qual_result[i]; + result[i] &= ~tmp_result[i]; + have_passing_rows |= result[i]; } - if (!have_passing_rows) + return have_passing_rows; + } + + Assert(boolexpr->boolop == OR_EXPR); + uint64 *or_result = palloc0(sizeof(uint64) * n_result_words); + uint64 *single_qual_result = palloc(sizeof(uint64) * n_result_words); + + ListCell *lc; + foreach (lc, boolexpr->args) + { + for (int i = 0; i < n_result_words; i++) { - return false; + single_qual_result[i] = (uint64) -1; + } + compute_compound_qual(dcontext, + batch_state, + lfirst(lc), + single_qual_result, + n_result_words); + for (int i = 0; i < n_result_words; i++) + { + or_result[i] |= single_qual_result[i]; } } + bool have_passing_rows = false; + for (int i = 0; i < n_result_words; i++) + { + result[i] &= or_result[i]; + have_passing_rows |= result[i]; + } + return have_passing_rows; +} +static bool +compute_qual_conjunction(DecompressContext *dcontext, DecompressBatchState *batch_state, + List *quals, uint64 *restrict result, int n_result_words) +{ + ListCell *lc; + foreach (lc, quals) + { + if (!compute_compound_qual(dcontext, batch_state, lfirst(lc), result, n_result_words)) + { + /* Early exit if no rows pass already. */ + return false; + } + } return true; } +/* + * Compute the vectorized filters. Returns true if we have any passing rows. If not, + * it means the entire batch is filtered out, and we use this for further + * optimizations. + */ +static bool +compute_vector_quals(DecompressContext *dcontext, DecompressBatchState *batch_state) +{ + if (!dcontext->vectorized_quals_constified) + { + return true; + } + + /* + * Allocate the bitmap that will hold the vectorized qual results. We will + * initialize it to all ones and AND the individual quals to it. + */ + const int bitmap_bytes = sizeof(uint64) * (((uint64) batch_state->total_batch_rows + 63) / 64); + batch_state->vector_qual_result = palloc(bitmap_bytes); + memset(batch_state->vector_qual_result, 0xFF, bitmap_bytes); + if (batch_state->total_batch_rows % 64 != 0) + { + /* + * We have to zero out the bits for past-the-end elements in the last + * bitmap word. Since all predicates are ANDed to the result bitmap, + * we can do it here once instead of doing it in each predicate. + */ + const uint64 mask = ((uint64) -1) >> (64 - batch_state->total_batch_rows % 64); + batch_state->vector_qual_result[batch_state->total_batch_rows / 64] = mask; + } + + /* + * Compute the quals. 
+ */ + return compute_qual_conjunction(dcontext, + batch_state, + dcontext->vectorized_quals_constified, + batch_state->vector_qual_result, + bitmap_bytes / 8); +} + /* * Initialize the batch decompression state with the new compressed tuple. */ diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.h b/tsl/src/nodes/decompress_chunk/compressed_batch.h index a20f961b127..29d66826b21 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.h +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.h @@ -69,7 +69,7 @@ typedef struct DecompressBatchState * row. Indexed same as arrow arrays, w/o accounting for the reverse scan * direction. Initialized to all ones, i.e. all rows pass. */ - uint64 *vector_qual_result; + uint64 *restrict vector_qual_result; CompressedColumnValues compressed_columns[FLEXIBLE_ARRAY_MEMBER]; } DecompressBatchState; diff --git a/tsl/src/nodes/decompress_chunk/exec.c b/tsl/src/nodes/decompress_chunk/exec.c index fa0aae42e4b..924292a2488 100644 --- a/tsl/src/nodes/decompress_chunk/exec.c +++ b/tsl/src/nodes/decompress_chunk/exec.c @@ -453,22 +453,6 @@ decompress_chunk_begin(CustomScanState *node, EState *estate, int eflags) } } - List *args = NIL; - if (IsA(constified, OpExpr)) - { - args = castNode(OpExpr, constified)->args; - } - else if (IsA(constified, ScalarArrayOpExpr)) - { - args = castNode(ScalarArrayOpExpr, constified)->args; - } - - if (args) - { - Ensure(IsA(lsecond(args), Const), - "failed to evaluate runtime constant in vectorized filter"); - } - dcontext->vectorized_quals_constified = lappend(dcontext->vectorized_quals_constified, constified); } diff --git a/tsl/src/nodes/decompress_chunk/planner.c b/tsl/src/nodes/decompress_chunk/planner.c index f2374b33655..5358d45b424 100644 --- a/tsl/src/nodes/decompress_chunk/planner.c +++ b/tsl/src/nodes/decompress_chunk/planner.c @@ -419,6 +419,42 @@ is_not_runtime_constant(Node *node) static Node * make_vectorized_qual(DecompressChunkPath *path, Node *qual) { + /* + * and/or/not + */ + if (IsA(qual, BoolExpr)) + { + BoolExpr *boolexpr = castNode(BoolExpr, qual); + bool need_copy = false; + List *vectorized_args = NIL; + ListCell *lc; + foreach (lc, boolexpr->args) + { + Node *arg = lfirst(lc); + Node *vectorized_arg = make_vectorized_qual(path, arg); + if (vectorized_arg == NULL) + { + return NULL; + } + + if (vectorized_arg != arg) + { + need_copy = true; + } + + vectorized_args = lappend(vectorized_args, vectorized_arg); + } + + if (!need_copy) + { + return (Node *) boolexpr; + } + + BoolExpr *boolexpr_copy = (BoolExpr *) copyObject(boolexpr); + boolexpr_copy->args = vectorized_args; + return (Node *) boolexpr_copy; + } + /* * Currently we vectorize some "Var op Const" binary predicates, * scalar array operations with these predicates, and null test. diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index 57423179010..63d178bb931 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -165,8 +165,10 @@ select * from vectorqual where ts > '2021-01-01 00:00:00' and metric3 > 40 order Wed Jan 01 00:00:00 2025 | 52 | 5 | 53 | 54 | tag5 (2 rows) --- ORed constrainst on multiple columns (not vectorized for now). -set timescaledb.debug_require_vector_qual to 'forbid'; +-- ORed constrainst on multiple columns. 
+set timescaledb.debug_require_vector_qual to 'only'; +--set timescaledb.debug_require_vector_qual to 'forbid'; +--set timescaledb.enable_bulk_decompression to off; select * from vectorqual where ts > '2021-01-01 00:00:00' or metric3 > 40 order by vectorqual; ts | metric2 | device | metric3 | metric4 | tag --------------------------+---------+--------+---------+---------+------ @@ -177,7 +179,66 @@ select * from vectorqual where ts > '2021-01-01 00:00:00' or metric3 > 40 order Wed Jan 01 00:00:00 2025 | 52 | 5 | 53 | 54 | tag5 (5 rows) +-- Some more tests for boolean operations. +select count(*) from vectorqual where ts > '2021-01-01 00:00:00'; + count +------- + 3 +(1 row) + +select count(*) from vectorqual where 40 < metric3; + count +------- + 4 +(1 row) + +select count(*) from vectorqual where metric2 < 0; + count +------- + 0 +(1 row) + +select count(*) from vectorqual where ts > '2021-01-01 00:00:00' or 40 < metric3; + count +------- + 5 +(1 row) + +select count(*) from vectorqual where ts > '2021-01-01 00:00:00' or not 40 >= metric3; + count +------- + 5 +(1 row) + +select count(*) from vectorqual where metric2 < 30 and (ts > '2021-01-01 00:00:00' or not 40 >= metric3); + count +------- + 2 +(1 row) + +select count(*) from vectorqual where not metric2 < 30 or ((not ts > '2021-01-01 00:00:00') and (not not 40 >= metric3)); + count +------- + 3 +(1 row) + +-- early exit inside AND BoolExpr +select count(*) from vectorqual where metric2 < 0 or (metric2 < -1 and 40 >= metric3); + count +------- + 0 +(1 row) + +-- early exit after OR BoolExpr +select count(*) from vectorqual where metric2 < 0 or metric2 < -1; + count +------- + 0 +(1 row) + +reset timescaledb.enable_bulk_decompression; -- Test with unary operator. +set timescaledb.debug_require_vector_qual to 'forbid'; create operator !! (function = 'bool', rightarg = int4); select count(*) from vectorqual where !!metric3; count diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index 04a5da1c39a..5fc8d5d64eb 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -60,12 +60,39 @@ select metric4 from vectorqual where ts > '2021-01-01 00:00:00' order by 1; select * from vectorqual where ts > '2021-01-01 00:00:00' and metric3 > 40 order by vectorqual; --- ORed constrainst on multiple columns (not vectorized for now). -set timescaledb.debug_require_vector_qual to 'forbid'; +-- ORed constrainst on multiple columns. +set timescaledb.debug_require_vector_qual to 'only'; +--set timescaledb.debug_require_vector_qual to 'forbid'; +--set timescaledb.enable_bulk_decompression to off; + select * from vectorqual where ts > '2021-01-01 00:00:00' or metric3 > 40 order by vectorqual; +-- Some more tests for boolean operations. 
+select count(*) from vectorqual where ts > '2021-01-01 00:00:00'; + +select count(*) from vectorqual where 40 < metric3; + +select count(*) from vectorqual where metric2 < 0; + +select count(*) from vectorqual where ts > '2021-01-01 00:00:00' or 40 < metric3; + +select count(*) from vectorqual where ts > '2021-01-01 00:00:00' or not 40 >= metric3; + +select count(*) from vectorqual where metric2 < 30 and (ts > '2021-01-01 00:00:00' or not 40 >= metric3); + +select count(*) from vectorqual where not metric2 < 30 or ((not ts > '2021-01-01 00:00:00') and (not not 40 >= metric3)); + +-- early exit inside AND BoolExpr +select count(*) from vectorqual where metric2 < 0 or (metric2 < -1 and 40 >= metric3); + +-- early exit after OR BoolExpr +select count(*) from vectorqual where metric2 < 0 or metric2 < -1; + +reset timescaledb.enable_bulk_decompression; + -- Test with unary operator. +set timescaledb.debug_require_vector_qual to 'forbid'; create operator !! (function = 'bool', rightarg = int4); select count(*) from vectorqual where !!metric3; From fdffe18db77d6bc313ef2cb029794359053ebd4e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 3 Jan 2024 16:59:24 +0100 Subject: [PATCH 165/249] coverage for not --- tsl/test/expected/decompress_vector_qual.out | 22 +++++--------------- tsl/test/sql/decompress_vector_qual.sql | 14 +++++-------- 2 files changed, 10 insertions(+), 26 deletions(-) diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index 63d178bb931..40c22ca3676 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -167,8 +167,8 @@ select * from vectorqual where ts > '2021-01-01 00:00:00' and metric3 > 40 order -- ORed constrainst on multiple columns. 
set timescaledb.debug_require_vector_qual to 'only'; ---set timescaledb.debug_require_vector_qual to 'forbid'; ---set timescaledb.enable_bulk_decompression to off; +-- set timescaledb.debug_require_vector_qual to 'forbid'; +-- set timescaledb.enable_bulk_decompression to off; select * from vectorqual where ts > '2021-01-01 00:00:00' or metric3 > 40 order by vectorqual; ts | metric2 | device | metric3 | metric4 | tag --------------------------+---------+--------+---------+---------+------ @@ -204,33 +204,21 @@ select count(*) from vectorqual where ts > '2021-01-01 00:00:00' or 40 < metric3 5 (1 row) -select count(*) from vectorqual where ts > '2021-01-01 00:00:00' or not 40 >= metric3; +select count(*) from vectorqual where not (ts <= '2021-01-01 00:00:00' and 40 >= metric3); count ------- 5 (1 row) -select count(*) from vectorqual where metric2 < 30 and (ts > '2021-01-01 00:00:00' or not 40 >= metric3); - count -------- - 2 -(1 row) - -select count(*) from vectorqual where not metric2 < 30 or ((not ts > '2021-01-01 00:00:00') and (not not 40 >= metric3)); - count -------- - 3 -(1 row) - -- early exit inside AND BoolExpr -select count(*) from vectorqual where metric2 < 0 or (metric2 < -1 and 40 >= metric3); +select count(*) from vectorqual where metric2 < 0 or (metric4 < -1 and 40 >= metric3); count ------- 0 (1 row) -- early exit after OR BoolExpr -select count(*) from vectorqual where metric2 < 0 or metric2 < -1; +select count(*) from vectorqual where metric2 < 0 or metric3 < -1; count ------- 0 diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index 5fc8d5d64eb..2471d52d1fe 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -62,8 +62,8 @@ select * from vectorqual where ts > '2021-01-01 00:00:00' and metric3 > 40 order -- ORed constrainst on multiple columns. 
set timescaledb.debug_require_vector_qual to 'only'; ---set timescaledb.debug_require_vector_qual to 'forbid'; ---set timescaledb.enable_bulk_decompression to off; +-- set timescaledb.debug_require_vector_qual to 'forbid'; +-- set timescaledb.enable_bulk_decompression to off; select * from vectorqual where ts > '2021-01-01 00:00:00' or metric3 > 40 order by vectorqual; @@ -76,17 +76,13 @@ select count(*) from vectorqual where metric2 < 0; select count(*) from vectorqual where ts > '2021-01-01 00:00:00' or 40 < metric3; -select count(*) from vectorqual where ts > '2021-01-01 00:00:00' or not 40 >= metric3; - -select count(*) from vectorqual where metric2 < 30 and (ts > '2021-01-01 00:00:00' or not 40 >= metric3); - -select count(*) from vectorqual where not metric2 < 30 or ((not ts > '2021-01-01 00:00:00') and (not not 40 >= metric3)); +select count(*) from vectorqual where not (ts <= '2021-01-01 00:00:00' and 40 >= metric3); -- early exit inside AND BoolExpr -select count(*) from vectorqual where metric2 < 0 or (metric2 < -1 and 40 >= metric3); +select count(*) from vectorqual where metric2 < 0 or (metric4 < -1 and 40 >= metric3); -- early exit after OR BoolExpr -select count(*) from vectorqual where metric2 < 0 or metric2 < -1; +select count(*) from vectorqual where metric2 < 0 or metric3 < -1; reset timescaledb.enable_bulk_decompression; From a6e7bf2675642ec02c156e23c04a3fc3bc1699fe Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 4 Jan 2024 11:53:15 +0100 Subject: [PATCH 166/249] not --- .../nodes/decompress_chunk/compressed_batch.c | 24 +++++-------------- tsl/src/nodes/decompress_chunk/planner.c | 10 ++++++++ tsl/test/expected/decompress_vector_qual.out | 18 ++++++++++++++ tsl/test/sql/decompress_vector_qual.sql | 3 +++ 4 files changed, 37 insertions(+), 18 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 4c8585995ae..26368fd6432 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -371,6 +371,12 @@ compute_compound_qual(DecompressContext *dcontext, DecompressBatchState *batch_s BoolExpr *boolexpr = castNode(BoolExpr, qual); + /* + * Postgres removes NOT for operators we can vectorize, so we don't support + * NOT and consider it non-vectorizable at planning time. 
+ */ + Assert(boolexpr->boolop != NOT_EXPR); + if (boolexpr->boolop == AND_EXPR) { return compute_qual_conjunction(dcontext, @@ -380,24 +386,6 @@ compute_compound_qual(DecompressContext *dcontext, DecompressBatchState *batch_s n_result_words); } - if (boolexpr->boolop == NOT_EXPR) - { - Assert(list_length(boolexpr->args) == 1); - uint64 *tmp_result = palloc(sizeof(uint64) * n_result_words); - compute_compound_qual(dcontext, - batch_state, - linitial(boolexpr->args), - tmp_result, - n_result_words); - bool have_passing_rows = false; - for (int i = 0; i < n_result_words; i++) - { - result[i] &= ~tmp_result[i]; - have_passing_rows |= result[i]; - } - return have_passing_rows; - } - Assert(boolexpr->boolop == OR_EXPR); uint64 *or_result = palloc0(sizeof(uint64) * n_result_words); uint64 *single_qual_result = palloc(sizeof(uint64) * n_result_words); diff --git a/tsl/src/nodes/decompress_chunk/planner.c b/tsl/src/nodes/decompress_chunk/planner.c index 5358d45b424..f1971e3220c 100644 --- a/tsl/src/nodes/decompress_chunk/planner.c +++ b/tsl/src/nodes/decompress_chunk/planner.c @@ -425,6 +425,16 @@ make_vectorized_qual(DecompressChunkPath *path, Node *qual) if (IsA(qual, BoolExpr)) { BoolExpr *boolexpr = castNode(BoolExpr, qual); + + if (boolexpr->boolop == NOT_EXPR) + { + /* + * NOT should be removed by Postgres for all operators we can + * vectorize (see prepqual.c), so we don't support it. + */ + return NULL; + } + bool need_copy = false; List *vectorized_args = NIL; ListCell *lc; diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index 40c22ca3676..d28a922e0b1 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -234,6 +234,12 @@ select count(*) from vectorqual where !!metric3; 5 (1 row) +select count(*) from vectorqual where not !!metric3; + count +------- + 0 +(1 row) + -- Custom operator on column that supports bulk decompression is not vectorized. set timescaledb.debug_require_vector_qual to 'forbid'; create function int4eqq(int4, int4) returns bool as 'int4eq' language internal; @@ -250,6 +256,18 @@ select count(*) from vectorqual where metric3 === any(array[777, 888]); 2 (1 row) +select count(*) from vectorqual where not metric3 === 777; + count +------- + 3 +(1 row) + +select count(*) from vectorqual where metric3 = 777 or metric3 === 777; + count +------- + 2 +(1 row) + -- It also doesn't have a commutator. select count(*) from vectorqual where 777 === metric3; count diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index 2471d52d1fe..bef9a238581 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -91,6 +91,7 @@ reset timescaledb.enable_bulk_decompression; set timescaledb.debug_require_vector_qual to 'forbid'; create operator !! (function = 'bool', rightarg = int4); select count(*) from vectorqual where !!metric3; +select count(*) from vectorqual where not !!metric3; -- Custom operator on column that supports bulk decompression is not vectorized. 
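On NOT handling: as the comments added in this patch note, canonicalize_qual in prepqual.c rewrites NOT over any operator that has a negator, so "not 40 >= metric3" reaches the executor as "40 < metric3" and a NOT_EXPR normally never appears over a vectorizable operator. It survives only over operators without a negator, such as the custom === defined below, and those quals are simply left non-vectorized. For reference, evaluating a NOT over a row bitmap amounts to AND-ing in the complement, which is what the executor branch removed above used to do. A standalone sketch, with illustrative names and stdint types:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /*
     * AND the complement of a child-qual bitmap into the accumulated result.
     * Bits past the end of the batch stay zero because they are already zero
     * in "acc"; only bits that were set can remain set.
     */
    static void
    andnot_words(uint64_t *restrict acc, const uint64_t *restrict child, size_t nwords)
    {
        for (size_t i = 0; i < nwords; i++)
            acc[i] &= ~child[i];
    }

    int
    main(void)
    {
        uint64_t acc = 0x1F;   /* 5-row batch, all rows still pass     */
        uint64_t child = 0x13; /* rows 0, 1, 4 pass the negated qual   */
        andnot_words(&acc, &child, 1);
        /* acc is now 0x0C: only rows 2 and 3 pass the NOT. */
        printf("result word: 0x%llx\n", (unsigned long long) acc);
        return 0;
    }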
@@ -99,6 +100,8 @@ create function int4eqq(int4, int4) returns bool as 'int4eq' language internal; create operator === (function = 'int4eqq', rightarg = int4, leftarg = int4); select count(*) from vectorqual where metric3 === 777; select count(*) from vectorqual where metric3 === any(array[777, 888]); +select count(*) from vectorqual where not metric3 === 777; +select count(*) from vectorqual where metric3 = 777 or metric3 === 777; -- It also doesn't have a commutator. select count(*) from vectorqual where 777 === metric3; From 195a865699b83ae843312afb7c51965ba359b692 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 4 Jan 2024 11:53:33 +0100 Subject: [PATCH 167/249] benchmark boolexpr (2024-01-04 no. 2) From 50c32f0a31b982bcaff7e9fd7ce1c01cb8f09de6 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 4 Jan 2024 23:04:08 +0100 Subject: [PATCH 168/249] early exit --- .../nodes/decompress_chunk/compressed_batch.c | 85 ++++++++++++------- .../decompress_chunk/pred_vector_array.c | 21 ++--- 2 files changed, 66 insertions(+), 40 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 26368fd6432..7d7616de0f3 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -177,8 +177,10 @@ decompress_column(DecompressContext *dcontext, DecompressBatchState *batch_state static bool compute_simple_qual(DecompressContext *dcontext, DecompressBatchState *batch_state, Node *qual, - uint64 *restrict result, int n_result_words) + uint64 *restrict result) { + const size_t n_result_words = (batch_state->total_batch_rows + 63) / 64; + /* * For now we support "Var ? Const" predicates and * ScalarArrayOperations. @@ -317,10 +319,8 @@ compute_simple_qual(DecompressContext *dcontext, DecompressBatchState *batch_sta } /* Account for nulls which shouldn't pass the predicate. */ - const size_t n = vector->length; - const size_t n_words = (n + 63) / 64; const uint64 *restrict validity = (uint64 *restrict) vector->buffers[0]; - for (size_t i = 0; i < n_words; i++) + for (size_t i = 0; i < n_result_words; i++) { predicate_result[i] &= validity[i]; } @@ -338,7 +338,7 @@ compute_simple_qual(DecompressContext *dcontext, DecompressBatchState *batch_sta * We had a default value for the compressed column, and it * didn't pass the predicate, so the entire batch didn't pass. */ - for (int i = 0; i < n_result_words; i++) + for (size_t i = 0; i < n_result_words; i++) { result[i] = 0; } @@ -349,24 +349,24 @@ compute_simple_qual(DecompressContext *dcontext, DecompressBatchState *batch_sta * Have to return whether we have any passing rows. 
*/ bool have_passing_rows = false; - for (int i = 0; i < n_result_words; i++) + for (size_t i = 0; i < n_result_words; i++) { - have_passing_rows |= result[i]; + have_passing_rows |= !!result[i] != 0; } return have_passing_rows; } static bool compute_qual_conjunction(DecompressContext *dcontext, DecompressBatchState *batch_state, - List *quals, uint64 *restrict result, int n_result_words); + List *quals, uint64 *restrict result); static bool compute_compound_qual(DecompressContext *dcontext, DecompressBatchState *batch_state, Node *qual, - uint64 *restrict result, int n_result_words) + uint64 *restrict result) { if (!IsA(qual, BoolExpr)) { - return compute_simple_qual(dcontext, batch_state, qual, result, n_result_words); + return compute_simple_qual(dcontext, batch_state, qual, result); } BoolExpr *boolexpr = castNode(BoolExpr, qual); @@ -379,53 +379,79 @@ compute_compound_qual(DecompressContext *dcontext, DecompressBatchState *batch_s if (boolexpr->boolop == AND_EXPR) { - return compute_qual_conjunction(dcontext, - batch_state, - boolexpr->args, - result, - n_result_words); + return compute_qual_conjunction(dcontext, batch_state, boolexpr->args, result); } Assert(boolexpr->boolop == OR_EXPR); - uint64 *or_result = palloc0(sizeof(uint64) * n_result_words); + + const size_t n_result_words = (batch_state->total_batch_rows + 63) / 64; + uint64 *or_result = palloc(sizeof(uint64) * n_result_words); + for (size_t i = 0; i < n_result_words; i++) + { + or_result[i] = 0; + } + if (batch_state->total_batch_rows % 64 != 0) + { + /* + * Set the bits for past-the-end elements to 1. This way it's more + * convenient to check for early exit, and the final result should + * have them already set to 0 so it doesn't matter. + */ + const uint64 mask = ((uint64) -1) << (batch_state->total_batch_rows % 64); + or_result[n_result_words - 1] = mask; + } + uint64 *single_qual_result = palloc(sizeof(uint64) * n_result_words); ListCell *lc; foreach (lc, boolexpr->args) { - for (int i = 0; i < n_result_words; i++) + for (size_t i = 0; i < n_result_words; i++) { single_qual_result[i] = (uint64) -1; } - compute_compound_qual(dcontext, - batch_state, - lfirst(lc), - single_qual_result, - n_result_words); - for (int i = 0; i < n_result_words; i++) + compute_compound_qual(dcontext, batch_state, lfirst(lc), single_qual_result); + bool all_rows_pass = true; + for (size_t i = 0; i < n_result_words; i++) { or_result[i] |= single_qual_result[i]; + /* + * Note that we have set the bits for past-the-end rows in + * or_result to 1, so we can use simple comparison to zero here. + */ + all_rows_pass &= (~or_result[i] == 0); + } + if (all_rows_pass) + { + /* + * We can sometimes avoing reading the columns required for the + * rest of conditions if we break out early here. + */ + return true; } } bool have_passing_rows = false; - for (int i = 0; i < n_result_words; i++) + for (size_t i = 0; i < n_result_words; i++) { result[i] &= or_result[i]; - have_passing_rows |= result[i]; + have_passing_rows |= result[i] != 0; } return have_passing_rows; } static bool compute_qual_conjunction(DecompressContext *dcontext, DecompressBatchState *batch_state, - List *quals, uint64 *restrict result, int n_result_words) + List *quals, uint64 *restrict result) { ListCell *lc; foreach (lc, quals) { - if (!compute_compound_qual(dcontext, batch_state, lfirst(lc), result, n_result_words)) + if (!compute_compound_qual(dcontext, batch_state, lfirst(lc), result)) { - /* Early exit if no rows pass already. */ + /* + * Exit early if no rows pass already. 
This might allow us to avoid + * reading the columns required for the subsequent quals. + */ return false; } } @@ -469,8 +495,7 @@ compute_vector_quals(DecompressContext *dcontext, DecompressBatchState *batch_st return compute_qual_conjunction(dcontext, batch_state, dcontext->vectorized_quals_constified, - batch_state->vector_qual_result, - bitmap_bytes / 8); + batch_state->vector_qual_result); } /* diff --git a/tsl/src/nodes/decompress_chunk/pred_vector_array.c b/tsl/src/nodes/decompress_chunk/pred_vector_array.c index da154644d9b..e743ae2bccd 100644 --- a/tsl/src/nodes/decompress_chunk/pred_vector_array.c +++ b/tsl/src/nodes/decompress_chunk/pred_vector_array.c @@ -138,28 +138,29 @@ vector_array_predicate_impl(VectorPredicate *vector_const_predicate, bool is_or, { if (is_or) { - /* - * Note that we have set the bits for past-the-end rows in - * array_result to 1, so we can use simple AND here. - */ - uint64 all_rows_match = -1; + bool all_rows_match = true; for (size_t word = 0; word < result_words; word++) { - all_rows_match &= array_result[word]; + /* + * Note that we have set the bits for past-the-end rows in + * array_result to 1, so we can use simple comparison to + * zero here. + */ + all_rows_match &= (~array_result[word] == 0); } - if (all_rows_match == -1ULL) + if (all_rows_match) { return; } } else { - uint64 any_rows_match = 0; + bool any_rows_match = false; for (size_t word = 0; word < result_words; word++) { - any_rows_match |= final_result[word]; + any_rows_match |= (final_result[word] != 0); } - if (any_rows_match == 0) + if (!any_rows_match) { return; } From c8cb25ffeb26ad72c397bf3641e1b1e76be6e37a Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 4 Jan 2024 23:04:25 +0100 Subject: [PATCH 169/249] benchmark boolexpr (2024-01-04 no. 
1) From f24cd2024fd39320965d7323bcdbb1c8b0c79c66 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 5 Jan 2024 18:43:05 +0100 Subject: [PATCH 170/249] cleanup and more tests --- .../nodes/decompress_chunk/compressed_batch.c | 75 ++++--- tsl/test/expected/decompress_vector_qual.out | 206 ++++++++++++++++++ tsl/test/sql/decompress_vector_qual.sql | 45 ++++ 3 files changed, 290 insertions(+), 36 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 7d7616de0f3..175873a9b35 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -351,39 +351,38 @@ compute_simple_qual(DecompressContext *dcontext, DecompressBatchState *batch_sta bool have_passing_rows = false; for (size_t i = 0; i < n_result_words; i++) { - have_passing_rows |= !!result[i] != 0; + have_passing_rows |= result[i] != 0; } return have_passing_rows; } -static bool compute_qual_conjunction(DecompressContext *dcontext, DecompressBatchState *batch_state, - List *quals, uint64 *restrict result); +static bool compute_compound_qual(DecompressContext *dcontext, DecompressBatchState *batch_state, + Node *qual, uint64 *restrict result); static bool -compute_compound_qual(DecompressContext *dcontext, DecompressBatchState *batch_state, Node *qual, - uint64 *restrict result) +compute_qual_conjunction(DecompressContext *dcontext, DecompressBatchState *batch_state, + List *quals, uint64 *restrict result) { - if (!IsA(qual, BoolExpr)) - { - return compute_simple_qual(dcontext, batch_state, qual, result); - } - - BoolExpr *boolexpr = castNode(BoolExpr, qual); - - /* - * Postgres removes NOT for operators we can vectorize, so we don't support - * NOT and consider it non-vectorizable at planning time. - */ - Assert(boolexpr->boolop != NOT_EXPR); - - if (boolexpr->boolop == AND_EXPR) + ListCell *lc; + foreach (lc, quals) { - return compute_qual_conjunction(dcontext, batch_state, boolexpr->args, result); + if (!compute_compound_qual(dcontext, batch_state, lfirst(lc), result)) + { + /* + * Exit early if no rows pass already. This might allow us to avoid + * reading the columns required for the subsequent quals. + */ + return false; + } } + return true; +} - Assert(boolexpr->boolop == OR_EXPR); - +static bool +compute_qual_disjunction(DecompressContext *dcontext, DecompressBatchState *batch_state, + List *quals, uint64 *restrict result) +{ const size_t n_result_words = (batch_state->total_batch_rows + 63) / 64; uint64 *or_result = palloc(sizeof(uint64) * n_result_words); for (size_t i = 0; i < n_result_words; i++) @@ -404,7 +403,7 @@ compute_compound_qual(DecompressContext *dcontext, DecompressBatchState *batch_s uint64 *single_qual_result = palloc(sizeof(uint64) * n_result_words); ListCell *lc; - foreach (lc, boolexpr->args) + foreach (lc, quals) { for (size_t i = 0; i < n_result_words; i++) { @@ -440,22 +439,26 @@ compute_compound_qual(DecompressContext *dcontext, DecompressBatchState *batch_s } static bool -compute_qual_conjunction(DecompressContext *dcontext, DecompressBatchState *batch_state, - List *quals, uint64 *restrict result) +compute_compound_qual(DecompressContext *dcontext, DecompressBatchState *batch_state, Node *qual, + uint64 *restrict result) { - ListCell *lc; - foreach (lc, quals) + if (!IsA(qual, BoolExpr)) { - if (!compute_compound_qual(dcontext, batch_state, lfirst(lc), result)) - { - /* - * Exit early if no rows pass already. 
This might allow us to avoid - * reading the columns required for the subsequent quals. - */ - return false; - } + return compute_simple_qual(dcontext, batch_state, qual, result); } - return true; + + BoolExpr *boolexpr = castNode(BoolExpr, qual); + if (boolexpr->boolop == AND_EXPR) + { + return compute_qual_conjunction(dcontext, batch_state, boolexpr->args, result); + } + + /* + * Postgres removes NOT for operators we can vectorize, so we don't support + * NOT and consider it non-vectorizable at planning time. So only OR is left. + */ + Assert(boolexpr->boolop == OR_EXPR); + return compute_qual_disjunction(dcontext, batch_state, boolexpr->args, result); } /* diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index d28a922e0b1..67701026d6f 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -620,6 +620,212 @@ select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 12]) 0 (1 row) +-- Also check early exit for AND/OR. Top-level clause must be OR, because top-level +-- AND is flattened into a list. +select count(*) from singlebatch where (metric2 < 20 and metric2 < 30) or metric3 = 777; + count +------- + 2 +(1 row) + +select count(*) from singlebatch where (metric2 < 30 and metric2 < 20) or metric3 = 777; + count +------- + 2 +(1 row) + +select count(*) from singlebatch where metric3 = 777 or (metric2 < 20 and metric2 < 30); + count +------- + 2 +(1 row) + +select count(*) from singlebatch where metric3 = 777 or (metric2 < 30 and metric2 < 20); + count +------- + 2 +(1 row) + +select count(*) from vectorqual where (metric2 < 20 and metric2 < 30) or metric3 = 777; + count +------- + 2 +(1 row) + +select count(*) from vectorqual where (metric2 < 30 and metric2 < 20) or metric3 = 777; + count +------- + 2 +(1 row) + +select count(*) from vectorqual where metric3 = 777 or (metric2 < 20 and metric2 < 30); + count +------- + 2 +(1 row) + +select count(*) from vectorqual where metric3 = 777 or (metric2 < 30 and metric2 < 20); + count +------- + 2 +(1 row) + +select count(*) from singlebatch where metric2 < 20 or metric3 < 50 or metric3 > 50; + count +------- + 5 +(1 row) + +select count(*) from singlebatch where metric2 < 20 or metric3 > 50 or metric3 < 50; + count +------- + 5 +(1 row) + +select count(*) from singlebatch where metric3 < 50 or metric2 < 20 or metric3 > 50; + count +------- + 5 +(1 row) + +select count(*) from singlebatch where metric3 > 50 or metric3 < 50 or metric2 < 20; + count +------- + 5 +(1 row) + +select count(*) from vectorqual where metric2 < 20 or metric3 < 50 or metric3 > 50; + count +------- + 5 +(1 row) + +select count(*) from vectorqual where metric2 < 20 or metric3 > 50 or metric3 < 50; + count +------- + 5 +(1 row) + +select count(*) from vectorqual where metric3 < 50 or metric2 < 20 or metric3 > 50; + count +------- + 5 +(1 row) + +select count(*) from vectorqual where metric3 > 50 or metric3 < 50 or metric2 < 20; + count +------- + 5 +(1 row) + +select count(*) from singlebatch where metric2 = 12 or metric3 = 888; + count +------- + 1 +(1 row) + +select count(*) from singlebatch where metric2 = 22 or metric3 = 888; + count +------- + 1 +(1 row) + +select count(*) from singlebatch where metric2 = 32 or metric3 = 888; + count +------- + 1 +(1 row) + +select count(*) from singlebatch where metric2 = 42 or metric3 = 888; + count +------- + 1 +(1 row) + +select count(*) from singlebatch where metric2 = 52 or metric3 = 888; + count 
+------- + 1 +(1 row) + +select count(*) from vectorqual where metric2 = 12 or metric3 = 888; + count +------- + 1 +(1 row) + +select count(*) from vectorqual where metric2 = 22 or metric3 = 888; + count +------- + 1 +(1 row) + +select count(*) from vectorqual where metric2 = 32 or metric3 = 888; + count +------- + 1 +(1 row) + +select count(*) from vectorqual where metric2 = 42 or metric3 = 888; + count +------- + 1 +(1 row) + +select count(*) from vectorqual where metric2 = 52 or metric3 = 888; + count +------- + 1 +(1 row) + +select count(*) from singlebatch where ts > '2024-01-01' or (metric3 = 777 and metric2 = 12); + count +------- + 1 +(1 row) + +select count(*) from singlebatch where ts > '2024-01-01' or (metric3 = 777 and metric2 = 666); + count +------- + 0 +(1 row) + +select count(*) from singlebatch where ts > '2024-01-01' or (metric3 = 888 and metric2 = 12); + count +------- + 0 +(1 row) + +select count(*) from singlebatch where ts > '2024-01-01' or (metric3 = 888 and metric2 = 666); + count +------- + 0 +(1 row) + +select count(*) from vectorqual where ts > '2024-01-01' or (metric3 = 777 and metric2 = 12); + count +------- + 2 +(1 row) + +select count(*) from vectorqual where ts > '2024-01-01' or (metric3 = 777 and metric2 = 666); + count +------- + 1 +(1 row) + +select count(*) from vectorqual where ts > '2024-01-01' or (metric3 = 888 and metric2 = 12); + count +------- + 1 +(1 row) + +select count(*) from vectorqual where ts > '2024-01-01' or (metric3 = 888 and metric2 = 666); + count +------- + 1 +(1 row) + reset timescaledb.enable_bulk_decompression; reset timescaledb.debug_require_vector_qual; -- Comparison with other column not vectorized. diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index bef9a238581..621e164ee9d 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -194,6 +194,51 @@ select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 0]) select count(*) from singlebatch where metric2 <= all(array[12, 0, 12, 12, 12]) and metric3 != 777; select count(*) from singlebatch where metric2 <= all(array[12, 12, 12, 12, 12]) and metric3 != 777; + +-- Also check early exit for AND/OR. Top-level clause must be OR, because top-level +-- AND is flattened into a list. 
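The OR tests that follow also rely on an early exit in the other direction: while OR-ing the arms of a disjunction, the loop can stop as soon as every row already passes. To make that check a plain comparison of each word's complement against zero, the bits past the end of the batch are pre-set to 1 in the OR accumulator, as compute_qual_disjunction above does. A standalone sketch of the mask setup and the check, with illustrative names and stdint types:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /*
     * Zero the OR accumulator, but set the bits past the end of the batch in
     * the last word so that "all rows pass" can be tested word by word.
     */
    static void
    init_or_accumulator(uint64_t *acc, size_t nrows)
    {
        const size_t nwords = (nrows + 63) / 64;
        for (size_t i = 0; i < nwords; i++)
            acc[i] = 0;
        if (nrows % 64 != 0)
            acc[nwords - 1] = ~UINT64_C(0) << (nrows % 64);
    }

    static bool
    all_rows_pass(const uint64_t *acc, size_t nrows)
    {
        const size_t nwords = (nrows + 63) / 64;
        bool all = true;
        for (size_t i = 0; i < nwords; i++)
            all &= (~acc[i] == 0);
        return all;
    }

    int
    main(void)
    {
        uint64_t acc[1];
        init_or_accumulator(acc, 5); /* 5-row batch */
        acc[0] |= 0x1F;              /* first OR arm already matches all 5 rows */
        printf("all rows pass: %d\n", (int) all_rows_pass(acc, 5));
        return 0;
    }

When the check fires, the remaining OR arms are never evaluated, so their columns do not have to be decompressed at all, which is what the reordered variants of these queries verify.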
+select count(*) from singlebatch where (metric2 < 20 and metric2 < 30) or metric3 = 777; +select count(*) from singlebatch where (metric2 < 30 and metric2 < 20) or metric3 = 777; +select count(*) from singlebatch where metric3 = 777 or (metric2 < 20 and metric2 < 30); +select count(*) from singlebatch where metric3 = 777 or (metric2 < 30 and metric2 < 20); + +select count(*) from vectorqual where (metric2 < 20 and metric2 < 30) or metric3 = 777; +select count(*) from vectorqual where (metric2 < 30 and metric2 < 20) or metric3 = 777; +select count(*) from vectorqual where metric3 = 777 or (metric2 < 20 and metric2 < 30); +select count(*) from vectorqual where metric3 = 777 or (metric2 < 30 and metric2 < 20); + +select count(*) from singlebatch where metric2 < 20 or metric3 < 50 or metric3 > 50; +select count(*) from singlebatch where metric2 < 20 or metric3 > 50 or metric3 < 50; +select count(*) from singlebatch where metric3 < 50 or metric2 < 20 or metric3 > 50; +select count(*) from singlebatch where metric3 > 50 or metric3 < 50 or metric2 < 20; + +select count(*) from vectorqual where metric2 < 20 or metric3 < 50 or metric3 > 50; +select count(*) from vectorqual where metric2 < 20 or metric3 > 50 or metric3 < 50; +select count(*) from vectorqual where metric3 < 50 or metric2 < 20 or metric3 > 50; +select count(*) from vectorqual where metric3 > 50 or metric3 < 50 or metric2 < 20; + +select count(*) from singlebatch where metric2 = 12 or metric3 = 888; +select count(*) from singlebatch where metric2 = 22 or metric3 = 888; +select count(*) from singlebatch where metric2 = 32 or metric3 = 888; +select count(*) from singlebatch where metric2 = 42 or metric3 = 888; +select count(*) from singlebatch where metric2 = 52 or metric3 = 888; + +select count(*) from vectorqual where metric2 = 12 or metric3 = 888; +select count(*) from vectorqual where metric2 = 22 or metric3 = 888; +select count(*) from vectorqual where metric2 = 32 or metric3 = 888; +select count(*) from vectorqual where metric2 = 42 or metric3 = 888; +select count(*) from vectorqual where metric2 = 52 or metric3 = 888; + +select count(*) from singlebatch where ts > '2024-01-01' or (metric3 = 777 and metric2 = 12); +select count(*) from singlebatch where ts > '2024-01-01' or (metric3 = 777 and metric2 = 666); +select count(*) from singlebatch where ts > '2024-01-01' or (metric3 = 888 and metric2 = 12); +select count(*) from singlebatch where ts > '2024-01-01' or (metric3 = 888 and metric2 = 666); + +select count(*) from vectorqual where ts > '2024-01-01' or (metric3 = 777 and metric2 = 12); +select count(*) from vectorqual where ts > '2024-01-01' or (metric3 = 777 and metric2 = 666); +select count(*) from vectorqual where ts > '2024-01-01' or (metric3 = 888 and metric2 = 12); +select count(*) from vectorqual where ts > '2024-01-01' or (metric3 = 888 and metric2 = 666); + reset timescaledb.enable_bulk_decompression; reset timescaledb.debug_require_vector_qual; From 911a8837848b59a54943cc3533412645417d04d9 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 5 Jan 2024 19:18:23 +0100 Subject: [PATCH 171/249] fix for default values --- .../nodes/decompress_chunk/compressed_batch.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 175873a9b35..f227bd125a8 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ 
b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -179,8 +179,6 @@ static bool compute_simple_qual(DecompressContext *dcontext, DecompressBatchState *batch_state, Node *qual, uint64 *restrict result) { - const size_t n_result_words = (batch_state->total_batch_rows + 63) / 64; - /* * For now we support "Var ? Const" predicates and * ScalarArrayOperations. @@ -254,7 +252,7 @@ compute_simple_qual(DecompressContext *dcontext, DecompressBatchState *batch_sta uint64 default_value_predicate_result; uint64 *predicate_result = result; const ArrowArray *vector = column_values->arrow; - if (column_values->arrow == NULL) + if (vector == NULL) { /* * The compressed column had a default value. We can't fall back to @@ -318,15 +316,21 @@ compute_simple_qual(DecompressContext *dcontext, DecompressBatchState *batch_sta vector_const_predicate(vector, constnode->constvalue, predicate_result); } - /* Account for nulls which shouldn't pass the predicate. */ + /* + * Account for nulls which shouldn't pass the predicate. Note that the + * vector here might have only one row, in contrast with the number of + * rows in the batch, if the column has a default value in this batch. + */ + const size_t n_vector_result_words = (vector->length + 63) / 64; const uint64 *restrict validity = (uint64 *restrict) vector->buffers[0]; - for (size_t i = 0; i < n_result_words; i++) + for (size_t i = 0; i < n_vector_result_words; i++) { predicate_result[i] &= validity[i]; } } /* Process the result. */ + const size_t n_batch_result_words = (batch_state->total_batch_rows + 63) / 64; if (column_values->arrow == NULL) { /* The column had a default value. */ @@ -338,7 +342,7 @@ compute_simple_qual(DecompressContext *dcontext, DecompressBatchState *batch_sta * We had a default value for the compressed column, and it * didn't pass the predicate, so the entire batch didn't pass. */ - for (size_t i = 0; i < n_result_words; i++) + for (size_t i = 0; i < n_batch_result_words; i++) { result[i] = 0; } @@ -349,7 +353,7 @@ compute_simple_qual(DecompressContext *dcontext, DecompressBatchState *batch_sta * Have to return whether we have any passing rows. */ bool have_passing_rows = false; - for (size_t i = 0; i < n_result_words; i++) + for (size_t i = 0; i < n_batch_result_words; i++) { have_passing_rows |= result[i] != 0; } From 95c32401b65d9b533fa2a50e485fd4b3000e5763 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 5 Jan 2024 20:49:55 +0100 Subject: [PATCH 172/249] benchmark boolexpr (2024-01-05 no. 2) From 11fb3b47122cf1f9a82b8324b5cd5a336054f106 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 5 Jan 2024 22:27:01 +0100 Subject: [PATCH 173/249] cleanup --- tsl/src/nodes/decompress_chunk/planner.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/planner.c b/tsl/src/nodes/decompress_chunk/planner.c index f2374b33655..ac63fe47c2b 100644 --- a/tsl/src/nodes/decompress_chunk/planner.c +++ b/tsl/src/nodes/decompress_chunk/planner.c @@ -553,20 +553,21 @@ make_vectorized_qual(DecompressChunkPath *path, Node *qual) return (Node *) opexpr; } + /* + * The only option that is left is a ScalarArrayOpExpr. + */ + Assert(saop != NULL); + #if PG14_GE - if (saop) + if (saop->hashfuncid) { - if (saop->hashfuncid) - { - /* - * Don't vectorize if the planner decided to build a hash table. - */ - return NULL; - } + /* + * Don't vectorize if the planner decided to build a hash table. 
+ */ + return NULL; } #endif - Assert(saop); return (Node *) saop; } From 1ddc365113006235d6c590668fe2a39b99be637e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 9 Jan 2024 15:53:35 +0100 Subject: [PATCH 174/249] cleanup --- tsl/src/nodes/decompress_chunk/compressed_batch.c | 2 +- tsl/src/nodes/decompress_chunk/planner.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 974cacbc3c1..4a0a56e7ac0 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -213,7 +213,7 @@ compute_vector_quals(DecompressContext *dcontext, DecompressBatchState *batch_st foreach (lc, dcontext->vectorized_quals_constified) { /* - * For now we support "Var ? Const" predicates and + * For now, we support NullTest, "Var ? Const" predicates and * ScalarArrayOperations. */ List *args = NULL; diff --git a/tsl/src/nodes/decompress_chunk/planner.c b/tsl/src/nodes/decompress_chunk/planner.c index ac63fe47c2b..f12bc4ebf3c 100644 --- a/tsl/src/nodes/decompress_chunk/planner.c +++ b/tsl/src/nodes/decompress_chunk/planner.c @@ -529,7 +529,7 @@ make_vectorized_qual(DecompressChunkPath *path, Node *qual) } /* - * We can vectorize the opreration where the right side is a constant or can + * We can vectorize the operation where the right side is a constant or can * be evaluated to a constant at run time (e.g. contains stable functions). */ Assert(arg2); From 46cee6ba6c87da38b77b6b0fe6be70b52e14729b Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 9 Jan 2024 16:41:51 +0100 Subject: [PATCH 175/249] cleanup --- tsl/src/nodes/decompress_chunk/compressed_batch.c | 2 +- tsl/src/nodes/decompress_chunk/planner.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index c8a3750e341..82698ffe680 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -252,7 +252,7 @@ compute_simple_qual(DecompressContext *dcontext, DecompressBatchState *batch_sta uint64 default_value_predicate_result; uint64 *predicate_result = result; const ArrowArray *vector = column_values->arrow; - if (vector == NULL) + if (column_values->arrow == NULL) { /* * The compressed column had a default value. We can't fall back to diff --git a/tsl/src/nodes/decompress_chunk/planner.c b/tsl/src/nodes/decompress_chunk/planner.c index 12df63983c5..56d478f6fda 100644 --- a/tsl/src/nodes/decompress_chunk/planner.c +++ b/tsl/src/nodes/decompress_chunk/planner.c @@ -420,7 +420,7 @@ static Node * make_vectorized_qual(DecompressChunkPath *path, Node *qual) { /* - * and/or/not + * We can vectorize BoolExpr (AND/OR/NOT). */ if (IsA(qual, BoolExpr)) { @@ -466,8 +466,8 @@ make_vectorized_qual(DecompressChunkPath *path, Node *qual) } /* - * Currently we vectorize some "Var op Const" binary predicates, - * scalar array operations with these predicates, and null test. + * Among the simple predicates, we vectorize some "Var op Const" binary + * predicates, scalar array operations with these predicates, and null test. 
*/ NullTest *nulltest = NULL; OpExpr *opexpr = NULL; From 0c311b47a6aacfe2191a528ce7d188e982edfc1a Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 9 Jan 2024 16:48:08 +0100 Subject: [PATCH 176/249] cleanup --- .../nodes/decompress_chunk/compressed_batch.c | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 82698ffe680..2e9244f3114 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -176,8 +176,8 @@ decompress_column(DecompressContext *dcontext, DecompressBatchState *batch_state } static bool -compute_simple_qual(DecompressContext *dcontext, DecompressBatchState *batch_state, Node *qual, - uint64 *restrict result) +compute_plain_qual(DecompressContext *dcontext, DecompressBatchState *batch_state, Node *qual, + uint64 *restrict result) { /* * For now, we support NullTest, "Var ? Const" predicates and @@ -361,8 +361,8 @@ compute_simple_qual(DecompressContext *dcontext, DecompressBatchState *batch_sta return have_passing_rows; } -static bool compute_compound_qual(DecompressContext *dcontext, DecompressBatchState *batch_state, - Node *qual, uint64 *restrict result); +static bool compute_one_qual(DecompressContext *dcontext, DecompressBatchState *batch_state, + Node *qual, uint64 *restrict result); static bool compute_qual_conjunction(DecompressContext *dcontext, DecompressBatchState *batch_state, @@ -371,7 +371,7 @@ compute_qual_conjunction(DecompressContext *dcontext, DecompressBatchState *batc ListCell *lc; foreach (lc, quals) { - if (!compute_compound_qual(dcontext, batch_state, lfirst(lc), result)) + if (!compute_one_qual(dcontext, batch_state, lfirst(lc), result)) { /* * Exit early if no rows pass already. This might allow us to avoid @@ -404,20 +404,20 @@ compute_qual_disjunction(DecompressContext *dcontext, DecompressBatchState *batc or_result[n_result_words - 1] = mask; } - uint64 *single_qual_result = palloc(sizeof(uint64) * n_result_words); + uint64 *one_qual_result = palloc(sizeof(uint64) * n_result_words); ListCell *lc; foreach (lc, quals) { for (size_t i = 0; i < n_result_words; i++) { - single_qual_result[i] = (uint64) -1; + one_qual_result[i] = (uint64) -1; } - compute_compound_qual(dcontext, batch_state, lfirst(lc), single_qual_result); + compute_one_qual(dcontext, batch_state, lfirst(lc), one_qual_result); bool all_rows_pass = true; for (size_t i = 0; i < n_result_words; i++) { - or_result[i] |= single_qual_result[i]; + or_result[i] |= one_qual_result[i]; /* * Note that we have set the bits for past-the-end rows in * or_result to 1, so we can use simple comparison to zero here. 
@@ -443,12 +443,12 @@ compute_qual_disjunction(DecompressContext *dcontext, DecompressBatchState *batc } static bool -compute_compound_qual(DecompressContext *dcontext, DecompressBatchState *batch_state, Node *qual, - uint64 *restrict result) +compute_one_qual(DecompressContext *dcontext, DecompressBatchState *batch_state, Node *qual, + uint64 *restrict result) { if (!IsA(qual, BoolExpr)) { - return compute_simple_qual(dcontext, batch_state, qual, result); + return compute_plain_qual(dcontext, batch_state, qual, result); } BoolExpr *boolexpr = castNode(BoolExpr, qual); From 1cef484d87ce4e2e1592b2d7fbe3a71413292cd4 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 9 Jan 2024 17:02:55 +0100 Subject: [PATCH 177/249] cleanup --- tsl/src/compression/deltadelta_impl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tsl/src/compression/deltadelta_impl.c b/tsl/src/compression/deltadelta_impl.c index 3c307bb7451..0a27b7b533b 100644 --- a/tsl/src/compression/deltadelta_impl.c +++ b/tsl/src/compression/deltadelta_impl.c @@ -31,7 +31,7 @@ FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, Memory * test_delta4(). */ uint16 num_deltas; - uint64 *restrict deltas_zigzag = + const uint64 *restrict deltas_zigzag = simple8brle_decompress_all_uint64(deltas_compressed, &num_deltas); Simple8bRleBitmap nulls = { 0 }; From 08f02289b2de7d82ba1abf0e2f657167b5f7f669 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 9 Jan 2024 17:12:48 +0100 Subject: [PATCH 178/249] fixes after merge --- .../nodes/decompress_chunk/compressed_batch.c | 79 ++++++++++++++++++- .../decompress_chunk/vector_predicates.c | 9 +-- 2 files changed, 77 insertions(+), 11 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 3504a79ea80..f497b03f7db 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -77,6 +77,49 @@ make_single_value_arrow(Oid pgtype, Datum datum, bool isnull) return arrow; } +static void +translate_from_dictionary(const ArrowArray *arrow, uint64 *restrict dict_result, + uint64 *restrict final_result) +{ + Assert(arrow->dictionary != NULL); + + /* Translate dictionary results to per-value results. 
*/ + const size_t n = arrow->length; + int16 *restrict indices = (int16 *) arrow->buffers[1]; + for (size_t outer = 0; outer < n / 64; outer++) + { + uint64 word = 0; + for (size_t inner = 0; inner < 64; inner++) + { + const size_t row = outer * 64 + inner; + const size_t bit_index = inner; +#define INNER_LOOP \ + const int16 index = indices[row]; \ + const bool valid = arrow_row_is_valid(dict_result, index); \ + word |= ((uint64) valid) << bit_index; + + INNER_LOOP + + // fprintf(stderr, "dict-coded row %ld: index %d, valid %d\n", row, index, + // valid); + } + final_result[outer] &= word; + } + + if (n % 64) + { + uint64 word = 0; + for (size_t row = (n / 64) * 64; row < n; row++) + { + const size_t bit_index = row % 64; + + INNER_LOOP + } + final_result[n / 64] &= word; + } +#undef INNER_LOOP +} + static int get_max_text_datum_size(ArrowArray *text_array) { @@ -364,6 +407,27 @@ compute_vector_quals(DecompressContext *dcontext, DecompressBatchState *batch_st Const *constnode = castNode(Const, lsecond(args)); Ensure(!constnode->constisnull, "vectorized predicate called for a null value"); + /* + * If the data is dictionary-encoded, we are going to compute the + * predicate on dictionary and then translate the results. + */ + const ArrowArray *vector_nodict = NULL; + uint64 *restrict predicate_result_nodict = NULL; + uint64 dict_result[(GLOBAL_MAX_ROWS_PER_COMPRESSION + 63) / 64]; + if (vector->dictionary) + { + const size_t dict_rows = vector->dictionary->length; + const size_t dict_result_words = (dict_rows + 63) / 64; + memset(dict_result, 0xFF, dict_result_words * 8); + predicate_result_nodict = dict_result; + vector_nodict = vector->dictionary; + } + else + { + predicate_result_nodict = predicate_result; + vector_nodict = vector; + } + /* * At last, compute the predicate. */ @@ -371,13 +435,22 @@ compute_vector_quals(DecompressContext *dcontext, DecompressBatchState *batch_st { vector_array_predicate(vector_const_predicate, saop->useOr, - vector, + vector_nodict, constnode->constvalue, - predicate_result); + predicate_result_nodict); } else { - vector_const_predicate(vector, constnode->constvalue, predicate_result); + vector_const_predicate(vector_nodict, constnode->constvalue, predicate_result_nodict); + } + + /* + * If the vector is dictionary-encoded, we have just computed the + * predicate for dictionary and now have to translate it. + */ + if (vector->dictionary) + { + translate_from_dictionary(vector, predicate_result_nodict, predicate_result); } /* Account for nulls which shouldn't pass the predicate. */ diff --git a/tsl/src/nodes/decompress_chunk/vector_predicates.c b/tsl/src/nodes/decompress_chunk/vector_predicates.c index 2e287a92df1..febd707f78e 100644 --- a/tsl/src/nodes/decompress_chunk/vector_predicates.c +++ b/tsl/src/nodes/decompress_chunk/vector_predicates.c @@ -30,7 +30,7 @@ #include "compression/compression.h" static void -vector_const_texteq_nodict(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result) +vector_const_texteq(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result) { Assert(!arrow->dictionary); @@ -77,13 +77,6 @@ vector_const_texteq_nodict(const ArrowArray *arrow, const Datum constdatum, uint #undef INNER_LOOP } -static void -vector_const_texteq(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result) -{ - Assert(!arrow->dictionary); - vector_const_texteq_nodict(arrow, constdatum, result); -} - /* * Look up the vectorized implementation for a Postgres predicate, specified by * its Oid in pg_proc. 
Note that this Oid is different from the opcode. From 929eb9986a05939699080d12eb7647f39eaf6869 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 9 Jan 2024 17:30:09 +0100 Subject: [PATCH 179/249] benchmark text + boolexpr (2024-01-09 no. 3) From 8af04d5113d99ea3205872ace061e77a6b5dc1b0 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 9 Jan 2024 19:58:52 +0100 Subject: [PATCH 180/249] default value in text column --- .../nodes/decompress_chunk/compressed_batch.c | 43 ++++++++++++++----- .../decompress_chunk/vector_predicates.c | 4 +- tsl/test/expected/decompress_vector_qual.out | 38 +++++++++++++++- tsl/test/sql/decompress_vector_qual.sql | 12 +++++- 4 files changed, 82 insertions(+), 15 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 42d860f2e62..512b96ffc15 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -28,19 +28,29 @@ make_single_value_arrow(Oid pgtype, Datum datum, bool isnull) struct ArrowWithBuffers { ArrowArray arrow; - uint64 buffers[2]; - uint64 nulls_buffer; - uint64 values_buffer; + uint64 arrow_buffers_array_storage[3]; + uint64 nulls_buffer[1]; + uint32 offsets_buffer[2]; + uint64 values_buffer[8 /* 64-byte padding as required by Arrow. */]; }; struct ArrowWithBuffers *with_buffers = palloc0(sizeof(struct ArrowWithBuffers)); ArrowArray *arrow = &with_buffers->arrow; arrow->length = 1; - arrow->null_count = -1; - arrow->n_buffers = 2; - arrow->buffers = (const void **) &with_buffers->buffers; + arrow->buffers = (const void **) with_buffers->arrow_buffers_array_storage; arrow->buffers[0] = &with_buffers->nulls_buffer; - arrow->buffers[1] = &with_buffers->values_buffer; + + if (pgtype == TEXTOID) + { + arrow->n_buffers = 3; + arrow->buffers[1] = with_buffers->offsets_buffer; + arrow->buffers[2] = with_buffers->values_buffer; + } + else + { + arrow->n_buffers = 2; + arrow->buffers[1] = with_buffers->values_buffer; + } if (isnull) { @@ -49,12 +59,27 @@ make_single_value_arrow(Oid pgtype, Datum datum, bool isnull) * the Datum might be invalid if the value is null (important on i386 * where it might be pass-by-reference), so don't read it. */ + arrow->null_count = 1; + return arrow; + } + + arrow_set_row_validity((uint64 *) arrow->buffers[0], 0, true); + + if (pgtype == TEXTOID) + { + text *detoasted = PG_DETOAST_DATUM(datum); + ((uint32 *) arrow->buffers[1])[1] = VARSIZE_ANY_EXHDR(detoasted); + arrow->buffers[2] = VARDATA(detoasted); return arrow; } + /* + * Fixed-width by-value types. 
+ */ + arrow->buffers[1] = with_buffers->values_buffer; #define FOR_TYPE(PGTYPE, CTYPE, FROMDATUM) \ case PGTYPE: \ - *((CTYPE *) &with_buffers->values_buffer) = FROMDATUM(datum); \ + *((CTYPE *) arrow->buffers[1]) = FROMDATUM(datum); \ break switch (pgtype) @@ -72,8 +97,6 @@ make_single_value_arrow(Oid pgtype, Datum datum, bool isnull) pg_unreachable(); } - arrow_set_row_validity(&with_buffers->nulls_buffer, 0, true); - return arrow; } diff --git a/tsl/src/nodes/decompress_chunk/vector_predicates.c b/tsl/src/nodes/decompress_chunk/vector_predicates.c index 1edb8b03f86..09db2d2375d 100644 --- a/tsl/src/nodes/decompress_chunk/vector_predicates.c +++ b/tsl/src/nodes/decompress_chunk/vector_predicates.c @@ -51,12 +51,12 @@ vector_const_texteq(const ArrowArray *arrow, const Datum constdatum, uint64 *res #define INNER_LOOP \ const uint32 start = offsets[row]; \ const uint32 end = offsets[row + 1]; \ + Assert(end >= start); \ const uint32 veclen = end - start; \ bool valid = veclen != textlen ? \ false : \ (strncmp((char *) &values[start], (char *) cstring, textlen) == 0); \ - word |= ((uint64) valid) << bit_index; \ - // fprintf(stderr, "plain row %ld: valid %d\n", row, valid); + word |= ((uint64) valid) << bit_index; INNER_LOOP } diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index dbf90673882..7e44650f731 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -974,7 +974,7 @@ select * from date_table where ts < CURRENT_DATE; (3 rows) -- Vectorized comparison for text -create table t(ts int, d int, a text); +create table t(ts int, d int); select create_hypertable('t', 'ts'); NOTICE: adding not-null constraint to column "ts" create_hypertable @@ -983,12 +983,21 @@ NOTICE: adding not-null constraint to column "ts" (1 row) alter table t set (timescaledb.compress, timescaledb.compress_segmentby = 'd'); +insert into t select x, 0 /*, default */ from generate_series(1, 1000) x; +select count(compress_chunk(x, true)) from show_chunks('t') x; + count +------- + 1 +(1 row) + +alter table t add column a text default 'default'; insert into t select x, 1, '' from generate_series(1, 1000) x; insert into t select x, 2, 'same' from generate_series(1, 1000) x; insert into t select x, 3, 'different' || x from generate_series(1, 1000) x; insert into t select x, 4, case when x % 2 = 0 then null else 'same-with-nulls' end from generate_series(1, 1000) x; insert into t select x, 5, case when x % 2 = 0 then null else 'different-with-nulls' || x end from generate_series(1, 1000) x; select count(compress_chunk(x, true)) from show_chunks('t') x; +NOTICE: chunk "_hyper_7_15_chunk" is already compressed count ------- 1 @@ -998,6 +1007,12 @@ set timescaledb.debug_require_vector_qual to 'only'; -- Uncomment to generate the test reference w/o the vector optimizations. 
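Context for the text tests that follow: a decompressed text column is exposed as an Arrow array with three buffers, a validity bitmap, n + 1 uint32 offsets, and the concatenated string bytes, so element i is the byte range [offsets[i], offsets[i + 1]). A column that carries only a default value for the whole batch is materialized by make_single_value_arrow above as a one-row array of the same shape, and the single predicate bit then stands in for every row of the batch. The standalone sketch below just reads values back out of such buffers; the example data and the helper name are made up for illustration.

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /*
     * Read one value out of an Arrow-style variable-size binary column:
     * "offsets" holds n + 1 entries and "data" the concatenated bytes.
     */
    static const char *
    text_element(const uint32_t *offsets, const char *data, size_t i, uint32_t *len)
    {
        *len = offsets[i + 1] - offsets[i];
        return data + offsets[i];
    }

    int
    main(void)
    {
        /* Three strings packed back to back, as the decompression code lays them out. */
        const char data[] = "samesame-with-nullsdifferent500";
        const uint32_t offsets[] = { 0, 4, 19, 31 };

        for (size_t i = 0; i < 3; i++)
        {
            uint32_t len;
            const char *start = text_element(offsets, data, i, &len);
            printf("row %zu: %.*s\n", i, (int) len, start);
        }
        return 0;
    }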
-- set timescaledb.enable_bulk_decompression to off; -- set timescaledb.debug_require_vector_qual to 'forbid'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'default'; + count | min | max | min | max +-------+-----+------+-----+----- + 1000 | 1 | 1000 | 0 | 0 +(1 row) + select count(*), min(ts), max(ts), min(d), max(d) from t where a = ''; count | min | max | min | max -------+-----+------+-----+----- @@ -1052,6 +1067,25 @@ select count(*), min(ts), max(ts), min(d), max(d) from t where a in ('same-with- 501 | 1 | 999 | 4 | 5 (1 row) +select count(*), min(ts), max(ts), min(d), max(d) from t where a in ('different500', 'default'); + count | min | max | min | max +-------+-----+------+-----+----- + 1001 | 1 | 1000 | 0 | 3 +(1 row) + +select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different500' or a = 'default'; + count | min | max | min | max +-------+-----+------+-----+----- + 1001 | 1 | 1000 | 0 | 3 +(1 row) + +reset timescaledb.debug_require_vector_qual; +select count(distinct a) from t; + count +------- + 1504 +(1 row) + -- Null tests are not vectorized yet. reset timescaledb.debug_require_vector_qual; select count(*), min(ts), max(ts), min(d), max(d) from t where a is null; @@ -1063,7 +1097,7 @@ select count(*), min(ts), max(ts), min(d), max(d) from t where a is null; select count(*), min(ts), max(ts), min(d), max(d) from t where a is not null; count | min | max | min | max -------+-----+------+-----+----- - 4000 | 1 | 1000 | 1 | 5 + 5000 | 1 | 1000 | 0 | 5 (1 row) reset timescaledb.debug_require_vector_qual; diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index 5b5b2e44e16..8d4d03f3308 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -305,10 +305,14 @@ select * from date_table where ts < CURRENT_DATE; -- Vectorized comparison for text -create table t(ts int, d int, a text); +create table t(ts int, d int); select create_hypertable('t', 'ts'); alter table t set (timescaledb.compress, timescaledb.compress_segmentby = 'd'); +insert into t select x, 0 /*, default */ from generate_series(1, 1000) x; +select count(compress_chunk(x, true)) from show_chunks('t') x; +alter table t add column a text default 'default'; + insert into t select x, 1, '' from generate_series(1, 1000) x; insert into t select x, 2, 'same' from generate_series(1, 1000) x; insert into t select x, 3, 'different' || x from generate_series(1, 1000) x; @@ -322,6 +326,7 @@ set timescaledb.debug_require_vector_qual to 'only'; -- set timescaledb.enable_bulk_decompression to off; -- set timescaledb.debug_require_vector_qual to 'forbid'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'default'; select count(*), min(ts), max(ts), min(d), max(d) from t where a = ''; select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'same'; select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'same-with-nulls'; @@ -331,6 +336,11 @@ select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different100 select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different-with-nulls999'; select count(*), min(ts), max(ts), min(d), max(d) from t where a in ('same', 'different500'); select count(*), min(ts), max(ts), min(d), max(d) from t where a in ('same-with-nulls', 'different-with-nulls499'); +select count(*), min(ts), max(ts), min(d), max(d) from t where a in ('different500', 'default'); +select count(*), min(ts), max(ts), min(d), max(d) from 
t where a = 'different500' or a = 'default'; + +reset timescaledb.debug_require_vector_qual; +select count(distinct a) from t; -- Null tests are not vectorized yet. reset timescaledb.debug_require_vector_qual; From 1ae27916e560d5040fbecb56b2828bfc8815b65d Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 16 Jan 2024 16:44:08 +0100 Subject: [PATCH 181/249] review fixes --- tsl/test/src/test_compression.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tsl/test/src/test_compression.c b/tsl/test/src/test_compression.c index e7d4c8a4d30..d40e8cd0fb1 100644 --- a/tsl/test/src/test_compression.c +++ b/tsl/test/src/test_compression.c @@ -280,8 +280,8 @@ test_gorilla_float() GorillaCompressor *compressor = gorilla_compressor_alloc(); GorillaCompressed *compressed; DecompressionIterator *iter; - for (int i = 0.0; i < TEST_ELEMENTS; i++) - gorilla_compressor_append_value(compressor, float_get_bits((float) i)); + for (int x = 0; x < TEST_ELEMENTS; x++) + gorilla_compressor_append_value(compressor, float_get_bits((float) x)); compressed = gorilla_compressor_finish(compressor); TestAssertTrue(compressed != NULL); From 8bccc657a16c9d3e21ba92a4740bd92237faf82e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 19 Jan 2024 16:20:26 +0100 Subject: [PATCH 182/249] like --- tsl/src/nodes/decompress_chunk/CMakeLists.txt | 1 + tsl/src/nodes/decompress_chunk/planner.c | 12 + tsl/src/nodes/decompress_chunk/pred_text.c | 182 +++++++++++++++ tsl/src/nodes/decompress_chunk/pred_text.h | 31 +++ .../nodes/decompress_chunk/ts_like_match.c | 210 ++++++++++++++++++ .../decompress_chunk/vector_predicates.c | 63 ++---- tsl/test/expected/decompress_vector_qual.out | 24 ++ tsl/test/sql/decompress_vector_qual.sql | 9 +- 8 files changed, 481 insertions(+), 51 deletions(-) create mode 100644 tsl/src/nodes/decompress_chunk/pred_text.c create mode 100644 tsl/src/nodes/decompress_chunk/pred_text.h create mode 100644 tsl/src/nodes/decompress_chunk/ts_like_match.c diff --git a/tsl/src/nodes/decompress_chunk/CMakeLists.txt b/tsl/src/nodes/decompress_chunk/CMakeLists.txt index 5c0c12f5d83..ab92ea29b74 100644 --- a/tsl/src/nodes/decompress_chunk/CMakeLists.txt +++ b/tsl/src/nodes/decompress_chunk/CMakeLists.txt @@ -8,6 +8,7 @@ set(SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/detoaster.c ${CMAKE_CURRENT_SOURCE_DIR}/exec.c ${CMAKE_CURRENT_SOURCE_DIR}/planner.c + ${CMAKE_CURRENT_SOURCE_DIR}/pred_text.c ${CMAKE_CURRENT_SOURCE_DIR}/pred_vector_array.c ${CMAKE_CURRENT_SOURCE_DIR}/qual_pushdown.c ${CMAKE_CURRENT_SOURCE_DIR}/vector_predicates.c) diff --git a/tsl/src/nodes/decompress_chunk/planner.c b/tsl/src/nodes/decompress_chunk/planner.c index cc101a4358d..66697a50213 100644 --- a/tsl/src/nodes/decompress_chunk/planner.c +++ b/tsl/src/nodes/decompress_chunk/planner.c @@ -599,6 +599,18 @@ make_vectorized_qual(DecompressChunkPath *path, Node *qual) return NULL; } + if (var->varcollid != InvalidOid && !get_collation_isdeterministic(var->varcollid)) + { + /* + * Can't vectorize string equality with a nondeterministic collation. + * Not sure if we have to check the collation of Const as well, but it + * will be known only at planning time. Currently we don't check it at + * all. Also this is untested because we don't have nondeterministic + * collations in all test configurations. 
+ */ + return NULL; + } + if (opexpr) { /* diff --git a/tsl/src/nodes/decompress_chunk/pred_text.c b/tsl/src/nodes/decompress_chunk/pred_text.c new file mode 100644 index 00000000000..abbe541c9c9 --- /dev/null +++ b/tsl/src/nodes/decompress_chunk/pred_text.c @@ -0,0 +1,182 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. + */ + +#include "pred_text.h" + +#include +#include + +void +vector_const_texteq(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result) +{ + Assert(!arrow->dictionary); + + text *consttext = (text *) DatumGetPointer(constdatum); + const size_t textlen = VARSIZE_ANY_EXHDR(consttext); + const uint8 *cstring = (uint8 *) VARDATA_ANY(consttext); + const uint32 *offsets = (uint32 *) arrow->buffers[1]; + const uint8 *values = (uint8 *) arrow->buffers[2]; + + const size_t n = arrow->length; + for (size_t outer = 0; outer < n / 64; outer++) + { + uint64 word = 0; + for (size_t inner = 0; inner < 64; inner++) + { + const size_t row = outer * 64 + inner; + const size_t bit_index = inner; +#define INNER_LOOP \ + const uint32 start = offsets[row]; \ + const uint32 end = offsets[row + 1]; \ + Assert(end >= start); \ + const uint32 veclen = end - start; \ + bool valid = veclen != textlen ? \ + false : \ + (strncmp((char *) &values[start], (char *) cstring, textlen) == 0); \ + word |= ((uint64) valid) << bit_index; + + INNER_LOOP + } + result[outer] &= word; + } + + if (n % 64) + { + uint64 word = 0; + for (size_t row = (n / 64) * 64; row < n; row++) + { + const size_t bit_index = row % 64; + INNER_LOOP + } + result[n / 64] &= word; + } + +#undef INNER_LOOP +} + +#define LIKE_TRUE 1 +#define LIKE_FALSE 0 +#define LIKE_ABORT (-1) + +#define NextByte(p, plen) ((p)++, (plen)--) + +/* Set up to compile like_match.c for single-byte characters */ +#define CHAREQ(p1, p2) (*(p1) == *(p2)) +#define NextChar(p, plen) NextByte((p), (plen)) +#define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--) + +#define MatchText SB_MatchText +#define do_like_escape SB_do_like_escape + +#include "ts_like_match.c" + +/* setup to compile like_match.c for single byte case insensitive matches */ +#define MATCH_LOWER(t) (((t) >= 'A' && (t) <= 'Z') ? 
((t) + 'a' - 'A') : (t)) +#define NextChar(p, plen) NextByte((p), (plen)) +#define MatchText SB_IMatchText + +#include "ts_like_match.c" + +/* setup to compile like_match.c for UTF8 encoding, using fast NextChar */ + +#define NextChar(p, plen) \ + do \ + { \ + (p)++; \ + (plen)--; \ + } while ((plen) > 0 && (*(p) &0xC0) == 0x80) +#define MatchText UTF8_MatchText + +#include "ts_like_match.c" + +static void +vector_const_like_impl(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result, + int (*match)(const char *, int, const char *, int), bool should_match) +{ + Assert(!arrow->dictionary); + + text *consttext = (text *) DatumGetPointer(constdatum); + const size_t textlen = VARSIZE_ANY_EXHDR(consttext); + const char *restrict cstring = VARDATA_ANY(consttext); + const uint32 *offsets = (uint32 *) arrow->buffers[1]; + const char *restrict values = arrow->buffers[2]; + + const size_t n = arrow->length; + for (size_t outer = 0; outer < n / 64; outer++) + { + uint64 word = 0; + for (size_t inner = 0; inner < 64; inner++) + { + const size_t row = outer * 64 + inner; + const size_t bit_index = inner; +#define INNER_LOOP \ + const uint32 start = offsets[row]; \ + const uint32 end = offsets[row + 1]; \ + Assert(end >= start); \ + const uint32 veclen = end - start; \ + int result = match(&values[start], veclen, cstring, textlen); \ + bool valid = (result == LIKE_TRUE) == should_match; \ + word |= ((uint64) valid) << bit_index; + + INNER_LOOP + } + result[outer] &= word; + } + + if (n % 64) + { + uint64 word = 0; + for (size_t row = (n / 64) * 64; row < n; row++) + { + const size_t bit_index = row % 64; + INNER_LOOP + } + result[n / 64] &= word; + } + +#undef INNER_LOOP +} + +void +vector_const_textlike_singlebyte(const ArrowArray *arrow, const Datum constdatum, + uint64 *restrict result) +{ + return vector_const_like_impl(arrow, constdatum, result, SB_MatchText, true); +} + +void +vector_const_textnlike_singlebyte(const ArrowArray *arrow, const Datum constdatum, + uint64 *restrict result) +{ + return vector_const_like_impl(arrow, constdatum, result, SB_MatchText, false); +} + +void +vector_const_texticlike_singlebyte(const ArrowArray *arrow, const Datum constdatum, + uint64 *restrict result) +{ + return vector_const_like_impl(arrow, constdatum, result, SB_IMatchText, true); +} + +void +vector_const_texticnlike_singlebyte(const ArrowArray *arrow, const Datum constdatum, + uint64 *restrict result) +{ + return vector_const_like_impl(arrow, constdatum, result, SB_IMatchText, false); +} + +void +vector_const_textlike_utf8(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result) +{ + return vector_const_like_impl(arrow, constdatum, result, UTF8_MatchText, true); +} + +void +vector_const_textnlike_utf8(const ArrowArray *arrow, const Datum constdatum, + uint64 *restrict result) +{ + return vector_const_like_impl(arrow, constdatum, result, UTF8_MatchText, false); +} diff --git a/tsl/src/nodes/decompress_chunk/pred_text.h b/tsl/src/nodes/decompress_chunk/pred_text.h new file mode 100644 index 00000000000..da87e57a9dd --- /dev/null +++ b/tsl/src/nodes/decompress_chunk/pred_text.h @@ -0,0 +1,31 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. 
+ */ +#pragma once + +#include + +#include "compression/arrow_c_data_interface.h" + +extern void vector_const_texteq(const ArrowArray *arrow, const Datum constdatum, + uint64 *restrict result); + +extern void vector_const_textlike_singlebyte(const ArrowArray *arrow, const Datum constdatum, + uint64 *restrict result); + +extern void vector_const_textnlike_singlebyte(const ArrowArray *arrow, const Datum constdatum, + uint64 *restrict result); + +extern void vector_const_texticlike_singlebyte(const ArrowArray *arrow, const Datum constdatum, + uint64 *restrict result); + +extern void vector_const_texticnlike_singlebyte(const ArrowArray *arrow, const Datum constdatum, + uint64 *restrict result); + +extern void vector_const_textlike_utf8(const ArrowArray *arrow, const Datum constdatum, + uint64 *restrict result); + +extern void vector_const_textnlike_utf8(const ArrowArray *arrow, const Datum constdatum, + uint64 *restrict result); diff --git a/tsl/src/nodes/decompress_chunk/ts_like_match.c b/tsl/src/nodes/decompress_chunk/ts_like_match.c new file mode 100644 index 00000000000..69e7b660091 --- /dev/null +++ b/tsl/src/nodes/decompress_chunk/ts_like_match.c @@ -0,0 +1,210 @@ +/* + * This file and its contents are licensed under the Apache License 2.0. + * Please see the included NOTICE for copyright information and + * LICENSE-APACHE for a copy of the license. + */ + +/* + * This file contains source code that was copied and/or modified from + * the PostgreSQL database, which is licensed under the open-source + * PostgreSQL License. Please see the NOTICE at the top level + * directory for a copy of the PostgreSQL License. + * + * These function were copied from the PostgreSQL core planner, since + * they were declared static in the core planner, but we need them for + * our manipulations. + */ + +/*-------------------- + * Match text and pattern, return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT. + * + * LIKE_TRUE: they match + * LIKE_FALSE: they don't match + * LIKE_ABORT: not only don't they match, but the text is too short. + * + * If LIKE_ABORT is returned, then no suffix of the text can match the + * pattern either, so an upper-level % scan can stop scanning now. + *-------------------- + */ + +#ifdef MATCH_LOWER +#define GETCHAR(t) MATCH_LOWER(t) +#else +#define GETCHAR(t) (t) +#endif + +static pg_attribute_always_inline int +MatchText(const char *t, int tlen, const char *p, int plen) +{ + /* Fast path for match-everything pattern */ + if (plen == 1 && *p == '%') + return LIKE_TRUE; + + /* Since this function recurses, it could be driven to stack overflow */ + check_stack_depth(); + + /* + * In this loop, we advance by char when matching wildcards (and thus on + * recursive entry to this function we are properly char-synced). On other + * occasions it is safe to advance by byte, as the text and pattern will + * be in lockstep. This allows us to perform all comparisons between the + * text and pattern on a byte by byte basis, even for multi-byte + * encodings. + */ + while (tlen > 0 && plen > 0) + { + if (*p == '\\') + { + /* Next pattern byte must match literally, whatever it is */ + NextByte(p, plen); + /* ... 
and there had better be one, per SQL standard */ + if (plen <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE), + errmsg("LIKE pattern must not end with escape character"))); + if (GETCHAR(*p) != GETCHAR(*t)) + return LIKE_FALSE; + } + else if (*p == '%') + { + char firstpat; + + /* + * % processing is essentially a search for a text position at + * which the remainder of the text matches the remainder of the + * pattern, using a recursive call to check each potential match. + * + * If there are wildcards immediately following the %, we can skip + * over them first, using the idea that any sequence of N _'s and + * one or more %'s is equivalent to N _'s and one % (ie, it will + * match any sequence of at least N text characters). In this way + * we will always run the recursive search loop using a pattern + * fragment that begins with a literal character-to-match, thereby + * not recursing more than we have to. + */ + NextByte(p, plen); + + while (plen > 0) + { + if (*p == '%') + NextByte(p, plen); + else if (*p == '_') + { + /* If not enough text left to match the pattern, ABORT */ + if (tlen <= 0) + return LIKE_ABORT; + NextChar(t, tlen); + NextByte(p, plen); + } + else + break; /* Reached a non-wildcard pattern char */ + } + + /* + * If we're at end of pattern, match: we have a trailing % which + * matches any remaining text string. + */ + if (plen <= 0) + return LIKE_TRUE; + + /* + * Otherwise, scan for a text position at which we can match the + * rest of the pattern. The first remaining pattern char is known + * to be a regular or escaped literal character, so we can compare + * the first pattern byte to each text byte to avoid recursing + * more than we have to. This fact also guarantees that we don't + * have to consider a match to the zero-length substring at the + * end of the text. + */ + if (*p == '\\') + { + if (plen < 2) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE), + errmsg("LIKE pattern must not end with escape character"))); + firstpat = GETCHAR(p[1]); + } + else + firstpat = GETCHAR(*p); + + while (tlen > 0) + { + if (GETCHAR(*t) == firstpat) + { + int matched = MatchText(t, tlen, p, plen); + + if (matched != LIKE_FALSE) + return matched; /* TRUE or ABORT */ + } + + NextChar(t, tlen); + } + + /* + * End of text with no match, so no point in trying later places + * to start matching this pattern. + */ + return LIKE_ABORT; + } + else if (*p == '_') + { + /* _ matches any single character, and we know there is one */ + NextChar(t, tlen); + NextByte(p, plen); + continue; + } + else if (GETCHAR(*p) != GETCHAR(*t)) + { + /* non-wildcard pattern char fails to match text char */ + return LIKE_FALSE; + } + + /* + * Pattern and text match, so advance. + * + * It is safe to use NextByte instead of NextChar here, even for + * multi-byte character sets, because we are not following immediately + * after a wildcard character. If we are in the middle of a multibyte + * character, we must already have matched at least one byte of the + * character from both text and pattern; so we cannot get out-of-sync + * on character boundaries. And we know that no backend-legal + * encoding allows ASCII characters such as '%' to appear as non-first + * bytes of characters, so we won't mistakenly detect a new wildcard. + */ + NextByte(t, tlen); + NextByte(p, plen); + } + + if (tlen > 0) + return LIKE_FALSE; /* end of pattern, but not of text */ + + /* + * End of text, but perhaps not of pattern. 
Match iff the remaining + * pattern can match a zero-length string, ie, it's zero or more %'s. + */ + while (plen > 0 && *p == '%') + NextByte(p, plen); + if (plen <= 0) + return LIKE_TRUE; + + /* + * End of text with no match, so no point in trying later places to start + * matching this pattern. + */ + return LIKE_ABORT; +} /* MatchText() */ + +#ifdef CHAREQ +#undef CHAREQ +#endif + +#undef NextChar +#undef CopyAdvChar +#undef MatchText + +#undef GETCHAR + +#ifdef MATCH_LOWER +#undef MATCH_LOWER + +#endif diff --git a/tsl/src/nodes/decompress_chunk/vector_predicates.c b/tsl/src/nodes/decompress_chunk/vector_predicates.c index 09db2d2375d..aa70b86fde5 100644 --- a/tsl/src/nodes/decompress_chunk/vector_predicates.c +++ b/tsl/src/nodes/decompress_chunk/vector_predicates.c @@ -12,6 +12,7 @@ #include #include +#include #include "compression/arrow_c_data_interface.h" @@ -27,55 +28,7 @@ */ #include "pred_vector_const_arithmetic_all.c" -#include "compression/compression.h" - -static void -vector_const_texteq(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result) -{ - Assert(!arrow->dictionary); - - text *consttext = (text *) DatumGetPointer(constdatum); - const size_t textlen = VARSIZE_ANY_EXHDR(consttext); - const uint8 *cstring = (uint8 *) VARDATA_ANY(consttext); - const uint32 *offsets = (uint32 *) arrow->buffers[1]; - const uint8 *values = (uint8 *) arrow->buffers[2]; - - const size_t n = arrow->length; - for (size_t outer = 0; outer < n / 64; outer++) - { - uint64 word = 0; - for (size_t inner = 0; inner < 64; inner++) - { - const size_t row = outer * 64 + inner; - const size_t bit_index = inner; -#define INNER_LOOP \ - const uint32 start = offsets[row]; \ - const uint32 end = offsets[row + 1]; \ - Assert(end >= start); \ - const uint32 veclen = end - start; \ - bool valid = veclen != textlen ? \ - false : \ - (strncmp((char *) &values[start], (char *) cstring, textlen) == 0); \ - word |= ((uint64) valid) << bit_index; - - INNER_LOOP - } - result[outer] &= word; - } - - if (n % 64) - { - uint64 word = 0; - for (size_t row = (n / 64) * 64; row < n; row++) - { - const size_t bit_index = row % 64; - INNER_LOOP - } - result[n / 64] &= word; - } - -#undef INNER_LOOP -} +#include "pred_text.h" /* * Look up the vectorized implementation for a Postgres predicate, specified by @@ -94,6 +47,18 @@ get_vector_const_predicate(Oid pg_predicate) return vector_const_texteq; } + if (GetDatabaseEncoding() == PG_UTF8) + { + /* We have some simple LIKE vectorization for case-sensitive UTF8. 
*/ + switch (pg_predicate) + { + case F_TEXTLIKE: + return vector_const_textlike_utf8; + case F_TEXTNLIKE: + return vector_const_textnlike_utf8; + } + } + return NULL; } diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index bdaad1552cd..52c46b978f0 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -1127,6 +1127,30 @@ select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different500 1001 | 1 | 1000 | 0 | 3 (1 row) +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%same%'; + count | min | max | min | max +-------+-----+------+-----+----- + 1500 | 1 | 1000 | 2 | 4 +(1 row) + +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'same%'; + count | min | max | min | max +-------+-----+------+-----+----- + 1500 | 1 | 1000 | 2 | 4 +(1 row) + +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%same'; + count | min | max | min | max +-------+-----+------+-----+----- + 1000 | 1 | 1000 | 2 | 2 +(1 row) + +select count(*), min(ts), max(ts), min(d), max(d) from t where a not like '%same%'; + count | min | max | min | max +-------+-----+------+-----+----- + 3500 | 1 | 1000 | 0 | 5 +(1 row) + reset timescaledb.debug_require_vector_qual; select count(distinct a) from t; count diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index f3c13d62853..eb80bc29129 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -354,8 +354,8 @@ select count(compress_chunk(x, true)) from show_chunks('t') x; set timescaledb.debug_require_vector_qual to 'only'; -- Uncomment to generate the test reference w/o the vector optimizations. 
--- set timescaledb.enable_bulk_decompression to off; --- set timescaledb.debug_require_vector_qual to 'forbid'; + set timescaledb.enable_bulk_decompression to off; + set timescaledb.debug_require_vector_qual to 'forbid'; select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'default'; select count(*), min(ts), max(ts), min(d), max(d) from t where a = ''; @@ -370,6 +370,11 @@ select count(*), min(ts), max(ts), min(d), max(d) from t where a in ('same-with- select count(*), min(ts), max(ts), min(d), max(d) from t where a in ('different500', 'default'); select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different500' or a = 'default'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%same%'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'same%'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%same'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a not like '%same%'; + reset timescaledb.debug_require_vector_qual; select count(distinct a) from t; From 48db1a50e699e8b8f056c8b948fc30d9777c481a Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 19 Jan 2024 16:23:13 +0100 Subject: [PATCH 183/249] fixup --- tsl/test/sql/decompress_vector_qual.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index eb80bc29129..719b3703d44 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -354,8 +354,8 @@ select count(compress_chunk(x, true)) from show_chunks('t') x; set timescaledb.debug_require_vector_qual to 'only'; -- Uncomment to generate the test reference w/o the vector optimizations. - set timescaledb.enable_bulk_decompression to off; - set timescaledb.debug_require_vector_qual to 'forbid'; +-- set timescaledb.enable_bulk_decompression to off; +-- set timescaledb.debug_require_vector_qual to 'forbid'; select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'default'; select count(*), min(ts), max(ts), min(d), max(d) from t where a = ''; From 922a92bba0e0c3773f15b34968d9306b2299109d Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 19 Jan 2024 16:37:27 +0100 Subject: [PATCH 184/249] fixes --- tsl/src/nodes/decompress_chunk/planner.c | 2 +- tsl/src/nodes/decompress_chunk/pred_text.c | 8 ++++++-- tsl/src/nodes/decompress_chunk/ts_like_match.c | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/planner.c b/tsl/src/nodes/decompress_chunk/planner.c index 66697a50213..3252f2cd173 100644 --- a/tsl/src/nodes/decompress_chunk/planner.c +++ b/tsl/src/nodes/decompress_chunk/planner.c @@ -599,7 +599,7 @@ make_vectorized_qual(DecompressChunkPath *path, Node *qual) return NULL; } - if (var->varcollid != InvalidOid && !get_collation_isdeterministic(var->varcollid)) + if (OidIsValid(var->varcollid) && !get_collation_isdeterministic(var->varcollid)) { /* * Can't vectorize string equality with a nondeterministic collation. 
diff --git a/tsl/src/nodes/decompress_chunk/pred_text.c b/tsl/src/nodes/decompress_chunk/pred_text.c index abbe541c9c9..5056b1f43cd 100644 --- a/tsl/src/nodes/decompress_chunk/pred_text.c +++ b/tsl/src/nodes/decompress_chunk/pred_text.c @@ -9,8 +9,12 @@ #include #include -void -vector_const_texteq(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result) +#ifdef PG16_GE +include +#endif + + void + vector_const_texteq(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result) { Assert(!arrow->dictionary); diff --git a/tsl/src/nodes/decompress_chunk/ts_like_match.c b/tsl/src/nodes/decompress_chunk/ts_like_match.c index 69e7b660091..d3539d72ac4 100644 --- a/tsl/src/nodes/decompress_chunk/ts_like_match.c +++ b/tsl/src/nodes/decompress_chunk/ts_like_match.c @@ -33,7 +33,7 @@ #define GETCHAR(t) (t) #endif -static pg_attribute_always_inline int +static int MatchText(const char *t, int tlen, const char *p, int plen) { /* Fast path for match-everything pattern */ From f692cf0df27641f8e5800896bebcfd2f603dce0c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 19 Jan 2024 16:37:36 +0100 Subject: [PATCH 185/249] benchmark like (2024-01-19 no. 6) From a90568976e4eab146a96c4bf07878470ccff61c2 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 19 Jan 2024 16:46:43 +0100 Subject: [PATCH 186/249] fix --- tsl/src/nodes/decompress_chunk/pred_text.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/pred_text.c b/tsl/src/nodes/decompress_chunk/pred_text.c index 5056b1f43cd..df284b1de25 100644 --- a/tsl/src/nodes/decompress_chunk/pred_text.c +++ b/tsl/src/nodes/decompress_chunk/pred_text.c @@ -9,12 +9,14 @@ #include #include -#ifdef PG16_GE -include +#include "compat/compat.h" + +#if PG16_GE +#include #endif - void - vector_const_texteq(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result) +void +vector_const_texteq(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result) { Assert(!arrow->dictionary); From be1d96aac8bb103c0f66ca7db9a6b050309c5cf9 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 19 Jan 2024 17:52:21 +0100 Subject: [PATCH 187/249] simpler code for by-reference fixed-width datums --- .../nodes/decompress_chunk/compressed_batch.c | 43 ++++++++----------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index bfb3e48756e..ae703fdb413 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -659,39 +659,34 @@ make_next_tuple(DecompressBatchState *batch_state, uint16 arrow_row, int num_com *column_values->output_isnull = result.is_null; *column_values->output_value = result.val; } - else if (column_values->decompression_type > 0) + else if (column_values->decompression_type > SIZEOF_DATUM) { - Assert(column_values->decompression_type <= 8); + /* + * Fixed-width by-reference type that doesn't fit into a Datum. + * For now this only happens for 8-byte types on 32-bit systems, + * but eventually we could also use it for bigger by-value types + * such as UUID. 
+ */ const uint8 value_bytes = column_values->decompression_type; const char *restrict src = column_values->buffers[1]; - + *column_values->output_value = PointerGetDatum(&src[value_bytes * arrow_row]); + *column_values->output_isnull = + !arrow_row_is_valid(column_values->buffers[0], arrow_row); + } + else if (column_values->decompression_type > 0) + { /* + * Fixed-width by-value type that fits into a Datum. + * * The conversion of Datum to more narrow types will truncate * the higher bytes, so we don't care if we read some garbage * into them, and can always read 8 bytes. These are unaligned * reads, so technically we have to do memcpy. */ - uint64 value; - memcpy(&value, &src[value_bytes * arrow_row], 8); - -#ifdef USE_FLOAT8_BYVAL - Datum datum = Int64GetDatum(value); -#else - /* - * On 32-bit systems, the data larger than 4 bytes go by - * reference, so we have to jump through these hoops. - */ - Datum datum; - if (value_bytes <= 4) - { - datum = Int32GetDatum((uint32) value); - } - else - { - datum = Int64GetDatum(value); - } -#endif - *column_values->output_value = datum; + const uint8 value_bytes = column_values->decompression_type; + Assert(value_bytes <= SIZEOF_DATUM); + const char *restrict src = column_values->buffers[1]; + memcpy(column_values->output_value, &src[value_bytes * arrow_row], SIZEOF_DATUM); *column_values->output_isnull = !arrow_row_is_valid(column_values->buffers[0], arrow_row); } From cd9ca921f06fb5c1676554f5dfc99d2ce19307a9 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 19 Jan 2024 18:32:10 +0100 Subject: [PATCH 188/249] clang-tidy --- tsl/src/compression/decompress_text_test_impl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tsl/src/compression/decompress_text_test_impl.c b/tsl/src/compression/decompress_text_test_impl.c index d80f266fa2f..30f3b4a6f91 100644 --- a/tsl/src/compression/decompress_text_test_impl.c +++ b/tsl/src/compression/decompress_text_test_impl.c @@ -77,7 +77,7 @@ decompress_generic_text_check_arrow(ArrowArray *arrow, int errorlevel, Decompres errdetail("At row %d\n", i))); } - if (strncmp(arrow_cstring, rowbyrow_cstring, rowbyrow_len)) + if (strncmp(arrow_cstring, rowbyrow_cstring, rowbyrow_len) != 0) { ereport(errorlevel, (errcode(ERRCODE_INTERNAL_ERROR), From 629b08fc7f0a33188bcaeaca0462fbabfcf3a5ea Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 19 Jan 2024 19:02:43 +0100 Subject: [PATCH 189/249] tests for utf8 --- tsl/test/expected/decompress_vector_qual.out | 64 ++++++++++++++------ tsl/test/sql/decompress_vector_qual.sql | 17 ++++-- 2 files changed, 58 insertions(+), 23 deletions(-) diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index 52c46b978f0..b31fd5c13d7 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -1044,6 +1044,8 @@ insert into t select x, 2, 'same' from generate_series(1, 1000) x; insert into t select x, 3, 'different' || x from generate_series(1, 1000) x; insert into t select x, 4, case when x % 2 = 0 then null else 'same-with-nulls' end from generate_series(1, 1000) x; insert into t select x, 5, case when x % 2 = 0 then null else 'different-with-nulls' || x end from generate_series(1, 1000) x; +insert into t select x, 6, 'одинаковый' from generate_series(1, 1000) x; +insert into t select x, 7, '異なる' || x from generate_series(1, 1000) x; select count(compress_chunk(x, 
true)) from show_chunks('t') x; NOTICE: chunk "_hyper_7_15_chunk" is already compressed count @@ -1052,7 +1054,7 @@ NOTICE: chunk "_hyper_7_15_chunk" is already compressed (1 row) set timescaledb.debug_require_vector_qual to 'only'; --- Uncomment to generate the test reference w/o the vector optimizations. +-- -- Uncomment to generate the test reference w/o the vector optimizations. -- set timescaledb.enable_bulk_decompression to off; -- set timescaledb.debug_require_vector_qual to 'forbid'; select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'default'; @@ -1073,6 +1075,12 @@ select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'same'; 1000 | 1 | 1000 | 2 | 2 (1 row) +select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'одинаковый'; + count | min | max | min | max +-------+-----+------+-----+----- + 1000 | 1 | 1000 | 6 | 6 +(1 row) + select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'same-with-nulls'; count | min | max | min | max -------+-----+-----+-----+----- @@ -1085,6 +1093,12 @@ select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different1'; 1 | 1 | 1 | 3 | 3 (1 row) +select count(*), min(ts), max(ts), min(d), max(d) from t where a = '異なる1'; + count | min | max | min | max +-------+-----+-----+-----+----- + 1 | 1 | 1 | 7 | 7 +(1 row) + select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different-with-nulls1'; count | min | max | min | max -------+-----+-----+-----+----- @@ -1127,12 +1141,36 @@ select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different500 1001 | 1 | 1000 | 0 | 3 (1 row) +select count(*), min(ts), max(ts), min(d), max(d) from t where a is null; + count | min | max | min | max +-------+-----+------+-----+----- + 1000 | 2 | 1000 | 4 | 5 +(1 row) + +select count(*), min(ts), max(ts), min(d), max(d) from t where a is not null; + count | min | max | min | max +-------+-----+------+-----+----- + 7000 | 1 | 1000 | 0 | 7 +(1 row) + select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%same%'; count | min | max | min | max -------+-----+------+-----+----- 1500 | 1 | 1000 | 2 | 4 (1 row) +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%одинаковый%'; + count | min | max | min | max +-------+-----+------+-----+----- + 1000 | 1 | 1000 | 6 | 6 +(1 row) + +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異なる%'; + count | min | max | min | max +-------+-----+------+-----+----- + 1000 | 1 | 1000 | 7 | 7 +(1 row) + select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'same%'; count | min | max | min | max -------+-----+------+-----+----- @@ -1148,28 +1186,20 @@ select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%same'; select count(*), min(ts), max(ts), min(d), max(d) from t where a not like '%same%'; count | min | max | min | max -------+-----+------+-----+----- - 3500 | 1 | 1000 | 0 | 5 -(1 row) - -reset timescaledb.debug_require_vector_qual; -select count(distinct a) from t; - count -------- - 1504 + 5500 | 1 | 1000 | 0 | 7 (1 row) --- Null tests are not vectorized yet. 
-reset timescaledb.debug_require_vector_qual; -select count(*), min(ts), max(ts), min(d), max(d) from t where a is null; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%same%'; count | min | max | min | max -------+-----+------+-----+----- - 1000 | 2 | 1000 | 4 | 5 + 1500 | 1 | 1000 | 2 | 4 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a is not null; - count | min | max | min | max --------+-----+------+-----+----- - 5000 | 1 | 1000 | 0 | 5 +reset timescaledb.debug_require_vector_qual; +select count(distinct a) from t; + count +------- + 2505 (1 row) reset timescaledb.debug_require_vector_qual; diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index 719b3703d44..e01af473158 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -349,19 +349,23 @@ insert into t select x, 2, 'same' from generate_series(1, 1000) x; insert into t select x, 3, 'different' || x from generate_series(1, 1000) x; insert into t select x, 4, case when x % 2 = 0 then null else 'same-with-nulls' end from generate_series(1, 1000) x; insert into t select x, 5, case when x % 2 = 0 then null else 'different-with-nulls' || x end from generate_series(1, 1000) x; +insert into t select x, 6, 'одинаковый' from generate_series(1, 1000) x; +insert into t select x, 7, '異なる' || x from generate_series(1, 1000) x; select count(compress_chunk(x, true)) from show_chunks('t') x; set timescaledb.debug_require_vector_qual to 'only'; --- Uncomment to generate the test reference w/o the vector optimizations. +-- -- Uncomment to generate the test reference w/o the vector optimizations. -- set timescaledb.enable_bulk_decompression to off; -- set timescaledb.debug_require_vector_qual to 'forbid'; select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'default'; select count(*), min(ts), max(ts), min(d), max(d) from t where a = ''; select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'same'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'одинаковый'; select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'same-with-nulls'; select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different1'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a = '異なる1'; select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different-with-nulls1'; select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different1000'; select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different-with-nulls999'; @@ -370,18 +374,19 @@ select count(*), min(ts), max(ts), min(d), max(d) from t where a in ('same-with- select count(*), min(ts), max(ts), min(d), max(d) from t where a in ('different500', 'default'); select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different500' or a = 'default'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a is null; +select count(*), min(ts), max(ts), min(d), max(d) from t where a is not null; + select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%same%'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%одинаковый%'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異なる%'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'same%'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%same'; select count(*), min(ts), max(ts), min(d), max(d) from t 
where a not like '%same%'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%same%'; reset timescaledb.debug_require_vector_qual; select count(distinct a) from t; --- Null tests are not vectorized yet. -reset timescaledb.debug_require_vector_qual; -select count(*), min(ts), max(ts), min(d), max(d) from t where a is null; -select count(*), min(ts), max(ts), min(d), max(d) from t where a is not null; - reset timescaledb.debug_require_vector_qual; reset timescaledb.enable_bulk_decompression; From 3ea19e205d006b98b112c73d9152aa2d6cdcf035 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 29 Jan 2024 17:56:37 +0100 Subject: [PATCH 190/249] more aggressive early exit --- .../nodes/decompress_chunk/compressed_batch.c | 100 ++++++++---------- .../decompress_chunk/pred_vector_array.c | 65 ++---------- .../decompress_chunk/vector_predicates.h | 40 +++++++ 3 files changed, 93 insertions(+), 112 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index aa66ced4cad..183c7b26d62 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -175,7 +175,7 @@ decompress_column(DecompressContext *dcontext, DecompressBatchState *batch_state } } -static bool +static void compute_plain_qual(DecompressContext *dcontext, DecompressBatchState *batch_state, Node *qual, uint64 *restrict result) { @@ -348,61 +348,41 @@ compute_plain_qual(DecompressContext *dcontext, DecompressBatchState *batch_stat } } } - - /* - * Have to return whether we have any passing rows. - */ - bool have_passing_rows = false; - for (size_t i = 0; i < n_batch_result_words; i++) - { - have_passing_rows |= result[i] != 0; - } - - return have_passing_rows; } -static bool compute_one_qual(DecompressContext *dcontext, DecompressBatchState *batch_state, +static void compute_one_qual(DecompressContext *dcontext, DecompressBatchState *batch_state, Node *qual, uint64 *restrict result); -static bool +static void compute_qual_conjunction(DecompressContext *dcontext, DecompressBatchState *batch_state, List *quals, uint64 *restrict result) { ListCell *lc; foreach (lc, quals) { - if (!compute_one_qual(dcontext, batch_state, lfirst(lc), result)) + compute_one_qual(dcontext, batch_state, lfirst(lc), result); + if (get_vector_qual_summary(result, batch_state->total_batch_rows) == NoRowsPass) { /* * Exit early if no rows pass already. This might allow us to avoid * reading the columns required for the subsequent quals. */ - return false; + return; } } - return true; } -static bool +static void compute_qual_disjunction(DecompressContext *dcontext, DecompressBatchState *batch_state, List *quals, uint64 *restrict result) { - const size_t n_result_words = (batch_state->total_batch_rows + 63) / 64; + const size_t n_rows = batch_state->total_batch_rows; + const size_t n_result_words = (n_rows + 63) / 64; uint64 *or_result = palloc(sizeof(uint64) * n_result_words); for (size_t i = 0; i < n_result_words; i++) { or_result[i] = 0; } - if (batch_state->total_batch_rows % 64 != 0) - { - /* - * Set the bits for past-the-end elements to 1. This way it's more - * convenient to check for early exit, and the final result should - * have them already set to 0 so it doesn't matter. 
- */ - const uint64 mask = ((uint64) -1) << (batch_state->total_batch_rows % 64); - or_result[n_result_words - 1] = mask; - } uint64 *one_qual_result = palloc(sizeof(uint64) * n_result_words); @@ -414,47 +394,42 @@ compute_qual_disjunction(DecompressContext *dcontext, DecompressBatchState *batc one_qual_result[i] = (uint64) -1; } compute_one_qual(dcontext, batch_state, lfirst(lc), one_qual_result); - bool all_rows_pass = true; for (size_t i = 0; i < n_result_words; i++) { or_result[i] |= one_qual_result[i]; - /* - * Note that we have set the bits for past-the-end rows in - * or_result to 1, so we can use simple comparison to zero here. - */ - all_rows_pass &= (~or_result[i] == 0); } - if (all_rows_pass) + + if (get_vector_qual_summary(or_result, n_rows) == AllRowsPass) { /* * We can sometimes avoing reading the columns required for the * rest of conditions if we break out early here. */ - return true; + return; } } - bool have_passing_rows = false; + for (size_t i = 0; i < n_result_words; i++) { result[i] &= or_result[i]; - have_passing_rows |= result[i] != 0; } - return have_passing_rows; } -static bool +static void compute_one_qual(DecompressContext *dcontext, DecompressBatchState *batch_state, Node *qual, uint64 *restrict result) { if (!IsA(qual, BoolExpr)) { - return compute_plain_qual(dcontext, batch_state, qual, result); + compute_plain_qual(dcontext, batch_state, qual, result); + return; } BoolExpr *boolexpr = castNode(BoolExpr, qual); if (boolexpr->boolop == AND_EXPR) { - return compute_qual_conjunction(dcontext, batch_state, boolexpr->args, result); + compute_qual_conjunction(dcontext, batch_state, boolexpr->args, result); + return; } /* @@ -462,7 +437,7 @@ compute_one_qual(DecompressContext *dcontext, DecompressBatchState *batch_state, * NOT and consider it non-vectorizable at planning time. So only OR is left. */ Assert(boolexpr->boolop == OR_EXPR); - return compute_qual_disjunction(dcontext, batch_state, boolexpr->args, result); + compute_qual_disjunction(dcontext, batch_state, boolexpr->args, result); } /* @@ -470,22 +445,18 @@ compute_one_qual(DecompressContext *dcontext, DecompressBatchState *batch_state, * it means the entire batch is filtered out, and we use this for further * optimizations. */ -static bool +static VectorQualSummary compute_vector_quals(DecompressContext *dcontext, DecompressBatchState *batch_state) { - if (!dcontext->vectorized_quals_constified) - { - return true; - } - /* * Allocate the bitmap that will hold the vectorized qual results. We will * initialize it to all ones and AND the individual quals to it. */ - const int bitmap_bytes = sizeof(uint64) * (((uint64) batch_state->total_batch_rows + 63) / 64); + const size_t n_rows = batch_state->total_batch_rows; + const int bitmap_bytes = sizeof(uint64) * ((n_rows + 63) / 64); batch_state->vector_qual_result = palloc(bitmap_bytes); memset(batch_state->vector_qual_result, 0xFF, bitmap_bytes); - if (batch_state->total_batch_rows % 64 != 0) + if (n_rows % 64 != 0) { /* * We have to zero out the bits for past-the-end elements in the last @@ -499,10 +470,12 @@ compute_vector_quals(DecompressContext *dcontext, DecompressBatchState *batch_st /* * Compute the quals. 
*/ - return compute_qual_conjunction(dcontext, - batch_state, - dcontext->vectorized_quals_constified, - batch_state->vector_qual_result); + compute_qual_conjunction(dcontext, + batch_state, + dcontext->vectorized_quals_constified, + batch_state->vector_qual_result); + + return get_vector_qual_summary(batch_state->vector_qual_result, n_rows); } /* @@ -635,8 +608,10 @@ compressed_batch_set_compressed_tuple(DecompressContext *dcontext, } } - const bool have_passing_rows = compute_vector_quals(dcontext, batch_state); - if (!have_passing_rows && !dcontext->batch_sorted_merge) + VectorQualSummary vector_qual_summary = dcontext->vectorized_quals_constified != NIL ? + compute_vector_quals(dcontext, batch_state) : + AllRowsPass; + if (vector_qual_summary == NoRowsPass && !dcontext->batch_sorted_merge) { /* * The entire batch doesn't pass the vectorized quals, so we might be @@ -669,6 +644,15 @@ compressed_batch_set_compressed_tuple(DecompressContext *dcontext, Assert(column_values->decompression_type != DT_Invalid); } } + + /* + * If all rows pass, no need to test the vector qual for each row. This + * is a common case for time range conditions. + */ + if (vector_qual_summary == AllRowsPass) + { + batch_state->vector_qual_result = NULL; + } } MemoryContextSwitchTo(old_context); diff --git a/tsl/src/nodes/decompress_chunk/pred_vector_array.c b/tsl/src/nodes/decompress_chunk/pred_vector_array.c index e743ae2bccd..dccdbe771d3 100644 --- a/tsl/src/nodes/decompress_chunk/pred_vector_array.c +++ b/tsl/src/nodes/decompress_chunk/pred_vector_array.c @@ -21,10 +21,10 @@ static inline void vector_array_predicate_impl(VectorPredicate *vector_const_predicate, bool is_or, const ArrowArray *vector, Datum array, uint64 *restrict final_result) { - const size_t result_bits = vector->length; - const size_t result_words = (result_bits + 63) / 64; + const size_t n_rows = vector->length; + const size_t result_words = (n_rows + 63) / 64; - uint64 *restrict array_result = NULL; + uint64 *restrict array_result = final_result; /* * For OR, we need an intermediate storage to accumulate the results * from all elements. @@ -38,17 +38,6 @@ vector_array_predicate_impl(VectorPredicate *vector_const_predicate, bool is_or, { array_result_storage[i] = 0; } - - if (vector->length % 64 != 0) - { - /* - * Set the bits for past-the-end elements to 1. This way it's more - * convenient to check for early exit, and the final result should - * have them already set to 0 so it doesn't matter. - */ - const uint64 mask = ((uint64) -1) << (vector->length % 64); - array_result[vector->length / 64] = mask; - } } ArrayType *arr = DatumGetArrayTypeP(array); @@ -84,7 +73,7 @@ vector_array_predicate_impl(VectorPredicate *vector_const_predicate, bool is_or, for (size_t word = 0; word < result_words; word++) { - final_result[word] = 0; + array_result[word] = 0; } return; } @@ -111,7 +100,7 @@ vector_array_predicate_impl(VectorPredicate *vector_const_predicate, bool is_or, } else { - single_result = final_result; + single_result = array_result; } vector_const_predicate(vector, constvalue, single_result); @@ -125,46 +114,14 @@ vector_array_predicate_impl(VectorPredicate *vector_const_predicate, bool is_or, } /* - * On big arrays, we want to sometimes check if we can exit early, - * to avoid being slower than the non-vectorized version which exits - * at first possibility. The frequency is chosen by benchmarking. - * In debug mode, do this more frequently to simplify testing. 
+ * The bitmaps are small, no more than 15 qwords for our maximal + * compressed batch size of 1000 rows, so we can check for early exit + * after every row. */ -#ifdef NDEBUG - if (array_index > 0 && array_index % 16 == 0) -#else - if (array_index > 0 && array_index % 3 == 0) -#endif + VectorQualSummary summary = get_vector_qual_summary(array_result, n_rows); + if (summary == (is_or ? AllRowsPass : NoRowsPass)) { - if (is_or) - { - bool all_rows_match = true; - for (size_t word = 0; word < result_words; word++) - { - /* - * Note that we have set the bits for past-the-end rows in - * array_result to 1, so we can use simple comparison to - * zero here. - */ - all_rows_match &= (~array_result[word] == 0); - } - if (all_rows_match) - { - return; - } - } - else - { - bool any_rows_match = false; - for (size_t word = 0; word < result_words; word++) - { - any_rows_match |= (final_result[word] != 0); - } - if (!any_rows_match) - { - return; - } - } + return; } } diff --git a/tsl/src/nodes/decompress_chunk/vector_predicates.h b/tsl/src/nodes/decompress_chunk/vector_predicates.h index 06a4b40435b..c8874efeef3 100644 --- a/tsl/src/nodes/decompress_chunk/vector_predicates.h +++ b/tsl/src/nodes/decompress_chunk/vector_predicates.h @@ -17,3 +17,43 @@ void vector_array_predicate(VectorPredicate *scalar_predicate, bool is_or, const Datum array, uint64 *restrict result); void vector_nulltest(const ArrowArray *arrow, int test_type, uint64 *restrict result); + +typedef enum VectorQualSummary +{ + AllRowsPass, + NoRowsPass, + SomeRowsPass +} VectorQualSummary; + +static pg_attribute_always_inline VectorQualSummary +get_vector_qual_summary(uint64 *restrict qual_result, size_t n_rows) +{ + bool any_rows_pass = false; + bool all_rows_pass = true; + for (size_t i = 0; i < n_rows / 64; i++) + { + any_rows_pass |= (qual_result[i] != 0); + all_rows_pass &= (~qual_result[i] == 0); + } + + if (n_rows % 64 != 0) + { + const uint64 last_word_mask = -1ULL >> (64 - n_rows % 64); + any_rows_pass |= (qual_result[n_rows / 64] & last_word_mask) != 0; + all_rows_pass &= ((~qual_result[n_rows / 64]) & last_word_mask) == 0; + } + + Assert(!(all_rows_pass && !any_rows_pass)); + + if (!any_rows_pass) + { + return NoRowsPass; + } + + if (all_rows_pass) + { + return AllRowsPass; + } + + return SomeRowsPass; +} From 1e491c7217e7f74a53de09296ba1488cbe4f22e1 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 29 Jan 2024 19:22:45 +0100 Subject: [PATCH 191/249] fixup --- tsl/src/nodes/decompress_chunk/compressed_batch.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 183c7b26d62..061388fbde7 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -329,19 +329,17 @@ compute_plain_qual(DecompressContext *dcontext, DecompressBatchState *batch_stat } } - /* Process the result. */ - const size_t n_batch_result_words = (batch_state->total_batch_rows + 63) / 64; + /* Translate the result if the column had a default value. */ if (column_values->arrow == NULL) { - /* The column had a default value. */ Assert(column_values->decompression_type == DT_Default); - if (!(default_value_predicate_result & 1)) { /* * We had a default value for the compressed column, and it * didn't pass the predicate, so the entire batch didn't pass. 
*/ + const size_t n_batch_result_words = (batch_state->total_batch_rows + 63) / 64; for (size_t i = 0; i < n_batch_result_words; i++) { result[i] = 0; From f9bda72797462e8c7c87e4cec0bc595b43bc2862 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 30 Jan 2024 20:57:20 +0100 Subject: [PATCH 192/249] cmake woes -- trying to put compression tests into libdir --- .github/workflows/libfuzzer.yaml | 4 ++-- CMakeLists.txt | 3 ++- tsl/CMakeLists.txt | 21 +++++++++++++++++++ tsl/src/CMakeLists.txt | 2 +- tsl/src/compression/CMakeLists.txt | 2 -- tsl/src/compression/compression.c | 1 - tsl/src/compression/compression.h | 3 +++ tsl/test/CMakeLists.txt | 4 ++-- tsl/test/src/CMakeLists.txt | 9 +++++++- .../src/compression_sql_test.c} | 5 ++--- .../src/compression_sql_test.h} | 2 +- ..._compression.c => compression_unit_test.c} | 0 .../src}/decompress_arithmetic_test_impl.c | 0 .../src}/decompress_text_test_impl.c | 6 ++---- 14 files changed, 44 insertions(+), 18 deletions(-) rename tsl/{src/compression/compression_test.c => test/src/compression_sql_test.c} (99%) rename tsl/{src/compression/compression_test.h => test/src/compression_sql_test.h} (92%) rename tsl/test/src/{test_compression.c => compression_unit_test.c} (100%) rename tsl/{src/compression => test/src}/decompress_arithmetic_test_impl.c (100%) rename tsl/{src/compression => test/src}/decompress_text_test_impl.c (98%) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 1d89d83cd43..7d425e9f3de 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -65,7 +65,7 @@ jobs: CC=clang ./configure --prefix=$HOME/$PG_INSTALL_DIR --with-openssl \ --without-readline --without-zlib --without-libxml --enable-cassert \ --enable-debug CC=clang \ - CFLAGS="-DTS_COMPRESSION_FUZZING=1 -fuse-ld=lld -ggdb3 -O2 -fno-omit-frame-pointer" + CFLAGS="-fuse-ld=lld -ggdb3 -O2 -fno-omit-frame-pointer" make -j$(nproc) - name: Install PostgreSQL @@ -93,7 +93,7 @@ jobs: cmake -B build -S . 
-DASSERTIONS=ON -DLINTER=OFF -DCMAKE_VERBOSE_MAKEFILE=1 \ -DWARNINGS_AS_ERRORS=1 -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_COMPILER=clang \ -DCMAKE_C_FLAGS="-fsanitize=fuzzer-no-link -lstdc++ -L$LIBFUZZER_PATH -l:libclang_rt.fuzzer_no_main-x86_64.a -static-libsan" \ - -DPG_PATH=$HOME/$PG_INSTALL_DIR + -DCOMPRESSION_FUZZING=1 -DPG_PATH=$HOME/$PG_INSTALL_DIR make -C build -j$(nproc) install diff --git a/CMakeLists.txt b/CMakeLists.txt index 06b0e1756f7..0b6c83fa678 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -681,7 +681,8 @@ add_subdirectory(test) add_subdirectory(src) if(NOT APACHE_ONLY) - add_subdirectory(tsl) + + add_subdirectory(tsl) endif() add_custom_target(licensecheck diff --git a/tsl/CMakeLists.txt b/tsl/CMakeLists.txt index 2022cefc7f0..6d73cd7695b 100644 --- a/tsl/CMakeLists.txt +++ b/tsl/CMakeLists.txt @@ -1,2 +1,23 @@ +option(CODECOVERAGE "Enable fuzzing of compression using Libfuzzer" OFF) + +if (COMPRESSION_FUZZING) + if (NOT CMAKE_C_COMPILER_ID MATCHES "Clang|AppleClang") + message(FATAL_ERROR "Code coverage is only available with Clang or AppleClang compilers, but we're using ${CMAKE_C_COMPILER_ID} (${CMAKE_C_COMPILER})") + endif() + get_filename_component(CLANG_DIR ${CMAKE_C_COMPILER} DIRECTORY) + string(REGEX MATCH "^[0-9]+" CMAKE_C_COMPILER_VERSION_MAJOR + ${CMAKE_C_COMPILER_VERSION}) + find_program(LLVM_CONFIG "llvm-config-${CMAKE_C_COMPILER_VERSION_MAJOR}" PATHS ${CLANG_DIR} NO_DEFAULT_PATH REQUIRED) + message(STATUS "llvm-config is ${LLVM_CONFIG}") + execute_process(COMMAND ${LLVM_CONFIG} --libdir OUTPUT_VARIABLE LLVM_LIB_DIR OUTPUT_STRIP_TRAILING_WHITESPACE) + message(STATUS "libdir is ${LLVM_LIB_DIR}") + set(LIB_FUZZER_NAME "libclang_rt.fuzzer_no_main-x86_64.a") + execute_process(COMMAND find "${LLVM_LIB_DIR}/clang/${CMAKE_C_COMPILER_VERSION_MAJOR}" -type f -name ${LIB_FUZZER_NAME} OUTPUT_VARIABLE LIB_FUZZER_FULL_NAMEPATH OUTPUT_STRIP_TRAILING_WHITESPACE) + message(STATUS "full is ${LIB_FUZZER_FULL_NAMEPATH}") + # add_link_options(-fsanitize=fuzzer-no-link -static-libsan -lstdc++) + add_compile_definitions(TS_COMPRESSION_FUZZING=1) +endif() + add_subdirectory(test) add_subdirectory(src) + diff --git a/tsl/src/CMakeLists.txt b/tsl/src/CMakeLists.txt index 5a5461fcd43..d6fc2a76266 100644 --- a/tsl/src/CMakeLists.txt +++ b/tsl/src/CMakeLists.txt @@ -18,7 +18,7 @@ set(TSL_LIBRARY_NAME ${PROJECT_NAME}-tsl) include(build-defs.cmake) -if(CMAKE_BUILD_TYPE MATCHES Debug) +if(CMAKE_BUILD_TYPE MATCHES Debug OR COMPRESSION_FUZZING) add_library(${TSL_LIBRARY_NAME} MODULE ${SOURCES} $) else() diff --git a/tsl/src/compression/CMakeLists.txt b/tsl/src/compression/CMakeLists.txt index cca347bccdf..e0719823484 100644 --- a/tsl/src/compression/CMakeLists.txt +++ b/tsl/src/compression/CMakeLists.txt @@ -3,8 +3,6 @@ set(SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/array.c ${CMAKE_CURRENT_SOURCE_DIR}/compression.c ${CMAKE_CURRENT_SOURCE_DIR}/compression_storage.c - ${CMAKE_CURRENT_SOURCE_DIR}/compression_test.c - ${CMAKE_CURRENT_SOURCE_DIR}/decompress_text_test_impl.c ${CMAKE_CURRENT_SOURCE_DIR}/create.c ${CMAKE_CURRENT_SOURCE_DIR}/datum_serialize.c ${CMAKE_CURRENT_SOURCE_DIR}/deltadelta.c diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c index aa847ab86db..54db3d87bbf 100644 --- a/tsl/src/compression/compression.c +++ b/tsl/src/compression/compression.c @@ -47,7 +47,6 @@ #include "array.h" #include "chunk.h" #include "compression.h" -#include "compression_test.h" #include "create.h" #include "custom_type_cache.h" #include "debug_assert.h" diff --git 
a/tsl/src/compression/compression.h b/tsl/src/compression/compression.h index 0a3346f2c7f..0f89a181047 100644 --- a/tsl/src/compression/compression.h +++ b/tsl/src/compression/compression.h @@ -404,3 +404,6 @@ consumeCompressedData(StringInfo si, int bytes) * We use this limit for sanity checks in case the compressed data is corrupt. */ #define GLOBAL_MAX_ROWS_PER_COMPRESSION 1015 + +const CompressionAlgorithmDefinition * +algorithm_definition(CompressionAlgorithm algo); diff --git a/tsl/test/CMakeLists.txt b/tsl/test/CMakeLists.txt index c3c613e459f..e953e372b49 100644 --- a/tsl/test/CMakeLists.txt +++ b/tsl/test/CMakeLists.txt @@ -131,6 +131,6 @@ if(_install_checks) add_dependencies(installcheck installcheck-t) endif() -if(CMAKE_BUILD_TYPE MATCHES Debug) +if(CMAKE_BUILD_TYPE MATCHES Debug OR COMPRESSION_FUZZING) add_subdirectory(src) -endif(CMAKE_BUILD_TYPE MATCHES Debug) +endif() diff --git a/tsl/test/src/CMakeLists.txt b/tsl/test/src/CMakeLists.txt index 70d7d976ad2..fcf7d0b093c 100644 --- a/tsl/test/src/CMakeLists.txt +++ b/tsl/test/src/CMakeLists.txt @@ -1,4 +1,6 @@ -set(SOURCES test_chunk_stats.c test_merge_chunk.c test_compression.c +set(SOURCES test_chunk_stats.c test_merge_chunk.c + compression_unit_test.c compression_sql_test.c + decompress_text_test_impl.c test_continuous_agg.c) include(${PROJECT_SOURCE_DIR}/tsl/src/build-defs.cmake) @@ -14,3 +16,8 @@ target_include_directories(${TSL_TESTS_LIB_NAME} PRIVATE ${CMAKE_SOURCE_DIR}/test/src) target_include_directories(${TSL_TESTS_LIB_NAME} PRIVATE ${PG_INCLUDEDIR}) target_compile_definitions(${TSL_TESTS_LIB_NAME} PUBLIC TS_SUBMODULE) + +if (COMPRESSION_FUZZING) + target_link_libraries(${TSL_TESTS_LIB_NAME} ${LIB_FUZZER_FULL_NAMEPATH} stdc++) + target_compile_options(${TSL_TESTS_LIB_NAME} PUBLIC -fsanitize=fuzzer-no-link -static-libsan -lstdc++) +endif() diff --git a/tsl/src/compression/compression_test.c b/tsl/test/src/compression_sql_test.c similarity index 99% rename from tsl/src/compression/compression_test.c rename to tsl/test/src/compression_sql_test.c index 26f226cc46f..564f29a6a98 100644 --- a/tsl/src/compression/compression_test.c +++ b/tsl/test/src/compression_sql_test.c @@ -11,10 +11,9 @@ #include #include -#include "compression_test.h" +#include "compression_sql_test.h" -#include "compression.h" -#include "arrow_c_data_interface.h" +#include "compression/arrow_c_data_interface.h" #if !defined(NDEBUG) || defined(TS_COMPRESSION_FUZZING) diff --git a/tsl/src/compression/compression_test.h b/tsl/test/src/compression_sql_test.h similarity index 92% rename from tsl/src/compression/compression_test.h rename to tsl/test/src/compression_sql_test.h index f10402eee52..da5c1f39948 100644 --- a/tsl/src/compression/compression_test.h +++ b/tsl/test/src/compression_sql_test.h @@ -5,7 +5,7 @@ */ #pragma once -#include "compression.h" +#include "compression/compression.h" int decompress_ARRAY_TEXT(const uint8 *Data, size_t Size, bool bulk); diff --git a/tsl/test/src/test_compression.c b/tsl/test/src/compression_unit_test.c similarity index 100% rename from tsl/test/src/test_compression.c rename to tsl/test/src/compression_unit_test.c diff --git a/tsl/src/compression/decompress_arithmetic_test_impl.c b/tsl/test/src/decompress_arithmetic_test_impl.c similarity index 100% rename from tsl/src/compression/decompress_arithmetic_test_impl.c rename to tsl/test/src/decompress_arithmetic_test_impl.c diff --git a/tsl/src/compression/decompress_text_test_impl.c b/tsl/test/src/decompress_text_test_impl.c similarity index 98% rename from 
tsl/src/compression/decompress_text_test_impl.c rename to tsl/test/src/decompress_text_test_impl.c index 30f3b4a6f91..e4741af4d44 100644 --- a/tsl/src/compression/decompress_text_test_impl.c +++ b/tsl/test/src/decompress_text_test_impl.c @@ -7,11 +7,9 @@ #include -#include "compression.h" +#include "compression_sql_test.h" -#include "compression_test.h" - -#include "arrow_c_data_interface.h" +#include "compression/arrow_c_data_interface.h" static uint32 arrow_get_str(ArrowArray *arrow, int arrow_row, const char **str) From cb312c659069af6955095c914ac6eaf89f982047 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 30 Jan 2024 21:15:28 +0100 Subject: [PATCH 193/249] Move files to test lib --- tsl/CMakeLists.txt | 18 ++---------------- tsl/src/compression/compression.h | 3 +-- tsl/test/src/CMakeLists.txt | 12 +++--------- 3 files changed, 6 insertions(+), 27 deletions(-) diff --git a/tsl/CMakeLists.txt b/tsl/CMakeLists.txt index 6d73cd7695b..4bfca90dffa 100644 --- a/tsl/CMakeLists.txt +++ b/tsl/CMakeLists.txt @@ -1,21 +1,7 @@ option(CODECOVERAGE "Enable fuzzing of compression using Libfuzzer" OFF) -if (COMPRESSION_FUZZING) - if (NOT CMAKE_C_COMPILER_ID MATCHES "Clang|AppleClang") - message(FATAL_ERROR "Code coverage is only available with Clang or AppleClang compilers, but we're using ${CMAKE_C_COMPILER_ID} (${CMAKE_C_COMPILER})") - endif() - get_filename_component(CLANG_DIR ${CMAKE_C_COMPILER} DIRECTORY) - string(REGEX MATCH "^[0-9]+" CMAKE_C_COMPILER_VERSION_MAJOR - ${CMAKE_C_COMPILER_VERSION}) - find_program(LLVM_CONFIG "llvm-config-${CMAKE_C_COMPILER_VERSION_MAJOR}" PATHS ${CLANG_DIR} NO_DEFAULT_PATH REQUIRED) - message(STATUS "llvm-config is ${LLVM_CONFIG}") - execute_process(COMMAND ${LLVM_CONFIG} --libdir OUTPUT_VARIABLE LLVM_LIB_DIR OUTPUT_STRIP_TRAILING_WHITESPACE) - message(STATUS "libdir is ${LLVM_LIB_DIR}") - set(LIB_FUZZER_NAME "libclang_rt.fuzzer_no_main-x86_64.a") - execute_process(COMMAND find "${LLVM_LIB_DIR}/clang/${CMAKE_C_COMPILER_VERSION_MAJOR}" -type f -name ${LIB_FUZZER_NAME} OUTPUT_VARIABLE LIB_FUZZER_FULL_NAMEPATH OUTPUT_STRIP_TRAILING_WHITESPACE) - message(STATUS "full is ${LIB_FUZZER_FULL_NAMEPATH}") - # add_link_options(-fsanitize=fuzzer-no-link -static-libsan -lstdc++) - add_compile_definitions(TS_COMPRESSION_FUZZING=1) +if(COMPRESSION_FUZZING) + add_compile_definitions(TS_COMPRESSION_FUZZING=1) endif() add_subdirectory(test) diff --git a/tsl/src/compression/compression.h b/tsl/src/compression/compression.h index 0f89a181047..cd5e305abe2 100644 --- a/tsl/src/compression/compression.h +++ b/tsl/src/compression/compression.h @@ -405,5 +405,4 @@ consumeCompressedData(StringInfo si, int bytes) */ #define GLOBAL_MAX_ROWS_PER_COMPRESSION 1015 -const CompressionAlgorithmDefinition * -algorithm_definition(CompressionAlgorithm algo); +const CompressionAlgorithmDefinition *algorithm_definition(CompressionAlgorithm algo); diff --git a/tsl/test/src/CMakeLists.txt b/tsl/test/src/CMakeLists.txt index fcf7d0b093c..7591f271aaa 100644 --- a/tsl/test/src/CMakeLists.txt +++ b/tsl/test/src/CMakeLists.txt @@ -1,7 +1,6 @@ -set(SOURCES test_chunk_stats.c test_merge_chunk.c - compression_unit_test.c compression_sql_test.c - decompress_text_test_impl.c - test_continuous_agg.c) +set(SOURCES + test_chunk_stats.c test_merge_chunk.c compression_unit_test.c + compression_sql_test.c decompress_text_test_impl.c test_continuous_agg.c) include(${PROJECT_SOURCE_DIR}/tsl/src/build-defs.cmake) @@ -16,8 +15,3 @@ 
target_include_directories(${TSL_TESTS_LIB_NAME} PRIVATE ${CMAKE_SOURCE_DIR}/test/src) target_include_directories(${TSL_TESTS_LIB_NAME} PRIVATE ${PG_INCLUDEDIR}) target_compile_definitions(${TSL_TESTS_LIB_NAME} PUBLIC TS_SUBMODULE) - -if (COMPRESSION_FUZZING) - target_link_libraries(${TSL_TESTS_LIB_NAME} ${LIB_FUZZER_FULL_NAMEPATH} stdc++) - target_compile_options(${TSL_TESTS_LIB_NAME} PUBLIC -fsanitize=fuzzer-no-link -static-libsan -lstdc++) -endif() From f770fd31035ac749b9664b9886e5b46c91a03237 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 30 Jan 2024 21:20:24 +0100 Subject: [PATCH 194/249] cleanup --- CMakeLists.txt | 3 +-- tsl/CMakeLists.txt | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0b6c83fa678..06b0e1756f7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -681,8 +681,7 @@ add_subdirectory(test) add_subdirectory(src) if(NOT APACHE_ONLY) - - add_subdirectory(tsl) + add_subdirectory(tsl) endif() add_custom_target(licensecheck diff --git a/tsl/CMakeLists.txt b/tsl/CMakeLists.txt index 4bfca90dffa..9058a14625a 100644 --- a/tsl/CMakeLists.txt +++ b/tsl/CMakeLists.txt @@ -6,4 +6,3 @@ endif() add_subdirectory(test) add_subdirectory(src) - From 1150f50fbf9ea6bfdf083901bdfcae3c2a3de88c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 30 Jan 2024 22:12:59 +0100 Subject: [PATCH 195/249] benchmark bulk text (2024-01-30 no. 1) From a71838eed21d230fb5459d408d3241300aa99fcc Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 14:10:41 +0100 Subject: [PATCH 196/249] fixes after merge --- .../nodes/decompress_chunk/compressed_batch.c | 103 ++++++++---------- tsl/test/expected/decompress_vector_qual.out | 4 +- 2 files changed, 45 insertions(+), 62 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index dad5683950d..92dfb7f539c 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -117,66 +117,6 @@ get_max_text_datum_size(ArrowArray *text_array) return maxbytes; } -static void -translate_from_dictionary(const ArrowArray *arrow, uint64 *restrict dict_result, - uint64 *restrict final_result) -{ - Assert(arrow->dictionary != NULL); - - /* Translate dictionary results to per-value results. 
*/ - const size_t n = arrow->length; - int16 *restrict indices = (int16 *) arrow->buffers[1]; - for (size_t outer = 0; outer < n / 64; outer++) - { - uint64 word = 0; - for (size_t inner = 0; inner < 64; inner++) - { - const size_t row = outer * 64 + inner; - const size_t bit_index = inner; -#define INNER_LOOP \ - const int16 index = indices[row]; \ - const bool valid = arrow_row_is_valid(dict_result, index); \ - word |= ((uint64) valid) << bit_index; - - INNER_LOOP - - // fprintf(stderr, "dict-coded row %ld: index %d, valid %d\n", row, index, - // valid); - } - final_result[outer] &= word; - } - - if (n % 64) - { - uint64 word = 0; - for (size_t row = (n / 64) * 64; row < n; row++) - { - const size_t bit_index = row % 64; - - INNER_LOOP - } - final_result[n / 64] &= word; - } -#undef INNER_LOOP -} - -static int -get_max_text_datum_size(ArrowArray *text_array) -{ - int maxbytes = 0; - uint32 *offsets = (uint32 *) text_array->buffers[1]; - for (int i = 0; i < text_array->length; i++) - { - const int curbytes = offsets[i + 1] - offsets[i]; - if (curbytes > maxbytes) - { - maxbytes = curbytes; - } - } - - return maxbytes; -} - static void decompress_column(DecompressContext *dcontext, DecompressBatchState *batch_state, int i) { @@ -304,6 +244,49 @@ decompress_column(DecompressContext *dcontext, DecompressBatchState *batch_state } } +static void +translate_from_dictionary(const ArrowArray *arrow, uint64 *restrict dict_result, + uint64 *restrict final_result) +{ + Assert(arrow->dictionary != NULL); + + /* Translate dictionary results to per-value results. */ + const size_t n = arrow->length; + int16 *restrict indices = (int16 *) arrow->buffers[1]; + for (size_t outer = 0; outer < n / 64; outer++) + { + uint64 word = 0; + for (size_t inner = 0; inner < 64; inner++) + { + const size_t row = outer * 64 + inner; + const size_t bit_index = inner; +#define INNER_LOOP \ + const int16 index = indices[row]; \ + const bool valid = arrow_row_is_valid(dict_result, index); \ + word |= ((uint64) valid) << bit_index; + + INNER_LOOP + + // fprintf(stderr, "dict-coded row %ld: index %d, valid %d\n", row, index, + // valid); + } + final_result[outer] &= word; + } + + if (n % 64) + { + uint64 word = 0; + for (size_t row = (n / 64) * 64; row < n; row++) + { + const size_t bit_index = row % 64; + + INNER_LOOP + } + final_result[n / 64] &= word; + } +#undef INNER_LOOP +} + static void compute_plain_qual(DecompressContext *dcontext, DecompressBatchState *batch_state, Node *qual, uint64 *restrict result) diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index da99de770c0..ca07bb7595b 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -1059,7 +1059,7 @@ select create_hypertable('t', 'ts'); NOTICE: adding not-null constraint to column "ts" create_hypertable ------------------- - (7,public,t,t) + (9,public,t,t) (1 row) alter table t set (timescaledb.compress, timescaledb.compress_segmentby = 'd'); @@ -1079,7 +1079,7 @@ insert into t select x, 5, case when x % 2 = 0 then null else 'different-with-nu insert into t select x, 6, 'одинаковый' from generate_series(1, 1000) x; insert into t select x, 7, '異なる' || x from generate_series(1, 1000) x; select count(compress_chunk(x, true)) from show_chunks('t') x; -NOTICE: chunk "_hyper_7_15_chunk" is already compressed +NOTICE: chunk "_hyper_9_17_chunk" is already compressed count ------- 1 From ab20509295d8e32d663df59e7788659bc881cadf Mon Sep 17 00:00:00 2001 From: 
Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 15:11:16 +0100 Subject: [PATCH 197/249] some cleanup --- .../nodes/decompress_chunk/compressed_batch.c | 6 +-- tsl/src/nodes/decompress_chunk/pred_text.c | 54 +++---------------- tsl/src/nodes/decompress_chunk/pred_text.h | 12 ----- .../nodes/decompress_chunk/ts_like_match.c | 4 +- tsl/test/expected/decompress_vector_qual.out | 24 +++++++++ tsl/test/sql/decompress_vector_qual.sql | 4 ++ 6 files changed, 38 insertions(+), 66 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 92dfb7f539c..9dc97270851 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -245,8 +245,8 @@ decompress_column(DecompressContext *dcontext, DecompressBatchState *batch_state } static void -translate_from_dictionary(const ArrowArray *arrow, uint64 *restrict dict_result, - uint64 *restrict final_result) +translate_bitmap_from_dictionary(const ArrowArray *arrow, uint64 *restrict dict_result, + uint64 *restrict final_result) { Assert(arrow->dictionary != NULL); @@ -455,7 +455,7 @@ compute_plain_qual(DecompressContext *dcontext, DecompressBatchState *batch_stat */ if (vector->dictionary) { - translate_from_dictionary(vector, predicate_result_nodict, predicate_result); + translate_bitmap_from_dictionary(vector, predicate_result_nodict, predicate_result); } /* diff --git a/tsl/src/nodes/decompress_chunk/pred_text.c b/tsl/src/nodes/decompress_chunk/pred_text.c index df284b1de25..3bfe510994b 100644 --- a/tsl/src/nodes/decompress_chunk/pred_text.c +++ b/tsl/src/nodes/decompress_chunk/pred_text.c @@ -63,31 +63,17 @@ vector_const_texteq(const ArrowArray *arrow, const Datum constdatum, uint64 *res #undef INNER_LOOP } +/* + * Generate specializations for LIKE functions based on database encoding. This + * follows the Postgres code from backend/utils/adt/like.c. + */ + #define LIKE_TRUE 1 #define LIKE_FALSE 0 #define LIKE_ABORT (-1) -#define NextByte(p, plen) ((p)++, (plen)--) - -/* Set up to compile like_match.c for single-byte characters */ -#define CHAREQ(p1, p2) (*(p1) == *(p2)) -#define NextChar(p, plen) NextByte((p), (plen)) -#define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--) - -#define MatchText SB_MatchText -#define do_like_escape SB_do_like_escape - -#include "ts_like_match.c" - -/* setup to compile like_match.c for single byte case insensitive matches */ -#define MATCH_LOWER(t) (((t) >= 'A' && (t) <= 'Z') ? 
((t) + 'a' - 'A') : (t)) -#define NextChar(p, plen) NextByte((p), (plen)) -#define MatchText SB_IMatchText - -#include "ts_like_match.c" - /* setup to compile like_match.c for UTF8 encoding, using fast NextChar */ - +#define NextByte(p, plen) ((p)++, (plen)--) #define NextChar(p, plen) \ do \ { \ @@ -146,34 +132,6 @@ vector_const_like_impl(const ArrowArray *arrow, const Datum constdatum, uint64 * #undef INNER_LOOP } -void -vector_const_textlike_singlebyte(const ArrowArray *arrow, const Datum constdatum, - uint64 *restrict result) -{ - return vector_const_like_impl(arrow, constdatum, result, SB_MatchText, true); -} - -void -vector_const_textnlike_singlebyte(const ArrowArray *arrow, const Datum constdatum, - uint64 *restrict result) -{ - return vector_const_like_impl(arrow, constdatum, result, SB_MatchText, false); -} - -void -vector_const_texticlike_singlebyte(const ArrowArray *arrow, const Datum constdatum, - uint64 *restrict result) -{ - return vector_const_like_impl(arrow, constdatum, result, SB_IMatchText, true); -} - -void -vector_const_texticnlike_singlebyte(const ArrowArray *arrow, const Datum constdatum, - uint64 *restrict result) -{ - return vector_const_like_impl(arrow, constdatum, result, SB_IMatchText, false); -} - void vector_const_textlike_utf8(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result) { diff --git a/tsl/src/nodes/decompress_chunk/pred_text.h b/tsl/src/nodes/decompress_chunk/pred_text.h index da87e57a9dd..a633a263165 100644 --- a/tsl/src/nodes/decompress_chunk/pred_text.h +++ b/tsl/src/nodes/decompress_chunk/pred_text.h @@ -12,18 +12,6 @@ extern void vector_const_texteq(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result); -extern void vector_const_textlike_singlebyte(const ArrowArray *arrow, const Datum constdatum, - uint64 *restrict result); - -extern void vector_const_textnlike_singlebyte(const ArrowArray *arrow, const Datum constdatum, - uint64 *restrict result); - -extern void vector_const_texticlike_singlebyte(const ArrowArray *arrow, const Datum constdatum, - uint64 *restrict result); - -extern void vector_const_texticnlike_singlebyte(const ArrowArray *arrow, const Datum constdatum, - uint64 *restrict result); - extern void vector_const_textlike_utf8(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result); diff --git a/tsl/src/nodes/decompress_chunk/ts_like_match.c b/tsl/src/nodes/decompress_chunk/ts_like_match.c index d3539d72ac4..923e88f8bf6 100644 --- a/tsl/src/nodes/decompress_chunk/ts_like_match.c +++ b/tsl/src/nodes/decompress_chunk/ts_like_match.c @@ -10,9 +10,7 @@ * PostgreSQL License. Please see the NOTICE at the top level * directory for a copy of the PostgreSQL License. * - * These function were copied from the PostgreSQL core planner, since - * they were declared static in the core planner, but we need them for - * our manipulations. + * This is a copy of backend/utils/adt/like_match.c. 
*/ /*-------------------- diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index ca07bb7595b..d07a72d9eaa 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -1197,12 +1197,36 @@ select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%один 1000 | 1 | 1000 | 6 | 6 (1 row) +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%одилаковый%'; + count | min | max | min | max +-------+-----+-----+-----+----- + 0 | | | | +(1 row) + +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%одимаковый%'; + count | min | max | min | max +-------+-----+-----+-----+----- + 0 | | | | +(1 row) + select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異なる%'; count | min | max | min | max -------+-----+------+-----+----- 1000 | 1 | 1000 | 7 | 7 (1 row) +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異オる%'; + count | min | max | min | max +-------+-----+-----+-----+----- + 0 | | | | +(1 row) + +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異にる%'; + count | min | max | min | max +-------+-----+-----+-----+----- + 0 | | | | +(1 row) + select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'same%'; count | min | max | min | max -------+-----+------+-----+----- diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index 8bce8592e8d..adf12ecdaf0 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -397,7 +397,11 @@ select count(*), min(ts), max(ts), min(d), max(d) from t where a is not null; select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%same%'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%одинаковый%'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%одилаковый%'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%одимаковый%'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異なる%'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異オる%'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異にる%'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'same%'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%same'; select count(*), min(ts), max(ts), min(d), max(d) from t where a not like '%same%'; From 14994f17eb13ffea341ff815da48560fa0d49bc4 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 15:36:10 +0100 Subject: [PATCH 198/249] Fix UBSan failure in bulk text decompression I can't reproduce it locally and the ubsan logs are not there, so not sure what it is, might be direct conversion from void* to varlena w/o the intermediate datum pointer. 
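As a minimal sketch of what the added checks amount to (a hypothetical helper,
not the code actually added below), the untrusted varlena header has to be
classified before any size macro is applied to it:

static inline void
check_untrusted_varlena(const void *vardata, uint16 expected_size)
{
	if (VARATT_IS_4B_U(vardata))
	{
		/* Plain 4-byte header: the stored size must at least cover the header. */
		CheckCompressedData(VARSIZE_4B(vardata) >= VARHDRSZ);
	}
	else if (VARATT_IS_1B(vardata))
	{
		/* Short 1-byte header: no TOAST pointers, and the size must cover the header. */
		CheckCompressedData(!VARATT_IS_1B_E(vardata));
		CheckCompressedData(VARSIZE_1B(vardata) >= VARHDRSZ_SHORT);
	}
	else
	{
		/* TOAST pointers and compressed datums are corrupt input here. */
		CheckCompressedData(false);
	}

	/* The header must agree with the size recorded in the sizes array. */
	CheckCompressedData(VARSIZE_ANY(vardata) == expected_size);
}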
--- tsl/src/compression/array.c | 42 +++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/tsl/src/compression/array.c b/tsl/src/compression/array.c index 78b8e6b2aa3..d30688c2142 100644 --- a/tsl/src/compression/array.c +++ b/tsl/src/compression/array.c @@ -509,20 +509,40 @@ text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, uint32 offset = 0; for (int i = 0; i < n_notnull; i++) { - void *vardata = consumeCompressedData(si, sizes[i]); + Datum vardata = PointerGetDatum(consumeCompressedData(si, sizes[i])); + /* * Check for potentially corrupt varlena headers since we're reading them - * directly from compressed data. We can only have a plain datum - * with 1-byte or 4-byte header here, no TOAST or compressed data. - */ - CheckCompressedData(VARATT_IS_4B_U(vardata) || - (VARATT_IS_1B(vardata) && !VARATT_IS_1B_E(vardata))); - /* - * Full varsize must be larger or equal than the header size so that the - * calculation of size without header doesn't overflow. + * directly from compressed data. */ - CheckCompressedData((VARATT_IS_1B(vardata) && VARSIZE_1B(vardata) >= VARHDRSZ_SHORT) || - (VARSIZE_4B(vardata) >= VARHDRSZ)); + if (VARATT_IS_4B_U(vardata)) + { + /* + * Full varsize must be larger or equal than the header size so that + * the calculation of size without header doesn't overflow. + */ + CheckCompressedData(VARSIZE_4B(vardata) >= VARHDRSZ); + } + else if (VARATT_IS_1B(vardata)) + { + /* Can't have a TOAST pointer here. */ + CheckCompressedData(!VARATT_IS_1B_E(vardata)); + + /* + * Full varsize must be larger or equal than the header size so that + * the calculation of size without header doesn't overflow. + */ + CheckCompressedData(VARSIZE_1B(vardata) >= VARHDRSZ_SHORT); + } + else + { + /* + * Can only have an uncompressed datum with 1-byte or 4-byte header + * here, no TOAST or compressed data. + */ + CheckCompressedData(false); + } + /* Varsize must match the size stored in the sizes array for this element. 
*/ CheckCompressedData(VARSIZE_ANY(vardata) == sizes[i]); From 1eb9d411c03231653ab626beeafe4d2fcd176f94 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 15:45:25 +0100 Subject: [PATCH 199/249] some tweaks --- .github/workflows/sanitizer-build-and-test.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/sanitizer-build-and-test.yaml b/.github/workflows/sanitizer-build-and-test.yaml index 34a13971313..9bc9c6f7699 100644 --- a/.github/workflows/sanitizer-build-and-test.yaml +++ b/.github/workflows/sanitizer-build-and-test.yaml @@ -16,11 +16,11 @@ env: name: "Sanitizer" PG_SRC_DIR: "pgbuild" PG_INSTALL_DIR: "postgresql" - extra_packages: "clang-15 llvm-15 llvm-15-dev llvm-15-tools" - llvm_config: "llvm-config-15" - CLANG: "clang-15" - CC: "clang-15" - CXX: "clang-15" + extra_packages: "clang-17 llvm-17 llvm-17-dev llvm-17-tools" + llvm_config: "llvm-config-17" + CLANG: "clang-17" + CC: "clang-17" + CXX: "clang-17" # gcc CFLAGS, disable inlining for function name pattern matching to work for suppressions # CFLAGS: "-g -fsanitize=address,undefined -fno-omit-frame-pointer -O1 -fno-inline" # CXXFLAGS: "-g -fsanitize=address,undefined -fno-omit-frame-pointer -O1 -fno-inline" @@ -44,7 +44,7 @@ env: log_exe_name=true print_suppressions=false exitcode=27 UBSAN_OPTIONS: suppressions=${{ github.workspace }}/scripts/suppressions/suppr_ub.txt - print_stacktrace=1 halt_on_error=1 log_path=${{ github.workspace }}/sanitizer + print_stacktrace=1 abort_on_error=1 log_path=${{ github.workspace }}/sanitizer log_exe_name=true print_suppressions=false exitcode=27 IGNORES: "bgw_db_scheduler bgw_db_scheduler_fixed" @@ -71,7 +71,7 @@ jobs: fail-fast: false matrix: # "os" has to be in the matrix due to a bug in "env": https://github.community/t/how-to-use-env-context/16975 - os: ["ubuntu-22.04"] + os: ubuntu-latest pg: ${{ fromJson(needs.config.outputs.pg_latest) }} steps: - name: Install Linux Dependencies From 017a34e36f08762b8d8d812d21b63b2f32dbc6d3 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 15:46:45 +0100 Subject: [PATCH 200/249] actions --- .github/workflows/sanitizer-build-and-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sanitizer-build-and-test.yaml b/.github/workflows/sanitizer-build-and-test.yaml index 9bc9c6f7699..03df868b7d4 100644 --- a/.github/workflows/sanitizer-build-and-test.yaml +++ b/.github/workflows/sanitizer-build-and-test.yaml @@ -71,7 +71,7 @@ jobs: fail-fast: false matrix: # "os" has to be in the matrix due to a bug in "env": https://github.community/t/how-to-use-env-context/16975 - os: ubuntu-latest + os: [ubuntu-latest] pg: ${{ fromJson(needs.config.outputs.pg_latest) }} steps: - name: Install Linux Dependencies From 06be871f5bedc4fbc4bd10fb9562a5229283454e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 15:57:13 +0100 Subject: [PATCH 201/249] fix --- tsl/src/nodes/decompress_chunk/pred_text.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/pred_text.c b/tsl/src/nodes/decompress_chunk/pred_text.c index 3bfe510994b..0448063b493 100644 --- a/tsl/src/nodes/decompress_chunk/pred_text.c +++ b/tsl/src/nodes/decompress_chunk/pred_text.c @@ -135,12 +135,12 @@ vector_const_like_impl(const ArrowArray *arrow, const Datum constdatum, uint64 * void 
vector_const_textlike_utf8(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result) { - return vector_const_like_impl(arrow, constdatum, result, UTF8_MatchText, true); + vector_const_like_impl(arrow, constdatum, result, UTF8_MatchText, true); } void vector_const_textnlike_utf8(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result) { - return vector_const_like_impl(arrow, constdatum, result, UTF8_MatchText, false); + vector_const_like_impl(arrow, constdatum, result, UTF8_MatchText, false); } From bec9f605b02800a52d9d2a9d393981af7bf31cf4 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 15:58:54 +0100 Subject: [PATCH 202/249] tidy --- tsl/src/nodes/decompress_chunk/vector_predicates.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/vector_predicates.h b/tsl/src/nodes/decompress_chunk/vector_predicates.h index c8874efeef3..141563149dd 100644 --- a/tsl/src/nodes/decompress_chunk/vector_predicates.h +++ b/tsl/src/nodes/decompress_chunk/vector_predicates.h @@ -13,8 +13,8 @@ typedef void(VectorPredicate)(const ArrowArray *, Datum, uint64 *restrict); VectorPredicate *get_vector_const_predicate(Oid pg_predicate); -void vector_array_predicate(VectorPredicate *scalar_predicate, bool is_or, const ArrowArray *vector, - Datum array, uint64 *restrict result); +void vector_array_predicate(VectorPredicate *vector_const_predicate, bool is_or, + const ArrowArray *vector, Datum array, uint64 *restrict final_result); void vector_nulltest(const ArrowArray *arrow, int test_type, uint64 *restrict result); From 7f238900d27b720cc67b5cc214fb22b585467fd6 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 16:07:41 +0100 Subject: [PATCH 203/249] better coverage for like --- tsl/test/expected/decompress_vector_qual.out | 66 +++++++++++++++++--- tsl/test/sql/decompress_vector_qual.sql | 20 ++++-- 2 files changed, 72 insertions(+), 14 deletions(-) diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index d07a72d9eaa..b7de58d44f5 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -1227,30 +1227,76 @@ select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異に 0 | | | | (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'same%'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different1%'; count | min | max | min | max -------+-----+------+-----+----- - 1500 | 1 | 1000 | 2 | 4 + 112 | 1 | 1000 | 3 | 3 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%same'; - count | min | max | min | max --------+-----+------+-----+----- - 1000 | 1 | 1000 | 2 | 2 +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%different1'; + count | min | max | min | max +-------+-----+-----+-----+----- + 1 | 1 | 1 | 3 | 3 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a not like '%same%'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a not like '%different1%'; count | min | max | min | max -------+-----+------+-----+----- - 5500 | 1 | 1000 | 0 | 7 + 6888 | 1 | 1000 | 0 | 7 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%same%'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%different1%'; count | min | max | 
min | max -------+-----+------+-----+----- - 1500 | 1 | 1000 | 2 | 4 + 112 | 1 | 1000 | 3 | 3 +(1 row) + +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'same_'; + count | min | max | min | max +-------+-----+-----+-----+----- + 0 | | | | +(1 row) + +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different_'; + count | min | max | min | max +-------+-----+-----+-----+----- + 9 | 1 | 9 | 3 | 3 +(1 row) + +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different_1'; + count | min | max | min | max +-------+-----+-----+-----+----- + 9 | 11 | 91 | 3 | 3 +(1 row) + +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different_%1'; + count | min | max | min | max +-------+-----+-----+-----+----- + 199 | 1 | 991 | 3 | 5 (1 row) +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%nulls_'; + count | min | max | min | max +-------+-----+-----+-----+----- + 5 | 1 | 9 | 5 | 5 +(1 row) + +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different\%'; + count | min | max | min | max +-------+-----+-----+-----+----- + 0 | | | | +(1 row) + +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different\1'; + count | min | max | min | max +-------+-----+-----+-----+----- + 1 | 1 | 1 | 3 | 3 +(1 row) + +\set ON_ERROR_STOP 0 +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different\'; +ERROR: LIKE pattern must not end with escape character +\set ON_ERROR_STOP 1 reset timescaledb.debug_require_vector_qual; select count(distinct a) from t; count diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index adf12ecdaf0..dcb2087c5f6 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -402,10 +402,22 @@ select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%одим select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異なる%'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異オる%'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異にる%'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'same%'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%same'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a not like '%same%'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%same%'; + +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different1%'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%different1'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a not like '%different1%'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%different1%'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'same_'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different_'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different_1'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different_%1'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%nulls_'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different\%'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different\1'; + +\set ON_ERROR_STOP 0 +select count(*), min(ts), max(ts), min(d), max(d) from 
t where a like 'different\'; +\set ON_ERROR_STOP 1 reset timescaledb.debug_require_vector_qual; select count(distinct a) from t; From 498c0de22aec76f9c6b701f7915c6eb3967ea585 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 16:09:15 +0100 Subject: [PATCH 204/249] clang 16 --- .github/workflows/sanitizer-build-and-test.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/sanitizer-build-and-test.yaml b/.github/workflows/sanitizer-build-and-test.yaml index 03df868b7d4..e7c2de371aa 100644 --- a/.github/workflows/sanitizer-build-and-test.yaml +++ b/.github/workflows/sanitizer-build-and-test.yaml @@ -16,11 +16,11 @@ env: name: "Sanitizer" PG_SRC_DIR: "pgbuild" PG_INSTALL_DIR: "postgresql" - extra_packages: "clang-17 llvm-17 llvm-17-dev llvm-17-tools" - llvm_config: "llvm-config-17" - CLANG: "clang-17" - CC: "clang-17" - CXX: "clang-17" + extra_packages: "clang-16 llvm-16 llvm-16-dev llvm-16-tools" + llvm_config: "llvm-config-16" + CLANG: "clang-16" + CC: "clang-16" + CXX: "clang-16" # gcc CFLAGS, disable inlining for function name pattern matching to work for suppressions # CFLAGS: "-g -fsanitize=address,undefined -fno-omit-frame-pointer -O1 -fno-inline" # CXXFLAGS: "-g -fsanitize=address,undefined -fno-omit-frame-pointer -O1 -fno-inline" From 645a19bac6ee49016d6d31f16094ab5d67f7a71d Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 16:28:07 +0100 Subject: [PATCH 205/249] recompress chunks --- tsl/test/expected/decompress_vector_qual.out | 2 ++ tsl/test/sql/decompress_vector_qual.sql | 1 + 2 files changed, 3 insertions(+) diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index b7de58d44f5..cc3d713a873 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -1085,6 +1085,8 @@ NOTICE: chunk "_hyper_9_17_chunk" is already compressed 1 (1 row) +select format('call recompress_chunk(''%s'')', x) from show_chunks('t') x \gexec +call recompress_chunk('_timescaledb_internal._hyper_9_17_chunk') set timescaledb.debug_require_vector_qual to 'only'; -- -- Uncomment to generate the test reference w/o the vector optimizations. -- set timescaledb.enable_bulk_decompression to off; diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index dcb2087c5f6..2ab8dd86cc0 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -371,6 +371,7 @@ insert into t select x, 6, 'одинаковый' from generate_series(1, 1000) insert into t select x, 7, '異なる' || x from generate_series(1, 1000) x; select count(compress_chunk(x, true)) from show_chunks('t') x; +select format('call recompress_chunk(''%s'')', x) from show_chunks('t') x \gexec set timescaledb.debug_require_vector_qual to 'only'; -- -- Uncomment to generate the test reference w/o the vector optimizations. 
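The LIKE tests above exercise the vectorized text predicates in pred_text.c. As a
simplified sketch of that evaluation loop (generic match callback assumed; the real
code accumulates the result bitmap word by word and handles nulls and dictionary
encoding separately):

static void
vector_const_like_sketch(const ArrowArray *arrow, const char *pattern, int patlen,
						 bool (*match)(const char *s, int slen, const char *p, int plen),
						 uint64 *restrict result)
{
	const uint32 *offsets = (const uint32 *) arrow->buffers[1];
	const char *values = (const char *) arrow->buffers[2];

	for (int row = 0; row < arrow->length; row++)
	{
		const uint32 start = offsets[row];
		const uint32 len = offsets[row + 1] - start;

		/* Clear this row's bit in the result bitmap if the pattern does not match. */
		if (!match(&values[start], len, pattern, patlen))
			result[row / 64] &= ~(UINT64CONST(1) << (row % 64));
	}
}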
From ff9dedba71d4bf45be1090f092974c257ec5eaa4 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 16:40:02 +0100 Subject: [PATCH 206/249] test for hashed saop --- tsl/test/expected/decompress_vector_qual.out | 20 ++++++++++++++++++++ tsl/test/sql/decompress_vector_qual.sql | 17 +++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index cc3d713a873..2031dcad125 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -906,6 +906,26 @@ select count(*) from vectorqual where ts > '2024-01-01' or (metric3 = 888 and me 1 (1 row) +-- The Postgres planner chooses to build a hash table for large arrays, we currently +-- don't vectorize in this case. +set timescaledb.debug_require_vector_qual to 'forbid'; +select count(*) from singlebatch where metric2 = any(array[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, +10, 11, 12, 13, 14, 15, 16, 17, 18, 19, +20, 21, 22, 23, 24, 25, 26, 27, 28, 29, +30, 31, 32, 33, 34, 35, 36, 37, 38, 39, +40, 41, 42, 43, 44, 45, 46, 47, 48, 49, +50, 51, 52, 53, 54, 55, 56, 57, 58, 59, +60, 61, 62, 63, 64, 65, 66, 67, 68, 69, +70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, +90, 91, 92, 93, 94, 95, 96, 97, 98, 99 +]::int8[]); + count +------- + 5 +(1 row) + reset timescaledb.enable_bulk_decompression; reset timescaledb.debug_require_vector_qual; -- Comparison with other column not vectorized. diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index 2ab8dd86cc0..2dad21a0c31 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -288,6 +288,23 @@ select count(*) from vectorqual where ts > '2024-01-01' or (metric3 = 777 and me select count(*) from vectorqual where ts > '2024-01-01' or (metric3 = 888 and metric2 = 12); select count(*) from vectorqual where ts > '2024-01-01' or (metric3 = 888 and metric2 = 666); + +-- The Postgres planner chooses to build a hash table for large arrays, we currently +-- don't vectorize in this case. 
+set timescaledb.debug_require_vector_qual to 'forbid'; +select count(*) from singlebatch where metric2 = any(array[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, +10, 11, 12, 13, 14, 15, 16, 17, 18, 19, +20, 21, 22, 23, 24, 25, 26, 27, 28, 29, +30, 31, 32, 33, 34, 35, 36, 37, 38, 39, +40, 41, 42, 43, 44, 45, 46, 47, 48, 49, +50, 51, 52, 53, 54, 55, 56, 57, 58, 59, +60, 61, 62, 63, 64, 65, 66, 67, 68, 69, +70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, +90, 91, 92, 93, 94, 95, 96, 97, 98, 99 +]::int8[]); + reset timescaledb.enable_bulk_decompression; reset timescaledb.debug_require_vector_qual; From 6272635adaa39488a646f932d1bc124c5679e216 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 16:41:22 +0100 Subject: [PATCH 207/249] clang 15 --- .github/workflows/sanitizer-build-and-test.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/sanitizer-build-and-test.yaml b/.github/workflows/sanitizer-build-and-test.yaml index e7c2de371aa..367b32a0521 100644 --- a/.github/workflows/sanitizer-build-and-test.yaml +++ b/.github/workflows/sanitizer-build-and-test.yaml @@ -16,11 +16,11 @@ env: name: "Sanitizer" PG_SRC_DIR: "pgbuild" PG_INSTALL_DIR: "postgresql" - extra_packages: "clang-16 llvm-16 llvm-16-dev llvm-16-tools" - llvm_config: "llvm-config-16" - CLANG: "clang-16" - CC: "clang-16" - CXX: "clang-16" + extra_packages: "clang-15 llvm-15 llvm-15-dev llvm-15-tools" + llvm_config: "llvm-config-15" + CLANG: "clang-15" + CC: "clang-15" + CXX: "clang-15" # gcc CFLAGS, disable inlining for function name pattern matching to work for suppressions # CFLAGS: "-g -fsanitize=address,undefined -fno-omit-frame-pointer -O1 -fno-inline" # CXXFLAGS: "-g -fsanitize=address,undefined -fno-omit-frame-pointer -O1 -fno-inline" From 26d1d185f08e4238f596df76409f485643c53690 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 17:02:50 +0100 Subject: [PATCH 208/249] fixes + more coverage --- tsl/test/expected/decompress_vector_qual.out | 45 ++++++++++++++++++-- tsl/test/sql/decompress_vector_qual.sql | 15 +++++-- 2 files changed, 54 insertions(+), 6 deletions(-) diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index 2031dcad125..95d9043d1a7 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -906,9 +906,16 @@ select count(*) from vectorqual where ts > '2024-01-01' or (metric3 = 888 and me 1 (1 row) --- The Postgres planner chooses to build a hash table for large arrays, we currently --- don't vectorize in this case. -set timescaledb.debug_require_vector_qual to 'forbid'; +-- On versions >= 14, the Postgres planner chooses to build a hash table for +-- large arrays. We currently don't vectorize in this case. +select 1 from set_config('timescaledb.debug_require_vector_qual', + case when current_setting('server_version_num')::int >= 140000 then 'forbid' else 'only' end, + false); + ?column? 
+---------- + 1 +(1 row) + select count(*) from singlebatch where metric2 = any(array[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, @@ -1249,18 +1256,48 @@ select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異に 0 | | | | (1 row) +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '異_る_'; + count | min | max | min | max +-------+-----+-----+-----+----- + 9 | 1 | 9 | 7 | 7 +(1 row) + select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different1%'; count | min | max | min | max -------+-----+------+-----+----- 112 | 1 | 1000 | 3 | 3 (1 row) +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%_'; + count | min | max | min | max +-------+-----+------+-----+----- + 1500 | 1 | 1000 | 3 | 5 +(1 row) + +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%%'; + count | min | max | min | max +-------+-----+------+-----+----- + 1500 | 1 | 1000 | 3 | 5 +(1 row) + select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%different1'; count | min | max | min | max -------+-----+-----+-----+----- 1 | 1 | 1 | 3 | 3 (1 row) +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%_1'; + count | min | max | min | max +-------+-----+-----+-----+----- + 199 | 1 | 991 | 3 | 5 +(1 row) + +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%%1'; + count | min | max | min | max +-------+-----+-----+-----+----- + 200 | 1 | 991 | 3 | 5 +(1 row) + select count(*), min(ts), max(ts), min(d), max(d) from t where a not like '%different1%'; count | min | max | min | max -------+-----+------+-----+----- @@ -1318,6 +1355,8 @@ select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different \set ON_ERROR_STOP 0 select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different\'; ERROR: LIKE pattern must not end with escape character +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%\'; +ERROR: LIKE pattern must not end with escape character \set ON_ERROR_STOP 1 reset timescaledb.debug_require_vector_qual; select count(distinct a) from t; diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index 2dad21a0c31..7178a112ddc 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -289,9 +289,12 @@ select count(*) from vectorqual where ts > '2024-01-01' or (metric3 = 888 and me select count(*) from vectorqual where ts > '2024-01-01' or (metric3 = 888 and metric2 = 666); --- The Postgres planner chooses to build a hash table for large arrays, we currently --- don't vectorize in this case. -set timescaledb.debug_require_vector_qual to 'forbid'; +-- On versions >= 14, the Postgres planner chooses to build a hash table for +-- large arrays. We currently don't vectorize in this case. 
+select 1 from set_config('timescaledb.debug_require_vector_qual', + case when current_setting('server_version_num')::int >= 140000 then 'forbid' else 'only' end, + false); + select count(*) from singlebatch where metric2 = any(array[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, @@ -420,9 +423,14 @@ select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%одим select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異なる%'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異オる%'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異にる%'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '異_る_'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different1%'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%_'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%%'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%different1'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%_1'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%%1'; select count(*), min(ts), max(ts), min(d), max(d) from t where a not like '%different1%'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%different1%'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'same_'; @@ -435,6 +443,7 @@ select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different \set ON_ERROR_STOP 0 select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different\'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%\'; \set ON_ERROR_STOP 1 reset timescaledb.debug_require_vector_qual; From 3db423fa2ef3259c819032620d303721b8d8d998 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 17:10:55 +0100 Subject: [PATCH 209/249] license --- tsl/src/nodes/decompress_chunk/ts_like_match.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/ts_like_match.c b/tsl/src/nodes/decompress_chunk/ts_like_match.c index 923e88f8bf6..9c196aa6c06 100644 --- a/tsl/src/nodes/decompress_chunk/ts_like_match.c +++ b/tsl/src/nodes/decompress_chunk/ts_like_match.c @@ -1,7 +1,7 @@ -/* - * This file and its contents are licensed under the Apache License 2.0. + /* + * This file and its contents are licensed under the Timescale License. * Please see the included NOTICE for copyright information and - * LICENSE-APACHE for a copy of the license. + * LICENSE-TIMESCALE for a copy of the license. 
*/ /* From db2a898248fa43a6895912071811b501b3d54645 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 17:18:17 +0100 Subject: [PATCH 210/249] support text inequality --- tsl/src/nodes/decompress_chunk/pred_text.c | 25 ++++++++++++++----- tsl/src/nodes/decompress_chunk/pred_text.h | 3 +++ .../nodes/decompress_chunk/ts_like_match.c | 2 +- .../decompress_chunk/vector_predicates.c | 3 +++ tsl/test/expected/decompress_vector_qual.out | 6 +++++ tsl/test/sql/decompress_vector_qual.sql | 1 + 6 files changed, 33 insertions(+), 7 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/pred_text.c b/tsl/src/nodes/decompress_chunk/pred_text.c index 0448063b493..166c71d4e5c 100644 --- a/tsl/src/nodes/decompress_chunk/pred_text.c +++ b/tsl/src/nodes/decompress_chunk/pred_text.c @@ -15,8 +15,9 @@ #include #endif -void -vector_const_texteq(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result) +static void +vector_const_text_comparison(const ArrowArray *arrow, const Datum constdatum, bool needequal, + uint64 *restrict result) { Assert(!arrow->dictionary); @@ -39,10 +40,10 @@ vector_const_texteq(const ArrowArray *arrow, const Datum constdatum, uint64 *res const uint32 end = offsets[row + 1]; \ Assert(end >= start); \ const uint32 veclen = end - start; \ - bool valid = veclen != textlen ? \ - false : \ - (strncmp((char *) &values[start], (char *) cstring, textlen) == 0); \ - word |= ((uint64) valid) << bit_index; + bool isequal = veclen != textlen ? \ + false : \ + (strncmp((char *) &values[start], (char *) cstring, textlen) == 0); \ + word |= ((uint64) (isequal == needequal)) << bit_index; INNER_LOOP } @@ -63,6 +64,18 @@ vector_const_texteq(const ArrowArray *arrow, const Datum constdatum, uint64 *res #undef INNER_LOOP } +void +vector_const_texteq(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result) +{ + vector_const_text_comparison(arrow, constdatum, /* needequal = */ true, result); +} + +void +vector_const_textne(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result) +{ + vector_const_text_comparison(arrow, constdatum, /* needequal = */ false, result); +} + /* * Generate specializations for LIKE functions based on database encoding. This * follows the Postgres code from backend/utils/adt/like.c. diff --git a/tsl/src/nodes/decompress_chunk/pred_text.h b/tsl/src/nodes/decompress_chunk/pred_text.h index a633a263165..467af660b7b 100644 --- a/tsl/src/nodes/decompress_chunk/pred_text.h +++ b/tsl/src/nodes/decompress_chunk/pred_text.h @@ -12,6 +12,9 @@ extern void vector_const_texteq(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result); +extern void vector_const_textne(const ArrowArray *arrow, const Datum constdatum, + uint64 *restrict result); + extern void vector_const_textlike_utf8(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result); diff --git a/tsl/src/nodes/decompress_chunk/ts_like_match.c b/tsl/src/nodes/decompress_chunk/ts_like_match.c index 9c196aa6c06..05b49d15632 100644 --- a/tsl/src/nodes/decompress_chunk/ts_like_match.c +++ b/tsl/src/nodes/decompress_chunk/ts_like_match.c @@ -1,4 +1,4 @@ - /* +/* * This file and its contents are licensed under the Timescale License. * Please see the included NOTICE for copyright information and * LICENSE-TIMESCALE for a copy of the license. 
diff --git a/tsl/src/nodes/decompress_chunk/vector_predicates.c b/tsl/src/nodes/decompress_chunk/vector_predicates.c index aa70b86fde5..44310157511 100644 --- a/tsl/src/nodes/decompress_chunk/vector_predicates.c +++ b/tsl/src/nodes/decompress_chunk/vector_predicates.c @@ -45,6 +45,9 @@ get_vector_const_predicate(Oid pg_predicate) case F_TEXTEQ: return vector_const_texteq; + + case F_TEXTNE: + return vector_const_textne; } if (GetDatabaseEncoding() == PG_UTF8) diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index 95d9043d1a7..7f8106e04c6 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -1136,6 +1136,12 @@ select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'same'; 1000 | 1 | 1000 | 2 | 2 (1 row) +select count(*), min(ts), max(ts), min(d), max(d) from t where a != 'same'; + count | min | max | min | max +-------+-----+------+-----+----- + 6000 | 1 | 1000 | 0 | 7 +(1 row) + select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'одинаковый'; count | min | max | min | max -------+-----+------+-----+----- diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index 7178a112ddc..73061c42111 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -401,6 +401,7 @@ set timescaledb.debug_require_vector_qual to 'only'; select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'default'; select count(*), min(ts), max(ts), min(d), max(d) from t where a = ''; select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'same'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a != 'same'; select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'одинаковый'; select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'same-with-nulls'; select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different1'; From 8ba519228022d6add07c41d1d19bb07fdc25b732 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 17:23:21 +0100 Subject: [PATCH 211/249] tests --- tsl/test/expected/decompress_vector_qual.out | 66 +++++++++++--------- tsl/test/sql/decompress_vector_qual.sql | 21 ++++--- 2 files changed, 47 insertions(+), 40 deletions(-) diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index 7f8106e04c6..0210831ac7a 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -1268,16 +1268,16 @@ select count(*), min(ts), max(ts), min(d), max(d) from t where a like '異_る_' 9 | 1 | 9 | 7 | 7 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different1%'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%different1%'; count | min | max | min | max -------+-----+------+-----+----- 112 | 1 | 1000 | 3 | 3 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%_'; - count | min | max | min | max --------+-----+------+-----+----- - 1500 | 1 | 1000 | 3 | 5 +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%different1'; + count | min | max | min | max +-------+-----+-----+-----+----- + 1 | 1 | 1 | 3 | 3 (1 row) select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%%'; @@ -1286,76 +1286,82 @@ select count(*), min(ts), max(ts), min(d), max(d) 
from t where a like 'different 1500 | 1 | 1000 | 3 | 5 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%different1'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%%1'; count | min | max | min | max -------+-----+-----+-----+----- - 1 | 1 | 1 | 3 | 3 + 200 | 1 | 991 | 3 | 5 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%_1'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%\1'; count | min | max | min | max -------+-----+-----+-----+----- - 199 | 1 | 991 | 3 | 5 + 200 | 1 | 991 | 3 | 5 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%%1'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%_'; + count | min | max | min | max +-------+-----+------+-----+----- + 1500 | 1 | 1000 | 3 | 5 +(1 row) + +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%_1'; count | min | max | min | max -------+-----+-----+-----+----- - 200 | 1 | 991 | 3 | 5 + 199 | 1 | 991 | 3 | 5 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a not like '%different1%'; - count | min | max | min | max --------+-----+------+-----+----- - 6888 | 1 | 1000 | 0 | 7 +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%nulls_'; + count | min | max | min | max +-------+-----+-----+-----+----- + 5 | 1 | 9 | 5 | 5 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%different1%'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different1%'; count | min | max | min | max -------+-----+------+-----+----- 112 | 1 | 1000 | 3 | 3 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'same_'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different\%'; count | min | max | min | max -------+-----+-----+-----+----- 0 | | | | (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different_'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different\1'; count | min | max | min | max -------+-----+-----+-----+----- - 9 | 1 | 9 | 3 | 3 + 1 | 1 | 1 | 3 | 3 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different_1'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different_%1'; count | min | max | min | max -------+-----+-----+-----+----- - 9 | 11 | 91 | 3 | 3 + 199 | 1 | 991 | 3 | 5 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different_%1'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different_'; count | min | max | min | max -------+-----+-----+-----+----- - 199 | 1 | 991 | 3 | 5 + 9 | 1 | 9 | 3 | 3 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%nulls_'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different_1'; count | min | max | min | max -------+-----+-----+-----+----- - 5 | 1 | 9 | 5 | 5 + 9 | 11 | 91 | 3 | 3 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different\%'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'same_'; count | min | max | min | max -------+-----+-----+-----+----- 0 | | | | (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different\1'; - count | min | max | min | max --------+-----+-----+-----+----- - 1 | 1 | 
1 | 3 | 3 +select count(*), min(ts), max(ts), min(d), max(d) from t where a not like '%different1%'; + count | min | max | min | max +-------+-----+------+-----+----- + 6888 | 1 | 1000 | 0 | 7 (1 row) \set ON_ERROR_STOP 0 diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index 73061c42111..34d7a26e1e5 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -426,21 +426,22 @@ select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異オ select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異にる%'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like '異_る_'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different1%'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%_'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%%'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%different1%'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%different1'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%_1'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%%'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%%1'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a not like '%different1%'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%different1%'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'same_'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different_'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different_1'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different_%1'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%\1'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%_'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%_1'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%nulls_'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different1%'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different\%'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different\1'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different_%1'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different_'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different_1'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'same_'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a not like '%different1%'; \set ON_ERROR_STOP 0 select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different\'; From e052975f2f502af5773db5d08c35ced91c3157bc Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 17:41:15 +0100 Subject: [PATCH 212/249] does it reproduce? 
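Presumably this temporarily reverts the header-validation rewrite from the earlier
"Fix UBSan failure in bulk text decompression" commit, going back to reading the
varlena header straight from the void * returned by consumeCompressedData() without
the intermediate Datum, to check whether the sanitizer failure comes back.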
--- tsl/src/compression/array.c | 42 ++++++++++--------------------------- 1 file changed, 11 insertions(+), 31 deletions(-) diff --git a/tsl/src/compression/array.c b/tsl/src/compression/array.c index d30688c2142..78b8e6b2aa3 100644 --- a/tsl/src/compression/array.c +++ b/tsl/src/compression/array.c @@ -509,40 +509,20 @@ text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, uint32 offset = 0; for (int i = 0; i < n_notnull; i++) { - Datum vardata = PointerGetDatum(consumeCompressedData(si, sizes[i])); - + void *vardata = consumeCompressedData(si, sizes[i]); /* * Check for potentially corrupt varlena headers since we're reading them - * directly from compressed data. + * directly from compressed data. We can only have a plain datum + * with 1-byte or 4-byte header here, no TOAST or compressed data. */ - if (VARATT_IS_4B_U(vardata)) - { - /* - * Full varsize must be larger or equal than the header size so that - * the calculation of size without header doesn't overflow. - */ - CheckCompressedData(VARSIZE_4B(vardata) >= VARHDRSZ); - } - else if (VARATT_IS_1B(vardata)) - { - /* Can't have a TOAST pointer here. */ - CheckCompressedData(!VARATT_IS_1B_E(vardata)); - - /* - * Full varsize must be larger or equal than the header size so that - * the calculation of size without header doesn't overflow. - */ - CheckCompressedData(VARSIZE_1B(vardata) >= VARHDRSZ_SHORT); - } - else - { - /* - * Can only have an uncompressed datum with 1-byte or 4-byte header - * here, no TOAST or compressed data. - */ - CheckCompressedData(false); - } - + CheckCompressedData(VARATT_IS_4B_U(vardata) || + (VARATT_IS_1B(vardata) && !VARATT_IS_1B_E(vardata))); + /* + * Full varsize must be larger or equal than the header size so that the + * calculation of size without header doesn't overflow. + */ + CheckCompressedData((VARATT_IS_1B(vardata) && VARSIZE_1B(vardata) >= VARHDRSZ_SHORT) || + (VARSIZE_4B(vardata) >= VARHDRSZ)); /* Varsize must match the size stored in the sizes array for this element. 
*/ CheckCompressedData(VARSIZE_ANY(vardata) == sizes[i]); From 70afac327ed69c77952ef67414f46cfa32ca7a9e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 17:46:25 +0100 Subject: [PATCH 213/249] more coverage --- tsl/test/expected/decompress_vector_qual.out | 18 ++++++++++++++++++ tsl/test/sql/decompress_vector_qual.sql | 3 +++ 2 files changed, 21 insertions(+) diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index 0210831ac7a..efa4f126823 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -1268,6 +1268,12 @@ select count(*), min(ts), max(ts), min(d), max(d) from t where a like '異_る_' 9 | 1 | 9 | 7 | 7 (1 row) +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%'; + count | min | max | min | max +-------+-----+------+-----+----- + 7000 | 1 | 1000 | 0 | 7 +(1 row) + select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%different1%'; count | min | max | min | max -------+-----+------+-----+----- @@ -1304,6 +1310,18 @@ select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different 1500 | 1 | 1000 | 3 | 5 (1 row) +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%__'; + count | min | max | min | max +-------+-----+------+-----+----- + 1491 | 1 | 1000 | 3 | 5 +(1 row) + +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%___'; + count | min | max | min | max +-------+-----+------+-----+----- + 1401 | 1 | 1000 | 3 | 5 +(1 row) + select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%_1'; count | min | max | min | max -------+-----+-----+-----+----- diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index 34d7a26e1e5..006a1b3c330 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -426,12 +426,15 @@ select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異オ select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異にる%'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like '異_る_'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%different1%'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%different1'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%%'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%%1'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%\1'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%_'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%__'; +select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%___'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%_1'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%nulls_'; select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different1%'; From 125e1848ddcbb0cbd347eb32f8d6f75f721927a1 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 18:02:05 +0100 Subject: [PATCH 214/249] old config --- 
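Notes: go back to halt_on_error=1 for UBSan instead of abort_on_error=1, so
that undefined-behaviour reports make the process exit with the configured
exitcode (27 here) rather than abort(). A rough local equivalent of the CI
setting, assuming a UBSan-instrumented postgres build and the paths below
(the runtime writes its reports to files named "log_path.pid"):

# Assumes a UBSan-instrumented postgres build and PGDATA pointing at its data directory.
export UBSAN_OPTIONS="print_stacktrace=1 halt_on_error=1 exitcode=27 log_path=$PWD/sanitizer log_exe_name=true print_suppressions=false"
pg_ctl -D "$PGDATA" -l postgres.log restart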
.github/workflows/sanitizer-build-and-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sanitizer-build-and-test.yaml b/.github/workflows/sanitizer-build-and-test.yaml index 367b32a0521..a45377a2d29 100644 --- a/.github/workflows/sanitizer-build-and-test.yaml +++ b/.github/workflows/sanitizer-build-and-test.yaml @@ -44,7 +44,7 @@ env: log_exe_name=true print_suppressions=false exitcode=27 UBSAN_OPTIONS: suppressions=${{ github.workspace }}/scripts/suppressions/suppr_ub.txt - print_stacktrace=1 abort_on_error=1 log_path=${{ github.workspace }}/sanitizer + print_stacktrace=1 halt_on_error=1 log_path=${{ github.workspace }}/sanitizer log_exe_name=true print_suppressions=false exitcode=27 IGNORES: "bgw_db_scheduler bgw_db_scheduler_fixed" From 2a49cb91644d1fc0cf296e32a30556e26a33ab6e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 18:06:41 +0100 Subject: [PATCH 215/249] log path --- .github/workflows/sanitizer-build-and-test.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/sanitizer-build-and-test.yaml b/.github/workflows/sanitizer-build-and-test.yaml index a45377a2d29..3d21d457f46 100644 --- a/.github/workflows/sanitizer-build-and-test.yaml +++ b/.github/workflows/sanitizer-build-and-test.yaml @@ -216,7 +216,9 @@ jobs: uses: actions/upload-artifact@v3 with: name: sanitizer logs ${{ matrix.os }} ${{ env.name }} ${{ matrix.pg }} - path: ${{ github.workspace }}/sanitizer + # The log_path sanitizer option means "Write logs to "log_path.pid". + # https://github.com/google/sanitizers/wiki/SanitizerCommonFlags + path: ${{ github.workspace }}/sanitizer* - name: Upload test results to the database if: always() From e7ec508e62bd680eaf7fff4feff488a87300d7ff Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 18:13:34 +0100 Subject: [PATCH 216/249] old os --- .github/workflows/sanitizer-build-and-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sanitizer-build-and-test.yaml b/.github/workflows/sanitizer-build-and-test.yaml index 3d21d457f46..de18dfcedbe 100644 --- a/.github/workflows/sanitizer-build-and-test.yaml +++ b/.github/workflows/sanitizer-build-and-test.yaml @@ -71,7 +71,7 @@ jobs: fail-fast: false matrix: # "os" has to be in the matrix due to a bug in "env": https://github.community/t/how-to-use-env-context/16975 - os: [ubuntu-latest] + os: ["ubuntu-22.04"] pg: ${{ fromJson(needs.config.outputs.pg_latest) }} steps: - name: Install Linux Dependencies From 6c4fd1c62d416f29398164774a167b9292ee5350 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 18:14:57 +0100 Subject: [PATCH 217/249] upload more sanitizer logs to db --- scripts/upload_ci_stats.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/upload_ci_stats.sh b/scripts/upload_ci_stats.sh index 38c0ff734da..29df99c4a83 100755 --- a/scripts/upload_ci_stats.sh +++ b/scripts/upload_ci_stats.sh @@ -150,7 +150,8 @@ do done # Upload the logs. -for x in sanitizer/* {sqlsmith/sqlsmith,sanitizer,stacktrace,postgres-failure}.log *.diff +# Note that the sanitizer setting log_path means "write logs to "log_path.pid". +for x in sanitizer* sanitizer/* {sqlsmith/sqlsmith,sanitizer,stacktrace,postgres-failure}.log *.diff do if ! 
[ -e "$x" ]; then continue ; fi "${PSQL[@]}" <<<" From 10b8d2962bebc270e35716bd34d5654adb11fda8 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 18:31:01 +0100 Subject: [PATCH 218/249] simplify a function --- .../nodes/decompress_chunk/compressed_batch.c | 94 ++++++++++++------- 1 file changed, 61 insertions(+), 33 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 9dc97270851..d8a0e8e08c0 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -18,19 +18,14 @@ #include "nodes/decompress_chunk/compressed_batch.h" #include "nodes/decompress_chunk/vector_predicates.h" -/* - * Create a single value ArrowArray from Postgres Datum. This is used to run - * the usual vectorized predicates on compressed columns with default values. - */ static ArrowArray * -make_single_value_arrow(Oid pgtype, Datum datum, bool isnull) +make_single_value_arrow_pod(Oid pgtype, Datum datum, bool isnull) { struct ArrowWithBuffers { ArrowArray arrow; - uint64 arrow_buffers_array_storage[3]; - uint64 nulls_buffer[1]; - uint32 offsets_buffer[2]; + uint64 arrow_buffers_array_storage[2]; + uint64 validity_buffer[1]; uint64 values_buffer[8 /* 64-byte padding as required by Arrow. */]; }; @@ -38,19 +33,9 @@ make_single_value_arrow(Oid pgtype, Datum datum, bool isnull) ArrowArray *arrow = &with_buffers->arrow; arrow->length = 1; arrow->buffers = (const void **) with_buffers->arrow_buffers_array_storage; - arrow->buffers[0] = &with_buffers->nulls_buffer; - - if (pgtype == TEXTOID) - { - arrow->n_buffers = 3; - arrow->buffers[1] = with_buffers->offsets_buffer; - arrow->buffers[2] = with_buffers->values_buffer; - } - else - { - arrow->n_buffers = 2; - arrow->buffers[1] = with_buffers->values_buffer; - } + arrow->n_buffers = 2; + arrow->buffers[0] = with_buffers->validity_buffer; + arrow->buffers[1] = with_buffers->values_buffer; if (isnull) { @@ -65,18 +50,6 @@ make_single_value_arrow(Oid pgtype, Datum datum, bool isnull) arrow_set_row_validity((uint64 *) arrow->buffers[0], 0, true); - if (pgtype == TEXTOID) - { - text *detoasted = PG_DETOAST_DATUM(datum); - ((uint32 *) arrow->buffers[1])[1] = VARSIZE_ANY_EXHDR(detoasted); - arrow->buffers[2] = VARDATA(detoasted); - return arrow; - } - - /* - * Fixed-width by-value types. - */ - arrow->buffers[1] = with_buffers->values_buffer; #define FOR_TYPE(PGTYPE, CTYPE, FROMDATUM) \ case PGTYPE: \ *((CTYPE *) arrow->buffers[1]) = FROMDATUM(datum); \ @@ -100,6 +73,61 @@ make_single_value_arrow(Oid pgtype, Datum datum, bool isnull) return arrow; } +static ArrowArray * +make_single_value_arrow_text(Datum datum, bool isnull) +{ + struct ArrowWithBuffers + { + ArrowArray arrow; + uint64 arrow_buffers_array_storage[3]; + uint64 validity_buffer[1]; + uint32 offsets_buffer[2]; + uint64 values_buffer[8 /* 64-byte padding as required by Arrow. 
*/]; + }; + + struct ArrowWithBuffers *with_buffers = palloc0(sizeof(struct ArrowWithBuffers)); + ArrowArray *arrow = &with_buffers->arrow; + arrow->length = 1; + arrow->buffers = (const void **) with_buffers->arrow_buffers_array_storage; + arrow->n_buffers = 3; + arrow->buffers[0] = with_buffers->validity_buffer; + arrow->buffers[1] = with_buffers->offsets_buffer; + arrow->buffers[2] = with_buffers->values_buffer; + + if (isnull) + { + /* + * The validity bitmap was initialized to invalid on allocation, and + * the Datum might be invalid if the value is null (important on i386 + * where it might be pass-by-reference), so don't read it. + */ + arrow->null_count = 1; + return arrow; + } + + arrow_set_row_validity((uint64 *) arrow->buffers[0], 0, true); + + text *detoasted = PG_DETOAST_DATUM(datum); + ((uint32 *) arrow->buffers[1])[1] = VARSIZE_ANY_EXHDR(detoasted); + arrow->buffers[2] = VARDATA(detoasted); + return arrow; +} + +/* + * Create a single value ArrowArray from Postgres Datum. This is used to run + * the usual vectorized predicates on compressed columns with default values. + */ +static ArrowArray * +make_single_value_arrow(Oid pgtype, Datum datum, bool isnull) +{ + if (pgtype == TEXTOID) + { + return make_single_value_arrow_text(datum, isnull); + } + + return make_single_value_arrow_pod(pgtype, datum, isnull); +} + static int get_max_text_datum_size(ArrowArray *text_array) { From a4109555aebbd23b52b1d63c975a99c3f9ec63c1 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 18:31:34 +0100 Subject: [PATCH 219/249] show sanitizer logs --- .github/workflows/sanitizer-build-and-test.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/sanitizer-build-and-test.yaml b/.github/workflows/sanitizer-build-and-test.yaml index de18dfcedbe..0ab253a5c84 100644 --- a/.github/workflows/sanitizer-build-and-test.yaml +++ b/.github/workflows/sanitizer-build-and-test.yaml @@ -204,6 +204,10 @@ jobs: ./scripts/bundle_coredumps.sh grep -C40 "was terminated by signal" postgres.log > postgres-failure.log ||: + - name: Show sanitizer logs + if: always() + run: tail -vn +1 sanitizer* + - name: Coredumps if: always() && steps.collectlogs.outputs.coredumps == 'true' uses: actions/upload-artifact@v3 @@ -211,7 +215,7 @@ jobs: name: Coredumps ${{ matrix.os }} ${{ env.name }} ${{ matrix.pg }} path: coredumps - - name: sanitizer logs + - name: Upload sanitizer logs if: always() uses: actions/upload-artifact@v3 with: From 2cfae4e7b1441ca9e4b550dddef110fc1431b16d Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 18:31:47 +0100 Subject: [PATCH 220/249] Revert "does it reproduce?" This reverts commit e052975f2f502af5773db5d08c35ced91c3157bc. --- tsl/src/compression/array.c | 42 +++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/tsl/src/compression/array.c b/tsl/src/compression/array.c index 78b8e6b2aa3..d30688c2142 100644 --- a/tsl/src/compression/array.c +++ b/tsl/src/compression/array.c @@ -509,20 +509,40 @@ text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, uint32 offset = 0; for (int i = 0; i < n_notnull; i++) { - void *vardata = consumeCompressedData(si, sizes[i]); + Datum vardata = PointerGetDatum(consumeCompressedData(si, sizes[i])); + /* * Check for potentially corrupt varlena headers since we're reading them - * directly from compressed data. 
We can only have a plain datum - * with 1-byte or 4-byte header here, no TOAST or compressed data. - */ - CheckCompressedData(VARATT_IS_4B_U(vardata) || - (VARATT_IS_1B(vardata) && !VARATT_IS_1B_E(vardata))); - /* - * Full varsize must be larger or equal than the header size so that the - * calculation of size without header doesn't overflow. + * directly from compressed data. */ - CheckCompressedData((VARATT_IS_1B(vardata) && VARSIZE_1B(vardata) >= VARHDRSZ_SHORT) || - (VARSIZE_4B(vardata) >= VARHDRSZ)); + if (VARATT_IS_4B_U(vardata)) + { + /* + * Full varsize must be larger or equal than the header size so that + * the calculation of size without header doesn't overflow. + */ + CheckCompressedData(VARSIZE_4B(vardata) >= VARHDRSZ); + } + else if (VARATT_IS_1B(vardata)) + { + /* Can't have a TOAST pointer here. */ + CheckCompressedData(!VARATT_IS_1B_E(vardata)); + + /* + * Full varsize must be larger or equal than the header size so that + * the calculation of size without header doesn't overflow. + */ + CheckCompressedData(VARSIZE_1B(vardata) >= VARHDRSZ_SHORT); + } + else + { + /* + * Can only have an uncompressed datum with 1-byte or 4-byte header + * here, no TOAST or compressed data. + */ + CheckCompressedData(false); + } + /* Varsize must match the size stored in the sizes array for this element. */ CheckCompressedData(VARSIZE_ANY(vardata) == sizes[i]); From 53666356b2f6281fe226a65afc225185354fd310 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 18:49:13 +0100 Subject: [PATCH 221/249] check alignment --- tsl/src/compression/array.c | 11 ++++++++++- tsl/src/compression/compression.h | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/tsl/src/compression/array.c b/tsl/src/compression/array.c index d30688c2142..def30b8abae 100644 --- a/tsl/src/compression/array.c +++ b/tsl/src/compression/array.c @@ -14,6 +14,8 @@ #include #include +#include + #include "compression/array.h" #include "compression/compression.h" #include "compression/simple8b_rle.h" @@ -509,7 +511,7 @@ text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, uint32 offset = 0; for (int i = 0; i < n_notnull; i++) { - Datum vardata = PointerGetDatum(consumeCompressedData(si, sizes[i])); + void *vardata = consumeCompressedData(si, sizes[i]); /* * Check for potentially corrupt varlena headers since we're reading them @@ -517,6 +519,13 @@ text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, */ if (VARATT_IS_4B_U(vardata)) { + /* + * Should have proper alignment, accessing an unaligned struct is UB. + * The compression code respects the alignment requirements, see + * datum_to_bytes_and_advance(). + */ + CheckCompressedData(PointerGetDatum(vardata) % alignof(varattrib_4b) == 0); + /* * Full varsize must be larger or equal than the header size so that * the calculation of size without header doesn't overflow. 
diff --git a/tsl/src/compression/compression.h b/tsl/src/compression/compression.h index cd5e305abe2..282eee90501 100644 --- a/tsl/src/compression/compression.h +++ b/tsl/src/compression/compression.h @@ -378,7 +378,7 @@ extern enum CompressionAlgorithms compress_get_default_algorithm(Oid typeoid); */ #ifndef TS_COMPRESSION_FUZZING #define CORRUPT_DATA_MESSAGE(X) \ - (errmsg("the compressed data is corrupt"), errdetail(X), errcode(ERRCODE_DATA_CORRUPTED)) + (errmsg("the compressed data is corrupt"), errdetail("%s", X), errcode(ERRCODE_DATA_CORRUPTED)) #else #define CORRUPT_DATA_MESSAGE(X) (errcode(ERRCODE_DATA_CORRUPTED)) #endif From 870568f57acd76a4b0ee670bf92e5ae793fdd7be Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 18:50:53 +0100 Subject: [PATCH 222/249] add the accordion --- .github/workflows/sanitizer-build-and-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sanitizer-build-and-test.yaml b/.github/workflows/sanitizer-build-and-test.yaml index 0ab253a5c84..0e65fc47532 100644 --- a/.github/workflows/sanitizer-build-and-test.yaml +++ b/.github/workflows/sanitizer-build-and-test.yaml @@ -206,7 +206,7 @@ jobs: - name: Show sanitizer logs if: always() - run: tail -vn +1 sanitizer* + run: tail -vn +1 sanitizer* ||: - name: Coredumps if: always() && steps.collectlogs.outputs.coredumps == 'true' From 5e392dcda4cb20c776bc069add909bb16c8a7a93 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 18:53:51 +0100 Subject: [PATCH 223/249] benchmark text predicates (2024-01-31 no. 2) From a95f07f583758dd1a2c9667ad7efff9179971f38 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 20:40:20 +0100 Subject: [PATCH 224/249] everything is wrong --- .../workflows/sanitizer-build-and-test.yaml | 3 +- tsl/src/compression/array.c | 35 +++++++--- tsl/src/compression/compression.h | 2 + tsl/src/compression/dictionary.c | 1 + tsl/src/compression/gorilla_impl.c | 4 +- .../compression/simple8b_rle_decompress_all.h | 5 ++ tsl/test/expected/decompress_vector_qual.out | 68 +++++++++++++++++++ tsl/test/sql/decompress_vector_qual.sql | 37 ++++++++++ .../src/decompress_arithmetic_test_impl.c | 12 +++- tsl/test/src/decompress_text_test_impl.c | 12 +++- 10 files changed, 163 insertions(+), 16 deletions(-) diff --git a/.github/workflows/sanitizer-build-and-test.yaml b/.github/workflows/sanitizer-build-and-test.yaml index 0e65fc47532..1bbe9cc7149 100644 --- a/.github/workflows/sanitizer-build-and-test.yaml +++ b/.github/workflows/sanitizer-build-and-test.yaml @@ -206,7 +206,8 @@ jobs: - name: Show sanitizer logs if: always() - run: tail -vn +1 sanitizer* ||: + run: | + tail -vn +1 sanitizer* || : - name: Coredumps if: always() && steps.collectlogs.outputs.coredumps == 'true' diff --git a/tsl/src/compression/array.c b/tsl/src/compression/array.c index def30b8abae..c99f3b080bf 100644 --- a/tsl/src/compression/array.c +++ b/tsl/src/compression/array.c @@ -495,12 +495,18 @@ text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, Simple8bRleSerialized *sizes_serialized = bytes_deserialize_simple8b_and_advance(si); - uint32 sizes[GLOBAL_MAX_ROWS_PER_COMPRESSION]; + /* + * We need a quite significant padding of 63 elements, not bytes, after the + * last element, because we work in Simple8B blocks which can contain up to + * 64 elements. 
+ */ + uint32 sizes[GLOBAL_MAX_ROWS_PER_COMPRESSION + 63]; const uint16 n_notnull = simple8brle_decompress_all_buf_uint32(sizes_serialized, sizes, sizeof(sizes) / sizeof(sizes[0])); const int n_total = has_nulls ? nulls_serialized->num_elements : n_notnull; + CheckCompressedData(n_total >= n_notnull); uint32 *offsets = (uint32 *) MemoryContextAllocZero(dest_mctx, @@ -511,7 +517,17 @@ text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, uint32 offset = 0; for (int i = 0; i < n_notnull; i++) { - void *vardata = consumeCompressedData(si, sizes[i]); + void *unaligned = consumeCompressedData(si, sizes[i]); + + /* + * We start reading from the end of previous datum, but this pointer + * might be not aligned as required for varlena-4b struct. We have to + * align it here. Note that sizes[i] includes the alignment as well in + * addition to the varlena size. + * + * See the corresponding row-by-row code in bytes_to_datum_and_advance(). + */ + void *vardata = DatumGetPointer(att_align_pointer(unaligned, TYPALIGN_INT, -1, unaligned)); /* * Check for potentially corrupt varlena headers since we're reading them @@ -519,13 +535,6 @@ text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, */ if (VARATT_IS_4B_U(vardata)) { - /* - * Should have proper alignment, accessing an unaligned struct is UB. - * The compression code respects the alignment requirements, see - * datum_to_bytes_and_advance(). - */ - CheckCompressedData(PointerGetDatum(vardata) % alignof(varattrib_4b) == 0); - /* * Full varsize must be larger or equal than the header size so that * the calculation of size without header doesn't overflow. @@ -552,8 +561,12 @@ text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, CheckCompressedData(false); } - /* Varsize must match the size stored in the sizes array for this element. */ - CheckCompressedData(VARSIZE_ANY(vardata) == sizes[i]); + /* + * Size of varlena plus alignment must match the size stored in the + * sizes array for this element. + */ + const Datum alignment_bytes = PointerGetDatum(vardata) - PointerGetDatum(unaligned); + CheckCompressedData(VARSIZE_ANY(vardata) + alignment_bytes == sizes[i]); const uint32 textlen = VARSIZE_ANY_EXHDR(vardata); memcpy(&arrow_bodies[offset], VARDATA_ANY(vardata), textlen); diff --git a/tsl/src/compression/compression.h b/tsl/src/compression/compression.h index 282eee90501..c6d5fdbf99e 100644 --- a/tsl/src/compression/compression.h +++ b/tsl/src/compression/compression.h @@ -387,6 +387,8 @@ extern enum CompressionAlgorithms compress_get_default_algorithm(Oid typeoid); if (unlikely(!(X))) \ ereport(ERROR, CORRUPT_DATA_MESSAGE(#X)) +//#define CheckCompressedData(X) Assert(X) + inline static void * consumeCompressedData(StringInfo si, int bytes) { diff --git a/tsl/src/compression/dictionary.c b/tsl/src/compression/dictionary.c index 63fa38104b2..2147105c7d7 100644 --- a/tsl/src/compression/dictionary.c +++ b/tsl/src/compression/dictionary.c @@ -426,6 +426,7 @@ tsl_text_dictionary_decompress_all(Datum compressed, Oid element_type, MemoryCon const uint16 n_notnull = indices_serialized->num_elements; const uint16 n_total = header->has_nulls ? nulls_serialized->num_elements : n_notnull; + CheckCompressedData(n_total >= n_notnull); const uint16 n_padded = n_total + 63; /* This is the padding requirement of simple8brle_decompress_all. 
*/ int16 *restrict indices = MemoryContextAlloc(dest_mctx, sizeof(int16) * n_padded); diff --git a/tsl/src/compression/gorilla_impl.c b/tsl/src/compression/gorilla_impl.c index 41f98a31345..1fdd1d1a956 100644 --- a/tsl/src/compression/gorilla_impl.c +++ b/tsl/src/compression/gorilla_impl.c @@ -51,11 +51,11 @@ FUNCTION_NAME(gorilla_decompress_all, ELEMENT_TYPE)(CompressedGorillaData *goril const uint16 leading_zeros_padded = unpack_leading_zeros_array(&gorilla_data->leading_zeros, all_leading_zeros); - uint8 bit_widths[MAX_NUM_LEADING_ZEROS_PADDED_N64]; + uint8 bit_widths[GLOBAL_MAX_ROWS_PER_COMPRESSION + 63]; const uint16 num_bit_widths = simple8brle_decompress_all_buf_uint8(gorilla_data->num_bits_used_per_xor, bit_widths, - MAX_NUM_LEADING_ZEROS_PADDED_N64); + sizeof(bit_widths) / sizeof(bit_widths[0])); BitArray xors_bitarray = gorilla_data->xors; BitArrayIterator xors_iterator; diff --git a/tsl/src/compression/simple8b_rle_decompress_all.h b/tsl/src/compression/simple8b_rle_decompress_all.h index 52f8de8499b..8d36cd19087 100644 --- a/tsl/src/compression/simple8b_rle_decompress_all.h +++ b/tsl/src/compression/simple8b_rle_decompress_all.h @@ -21,6 +21,11 @@ FUNCTION_NAME(simple8brle_decompress_all_buf, { const uint16 n_total_values = compressed->num_elements; + /* + * Caller must have allocated a properly sized buffer, see the comment above. + */ + Assert(n_buffer_elements >= n_total_values + 63); + const uint16 num_selector_slots = simple8brle_num_selector_slots_for_num_blocks(compressed->num_blocks); const uint16 num_blocks = compressed->num_blocks; diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index a567d375015..759f516740e 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -1053,3 +1053,71 @@ select * from date_table where ts < CURRENT_DATE; 01-01-2021 (3 rows) +-- Text columns. Only tests bulk decompression for now. +create table text_table(ts int, d int); +select create_hypertable('text_table', 'ts'); +NOTICE: adding not-null constraint to column "ts" + create_hypertable +------------------------- + (9,public,text_table,t) +(1 row) + +alter table text_table set (timescaledb.compress, timescaledb.compress_segmentby = 'd'); +insert into text_table select x, 0 /*, default */ from generate_series(1, 1000) x; +select count(compress_chunk(x, true)) from show_chunks('text_table') x; + count +------- + 1 +(1 row) + +alter table text_table add column a text default 'default'; +insert into text_table select x, 1, '' from generate_series(1, 1000) x; +insert into text_table select x, 2, 'same' from generate_series(1, 1000) x; +insert into text_table select x, 3, 'different' || x from generate_series(1, 1000) x; +insert into text_table select x, 4, case when x % 2 = 0 then null else 'same-with-nulls' end from generate_series(1, 1000) x; +insert into text_table select x, 5, case when x % 2 = 0 then null else 'different-with-nulls' || x end from generate_series(1, 1000) x; +insert into text_table select x, 6, 'одинаковый' from generate_series(1, 1000) x; +insert into text_table select x, 7, '異なる' || x from generate_series(1, 1000) x; +-- Some text values with varying lengths in a single batch. They are all different +-- to prevent dictionary compression, because we want to test particular orders +-- here as well. 
+insert into text_table select x, 8, repeat( x::text || 'a', x) from generate_series(1, 100) x; +insert into text_table select x + 100, 8, repeat((101 - x)::text || 'b', (101 - x)) from generate_series(1, 100) x; +insert into text_table select x + 200, 8, repeat((101 - x)::text || 'c', (101 - x)) from generate_series(1, 100) x; +insert into text_table select x + 300, 8, repeat( x::text || 'd', x) from generate_series(1, 100) x; +set timescaledb.debug_require_vector_qual to 'forbid'; +select sum(length(a)) from text_table; + sum +-------- + 118551 +(1 row) + +select count(distinct a) from text_table; + count +------- + 2905 +(1 row) + +select count(compress_chunk(x, true)) from show_chunks('text_table') x; +NOTICE: chunk "_hyper_9_17_chunk" is already compressed + count +------- + 1 +(1 row) + +select format('call recompress_chunk(''%s'')', x) from show_chunks('text_table') x \gexec +call recompress_chunk('_timescaledb_internal._hyper_9_17_chunk') +set timescaledb.enable_bulk_decompression to on; +set timescaledb.debug_require_vector_qual to 'forbid'; +select sum(length(a)) from text_table; + sum +-------- + 118551 +(1 row) + +select count(distinct a) from text_table; + count +------- + 2905 +(1 row) + diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index b9105ec4ec8..d68d05db828 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -352,3 +352,40 @@ select * from date_table where ts <= '2021-01-02'; select * from date_table where ts < '2021-01-02'; select * from date_table where ts < CURRENT_DATE; +-- Text columns. Only tests bulk decompression for now. +create table text_table(ts int, d int); +select create_hypertable('text_table', 'ts'); +alter table text_table set (timescaledb.compress, timescaledb.compress_segmentby = 'd'); + +insert into text_table select x, 0 /*, default */ from generate_series(1, 1000) x; +select count(compress_chunk(x, true)) from show_chunks('text_table') x; +alter table text_table add column a text default 'default'; + +insert into text_table select x, 1, '' from generate_series(1, 1000) x; +insert into text_table select x, 2, 'same' from generate_series(1, 1000) x; +insert into text_table select x, 3, 'different' || x from generate_series(1, 1000) x; +insert into text_table select x, 4, case when x % 2 = 0 then null else 'same-with-nulls' end from generate_series(1, 1000) x; +insert into text_table select x, 5, case when x % 2 = 0 then null else 'different-with-nulls' || x end from generate_series(1, 1000) x; +insert into text_table select x, 6, 'одинаковый' from generate_series(1, 1000) x; +insert into text_table select x, 7, '異なる' || x from generate_series(1, 1000) x; + +-- Some text values with varying lengths in a single batch. They are all different +-- to prevent dictionary compression, because we want to test particular orders +-- here as well. 
+insert into text_table select x, 8, repeat( x::text || 'a', x) from generate_series(1, 100) x; +insert into text_table select x + 100, 8, repeat((101 - x)::text || 'b', (101 - x)) from generate_series(1, 100) x; +insert into text_table select x + 200, 8, repeat((101 - x)::text || 'c', (101 - x)) from generate_series(1, 100) x; +insert into text_table select x + 300, 8, repeat( x::text || 'd', x) from generate_series(1, 100) x; + +set timescaledb.debug_require_vector_qual to 'forbid'; +select sum(length(a)) from text_table; +select count(distinct a) from text_table; + +select count(compress_chunk(x, true)) from show_chunks('text_table') x; +select format('call recompress_chunk(''%s'')', x) from show_chunks('text_table') x \gexec + +set timescaledb.enable_bulk_decompression to on; +set timescaledb.debug_require_vector_qual to 'forbid'; + +select sum(length(a)) from text_table; +select count(distinct a) from text_table; diff --git a/tsl/test/src/decompress_arithmetic_test_impl.c b/tsl/test/src/decompress_arithmetic_test_impl.c index c8445095d05..83db75b0a73 100644 --- a/tsl/test/src/decompress_arithmetic_test_impl.c +++ b/tsl/test/src/decompress_arithmetic_test_impl.c @@ -200,7 +200,17 @@ FUNCTION_NAME3(decompress, ALGO, PG_TYPE_PREFIX)(const uint8 *Data, size_t Size, * 3) The bulk decompression must absolutely work on the correct compressed * data we've just generated. */ - arrow = decompress_all(compressed_data, PG_TYPE_OID, CurrentMemoryContext); + PG_TRY(); + { + arrow = decompress_all(compressed_data, PG_TYPE_OID, CurrentMemoryContext); + } + PG_CATCH(); + { + EmitErrorReport(); + elog(PANIC, "bulk decompression failed for data that we've just compressed"); + } + PG_END_TRY(); + FUNCTION_NAME2(check_arrow, CTYPE)(arrow, PANIC, results, n); return n; diff --git a/tsl/test/src/decompress_text_test_impl.c b/tsl/test/src/decompress_text_test_impl.c index e4741af4d44..da9a158704b 100644 --- a/tsl/test/src/decompress_text_test_impl.c +++ b/tsl/test/src/decompress_text_test_impl.c @@ -220,7 +220,17 @@ decompress_generic_text(const uint8 *Data, size_t Size, bool bulk, int requested * 3) The bulk decompression must absolutely work on the correct compressed * data we've just generated. 
*/ - arrow = decompress_all(compressed_data, TEXTOID, CurrentMemoryContext); + PG_TRY(); + { + arrow = decompress_all(compressed_data, TEXTOID, CurrentMemoryContext); + } + PG_CATCH(); + { + EmitErrorReport(); + elog(PANIC, "bulk decompression failed for data that we've just compressed"); + } + PG_END_TRY(); + decompress_generic_text_check_arrow(arrow, PANIC, results, n); return n; From cfbdec9f53e6b40839a023cdcfecd27d41afd5fe Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 20:45:41 +0100 Subject: [PATCH 225/249] test fixup --- tsl/test/expected/compression_algos.out | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tsl/test/expected/compression_algos.out b/tsl/test/expected/compression_algos.out index fef1cb4444a..05604e11060 100644 --- a/tsl/test/expected/compression_algos.out +++ b/tsl/test/expected/compression_algos.out @@ -1595,10 +1595,10 @@ group by 2, 3 order by 1 desc ; count | bulk_result | rowbyrow_result -------+-------------+----------------- - 21 | XX001 | XX001 + 15 | XX001 | XX001 + 8 | true | true 6 | 08P01 | 08P01 2 | 3F000 | 3F000 - 2 | true | true 1 | 22021 | 22021 1 | false | false (6 rows) @@ -1615,8 +1615,8 @@ group by 2, 3 order by 1 desc -------+-------------+----------------- 51 | XX001 | XX001 4 | 08P01 | 08P01 - 4 | XX001 | true - 2 | true | true + 3 | true | true + 3 | XX001 | true 2 | 22021 | 22021 1 | 3F000 | 3F000 1 | false | false From 55a968aa6fa91493e458d402ac120ba9df4fd5e9 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 20:48:23 +0100 Subject: [PATCH 226/249] add another test --- .../fuzzing/compression/array-text/length-saw | Bin 0 -> 62444 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tsl/test/fuzzing/compression/array-text/length-saw diff --git a/tsl/test/fuzzing/compression/array-text/length-saw b/tsl/test/fuzzing/compression/array-text/length-saw new file mode 100644 index 0000000000000000000000000000000000000000..600db2b823dc0dcc14a6fcd4e55dad4186f94737 GIT binary patch literal 62444 zcmeI552R&V9mnq*^TsueYZ?(zN-0I0{eN#FA|fIpBK7-IPdr|piAO|4L_8uQA|fIp zA|fIpA|fIpA|fIp>T~AIyYJ4Ox#ymH&e?mfz1FfmckVgcUTgh+-#ho7bM{`}^?UB* zi!Xe}_0FO5 z%ytEk>Fq=7nJo&S(OZYcGgm2qKyMjp&pcBB)OqtzdFE*akmgN8>6uFkpvxPF!ZQ~Y zK$Np#<(YE@P~;8$;F*mA$Z^{Dop5Zt2|an?~h&SQ9^Ei=n9Vy68n9(^2i{o-?f9s1S$Qt?L8XE=QnNV zaX>P^Zd;E4GWk{8_|hSbU$(U`8glqWTlsP!fuFafFBGExSzGuLA@rZND_A^yL-)RaD z6N3A8v+@8TvTxOchX!H2TK66l#PrR&^Kc-bZ`7>^0?~ZEZaf4C0#oRtKKJuSzoF;pAx2gv1)xXnC}Z!&sL?63UhqADttVc;8WGgM}pBmSq?r14E>3+_oKtOA1^yUE)4py zvh^dvh#xH*KNbx4k+Sxqz*rwHD?c0t`cPT=fiTJk%fb(VAwE#9d~fJK6oc;vo!g@K zJ)mb*bbeFlwkle`5%hU~(fIDr;eAEzJHpxTDJtIuPJLHV`n7Q0JBq@$!%1%|)_&S6 z;fx13TrV|_-ekIe7CXJ8qe;v_Il&_S!Wj;NACCbWaHR_!7ewBqK<8W zGZ5Lf!%6(a>{>XF{{*%RoXY<$+Y!#@KdoH z!iPXvA!dt!SXq2$OM!4%-DnGfh*?Bx%YvX;v1^NixLG7^ON7u_oo)+-=rIS_av=d6 z8n$T20ke!P9n!$PWD!6nn93{;NCwBBMFaU@%CwjuCET+X8Dxdo+2Vu5aHw09kQ~Avpbo{wEOV6t2tMNwAE?4kf-t*Z*-am`<$Xu!3fTZQuP?NZD2_dAuY8;?$F z-83HGZS=LqvwN++-gti2>BYv8`@KHdIQC%B%Z;O$o^=bHf$3Sd!%3K)^;$R&)3fdZ zr($~69pP+D&$>Hw!1Sy)fbLXFV1Shv`|52qR*8*5ksUn4b0MFfOKN9Rr5O^sFPn=$M{$JeUB} zvyKXL;CR-tVH!-&Iswdt=~<_N$uK?ZWH2A5XPpwJ#PqBa!>pK|b$XZ>)3eS3b7Oke zxnO!s&pIPSfazK1g&;6J>+BE*re_TSLc#Q`fj~5vo;4f@2-C9$1u2b4&ui2tV@K@F+J--A$m;Dx?D&A)3Yuba=`SgONTTtJ!=Gz38rU_1Cqh?tkFO| zn4UEzND0%kMh017de-U=Ar_Kg6UZ^mlQx3OwXEmS^=cN^sJd@ zDu6neo;7oo0tkfZSu`)J8*Q2>qjFeaI+6hI(8%S+~& z3ZM=bfh6;^0!YJkL&;oH0A0AqESZZ6APV;)By+9+ig2?^G8+YugZr40K>@Vj=C))I 
zg0&8kIGaT;(l_aX1gv|KWFdbRy-AvoJZl{#LCBoNt&$p~%{pdD3UX$#x}*aM<3*4} zAX`i(k^-cPZ%CqtJTZYu+>j(*K8YAI#EdDiLVEaUB}&K*^RvVUiQ!e3$RH~=6_A)9 zCH8%gXdoXp<&Zca8TLw%2p|(SuaTug8tfw^i-sK7%t@9D39y%zEEJ+=6Ej&NgwDQk zvN(vFP5NY65Hx!s%7P$bHk*{CK)CGVDT{zu*&J2Ehd|kDRsx48*|b+eh7j5JSb~N4 z*fd(ggy7g~TLOg0*qmHKgRt1gU4nv`*z8`yfqlQCdu>ITZuVO)wF%5h;(YB9R8HAU@VGj%Hc2&wLaxQ7=@yvatI7T-BZ~c`crgO_JhvUdX+t(C&gvurqGQ# zwsIrrL$O@h9Xe2pS9XN6De^13z^T*?mTTcWiV(|oIEh-u_=&HCGag`Xz0^2*pVjJ5 vG>)CI#OFfe$USaAoj0D} Date: Wed, 31 Jan 2024 21:03:31 +0100 Subject: [PATCH 227/249] forgotten file --- tsl/test/expected/compression_algos.out | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tsl/test/expected/compression_algos.out b/tsl/test/expected/compression_algos.out index 05604e11060..7843841cc1a 100644 --- a/tsl/test/expected/compression_algos.out +++ b/tsl/test/expected/compression_algos.out @@ -1596,7 +1596,7 @@ group by 2, 3 order by 1 desc count | bulk_result | rowbyrow_result -------+-------------+----------------- 15 | XX001 | XX001 - 8 | true | true + 9 | true | true 6 | 08P01 | 08P01 2 | 3F000 | 3F000 1 | 22021 | 22021 From f10167d6e89d3d68f644e7d2ac23e951caa38466 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 21:05:30 +0100 Subject: [PATCH 228/249] rename the table --- tsl/test/expected/decompress_vector_qual.out | 130 +++++++++---------- tsl/test/sql/decompress_vector_qual.sql | 130 +++++++++---------- 2 files changed, 130 insertions(+), 130 deletions(-) diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index efa4f126823..a985d1ee42d 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -1081,315 +1081,315 @@ select * from date_table where ts < CURRENT_DATE; (3 rows) -- Vectorized comparison for text -create table t(ts int, d int); -select create_hypertable('t', 'ts'); +create table text_table(ts int, d int); +select create_hypertable('text_table', 'ts'); NOTICE: adding not-null constraint to column "ts" - create_hypertable -------------------- - (9,public,t,t) + create_hypertable +------------------------- + (9,public,text_table,t) (1 row) -alter table t set (timescaledb.compress, timescaledb.compress_segmentby = 'd'); -insert into t select x, 0 /*, default */ from generate_series(1, 1000) x; -select count(compress_chunk(x, true)) from show_chunks('t') x; +alter table text_table set (timescaledb.compress, timescaledb.compress_segmentby = 'd'); +insert into text_table select x, 0 /*, default */ from generate_series(1, 1000) x; +select count(compress_chunk(x, true)) from show_chunks('text_table') x; count ------- 1 (1 row) -alter table t add column a text default 'default'; -insert into t select x, 1, '' from generate_series(1, 1000) x; -insert into t select x, 2, 'same' from generate_series(1, 1000) x; -insert into t select x, 3, 'different' || x from generate_series(1, 1000) x; -insert into t select x, 4, case when x % 2 = 0 then null else 'same-with-nulls' end from generate_series(1, 1000) x; -insert into t select x, 5, case when x % 2 = 0 then null else 'different-with-nulls' || x end from generate_series(1, 1000) x; -insert into t select x, 6, 'одинаковый' from generate_series(1, 1000) x; -insert into t select x, 7, '異なる' || x from generate_series(1, 1000) x; -select count(compress_chunk(x, true)) from show_chunks('t') x; +alter table text_table add column a text default 'default'; +insert into text_table select x, 1, '' 
from generate_series(1, 1000) x; +insert into text_table select x, 2, 'same' from generate_series(1, 1000) x; +insert into text_table select x, 3, 'different' || x from generate_series(1, 1000) x; +insert into text_table select x, 4, case when x % 2 = 0 then null else 'same-with-nulls' end from generate_series(1, 1000) x; +insert into text_table select x, 5, case when x % 2 = 0 then null else 'different-with-nulls' || x end from generate_series(1, 1000) x; +insert into text_table select x, 6, 'одинаковый' from generate_series(1, 1000) x; +insert into text_table select x, 7, '異なる' || x from generate_series(1, 1000) x; +select count(compress_chunk(x, true)) from show_chunks('text_table') x; NOTICE: chunk "_hyper_9_17_chunk" is already compressed count ------- 1 (1 row) -select format('call recompress_chunk(''%s'')', x) from show_chunks('t') x \gexec +select format('call recompress_chunk(''%s'')', x) from show_chunks('text_table') x \gexec call recompress_chunk('_timescaledb_internal._hyper_9_17_chunk') set timescaledb.debug_require_vector_qual to 'only'; -- -- Uncomment to generate the test reference w/o the vector optimizations. -- set timescaledb.enable_bulk_decompression to off; -- set timescaledb.debug_require_vector_qual to 'forbid'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'default'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'default'; count | min | max | min | max -------+-----+------+-----+----- 1000 | 1 | 1000 | 0 | 0 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a = ''; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = ''; count | min | max | min | max -------+-----+------+-----+----- 1000 | 1 | 1000 | 1 | 1 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'same'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'same'; count | min | max | min | max -------+-----+------+-----+----- 1000 | 1 | 1000 | 2 | 2 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a != 'same'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a != 'same'; count | min | max | min | max -------+-----+------+-----+----- 6000 | 1 | 1000 | 0 | 7 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'одинаковый'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'одинаковый'; count | min | max | min | max -------+-----+------+-----+----- 1000 | 1 | 1000 | 6 | 6 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'same-with-nulls'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'same-with-nulls'; count | min | max | min | max -------+-----+-----+-----+----- 500 | 1 | 999 | 4 | 4 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different1'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'different1'; count | min | max | min | max -------+-----+-----+-----+----- 1 | 1 | 1 | 3 | 3 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a = '異なる1'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = '異なる1'; count | min | max | min | max -------+-----+-----+-----+----- 1 | 1 | 1 | 7 | 7 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different-with-nulls1'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'different-with-nulls1'; count | min | max | min | 
max -------+-----+-----+-----+----- 1 | 1 | 1 | 5 | 5 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different1000'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'different1000'; count | min | max | min | max -------+------+------+-----+----- 1 | 1000 | 1000 | 3 | 3 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different-with-nulls999'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'different-with-nulls999'; count | min | max | min | max -------+-----+-----+-----+----- 1 | 999 | 999 | 5 | 5 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a in ('same', 'different500'); +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a in ('same', 'different500'); count | min | max | min | max -------+-----+------+-----+----- 1001 | 1 | 1000 | 2 | 3 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a in ('same-with-nulls', 'different-with-nulls499'); +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a in ('same-with-nulls', 'different-with-nulls499'); count | min | max | min | max -------+-----+-----+-----+----- 501 | 1 | 999 | 4 | 5 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a in ('different500', 'default'); +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a in ('different500', 'default'); count | min | max | min | max -------+-----+------+-----+----- 1001 | 1 | 1000 | 0 | 3 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different500' or a = 'default'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'different500' or a = 'default'; count | min | max | min | max -------+-----+------+-----+----- 1001 | 1 | 1000 | 0 | 3 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a is null; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a is null; count | min | max | min | max -------+-----+------+-----+----- 1000 | 2 | 1000 | 4 | 5 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a is not null; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a is not null; count | min | max | min | max -------+-----+------+-----+----- 7000 | 1 | 1000 | 0 | 7 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%same%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%same%'; count | min | max | min | max -------+-----+------+-----+----- 1500 | 1 | 1000 | 2 | 4 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%одинаковый%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%одинаковый%'; count | min | max | min | max -------+-----+------+-----+----- 1000 | 1 | 1000 | 6 | 6 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%одилаковый%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%одилаковый%'; count | min | max | min | max -------+-----+-----+-----+----- 0 | | | | (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%одимаковый%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%одимаковый%'; count | min | max | min | max -------+-----+-----+-----+----- 0 | | | | (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異なる%'; +select count(*), 
min(ts), max(ts), min(d), max(d) from text_table where a like '%異なる%'; count | min | max | min | max -------+-----+------+-----+----- 1000 | 1 | 1000 | 7 | 7 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異オる%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%異オる%'; count | min | max | min | max -------+-----+-----+-----+----- 0 | | | | (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異にる%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%異にる%'; count | min | max | min | max -------+-----+-----+-----+----- 0 | | | | (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '異_る_'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '異_る_'; count | min | max | min | max -------+-----+-----+-----+----- 9 | 1 | 9 | 7 | 7 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%'; count | min | max | min | max -------+-----+------+-----+----- 7000 | 1 | 1000 | 0 | 7 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%different1%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%different1%'; count | min | max | min | max -------+-----+------+-----+----- 112 | 1 | 1000 | 3 | 3 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%different1'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%different1'; count | min | max | min | max -------+-----+-----+-----+----- 1 | 1 | 1 | 3 | 3 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%%'; count | min | max | min | max -------+-----+------+-----+----- 1500 | 1 | 1000 | 3 | 5 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%%1'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%%1'; count | min | max | min | max -------+-----+-----+-----+----- 200 | 1 | 991 | 3 | 5 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%\1'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%\1'; count | min | max | min | max -------+-----+-----+-----+----- 200 | 1 | 991 | 3 | 5 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%_'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%_'; count | min | max | min | max -------+-----+------+-----+----- 1500 | 1 | 1000 | 3 | 5 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%__'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%__'; count | min | max | min | max -------+-----+------+-----+----- 1491 | 1 | 1000 | 3 | 5 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%___'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%___'; count | min | max | min | max -------+-----+------+-----+----- 1401 | 1 | 1000 | 3 | 5 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%_1'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 
'different%_1'; count | min | max | min | max -------+-----+-----+-----+----- 199 | 1 | 991 | 3 | 5 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%nulls_'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%nulls_'; count | min | max | min | max -------+-----+-----+-----+----- 5 | 1 | 9 | 5 | 5 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different1%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different1%'; count | min | max | min | max -------+-----+------+-----+----- 112 | 1 | 1000 | 3 | 3 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different\%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different\%'; count | min | max | min | max -------+-----+-----+-----+----- 0 | | | | (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different\1'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different\1'; count | min | max | min | max -------+-----+-----+-----+----- 1 | 1 | 1 | 3 | 3 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different_%1'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different_%1'; count | min | max | min | max -------+-----+-----+-----+----- 199 | 1 | 991 | 3 | 5 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different_'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different_'; count | min | max | min | max -------+-----+-----+-----+----- 9 | 1 | 9 | 3 | 3 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different_1'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different_1'; count | min | max | min | max -------+-----+-----+-----+----- 9 | 11 | 91 | 3 | 3 (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'same_'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'same_'; count | min | max | min | max -------+-----+-----+-----+----- 0 | | | | (1 row) -select count(*), min(ts), max(ts), min(d), max(d) from t where a not like '%different1%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a not like '%different1%'; count | min | max | min | max -------+-----+------+-----+----- 6888 | 1 | 1000 | 0 | 7 (1 row) \set ON_ERROR_STOP 0 -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different\'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different\'; ERROR: LIKE pattern must not end with escape character -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%\'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%\'; ERROR: LIKE pattern must not end with escape character \set ON_ERROR_STOP 1 reset timescaledb.debug_require_vector_qual; -select count(distinct a) from t; +select count(distinct a) from text_table; count ------- 2505 diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index 006a1b3c330..b2f7d028897 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -374,85 +374,85 @@ select * from date_table where ts < CURRENT_DATE; -- Vectorized comparison for text -create table t(ts int, d 
int); -select create_hypertable('t', 'ts'); -alter table t set (timescaledb.compress, timescaledb.compress_segmentby = 'd'); +create table text_table(ts int, d int); +select create_hypertable('text_table', 'ts'); +alter table text_table set (timescaledb.compress, timescaledb.compress_segmentby = 'd'); -insert into t select x, 0 /*, default */ from generate_series(1, 1000) x; -select count(compress_chunk(x, true)) from show_chunks('t') x; -alter table t add column a text default 'default'; +insert into text_table select x, 0 /*, default */ from generate_series(1, 1000) x; +select count(compress_chunk(x, true)) from show_chunks('text_table') x; +alter table text_table add column a text default 'default'; -insert into t select x, 1, '' from generate_series(1, 1000) x; -insert into t select x, 2, 'same' from generate_series(1, 1000) x; -insert into t select x, 3, 'different' || x from generate_series(1, 1000) x; -insert into t select x, 4, case when x % 2 = 0 then null else 'same-with-nulls' end from generate_series(1, 1000) x; -insert into t select x, 5, case when x % 2 = 0 then null else 'different-with-nulls' || x end from generate_series(1, 1000) x; -insert into t select x, 6, 'одинаковый' from generate_series(1, 1000) x; -insert into t select x, 7, '異なる' || x from generate_series(1, 1000) x; +insert into text_table select x, 1, '' from generate_series(1, 1000) x; +insert into text_table select x, 2, 'same' from generate_series(1, 1000) x; +insert into text_table select x, 3, 'different' || x from generate_series(1, 1000) x; +insert into text_table select x, 4, case when x % 2 = 0 then null else 'same-with-nulls' end from generate_series(1, 1000) x; +insert into text_table select x, 5, case when x % 2 = 0 then null else 'different-with-nulls' || x end from generate_series(1, 1000) x; +insert into text_table select x, 6, 'одинаковый' from generate_series(1, 1000) x; +insert into text_table select x, 7, '異なる' || x from generate_series(1, 1000) x; -select count(compress_chunk(x, true)) from show_chunks('t') x; -select format('call recompress_chunk(''%s'')', x) from show_chunks('t') x \gexec +select count(compress_chunk(x, true)) from show_chunks('text_table') x; +select format('call recompress_chunk(''%s'')', x) from show_chunks('text_table') x \gexec set timescaledb.debug_require_vector_qual to 'only'; -- -- Uncomment to generate the test reference w/o the vector optimizations. 
-- set timescaledb.enable_bulk_decompression to off; -- set timescaledb.debug_require_vector_qual to 'forbid'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'default'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a = ''; -select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'same'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a != 'same'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'одинаковый'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'same-with-nulls'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different1'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a = '異なる1'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different-with-nulls1'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different1000'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different-with-nulls999'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a in ('same', 'different500'); -select count(*), min(ts), max(ts), min(d), max(d) from t where a in ('same-with-nulls', 'different-with-nulls499'); -select count(*), min(ts), max(ts), min(d), max(d) from t where a in ('different500', 'default'); -select count(*), min(ts), max(ts), min(d), max(d) from t where a = 'different500' or a = 'default'; - -select count(*), min(ts), max(ts), min(d), max(d) from t where a is null; -select count(*), min(ts), max(ts), min(d), max(d) from t where a is not null; - -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%same%'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%одинаковый%'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%одилаковый%'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%одимаковый%'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異なる%'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異オる%'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%異にる%'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '異_る_'; - -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%different1%'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like '%different1'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%%'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%%1'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%\1'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%_'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%__'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%___'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%_1'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%nulls_'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different1%'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different\%'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different\1'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different_%1'; -select 
count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different_'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different_1'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'same_'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a not like '%different1%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'default'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = ''; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'same'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a != 'same'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'одинаковый'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'same-with-nulls'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'different1'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = '異なる1'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'different-with-nulls1'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'different1000'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'different-with-nulls999'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a in ('same', 'different500'); +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a in ('same-with-nulls', 'different-with-nulls499'); +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a in ('different500', 'default'); +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'different500' or a = 'default'; + +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a is null; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a is not null; + +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%same%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%одинаковый%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%одилаковый%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%одимаковый%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%異なる%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%異オる%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%異にる%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '異_る_'; + +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%different1%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%different1'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%%1'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%\1'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%_'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%__'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%___'; +select 
count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%_1'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%nulls_'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different1%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different\%'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different\1'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different_%1'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different_'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different_1'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'same_'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a not like '%different1%'; \set ON_ERROR_STOP 0 -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different\'; -select count(*), min(ts), max(ts), min(d), max(d) from t where a like 'different%\'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different\'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%\'; \set ON_ERROR_STOP 1 reset timescaledb.debug_require_vector_qual; -select count(distinct a) from t; +select count(distinct a) from text_table; reset timescaledb.debug_require_vector_qual; reset timescaledb.enable_bulk_decompression; From 74eccd263890da61b302c114dd3bdf7e3665acf7 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 21:19:42 +0100 Subject: [PATCH 229/249] update actions to node 20 --- .github/workflows/libfuzzer.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index ac08fc7b7ec..a10a264ef26 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -49,7 +49,7 @@ jobs: # leading to a tainted cache - name: Cache PostgreSQL id: cache-postgresql - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/${{ env.PG_SRC_DIR }} key: "postgresql-libfuzzer-${{ steps.get-date.outputs.date }}-${{ hashFiles('.github/**') }}" @@ -145,7 +145,7 @@ jobs: uses: actions/checkout@v4 - name: Download the installation directory - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: fuzzing-install-dir From e067b4997f4b247ca4d7d46da3afa2936f97c58c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 21:26:54 +0100 Subject: [PATCH 230/249] forgotten reference --- tsl/test/expected/compression_algos.out | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tsl/test/expected/compression_algos.out b/tsl/test/expected/compression_algos.out index 05604e11060..7843841cc1a 100644 --- a/tsl/test/expected/compression_algos.out +++ b/tsl/test/expected/compression_algos.out @@ -1596,7 +1596,7 @@ group by 2, 3 order by 1 desc count | bulk_result | rowbyrow_result -------+-------------+----------------- 15 | XX001 | XX001 - 8 | true | true + 9 | true | true 6 | 08P01 | 08P01 2 | 3F000 | 3F000 1 | 22021 | 22021 From b25f3a5c7e8c62cc71cf9302ac316f166bfba937 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 21:36:36 +0100 Subject: [PATCH 231/249] check 
liveness after checking the interesting cases --- .github/workflows/libfuzzer.yaml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index a10a264ef26..36c83c6b92b 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -240,9 +240,6 @@ jobs: exit 1 fi - # Check that the server is still alive. - psql -c "select 1" - ls db/corpus | wc -l fn="ts_read_compressed_data_directory('${{ matrix.case.algo }}', @@ -267,10 +264,12 @@ jobs: echo "Internal program errors: $errors" [ $errors -eq 0 ] || exit 1 - # Shouldn't have any WARNINGS in the log. ! grep -F "] WARNING: " postgres.log + # Check that the server is still alive. + psql -c "select 1" + - name: Collect the logs if: always() id: collectlogs From 86ed6de51f34a80798977df956bb34ecbe7d8a78 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jan 2024 22:22:25 +0100 Subject: [PATCH 232/249] Add more interesting cases --- tsl/test/expected/compression_algos.out | 16 ++++++++-------- .../01accf1c403c681e8ccc10349c97a28ef2afbdd3 | Bin 0 -> 1046 bytes .../3f82cf837a5f3fae26f7cbb25ed3d903eed71687 | Bin 0 -> 1167 bytes .../5d1be7e9dda1ee8896be5b7e34a85ee16452a7b4 | 1 + .../6a126964d691ada7de1d25c976c4e7b481858665 | Bin 0 -> 36 bytes .../bc6960f7ed1b4b216c5cbc2721ea5136ffb80aae | Bin 0 -> 1081 bytes .../c2588a11e70caad0b9c91272f81d48348b94f938 | Bin 0 -> 1259 bytes .../d57ef126bce1cf2bb5a63addf172a9cf9bedb7da | Bin 0 -> 1078 bytes .../ebddebf24b791e82c4ac007197581ec4fa2886ee | Bin 0 -> 163 bytes .../ec536d9d30f08137531d4bfe676a545d897ae98b | Bin 0 -> 1155 bytes .../03be2aaf02e2f78738dcd55209eb660c63460f73 | Bin 0 -> 625 bytes .../0ce9675bea488114cd61e0902df0f65bb266b414 | Bin 0 -> 625 bytes .../193ff1ad23aaeaa8feafcbe7168f259e5063e715 | Bin 0 -> 1221 bytes .../24a09483ef45200b6ed894e43934767755155e3d | Bin 0 -> 77 bytes .../2dd04d16222832a133c966eb5e9c4e7ae4d31059 | Bin 0 -> 115 bytes .../39e6777ccb030519cefc62205e2df23e703aa9fa | Bin 0 -> 89 bytes .../3b2185a90d32decad650ceb443751a4023368cdd | Bin 0 -> 167 bytes .../446dbbeac1d19ca3ac51def5be4e9fcccf97cdc6 | Bin 0 -> 80 bytes .../48ddda607e4778f35289784b3f6c80234660856d | Bin 0 -> 72 bytes .../4979455aba5b4c7d302af47c4fd941214e16d3a9 | Bin 0 -> 72 bytes .../5e81672e813bd1e06ecba224f520328498f27ea8 | Bin 0 -> 161 bytes .../626bf1b65f1a0c5fba061fac07a711f970b07a80 | Bin 0 -> 626 bytes .../75fc076b6fc8dac4f65c31fa4fd34ad236530422 | Bin 0 -> 105 bytes .../76fa9dc37fc42934404c72df57d296c663ee807d | Bin 0 -> 193 bytes .../79ef4a8ba594c7a2b611bc33fc8b83fe8ac14998 | Bin 0 -> 73 bytes .../7bbc7585698a7c3375bea9c3bcff8838722d8f64 | Bin 0 -> 57 bytes .../7be90a9058961ac6ee07f764618805dfe8b6cfae | Bin 0 -> 56 bytes .../8426e28aabe6684eb7200bd032ff0dad5a5169af | Bin 0 -> 57 bytes .../8a5c3216797d7a54adeafc86b708b942a9894b2f | Bin 0 -> 97 bytes .../9a78211436f6d425ec38f5c4e02270801f3524f8 | 1 + .../9b99593353a610c4bee0d6a94a01a3296080c0fb | Bin 0 -> 2 bytes .../a54a56388dd751dc1ea1727f8e2965b349b54800 | Bin 0 -> 620 bytes .../ae02ec1f395c202f6cd2965ea34d73dc35e10fdf | Bin 0 -> 137 bytes .../af094ea4a3607992332e87dbe90f1a0f0cf82e09 | Bin 0 -> 49 bytes .../b931131d935fb27ebf7977285299dcf11bb52eb4 | Bin 0 -> 97 bytes .../e767ec96033f7d9da306711adab0a6557fd4b71e | Bin 0 -> 321 bytes .../f0f2e7efda77af51c83fe7870bd04d1b93556116 | Bin 0 -> 76 bytes .../f2d42e12fb2ed451e7ba0b270a9065916a391fb1 | Bin 0 -> 49 bytes 
.../f6377e2f32fd888f1877518c26bf6be4e24f92bd | Bin 0 -> 1073 bytes .../fc8f69146bf6f556444c5acd8053537a292db418 | Bin 0 -> 49 bytes .../fee17c65913f4eddcae3f69d1c2b6f318b938af2 | Bin 0 -> 72 bytes 41 files changed, 10 insertions(+), 8 deletions(-) create mode 100644 tsl/test/fuzzing/compression/array-text/01accf1c403c681e8ccc10349c97a28ef2afbdd3 create mode 100644 tsl/test/fuzzing/compression/array-text/3f82cf837a5f3fae26f7cbb25ed3d903eed71687 create mode 100644 tsl/test/fuzzing/compression/array-text/5d1be7e9dda1ee8896be5b7e34a85ee16452a7b4 create mode 100644 tsl/test/fuzzing/compression/array-text/6a126964d691ada7de1d25c976c4e7b481858665 create mode 100644 tsl/test/fuzzing/compression/array-text/bc6960f7ed1b4b216c5cbc2721ea5136ffb80aae create mode 100644 tsl/test/fuzzing/compression/array-text/c2588a11e70caad0b9c91272f81d48348b94f938 create mode 100644 tsl/test/fuzzing/compression/array-text/d57ef126bce1cf2bb5a63addf172a9cf9bedb7da create mode 100644 tsl/test/fuzzing/compression/array-text/ebddebf24b791e82c4ac007197581ec4fa2886ee create mode 100644 tsl/test/fuzzing/compression/array-text/ec536d9d30f08137531d4bfe676a545d897ae98b create mode 100644 tsl/test/fuzzing/compression/dictionary-text/03be2aaf02e2f78738dcd55209eb660c63460f73 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/0ce9675bea488114cd61e0902df0f65bb266b414 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/193ff1ad23aaeaa8feafcbe7168f259e5063e715 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/24a09483ef45200b6ed894e43934767755155e3d create mode 100644 tsl/test/fuzzing/compression/dictionary-text/2dd04d16222832a133c966eb5e9c4e7ae4d31059 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/39e6777ccb030519cefc62205e2df23e703aa9fa create mode 100644 tsl/test/fuzzing/compression/dictionary-text/3b2185a90d32decad650ceb443751a4023368cdd create mode 100644 tsl/test/fuzzing/compression/dictionary-text/446dbbeac1d19ca3ac51def5be4e9fcccf97cdc6 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/48ddda607e4778f35289784b3f6c80234660856d create mode 100644 tsl/test/fuzzing/compression/dictionary-text/4979455aba5b4c7d302af47c4fd941214e16d3a9 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/5e81672e813bd1e06ecba224f520328498f27ea8 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/626bf1b65f1a0c5fba061fac07a711f970b07a80 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/75fc076b6fc8dac4f65c31fa4fd34ad236530422 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/76fa9dc37fc42934404c72df57d296c663ee807d create mode 100644 tsl/test/fuzzing/compression/dictionary-text/79ef4a8ba594c7a2b611bc33fc8b83fe8ac14998 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/7bbc7585698a7c3375bea9c3bcff8838722d8f64 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/7be90a9058961ac6ee07f764618805dfe8b6cfae create mode 100644 tsl/test/fuzzing/compression/dictionary-text/8426e28aabe6684eb7200bd032ff0dad5a5169af create mode 100644 tsl/test/fuzzing/compression/dictionary-text/8a5c3216797d7a54adeafc86b708b942a9894b2f create mode 100644 tsl/test/fuzzing/compression/dictionary-text/9a78211436f6d425ec38f5c4e02270801f3524f8 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/9b99593353a610c4bee0d6a94a01a3296080c0fb create mode 100644 tsl/test/fuzzing/compression/dictionary-text/a54a56388dd751dc1ea1727f8e2965b349b54800 create mode 100644 
tsl/test/fuzzing/compression/dictionary-text/ae02ec1f395c202f6cd2965ea34d73dc35e10fdf create mode 100644 tsl/test/fuzzing/compression/dictionary-text/af094ea4a3607992332e87dbe90f1a0f0cf82e09 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/b931131d935fb27ebf7977285299dcf11bb52eb4 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/e767ec96033f7d9da306711adab0a6557fd4b71e create mode 100644 tsl/test/fuzzing/compression/dictionary-text/f0f2e7efda77af51c83fe7870bd04d1b93556116 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/f2d42e12fb2ed451e7ba0b270a9065916a391fb1 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/f6377e2f32fd888f1877518c26bf6be4e24f92bd create mode 100644 tsl/test/fuzzing/compression/dictionary-text/fc8f69146bf6f556444c5acd8053537a292db418 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/fee17c65913f4eddcae3f69d1c2b6f318b938af2 diff --git a/tsl/test/expected/compression_algos.out b/tsl/test/expected/compression_algos.out index 7843841cc1a..36fc74065a5 100644 --- a/tsl/test/expected/compression_algos.out +++ b/tsl/test/expected/compression_algos.out @@ -1595,9 +1595,9 @@ group by 2, 3 order by 1 desc ; count | bulk_result | rowbyrow_result -------+-------------+----------------- - 15 | XX001 | XX001 - 9 | true | true - 6 | 08P01 | 08P01 + 17 | XX001 | XX001 + 15 | true | true + 7 | 08P01 | 08P01 2 | 3F000 | 3F000 1 | 22021 | 22021 1 | false | false @@ -1613,11 +1613,11 @@ group by 2, 3 order by 1 desc ; count | bulk_result | rowbyrow_result -------+-------------+----------------- - 51 | XX001 | XX001 - 4 | 08P01 | 08P01 - 3 | true | true - 3 | XX001 | true - 2 | 22021 | 22021 + 78 | XX001 | XX001 + 5 | 08P01 | 08P01 + 4 | true | true + 4 | XX001 | true + 3 | 22021 | 22021 1 | 3F000 | 3F000 1 | false | false (7 rows) diff --git a/tsl/test/fuzzing/compression/array-text/01accf1c403c681e8ccc10349c97a28ef2afbdd3 b/tsl/test/fuzzing/compression/array-text/01accf1c403c681e8ccc10349c97a28ef2afbdd3 new file mode 100644 index 0000000000000000000000000000000000000000..00a5f7bbffaef07e63852c05b7849772dabcdb18 GIT binary patch literal 1046 ocmZQ%C`gY_PAo~x$xmk}Nv$Yh00HKIU}}^Y4S~@R7=a-G0I=)^*#H0l literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/array-text/3f82cf837a5f3fae26f7cbb25ed3d903eed71687 b/tsl/test/fuzzing/compression/array-text/3f82cf837a5f3fae26f7cbb25ed3d903eed71687 new file mode 100644 index 0000000000000000000000000000000000000000..1f88b03407d5b503109c8b343dd6b8f854df2d6f GIT binary patch literal 1167 zcmZQ%C`gY_PAo~x$xmk}Nv$Yh00HJtK!P_VGc7F@$VdSR;RlL1q!}4;NRTB=&@fG8 zV+pE8R!lW^JsBqPlA#Ah3kM31fepk3dY=g)Y>!|PW~mTXGGtW9$^xA_AYlPYj6`G| z27y5mma;%#33Q8CG$`K}rRJ6BmS>hQa%bRhpKL5h2NFP2#{%JJCZvIk19EuM;3^P) MhNV$Ph61200Eu@PB>(^b literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/array-text/5d1be7e9dda1ee8896be5b7e34a85ee16452a7b4 b/tsl/test/fuzzing/compression/array-text/5d1be7e9dda1ee8896be5b7e34a85ee16452a7b4 new file mode 100644 index 00000000000..303e398c82e --- /dev/null +++ b/tsl/test/fuzzing/compression/array-text/5d1be7e9dda1ee8896be5b7e34a85ee16452a7b4 @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/tsl/test/fuzzing/compression/array-text/6a126964d691ada7de1d25c976c4e7b481858665 b/tsl/test/fuzzing/compression/array-text/6a126964d691ada7de1d25c976c4e7b481858665 new file mode 100644 index 0000000000000000000000000000000000000000..2031608b3299d23d4bf4630deb48756e288f789d GIT binary patch literal 36 lcmZQ%EJ%+}PAo~x$xmk}Nv$YhU|?imVCH9lfKVV{1^}b)2SWe= literal 0 HcmV?d00001 
diff --git a/tsl/test/fuzzing/compression/array-text/bc6960f7ed1b4b216c5cbc2721ea5136ffb80aae b/tsl/test/fuzzing/compression/array-text/bc6960f7ed1b4b216c5cbc2721ea5136ffb80aae new file mode 100644 index 0000000000000000000000000000000000000000..a0ba87a1079adf6947e4eda58eb76ba7ffa837ab GIT binary patch literal 1081 zcmZQ%C`gY_PAo~x$xmk}Nv$Yh00HI~K!P_VGc7H(C^fId(7?a|D8K_3u(Y%U@$Md1eVCcLol($i{+n$jXCB5P_zS1;WovNCSpL5s<@^hNglc9Vk#zkqQpA z5C&r)kC6cwhK?nmaARPAngC`GG{MM7O&1U}OcU8Qf~t`fQ;l6uHIs-phyztA8;A?^ zJrhFM9>E05q2@mWh-|8fxi$ksCW=zr97e2}n1M7yV8)l2=D?W5)45=!q?8!g%?>97 RuxrC53d+0su=IIU)c6 literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/array-text/d57ef126bce1cf2bb5a63addf172a9cf9bedb7da b/tsl/test/fuzzing/compression/array-text/d57ef126bce1cf2bb5a63addf172a9cf9bedb7da new file mode 100644 index 0000000000000000000000000000000000000000..269d95e5d7281d994d89c33a84c262b1a5c310e1 GIT binary patch literal 1078 zcmZQ%C`gY_PAo~x$xmk}Nv$Yh00HI~K!P_VGc7F@%qN>jL(*x8#HJg`zj6$gn$44gZ&K-4V*!R N>~JNZZV~_^0|4rwY}Nn( literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/0ce9675bea488114cd61e0902df0f65bb266b414 b/tsl/test/fuzzing/compression/dictionary-text/0ce9675bea488114cd61e0902df0f65bb266b414 new file mode 100644 index 0000000000000000000000000000000000000000..e0ceba082c0460a46be69f96af6793d1989617b1 GIT binary patch literal 625 zcmb7?Jqp4=5QX0^0kzWfwiX`1#xh`Iqc@PCk|q#~6c(OHtn9SZ(u;Tiv9Va^XH6mo z(GRjS+00|!&c-|v=)n%`HQMaawh}<3^1kV z3)IPLuT*6oybN$F6b(4et#dfN1984+V<=2yN>M%Kd5i4x+$z09m2R>%uFxNoKQ(95 zmvg!rrQ@RytsN(PLXM8GrlRrJDId5%66cJMTrY4xgPxRV?}p5T2Ns|ihMQ2>Dzr;K bi%LbK3n|r}iSs1Rkn;8&UGm3Q@G5!&UT?VU literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/193ff1ad23aaeaa8feafcbe7168f259e5063e715 b/tsl/test/fuzzing/compression/dictionary-text/193ff1ad23aaeaa8feafcbe7168f259e5063e715 new file mode 100644 index 0000000000000000000000000000000000000000..492ae25446f287dfa35ca84de95a976d17fe6011 GIT binary patch literal 1221 zcmZQ#EJ%+}PAo~x$xmk}Nv$YhU|?YU0wRDI1o{~m**^3ruRD dX-vlsw2KC_{lhu51O{Ee4?Y)B5=7Y24ge~8JPiN< literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/24a09483ef45200b6ed894e43934767755155e3d b/tsl/test/fuzzing/compression/dictionary-text/24a09483ef45200b6ed894e43934767755155e3d new file mode 100644 index 0000000000000000000000000000000000000000..b9af7a27b03bf88e113227d0e1469d640f26a1e0 GIT binary patch literal 77 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEh17+C&80Hc@=!#AA?Y4I~6L1qD|@Kv-yK SC`ghSh#BtyiD(cHq#FPL#Sgat literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/3b2185a90d32decad650ceb443751a4023368cdd b/tsl/test/fuzzing/compression/dictionary-text/3b2185a90d32decad650ceb443751a4023368cdd new file mode 100644 index 0000000000000000000000000000000000000000..4cf12c61b2ae059eabf35867e0d6527ee0d01dab GIT binary patch literal 167 zcmZQ#EJ%+}PAo~x$xmk}Nv$YhU|?YU0wRDI1o{~m*-0313#43X#g?|fEXynY{$0$e-MM| z|9^%l|i44`;Ll0jzz G6#xKy4#-#l literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/75fc076b6fc8dac4f65c31fa4fd34ad236530422 b/tsl/test/fuzzing/compression/dictionary-text/75fc076b6fc8dac4f65c31fa4fd34ad236530422 new file mode 100644 index 0000000000000000000000000000000000000000..cd21b9ed2b8689abf2d26b493c62b3ff14db0830 GIT binary patch literal 105 zcmZQ#C`gY_PAo~x$xmk}Nv$Yh00I!f!5|I2)2F4daT0js4{{8>|A0)U6 z2pAYRVTwT_AVG)(ND>A@!$6{7^`W7$p->PDG6Sxei2=neKyk2hKx!CLfxJLu#b8mO FUI6wdD;)p; literal 0 HcmV?d00001 diff --git 
a/tsl/test/fuzzing/compression/dictionary-text/79ef4a8ba594c7a2b611bc33fc8b83fe8ac14998 b/tsl/test/fuzzing/compression/dictionary-text/79ef4a8ba594c7a2b611bc33fc8b83fe8ac14998 new file mode 100644 index 0000000000000000000000000000000000000000..ec70dbd3d3a897f3629d1d6cf5a6676539d3b341 GIT binary patch literal 73 zcmZQ#EJ%+}PAo~x$xmk}Nv$YhU|?YU3?hIS1o#;k2)2F4dano$8r0D(UPgB^n) IOc^5s0NvsU=Kufz literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/7bbc7585698a7c3375bea9c3bcff8838722d8f64 b/tsl/test/fuzzing/compression/dictionary-text/7bbc7585698a7c3375bea9c3bcff8838722d8f64 new file mode 100644 index 0000000000000000000000000000000000000000..f6e3ac22f9cafde39950b077b5d98b7aafdddd3b GIT binary patch literal 57 zcmZQ#C`gY_PAo~x$xmk}Nv$Yh00KrJW&%>H!a~8|B_mLQT7G&u!#4&QkOH6z0H@at AaR2}S literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/7be90a9058961ac6ee07f764618805dfe8b6cfae b/tsl/test/fuzzing/compression/dictionary-text/7be90a9058961ac6ee07f764618805dfe8b6cfae new file mode 100644 index 0000000000000000000000000000000000000000..5076d4c666a36c0fb003be6cfe05b435e6110d57 GIT binary patch literal 56 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEh17+4q>1R4G_FgS|oFnqIX0LcI`P`ZSH F0|0fB3wi(m literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/8426e28aabe6684eb7200bd032ff0dad5a5169af b/tsl/test/fuzzing/compression/dictionary-text/8426e28aabe6684eb7200bd032ff0dad5a5169af new file mode 100644 index 0000000000000000000000000000000000000000..b14a908aa6fcda594534d8e4358e5dbd07d80eab GIT binary patch literal 57 zcmZQ#C`gY_PAo~x$xmk}Nv$Yh00MC!W@2DqToo1y1}_<-%)=lK1Zp5c%K%7VQwLIvu1;K>iQzvOFx)E4 RhZqeq6AqZ*EF=aa0{~sO8gu{v literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/af094ea4a3607992332e87dbe90f1a0f0cf82e09 b/tsl/test/fuzzing/compression/dictionary-text/af094ea4a3607992332e87dbe90f1a0f0cf82e09 new file mode 100644 index 0000000000000000000000000000000000000000..46f0878ad9de34e43a6ace3124b69ace342019e5 GIT binary patch literal 49 tcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=3M3dgSQug%7_?Y`Yy@Cr000j-2U-9C literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/b931131d935fb27ebf7977285299dcf11bb52eb4 b/tsl/test/fuzzing/compression/dictionary-text/b931131d935fb27ebf7977285299dcf11bb52eb4 new file mode 100644 index 0000000000000000000000000000000000000000..3312fe547a308c58dd73261cf96f250f34b8e272 GIT binary patch literal 97 zcmZQ#EJ%+}PAo~x$xmk}Nv$YhU|?YU0wRDI1o#;k2)2Ik8^8Ys^49|9P~bQr$r cGynxzKoTkq;$Q#&XJ`G-z@Wrn2qeJ(08PLb2LJ#7 literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/e767ec96033f7d9da306711adab0a6557fd4b71e b/tsl/test/fuzzing/compression/dictionary-text/e767ec96033f7d9da306711adab0a6557fd4b71e new file mode 100644 index 0000000000000000000000000000000000000000..343f09135c0d222a723debb825a1d74dc68fc298 GIT binary patch literal 321 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEU|m>5A)|AA5r=_L&L45=j*3}!$XIB-;e zGm#jFAmBd`@BuNDi;H3?00I^k79<03F@cI085jb97!P2=;vcYbh;9~;vl#!2{r|7S v@J*)yC}RY~j0~~Nh7fNv{x$&e4H>?GHGv3rhW{Xn=|6~i0ka)SGco`GFb+zg literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/f0f2e7efda77af51c83fe7870bd04d1b93556116 b/tsl/test/fuzzing/compression/dictionary-text/f0f2e7efda77af51c83fe7870bd04d1b93556116 new file mode 100644 index 0000000000000000000000000000000000000000..2fbea9bb8d89f1318bb2cd4d8ed94f9c9af030b2 GIT binary patch literal 76 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=03;Y07+C)Q|Ia95_ literal 0 HcmV?d00001 diff --git 
a/tsl/test/fuzzing/compression/dictionary-text/fee17c65913f4eddcae3f69d1c2b6f318b938af2 b/tsl/test/fuzzing/compression/dictionary-text/fee17c65913f4eddcae3f69d1c2b6f318b938af2 new file mode 100644 index 0000000000000000000000000000000000000000..f7ca816fb8772ce486baa6f2d6b909b84dd1966b GIT binary patch literal 72 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=03;Y07+C%%=OpGQrk8*v{sYC-Are56fq@@P LGB79uX_gWImaz^p literal 0 HcmV?d00001 From ae22c1a4eadf268ccebd7449b481fe2e04a8eadc Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 1 Feb 2024 10:45:41 +0100 Subject: [PATCH 233/249] add a case --- .../68c94392b09d47edea2a48f62808073bf83448f3 | Bin 0 -> 166 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tsl/test/fuzzing/compression/dictionary-text/68c94392b09d47edea2a48f62808073bf83448f3 diff --git a/tsl/test/fuzzing/compression/dictionary-text/68c94392b09d47edea2a48f62808073bf83448f3 b/tsl/test/fuzzing/compression/dictionary-text/68c94392b09d47edea2a48f62808073bf83448f3 new file mode 100644 index 0000000000000000000000000000000000000000..c367012e07d38a1964ba1798f59598f649ef12d1 GIT binary patch literal 166 zcmZQ#EJ%+}PAo~x$xmk}Nv$YhU|?YU0wRDI1o{~m Date: Thu, 1 Feb 2024 10:50:38 +0100 Subject: [PATCH 234/249] more test cases --- tsl/test/expected/compression_algos.out | 4 ++-- .../3902e1ec9e8894b6befc97144a957e69f696bc75 | Bin 0 -> 193 bytes .../b18ecac8feda2826b91131b386b8842a1fca17e5 | Bin 0 -> 108 bytes 3 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 tsl/test/fuzzing/compression/dictionary-text/3902e1ec9e8894b6befc97144a957e69f696bc75 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/b18ecac8feda2826b91131b386b8842a1fca17e5 diff --git a/tsl/test/expected/compression_algos.out b/tsl/test/expected/compression_algos.out index 36fc74065a5..27cde6265a4 100644 --- a/tsl/test/expected/compression_algos.out +++ b/tsl/test/expected/compression_algos.out @@ -1613,10 +1613,10 @@ group by 2, 3 order by 1 desc ; count | bulk_result | rowbyrow_result -------+-------------+----------------- - 78 | XX001 | XX001 + 80 | XX001 | XX001 5 | 08P01 | 08P01 + 5 | XX001 | true 4 | true | true - 4 | XX001 | true 3 | 22021 | 22021 1 | 3F000 | 3F000 1 | false | false diff --git a/tsl/test/fuzzing/compression/dictionary-text/3902e1ec9e8894b6befc97144a957e69f696bc75 b/tsl/test/fuzzing/compression/dictionary-text/3902e1ec9e8894b6befc97144a957e69f696bc75 new file mode 100644 index 0000000000000000000000000000000000000000..734c29905667a68c78b30eb38af746ed2d71aa26 GIT binary patch literal 193 zcmZQ#EJ%+}PAo~x$xmk}Nv$YhU|?YU0wRDI1o#;k Date: Thu, 1 Feb 2024 10:54:18 +0100 Subject: [PATCH 235/249] cleanup --- tsl/src/compression/array.c | 2 -- tsl/src/compression/compression.h | 2 -- 2 files changed, 4 deletions(-) diff --git a/tsl/src/compression/array.c b/tsl/src/compression/array.c index c99f3b080bf..d73287c8a73 100644 --- a/tsl/src/compression/array.c +++ b/tsl/src/compression/array.c @@ -14,8 +14,6 @@ #include #include -#include - #include "compression/array.h" #include "compression/compression.h" #include "compression/simple8b_rle.h" diff --git a/tsl/src/compression/compression.h b/tsl/src/compression/compression.h index d11ab22e538..5f917eaa7c1 100644 --- a/tsl/src/compression/compression.h +++ b/tsl/src/compression/compression.h @@ -386,8 +386,6 @@ extern enum CompressionAlgorithms compress_get_default_algorithm(Oid typeoid); if (unlikely(!(X))) \ ereport(ERROR, CORRUPT_DATA_MESSAGE(#X)) -//#define CheckCompressedData(X) 
Assert(X) - inline static void * consumeCompressedData(StringInfo si, int bytes) { From 8dfaf4bb8bddbe05c2cdd52afe8f7be7985f1b2f Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 1 Feb 2024 11:08:05 +0100 Subject: [PATCH 236/249] more predictable choice of interesting cases --- .github/workflows/libfuzzer.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml index 36c83c6b92b..ebe4354fb8b 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -254,7 +254,7 @@ jobs: # Save interesting cases because the caches are not available for download from UI mkdir -p interesting psql -qtAX -c "select distinct on (location) 'db/' || path from $fn - order by location, bytes + order by location, bytes, path " | xargs cp -t interesting # Check that we don't have any internal errors From a2d9142c54a1b3b85421951048c58338d8cc9293 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 1 Feb 2024 12:00:30 +0100 Subject: [PATCH 237/249] Apply suggestions from code review Co-authored-by: Jan Nidzwetzki Signed-off-by: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> --- .github/workflows/sanitizer-build-and-test.yaml | 2 +- scripts/upload_ci_stats.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/sanitizer-build-and-test.yaml b/.github/workflows/sanitizer-build-and-test.yaml index 7fa2420e3e3..261455cd48f 100644 --- a/.github/workflows/sanitizer-build-and-test.yaml +++ b/.github/workflows/sanitizer-build-and-test.yaml @@ -221,7 +221,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: sanitizer logs ${{ matrix.os }} ${{ env.name }} ${{ matrix.pg }} - # The log_path sanitizer option means "Write logs to "log_path.pid". + # The log_path sanitizer option means "Write logs to 'log_path.pid'". # https://github.com/google/sanitizers/wiki/SanitizerCommonFlags path: ${{ github.workspace }}/sanitizer* diff --git a/scripts/upload_ci_stats.sh b/scripts/upload_ci_stats.sh index 29df99c4a83..e6b6ba7e85a 100755 --- a/scripts/upload_ci_stats.sh +++ b/scripts/upload_ci_stats.sh @@ -150,7 +150,7 @@ do done # Upload the logs. -# Note that the sanitizer setting log_path means "write logs to "log_path.pid". +# Note that the sanitizer setting log_path means "write logs to 'log_path.pid'". for x in sanitizer* sanitizer/* {sqlsmith/sqlsmith,sanitizer,stacktrace,postgres-failure}.log *.diff do if ! 
[ -e "$x" ]; then continue ; fi From ffdb3df9db7447fe4908b34110d8b8b88198efbd Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 1 Feb 2024 12:30:38 +0100 Subject: [PATCH 238/249] no need to zero --- tsl/src/compression/array.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tsl/src/compression/array.c b/tsl/src/compression/array.c index c99f3b080bf..aba1f6fa17c 100644 --- a/tsl/src/compression/array.c +++ b/tsl/src/compression/array.c @@ -509,10 +509,10 @@ text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls, CheckCompressedData(n_total >= n_notnull); uint32 *offsets = - (uint32 *) MemoryContextAllocZero(dest_mctx, - pad_to_multiple(64, sizeof(*offsets) * (n_total + 1))); + (uint32 *) MemoryContextAlloc(dest_mctx, + pad_to_multiple(64, sizeof(*offsets) * (n_total + 1))); uint8 *arrow_bodies = - (uint8 *) MemoryContextAllocZero(dest_mctx, pad_to_multiple(64, si->len - si->cursor)); + (uint8 *) MemoryContextAlloc(dest_mctx, pad_to_multiple(64, si->len - si->cursor)); uint32 offset = 0; for (int i = 0; i < n_notnull; i++) From b26782414f457e4d44d6e3eb0a93760cdf8cb412 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 1 Feb 2024 14:46:50 +0100 Subject: [PATCH 239/249] cleanup --- .../nodes/decompress_chunk/compressed_batch.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index d8a0e8e08c0..797f2a78e22 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -19,7 +19,7 @@ #include "nodes/decompress_chunk/vector_predicates.h" static ArrowArray * -make_single_value_arrow_pod(Oid pgtype, Datum datum, bool isnull) +make_single_value_arrow_arithmetic(Oid arithmetic_type, Datum datum, bool isnull) { struct ArrowWithBuffers { @@ -55,7 +55,7 @@ make_single_value_arrow_pod(Oid pgtype, Datum datum, bool isnull) *((CTYPE *) arrow->buffers[1]) = FROMDATUM(datum); \ break - switch (pgtype) + switch (arithmetic_type) { FOR_TYPE(INT8OID, int64, DatumGetInt64); FOR_TYPE(INT4OID, int32, DatumGetInt32); @@ -66,7 +66,7 @@ make_single_value_arrow_pod(Oid pgtype, Datum datum, bool isnull) FOR_TYPE(TIMESTAMPOID, Timestamp, DatumGetTimestamp); FOR_TYPE(DATEOID, DateADT, DatumGetDateADT); default: - elog(ERROR, "unexpected column type '%s'", format_type_be(pgtype)); + elog(ERROR, "unexpected column type '%s'", format_type_be(arithmetic_type)); pg_unreachable(); } @@ -125,7 +125,7 @@ make_single_value_arrow(Oid pgtype, Datum datum, bool isnull) return make_single_value_arrow_text(datum, isnull); } - return make_single_value_arrow_pod(pgtype, datum, isnull); + return make_single_value_arrow_arithmetic(pgtype, datum, isnull); } static int @@ -272,13 +272,17 @@ decompress_column(DecompressContext *dcontext, DecompressBatchState *batch_state } } +/* + * When we have a dictionary-encoded Arrow Array, and have run a predicate on + * the dictionary, this function is used to translate the dictionary predicate + * result to the final predicate result. + */ static void translate_bitmap_from_dictionary(const ArrowArray *arrow, uint64 *restrict dict_result, uint64 *restrict final_result) { Assert(arrow->dictionary != NULL); - /* Translate dictionary results to per-value results. 
*/ const size_t n = arrow->length; int16 *restrict indices = (int16 *) arrow->buffers[1]; for (size_t outer = 0; outer < n / 64; outer++) @@ -294,9 +298,6 @@ translate_bitmap_from_dictionary(const ArrowArray *arrow, uint64 *restrict dict_ word |= ((uint64) valid) << bit_index; INNER_LOOP - - // fprintf(stderr, "dict-coded row %ld: index %d, valid %d\n", row, index, - // valid); } final_result[outer] &= word; } From 9cd735edb2932b54ee237fcb7cba3e32e6491d04 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 1 Mar 2024 13:44:09 +0300 Subject: [PATCH 240/249] review fixes --- tsl/src/CMakeLists.txt | 1 + tsl/src/import/CMakeLists.txt | 2 ++ .../ts_like_match.c | 0 tsl/src/nodes/decompress_chunk/pred_text.c | 2 +- tsl/test/expected/decompress_vector_qual.out | 18 +++++++++++++----- tsl/test/sql/decompress_vector_qual.sql | 9 +++++++-- 6 files changed, 24 insertions(+), 8 deletions(-) create mode 100644 tsl/src/import/CMakeLists.txt rename tsl/src/{nodes/decompress_chunk => import}/ts_like_match.c (100%) diff --git a/tsl/src/CMakeLists.txt b/tsl/src/CMakeLists.txt index d6fc2a76266..4752f839506 100644 --- a/tsl/src/CMakeLists.txt +++ b/tsl/src/CMakeLists.txt @@ -51,4 +51,5 @@ install(TARGETS ${TSL_LIBRARY_NAME} DESTINATION ${PG_PKGLIBDIR}) add_subdirectory(bgw_policy) add_subdirectory(compression) add_subdirectory(continuous_aggs) +add_subdirectory(import) add_subdirectory(nodes) diff --git a/tsl/src/import/CMakeLists.txt b/tsl/src/import/CMakeLists.txt new file mode 100644 index 00000000000..ccac900c3ee --- /dev/null +++ b/tsl/src/import/CMakeLists.txt @@ -0,0 +1,2 @@ +set(SOURCES "") +target_sources(${PROJECT_NAME} PRIVATE ${SOURCES}) diff --git a/tsl/src/nodes/decompress_chunk/ts_like_match.c b/tsl/src/import/ts_like_match.c similarity index 100% rename from tsl/src/nodes/decompress_chunk/ts_like_match.c rename to tsl/src/import/ts_like_match.c diff --git a/tsl/src/nodes/decompress_chunk/pred_text.c b/tsl/src/nodes/decompress_chunk/pred_text.c index 166c71d4e5c..d576566ee68 100644 --- a/tsl/src/nodes/decompress_chunk/pred_text.c +++ b/tsl/src/nodes/decompress_chunk/pred_text.c @@ -95,7 +95,7 @@ vector_const_textne(const ArrowArray *arrow, const Datum constdatum, uint64 *res } while ((plen) > 0 && (*(p) &0xC0) == 0x80) #define MatchText UTF8_MatchText -#include "ts_like_match.c" +#include "import/ts_like_match.c" static void vector_const_like_impl(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result, diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out index 07f45af3d43..f2a1fb0034c 100644 --- a/tsl/test/expected/decompress_vector_qual.out +++ b/tsl/test/expected/decompress_vector_qual.out @@ -1532,11 +1532,19 @@ ERROR: LIKE pattern must not end with escape character select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%\'; ERROR: LIKE pattern must not end with escape character \set ON_ERROR_STOP 1 -reset timescaledb.debug_require_vector_qual; -select count(distinct a) from text_table; - count -------- - 2905 +-- We don't vectorize comparison operators with text because they are probably +-- not very useful. 
+set timescaledb.debug_require_vector_qual to 'forbid'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a < 'same'; + count | min | max | min | max +-------+-----+------+-----+----- + 3900 | 1 | 1000 | 0 | 8 +(1 row) + +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a > 'same'; + count | min | max | min | max +-------+-----+------+-----+----- + 2500 | 1 | 1000 | 4 | 7 (1 row) reset timescaledb.debug_require_vector_qual; diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql index c75dce8b1b1..074c546282e 100644 --- a/tsl/test/sql/decompress_vector_qual.sql +++ b/tsl/test/sql/decompress_vector_qual.sql @@ -501,8 +501,13 @@ select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like ' select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%\'; \set ON_ERROR_STOP 1 -reset timescaledb.debug_require_vector_qual; -select count(distinct a) from text_table; + +-- We don't vectorize comparison operators with text because they are probably +-- not very useful. +set timescaledb.debug_require_vector_qual to 'forbid'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a < 'same'; +select count(*), min(ts), max(ts), min(d), max(d) from text_table where a > 'same'; + reset timescaledb.debug_require_vector_qual; reset timescaledb.enable_bulk_decompression; From e4d2e5d24824c1e6b4fa39cfa812d5bb26536850 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 8 Mar 2024 11:27:52 +0300 Subject: [PATCH 241/249] move the recursion check later --- tsl/src/import/ts_like_match.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tsl/src/import/ts_like_match.c b/tsl/src/import/ts_like_match.c index 05b49d15632..ace525f0cda 100644 --- a/tsl/src/import/ts_like_match.c +++ b/tsl/src/import/ts_like_match.c @@ -38,9 +38,6 @@ MatchText(const char *t, int tlen, const char *p, int plen) if (plen == 1 && *p == '%') return LIKE_TRUE; - /* Since this function recurses, it could be driven to stack overflow */ - check_stack_depth(); - /* * In this loop, we advance by char when matching wildcards (and thus on * recursive entry to this function we are properly char-synced). On other @@ -129,6 +126,9 @@ MatchText(const char *t, int tlen, const char *p, int plen) { if (GETCHAR(*t) == firstpat) { + /* Since this function recurses, it could be driven to stack overflow */ + check_stack_depth(); + int matched = MatchText(t, tlen, p, plen); if (matched != LIKE_FALSE) From 8f58261bc268261a7a810c39b43e8cae7aca03a5 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 18 Mar 2024 17:48:43 +0100 Subject: [PATCH 242/249] benchmark vectorized text (2024-03-18 no. 1) From ebde5741d7e649085c02e92eb31b824aadbbe877 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 25 Mar 2024 14:50:13 +0100 Subject: [PATCH 243/249] benchmark vectorized text (2024-03-25 no. 
1) From 43a2bc44259bb4aecbc385d726fc36cde6cf598f Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 27 Mar 2024 12:28:33 +0100 Subject: [PATCH 244/249] review fixes --- tsl/src/import/ts_like_match.c | 3 ++- tsl/src/nodes/decompress_chunk/compressed_batch.c | 12 ++++++++++-- tsl/src/nodes/decompress_chunk/pred_text.c | 12 +++++++++--- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/tsl/src/import/ts_like_match.c b/tsl/src/import/ts_like_match.c index ace525f0cda..8eca1e0c922 100644 --- a/tsl/src/import/ts_like_match.c +++ b/tsl/src/import/ts_like_match.c @@ -10,7 +10,8 @@ * PostgreSQL License. Please see the NOTICE at the top level * directory for a copy of the PostgreSQL License. * - * This is a copy of backend/utils/adt/like_match.c. + * This is a copy of backend/utils/adt/like_match.c from PG 15.0, git commit sha + * 2a7ce2e2ce474504a707ec03e128fde66cfb8b48. */ /*-------------------- diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 1273fcf7962..973374669f2 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -18,6 +18,9 @@ #include "nodes/decompress_chunk/compressed_batch.h" #include "nodes/decompress_chunk/vector_predicates.h" +/* + * Create a single-value ArrowArray of an arithmetic type. + */ static ArrowArray * make_single_value_arrow_arithmetic(Oid arithmetic_type, Datum datum, bool isnull) { @@ -26,7 +29,8 @@ make_single_value_arrow_arithmetic(Oid arithmetic_type, Datum datum, bool isnull ArrowArray arrow; uint64 arrow_buffers_array_storage[2]; uint64 validity_buffer[1]; - uint64 values_buffer[8 /* 64-byte padding as required by Arrow. */]; + /* The value buffer has 64-byte padding as required by Arrow. */ + uint64 values_buffer[8]; }; struct ArrowWithBuffers *with_buffers = palloc0(sizeof(struct ArrowWithBuffers)); @@ -73,6 +77,9 @@ make_single_value_arrow_arithmetic(Oid arithmetic_type, Datum datum, bool isnull return arrow; } +/* + * Create a single-value ArrowArray of text. + */ static ArrowArray * make_single_value_arrow_text(Datum datum, bool isnull) { @@ -82,7 +89,8 @@ make_single_value_arrow_text(Datum datum, bool isnull) uint64 arrow_buffers_array_storage[3]; uint64 validity_buffer[1]; uint32 offsets_buffer[2]; - uint64 values_buffer[8 /* 64-byte padding as required by Arrow. */]; + /* The value buffer has 64-byte padding as required by Arrow. */ + uint64 values_buffer[8]; }; struct ArrowWithBuffers *with_buffers = palloc0(sizeof(struct ArrowWithBuffers)); diff --git a/tsl/src/nodes/decompress_chunk/pred_text.c b/tsl/src/nodes/decompress_chunk/pred_text.c index d576566ee68..e91c87c4ac6 100644 --- a/tsl/src/nodes/decompress_chunk/pred_text.c +++ b/tsl/src/nodes/decompress_chunk/pred_text.c @@ -78,7 +78,8 @@ vector_const_textne(const ArrowArray *arrow, const Datum constdatum, uint64 *res /* * Generate specializations for LIKE functions based on database encoding. This - * follows the Postgres code from backend/utils/adt/like.c. 
+ * follows the Postgres code from backend/utils/adt/like.c, version 15.0, + * commit sha 2a7ce2e2ce474504a707ec03e128fde66cfb8b48 */ #define LIKE_TRUE 1 @@ -117,6 +118,11 @@ vector_const_like_impl(const ArrowArray *arrow, const Datum constdatum, uint64 * { const size_t row = outer * 64 + inner; const size_t bit_index = inner; + /* + * The inner loop could have been an inline function, but it would have 5 + * parameters and one of them in/out, so a macro probably has better + * readability. + */ #define INNER_LOOP \ const uint32 start = offsets[row]; \ const uint32 end = offsets[row + 1]; \ @@ -148,12 +154,12 @@ vector_const_like_impl(const ArrowArray *arrow, const Datum constdatum, uint64 * void vector_const_textlike_utf8(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result) { - vector_const_like_impl(arrow, constdatum, result, UTF8_MatchText, true); + vector_const_like_impl(arrow, constdatum, result, UTF8_MatchText, /* should_match = */ true); } void vector_const_textnlike_utf8(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result) { - vector_const_like_impl(arrow, constdatum, result, UTF8_MatchText, false); + vector_const_like_impl(arrow, constdatum, result, UTF8_MatchText, /* should_match = */ false); } From 82f9ab108ab0eb9a572ae05307b7b96720769701 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 27 Mar 2024 12:32:55 +0100 Subject: [PATCH 245/249] comment --- tsl/src/import/ts_like_match.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tsl/src/import/ts_like_match.c b/tsl/src/import/ts_like_match.c index 8eca1e0c922..4d2c737fd46 100644 --- a/tsl/src/import/ts_like_match.c +++ b/tsl/src/import/ts_like_match.c @@ -12,6 +12,8 @@ * * This is a copy of backend/utils/adt/like_match.c from PG 15.0, git commit sha * 2a7ce2e2ce474504a707ec03e128fde66cfb8b48. + * It has one modification: the check_stack_depth() check is moved to happen + * before recursion to simplify the non-recursive code path. */ /*-------------------- From 3a0763fcea49174cf5cae122ec00e085f60ec818 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 27 Mar 2024 12:39:38 +0100 Subject: [PATCH 246/249] comment --- tsl/src/nodes/decompress_chunk/pred_text.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tsl/src/nodes/decompress_chunk/pred_text.c b/tsl/src/nodes/decompress_chunk/pred_text.c index e91c87c4ac6..487fb61f032 100644 --- a/tsl/src/nodes/decompress_chunk/pred_text.c +++ b/tsl/src/nodes/decompress_chunk/pred_text.c @@ -79,7 +79,9 @@ vector_const_textne(const ArrowArray *arrow, const Datum constdatum, uint64 *res /* * Generate specializations for LIKE functions based on database encoding. This * follows the Postgres code from backend/utils/adt/like.c, version 15.0, - * commit sha 2a7ce2e2ce474504a707ec03e128fde66cfb8b48 + * commit sha 2a7ce2e2ce474504a707ec03e128fde66cfb8b48. + * The copy of PG code begins here. + * ---------------------------------------------------------------------------- */ #define LIKE_TRUE 1 @@ -98,6 +100,11 @@ vector_const_textne(const ArrowArray *arrow, const Datum constdatum, uint64 *res #include "import/ts_like_match.c" +/* + * ---------------------------------------------------------------------------- + * The copy of PG code ends here. 
+ */ + static void vector_const_like_impl(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result, int (*match)(const char *, int, const char *, int), bool should_match) From 7ab250fb765cd2d538a89afe6bfa56b3193402dd Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 27 Mar 2024 20:13:16 +0100 Subject: [PATCH 247/249] remove restrict from const objects --- tsl/src/nodes/decompress_chunk/pred_vector_array.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/pred_vector_array.c b/tsl/src/nodes/decompress_chunk/pred_vector_array.c index 2c094bc8c0c..f15d217b571 100644 --- a/tsl/src/nodes/decompress_chunk/pred_vector_array.c +++ b/tsl/src/nodes/decompress_chunk/pred_vector_array.c @@ -47,9 +47,9 @@ vector_array_predicate(VectorPredicate *vector_const_predicate, bool is_or, char typalign; get_typlenbyvalalign(ARR_ELEMTYPE(arr), &typlen, &typbyval, &typalign); - const char *restrict array_data = (const char *) ARR_DATA_PTR(arr); + const char *array_data = (const char *) ARR_DATA_PTR(arr); const size_t nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); - const uint64 *restrict array_null_bitmap = (uint64 *) ARR_NULLBITMAP(arr); + const uint64 *array_null_bitmap = (uint64 *) ARR_NULLBITMAP(arr); for (size_t array_index = 0; array_index < nitems; array_index++) { @@ -79,7 +79,7 @@ vector_array_predicate(VectorPredicate *vector_const_predicate, bool is_or, } Datum constvalue = fetch_att(array_data, typbyval, typlen); array_data = att_addlength_pointer(array_data, typlen, array_data); - array_data = (char *restrict) att_align_nominal(array_data, typalign); + array_data = (const char *) att_align_nominal(array_data, typalign); /* * For OR, we also need an intermediate storage for predicate result From 02918bfd4e8e72a16a1f009d4cb01e4be3af2265 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 27 Mar 2024 20:21:30 +0100 Subject: [PATCH 248/249] replace restrict with const on read-only objects --- tsl/src/nodes/decompress_chunk/compressed_batch.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 973374669f2..76331050d8c 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -289,13 +289,13 @@ decompress_column(DecompressContext *dcontext, DecompressBatchState *batch_state * result to the final predicate result. */ static void -translate_bitmap_from_dictionary(const ArrowArray *arrow, uint64 *restrict dict_result, +translate_bitmap_from_dictionary(const ArrowArray *arrow, const uint64 *dict_result, uint64 *restrict final_result) { Assert(arrow->dictionary != NULL); const size_t n = arrow->length; - int16 *restrict indices = (int16 *) arrow->buffers[1]; + const int16 *indices = (int16 *) arrow->buffers[1]; for (size_t outer = 0; outer < n / 64; outer++) { uint64 word = 0; @@ -983,7 +983,7 @@ make_next_tuple(DecompressBatchState *batch_state, uint16 arrow_row, int num_com * such as UUID. 
*/ const uint8 value_bytes = column_values->decompression_type; - const char *restrict src = column_values->buffers[1]; + const char *src = column_values->buffers[1]; *column_values->output_value = PointerGetDatum(&src[value_bytes * arrow_row]); *column_values->output_isnull = !arrow_row_is_valid(column_values->buffers[0], arrow_row); @@ -1000,7 +1000,7 @@ make_next_tuple(DecompressBatchState *batch_state, uint16 arrow_row, int num_com */ const uint8 value_bytes = column_values->decompression_type; Assert(value_bytes <= SIZEOF_DATUM); - const char *restrict src = column_values->buffers[1]; + const char *src = column_values->buffers[1]; memcpy(column_values->output_value, &src[value_bytes * arrow_row], SIZEOF_DATUM); *column_values->output_isnull = !arrow_row_is_valid(column_values->buffers[0], arrow_row); From c355c25aed96c022555370f3de42ffc03c03fd77 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 28 Mar 2024 11:55:16 +0100 Subject: [PATCH 249/249] comment --- tsl/src/nodes/decompress_chunk/compressed_batch.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c index 76331050d8c..0eb9be2e035 100644 --- a/tsl/src/nodes/decompress_chunk/compressed_batch.c +++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c @@ -19,7 +19,8 @@ #include "nodes/decompress_chunk/vector_predicates.h" /* - * Create a single-value ArrowArray of an arithmetic type. + * Create a single-value ArrowArray of an arithmetic type. This is a specialized + * function because arithmetic types have a particular layout of ArrowArrays. */ static ArrowArray * make_single_value_arrow_arithmetic(Oid arithmetic_type, Datum datum, bool isnull) @@ -78,7 +79,8 @@ make_single_value_arrow_arithmetic(Oid arithmetic_type, Datum datum, bool isnull } /* - * Create a single-value ArrowArray of text. + * Create a single-value ArrowArray of text. This is a specialized function + * because the text ArrowArray has a specialized layout. */ static ArrowArray * make_single_value_arrow_text(Datum datum, bool isnull)