From af64c7bbfccfc534ff6dc85d5f8931ab9531f4cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Nordstr=C3=B6m?= Date: Wed, 11 Sep 2024 15:59:57 +0200 Subject: [PATCH] Support vectorized aggregation on Hypercore TAM Add support for running VectorAgg on top of scans on Hypercore TAM. Currently, only ColumnarScan can run below VectorAgg when Hypercore TAM is used. In theory, a SeqScan or IndexScan reading from Hypercore TAM should also work because they would produce Arrow slots. However, only ColumnarScan performs vectorized filtering, which is currently assumed to happen before the VectorAgg node. In ColumnarScan, it is necessary to turn off projection when VectorAgg is used. Otherwise, it would project the arrow slot into a virtual slot, thus losing the vector data. Ideally, a projection should never be planned to begin with, but this isn't possible since VectorAgg modifies existing non-vectorized Agg plans that already include projections. --- .../workflows/linux-32bit-build-and-test.yaml | 2 +- .unreleased/pr_7655 | 1 + tsl/src/hypercore/arrow_tts.h | 8 +- tsl/src/nodes/columnar_scan/columnar_scan.c | 5 +- tsl/src/nodes/columnar_scan/columnar_scan.h | 2 +- tsl/src/nodes/vector_agg/CMakeLists.txt | 3 +- tsl/src/nodes/vector_agg/exec.c | 142 +++- tsl/src/nodes/vector_agg/plan.c | 27 +- tsl/src/nodes/vector_agg/plan.h | 2 +- tsl/src/nodes/vector_agg/plan_tam.c | 52 ++ tsl/src/nodes/vector_agg/vector_slot.h | 64 +- tsl/test/expected/hypercore_parallel.out | 48 +- tsl/test/expected/hypercore_vectoragg.out | 697 ++++++++++++++++++ tsl/test/expected/vector_agg_default.out | 3 + tsl/test/expected/vector_agg_filter.out | 2 + tsl/test/expected/vector_agg_functions.out | 2 + tsl/test/expected/vector_agg_memory.out | 3 + tsl/test/expected/vector_agg_param.out | 2 + tsl/test/expected/vector_agg_segmentby.out | 2 + tsl/test/expected/vectorized_aggregation.out | 5 + tsl/test/sql/CMakeLists.txt | 3 +- tsl/test/sql/hypercore_vectoragg.sql | 191 +++++ tsl/test/sql/vector_agg_default.sql | 5 + tsl/test/sql/vector_agg_filter.sql | 4 + tsl/test/sql/vector_agg_functions.sql | 4 + tsl/test/sql/vector_agg_memory.sql | 5 +- tsl/test/sql/vector_agg_param.sql | 2 + tsl/test/sql/vector_agg_segmentby.sql | 3 + tsl/test/sql/vectorized_aggregation.sql | 6 + 29 files changed, 1247 insertions(+), 48 deletions(-) create mode 100644 .unreleased/pr_7655 create mode 100644 tsl/src/nodes/vector_agg/plan_tam.c create mode 100644 tsl/test/expected/hypercore_vectoragg.out create mode 100644 tsl/test/sql/hypercore_vectoragg.sql diff --git a/.github/workflows/linux-32bit-build-and-test.yaml b/.github/workflows/linux-32bit-build-and-test.yaml index 0cec2577a08..ce8992d7c20 100644 --- a/.github/workflows/linux-32bit-build-and-test.yaml +++ b/.github/workflows/linux-32bit-build-and-test.yaml @@ -54,7 +54,7 @@ jobs: append-* transparent_decompression-* transparent_decompress_chunk-* pg_dump telemetry bgw_db_scheduler* hypercore_vacuum vectorized_aggregation vector_agg_text - vector_agg_groupagg + vector_agg_groupagg hypercore_parallel hypercore_vectoragg SKIPS: chunk_adaptive histogram_test-* EXTENSIONS: "postgres_fdw test_decoding pageinspect pgstattuple" strategy: diff --git a/.unreleased/pr_7655 b/.unreleased/pr_7655 new file mode 100644 index 00000000000..697616a8db0 --- /dev/null +++ b/.unreleased/pr_7655 @@ -0,0 +1 @@ +Implements: #7655 Support vectorized aggregation on Hypercore TAM diff --git a/tsl/src/hypercore/arrow_tts.h b/tsl/src/hypercore/arrow_tts.h index 509277375f3..830af1362cf 100644 ---
a/tsl/src/hypercore/arrow_tts.h +++ b/tsl/src/hypercore/arrow_tts.h @@ -24,6 +24,7 @@ #include "arrow_cache.h" #include "compression/arrow_c_data_interface.h" #include "debug_assert.h" +#include "nodes/decompress_chunk/compressed_batch.h" #include @@ -88,6 +89,10 @@ typedef struct ArrowTupleTableSlot const uint64 *arrow_qual_result; /* Bitmap with result of qual * filtering over arrow_array. NULL if * no filtering has been applied. */ + + /* Struct to hold values for one column. Necessary for compatibility with + * vector aggs. */ + struct CompressedColumnValues ccvalues; } ArrowTupleTableSlot; extern const TupleTableSlotOps TTSOpsArrowTuple; @@ -413,8 +418,9 @@ arrow_slot_per_segment_memory_context(const TupleTableSlot *slot) return aslot->per_segment_mcxt; } -extern bool is_compressed_col(const TupleDesc tupdesc, AttrNumber attno); extern const ArrowArray *arrow_slot_get_array(TupleTableSlot *slot, AttrNumber attno); + +extern bool is_compressed_col(const TupleDesc tupdesc, AttrNumber attno); extern void arrow_slot_set_referenced_attrs(TupleTableSlot *slot, Bitmapset *attrs); extern void arrow_slot_set_index_attrs(TupleTableSlot *slot, Bitmapset *attrs); diff --git a/tsl/src/nodes/columnar_scan/columnar_scan.c b/tsl/src/nodes/columnar_scan/columnar_scan.c index e68f1613710..5e02a3875a5 100644 --- a/tsl/src/nodes/columnar_scan/columnar_scan.c +++ b/tsl/src/nodes/columnar_scan/columnar_scan.c @@ -996,9 +996,10 @@ static CustomScanMethods columnar_scan_plan_methods = { }; bool -is_columnar_scan(const CustomScan *scan) +is_columnar_scan(const Plan *plan) { - return scan->methods == &columnar_scan_plan_methods; + return IsA(plan, CustomScan) && + ((const CustomScan *) plan)->methods == &columnar_scan_plan_methods; } typedef struct VectorQualInfoHypercore diff --git a/tsl/src/nodes/columnar_scan/columnar_scan.h b/tsl/src/nodes/columnar_scan/columnar_scan.h index 1dc9c94f436..8ae4f362912 100644 --- a/tsl/src/nodes/columnar_scan/columnar_scan.h +++ b/tsl/src/nodes/columnar_scan/columnar_scan.h @@ -20,7 +20,7 @@ typedef struct ColumnarScanPath extern ColumnarScanPath *columnar_scan_path_create(PlannerInfo *root, RelOptInfo *rel, Relids required_outer, int parallel_workers); extern void columnar_scan_set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Hypertable *ht); -extern bool is_columnar_scan(const CustomScan *scan); +extern bool is_columnar_scan(const Plan *plan); extern void _columnar_scan_init(void); #endif /* TIMESCALEDB_COLUMNAR_SCAN_H */ diff --git a/tsl/src/nodes/vector_agg/CMakeLists.txt b/tsl/src/nodes/vector_agg/CMakeLists.txt index c38b0aa74a1..69b10f4a8b5 100644 --- a/tsl/src/nodes/vector_agg/CMakeLists.txt +++ b/tsl/src/nodes/vector_agg/CMakeLists.txt @@ -5,5 +5,6 @@ set(SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/grouping_policy_batch.c ${CMAKE_CURRENT_SOURCE_DIR}/grouping_policy_hash.c ${CMAKE_CURRENT_SOURCE_DIR}/plan.c - ${CMAKE_CURRENT_SOURCE_DIR}/plan_decompress_chunk.c) + ${CMAKE_CURRENT_SOURCE_DIR}/plan_decompress_chunk.c + ${CMAKE_CURRENT_SOURCE_DIR}/plan_tam.c) target_sources(${TSL_LIBRARY_NAME} PRIVATE ${SOURCES}) diff --git a/tsl/src/nodes/vector_agg/exec.c b/tsl/src/nodes/vector_agg/exec.c index 2bd6584ad1b..228dafe96af 100644 --- a/tsl/src/nodes/vector_agg/exec.c +++ b/tsl/src/nodes/vector_agg/exec.c @@ -8,14 +8,19 @@ #include #include +#include #include #include #include +#include #include #include "nodes/vector_agg/exec.h" #include "compression/arrow_c_data_interface.h" +#include "hypercore/arrow_tts.h" +#include "hypercore/vector_quals.h" +#include 
"nodes/columnar_scan/columnar_scan.h" #include "nodes/decompress_chunk/compressed_batch.h" #include "nodes/decompress_chunk/exec.h" #include "nodes/decompress_chunk/vector_quals.h" @@ -23,9 +28,8 @@ #include "nodes/vector_agg/plan.h" static int -get_input_offset(const CustomScanState *state, const Var *var) +get_input_offset_decompress_chunk(const DecompressChunkState *decompress_state, const Var *var) { - const DecompressChunkState *decompress_state = (DecompressChunkState *) state; const DecompressContext *dcontext = &decompress_state->decompress_context; /* @@ -58,16 +62,57 @@ get_input_offset(const CustomScanState *state, const Var *var) } static void -get_column_storage_properties(const CustomScanState *state, int input_offset, - GroupingColumn *result) +get_column_storage_properties_decompress_chunk(const DecompressChunkState *state, int input_offset, + GroupingColumn *result) { - const DecompressChunkState *decompress_state = (DecompressChunkState *) state; - const DecompressContext *dcontext = &decompress_state->decompress_context; + const DecompressContext *dcontext = &state->decompress_context; const CompressionColumnDescription *desc = &dcontext->compressed_chunk_columns[input_offset]; result->value_bytes = desc->value_bytes; result->by_value = desc->by_value; } +/* + * Given a Var reference, get the offset of the corresponding attribute in the + * input tuple. + * + * For a node returning arrow slots, this is just the attribute number in the + * Var. But if the node is DecompressChunk, it is necessary to translate + * between the compressed and non-compressed columns. + */ +static int +get_input_offset(const CustomScanState *state, const Var *var) +{ + if (TTS_IS_ARROWTUPLE(state->ss.ss_ScanTupleSlot)) + return AttrNumberGetAttrOffset(var->varattno); + + return get_input_offset_decompress_chunk((const DecompressChunkState *) state, var); +} + +/* + * Get the type length and "byval" properties for the grouping column given by + * the input offset. + * + * For a node returning arrow slots, the properties can be read directly from + * the scanned relation's tuple descriptor. For DecompressChunk, the input + * offset references the compressed relation. + */ +static void +get_column_storage_properties(const CustomScanState *state, int input_offset, + GroupingColumn *result) +{ + if (TTS_IS_ARROWTUPLE(state->ss.ss_ScanTupleSlot)) + { + const TupleDesc tupdesc = RelationGetDescr(state->ss.ss_currentRelation); + result->by_value = TupleDescAttr(tupdesc, input_offset)->attbyval; + result->value_bytes = TupleDescAttr(tupdesc, input_offset)->attlen; + return; + } + + get_column_storage_properties_decompress_chunk((const DecompressChunkState *) state, + input_offset, + result); +} + static void vector_agg_begin(CustomScanState *node, EState *estate, int eflags) { @@ -312,6 +357,49 @@ compressed_batch_get_next_slot(VectorAggState *vector_agg_state) return &batch_state->decompressed_scan_slot_data.base; } +/* + * Get the next slot to aggregate for a arrow tuple table slot. + * + * Implements "get next slot" on top of ColumnarScan (or any node producing + * ArrowTupleTableSlots). It just reads the slot from the child node. 
+ */ +static TupleTableSlot * +arrow_get_next_slot(VectorAggState *vector_agg_state) +{ + TupleTableSlot *slot = vector_agg_state->custom.ss.ss_ScanTupleSlot; + + if (!TTS_EMPTY(slot)) + { + Assert(TTS_IS_ARROWTUPLE(slot)); + + /* If we read an arrow slot previously, the entire arrow array should + * have been aggregated so we should mark it as consumed so that we + * get the next array (or end) when we read the next slot. */ + + arrow_slot_mark_consumed(slot); + } + + slot = ExecProcNode(linitial(vector_agg_state->custom.custom_ps)); + + if (TupIsNull(slot)) + { + /* The input has ended. */ + vector_agg_state->input_ended = true; + return NULL; + } + + Assert(TTS_IS_ARROWTUPLE(slot)); + + /* Filtering should have happened in the scan node below so the slot + * should not be consumed here. */ + Assert(!arrow_slot_is_consumed(slot)); + + /* Remember the slot until we're called next time */ + vector_agg_state->custom.ss.ss_ScanTupleSlot = slot; + + return slot; +} + /* * Initialize vector quals for a compressed batch. * @@ -341,6 +429,18 @@ compressed_batch_init_vector_quals(VectorAggState *agg_state, VectorAggDef *agg_ return &agg_state->vqual_state.vqstate; } +/* + * Initialize FILTER vector quals for an arrow tuple slot. + * + * Used to implement the vectorized aggregate function FILTER clause. + */ +static VectorQualState * +arrow_init_vector_quals(VectorAggState *agg_state, VectorAggDef *agg_def, TupleTableSlot *slot) +{ + vector_qual_state_init(&agg_state->vqual_state.vqstate, agg_def->filter_clauses, slot); + return &agg_state->vqual_state.vqstate; +} + static TupleTableSlot * vector_agg_exec(CustomScanState *node) { @@ -481,20 +581,42 @@ Node * vector_agg_state_create(CustomScan *cscan) { VectorAggState *state = (VectorAggState *) newNode(sizeof(VectorAggState), T_CustomScanState); + CustomScan *childscan = castNode(CustomScan, linitial(cscan->custom_plans)); state->custom.methods = &exec_methods; /* * Initialize VectorAggState to process vector slots from different - * subnodes. Currently, only compressed batches are supported, but arrow - * slots will be supported as well. + * subnodes. + * + * VectorAgg supports two child nodes: ColumnarScan (producing arrow tuple + * table slots) and DecompressChunk (producing compressed batches). + * + * When the child is ColumnarScan, VectorAgg expects Arrow slots that + * carry arrow arrays. ColumnarScan performs standard qual filtering and + * vectorized qual filtering prior to handing the slot up to VectorAgg. + * + * When the child is DecompressChunk, VectorAgg doesn't read the slot from + * the child node. Instead, it bypasses DecompressChunk and reads + * compressed tuples directly from the grandchild. It therefore needs to + * handle batch decompression and vectorized qual filtering itself, in its + * own "get next slot" implementation. * * The vector qual init functions are needed to implement vectorized * aggregate function FILTER clauses for arrow tuple table slots and * compressed batches, respectively.
*/ - state->get_next_slot = compressed_batch_get_next_slot; - state->init_vector_quals = compressed_batch_init_vector_quals; + if (is_columnar_scan(&childscan->scan.plan)) + { + state->get_next_slot = arrow_get_next_slot; + state->init_vector_quals = arrow_init_vector_quals; + } + else + { + Assert(strcmp(childscan->methods->CustomName, "DecompressChunk") == 0); + state->get_next_slot = compressed_batch_get_next_slot; + state->init_vector_quals = compressed_batch_init_vector_quals; + } return (Node *) state; } diff --git a/tsl/src/nodes/vector_agg/plan.c b/tsl/src/nodes/vector_agg/plan.c index ea65ae8f834..8ae45d372d3 100644 --- a/tsl/src/nodes/vector_agg/plan.c +++ b/tsl/src/nodes/vector_agg/plan.c @@ -18,6 +18,7 @@ #include "exec.h" #include "import/list.h" +#include "nodes/columnar_scan/columnar_scan.h" #include "nodes/decompress_chunk/vector_quals.h" #include "nodes/vector_agg.h" #include "utils.h" @@ -177,6 +178,27 @@ vector_agg_plan_create(Plan *childplan, Agg *agg, List *resolved_targetlist, lfirst(list_nth_cell(vector_agg->custom_private, VASI_GroupingType)) = makeInteger(grouping_type); +#if PG15_GE + if (is_columnar_scan(childplan)) + { + CustomScan *custom = castNode(CustomScan, childplan); + + /* + * ColumnarScan should not project when doing vectorized + * aggregation. If it projects, it will turn the arrow slot into a set + * of virtual slots and the vector data will not be passed up to + * VectorAgg. + * + * To make ColumnarScan avoid projection, unset the custom scan node's + * projection flag. Normally, it is too late to change this flag as + * PostgreSQL already planned projection based on it. However, + * ColumnarScan rechecks this flag before it begins execution and + * ignores any projection if the flag is not set. + */ + custom->flags &= ~CUSTOMPATH_SUPPORT_PROJECTION; + } +#endif + return (Plan *) vector_agg; } @@ -471,8 +493,11 @@ vectoragg_plan_possible(Plan *childplan, const List *rtable, VectorQualInfo *vqi CustomScan *customscan = castNode(CustomScan, childplan); bool vectoragg_possible = false; + RangeTblEntry *rte = rt_fetch(customscan->scan.scanrelid, rtable); - if (strcmp(customscan->methods->CustomName, "DecompressChunk") == 0) + if (ts_is_hypercore_am(ts_get_rel_am(rte->relid))) + vectoragg_possible = vectoragg_plan_tam(childplan, rtable, vqi); + else if (strcmp(customscan->methods->CustomName, "DecompressChunk") == 0) vectoragg_possible = vectoragg_plan_decompress_chunk(childplan, vqi); return vectoragg_possible; diff --git a/tsl/src/nodes/vector_agg/plan.h b/tsl/src/nodes/vector_agg/plan.h index 1effeb5a912..ae0100e548b 100644 --- a/tsl/src/nodes/vector_agg/plan.h +++ b/tsl/src/nodes/vector_agg/plan.h @@ -25,6 +25,6 @@ typedef enum extern void _vector_agg_init(void); extern bool vectoragg_plan_decompress_chunk(Plan *childplan, VectorQualInfo *vqi); - +extern bool vectoragg_plan_tam(Plan *childplan, const List *rtable, VectorQualInfo *vqi); Plan *try_insert_vector_agg_node(Plan *plan, List *rtable); bool has_vector_agg_node(Plan *plan, bool *has_normal_agg); diff --git a/tsl/src/nodes/vector_agg/plan_tam.c b/tsl/src/nodes/vector_agg/plan_tam.c new file mode 100644 index 00000000000..a914d0c5ce0 --- /dev/null +++ b/tsl/src/nodes/vector_agg/plan_tam.c @@ -0,0 +1,52 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license.
+ */ +#include +#include +#include +#include + +#include "hypercore/hypercore_handler.h" +#include "nodes/decompress_chunk/vector_quals.h" +#include "plan.h" + +bool +vectoragg_plan_tam(Plan *childplan, const List *rtable, VectorQualInfo *vqi) +{ + const CustomScan *customscan = castNode(CustomScan, childplan); + RangeTblEntry *rte = rt_fetch(customscan->scan.scanrelid, rtable); + Relation rel = table_open(rte->relid, AccessShareLock); + const HypercoreInfo *hinfo = RelationGetHypercoreInfo(rel); + + *vqi = (VectorQualInfo){ + .rti = customscan->scan.scanrelid, + .vector_attrs = (bool *) palloc0(sizeof(bool) * (hinfo->num_columns + 1)), + .segmentby_attrs = (bool *) palloc0(sizeof(bool) * (hinfo->num_columns + 1)), + /* + * Hypercore TAM and ColumnarScan do not yet support specific ordering + * (via pathkeys) so vector data will always be in the order it was read. + */ + .reverse = false, + }; + + for (int i = 0; i < hinfo->num_columns; i++) + { + AttrNumber attno = AttrOffsetGetAttrNumber(i); + + if (!hinfo->columns[i].is_dropped) + { + /* + * Hypercore TAM only supports bulk decompression, so all columns + * are vectorizable, including segmentby columns. + */ + vqi->vector_attrs[attno] = true; + vqi->segmentby_attrs[attno] = hinfo->columns[i].is_segmentby; + } + } + + table_close(rel, NoLock); + + return true; +} diff --git a/tsl/src/nodes/vector_agg/vector_slot.h b/tsl/src/nodes/vector_agg/vector_slot.h index 01a1a117b56..272f6f9204d 100644 --- a/tsl/src/nodes/vector_agg/vector_slot.h +++ b/tsl/src/nodes/vector_agg/vector_slot.h @@ -8,6 +8,8 @@ #include #include #include + +#include #include /* * @@ -17,9 +19,19 @@ * batches. * */ + +/* + * Get the result bitmap from vectorized qual filtering. + */ static inline const uint64 * vector_slot_get_qual_result(const TupleTableSlot *slot, uint16 *num_rows) { + if (TTS_IS_ARROWTUPLE(slot)) + { + *num_rows = arrow_slot_total_row_count(slot); + return arrow_slot_get_qual_result(slot); + } + const DecompressBatchState *batch_state = (const DecompressBatchState *) slot; *num_rows = batch_state->total_batch_rows; return batch_state->vector_qual_result; @@ -27,15 +39,59 @@ vector_slot_get_qual_result(const TupleTableSlot *slot, uint16 *num_rows) /* * Return the arrow array or the datum (in case of single scalar value) for a - * given attribute. - * - * This is essentially doing the same thing as the separate functions above, - * but with a common return type. + * given attribute as a CompressedColumnValues struct.
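+ * + * For an arrow slot, the struct is filled in from the slot's arrow array: + * fixed-width columns use the attribute length as the decompression type, + * text columns map to the arrow text or dictionary layouts, and a NULL + * arrow array means a scalar value that is read directly from the slot.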
*/ static inline const CompressedColumnValues * vector_slot_get_compressed_column_values(TupleTableSlot *slot, const AttrNumber attnum) { const uint16 offset = AttrNumberGetAttrOffset(attnum); + + if (TTS_IS_ARROWTUPLE(slot)) + { + ArrowTupleTableSlot *aslot = (ArrowTupleTableSlot *) slot; + const ArrowArray *arrow = arrow_slot_get_array(slot, attnum); + CompressedColumnValues *values = &aslot->ccvalues; + int16 attlen = TupleDescAttr(slot->tts_tupleDescriptor, offset)->attlen; + + MemSet(values, 0, sizeof(CompressedColumnValues)); + + if (arrow == NULL) + { + bool isnull; + + slot_getattr(slot, attnum, &isnull); + values->decompression_type = DT_Scalar; + values->output_value = &slot->tts_values[offset]; + values->output_isnull = &slot->tts_isnull[offset]; + } + else if (attlen > 0) + { + Assert(arrow->dictionary == NULL); + values->decompression_type = attlen; + values->arrow = (ArrowArray *) arrow; + values->buffers[0] = arrow->buffers[0]; + values->buffers[1] = arrow->buffers[1]; + } + else if (arrow->dictionary) + { + values->decompression_type = DT_ArrowTextDict; + values->buffers[0] = arrow->buffers[0]; + values->buffers[1] = arrow->dictionary->buffers[1]; + values->buffers[2] = arrow->dictionary->buffers[2]; + values->buffers[3] = arrow->buffers[1]; + } + else + { + values->decompression_type = DT_ArrowText; + values->buffers[0] = arrow->buffers[0]; + values->buffers[1] = arrow->buffers[1]; + values->buffers[2] = arrow->buffers[2]; + values->buffers[3] = NULL; + } + + return values; + } + const DecompressBatchState *batch_state = (const DecompressBatchState *) slot; const CompressedColumnValues *values = &batch_state->compressed_columns[offset]; return values; diff --git a/tsl/test/expected/hypercore_parallel.out b/tsl/test/expected/hypercore_parallel.out index 406e55c6e82..7b4b5085b8e 100644 --- a/tsl/test/expected/hypercore_parallel.out +++ b/tsl/test/expected/hypercore_parallel.out @@ -326,22 +326,22 @@ $$, :'hypertable')); -> Gather Workers Planned: 2 -> Parallel Append - -> Partial GroupAggregate + -> Custom Scan (VectorAgg) -> Parallel Custom Scan (ColumnarScan) on _hyper_I_N_chunk Vectorized Filter: (device_id = 1) - -> Partial GroupAggregate + -> Custom Scan (VectorAgg) -> Parallel Custom Scan (ColumnarScan) on _hyper_I_N_chunk Vectorized Filter: (device_id = 1) - -> Partial GroupAggregate + -> Custom Scan (VectorAgg) -> Parallel Custom Scan (ColumnarScan) on _hyper_I_N_chunk Vectorized Filter: (device_id = 1) - -> Partial GroupAggregate + -> Custom Scan (VectorAgg) -> Parallel Custom Scan (ColumnarScan) on _hyper_I_N_chunk Vectorized Filter: (device_id = 1) - -> Partial GroupAggregate + -> Custom Scan (VectorAgg) -> Parallel Custom Scan (ColumnarScan) on _hyper_I_N_chunk Vectorized Filter: (device_id = 1) - -> Partial GroupAggregate + -> Custom Scan (VectorAgg) -> Parallel Custom Scan (ColumnarScan) on _hyper_I_N_chunk Vectorized Filter: (device_id = 1) (22 rows) @@ -362,22 +362,22 @@ $$, :'hypertable')); -> Gather Workers Planned: 2 -> Parallel Append - -> Partial GroupAggregate + -> Custom Scan (VectorAgg) -> Parallel Custom Scan (ColumnarScan) on _hyper_I_N_chunk Scankey: (owner_id = 1) - -> Partial GroupAggregate + -> Custom Scan (VectorAgg) -> Parallel Custom Scan (ColumnarScan) on _hyper_I_N_chunk Scankey: (owner_id = 1) - -> Partial GroupAggregate + -> Custom Scan (VectorAgg) -> Parallel Custom Scan (ColumnarScan) on _hyper_I_N_chunk Scankey: (owner_id = 1) - -> Partial GroupAggregate + -> Custom Scan (VectorAgg) -> Parallel Custom Scan (ColumnarScan) on 
_hyper_I_N_chunk Scankey: (owner_id = 1) - -> Partial GroupAggregate + -> Custom Scan (VectorAgg) -> Parallel Custom Scan (ColumnarScan) on _hyper_I_N_chunk Scankey: (owner_id = 1) - -> Partial GroupAggregate + -> Custom Scan (VectorAgg) -> Parallel Custom Scan (ColumnarScan) on _hyper_I_N_chunk Scankey: (owner_id = 1) (22 rows) @@ -398,22 +398,22 @@ $$, :'hypertable')); -> Gather Workers Planned: 2 -> Parallel Append - -> Partial GroupAggregate + -> Custom Scan (VectorAgg) -> Parallel Custom Scan (ColumnarScan) on _hyper_I_N_chunk Vectorized Filter: (device_id = 1) - -> Partial GroupAggregate + -> Custom Scan (VectorAgg) -> Parallel Custom Scan (ColumnarScan) on _hyper_I_N_chunk Vectorized Filter: (device_id = 1) - -> Partial GroupAggregate + -> Custom Scan (VectorAgg) -> Parallel Custom Scan (ColumnarScan) on _hyper_I_N_chunk Vectorized Filter: (device_id = 1) - -> Partial GroupAggregate + -> Custom Scan (VectorAgg) -> Parallel Custom Scan (ColumnarScan) on _hyper_I_N_chunk Vectorized Filter: (device_id = 1) - -> Partial GroupAggregate + -> Custom Scan (VectorAgg) -> Parallel Custom Scan (ColumnarScan) on _hyper_I_N_chunk Vectorized Filter: (device_id = 1) - -> Partial GroupAggregate + -> Custom Scan (VectorAgg) -> Parallel Custom Scan (ColumnarScan) on _hyper_I_N_chunk Vectorized Filter: (device_id = 1) (22 rows) @@ -433,22 +433,22 @@ $$, :'hypertable')); -> Gather Workers Planned: 2 -> Parallel Append - -> Partial GroupAggregate + -> Custom Scan (VectorAgg) -> Parallel Custom Scan (ColumnarScan) on _hyper_I_N_chunk Scankey: (owner_id = 1) - -> Partial GroupAggregate + -> Custom Scan (VectorAgg) -> Parallel Custom Scan (ColumnarScan) on _hyper_I_N_chunk Scankey: (owner_id = 1) - -> Partial GroupAggregate + -> Custom Scan (VectorAgg) -> Parallel Custom Scan (ColumnarScan) on _hyper_I_N_chunk Scankey: (owner_id = 1) - -> Partial GroupAggregate + -> Custom Scan (VectorAgg) -> Parallel Custom Scan (ColumnarScan) on _hyper_I_N_chunk Scankey: (owner_id = 1) - -> Partial GroupAggregate + -> Custom Scan (VectorAgg) -> Parallel Custom Scan (ColumnarScan) on _hyper_I_N_chunk Scankey: (owner_id = 1) - -> Partial GroupAggregate + -> Custom Scan (VectorAgg) -> Parallel Custom Scan (ColumnarScan) on _hyper_I_N_chunk Scankey: (owner_id = 1) (22 rows) diff --git a/tsl/test/expected/hypercore_vectoragg.out b/tsl/test/expected/hypercore_vectoragg.out new file mode 100644 index 00000000000..496d80e616c --- /dev/null +++ b/tsl/test/expected/hypercore_vectoragg.out @@ -0,0 +1,697 @@ +-- This file and its contents are licensed under the Timescale License. +-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. +-- +-- Test VectorAgg on top of scans with Hypercore TAM. +-- +-- This test is not for the vectorized aggregation functionality +-- itself, since that is handled by other tests. Here we only test +-- that VectorAgg is compatible with scans on Hypercore TAM and that +-- the planning is done correctly. +-- +-- To run on a larger data set, the vectorized_aggregation and +-- vector_agg_* tests can be run with TAM enabled by default. This is +-- also a good way to verify that the output from Hypercore TAM is the +-- same. 
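+-- (For example, by setting timescaledb.default_hypercore_use_access_method to true +-- before running those tests.)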
+-- +create table aggdata (time timestamptz, device int, location int, temp float); +select create_hypertable('aggdata', 'time', create_default_indexes=>false); +NOTICE: adding not-null constraint to column "time" + create_hypertable +---------------------- + (1,public,aggdata,t) +(1 row) + +insert into aggdata values ('2024-01-01 01:00', 1, 1, 1.0), ('2024-01-01 01:00', 2, 1, 2.0), ('2024-03-01 01:00', 3, 2, 3.0), ('2024-01-01 02:00', NULL, 1, 0.0), ('2024-01-01 02:00', NULL, 3, NULL); +select format('%I.%I', chunk_schema, chunk_name)::regclass as chunk +from timescaledb_information.chunks +where hypertable_name='aggdata' +limit 1 \gset +alter table aggdata set (timescaledb.compress_orderby='time', timescaledb.compress_segmentby='device'); +alter table :chunk set access method hypercore; +-- Add some non-compressed data to ensure vectoraggs work with both +-- compressed and non-compressed data. +insert into aggdata values ('2024-01-01 02:00', 1, 1, 3.0); +analyze aggdata; +-- +-- Run a number of queries to compare plans and output with vectorized +-- aggregation on and off. +-- +-- This is just to do basic checks to ensure VectorAggs produce the +-- expected output. +-- +set timescaledb.debug_require_vector_agg to 'require'; +explain (verbose, costs off) +select avg(device) from aggdata; + QUERY PLAN +---------------------------------------------------------------------------------------- + Finalize Aggregate + Output: avg(_hyper_1_1_chunk.device) + -> Append + -> Custom Scan (VectorAgg) + Output: (PARTIAL avg(_hyper_1_1_chunk.device)) + Grouping Policy: all compressed batches + -> Custom Scan (ColumnarScan) on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk.device + -> Partial Aggregate + Output: PARTIAL avg(_hyper_1_2_chunk.device) + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk.device +(12 rows) + +select avg(device) from aggdata; + avg +-------------------- + 1.7500000000000000 +(1 row) + +set timescaledb.enable_vectorized_aggregation=false; +reset timescaledb.debug_require_vector_agg; +explain (verbose, costs off) +select avg(device) from aggdata; + QUERY PLAN +---------------------------------------------------------------------------------------- + Finalize Aggregate + Output: avg(_hyper_1_1_chunk.device) + -> Append + -> Partial Aggregate + Output: PARTIAL avg(_hyper_1_1_chunk.device) + -> Custom Scan (ColumnarScan) on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk.device + -> Partial Aggregate + Output: PARTIAL avg(_hyper_1_2_chunk.device) + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk.device +(11 rows) + +select avg(device) from aggdata; + avg +-------------------- + 1.7500000000000000 +(1 row) + +-- +-- Test agg filter on segmentby column +-- +set timescaledb.enable_vectorized_aggregation=true; +explain (verbose, costs off) +select avg(temp) filter (where device > 1) from aggdata; + QUERY PLAN +--------------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: avg(_hyper_1_1_chunk.temp) FILTER (WHERE (_hyper_1_1_chunk.device > 1)) + -> Append + -> Custom Scan (VectorAgg) + Output: (PARTIAL avg(_hyper_1_1_chunk.temp) FILTER (WHERE (_hyper_1_1_chunk.device > 1))) + Grouping Policy: all compressed batches + -> Custom Scan (ColumnarScan) on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk.temp, _hyper_1_1_chunk.device + -> Partial Aggregate + Output: PARTIAL avg(_hyper_1_2_chunk.temp) 
FILTER (WHERE (_hyper_1_2_chunk.device > 1)) + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk.temp, _hyper_1_2_chunk.device +(12 rows) + +set timescaledb.debug_require_vector_agg to 'require'; +select avg(temp) filter (where device > 1) from aggdata; + avg +----- + 2.5 +(1 row) + +set timescaledb.enable_vectorized_aggregation=false; +reset timescaledb.debug_require_vector_agg; +explain (verbose, costs off) +select avg(temp) filter (where device > 1) from aggdata; + QUERY PLAN +------------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: avg(_hyper_1_1_chunk.temp) FILTER (WHERE (_hyper_1_1_chunk.device > 1)) + -> Append + -> Partial Aggregate + Output: PARTIAL avg(_hyper_1_1_chunk.temp) FILTER (WHERE (_hyper_1_1_chunk.device > 1)) + -> Custom Scan (ColumnarScan) on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk.temp, _hyper_1_1_chunk.device + -> Partial Aggregate + Output: PARTIAL avg(_hyper_1_2_chunk.temp) FILTER (WHERE (_hyper_1_2_chunk.device > 1)) + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk.temp, _hyper_1_2_chunk.device +(11 rows) + +select avg(temp) filter (where device > 1) from aggdata; + avg +----- + 2.5 +(1 row) + +-- +-- Test agg filter on non-segmentby column +-- +set timescaledb.enable_vectorized_aggregation=true; +explain (verbose, costs off) +select count(*) filter (where location < 3) from aggdata; + QUERY PLAN +----------------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(*) FILTER (WHERE (_hyper_1_1_chunk.location < 3)) + -> Append + -> Custom Scan (VectorAgg) + Output: (PARTIAL count(*) FILTER (WHERE (_hyper_1_1_chunk.location < 3))) + Grouping Policy: all compressed batches + -> Custom Scan (ColumnarScan) on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk.location + -> Partial Aggregate + Output: PARTIAL count(*) FILTER (WHERE (_hyper_1_2_chunk.location < 3)) + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk.location +(12 rows) + +set timescaledb.debug_require_vector_agg to 'require'; +select count(*) filter (where location < 3) from aggdata; + count +------- + 5 +(1 row) + +set timescaledb.enable_vectorized_aggregation=false; +reset timescaledb.debug_require_vector_agg; +explain (verbose, costs off) +select count(*) filter (where location < 3) from aggdata; + QUERY PLAN +---------------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(*) FILTER (WHERE (_hyper_1_1_chunk.location < 3)) + -> Append + -> Partial Aggregate + Output: PARTIAL count(*) FILTER (WHERE (_hyper_1_1_chunk.location < 3)) + -> Custom Scan (ColumnarScan) on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk.location + -> Partial Aggregate + Output: PARTIAL count(*) FILTER (WHERE (_hyper_1_2_chunk.location < 3)) + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk.location +(11 rows) + +select count(*) filter (where location < 3) from aggdata; + count +------- + 5 +(1 row) + +-- +-- Test grouping on non-segmentby column +-- +set timescaledb.enable_vectorized_aggregation=true; +explain (verbose, costs off) +select location, avg(temp) from aggdata where location=1 group by location; + QUERY PLAN +---------------------------------------------------------------------------------------- + Finalize GroupAggregate + Output: 
_hyper_1_1_chunk.location, avg(_hyper_1_1_chunk.temp) + -> Append + -> Custom Scan (VectorAgg) + Output: _hyper_1_1_chunk.location, (PARTIAL avg(_hyper_1_1_chunk.temp)) + Grouping Policy: hashed with single 4-byte key + -> Custom Scan (ColumnarScan) on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk.location, _hyper_1_1_chunk.temp + Vectorized Filter: (_hyper_1_1_chunk.location = 1) + -> Partial GroupAggregate + Output: _hyper_1_2_chunk.location, PARTIAL avg(_hyper_1_2_chunk.temp) + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk.location, _hyper_1_2_chunk.temp + Filter: (_hyper_1_2_chunk.location = 1) +(14 rows) + +set timescaledb.debug_require_vector_agg to 'require'; +select location, avg(temp) from aggdata where location=1 group by location; + location | avg +----------+----- + 1 | 1.5 +(1 row) + +set timescaledb.enable_vectorized_aggregation=false; +reset timescaledb.debug_require_vector_agg; +explain (verbose, costs off) +select location, avg(temp) from aggdata where location=1 group by location; + QUERY PLAN +---------------------------------------------------------------------------------------- + Finalize GroupAggregate + Output: _hyper_1_1_chunk.location, avg(_hyper_1_1_chunk.temp) + -> Append + -> Partial GroupAggregate + Output: _hyper_1_1_chunk.location, PARTIAL avg(_hyper_1_1_chunk.temp) + -> Custom Scan (ColumnarScan) on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk.location, _hyper_1_1_chunk.temp + Vectorized Filter: (_hyper_1_1_chunk.location = 1) + -> Partial GroupAggregate + Output: _hyper_1_2_chunk.location, PARTIAL avg(_hyper_1_2_chunk.temp) + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk.location, _hyper_1_2_chunk.temp + Filter: (_hyper_1_2_chunk.location = 1) +(13 rows) + +select location, avg(temp) from aggdata where location=1 group by location; + location | avg +----------+----- + 1 | 1.5 +(1 row) + +set timescaledb.enable_vectorized_aggregation=true; +explain (verbose, costs off) +select location, count(*) from aggdata where location=1 group by location; + QUERY PLAN +---------------------------------------------------------------------------------------- + Finalize GroupAggregate + Output: _hyper_1_1_chunk.location, count(*) + -> Append + -> Custom Scan (VectorAgg) + Output: _hyper_1_1_chunk.location, (PARTIAL count(*)) + Grouping Policy: hashed with single 4-byte key + -> Custom Scan (ColumnarScan) on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk.location + Vectorized Filter: (_hyper_1_1_chunk.location = 1) + -> Partial GroupAggregate + Output: _hyper_1_2_chunk.location, PARTIAL count(*) + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk.location + Filter: (_hyper_1_2_chunk.location = 1) +(14 rows) + +set timescaledb.debug_require_vector_agg to 'require'; +select location, count(*) from aggdata where location=1 group by location; + location | count +----------+------- + 1 | 4 +(1 row) + +set timescaledb.enable_vectorized_aggregation=false; +reset timescaledb.debug_require_vector_agg; +explain (verbose, costs off) +select location, count(*) from aggdata where location=1 group by location; + QUERY PLAN +---------------------------------------------------------------------------------------- + Finalize GroupAggregate + Output: _hyper_1_1_chunk.location, count(*) + -> Append + -> Partial GroupAggregate + Output: _hyper_1_1_chunk.location, PARTIAL count(*) + -> Custom Scan (ColumnarScan) on 
_timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk.location + Vectorized Filter: (_hyper_1_1_chunk.location = 1) + -> Partial GroupAggregate + Output: _hyper_1_2_chunk.location, PARTIAL count(*) + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk.location + Filter: (_hyper_1_2_chunk.location = 1) +(13 rows) + +select location, count(*) from aggdata where location=1 group by location; + location | count +----------+------- + 1 | 4 +(1 row) + +-- +-- Test ordering/grouping on segmentby, orderby columns +-- +-- This grouping is currently NOT supported by VectorAgg +-- +set timescaledb.enable_vectorized_aggregation=true; +explain (verbose, costs off) +select time, device, sum(temp) from aggdata where device is not null group by time, device order by time, device limit 10; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------ + Limit + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, (sum(_hyper_1_1_chunk.temp)) + -> Sort + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, (sum(_hyper_1_1_chunk.temp)) + Sort Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device + -> Finalize HashAggregate + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, sum(_hyper_1_1_chunk.temp) + Group Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device + -> Append + -> Partial HashAggregate + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, PARTIAL sum(_hyper_1_1_chunk.temp) + Group Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device + -> Custom Scan (ColumnarScan) on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, _hyper_1_1_chunk.temp + Filter: (_hyper_1_1_chunk.device IS NOT NULL) + -> Partial HashAggregate + Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device, PARTIAL sum(_hyper_1_2_chunk.temp) + Group Key: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device, _hyper_1_2_chunk.temp + Filter: (_hyper_1_2_chunk.device IS NOT NULL) +(21 rows) + +set timescaledb.debug_require_vector_agg to 'forbid'; +select time, device, sum(temp) from aggdata where device is not null group by time, device order by time, device limit 10; + time | device | sum +------------------------------+--------+----- + Mon Jan 01 01:00:00 2024 PST | 1 | 1 + Mon Jan 01 01:00:00 2024 PST | 2 | 2 + Mon Jan 01 02:00:00 2024 PST | 1 | 3 + Fri Mar 01 01:00:00 2024 PST | 3 | 3 +(4 rows) + +set timescaledb.enable_vectorized_aggregation=false; +reset timescaledb.debug_require_vector_agg; +explain (verbose, costs off) +select time, device, sum(temp) from aggdata where device is not null group by time, device order by time, device limit 10; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------ + Limit + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, (sum(_hyper_1_1_chunk.temp)) + -> Sort + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, (sum(_hyper_1_1_chunk.temp)) + Sort Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device + -> Finalize HashAggregate + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, sum(_hyper_1_1_chunk.temp) + Group Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device + -> Append + -> Partial HashAggregate + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, PARTIAL sum(_hyper_1_1_chunk.temp) +
Group Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device + -> Custom Scan (ColumnarScan) on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, _hyper_1_1_chunk.temp + Filter: (_hyper_1_1_chunk.device IS NOT NULL) + -> Partial HashAggregate + Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device, PARTIAL sum(_hyper_1_2_chunk.temp) + Group Key: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device, _hyper_1_2_chunk.temp + Filter: (_hyper_1_2_chunk.device IS NOT NULL) +(21 rows) + +select time, device, sum(temp) from aggdata where device is not null group by time, device order by time, device limit 10; + time | device | sum +------------------------------+--------+----- + Mon Jan 01 01:00:00 2024 PST | 1 | 1 + Mon Jan 01 01:00:00 2024 PST | 2 | 2 + Mon Jan 01 02:00:00 2024 PST | 1 | 3 + Fri Mar 01 01:00:00 2024 PST | 3 | 3 +(4 rows) + +set timescaledb.enable_vectorized_aggregation=true; +explain (verbose, costs off) +select time, device, sum(temp) filter (where device is not null) from aggdata group by time, device order by time, device desc limit 10; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Limit + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, (sum(_hyper_1_1_chunk.temp) FILTER (WHERE (_hyper_1_1_chunk.device IS NOT NULL))) + -> Sort + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, (sum(_hyper_1_1_chunk.temp) FILTER (WHERE (_hyper_1_1_chunk.device IS NOT NULL))) + Sort Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device DESC + -> Finalize HashAggregate + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, sum(_hyper_1_1_chunk.temp) FILTER (WHERE (_hyper_1_1_chunk.device IS NOT NULL)) + Group Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device + -> Append + -> Partial HashAggregate + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, PARTIAL sum(_hyper_1_1_chunk.temp) FILTER (WHERE (_hyper_1_1_chunk.device IS NOT NULL)) + Group Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device + -> Custom Scan (ColumnarScan) on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, _hyper_1_1_chunk.temp + -> Partial HashAggregate + Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device, PARTIAL sum(_hyper_1_2_chunk.temp) FILTER (WHERE (_hyper_1_2_chunk.device IS NOT NULL)) + Group Key: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device, _hyper_1_2_chunk.temp +(19 rows) + +set timescaledb.debug_require_vector_agg to 'forbid'; +select time, device, sum(temp) filter (where device is not null) from aggdata group by time, device order by time, device desc limit 10; + time | device | sum +------------------------------+--------+----- + Mon Jan 01 01:00:00 2024 PST | 2 | 2 + Mon Jan 01 01:00:00 2024 PST | 1 | 1 + Mon Jan 01 02:00:00 2024 PST | | + Mon Jan 01 02:00:00 2024 PST | 1 | 3 + Fri Mar 01 01:00:00 2024 PST | 3 | 3 +(5 rows) + +set timescaledb.enable_vectorized_aggregation=false; +reset timescaledb.debug_require_vector_agg; +explain (verbose, costs off) +select time, device, sum(temp) filter (where device is not null) from aggdata group by time, device order by time, device desc limit 10; + QUERY PLAN 
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Limit + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, (sum(_hyper_1_1_chunk.temp) FILTER (WHERE (_hyper_1_1_chunk.device IS NOT NULL))) + -> Sort + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, (sum(_hyper_1_1_chunk.temp) FILTER (WHERE (_hyper_1_1_chunk.device IS NOT NULL))) + Sort Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device DESC + -> Finalize HashAggregate + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, sum(_hyper_1_1_chunk.temp) FILTER (WHERE (_hyper_1_1_chunk.device IS NOT NULL)) + Group Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device + -> Append + -> Partial HashAggregate + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, PARTIAL sum(_hyper_1_1_chunk.temp) FILTER (WHERE (_hyper_1_1_chunk.device IS NOT NULL)) + Group Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device + -> Custom Scan (ColumnarScan) on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, _hyper_1_1_chunk.temp + -> Partial HashAggregate + Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device, PARTIAL sum(_hyper_1_2_chunk.temp) FILTER (WHERE (_hyper_1_2_chunk.device IS NOT NULL)) + Group Key: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device, _hyper_1_2_chunk.temp +(19 rows) + +select time, device, sum(temp) filter (where device is not null) from aggdata group by time, device order by time, device desc limit 10; + time | device | sum +------------------------------+--------+----- + Mon Jan 01 01:00:00 2024 PST | 2 | 2 + Mon Jan 01 01:00:00 2024 PST | 1 | 1 + Mon Jan 01 02:00:00 2024 PST | | + Mon Jan 01 02:00:00 2024 PST | 1 | 3 + Fri Mar 01 01:00:00 2024 PST | 3 | 3 +(5 rows) + +-- +-- Test ordering on time (orderby), ASC as well as DESC +-- +set timescaledb.enable_vectorized_aggregation=true; +explain (verbose, costs off) +select time, sum(temp) from aggdata group by time order by time limit 10; + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Limit + Output: _hyper_1_1_chunk."time", (sum(_hyper_1_1_chunk.temp)) + -> Sort + Output: _hyper_1_1_chunk."time", (sum(_hyper_1_1_chunk.temp)) + Sort Key: _hyper_1_1_chunk."time" + -> Finalize HashAggregate + Output: _hyper_1_1_chunk."time", sum(_hyper_1_1_chunk.temp) + Group Key: _hyper_1_1_chunk."time" + -> Append + -> Custom Scan (VectorAgg) + Output: _hyper_1_1_chunk."time", (PARTIAL sum(_hyper_1_1_chunk.temp)) + Grouping Policy: hashed with single 8-byte key + -> Custom Scan (ColumnarScan) on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.temp + -> Partial HashAggregate + Output: _hyper_1_2_chunk."time", PARTIAL sum(_hyper_1_2_chunk.temp) + Group Key: _hyper_1_2_chunk."time" + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.temp +(19 rows) + +set timescaledb.debug_require_vector_agg to 'require'; +select time, sum(temp) from aggdata group by time order by time limit 10; + time | sum +------------------------------+----- + Mon Jan 01 01:00:00 2024 PST | 3 + Mon Jan 01 02:00:00 2024 PST | 3 + Fri Mar 01 01:00:00 2024 PST | 3 +(3 rows) + +set timescaledb.enable_vectorized_aggregation=false; +reset 
timescaledb.debug_require_vector_agg; +explain (verbose, costs off) +select time, sum(temp) from aggdata group by time order by time limit 10; + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Limit + Output: _hyper_1_1_chunk."time", (sum(_hyper_1_1_chunk.temp)) + -> Sort + Output: _hyper_1_1_chunk."time", (sum(_hyper_1_1_chunk.temp)) + Sort Key: _hyper_1_1_chunk."time" + -> Finalize HashAggregate + Output: _hyper_1_1_chunk."time", sum(_hyper_1_1_chunk.temp) + Group Key: _hyper_1_1_chunk."time" + -> Append + -> Partial HashAggregate + Output: _hyper_1_1_chunk."time", PARTIAL sum(_hyper_1_1_chunk.temp) + Group Key: _hyper_1_1_chunk."time" + -> Custom Scan (ColumnarScan) on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.temp + -> Partial HashAggregate + Output: _hyper_1_2_chunk."time", PARTIAL sum(_hyper_1_2_chunk.temp) + Group Key: _hyper_1_2_chunk."time" + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.temp +(19 rows) + +select time, sum(temp) from aggdata group by time order by time limit 10; + time | sum +------------------------------+----- + Mon Jan 01 01:00:00 2024 PST | 3 + Mon Jan 01 02:00:00 2024 PST | 3 + Fri Mar 01 01:00:00 2024 PST | 3 +(3 rows) + +set timescaledb.enable_vectorized_aggregation=true; +explain (verbose, costs off) +select time, sum(temp) from aggdata group by time order by time desc limit 10; + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Limit + Output: _hyper_1_2_chunk."time", (sum(_hyper_1_2_chunk.temp)) + -> Sort + Output: _hyper_1_2_chunk."time", (sum(_hyper_1_2_chunk.temp)) + Sort Key: _hyper_1_2_chunk."time" DESC + -> Finalize HashAggregate + Output: _hyper_1_2_chunk."time", sum(_hyper_1_2_chunk.temp) + Group Key: _hyper_1_2_chunk."time" + -> Append + -> Partial HashAggregate + Output: _hyper_1_2_chunk."time", PARTIAL sum(_hyper_1_2_chunk.temp) + Group Key: _hyper_1_2_chunk."time" + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.temp + -> Custom Scan (VectorAgg) + Output: _hyper_1_1_chunk."time", (PARTIAL sum(_hyper_1_1_chunk.temp)) + Grouping Policy: hashed with single 8-byte key + -> Custom Scan (ColumnarScan) on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.temp +(19 rows) + +set timescaledb.debug_require_vector_agg to 'require'; +select time, sum(temp) from aggdata group by time order by time desc limit 10; + time | sum +------------------------------+----- + Fri Mar 01 01:00:00 2024 PST | 3 + Mon Jan 01 02:00:00 2024 PST | 3 + Mon Jan 01 01:00:00 2024 PST | 3 +(3 rows) + +set timescaledb.enable_vectorized_aggregation=false; +reset timescaledb.debug_require_vector_agg; +explain (verbose, costs off) +select time, sum(temp) from aggdata group by time order by time desc limit 10; + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Limit + Output: _hyper_1_2_chunk."time", (sum(_hyper_1_2_chunk.temp)) + -> Sort + Output: _hyper_1_2_chunk."time", (sum(_hyper_1_2_chunk.temp)) + Sort Key: _hyper_1_2_chunk."time" DESC + -> Finalize HashAggregate + Output: _hyper_1_2_chunk."time", sum(_hyper_1_2_chunk.temp) + Group Key: _hyper_1_2_chunk."time" + -> Append + -> Partial HashAggregate + Output: _hyper_1_2_chunk."time", PARTIAL sum(_hyper_1_2_chunk.temp) + Group 
Key: _hyper_1_2_chunk."time" + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.temp + -> Partial HashAggregate + Output: _hyper_1_1_chunk."time", PARTIAL sum(_hyper_1_1_chunk.temp) + Group Key: _hyper_1_1_chunk."time" + -> Custom Scan (ColumnarScan) on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.temp +(19 rows) + +select time, sum(temp) from aggdata group by time order by time desc limit 10; + time | sum +------------------------------+----- + Fri Mar 01 01:00:00 2024 PST | 3 + Mon Jan 01 02:00:00 2024 PST | 3 + Mon Jan 01 01:00:00 2024 PST | 3 +(3 rows) + +-- +-- Test ordering on time (orderby), ASC as well as DESC with no segmentby +-- +create table aggdata_timeorder (like aggdata); +select create_hypertable('aggdata_timeorder', 'time', create_default_indexes=>false); + create_hypertable +-------------------------------- + (3,public,aggdata_timeorder,t) +(1 row) + +insert into aggdata_timeorder select * from aggdata; +select format('%I.%I', chunk_schema, chunk_name)::regclass as chunk +from timescaledb_information.chunks +where hypertable_name='aggdata_timeorder' +limit 1 \gset +alter table aggdata_timeorder set (timescaledb.compress_orderby='time', timescaledb.compress_segmentby=''); +alter table :chunk set access method hypercore; +analyze aggdata_timeorder; +set timescaledb.enable_vectorized_aggregation=true; +explain (verbose, costs off) +select time, sum(temp) from aggdata_timeorder group by time order by time limit 10; + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Limit + Output: _hyper_3_4_chunk."time", (sum(_hyper_3_4_chunk.temp)) + -> Sort + Output: _hyper_3_4_chunk."time", (sum(_hyper_3_4_chunk.temp)) + Sort Key: _hyper_3_4_chunk."time" + -> Finalize HashAggregate + Output: _hyper_3_4_chunk."time", sum(_hyper_3_4_chunk.temp) + Group Key: _hyper_3_4_chunk."time" + -> Append + -> Custom Scan (VectorAgg) + Output: _hyper_3_4_chunk."time", (PARTIAL sum(_hyper_3_4_chunk.temp)) + Grouping Policy: hashed with single 8-byte key + -> Custom Scan (ColumnarScan) on _timescaledb_internal._hyper_3_4_chunk + Output: _hyper_3_4_chunk."time", _hyper_3_4_chunk.temp + -> Partial HashAggregate + Output: _hyper_3_5_chunk."time", PARTIAL sum(_hyper_3_5_chunk.temp) + Group Key: _hyper_3_5_chunk."time" + -> Seq Scan on _timescaledb_internal._hyper_3_5_chunk + Output: _hyper_3_5_chunk."time", _hyper_3_5_chunk.temp +(19 rows) + +set timescaledb.debug_require_vector_agg to 'require'; +select time, sum(temp) from aggdata_timeorder group by time order by time limit 10; + time | sum +------------------------------+----- + Mon Jan 01 01:00:00 2024 PST | 3 + Mon Jan 01 02:00:00 2024 PST | 3 + Fri Mar 01 01:00:00 2024 PST | 3 +(3 rows) + +explain (verbose, costs off) +select time, sum(temp) from aggdata_timeorder group by time order by time desc limit 10; + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Limit + Output: _hyper_3_5_chunk."time", (sum(_hyper_3_5_chunk.temp)) + -> Sort + Output: _hyper_3_5_chunk."time", (sum(_hyper_3_5_chunk.temp)) + Sort Key: _hyper_3_5_chunk."time" DESC + -> Finalize HashAggregate + Output: _hyper_3_5_chunk."time", sum(_hyper_3_5_chunk.temp) + Group Key: _hyper_3_5_chunk."time" + -> Append + -> Partial HashAggregate + Output: _hyper_3_5_chunk."time", PARTIAL sum(_hyper_3_5_chunk.temp) + Group Key: _hyper_3_5_chunk."time" + -> Seq Scan on 
_timescaledb_internal._hyper_3_5_chunk + Output: _hyper_3_5_chunk."time", _hyper_3_5_chunk.temp + -> Custom Scan (VectorAgg) + Output: _hyper_3_4_chunk."time", (PARTIAL sum(_hyper_3_4_chunk.temp)) + Grouping Policy: hashed with single 8-byte key + -> Custom Scan (ColumnarScan) on _timescaledb_internal._hyper_3_4_chunk + Output: _hyper_3_4_chunk."time", _hyper_3_4_chunk.temp +(19 rows) + +select time, sum(temp) from aggdata_timeorder group by time order by time desc limit 10; + time | sum +------------------------------+----- + Fri Mar 01 01:00:00 2024 PST | 3 + Mon Jan 01 02:00:00 2024 PST | 3 + Mon Jan 01 01:00:00 2024 PST | 3 +(3 rows) + diff --git a/tsl/test/expected/vector_agg_default.out b/tsl/test/expected/vector_agg_default.out index 49b629cd239..9ab939edb69 100644 --- a/tsl/test/expected/vector_agg_default.out +++ b/tsl/test/expected/vector_agg_default.out @@ -2,6 +2,9 @@ -- Please see the included NOTICE for copyright information and -- LICENSE-TIMESCALE for a copy of the license. \c :TEST_DBNAME :ROLE_SUPERUSER +-- Uncomment these two settings to run this test with hypercore TAM +--set timescaledb.default_hypercore_use_access_method=true; +--set enable_indexscan=off; create function stable_abs(x int4) returns int4 as 'int4abs' language internal stable; create table dvagg(a int, b int); select create_hypertable('dvagg', 'a', chunk_time_interval => 1000); diff --git a/tsl/test/expected/vector_agg_filter.out b/tsl/test/expected/vector_agg_filter.out index 8310a828be5..7787a1e21b6 100644 --- a/tsl/test/expected/vector_agg_filter.out +++ b/tsl/test/expected/vector_agg_filter.out @@ -2,6 +2,8 @@ -- Please see the included NOTICE for copyright information and -- LICENSE-TIMESCALE for a copy of the license. \c :TEST_DBNAME :ROLE_SUPERUSER +-- Uncomment to run this test with hypercore TAM +--set timescaledb.default_hypercore_use_access_method=true; -- helper function: float -> pseudorandom float [-0.5..0.5] CREATE OR REPLACE FUNCTION mix(x anyelement) RETURNS float8 AS $$ SELECT hashfloat8(x::float8) / pow(2, 32) diff --git a/tsl/test/expected/vector_agg_functions.out b/tsl/test/expected/vector_agg_functions.out index 24c3e76b8d9..db5551e66cf 100644 --- a/tsl/test/expected/vector_agg_functions.out +++ b/tsl/test/expected/vector_agg_functions.out @@ -2,6 +2,8 @@ -- Please see the included NOTICE for copyright information and -- LICENSE-TIMESCALE for a copy of the license. \c :TEST_DBNAME :ROLE_SUPERUSER +-- Uncomment to run this test with hypercore TAM +--set timescaledb.default_hypercore_use_access_method=true; -- helper function: float -> pseudorandom float [-0.5..0.5] CREATE OR REPLACE FUNCTION mix(x anyelement) RETURNS float8 AS $$ SELECT hashfloat8(x::float8) / pow(2, 32) diff --git a/tsl/test/expected/vector_agg_memory.out b/tsl/test/expected/vector_agg_memory.out index 1600fbba164..3f812241647 100644 --- a/tsl/test/expected/vector_agg_memory.out +++ b/tsl/test/expected/vector_agg_memory.out @@ -2,6 +2,9 @@ -- Please see the included NOTICE for copyright information and -- LICENSE-TIMESCALE for a copy of the license. \c :TEST_DBNAME :ROLE_SUPERUSER +-- Uncomment to run this test with hypercore TAM +--set timescaledb.default_hypercore_use_access_method=true; +--set enable_indexscan=false; -- Helper function that returns the amount of memory currently allocated in a -- given memory context. 
create or replace function ts_debug_allocated_bytes(text = 'PortalContext') returns bigint diff --git a/tsl/test/expected/vector_agg_param.out b/tsl/test/expected/vector_agg_param.out index 3d717b10d2a..58e0aa8719a 100644 --- a/tsl/test/expected/vector_agg_param.out +++ b/tsl/test/expected/vector_agg_param.out @@ -2,6 +2,8 @@ -- Please see the included NOTICE for copyright information and -- LICENSE-TIMESCALE for a copy of the license. -- Test parameterized vector aggregation plans. +-- Uncomment to run this test with hypercore TAM +--set timescaledb.default_hypercore_use_access_method=true; create table pvagg(s int, a int); select create_hypertable('pvagg', 'a', chunk_time_interval => 1000); NOTICE: adding not-null constraint to column "a" diff --git a/tsl/test/expected/vector_agg_segmentby.out b/tsl/test/expected/vector_agg_segmentby.out index ee203af4ca2..fbc015e7943 100644 --- a/tsl/test/expected/vector_agg_segmentby.out +++ b/tsl/test/expected/vector_agg_segmentby.out @@ -4,6 +4,8 @@ \set CHUNKS 2::int \set CHUNK_ROWS 100000::int \set GROUPING_CARDINALITY 10::int +-- Uncomment to run this test with hypercore TAM +--set timescaledb.default_hypercore_use_access_method=true; create table svagg(t int, f int, s int); select create_hypertable('svagg', 's', chunk_time_interval => :GROUPING_CARDINALITY / :CHUNKS); NOTICE: adding not-null constraint to column "s" diff --git a/tsl/test/expected/vectorized_aggregation.out b/tsl/test/expected/vectorized_aggregation.out index f48d73027a2..a55ef2b133e 100644 --- a/tsl/test/expected/vectorized_aggregation.out +++ b/tsl/test/expected/vectorized_aggregation.out @@ -29,6 +29,11 @@ SELECT sum(segment_by_value), sum(int_value), sum(float_value) FROM testtable; 304695 | 304695 | 304695 (1 row) +-- +-- Enable this GUC to run this test with Hypercore TAM. The EXPLAINs +-- will differ, but the results should not. +-- +--SET timescaledb.default_hypercore_use_access_method = true; --- -- Tests with some chunks compressed --- diff --git a/tsl/test/sql/CMakeLists.txt b/tsl/test/sql/CMakeLists.txt index a12491cb756..89954d78b00 100644 --- a/tsl/test/sql/CMakeLists.txt +++ b/tsl/test/sql/CMakeLists.txt @@ -180,7 +180,8 @@ if((${PG_VERSION_MAJOR} GREATER_EQUAL "15")) endif() if((${PG_VERSION_MAJOR} GREATER_EQUAL "16")) - list(APPEND TEST_FILES cagg_planning.sql hypercore_parallel.sql) + list(APPEND TEST_FILES cagg_planning.sql hypercore_parallel.sql + hypercore_vectoragg.sql) endif() if((${PG_VERSION_MAJOR} GREATER_EQUAL "17")) diff --git a/tsl/test/sql/hypercore_vectoragg.sql b/tsl/test/sql/hypercore_vectoragg.sql new file mode 100644 index 00000000000..6bf52a6e9cc --- /dev/null +++ b/tsl/test/sql/hypercore_vectoragg.sql @@ -0,0 +1,191 @@ +-- This file and its contents are licensed under the Timescale License. +-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. + +-- +-- Test VectorAgg on top of scans with Hypercore TAM. +-- +-- This test is not for the vectorized aggregation functionality +-- itself, since that is handled by other tests. Here we only test +-- that VectorAgg is compatible with scans on Hypercore TAM and that +-- the planning is done correctly. +-- +-- To run on a larger data set, the vectorized_aggregation and +-- vector_agg_* tests can be run with TAM enabled by default. This is +-- also a good way to verify that the output with Hypercore TAM is the +-- same as without it.
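+-- +-- For example (a sketch; these settings appear commented out at the +-- top of those test files, and enable_indexscan is only needed by +-- some of them): +-- +--   set timescaledb.default_hypercore_use_access_method=true; +--   set enable_indexscan=off;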
+-- +create table aggdata (time timestamptz, device int, location int, temp float); +select create_hypertable('aggdata', 'time', create_default_indexes=>false); +insert into aggdata values ('2024-01-01 01:00', 1, 1, 1.0), ('2024-01-01 01:00', 2, 1, 2.0), ('2024-03-01 01:00', 3, 2, 3.0), ('2024-01-01 02:00', NULL, 1, 0.0), ('2024-01-01 02:00', NULL, 3, NULL); + +select format('%I.%I', chunk_schema, chunk_name)::regclass as chunk +from timescaledb_information.chunks +where hypertable_name='aggdata' +limit 1 \gset + +alter table aggdata set (timescaledb.compress_orderby='time', timescaledb.compress_segmentby='device'); +alter table :chunk set access method hypercore; + +-- Add some non-compressed data to ensure VectorAgg works with both +-- compressed and non-compressed data. +insert into aggdata values ('2024-01-01 02:00', 1, 1, 3.0); + +analyze aggdata; +-- +-- Run a number of queries to compare plans and output with vectorized +-- aggregation on and off. +-- +-- These are just basic checks to ensure VectorAgg produces the +-- expected output. +-- +set timescaledb.debug_require_vector_agg to 'require'; +explain (verbose, costs off) +select avg(device) from aggdata; +select avg(device) from aggdata; + +set timescaledb.enable_vectorized_aggregation=false; +reset timescaledb.debug_require_vector_agg; +explain (verbose, costs off) +select avg(device) from aggdata; +select avg(device) from aggdata; + +-- +-- Test agg filter on segmentby column +-- +set timescaledb.enable_vectorized_aggregation=true; +explain (verbose, costs off) +select avg(temp) filter (where device > 1) from aggdata; +set timescaledb.debug_require_vector_agg to 'require'; +select avg(temp) filter (where device > 1) from aggdata; + +set timescaledb.enable_vectorized_aggregation=false; +reset timescaledb.debug_require_vector_agg; +explain (verbose, costs off) +select avg(temp) filter (where device > 1) from aggdata; +select avg(temp) filter (where device > 1) from aggdata; + +-- +-- Test agg filter on non-segmentby column +-- +set timescaledb.enable_vectorized_aggregation=true; +explain (verbose, costs off) +select count(*) filter (where location < 3) from aggdata; +set timescaledb.debug_require_vector_agg to 'require'; +select count(*) filter (where location < 3) from aggdata; + +set timescaledb.enable_vectorized_aggregation=false; +reset timescaledb.debug_require_vector_agg; +explain (verbose, costs off) +select count(*) filter (where location < 3) from aggdata; +select count(*) filter (where location < 3) from aggdata; + +-- +-- Test grouping on non-segmentby column +-- +set timescaledb.enable_vectorized_aggregation=true; +explain (verbose, costs off) +select location, avg(temp) from aggdata where location=1 group by location; +set timescaledb.debug_require_vector_agg to 'require'; +select location, avg(temp) from aggdata where location=1 group by location; + +set timescaledb.enable_vectorized_aggregation=false; +reset timescaledb.debug_require_vector_agg; +explain (verbose, costs off) +select location, avg(temp) from aggdata where location=1 group by location; +select location, avg(temp) from aggdata where location=1 group by location; + +set timescaledb.enable_vectorized_aggregation=true; +explain (verbose, costs off) +select location, count(*) from aggdata where location=1 group by location; +set timescaledb.debug_require_vector_agg to 'require'; +select location, count(*) from aggdata where location=1 group by location; + +set timescaledb.enable_vectorized_aggregation=false; +reset 
timescaledb.debug_require_vector_agg; +explain (verbose, costs off) +select location, count(*) from aggdata where location=1 group by location; +select location, count(*) from aggdata where location=1 group by location; + +-- +-- Test ordering/grouping on segmentby, orderby columns +-- +-- This grouping is currently NOT supported by VectorAgg +-- +set timescaledb.enable_vectorized_aggregation=true; +explain (verbose, costs off) +select time, device, sum(temp) from aggdata where device is not null group by time, device order by time, device limit 10; +set timescaledb.debug_require_vector_agg to 'forbid'; +select time, device, sum(temp) from aggdata where device is not null group by time, device order by time, device limit 10; + +set timescaledb.enable_vectorized_aggregation=false; +reset timescaledb.debug_require_vector_agg; +explain (verbose, costs off) +select time, device, sum(temp) from aggdata where device is not null group by time, device order by time, device limit 10; +select time, device, sum(temp) from aggdata where device is not null group by time, device order by time, device limit 10; + +set timescaledb.enable_vectorized_aggregation=true; +explain (verbose, costs off) +select time, device, sum(temp) filter (where device is not null) from aggdata group by time, device order by time, device desc limit 10; +set timescaledb.debug_require_vector_agg to 'forbid'; +select time, device, sum(temp) filter (where device is not null) from aggdata group by time, device order by time, device desc limit 10; + +set timescaledb.enable_vectorized_aggregation=false; +reset timescaledb.debug_require_vector_agg; +explain (verbose, costs off) +select time, device, sum(temp) filter (where device is not null) from aggdata group by time, device order by time, device desc limit 10; +select time, device, sum(temp) filter (where device is not null) from aggdata group by time, device order by time, device desc limit 10; + +-- +-- Test ordering on time (orderby), ASC as well as DESC +-- +set timescaledb.enable_vectorized_aggregation=true; +explain (verbose, costs off) +select time, sum(temp) from aggdata group by time order by time limit 10; +set timescaledb.debug_require_vector_agg to 'require'; +select time, sum(temp) from aggdata group by time order by time limit 10; + +set timescaledb.enable_vectorized_aggregation=false; +reset timescaledb.debug_require_vector_agg; +explain (verbose, costs off) +select time, sum(temp) from aggdata group by time order by time limit 10; +select time, sum(temp) from aggdata group by time order by time limit 10; + + +set timescaledb.enable_vectorized_aggregation=true; +explain (verbose, costs off) +select time, sum(temp) from aggdata group by time order by time desc limit 10; +set timescaledb.debug_require_vector_agg to 'require'; +select time, sum(temp) from aggdata group by time order by time desc limit 10; + +set timescaledb.enable_vectorized_aggregation=false; +reset timescaledb.debug_require_vector_agg; +explain (verbose, costs off) +select time, sum(temp) from aggdata group by time order by time desc limit 10; +select time, sum(temp) from aggdata group by time order by time desc limit 10; + +-- +-- Test ordering on time (orderby), ASC as well as DESC with no segmentby +-- +create table aggdata_timeorder (like aggdata); +select create_hypertable('aggdata_timeorder', 'time', create_default_indexes=>false); +insert into aggdata_timeorder select * from aggdata; + +select format('%I.%I', chunk_schema, chunk_name)::regclass as chunk +from timescaledb_information.chunks 
+where hypertable_name='aggdata_timeorder' +limit 1 \gset + +alter table aggdata_timeorder set (timescaledb.compress_orderby='time', timescaledb.compress_segmentby=''); +alter table :chunk set access method hypercore; +analyze aggdata_timeorder; + +set timescaledb.enable_vectorized_aggregation=true; +explain (verbose, costs off) +select time, sum(temp) from aggdata_timeorder group by time order by time limit 10; +set timescaledb.debug_require_vector_agg to 'require'; +select time, sum(temp) from aggdata_timeorder group by time order by time limit 10; + +explain (verbose, costs off) +select time, sum(temp) from aggdata_timeorder group by time order by time desc limit 10; +select time, sum(temp) from aggdata_timeorder group by time order by time desc limit 10; diff --git a/tsl/test/sql/vector_agg_default.sql b/tsl/test/sql/vector_agg_default.sql index 83617ca8746..00a11b530e7 100644 --- a/tsl/test/sql/vector_agg_default.sql +++ b/tsl/test/sql/vector_agg_default.sql @@ -3,6 +3,11 @@ -- LICENSE-TIMESCALE for a copy of the license. \c :TEST_DBNAME :ROLE_SUPERUSER + +-- Uncomment these two settings to run this test with hypercore TAM +--set timescaledb.default_hypercore_use_access_method=true; +--set enable_indexscan=off; + create function stable_abs(x int4) returns int4 as 'int4abs' language internal stable; create table dvagg(a int, b int); diff --git a/tsl/test/sql/vector_agg_filter.sql b/tsl/test/sql/vector_agg_filter.sql index 8a5aec27370..f85806c7096 100644 --- a/tsl/test/sql/vector_agg_filter.sql +++ b/tsl/test/sql/vector_agg_filter.sql @@ -3,6 +3,10 @@ -- LICENSE-TIMESCALE for a copy of the license. \c :TEST_DBNAME :ROLE_SUPERUSER + +-- Uncomment to run this test with hypercore TAM +--set timescaledb.default_hypercore_use_access_method=true; + -- helper function: float -> pseudorandom float [-0.5..0.5] CREATE OR REPLACE FUNCTION mix(x anyelement) RETURNS float8 AS $$ SELECT hashfloat8(x::float8) / pow(2, 32) diff --git a/tsl/test/sql/vector_agg_functions.sql b/tsl/test/sql/vector_agg_functions.sql index e5f0e164d07..d96973ff995 100644 --- a/tsl/test/sql/vector_agg_functions.sql +++ b/tsl/test/sql/vector_agg_functions.sql @@ -3,6 +3,10 @@ -- LICENSE-TIMESCALE for a copy of the license. \c :TEST_DBNAME :ROLE_SUPERUSER + +-- Uncomment to run this test with hypercore TAM +--set timescaledb.default_hypercore_use_access_method=true; + -- helper function: float -> pseudorandom float [-0.5..0.5] CREATE OR REPLACE FUNCTION mix(x anyelement) RETURNS float8 AS $$ SELECT hashfloat8(x::float8) / pow(2, 32) diff --git a/tsl/test/sql/vector_agg_memory.sql b/tsl/test/sql/vector_agg_memory.sql index 84a32071319..3dc7272c2c0 100644 --- a/tsl/test/sql/vector_agg_memory.sql +++ b/tsl/test/sql/vector_agg_memory.sql @@ -4,6 +4,10 @@ \c :TEST_DBNAME :ROLE_SUPERUSER +-- Uncomment to run this test with hypercore TAM +--set timescaledb.default_hypercore_use_access_method=true; +--set enable_indexscan=false; + -- Helper function that returns the amount of memory currently allocated in a -- given memory context. create or replace function ts_debug_allocated_bytes(text = 'PortalContext') returns bigint @@ -33,7 +37,6 @@ vacuum analyze mvagg; -- here. create table log(n int, bytes int, a bigint, b bigint, c bigint, d bigint, e bigint, f bigint); - -- First, ensure that the underlying decompression has constant memory usage. 
explain (costs off) select distinct on (s0, s1) ts_debug_allocated_bytes() bytes, s0, s1, t diff --git a/tsl/test/sql/vector_agg_param.sql b/tsl/test/sql/vector_agg_param.sql index d695b839376..19a0e5d1d83 100644 --- a/tsl/test/sql/vector_agg_param.sql +++ b/tsl/test/sql/vector_agg_param.sql @@ -4,6 +4,8 @@ -- Test parameterized vector aggregation plans. +-- Uncomment to run this test with hypercore TAM +--set timescaledb.default_hypercore_use_access_method=true; create table pvagg(s int, a int); diff --git a/tsl/test/sql/vector_agg_segmentby.sql b/tsl/test/sql/vector_agg_segmentby.sql index e76641e649f..b831a9d4b9e 100644 --- a/tsl/test/sql/vector_agg_segmentby.sql +++ b/tsl/test/sql/vector_agg_segmentby.sql @@ -6,6 +6,9 @@ \set CHUNK_ROWS 100000::int \set GROUPING_CARDINALITY 10::int +-- Uncomment to run this test with hypercore TAM +--set timescaledb.default_hypercore_use_access_method=true; + create table svagg(t int, f int, s int); select create_hypertable('svagg', 's', chunk_time_interval => :GROUPING_CARDINALITY / :CHUNKS); diff --git a/tsl/test/sql/vectorized_aggregation.sql b/tsl/test/sql/vectorized_aggregation.sql index 86a9f5d801c..e4388a60fe7 100644 --- a/tsl/test/sql/vectorized_aggregation.sql +++ b/tsl/test/sql/vectorized_aggregation.sql @@ -27,6 +27,12 @@ ORDER BY time; -- Aggregation result without any vectorization SELECT sum(segment_by_value), sum(int_value), sum(float_value) FROM testtable; +-- +-- Enable this GUC to run this test with Hypercore TAM. The EXPLAINs +-- will differ, but the results should not. +-- +--SET timescaledb.default_hypercore_use_access_method = true; + --- -- Tests with some chunks compressed ---