diff --git a/tsl/src/CMakeLists.txt b/tsl/src/CMakeLists.txt
index d6fc2a76266..4752f839506 100644
--- a/tsl/src/CMakeLists.txt
+++ b/tsl/src/CMakeLists.txt
@@ -51,4 +51,5 @@ install(TARGETS ${TSL_LIBRARY_NAME} DESTINATION ${PG_PKGLIBDIR})
 add_subdirectory(bgw_policy)
 add_subdirectory(compression)
 add_subdirectory(continuous_aggs)
+add_subdirectory(import)
 add_subdirectory(nodes)
diff --git a/tsl/src/compression/array.c b/tsl/src/compression/array.c
index 9cbce7a91b2..42f33b38223 100644
--- a/tsl/src/compression/array.c
+++ b/tsl/src/compression/array.c
@@ -499,10 +499,10 @@ text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls,
 	CheckCompressedData(n_total >= n_notnull);
 
 	uint32 *offsets =
-		(uint32 *) MemoryContextAllocZero(dest_mctx,
-										  pad_to_multiple(64, sizeof(*offsets) * (n_total + 1)));
+		(uint32 *) MemoryContextAlloc(dest_mctx,
+									  pad_to_multiple(64, sizeof(*offsets) * (n_total + 1)));
 	uint8 *arrow_bodies =
-		(uint8 *) MemoryContextAllocZero(dest_mctx, pad_to_multiple(64, si->len - si->cursor));
+		(uint8 *) MemoryContextAlloc(dest_mctx, pad_to_multiple(64, si->len - si->cursor));
 
 	uint32 offset = 0;
 	for (uint32 i = 0; i < n_notnull; i++)
diff --git a/tsl/src/import/CMakeLists.txt b/tsl/src/import/CMakeLists.txt
new file mode 100644
index 00000000000..ccac900c3ee
--- /dev/null
+++ b/tsl/src/import/CMakeLists.txt
@@ -0,0 +1,2 @@
+set(SOURCES "")
+target_sources(${PROJECT_NAME} PRIVATE ${SOURCES})
diff --git a/tsl/src/import/ts_like_match.c b/tsl/src/import/ts_like_match.c
new file mode 100644
index 00000000000..4d2c737fd46
--- /dev/null
+++ b/tsl/src/import/ts_like_match.c
@@ -0,0 +1,211 @@
+/*
+ * This file and its contents are licensed under the Timescale License.
+ * Please see the included NOTICE for copyright information and
+ * LICENSE-TIMESCALE for a copy of the license.
+ */
+
+/*
+ * This file contains source code that was copied and/or modified from
+ * the PostgreSQL database, which is licensed under the open-source
+ * PostgreSQL License. Please see the NOTICE at the top level
+ * directory for a copy of the PostgreSQL License.
+ *
+ * This is a copy of backend/utils/adt/like_match.c from PG 15.0, git commit sha
+ * 2a7ce2e2ce474504a707ec03e128fde66cfb8b48.
+ * It has one modification: the check_stack_depth() check is moved to happen
+ * before recursion to simplify the non-recursive code path.
+ */
+
+/*--------------------
+ * Match text and pattern, return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT.
+ *
+ * LIKE_TRUE: they match
+ * LIKE_FALSE: they don't match
+ * LIKE_ABORT: not only don't they match, but the text is too short.
+ *
+ * If LIKE_ABORT is returned, then no suffix of the text can match the
+ * pattern either, so an upper-level % scan can stop scanning now.
+ *--------------------
+ */
+
+#ifdef MATCH_LOWER
+#define GETCHAR(t) MATCH_LOWER(t)
+#else
+#define GETCHAR(t) (t)
+#endif
+
+static int
+MatchText(const char *t, int tlen, const char *p, int plen)
+{
+	/* Fast path for match-everything pattern */
+	if (plen == 1 && *p == '%')
+		return LIKE_TRUE;
+
+	/*
+	 * In this loop, we advance by char when matching wildcards (and thus on
+	 * recursive entry to this function we are properly char-synced). On other
+	 * occasions it is safe to advance by byte, as the text and pattern will
+	 * be in lockstep. This allows us to perform all comparisons between the
+	 * text and pattern on a byte by byte basis, even for multi-byte
+	 * encodings.
+	 */
+	while (tlen > 0 && plen > 0)
+	{
+		if (*p == '\\')
+		{
+			/* Next pattern byte must match literally, whatever it is */
+			NextByte(p, plen);
+			/* ... and there had better be one, per SQL standard */
+			if (plen <= 0)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
+						 errmsg("LIKE pattern must not end with escape character")));
+			if (GETCHAR(*p) != GETCHAR(*t))
+				return LIKE_FALSE;
+		}
+		else if (*p == '%')
+		{
+			char firstpat;
+
+			/*
+			 * % processing is essentially a search for a text position at
+			 * which the remainder of the text matches the remainder of the
+			 * pattern, using a recursive call to check each potential match.
+			 *
+			 * If there are wildcards immediately following the %, we can skip
+			 * over them first, using the idea that any sequence of N _'s and
+			 * one or more %'s is equivalent to N _'s and one % (ie, it will
+			 * match any sequence of at least N text characters). In this way
+			 * we will always run the recursive search loop using a pattern
+			 * fragment that begins with a literal character-to-match, thereby
+			 * not recursing more than we have to.
+			 */
+			NextByte(p, plen);
+
+			while (plen > 0)
+			{
+				if (*p == '%')
+					NextByte(p, plen);
+				else if (*p == '_')
+				{
+					/* If not enough text left to match the pattern, ABORT */
+					if (tlen <= 0)
+						return LIKE_ABORT;
+					NextChar(t, tlen);
+					NextByte(p, plen);
+				}
+				else
+					break; /* Reached a non-wildcard pattern char */
+			}
+
+			/*
+			 * If we're at end of pattern, match: we have a trailing % which
+			 * matches any remaining text string.
+			 */
+			if (plen <= 0)
+				return LIKE_TRUE;
+
+			/*
+			 * Otherwise, scan for a text position at which we can match the
+			 * rest of the pattern. The first remaining pattern char is known
+			 * to be a regular or escaped literal character, so we can compare
+			 * the first pattern byte to each text byte to avoid recursing
+			 * more than we have to. This fact also guarantees that we don't
+			 * have to consider a match to the zero-length substring at the
+			 * end of the text.
+			 */
+			if (*p == '\\')
+			{
+				if (plen < 2)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
+							 errmsg("LIKE pattern must not end with escape character")));
+				firstpat = GETCHAR(p[1]);
+			}
+			else
+				firstpat = GETCHAR(*p);
+
+			while (tlen > 0)
+			{
+				if (GETCHAR(*t) == firstpat)
+				{
+					/* Since this function recurses, it could be driven to stack overflow */
+					check_stack_depth();
+
+					int matched = MatchText(t, tlen, p, plen);
+
+					if (matched != LIKE_FALSE)
+						return matched; /* TRUE or ABORT */
+				}
+
+				NextChar(t, tlen);
+			}
+
+			/*
+			 * End of text with no match, so no point in trying later places
+			 * to start matching this pattern.
+			 */
+			return LIKE_ABORT;
+		}
+		else if (*p == '_')
+		{
+			/* _ matches any single character, and we know there is one */
+			NextChar(t, tlen);
+			NextByte(p, plen);
+			continue;
+		}
+		else if (GETCHAR(*p) != GETCHAR(*t))
+		{
+			/* non-wildcard pattern char fails to match text char */
+			return LIKE_FALSE;
+		}
+
+		/*
+		 * Pattern and text match, so advance.
+		 *
+		 * It is safe to use NextByte instead of NextChar here, even for
+		 * multi-byte character sets, because we are not following immediately
+		 * after a wildcard character. If we are in the middle of a multibyte
+		 * character, we must already have matched at least one byte of the
+		 * character from both text and pattern; so we cannot get out-of-sync
+		 * on character boundaries. And we know that no backend-legal
+		 * encoding allows ASCII characters such as '%' to appear as non-first
+		 * bytes of characters, so we won't mistakenly detect a new wildcard.
+		 */
+		NextByte(t, tlen);
+		NextByte(p, plen);
+	}
+
+	if (tlen > 0)
+		return LIKE_FALSE; /* end of pattern, but not of text */
+
+	/*
+	 * End of text, but perhaps not of pattern. Match iff the remaining
+	 * pattern can match a zero-length string, ie, it's zero or more %'s.
+	 */
+	while (plen > 0 && *p == '%')
+		NextByte(p, plen);
+	if (plen <= 0)
+		return LIKE_TRUE;
+
+	/*
+	 * End of text with no match, so no point in trying later places to start
+	 * matching this pattern.
+	 */
+	return LIKE_ABORT;
+} /* MatchText() */
+
+#ifdef CHAREQ
+#undef CHAREQ
+#endif
+
+#undef NextChar
+#undef CopyAdvChar
+#undef MatchText
+
+#undef GETCHAR
+
+#ifdef MATCH_LOWER
+#undef MATCH_LOWER
+
+#endif
diff --git a/tsl/src/nodes/decompress_chunk/CMakeLists.txt b/tsl/src/nodes/decompress_chunk/CMakeLists.txt
index 5c0c12f5d83..ab92ea29b74 100644
--- a/tsl/src/nodes/decompress_chunk/CMakeLists.txt
+++ b/tsl/src/nodes/decompress_chunk/CMakeLists.txt
@@ -8,6 +8,7 @@ set(SOURCES
     ${CMAKE_CURRENT_SOURCE_DIR}/detoaster.c
     ${CMAKE_CURRENT_SOURCE_DIR}/exec.c
     ${CMAKE_CURRENT_SOURCE_DIR}/planner.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/pred_text.c
     ${CMAKE_CURRENT_SOURCE_DIR}/pred_vector_array.c
     ${CMAKE_CURRENT_SOURCE_DIR}/qual_pushdown.c
     ${CMAKE_CURRENT_SOURCE_DIR}/vector_predicates.c)
diff --git a/tsl/src/nodes/decompress_chunk/compressed_batch.c b/tsl/src/nodes/decompress_chunk/compressed_batch.c
index c7a9041fb98..0eb9be2e035 100644
--- a/tsl/src/nodes/decompress_chunk/compressed_batch.c
+++ b/tsl/src/nodes/decompress_chunk/compressed_batch.c
@@ -19,28 +19,28 @@
 #include "nodes/decompress_chunk/vector_predicates.h"
 
 /*
- * Create a single value ArrowArray from Postgres Datum. This is used to run
- * the usual vectorized predicates on compressed columns with default values.
+ * Create a single-value ArrowArray of an arithmetic type. This is a specialized
+ * function because arithmetic types have a particular layout of ArrowArrays.
  */
 static ArrowArray *
-make_single_value_arrow(Oid pgtype, Datum datum, bool isnull)
+make_single_value_arrow_arithmetic(Oid arithmetic_type, Datum datum, bool isnull)
 {
 	struct ArrowWithBuffers
 	{
 		ArrowArray arrow;
-		uint64 buffers[2];
-		uint64 nulls_buffer;
-		uint64 values_buffer;
+		uint64 arrow_buffers_array_storage[2];
+		uint64 validity_buffer[1];
+		/* The value buffer has 64-byte padding as required by Arrow. */
+		uint64 values_buffer[8];
 	};
 
 	struct ArrowWithBuffers *with_buffers = palloc0(sizeof(struct ArrowWithBuffers));
 	ArrowArray *arrow = &with_buffers->arrow;
 	arrow->length = 1;
-	arrow->null_count = -1;
+	arrow->buffers = (const void **) with_buffers->arrow_buffers_array_storage;
 	arrow->n_buffers = 2;
-	arrow->buffers = (const void **) &with_buffers->buffers;
-	arrow->buffers[0] = &with_buffers->nulls_buffer;
-	arrow->buffers[1] = &with_buffers->values_buffer;
+	arrow->buffers[0] = with_buffers->validity_buffer;
+	arrow->buffers[1] = with_buffers->values_buffer;
 
 	if (isnull)
 	{
 		/*
@@ -49,15 +49,18 @@ make_single_value_arrow(Oid pgtype, Datum datum, bool isnull)
 		 * the Datum might be invalid if the value is null (important on i386
 		 * where it might be pass-by-reference), so don't read it.
 		 */
+		arrow->null_count = 1;
 		return arrow;
 	}
 
+	arrow_set_row_validity((uint64 *) arrow->buffers[0], 0, true);
+
 #define FOR_TYPE(PGTYPE, CTYPE, FROMDATUM)                                                         \
 	case PGTYPE:                                                                                   \
-		*((CTYPE *) &with_buffers->values_buffer) = FROMDATUM(datum);                              \
+		*((CTYPE *) arrow->buffers[1]) = FROMDATUM(datum);                                         \
 		break
 
-	switch (pgtype)
+	switch (arithmetic_type)
 	{
 		FOR_TYPE(INT8OID, int64, DatumGetInt64);
 		FOR_TYPE(INT4OID, int32, DatumGetInt32);
@@ -68,15 +71,73 @@ make_single_value_arrow(Oid pgtype, Datum datum, bool isnull)
 		FOR_TYPE(TIMESTAMPOID, Timestamp, DatumGetTimestamp);
 		FOR_TYPE(DATEOID, DateADT, DatumGetDateADT);
 		default:
-			elog(ERROR, "unexpected column type '%s'", format_type_be(pgtype));
+			elog(ERROR, "unexpected column type '%s'", format_type_be(arithmetic_type));
 			pg_unreachable();
 	}
 
-	arrow_set_row_validity(&with_buffers->nulls_buffer, 0, true);
+	return arrow;
+}
+
+/*
+ * Create a single-value ArrowArray of text. This is a specialized function
+ * because the text ArrowArray has a specialized layout.
+ */
+static ArrowArray *
+make_single_value_arrow_text(Datum datum, bool isnull)
+{
+	struct ArrowWithBuffers
+	{
+		ArrowArray arrow;
+		uint64 arrow_buffers_array_storage[3];
+		uint64 validity_buffer[1];
+		uint32 offsets_buffer[2];
+		/* The value buffer has 64-byte padding as required by Arrow. */
+		uint64 values_buffer[8];
+	};
+
+	struct ArrowWithBuffers *with_buffers = palloc0(sizeof(struct ArrowWithBuffers));
+	ArrowArray *arrow = &with_buffers->arrow;
+	arrow->length = 1;
+	arrow->buffers = (const void **) with_buffers->arrow_buffers_array_storage;
+	arrow->n_buffers = 3;
+	arrow->buffers[0] = with_buffers->validity_buffer;
+	arrow->buffers[1] = with_buffers->offsets_buffer;
+	arrow->buffers[2] = with_buffers->values_buffer;
+
+	if (isnull)
+	{
+		/*
+		 * The validity bitmap was initialized to invalid on allocation, and
+		 * the Datum might be invalid if the value is null (important on i386
+		 * where it might be pass-by-reference), so don't read it.
+		 */
+		arrow->null_count = 1;
+		return arrow;
+	}
+
+	arrow_set_row_validity((uint64 *) arrow->buffers[0], 0, true);
+	text *detoasted = PG_DETOAST_DATUM(datum);
+	((uint32 *) arrow->buffers[1])[1] = VARSIZE_ANY_EXHDR(detoasted);
+	arrow->buffers[2] = VARDATA(detoasted);
 
 	return arrow;
 }
 
+/*
+ * Create a single value ArrowArray from Postgres Datum. This is used to run
+ * the usual vectorized predicates on compressed columns with default values.
+ */
+static ArrowArray *
+make_single_value_arrow(Oid pgtype, Datum datum, bool isnull)
+{
+	if (pgtype == TEXTOID)
+	{
+		return make_single_value_arrow_text(datum, isnull);
+	}
+
+	return make_single_value_arrow_arithmetic(pgtype, datum, isnull);
+}
+
 static int
 get_max_text_datum_size(ArrowArray *text_array)
 {
@@ -224,6 +285,50 @@ decompress_column(DecompressContext *dcontext, DecompressBatchState *batch_state
 	}
 }
 
+/*
+ * When we have a dictionary-encoded Arrow array and have run a predicate on
+ * the dictionary, this function translates the per-entry dictionary predicate
+ * result into the final per-row predicate result.
+ */
+static void
+translate_bitmap_from_dictionary(const ArrowArray *arrow, const uint64 *dict_result,
+								 uint64 *restrict final_result)
+{
+	Assert(arrow->dictionary != NULL);
+
+	const size_t n = arrow->length;
+	const int16 *indices = (int16 *) arrow->buffers[1];
+	for (size_t outer = 0; outer < n / 64; outer++)
+	{
+		uint64 word = 0;
+		for (size_t inner = 0; inner < 64; inner++)
+		{
+			const size_t row = outer * 64 + inner;
+			const size_t bit_index = inner;
+#define INNER_LOOP                                                                                 \
+	const int16 index = indices[row];                                                              \
+	const bool valid = arrow_row_is_valid(dict_result, index);                                     \
+	word |= ((uint64) valid) << bit_index;
+
+			INNER_LOOP
+		}
+		final_result[outer] &= word;
+	}
+
+	if (n % 64)
+	{
+		uint64 word = 0;
+		for (size_t row = (n / 64) * 64; row < n; row++)
+		{
+			const size_t bit_index = row % 64;
+
+			INNER_LOOP
+		}
+		final_result[n / 64] &= word;
+	}
+#undef INNER_LOOP
+}
+
 static void
 compute_plain_qual(DecompressContext *dcontext, DecompressBatchState *batch_state, Node *qual,
 				   uint64 *restrict result)
@@ -380,6 +485,27 @@ compute_plain_qual(DecompressContext *dcontext, DecompressBatchState *batch_stat
 	Const *constnode = castNode(Const, lsecond(args));
 	Ensure(!constnode->constisnull, "vectorized predicate called for a null value");
 
+	/*
+	 * If the data is dictionary-encoded, we are going to compute the
+	 * predicate on the dictionary and then translate the results.
+	 */
+	const ArrowArray *vector_nodict = NULL;
+	uint64 *restrict predicate_result_nodict = NULL;
+	uint64 dict_result[(GLOBAL_MAX_ROWS_PER_COMPRESSION + 63) / 64];
+	if (vector->dictionary)
+	{
+		const size_t dict_rows = vector->dictionary->length;
+		const size_t dict_result_words = (dict_rows + 63) / 64;
+		memset(dict_result, 0xFF, dict_result_words * 8);
+		predicate_result_nodict = dict_result;
+		vector_nodict = vector->dictionary;
+	}
+	else
+	{
+		predicate_result_nodict = predicate_result;
+		vector_nodict = vector;
+	}
+
 	/*
 	 * At last, compute the predicate.
 	 */
@@ -387,13 +513,22 @@ compute_plain_qual(DecompressContext *dcontext, DecompressBatchState *batch_stat
 	{
 		vector_array_predicate(vector_const_predicate,
 							   saop->useOr,
-							   vector,
+							   vector_nodict,
 							   constnode->constvalue,
-							   predicate_result);
+							   predicate_result_nodict);
 	}
 	else
 	{
-		vector_const_predicate(vector, constnode->constvalue, predicate_result);
+		vector_const_predicate(vector_nodict, constnode->constvalue, predicate_result_nodict);
+	}
+
+	/*
+	 * If the vector is dictionary-encoded, we have just computed the
+	 * predicate for the dictionary and now have to translate it.
+	 */
+	if (vector->dictionary)
+	{
+		translate_bitmap_from_dictionary(vector, predicate_result_nodict, predicate_result);
 	}
 
 	/*
@@ -850,7 +985,7 @@ make_next_tuple(DecompressBatchState *batch_state, uint16 arrow_row, int num_com
 			 * such as UUID.
 			 */
 			const uint8 value_bytes = column_values->decompression_type;
-			const char *restrict src = column_values->buffers[1];
+			const char *src = column_values->buffers[1];
 			*column_values->output_value = PointerGetDatum(&src[value_bytes * arrow_row]);
 			*column_values->output_isnull = !arrow_row_is_valid(column_values->buffers[0], arrow_row);
@@ -867,7 +1002,7 @@ make_next_tuple(DecompressBatchState *batch_state, uint16 arrow_row, int num_com
 			 */
 			const uint8 value_bytes = column_values->decompression_type;
 			Assert(value_bytes <= SIZEOF_DATUM);
-			const char *restrict src = column_values->buffers[1];
+			const char *src = column_values->buffers[1];
 			memcpy(column_values->output_value, &src[value_bytes * arrow_row], SIZEOF_DATUM);
 			*column_values->output_isnull = !arrow_row_is_valid(column_values->buffers[0], arrow_row);
diff --git a/tsl/src/nodes/decompress_chunk/planner.c b/tsl/src/nodes/decompress_chunk/planner.c
index 3dce92da4c3..492cb280b75 100644
--- a/tsl/src/nodes/decompress_chunk/planner.c
+++ b/tsl/src/nodes/decompress_chunk/planner.c
@@ -601,6 +601,18 @@ make_vectorized_qual(DecompressChunkPath *path, Node *qual)
 		return NULL;
 	}
 
+	if (OidIsValid(var->varcollid) && !get_collation_isdeterministic(var->varcollid))
+	{
+		/*
+		 * Can't vectorize string equality with a nondeterministic collation.
+		 * Not sure if we have to check the collation of Const as well, but it
+		 * will be known only at planning time. Currently we don't check it at
+		 * all. Also this is untested because we don't have nondeterministic
+		 * collations in all test configurations.
+		 */
+		return NULL;
+	}
+
 	if (opexpr)
 	{
 		/*
diff --git a/tsl/src/nodes/decompress_chunk/pred_text.c b/tsl/src/nodes/decompress_chunk/pred_text.c
new file mode 100644
index 00000000000..487fb61f032
--- /dev/null
+++ b/tsl/src/nodes/decompress_chunk/pred_text.c
@@ -0,0 +1,172 @@
+/*
+ * This file and its contents are licensed under the Timescale License.
+ * Please see the included NOTICE for copyright information and
+ * LICENSE-TIMESCALE for a copy of the license.
+ */
+
+#include "pred_text.h"
+
+#include <postgres.h>
+#include <miscadmin.h>
+
+#include "compat/compat.h"
+
+#if PG16_GE
+#include <varatt.h>
+#endif
+
+static void
+vector_const_text_comparison(const ArrowArray *arrow, const Datum constdatum, bool needequal,
+							 uint64 *restrict result)
+{
+	Assert(!arrow->dictionary);
+
+	text *consttext = (text *) DatumGetPointer(constdatum);
+	const size_t textlen = VARSIZE_ANY_EXHDR(consttext);
+	const uint8 *cstring = (uint8 *) VARDATA_ANY(consttext);
+	const uint32 *offsets = (uint32 *) arrow->buffers[1];
+	const uint8 *values = (uint8 *) arrow->buffers[2];
+
+	const size_t n = arrow->length;
+	for (size_t outer = 0; outer < n / 64; outer++)
+	{
+		uint64 word = 0;
+		for (size_t inner = 0; inner < 64; inner++)
+		{
+			const size_t row = outer * 64 + inner;
+			const size_t bit_index = inner;
+#define INNER_LOOP                                                                                 \
+	const uint32 start = offsets[row];                                                             \
+	const uint32 end = offsets[row + 1];                                                           \
+	Assert(end >= start);                                                                          \
+	const uint32 veclen = end - start;                                                             \
+	bool isequal = veclen != textlen ?                                                             \
+						   false :                                                                 \
+						   (strncmp((char *) &values[start], (char *) cstring, textlen) == 0);     \
+	word |= ((uint64) (isequal == needequal)) << bit_index;
+
+			INNER_LOOP
+		}
+		result[outer] &= word;
+	}
+
+	if (n % 64)
+	{
+		uint64 word = 0;
+		for (size_t row = (n / 64) * 64; row < n; row++)
+		{
+			const size_t bit_index = row % 64;
+			INNER_LOOP
+		}
+		result[n / 64] &= word;
+	}
+
+#undef INNER_LOOP
+}
+
+void
+vector_const_texteq(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result)
+{
+	vector_const_text_comparison(arrow, constdatum, /* needequal = */ true, result);
+}
+
+void
+vector_const_textne(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result)
+{
+	vector_const_text_comparison(arrow, constdatum, /* needequal = */ false, result);
+}
+
+/*
+ * Generate specializations for LIKE functions based on database encoding. This
+ * follows the Postgres code from backend/utils/adt/like.c, version 15.0,
+ * commit sha 2a7ce2e2ce474504a707ec03e128fde66cfb8b48.
+ * The copy of PG code begins here.
+ * ----------------------------------------------------------------------------
+ */
+
+#define LIKE_TRUE 1
+#define LIKE_FALSE 0
+#define LIKE_ABORT (-1)
+
+/* setup to compile like_match.c for UTF8 encoding, using fast NextChar */
+#define NextByte(p, plen) ((p)++, (plen)--)
+#define NextChar(p, plen)                                                                          \
+	do                                                                                             \
+	{                                                                                              \
+		(p)++;                                                                                     \
+		(plen)--;                                                                                  \
+	} while ((plen) > 0 && (*(p) & 0xC0) == 0x80)
+#define MatchText UTF8_MatchText
+
+#include "import/ts_like_match.c"
+
+/*
+ * ----------------------------------------------------------------------------
+ * The copy of PG code ends here.
+ */
+
+static void
+vector_const_like_impl(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result,
+					   int (*match)(const char *, int, const char *, int), bool should_match)
+{
+	Assert(!arrow->dictionary);
+
+	text *consttext = (text *) DatumGetPointer(constdatum);
+	const size_t textlen = VARSIZE_ANY_EXHDR(consttext);
+	const char *restrict cstring = VARDATA_ANY(consttext);
+	const uint32 *offsets = (uint32 *) arrow->buffers[1];
+	const char *restrict values = arrow->buffers[2];
+
+	const size_t n = arrow->length;
+	for (size_t outer = 0; outer < n / 64; outer++)
+	{
+		uint64 word = 0;
+		for (size_t inner = 0; inner < 64; inner++)
+		{
+			const size_t row = outer * 64 + inner;
+			const size_t bit_index = inner;
+			/*
+			 * The inner loop could have been an inline function, but it would have 5
+			 * parameters and one of them in/out, so a macro probably has better
+			 * readability.
+			 */
+#define INNER_LOOP                                                                                 \
+	const uint32 start = offsets[row];                                                             \
+	const uint32 end = offsets[row + 1];                                                           \
+	Assert(end >= start);                                                                          \
+	const uint32 veclen = end - start;                                                             \
+	int result = match(&values[start], veclen, cstring, textlen);                                  \
+	bool valid = (result == LIKE_TRUE) == should_match;                                            \
+	word |= ((uint64) valid) << bit_index;
+
+			INNER_LOOP
+		}
+		result[outer] &= word;
+	}
+
+	if (n % 64)
+	{
+		uint64 word = 0;
+		for (size_t row = (n / 64) * 64; row < n; row++)
+		{
+			const size_t bit_index = row % 64;
+			INNER_LOOP
+		}
+		result[n / 64] &= word;
+	}
+
+#undef INNER_LOOP
+}
+
+void
+vector_const_textlike_utf8(const ArrowArray *arrow, const Datum constdatum, uint64 *restrict result)
+{
+	vector_const_like_impl(arrow, constdatum, result, UTF8_MatchText, /* should_match = */ true);
+}
+
+void
+vector_const_textnlike_utf8(const ArrowArray *arrow, const Datum constdatum,
+							uint64 *restrict result)
+{
+	vector_const_like_impl(arrow, constdatum, result, UTF8_MatchText, /* should_match = */ false);
+}
diff --git a/tsl/src/nodes/decompress_chunk/pred_text.h b/tsl/src/nodes/decompress_chunk/pred_text.h
new file mode 100644
index 00000000000..467af660b7b
--- /dev/null
+++ b/tsl/src/nodes/decompress_chunk/pred_text.h
@@ -0,0 +1,22 @@
+/*
+ * This file and its contents are licensed under the Timescale License.
+ * Please see the included NOTICE for copyright information and
+ * LICENSE-TIMESCALE for a copy of the license.
+ */
+#pragma once
+
+#include <postgres.h>
+
+#include "compression/arrow_c_data_interface.h"
+
+extern void vector_const_texteq(const ArrowArray *arrow, const Datum constdatum,
+								uint64 *restrict result);
+
+extern void vector_const_textne(const ArrowArray *arrow, const Datum constdatum,
+								uint64 *restrict result);
+
+extern void vector_const_textlike_utf8(const ArrowArray *arrow, const Datum constdatum,
+									   uint64 *restrict result);
+
+extern void vector_const_textnlike_utf8(const ArrowArray *arrow, const Datum constdatum,
+										uint64 *restrict result);
diff --git a/tsl/src/nodes/decompress_chunk/pred_vector_array.c b/tsl/src/nodes/decompress_chunk/pred_vector_array.c
index dccdbe771d3..f15d217b571 100644
--- a/tsl/src/nodes/decompress_chunk/pred_vector_array.c
+++ b/tsl/src/nodes/decompress_chunk/pred_vector_array.c
@@ -17,9 +17,9 @@
  * vector and each element of array, combines the result according to "is_or"
  * flag. Written along the lines of ExecEvalScalarArrayOp().
  */
-static inline void
-vector_array_predicate_impl(VectorPredicate *vector_const_predicate, bool is_or,
-							const ArrowArray *vector, Datum array, uint64 *restrict final_result)
+void
+vector_array_predicate(VectorPredicate *vector_const_predicate, bool is_or,
+					   const ArrowArray *vector, Datum array, uint64 *restrict final_result)
 {
 	const size_t n_rows = vector->length;
 	const size_t result_words = (n_rows + 63) / 64;
@@ -49,7 +49,7 @@ vector_array_predicate_impl(VectorPredicate *vector_const_predicate, bool is_or,
 	const char *array_data = (const char *) ARR_DATA_PTR(arr);
 	const size_t nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
-	const uint64 *restrict array_null_bitmap = (uint64 *) ARR_NULLBITMAP(arr);
+	const uint64 *array_null_bitmap = (uint64 *) ARR_NULLBITMAP(arr);
 
 	for (size_t array_index = 0; array_index < nitems; array_index++)
 	{
@@ -79,7 +79,7 @@ vector_array_predicate_impl(VectorPredicate *vector_const_predicate, bool is_or,
 		}
 		Datum constvalue = fetch_att(array_data, typbyval, typlen);
 		array_data = att_addlength_pointer(array_data, typlen, array_data);
-		array_data = (char *) att_align_nominal(array_data, typalign);
+		array_data = (const char *) att_align_nominal(array_data, typalign);
 
 		/*
 		 * For OR, we also need an intermediate storage for predicate result
@@ -137,28 +137,3 @@ vector_array_predicate_impl(VectorPredicate *vector_const_predicate, bool is_or,
 		}
 	}
 }
-
-/*
- * This is a thin wrapper to nudge the compiler to specialize the AND version
- * which is much simpler than the OR version.
- */
-static pg_noinline void
-vector_array_predicate_and(VectorPredicate *scalar_predicate, const ArrowArray *vector, Datum array,
-						   uint64 *restrict result)
-{
-	vector_array_predicate_impl(scalar_predicate, /* is_or = */ false, vector, array, result);
-}
-
-void
-vector_array_predicate(VectorPredicate *scalar_predicate, bool is_or, const ArrowArray *vector,
-					   Datum array, uint64 *restrict result)
-{
-	if (is_or)
-	{
-		vector_array_predicate_impl(scalar_predicate, /* is_or = */ true, vector, array, result);
-	}
-	else
-	{
-		vector_array_predicate_and(scalar_predicate, vector, array, result);
-	}
-}
diff --git a/tsl/src/nodes/decompress_chunk/vector_predicates.c b/tsl/src/nodes/decompress_chunk/vector_predicates.c
index ae1ce9a4e0a..44310157511 100644
--- a/tsl/src/nodes/decompress_chunk/vector_predicates.c
+++ b/tsl/src/nodes/decompress_chunk/vector_predicates.c
@@ -12,6 +12,7 @@
 
 #include 
 #include 
+#include <mb/pg_wchar.h>
 
 #include "compression/arrow_c_data_interface.h"
 
@@ -27,6 +28,8 @@
  */
 #include "pred_vector_const_arithmetic_all.c"
 
+#include "pred_text.h"
+
 /*
  * Look up the vectorized implementation for a Postgres predicate, specified by
  * its Oid in pg_proc. Note that this Oid is different from the opcode.
@@ -39,7 +42,26 @@ get_vector_const_predicate(Oid pg_predicate)
 #define GENERATE_DISPATCH_TABLE
 #include "pred_vector_const_arithmetic_all.c"
 #undef GENERATE_DISPATCH_TABLE
+
+		case F_TEXTEQ:
+			return vector_const_texteq;
+
+		case F_TEXTNE:
+			return vector_const_textne;
 	}
+
+	if (GetDatabaseEncoding() == PG_UTF8)
+	{
+		/* We have some simple LIKE vectorization for case-sensitive UTF8. */
+		switch (pg_predicate)
+		{
+			case F_TEXTLIKE:
+				return vector_const_textlike_utf8;
+			case F_TEXTNLIKE:
+				return vector_const_textnlike_utf8;
+		}
+	}
+
 	return NULL;
 }
diff --git a/tsl/src/nodes/decompress_chunk/vector_predicates.h b/tsl/src/nodes/decompress_chunk/vector_predicates.h
index c8874efeef3..141563149dd 100644
--- a/tsl/src/nodes/decompress_chunk/vector_predicates.h
+++ b/tsl/src/nodes/decompress_chunk/vector_predicates.h
@@ -13,8 +13,8 @@
 typedef void(VectorPredicate)(const ArrowArray *, Datum, uint64 *restrict);
 
 VectorPredicate *get_vector_const_predicate(Oid pg_predicate);
 
-void vector_array_predicate(VectorPredicate *scalar_predicate, bool is_or, const ArrowArray *vector,
-							Datum array, uint64 *restrict result);
+void vector_array_predicate(VectorPredicate *vector_const_predicate, bool is_or,
+							const ArrowArray *vector, Datum array, uint64 *restrict final_result);
 
 void vector_nulltest(const ArrowArray *arrow, int test_type, uint64 *restrict result);
 
diff --git a/tsl/test/expected/decompress_vector_qual.out b/tsl/test/expected/decompress_vector_qual.out
index 23f105775b5..8e77da74fc6 100644
--- a/tsl/test/expected/decompress_vector_qual.out
+++ b/tsl/test/expected/decompress_vector_qual.out
@@ -1011,6 +1011,33 @@ select count(*) from vectorqual where ts > '2024-01-01' or (metric3 = 888 and me
      1
 (1 row)
 
+-- On versions >= 14, the Postgres planner chooses to build a hash table for
+-- large arrays. We currently don't vectorize in this case.
+select 1 from set_config('timescaledb.debug_require_vector_qual',
+    case when current_setting('server_version_num')::int >= 140000 then 'forbid' else 'only' end,
+    false);
+ ?column? 
+----------
+        1
+(1 row)
+
+select count(*) from singlebatch where metric2 = any(array[
+ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
+10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
+50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
+70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+90, 91, 92, 93, 94, 95, 96, 97, 98, 99
+]::int8[]);
+ count 
+-------
+     5
+(1 row)
+
 reset timescaledb.enable_bulk_decompression;
 reset timescaledb.debug_require_vector_qual;
 -- Comparison with other column not vectorized.
@@ -1166,7 +1193,7 @@ select * from date_table where ts < CURRENT_DATE;
  01-01-2021
 (3 rows)
 
--- Text columns. Only tests bulk decompression for now.
+-- Text columns.
 create table text_table(ts int, d int);
 select create_hypertable('text_table', 'ts');
 NOTICE:  adding not-null constraint to column "ts"
@@ -1199,6 +1226,7 @@ insert into text_table select x, 8, repeat( x::text || 'a',
 insert into text_table select x + 100, 8, repeat((101 - x)::text || 'b', (101 - x)) from generate_series(1, 100) x;
 insert into text_table select x + 200, 8, repeat((101 - x)::text || 'c', (101 - x)) from generate_series(1, 100) x;
 insert into text_table select x + 300, 8, repeat( x::text || 'd', x) from generate_series(1, 100) x;
+-- Use uncompressed table as reference.
 set timescaledb.debug_require_vector_qual to 'forbid';
 select sum(length(a)) from text_table;
    sum   
 ---------
  1006400
 (1 row)
 
 select count(distinct a) from text_table;
  count 
 -------
   2905
 (1 row)
 
@@ -1225,6 +1253,7 @@
 select count(compress_chunk(x, true)) from show_chunks('text_table') x;
  count 
 -------
      1
 (1 row)
 
 select compress_chunk(x) from show_chunks('text_table') x;
 NOTICE:  chunk "_hyper_9_17_chunk" is already compressed
              compress_chunk             
 ----------------------------------------
  _timescaledb_internal._hyper_9_17_chunk
 (1 row)
 
+-- Check result with decompression.
 set timescaledb.enable_bulk_decompression to on;
 set timescaledb.debug_require_vector_qual to 'forbid';
 select sum(length(a)) from text_table;
    sum   
 ---------
  1006400
 (1 row)
 
@@ -1239,3 +1268,295 @@ select count(distinct a) from text_table;
  count 
 -------
   2905
 (1 row)
 
+-- Test vectorized predicates.
+set timescaledb.debug_require_vector_qual to 'only';
+-- -- Uncomment to generate the test reference w/o the vector optimizations.
+-- set timescaledb.enable_bulk_decompression to off;
+-- set timescaledb.debug_require_vector_qual to 'forbid';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'default';
+ count | min | max  | min | max 
+-------+-----+------+-----+-----
+  1000 |   1 | 1000 |   0 |   0
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = '';
+ count | min | max  | min | max 
+-------+-----+------+-----+-----
+  1000 |   1 | 1000 |   1 |   1
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'same';
+ count | min | max  | min | max 
+-------+-----+------+-----+-----
+  1000 |   1 | 1000 |   2 |   2
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a != 'same';
+ count | min | max  | min | max 
+-------+-----+------+-----+-----
+  6400 |   1 | 1000 |   0 |   8
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'одинаковый';
+ count | min | max  | min | max 
+-------+-----+------+-----+-----
+  1000 |   1 | 1000 |   6 |   6
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'same-with-nulls';
+ count | min | max | min | max 
+-------+-----+-----+-----+-----
+   500 |   1 | 999 |   4 |   4
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'different1';
+ count | min | max | min | max 
+-------+-----+-----+-----+-----
+     1 |   1 |   1 |   3 |   3
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = '異なる1';
+ count | min | max | min | max 
+-------+-----+-----+-----+-----
+     1 |   1 |   1 |   7 |   7
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'different-with-nulls1';
+ count | min | max | min | max 
+-------+-----+-----+-----+-----
+     1 |   1 |   1 |   5 |   5
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'different1000';
+ count | min  | max  | min | max 
+-------+------+------+-----+-----
+     1 | 1000 | 1000 |   3 |   3
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'different-with-nulls999';
+ count | min | max | min | max 
+-------+-----+-----+-----+-----
+     1 | 999 | 999 |   5 |   5
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a in ('same', 'different500');
+ count | min | max  | min | max 
+-------+-----+------+-----+-----
+  1001 |   1 | 1000 |   2 |   3
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a in ('same-with-nulls', 'different-with-nulls499');
+ count | min | max | min | max 
+-------+-----+-----+-----+-----
+   501 |   1 | 999 |   4 |   5
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a in ('different500', 'default');
+ count | min | max  | min | max 
+-------+-----+------+-----+-----
+  1001 |   1 | 1000 |   0 |   3
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'different500' or a = 'default';
+ count | min | max  | min | max 
+-------+-----+------+-----+-----
+  1001 |   1 | 1000 |   0 |   3
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a is null;
+ count | min | max  | min | max 
+-------+-----+------+-----+-----
+  1000 |   2 | 1000 |   4 |   5
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a is not null;
+ count | min | max  | min | max 
+-------+-----+------+-----+-----
+  7400 |   1 | 1000 |   0 |   8
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%same%';
+ count | min | max  | min | max 
+-------+-----+------+-----+-----
+  1500 |   1 | 1000 |   2 |   4
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%одинаковый%';
+ count | min | max  | min | max 
+-------+-----+------+-----+-----
+  1000 |   1 | 1000 |   6 |   6
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%одилаковый%';
+ count | min | max | min | max 
+-------+-----+-----+-----+-----
+     0 |     |     |     |    
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%одимаковый%';
+ count | min | max | min | max 
+-------+-----+-----+-----+-----
+     0 |     |     |     |    
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%異なる%';
+ count | min | max  | min | max 
+-------+-----+------+-----+-----
+  1000 |   1 | 1000 |   7 |   7
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%異オる%';
+ count | min | max | min | max 
+-------+-----+-----+-----+-----
+     0 |     |     |     |    
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%異にる%';
+ count | min | max | min | max 
+-------+-----+-----+-----+-----
+     0 |     |     |     |    
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '異_る_';
+ count | min | max | min | max 
+-------+-----+-----+-----+-----
+     9 |   1 |   9 |   7 |   7
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%';
+ count | min | max  | min | max 
+-------+-----+------+-----+-----
+  7400 |   1 | 1000 |   0 |   8
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%different1%';
+ count | min | max  | min | max 
+-------+-----+------+-----+-----
+   112 |   1 | 1000 |   3 |   3
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%different1';
+ count | min | max | min | max 
+-------+-----+-----+-----+-----
+     1 |   1 |   1 |   3 |   3
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%%';
+ count | min | max  | min | max 
+-------+-----+------+-----+-----
+  1500 |   1 | 1000 |   3 |   5
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%%1';
+ count | min | max | min | max 
+-------+-----+-----+-----+-----
+   200 |   1 | 991 |   3 |   5
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%\1';
+ count | min | max | min | max 
+-------+-----+-----+-----+-----
+   200 |   1 | 991 |   3 |   5
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%_';
+ count | min | max  | min | max 
+-------+-----+------+-----+-----
+  1500 |   1 | 1000 |   3 |   5
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%__';
+ count | min | max  | min | max 
+-------+-----+------+-----+-----
+  1491 |   1 | 1000 |   3 |   5
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%___';
+ count | min | max  | min | max 
+-------+-----+------+-----+-----
+  1401 |   1 | 1000 |   3 |   5
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%_1';
+ count | min | max | min | max 
+-------+-----+-----+-----+-----
+   199 |   1 | 991 |   3 |   5
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%nulls_';
+ count | min | max | min | max 
+-------+-----+-----+-----+-----
+     5 |   1 |   9 |   5 |   5
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different1%';
+ count | min | max  | min | max 
+-------+-----+------+-----+-----
+   112 |   1 | 1000 |   3 |   3
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different\%';
+ count | min | max | min | max 
+-------+-----+-----+-----+-----
+     0 |     |     |     |    
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different\1';
+ count | min | max | min | max 
+-------+-----+-----+-----+-----
+     1 |   1 |   1 |   3 |   3
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different_%1';
+ count | min | max | min | max 
+-------+-----+-----+-----+-----
+   199 |   1 | 991 |   3 |   5
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different_';
+ count | min | max | min | max 
+-------+-----+-----+-----+-----
+     9 |   1 |   9 |   3 |   3
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different_1';
+ count | min | max | min | max 
+-------+-----+-----+-----+-----
+     9 |  11 |  91 |   3 |   3
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'same_';
+ count | min | max | min | max 
+-------+-----+-----+-----+-----
+     0 |     |     |     |    
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a not like '%different1%';
+ count | min | max  | min | max 
+-------+-----+------+-----+-----
+  7288 |   1 | 1000 |   0 |   8
+(1 row)
+
+\set ON_ERROR_STOP 0
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different\';
+ERROR:  LIKE pattern must not end with escape character
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%\';
+ERROR:  LIKE pattern must not end with escape character
+\set ON_ERROR_STOP 1
+-- We don't vectorize comparison operators with text because they are probably
+-- not very useful.
+set timescaledb.debug_require_vector_qual to 'forbid';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a < 'same';
+ count | min | max  | min | max 
+-------+-----+------+-----+-----
+  3900 |   1 | 1000 |   0 |   8
+(1 row)
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a > 'same';
+ count | min | max  | min | max 
+-------+-----+------+-----+-----
+  2500 |   1 | 1000 |   4 |   7
+(1 row)
+
+reset timescaledb.debug_require_vector_qual;
+reset timescaledb.enable_bulk_decompression;
diff --git a/tsl/test/sql/decompress_vector_qual.sql b/tsl/test/sql/decompress_vector_qual.sql
index 7d0b9c40cc0..074c546282e 100644
--- a/tsl/test/sql/decompress_vector_qual.sql
+++ b/tsl/test/sql/decompress_vector_qual.sql
@@ -316,6 +316,26 @@ select count(*) from vectorqual where ts > '2024-01-01' or (metric3 = 777 and me
 select count(*) from vectorqual where ts > '2024-01-01' or (metric3 = 888 and metric2 = 12);
 select count(*) from vectorqual where ts > '2024-01-01' or (metric3 = 888 and metric2 = 666);
 
+
+-- On versions >= 14, the Postgres planner chooses to build a hash table for
+-- large arrays. We currently don't vectorize in this case.
+select 1 from set_config('timescaledb.debug_require_vector_qual',
+    case when current_setting('server_version_num')::int >= 140000 then 'forbid' else 'only' end,
+    false);
+
+select count(*) from singlebatch where metric2 = any(array[
+ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
+10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
+50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
+70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+90, 91, 92, 93, 94, 95, 96, 97, 98, 99
+]::int8[]);
+
 reset timescaledb.enable_bulk_decompression;
 reset timescaledb.debug_require_vector_qual;
 
@@ -381,7 +401,7 @@ select * from date_table where ts <= '2021-01-02';
 select * from date_table where ts < '2021-01-02';
 select * from date_table where ts < CURRENT_DATE;
 
--- Text columns. Only tests bulk decompression for now.
+-- Text columns.
 create table text_table(ts int, d int);
 select create_hypertable('text_table', 'ts');
 alter table text_table set (timescaledb.compress, timescaledb.compress_segmentby = 'd');
@@ -406,6 +426,7 @@ insert into text_table select x + 100, 8, repeat((101 - x)::text || 'b', (101 -
 insert into text_table select x + 200, 8, repeat((101 - x)::text || 'c', (101 - x)) from generate_series(1, 100) x;
 insert into text_table select x + 300, 8, repeat( x::text || 'd', x) from generate_series(1, 100) x;
 
+-- Use uncompressed table as reference.
 set timescaledb.debug_require_vector_qual to 'forbid';
 select sum(length(a)) from text_table;
 select count(distinct a) from text_table;
@@ -413,8 +434,81 @@ select count(distinct a) from text_table;
 select count(compress_chunk(x, true)) from show_chunks('text_table') x;
 select compress_chunk(x) from show_chunks('text_table') x;
 
+-- Check result with decompression.
 set timescaledb.enable_bulk_decompression to on;
 set timescaledb.debug_require_vector_qual to 'forbid';
 select sum(length(a)) from text_table;
 select count(distinct a) from text_table;
+
+
+-- Test vectorized predicates.
+set timescaledb.debug_require_vector_qual to 'only';
+-- -- Uncomment to generate the test reference w/o the vector optimizations.
+-- set timescaledb.enable_bulk_decompression to off;
+-- set timescaledb.debug_require_vector_qual to 'forbid';
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'default';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = '';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'same';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a != 'same';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'одинаковый';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'same-with-nulls';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'different1';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = '異なる1';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'different-with-nulls1';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'different1000';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'different-with-nulls999';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a in ('same', 'different500');
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a in ('same-with-nulls', 'different-with-nulls499');
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a in ('different500', 'default');
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a = 'different500' or a = 'default';
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a is null;
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a is not null;
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%same%';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%одинаковый%';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%одилаковый%';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%одимаковый%';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%異なる%';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%異オる%';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%異にる%';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '異_る_';
+
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%different1%';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like '%different1';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%%';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%%1';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%\1';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%_';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%__';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%___';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%_1';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%nulls_';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different1%';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different\%';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different\1';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different_%1';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different_';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different_1';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'same_';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a not like '%different1%';
+
+\set ON_ERROR_STOP 0
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different\';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a like 'different%\';
+\set ON_ERROR_STOP 1
+
+
+-- We don't vectorize comparison operators with text because they are probably
+-- not very useful.
+set timescaledb.debug_require_vector_qual to 'forbid';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a < 'same';
+select count(*), min(ts), max(ts), min(d), max(d) from text_table where a > 'same';
+
+
+reset timescaledb.debug_require_vector_qual;
+reset timescaledb.enable_bulk_decompression;
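
A minimal standalone sketch of the word-at-a-time bitmap pattern that vector_const_text_comparison() and vector_const_like_impl() use above. This is illustrative C, not code from the patch; the function and variable names are invented for the example. One predicate result bit is computed per row, 64 bits are packed into a word, and the word is AND-ed into the caller's filter bitmap so that quals computed earlier stay in force; a scalar tail loop handles the last partial word.

/* Hypothetical illustration of the 64-rows-per-word predicate loop. */
#include <stdint.h>
#include <stdio.h>

static void
bitmap_and_equal(const int32_t *values, size_t n, int32_t constant, uint64_t *result)
{
	for (size_t outer = 0; outer < n / 64; outer++)
	{
		uint64_t word = 0;
		for (size_t inner = 0; inner < 64; inner++)
		{
			/* One bit per row: set when the row passes the predicate. */
			word |= ((uint64_t) (values[outer * 64 + inner] == constant)) << inner;
		}
		/* AND, not assignment: bits cleared by earlier quals must stay cleared. */
		result[outer] &= word;
	}

	if (n % 64)
	{
		/* Scalar tail for the rows that don't fill a whole 64-bit word. */
		uint64_t word = 0;
		for (size_t row = (n / 64) * 64; row < n; row++)
			word |= ((uint64_t) (values[row] == constant)) << (row % 64);
		result[n / 64] &= word;
	}
}

int
main(void)
{
	int32_t values[5] = { 7, 1, 7, 2, 7 };
	uint64_t result[1] = { ~UINT64_C(0) };
	bitmap_and_equal(values, 5, 7, result);
	printf("%llx\n", (unsigned long long) result[0]); /* prints 15: rows 0, 2, 4 match */
	return 0;
}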
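A simplified sketch of the dictionary shortcut implemented by translate_bitmap_from_dictionary() in compressed_batch.c. Again hypothetical names, not the patch's code: the idea is that for a dictionary-encoded vector the predicate runs once per distinct dictionary entry, and the per-row result is then derived by looking each row's int16 dictionary index up in that small bitmap, instead of re-running a comparatively expensive string predicate on every row.

/* Hypothetical illustration of translating a per-dictionary-entry bitmap
 * into a per-row bitmap. */
#include <stdint.h>
#include <stdio.h>

static int
bit_is_set(const uint64_t *bitmap, size_t i)
{
	return (bitmap[i / 64] >> (i % 64)) & 1;
}

static void
translate_dict_bitmap(const int16_t *indices, size_t n_rows, const uint64_t *dict_result,
					  uint64_t *final_result)
{
	for (size_t row = 0; row < n_rows; row++)
	{
		/* Clear the row's bit if its dictionary entry failed the predicate;
		 * this preserves AND semantics with previously applied quals. */
		if (!bit_is_set(dict_result, (size_t) indices[row]))
			final_result[row / 64] &= ~(UINT64_C(1) << (row % 64));
	}
}

int
main(void)
{
	/* Two distinct strings in the dictionary; only entry 1 passes. */
	uint64_t dict_result[1] = { 0x2 };
	int16_t indices[4] = { 0, 1, 1, 0 };
	uint64_t final_result[1] = { ~UINT64_C(0) };
	translate_dict_bitmap(indices, 4, dict_result, final_result);
	printf("%llx\n", (unsigned long long) (final_result[0] & 0xF)); /* prints 6: rows 1, 2 pass */
	return 0;
}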
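Finally, a toy illustration of the Arrow variable-size binary layout that the text predicates and make_single_value_arrow_text() assume (an assumed simplification, not the patch's code): buffer 1 holds n + 1 offsets and buffer 2 the concatenated string bodies, so row i spans values[offsets[i] .. offsets[i + 1]), and equality can short-circuit on length before touching the bytes.

/* Hypothetical illustration of reading rows out of an offsets + bodies layout. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	const char values[] = "samedifferent1";   /* rows "same" and "different1", concatenated */
	const uint32_t offsets[3] = { 0, 4, 14 }; /* n + 1 offsets for n = 2 rows */
	const char *needle = "same";
	const uint32_t needle_len = 4;

	for (int row = 0; row < 2; row++)
	{
		const uint32_t start = offsets[row];
		const uint32_t len = offsets[row + 1] - start;
		/* Compare lengths first, then the bytes, as the equality predicate does. */
		const int isequal = len == needle_len && memcmp(&values[start], needle, needle_len) == 0;
		printf("row %d: %.*s -> %d\n", row, (int) len, &values[start], isequal);
	}
	return 0;
}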