diff --git a/cmake/libraries.cmake b/cmake/libraries.cmake index 1b43d523bfa..f705a062cbc 100644 --- a/cmake/libraries.cmake +++ b/cmake/libraries.cmake @@ -12,15 +12,14 @@ set(FLB_PATH_LIB_CHUNKIO "lib/chunkio") set(FLB_PATH_LIB_LUAJIT "lib/luajit-3065c9") set(FLB_PATH_LIB_MONKEY "lib/monkey") set(FLB_PATH_LIB_JSMN "lib/jsmn") -set(FLB_PATH_LIB_MBEDTLS "lib/mbedtls-2.28.0") -set(FLB_PATH_LIB_SQLITE "lib/sqlite-amalgamation-3390300") +set(FLB_PATH_LIB_SQLITE "lib/sqlite-amalgamation-3440200") set(FLB_PATH_LIB_JANSSON "lib/jansson-e23f558") set(FLB_PATH_LIB_ONIGMO "lib/onigmo") -set(FLB_PATH_LIB_MPACK "lib/mpack-amalgamation-1.1") +set(FLB_PATH_LIB_MPACK "lib/mpack-amalgamation-1.1.1") set(FLB_PATH_LIB_MINIZ "lib/miniz") set(FLB_PATH_LIB_TUTF8E "lib/tutf8e") set(FLB_PATH_LIB_CARES "lib/c-ares-1.24.0") set(FLB_PATH_LIB_SNAPPY "lib/snappy-fef67ac") -set(FLB_PATH_LIB_RDKAFKA "lib/librdkafka-2.1.0") +set(FLB_PATH_LIB_RDKAFKA "lib/librdkafka-2.3.0") set(FLB_PATH_LIB_RING_BUFFER "lib/lwrb") set(FLB_PATH_LIB_WASM_MICRO_RUNTIME "lib/wasm-micro-runtime-WAMR-1.2.2") diff --git a/lib/librdkafka-2.1.0/debian/watch b/lib/librdkafka-2.1.0/debian/watch deleted file mode 100644 index 7b3bdea1132..00000000000 --- a/lib/librdkafka-2.1.0/debian/watch +++ /dev/null @@ -1,2 +0,0 @@ -version=3 -https://github.com/edenhill/librdkafka/tags .*/v?(\d[\d\.]*)\.tar\.gz diff --git a/lib/librdkafka-2.1.0/packaging/debian/watch b/lib/librdkafka-2.1.0/packaging/debian/watch deleted file mode 100644 index fc9aec86fc2..00000000000 --- a/lib/librdkafka-2.1.0/packaging/debian/watch +++ /dev/null @@ -1,2 +0,0 @@ -version=3 -http://github.com/edenhill/librdkafka/tags .*/(\d[\d\.]*)\.tar\.gz diff --git a/lib/librdkafka-2.1.0/src/rdkafka_assignor.h b/lib/librdkafka-2.1.0/src/rdkafka_assignor.h deleted file mode 100644 index b90e7dc980d..00000000000 --- a/lib/librdkafka-2.1.0/src/rdkafka_assignor.h +++ /dev/null @@ -1,212 +0,0 @@ -/* - * librdkafka - The Apache Kafka C/C++ library - * - * Copyright (c) 2015 Magnus Edenhill - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ -#ifndef _RDKAFKA_ASSIGNOR_H_ -#define _RDKAFKA_ASSIGNOR_H_ - - - -/*! - * Enumerates the different rebalance protocol types. 
- * - * @sa rd_kafka_rebalance_protocol() - */ -typedef enum rd_kafka_rebalance_protocol_t { - RD_KAFKA_REBALANCE_PROTOCOL_NONE, /**< Rebalance protocol is - unknown */ - RD_KAFKA_REBALANCE_PROTOCOL_EAGER, /**< Eager rebalance - protocol */ - RD_KAFKA_REBALANCE_PROTOCOL_COOPERATIVE /**< Cooperative - rebalance protocol*/ -} rd_kafka_rebalance_protocol_t; - - - -typedef struct rd_kafka_group_member_s { - /** Subscribed topics (partition field is ignored). */ - rd_kafka_topic_partition_list_t *rkgm_subscription; - /** Partitions assigned to this member after running the assignor. - * E.g., the current assignment coming out of the rebalance. */ - rd_kafka_topic_partition_list_t *rkgm_assignment; - /** Partitions reported as currently owned by the member, read - * from consumer metadata. E.g., the current assignment going into - * the rebalance. */ - rd_kafka_topic_partition_list_t *rkgm_owned; - /** List of eligible topics in subscription. E.g., subscribed topics - * that exist. */ - rd_list_t rkgm_eligible; - /** Member id (e.g., client.id-some-uuid). */ - rd_kafkap_str_t *rkgm_member_id; - /** Group instance id. */ - rd_kafkap_str_t *rkgm_group_instance_id; - /** Member-specific opaque userdata. */ - rd_kafkap_bytes_t *rkgm_userdata; - /** Member metadata, e.g., the currently owned partitions. */ - rd_kafkap_bytes_t *rkgm_member_metadata; - /** Group generation id. */ - int rkgm_generation; -} rd_kafka_group_member_t; - - -int rd_kafka_group_member_cmp(const void *_a, const void *_b); - -int rd_kafka_group_member_find_subscription(rd_kafka_t *rk, - const rd_kafka_group_member_t *rkgm, - const char *topic); - - -/** - * Structure to hold metadata for a single topic and all its - * subscribing members. - */ -typedef struct rd_kafka_assignor_topic_s { - const rd_kafka_metadata_topic_t *metadata; - rd_list_t members; /* rd_kafka_group_member_t * */ -} rd_kafka_assignor_topic_t; - - -int rd_kafka_assignor_topic_cmp(const void *_a, const void *_b); - - -typedef struct rd_kafka_assignor_s { - rd_kafkap_str_t *rkas_protocol_type; - rd_kafkap_str_t *rkas_protocol_name; - - int rkas_enabled; - - /** Order for strategies. 
*/ - int rkas_index; - - rd_kafka_rebalance_protocol_t rkas_protocol; - - rd_kafka_resp_err_t (*rkas_assign_cb)( - rd_kafka_t *rk, - const struct rd_kafka_assignor_s *rkas, - const char *member_id, - const rd_kafka_metadata_t *metadata, - rd_kafka_group_member_t *members, - size_t member_cnt, - rd_kafka_assignor_topic_t **eligible_topics, - size_t eligible_topic_cnt, - char *errstr, - size_t errstr_size, - void *opaque); - - rd_kafkap_bytes_t *(*rkas_get_metadata_cb)( - const struct rd_kafka_assignor_s *rkas, - void *assignor_state, - const rd_list_t *topics, - const rd_kafka_topic_partition_list_t *owned_partitions); - - void (*rkas_on_assignment_cb)( - const struct rd_kafka_assignor_s *rkas, - void **assignor_state, - const rd_kafka_topic_partition_list_t *assignment, - const rd_kafkap_bytes_t *assignment_userdata, - const rd_kafka_consumer_group_metadata_t *rkcgm); - - void (*rkas_destroy_state_cb)(void *assignor_state); - - int (*rkas_unittest)(void); - - void *rkas_opaque; -} rd_kafka_assignor_t; - - -rd_kafka_resp_err_t rd_kafka_assignor_add( - rd_kafka_t *rk, - const char *protocol_type, - const char *protocol_name, - rd_kafka_rebalance_protocol_t rebalance_protocol, - rd_kafka_resp_err_t (*assign_cb)( - rd_kafka_t *rk, - const struct rd_kafka_assignor_s *rkas, - const char *member_id, - const rd_kafka_metadata_t *metadata, - rd_kafka_group_member_t *members, - size_t member_cnt, - rd_kafka_assignor_topic_t **eligible_topics, - size_t eligible_topic_cnt, - char *errstr, - size_t errstr_size, - void *opaque), - rd_kafkap_bytes_t *(*get_metadata_cb)( - const struct rd_kafka_assignor_s *rkas, - void *assignor_state, - const rd_list_t *topics, - const rd_kafka_topic_partition_list_t *owned_partitions), - void (*on_assignment_cb)(const struct rd_kafka_assignor_s *rkas, - void **assignor_state, - const rd_kafka_topic_partition_list_t *assignment, - const rd_kafkap_bytes_t *userdata, - const rd_kafka_consumer_group_metadata_t *rkcgm), - void (*destroy_state_cb)(void *assignor_state), - int (*unittest_cb)(void), - void *opaque); - -rd_kafkap_bytes_t *rd_kafka_consumer_protocol_member_metadata_new( - const rd_list_t *topics, - const void *userdata, - size_t userdata_size, - const rd_kafka_topic_partition_list_t *owned_partitions); - -rd_kafkap_bytes_t *rd_kafka_assignor_get_metadata_with_empty_userdata( - const rd_kafka_assignor_t *rkas, - void *assignor_state, - const rd_list_t *topics, - const rd_kafka_topic_partition_list_t *owned_partitions); - - -void rd_kafka_assignor_update_subscription( - const rd_kafka_assignor_t *rkas, - const rd_kafka_topic_partition_list_t *subscription); - - -rd_kafka_resp_err_t rd_kafka_assignor_run(struct rd_kafka_cgrp_s *rkcg, - const rd_kafka_assignor_t *rkas, - rd_kafka_metadata_t *metadata, - rd_kafka_group_member_t *members, - int member_cnt, - char *errstr, - size_t errstr_size); - -rd_kafka_assignor_t *rd_kafka_assignor_find(rd_kafka_t *rk, - const char *protocol); - -int rd_kafka_assignors_init(rd_kafka_t *rk, char *errstr, size_t errstr_size); -void rd_kafka_assignors_term(rd_kafka_t *rk); - - - -void rd_kafka_group_member_clear(rd_kafka_group_member_t *rkgm); - - -rd_kafka_resp_err_t rd_kafka_range_assignor_init(rd_kafka_t *rk); -rd_kafka_resp_err_t rd_kafka_roundrobin_assignor_init(rd_kafka_t *rk); -rd_kafka_resp_err_t rd_kafka_sticky_assignor_init(rd_kafka_t *rk); - -#endif /* _RDKAFKA_ASSIGNOR_H_ */ diff --git a/lib/librdkafka-2.1.0/src/rdkafka_range_assignor.c b/lib/librdkafka-2.1.0/src/rdkafka_range_assignor.c deleted file mode 100644 index 
c83f1f1a44f..00000000000 --- a/lib/librdkafka-2.1.0/src/rdkafka_range_assignor.c +++ /dev/null @@ -1,138 +0,0 @@ -/* - * librdkafka - The Apache Kafka C/C++ library - * - * Copyright (c) 2015 Magnus Edenhill - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ -#include "rdkafka_int.h" -#include "rdkafka_assignor.h" - - - -/** - * Source: - * https://github.com/apache/kafka/blob/trunk/clients/src/main/java/org/apache/kafka/clients/consumer/RangeAssignor.java - * - * The range assignor works on a per-topic basis. For each topic, we lay out the - * available partitions in numeric order and the consumers in lexicographic - * order. We then divide the number of partitions by the total number of - * consumers to determine the number of partitions to assign to each consumer. - * If it does not evenly divide, then the first few consumers will have one - * extra partition. - * - * For example, suppose there are two consumers C0 and C1, two topics t0 and t1, - * and each topic has 3 partitions, resulting in partitions t0p0, t0p1, t0p2, - * t1p0, t1p1, and t1p2. - * - * The assignment will be: - * C0: [t0p0, t0p1, t1p0, t1p1] - * C1: [t0p2, t1p2] - */ - -rd_kafka_resp_err_t -rd_kafka_range_assignor_assign_cb(rd_kafka_t *rk, - const rd_kafka_assignor_t *rkas, - const char *member_id, - const rd_kafka_metadata_t *metadata, - rd_kafka_group_member_t *members, - size_t member_cnt, - rd_kafka_assignor_topic_t **eligible_topics, - size_t eligible_topic_cnt, - char *errstr, - size_t errstr_size, - void *opaque) { - unsigned int ti; - int i; - - /* The range assignor works on a per-topic basis. */ - for (ti = 0; ti < eligible_topic_cnt; ti++) { - rd_kafka_assignor_topic_t *eligible_topic = eligible_topics[ti]; - int numPartitionsPerConsumer; - int consumersWithExtraPartition; - - /* For each topic, we lay out the available partitions in - * numeric order and the consumers in lexicographic order. */ - rd_list_sort(&eligible_topic->members, - rd_kafka_group_member_cmp); - - /* We then divide the number of partitions by the total number - * of consumers to determine the number of partitions to assign - * to each consumer. 
*/ - numPartitionsPerConsumer = - eligible_topic->metadata->partition_cnt / - rd_list_cnt(&eligible_topic->members); - - /* If it does not evenly divide, then the first few consumers - * will have one extra partition. */ - consumersWithExtraPartition = - eligible_topic->metadata->partition_cnt % - rd_list_cnt(&eligible_topic->members); - - rd_kafka_dbg(rk, CGRP, "ASSIGN", - "range: Topic %s with %d partition(s) and " - "%d subscribing member(s)", - eligible_topic->metadata->topic, - eligible_topic->metadata->partition_cnt, - rd_list_cnt(&eligible_topic->members)); - - for (i = 0; i < rd_list_cnt(&eligible_topic->members); i++) { - rd_kafka_group_member_t *rkgm = - rd_list_elem(&eligible_topic->members, i); - int start = numPartitionsPerConsumer * i + - RD_MIN(i, consumersWithExtraPartition); - int length = - numPartitionsPerConsumer + - (i + 1 > consumersWithExtraPartition ? 0 : 1); - - if (length == 0) - continue; - - rd_kafka_dbg(rk, CGRP, "ASSIGN", - "range: Member \"%s\": " - "assigned topic %s partitions %d..%d", - rkgm->rkgm_member_id->str, - eligible_topic->metadata->topic, start, - start + length - 1); - rd_kafka_topic_partition_list_add_range( - rkgm->rkgm_assignment, - eligible_topic->metadata->topic, start, - start + length - 1); - } - } - - return 0; -} - - - -/** - * @brief Initialzie and add range assignor. - */ -rd_kafka_resp_err_t rd_kafka_range_assignor_init(rd_kafka_t *rk) { - return rd_kafka_assignor_add( - rk, "consumer", "range", RD_KAFKA_REBALANCE_PROTOCOL_EAGER, - rd_kafka_range_assignor_assign_cb, - rd_kafka_assignor_get_metadata_with_empty_userdata, NULL, NULL, - NULL, NULL); -} diff --git a/lib/librdkafka-2.1.0/.clang-format-cpp b/lib/librdkafka-2.3.0/.clang-format-cpp similarity index 100% rename from lib/librdkafka-2.1.0/.clang-format-cpp rename to lib/librdkafka-2.3.0/.clang-format-cpp diff --git a/lib/librdkafka-2.1.0/.dir-locals.el b/lib/librdkafka-2.3.0/.dir-locals.el similarity index 100% rename from lib/librdkafka-2.1.0/.dir-locals.el rename to lib/librdkafka-2.3.0/.dir-locals.el diff --git a/lib/librdkafka-2.1.0/.formatignore b/lib/librdkafka-2.3.0/.formatignore similarity index 100% rename from lib/librdkafka-2.1.0/.formatignore rename to lib/librdkafka-2.3.0/.formatignore diff --git a/lib/librdkafka-2.1.0/.gdbmacros b/lib/librdkafka-2.3.0/.gdbmacros similarity index 100% rename from lib/librdkafka-2.1.0/.gdbmacros rename to lib/librdkafka-2.3.0/.gdbmacros diff --git a/lib/librdkafka-2.1.0/.github/ISSUE_TEMPLATE b/lib/librdkafka-2.3.0/.github/ISSUE_TEMPLATE similarity index 77% rename from lib/librdkafka-2.1.0/.github/ISSUE_TEMPLATE rename to lib/librdkafka-2.3.0/.github/ISSUE_TEMPLATE index ed7b6165fcd..648040edd75 100644 --- a/lib/librdkafka-2.1.0/.github/ISSUE_TEMPLATE +++ b/lib/librdkafka-2.3.0/.github/ISSUE_TEMPLATE @@ -1,6 +1,6 @@ -Read the FAQ first: https://github.com/edenhill/librdkafka/wiki/FAQ +Read the FAQ first: https://github.com/confluentinc/librdkafka/wiki/FAQ -Do NOT create issues for questions, use the discussion forum: https://github.com/edenhill/librdkafka/discussions +Do NOT create issues for questions, use the discussion forum: https://github.com/confluentinc/librdkafka/discussions @@ -14,7 +14,7 @@ How to reproduce -**IMPORTANT**: Always try to reproduce the issue on the latest released version (see https://github.com/edenhill/librdkafka/releases), if it can't be reproduced on the latest version the issue has been fixed. 
+**IMPORTANT**: Always try to reproduce the issue on the latest released version (see https://github.com/confluentinc/librdkafka/releases); if it can't be reproduced on the latest version, the issue has been fixed. Checklist diff --git a/lib/librdkafka-2.1.0/.gitignore b/lib/librdkafka-2.3.0/.gitignore similarity index 100% rename from lib/librdkafka-2.1.0/.gitignore rename to lib/librdkafka-2.3.0/.gitignore diff --git a/lib/librdkafka-2.1.0/.semaphore/project.yml b/lib/librdkafka-2.3.0/.semaphore/project.yml similarity index 100% rename from lib/librdkafka-2.1.0/.semaphore/project.yml rename to lib/librdkafka-2.3.0/.semaphore/project.yml diff --git a/lib/librdkafka-2.3.0/.semaphore/project_public.yml b/lib/librdkafka-2.3.0/.semaphore/project_public.yml new file mode 100644 index 00000000000..7e095c94d90 --- /dev/null +++ b/lib/librdkafka-2.3.0/.semaphore/project_public.yml @@ -0,0 +1,20 @@ +# This file is managed by ServiceBot plugin - Semaphore. The content in this file is created using a common +# template and configurations in service.yml. +# Modifications in this file will be overwritten by generated content in the nightly run. +# For more information, please refer to the page: +# https://confluentinc.atlassian.net/wiki/spaces/Foundations/pages/2871296194/Add+SemaphoreCI +apiVersion: v1alpha +kind: Project +metadata: + name: librdkafka + description: "" +spec: + visibility: private + repository: + url: git@github.com:confluentinc/librdkafka.git + pipeline_file: .semaphore/semaphore.yml + integration_type: github_app + status: + pipeline_files: + - path: .semaphore/semaphore.yml + level: pipeline diff --git a/lib/librdkafka-2.1.0/.semaphore/semaphore.yml b/lib/librdkafka-2.3.0/.semaphore/semaphore.yml similarity index 96% rename from lib/librdkafka-2.1.0/.semaphore/semaphore.yml rename to lib/librdkafka-2.3.0/.semaphore/semaphore.yml index 275bb76aaf1..f58bcc23e1a 100644 --- a/lib/librdkafka-2.1.0/.semaphore/semaphore.yml +++ b/lib/librdkafka-2.3.0/.semaphore/semaphore.yml @@ -3,6 +3,8 @@ name: 'librdkafka build and release artifact pipeline' agent: machine: type: s1-prod-macos-arm64 +execution_time_limit: + hours: 3 global_job_config: prologue: commands: @@ -105,6 +107,12 @@ blocks: agent: machine: type: s1-prod-ubuntu20-04-amd64-2 + env_vars: + - name: CFLAGS + value: -std=gnu90 # Test minimum C standard, default in CentOS 7 + prologue: + commands: + - docker login --username $DOCKERHUB_USER --password $DOCKERHUB_APIKEY jobs: - name: 'Build and integration tests' commands: @@ -126,7 +134,7 @@ blocks: - make -j -C tests build - make -C tests run_local_quick - DESTDIR="$PWD/dest" make install - - (cd tests && python3 -m trivup.clusters.KafkaCluster --version 3.1.0 --cmd 'make quick') + - (cd tests && python3 -m trivup.clusters.KafkaCluster --version 3.4.0 --cmd 'make quick') - name: 'Linux x64: release artifact docker builds' @@ -137,6 +145,9 @@ blocks: agent: machine: type: s1-prod-ubuntu20-04-amd64-2 + prologue: + commands: + - docker login --username $DOCKERHUB_USER --password $DOCKERHUB_APIKEY epilogue: commands: - '[[ -z $SEMAPHORE_GIT_TAG_NAME ]] || artifact push workflow artifacts/ --destination artifacts/${ARTIFACT_KEY}/' @@ -190,6 +201,9 @@ blocks: agent: machine: type: s1-prod-ubuntu20-04-arm64-1 + prologue: + commands: + - docker login --username $DOCKERHUB_USER --password $DOCKERHUB_APIKEY epilogue: commands: - '[[ -z $SEMAPHORE_GIT_TAG_NAME ]] || artifact push workflow artifacts/ --destination artifacts/${ARTIFACT_KEY}/' @@ -236,11 +250,8 @@ blocks: value: UCRT64 prologue:
commands: - - cache restore msys2-x64-${Env:ARTIFACT_KEY} # Set up msys2 - "& .\\win32\\setup-msys2.ps1" - - cache delete msys2-x64-${Env:ARTIFACT_KEY} - - cache store msys2-x64-${Env:ARTIFACT_KEY} c:/msys64 epilogue: commands: - if ($env:SEMAPHORE_GIT_TAG_NAME -ne "") { artifact push workflow artifacts/ --destination artifacts/$Env:ARTIFACT_KEY/ } @@ -274,8 +285,6 @@ # install vcpkg in the parent directory. - pwd - cd .. - # Restore vcpkg caches, if any. - - cache restore vcpkg-archives-$Env:ARTIFACT_KEY # Setup vcpkg - "& .\\librdkafka\\win32\\setup-vcpkg.ps1" - cd librdkafka @@ -284,11 +293,8 @@ - ..\vcpkg\vcpkg --feature-flags=versions install --triplet $Env:triplet - cd .. - pwd # Store vcpkg caches - ls vcpkg/ - echo $Env:VCPKG_ROOT - - cache delete vcpkg-archives-$Env:ARTIFACT_KEY - - cache store vcpkg-archives-$Env:ARTIFACT_KEY C:/Users/semaphore/AppData/Local/vcpkg/archives - pwd - cd librdkafka epilogue: diff --git a/lib/librdkafka-2.1.0/CHANGELOG.md b/lib/librdkafka-2.3.0/CHANGELOG.md similarity index 81% rename from lib/librdkafka-2.1.0/CHANGELOG.md rename to lib/librdkafka-2.3.0/CHANGELOG.md index 857526c6eb8..ea7206ceacc 100644 --- a/lib/librdkafka-2.1.0/CHANGELOG.md +++ b/lib/librdkafka-2.3.0/CHANGELOG.md @@ -1,3 +1,224 @@ +# librdkafka v2.3.0 + +librdkafka v2.3.0 is a feature release: + + * [KIP-516](https://cwiki.apache.org/confluence/display/KAFKA/KIP-516%3A+Topic+Identifiers) + Partial support of topic identifiers. Topic identifiers in the metadata response are + available through the new `rd_kafka_DescribeTopics` function (#4300, #4451). + * [KIP-117](https://cwiki.apache.org/confluence/display/KAFKA/KIP-117%3A+Add+a+public+AdminClient+API+for+Kafka+admin+operations) Add support for AdminAPI `DescribeCluster()` and `DescribeTopics()` + (#4240, @jainruchir). + * [KIP-430](https://cwiki.apache.org/confluence/display/KAFKA/KIP-430+-+Return+Authorized+Operations+in+Describe+Responses): + Return authorized operations in Describe Responses + (#4240, @jainruchir). + * [KIP-580](https://cwiki.apache.org/confluence/display/KAFKA/KIP-580%3A+Exponential+Backoff+for+Kafka+Clients): Added Exponential Backoff mechanism for + retriable requests with `retry.backoff.ms` as minimum backoff and `retry.backoff.max.ms` as the + maximum backoff, with 20% jitter (#4422). + * [KIP-396](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=97551484): completed the implementation with + the addition of ListOffsets (#4225). + * Fixed ListConsumerGroupOffsets not fetching offsets for all the topics in a group with Apache Kafka version below 2.4.0. + * Add a missing destroy that otherwise leaks partition structure memory when there + are partition leader changes and a stale leader epoch is received (#4429). + * Fix a segmentation fault when closing a consumer using the + cooperative-sticky assignor before the first assignment (#4381). + * Fix for insufficient buffer allocation when allocating rack information (@wolfchimneyrock, #4449). + * Fix for infinite loop of OffsetForLeaderEpoch requests on quick leader changes (#4433). + * Fix to add leader epoch to control messages, to make sure they're stored + for committing even without a subsequent fetch message (#4434). + * Fix for stored offsets not being committed if they lacked the leader epoch (#4442). + * Upgrade OpenSSL to v3.0.11 (while building from source) with various security fixes, + check the [release notes](https://www.openssl.org/news/cl30.txt) + (#4454, started by @migarc1).
+ * Fix to ensure permanent errors during offset validation continue being retried and + don't cause an offset reset (#4447). + * Fix to ensure max.poll.interval.ms is reset when rd_kafka_poll is called with + consume_cb (#4431). + * Fix for idempotent producer fatal errors, triggered after a possibly persisted message state (#4438). + * Fix `rd_kafka_query_watermark_offsets` continuing beyond timeout expiry (#4460). + * Fix `rd_kafka_query_watermark_offsets` not refreshing the partition leader + after a leader change and subsequent `NOT_LEADER_OR_FOLLOWER` error (#4225). + + +## Upgrade considerations + + * `retry.backoff.ms`: + If it is set greater than `retry.backoff.max.ms`, which has the default value of 1000 ms, then it assumes the value of `retry.backoff.max.ms`. + To change this behaviour make sure that `retry.backoff.ms` is always less than `retry.backoff.max.ms`. + If equal then the backoff will be linear instead of exponential. + + * `topic.metadata.refresh.fast.interval.ms`: + If it is set greater than `retry.backoff.max.ms`, which has the default value of 1000 ms, then it assumes the value of `retry.backoff.max.ms`. + To change this behaviour make sure that `topic.metadata.refresh.fast.interval.ms` is always less than `retry.backoff.max.ms`. + If equal then the backoff will be linear instead of exponential. + + +## Fixes + +### General fixes + + * An assertion failed with insufficient buffer size when allocating + rack information on 32bit architectures. + Solved by aligning all allocations to the maximum allowed word size (#4449). + * The timeout for `rd_kafka_query_watermark_offsets` was not enforced after + making the necessary ListOffsets requests, and thus, it never timed out in + case of broker/network issues. Fixed by setting an absolute timeout (#4460). + +### Idempotent producer fixes + + * After a possibly persisted error, such as a disconnection or a timeout, the next expected sequence + used to increase, leading to a fatal error if the message wasn't persisted and + the second one in queue failed with an `OUT_OF_ORDER_SEQUENCE_NUMBER`. + The error could contain the message "sequence desynchronization" with + just one possibly persisted error or "rewound sequence number" in case of + multiple errored messages. + Solved by treating the possibly persisted message as _not_ persisted, + and expecting a `DUPLICATE_SEQUENCE_NUMBER` error in case it was or + `NO_ERROR` in case it wasn't; in both cases the message will be considered + delivered (#4438). + +### Consumer fixes + + * Stored offsets were excluded from the commit if the leader epoch was + less than the committed epoch, as is possible if the leader epoch is the default -1. + This didn't happen in the Python, Go and .NET bindings when the stored position was + taken from the message. + Solved by checking only that the stored offset is greater + than the committed one, if either the stored or committed leader epoch is -1 (#4442). + * If an OffsetForLeaderEpoch request was being retried, and the leader changed + while the retry was in-flight, an infinite loop of requests was triggered, + because we weren't updating the leader epoch correctly. + Fixed by updating the leader epoch before sending the request (#4433). + * During offset validation a permanent error like host resolution failure + would cause an offset reset. + This isn't what's expected or what the Java implementation does. + Solved by retrying even in case of permanent errors (#4447).
+ * If using `rd_kafka_poll_set_consumer` along with a consume callback, + calling `rd_kafka_poll` to service the callbacks would not reset + `max.poll.interval.ms`. This was because we were only checking `rk_rep` for + consumer messages, while the method to service the queue internally also + services the queue forwarded to from `rk_rep`, which is `rkcg_q`. + Solved by moving the `max.poll.interval.ms` check into `rd_kafka_q_serve` (#4431). + * After a leader change a `rd_kafka_query_watermark_offsets` call would continue + trying to call ListOffsets on the old leader, if the topic wasn't included in + the subscription set, so it started querying the new leader only after + `topic.metadata.refresh.interval.ms` (#4225). + + + +# librdkafka v2.2.0 + +librdkafka v2.2.0 is a feature release: + + * Fix a segmentation fault when subscribing to non-existent topics and + using the consume batch functions (#4273). + * Store offset commit metadata in `rd_kafka_offsets_store` (@mathispesch, #4084). + * Fix a bug that happens when skipping tags, causing buffer underflow in + MetadataResponse (#4278). + * Fix a bug where the topic leader is not refreshed in the same metadata call even if the leader is + present. + * [KIP-881](https://cwiki.apache.org/confluence/display/KAFKA/KIP-881%3A+Rack-aware+Partition+Assignment+for+Kafka+Consumers): + Add support for rack-aware partition assignment for consumers + (#4184, #4291, #4252). + * Fix several bugs with the sticky assignor in case of partition ownership + changing between members of the consumer group (#4252). + * [KIP-368](https://cwiki.apache.org/confluence/display/KAFKA/KIP-368%3A+Allow+SASL+Connections+to+Periodically+Re-Authenticate): + Allow SASL Connections to Periodically Re-Authenticate + (#4301, started by @vctoriawu). + * Avoid treating an OpenSSL error as a permanent error and treat unclean SSL + closes as normal ones (#4294). + * Added `fetch.queue.backoff.ms` to the consumer to control how long + the consumer backs off the next fetch attempt (@bitemyapp, @edenhill, #2879). + * [KIP-235](https://cwiki.apache.org/confluence/display/KAFKA/KIP-235%3A+Add+DNS+alias+support+for+secured+connection): + Add DNS alias support for secured connection (#4292). + * [KIP-339](https://cwiki.apache.org/confluence/display/KAFKA/KIP-339%3A+Create+a+new+IncrementalAlterConfigs+API): + IncrementalAlterConfigs API (started by @PrasanthV454, #4110). + * [KIP-554](https://cwiki.apache.org/confluence/display/KAFKA/KIP-554%3A+Add+Broker-side+SCRAM+Config+API): Add Broker-side SCRAM Config API (#4241). + + +## Enhancements + + * Added `fetch.queue.backoff.ms` to the consumer to control how long + the consumer backs off the next fetch attempt. When the pre-fetch queue + has exceeded its queuing thresholds: `queued.min.messages` and + `queued.max.messages.kbytes`, it backs off for 1 second. + If those parameters have to be set too high to hold 1 s of data, + this new parameter allows backing off the fetch earlier, reducing memory + requirements. + + +## Fixes + +### General fixes + + * Fix a bug that happens when skipping tags, causing buffer underflow in + MetadataResponse. This is triggered since RPC version 9 (v2.1.0), + when using Confluent Platform, only when racks are set, + observers are activated and there is more than one partition. + Fixed by skipping the correct number of bytes when tags are received. + * Avoid treating an OpenSSL error as a permanent error and treat unclean SSL + closes as normal ones.
When SSL connections are closed without `close_notify`, + in OpenSSL 3.x a new type of error is set and it was interpreted as permanent + in librdkafka. It can cause a different issue depending on the RPC. + If received when waiting for an OffsetForLeaderEpoch response, it triggers + an offset reset following the configured policy. + Solved by treating SSL errors as transport errors and + by setting an OpenSSL flag that allows treating unclean SSL closes as normal + ones. These types of errors can happen if the other side doesn't support `close_notify` or if there's a TCP connection reset. + + +### Consumer fixes + + * In case of multiple owners of a partition with different generations, the + sticky assignor would pick the earliest (lowest generation) member as the + current owner, which would lead to stickiness violations. Fixed by + choosing the latest (highest generation) member. + * In the case where the same partition is owned by two members with the same + generation, it indicates an issue. The sticky assignor had some code to + handle this, but it was non-functional, and did not have parity with the + Java assignor. Fixed by invalidating any such partition from the current + assignment completely. + + + +# librdkafka v2.1.1 + +librdkafka v2.1.1 is a maintenance release: + + * Avoid duplicate messages when a fetch response is received + in the middle of an offset validation request (#4261). + * Fix segmentation fault when subscribing to a non-existent topic and + calling `rd_kafka_message_leader_epoch()` on the polled `rkmessage` (#4245). + * Fix a segmentation fault when fetching from follower and the partition lease + expires while waiting for the result of a list offsets operation (#4254). + * Fix documentation for the admin request timeout, incorrectly stating -1 for infinite + timeout. That timeout can't be infinite. + * Fix CMake pkg-config cURL require and use + pkg-config `Requires.private` field (@FantasqueX, @stertingen, #4180). + * Fixes certain cases where polling would not keep the consumer + in the group or make it rejoin it (#4256). + * Fix to the C++ set_leader_epoch method of TopicPartitionImpl, + that wasn't storing the passed value (@pavel-pimenov, #4267). + +## Fixes + +### Consumer fixes + + * Duplicate messages can be emitted when a fetch response is received + in the middle of an offset validation request. Solved by avoiding + a restart from the last application offset when offset validation succeeds. + * When fetching from a follower, if the partition lease expires after 5 minutes, + and a list offsets operation was requested to retrieve the earliest + or latest offset, it resulted in a segmentation fault. This was fixed by + allowing threads different from the main one to call + the `rd_kafka_toppar_set_fetch_state` function, given they hold + the lock on the `rktp`. + * In v2.1.0, a bug was fixed which caused polling any queue to reset + `max.poll.interval.ms`. Only certain functions were made to reset the timer, + but it is possible for the user to obtain the queue with messages from + the broker, skipping these functions. This was fixed by encoding information + in the queue itself, so that polling it resets the timer. + + + # librdkafka v2.1.0 librdkafka v2.1.0 is a feature release: @@ -64,11 +285,18 @@ librdkafka v2.1.0 is a feature release: any of the **seek**, **pause**, **resume** or **rebalancing** operation, `on_consume` interceptors might be called incorrectly (maybe multiple times) for not consumed messages.
+### Consume API + + * Duplicate messages can be emitted when a fetch response is received + in the middle of an offset validation request. + * Segmentation fault when subscribing to a non-existent topic and + calling `rd_kafka_message_leader_epoch()` on the polled `rkmessage`. + # librdkafka v2.0.2 -librdkafka v2.0.2 is a bugfix release: +librdkafka v2.0.2 is a maintenance release: * Fix OpenSSL version in Win32 nuget package (#4152). @@ -76,7 +304,7 @@ librdkafka v2.0.2 is a bugfix release: # librdkafka v2.0.1 -librdkafka v2.0.1 is a bugfix release: +librdkafka v2.0.1 is a maintenance release: * Fixed nuget package for Linux ARM64 release (#4150). @@ -502,7 +730,7 @@ librdkafka v1.8.0 is a security release: * Upgrade bundled zlib version from 1.2.8 to 1.2.11 in the `librdkafka.redist` NuGet package. The updated zlib version fixes CVEs: CVE-2016-9840, CVE-2016-9841, CVE-2016-9842, CVE-2016-9843 - See https://github.com/edenhill/librdkafka/issues/2934 for more information. + See https://github.com/confluentinc/librdkafka/issues/2934 for more information. * librdkafka now uses [vcpkg](https://vcpkg.io/) for up-to-date Windows dependencies in the `librdkafka.redist` NuGet package: OpenSSL 1.1.1l, zlib 1.2.11, zstd 1.5.0. @@ -1215,4 +1443,4 @@ v1.4.2 is a maintenance release with the following fixes and enhancements: # Older releases -See https://github.com/edenhill/librdkafka/releases +See https://github.com/confluentinc/librdkafka/releases diff --git a/lib/librdkafka-2.1.0/CMakeLists.txt b/lib/librdkafka-2.3.0/CMakeLists.txt similarity index 100% rename from lib/librdkafka-2.1.0/CMakeLists.txt rename to lib/librdkafka-2.3.0/CMakeLists.txt diff --git a/lib/librdkafka-2.1.0/CODE_OF_CONDUCT.md b/lib/librdkafka-2.3.0/CODE_OF_CONDUCT.md similarity index 87% rename from lib/librdkafka-2.1.0/CODE_OF_CONDUCT.md rename to lib/librdkafka-2.3.0/CODE_OF_CONDUCT.md index dbbde19c9c3..83503cf4a17 100644 --- a/lib/librdkafka-2.1.0/CODE_OF_CONDUCT.md +++ b/lib/librdkafka-2.3.0/CODE_OF_CONDUCT.md @@ -34,7 +34,7 @@ This Code of Conduct applies both within project spaces and in public spaces whe ## Enforcement -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at rdkafka@edenhill.se. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at cloud-support@confluent.io. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 
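The `retry.backoff.ms` / `retry.backoff.max.ms` interaction described in the v2.3.0 upgrade considerations above can be made concrete with a small sketch. The following is illustrative only, not librdkafka source: it assumes a doubling backoff per retry, clamped to the configured maximum, with the 20% jitter the changelog mentions (written in gnu90 style, per the CONTRIBUTING.md note further down in this diff).

```c
/* Illustrative sketch of the KIP-580 style exponential backoff described
 * in the v2.3.0 changelog above; not librdkafka source code.
 * Assumptions: backoff doubles per retry, is clamped to retry.backoff.max.ms,
 * and gets +/-20% jitter. */
#include <stdio.h>
#include <stdlib.h>

static double retry_backoff_ms(int retry, double base_ms, double max_ms) {
        double backoff = base_ms;
        int i;

        /* Per the upgrade note above: a base greater than the cap assumes
         * the cap's value; equal values degrade the backoff to a constant
         * (linear) delay rather than an exponential one. */
        if (backoff > max_ms)
                backoff = max_ms;
        for (i = 0; i < retry && backoff < max_ms; i++)
                backoff *= 2; /* exponential growth per retry */
        if (backoff > max_ms)
                backoff = max_ms;
        /* 20% jitter: scale by a random factor in [0.8, 1.2]. */
        return backoff * (0.8 + 0.4 * ((double)rand() / RAND_MAX));
}

int main(void) {
        int retry;

        /* Defaults from CONFIGURATION.md: 100 ms initial, 1000 ms cap. */
        for (retry = 0; retry < 6; retry++)
                printf("retry %d: ~%.0f ms\n", retry,
                       retry_backoff_ms(retry, 100.0, 1000.0));
        return 0;
}
```

With the defaults this yields roughly 100, 200, 400, 800, 1000, 1000 ms before jitter, which is why the changelog warns that setting the two properties equal produces a flat rather than exponential backoff.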
diff --git a/lib/librdkafka-2.1.0/CONFIGURATION.md b/lib/librdkafka-2.3.0/CONFIGURATION.md similarity index 96% rename from lib/librdkafka-2.1.0/CONFIGURATION.md rename to lib/librdkafka-2.3.0/CONFIGURATION.md index 0ebec417c79..4a75378b539 100644 --- a/lib/librdkafka-2.1.0/CONFIGURATION.md +++ b/lib/librdkafka-2.3.0/CONFIGURATION.md @@ -14,7 +14,7 @@ max.in.flight.requests.per.connection | * | 1 .. 1000000 | 1000000 max.in.flight | * | 1 .. 1000000 | 1000000 | low | Alias for `max.in.flight.requests.per.connection`: Maximum number of in-flight requests per broker connection. This is a generic property applied to all broker communication, however it is primarily relevant to produce requests. In particular, note that other mechanisms limit the number of outstanding consumer fetch request per broker to one.
*Type: integer* topic.metadata.refresh.interval.ms | * | -1 .. 3600000 | 300000 | low | Period of time in milliseconds at which topic and broker metadata is refreshed in order to proactively discover any new brokers, topics, partitions or partition leader changes. Use -1 to disable the intervalled refresh (not recommended). If there are no locally referenced topics (no topic objects created, no messages produced, no subscription or no assignment) then only the broker list will be refreshed every interval but no more often than every 10s.
*Type: integer* metadata.max.age.ms | * | 1 .. 86400000 | 900000 | low | Metadata cache max age. Defaults to topic.metadata.refresh.interval.ms * 3
*Type: integer* -topic.metadata.refresh.fast.interval.ms | * | 1 .. 60000 | 250 | low | When a topic loses its leader a new metadata request will be enqueued with this initial interval, exponentially increasing until the topic metadata has been refreshed. This is used to recover quickly from transitioning leader brokers.
*Type: integer* +topic.metadata.refresh.fast.interval.ms | * | 1 .. 60000 | 100 | low | When a topic loses its leader a new metadata request will be enqueued immediately and then with this initial interval, exponentially increasing up to `retry.backoff.max.ms`, until the topic metadata has been refreshed. If not set explicitly, it defaults to `retry.backoff.ms`. This is used to recover quickly from transitioning leader brokers.
*Type: integer* topic.metadata.refresh.fast.cnt | * | 0 .. 1000 | 10 | low | **DEPRECATED** No longer used.
*Type: integer* topic.metadata.refresh.sparse | * | true, false | true | low | Sparse metadata requests (consumes less network bandwidth)
*Type: boolean* topic.metadata.propagation.max.ms | * | 0 .. 3600000 | 30000 | low | Apache Kafka topic creation is asynchronous and it takes some time for a new topic to propagate throughout the cluster to all brokers. If a client requests topic metadata after manual topic creation but before the topic has been fully propagated to the broker the client is requesting metadata from, the topic will seem to be non-existent and the client will mark the topic as such, failing queued produced messages with `ERR__UNKNOWN_TOPIC`. This setting delays marking a topic as non-existent until the configured propagation max time has passed. The maximum propagation time is calculated from the time the topic is first referenced in the client, e.g., on produce().
*Type: integer* @@ -118,6 +118,7 @@ enable.auto.offset.store | C | true, false | true queued.min.messages | C | 1 .. 10000000 | 100000 | medium | Minimum number of messages per topic+partition librdkafka tries to maintain in the local consumer queue.
*Type: integer* queued.max.messages.kbytes | C | 1 .. 2097151 | 65536 | medium | Maximum number of kilobytes of queued pre-fetched messages in the local consumer queue. If using the high-level consumer this setting applies to the single consumer queue, regardless of the number of partitions. When using the legacy simple consumer or when separate partition queues are used this setting applies per partition. This value may be overshot by fetch.message.max.bytes. This property has higher priority than queued.min.messages.
*Type: integer* fetch.wait.max.ms | C | 0 .. 300000 | 500 | low | Maximum time the broker may wait to fill the Fetch response with fetch.min.bytes of messages.
*Type: integer* +fetch.queue.backoff.ms | C | 0 .. 300000 | 1000 | medium | How long to postpone the next fetch request for a topic+partition in case the current fetch queue thresholds (queued.min.messages or queued.max.messages.kbytes) have been exceeded. This property may need to be decreased if the queue thresholds are set low and the application is experiencing long (~1s) delays between messages. Low values may increase CPU utilization.
*Type: integer* fetch.message.max.bytes | C | 1 .. 1000000000 | 1048576 | medium | Initial maximum number of bytes per topic+partition to request when fetching messages from the broker. If the client encounters a message larger than this value it will gradually try to increase it until the entire message can be fetched.
*Type: integer* max.partition.fetch.bytes | C | 1 .. 1000000000 | 1048576 | medium | Alias for `fetch.message.max.bytes`: Initial maximum number of bytes per topic+partition to request when fetching messages from the broker. If the client encounters a message larger than this value it will gradually try to increase it until the entire message can be fetched.
*Type: integer* fetch.max.bytes | C | 0 .. 2147483135 | 52428800 | medium | Maximum amount of data the broker shall return for a Fetch request. Messages are fetched in batches by the consumer and if the first message batch in the first non-empty partition of the Fetch request is larger than this value, then the message batch will still be returned to ensure the consumer can make progress. The maximum message batch size accepted by the broker is defined via `message.max.bytes` (broker config) or `max.message.bytes` (broker topic config). `fetch.max.bytes` is automatically adjusted upwards to be at least `message.max.bytes` (consumer config).
*Type: integer* @@ -141,7 +142,8 @@ queue.buffering.max.ms | P | 0 .. 900000 | 5 linger.ms | P | 0 .. 900000 | 5 | high | Alias for `queue.buffering.max.ms`: Delay in milliseconds to wait for messages in the producer queue to accumulate before constructing message batches (MessageSets) to transmit to brokers. A higher value allows larger and more effective (less overhead, improved compression) batches of messages to accumulate at the expense of increased message delivery latency.
*Type: float* message.send.max.retries | P | 0 .. 2147483647 | 2147483647 | high | How many times to retry sending a failing Message. **Note:** retrying may cause reordering unless `enable.idempotence` is set to true.
*Type: integer* retries | P | 0 .. 2147483647 | 2147483647 | high | Alias for `message.send.max.retries`: How many times to retry sending a failing Message. **Note:** retrying may cause reordering unless `enable.idempotence` is set to true.
*Type: integer* -retry.backoff.ms | P | 1 .. 300000 | 100 | medium | The backoff time in milliseconds before retrying a protocol request.
*Type: integer* +retry.backoff.ms | P | 1 .. 300000 | 100 | medium | The backoff time in milliseconds before retrying a protocol request. This is the initial backoff time, which increases exponentially with each retry until the number of retries is exhausted, and is capped by retry.backoff.max.ms.
*Type: integer* +retry.backoff.max.ms | P | 1 .. 300000 | 1000 | medium | The maximum backoff time in milliseconds before retrying a protocol request. This is the upper bound applied to exponentially backed-off requests.
*Type: integer* queue.buffering.backpressure.threshold | P | 1 .. 1000000 | 1 | low | The threshold of outstanding not yet transmitted broker requests needed to backpressure the producer's message accumulator. If the number of not yet transmitted requests equals or exceeds this number, produce request creation that would have otherwise been triggered (for example, in accordance with linger.ms) will be delayed. A lower number yields larger and more effective batches. A higher value can improve latency when using compression on slow machines.
*Type: integer* compression.codec | P | none, gzip, snappy, lz4, zstd | none | medium | compression codec to use for compressing message sets. This is the default value for all topics, may be overridden by the topic configuration property `compression.codec`.
*Type: enum value* compression.type | P | none, gzip, snappy, lz4, zstd | none | medium | Alias for `compression.codec`: compression codec to use for compressing message sets. This is the default value for all topics, may be overridden by the topic configuration property `compression.codec`.
*Type: enum value* @@ -151,6 +153,7 @@ delivery.report.only.error | P | true, false | false dr_cb | P | | | low | Delivery report callback (set with rd_kafka_conf_set_dr_cb())
*Type: see dedicated API* dr_msg_cb | P | | | low | Delivery report callback (set with rd_kafka_conf_set_dr_msg_cb())
*Type: see dedicated API* sticky.partitioning.linger.ms | P | 0 .. 900000 | 10 | low | Delay in milliseconds to wait to assign new sticky partitions for each topic. By default, set to double the time of linger.ms. To disable sticky behavior, set to 0. This behavior affects messages with the key NULL in all cases, and messages with key lengths of zero when the consistent_random partitioner is in use. These messages would otherwise be assigned randomly. A higher value allows for more effective batching of these messages.
*Type: integer* +client.dns.lookup | * | use_all_dns_ips, resolve_canonical_bootstrap_servers_only | use_all_dns_ips | low | Controls how the client uses DNS lookups. By default, when the lookup returns multiple IP addresses for a hostname, they will all be attempted for connection before the connection is considered failed. This applies to both bootstrap and advertised servers. If the value is set to `resolve_canonical_bootstrap_servers_only`, each entry will be resolved and expanded into a list of canonical names. NOTE: Default here is different from the Java client's default behavior, which connects only to the first IP address returned for a hostname.
*Type: enum value* ## Topic configuration properties diff --git a/lib/librdkafka-2.1.0/CONTRIBUTING.md b/lib/librdkafka-2.3.0/CONTRIBUTING.md similarity index 95% rename from lib/librdkafka-2.1.0/CONTRIBUTING.md rename to lib/librdkafka-2.3.0/CONTRIBUTING.md index 45ab45f9b70..e6afdc1ea0b 100644 --- a/lib/librdkafka-2.1.0/CONTRIBUTING.md +++ b/lib/librdkafka-2.3.0/CONTRIBUTING.md @@ -164,7 +164,7 @@ For more information on the test suite see [tests/README.md]. ## How to get your changes into the main sources -File a [pull request on github](https://github.com/edenhill/librdkafka/pulls) +File a [pull request on github](https://github.com/confluentinc/librdkafka/pulls) Your change will be reviewed and discussed there and you will be expected to correct flaws pointed out and update accordingly, or the change @@ -250,15 +250,21 @@ E.g.: *Note: The code format style is enforced by our clang-format and pep8 rules, so that is not covered here.* -## C standard "C98" +## Minimum C standard: "gnu90" -This is a mix of C89 and C99, to be compatible with old MSVC versions. +This is the GCC default before 5.1.0, present in CentOS 7, [still supported](https://docs.confluent.io/platform/current/installation/versions-interoperability.html#operating-systems) +up to its EOL in 2024. -Notable, it is C99 with the following limitations: +To test it, configure with GCC and `CFLAGS="-std=gnu90"`. + +It has the following notable limitations: - * No variable declarations after statements. * No in-line variable declarations. +**Note**: the "No variable declarations after + statements" (-Wdeclaration-after-statement) requirement has been dropped. + Visual Studio 2012, the last version not implementing C99, has reached EOL, + and there were violations already. ## Function and globals naming diff --git a/lib/librdkafka-2.1.0/Doxyfile b/lib/librdkafka-2.3.0/Doxyfile similarity index 99% rename from lib/librdkafka-2.1.0/Doxyfile rename to lib/librdkafka-2.3.0/Doxyfile index 33fc31a4e06..e283b73b48c 100644 --- a/lib/librdkafka-2.1.0/Doxyfile +++ b/lib/librdkafka-2.3.0/Doxyfile @@ -1210,7 +1210,7 @@ DOCSET_FEEDNAME = "librdkafka documentation" # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_DOCSET is set to YES. -DOCSET_BUNDLE_ID = se.edenhill.librdkafka +DOCSET_BUNDLE_ID = io.confluent.librdkafka # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify # the documentation publisher. This should be a reverse domain-name style @@ -1218,13 +1218,13 @@ DOCSET_BUNDLE_ID = se.edenhill.librdkafka # The default value is: org.doxygen.Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. -DOCSET_PUBLISHER_ID = se.edenhill +DOCSET_PUBLISHER_ID = io.confluent # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. # The default value is: Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. -DOCSET_PUBLISHER_NAME = Magnus Edenhill +DOCSET_PUBLISHER_NAME = Confluent Inc. # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. The @@ -1309,7 +1309,7 @@ QCH_FILE = # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_QHP is set to YES. -QHP_NAMESPACE = se.edenhill.librdkafka +QHP_NAMESPACE = io.confluent.librdkafka # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt # Help Project output. 
For more information please see Qt Help Project / Virtual @@ -1368,7 +1368,7 @@ GENERATE_ECLIPSEHELP = NO # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. -ECLIPSE_DOC_ID = se.edenhill.librdkafka +ECLIPSE_DOC_ID = io.confluent.librdkafka # If you want full control over the layout of the generated HTML pages it might # be necessary to disable the index and replace it with your own. The diff --git a/lib/librdkafka-2.1.0/INTRODUCTION.md b/lib/librdkafka-2.3.0/INTRODUCTION.md similarity index 95% rename from lib/librdkafka-2.1.0/INTRODUCTION.md rename to lib/librdkafka-2.3.0/INTRODUCTION.md index 66f796bcab2..b0e2bd38b08 100644 --- a/lib/librdkafka-2.1.0/INTRODUCTION.md +++ b/lib/librdkafka-2.3.0/INTRODUCTION.md @@ -184,7 +184,7 @@ soon as possible. Lower buffering time leads to smaller batches and larger per-message overheads, increasing network, memory and CPU usage for producers, brokers and consumers. -See [How to decrease message latency](https://github.com/edenhill/librdkafka/wiki/How-to-decrease-message-latency) for more info. +See [How to decrease message latency](https://github.com/confluentinc/librdkafka/wiki/How-to-decrease-message-latency) for more info. #### Latency measurement @@ -319,7 +319,8 @@ error code set. The application should typically not attempt to retry producing the message on failure, but instead configure librdkafka to perform these retries -using the `retries` and `retry.backoff.ms` configuration properties. +using the `retries`, `retry.backoff.ms` and `retry.backoff.max.ms` +configuration properties. #### Error: Timed out in transmission queue @@ -1876,7 +1877,7 @@ The [Apache Kafka Implementation Proposals (KIPs)](https://cwiki.apache.org/conf | KIP-84 - SASL SCRAM | 0.10.2.0 | Supported | | KIP-85 - SASL config properties | 0.10.2.0 | Supported | | KIP-86 - Configurable SASL callbacks | 2.0.0 | Not supported | -| KIP-88 - AdminAPI: ListGroupOffsets | 0.10.2.0 | Supported | +| KIP-88 - AdminAPI: ListGroupOffsets | 0.10.2.0 | Supported | | KIP-91 - Intuitive timeouts in Producer | 2.1.0 | Supported | | KIP-92 - Per-partition lag metrics in Consumer | 0.10.2.0 | Supported | | KIP-97 - Backwards compatibility with older brokers | 0.10.2.0 | Supported | @@ -1900,7 +1901,7 @@ The [Apache Kafka Implementation Proposals (KIPs)](https://cwiki.apache.org/conf | KIP-226 - AdminAPI: Dynamic broker config | 1.1.0 | Supported | | KIP-227 - Consumer Incremental Fetch | 1.1.0 | Not supported | | KIP-229 - AdminAPI: DeleteGroups | 1.1.0 | Supported | -| KIP-235 - DNS alias for secure connections | 2.1.0 | Not supported | +| KIP-235 - DNS alias for secure connections | 2.1.0 | Supported | | KIP-249 - AdminAPI: Deletegation Tokens | 2.0.0 | Not supported | | KIP-255 - SASL OAUTHBEARER | 2.0.0 | Supported | | KIP-266 - Fix indefinite consumer timeouts | 2.0.0 | Supported (bound by session.timeout.ms and max.poll.interval.ms) | @@ -1909,7 +1910,7 @@ The [Apache Kafka Implementation Proposals (KIPs)](https://cwiki.apache.org/conf | KIP-302 - Use all addresses for resolved broker hostname | 2.1.0 | Supported | | KIP-320 - Consumer: handle log truncation | 2.1.0, 2.2.0 | Supported | | KIP-322 - DeleteTopics disabled error code | 2.1.0 | Supported | -| KIP-339 - AdminAPI: incrementalAlterConfigs | 2.3.0 | Not supported | +| KIP-339 - AdminAPI: incrementalAlterConfigs | 2.3.0 | Supported | | KIP-341 - Update Sticky partition assignment data | 2.3.0 | Not supported (superceeded by KIP-429) | | KIP-342 - Custom SASL 
OAUTHBEARER extensions | 2.1.0 | Supported | | KIP-345 - Consumer: Static membership | 2.4.0 | Supported | @@ -1917,16 +1918,16 @@ | KIP-359 - Producer: use EpochLeaderId | 2.4.0 | Not supported | | KIP-360 - Improve handling of unknown Idempotent Producer | 2.5.0 | Supported | | KIP-361 - Consumer: add config to disable auto topic creation | 2.3.0 | Supported | -| KIP-368 - SASL periodic reauth | 2.2.0 | Not supported | +| KIP-368 - SASL periodic reauth | 2.2.0 | Supported | | KIP-369 - Always roundRobin partitioner | 2.4.0 | Not supported | | KIP-389 - Consumer group max size | 2.2.0 | Supported (error is propagated to application, but the consumer does not raise a fatal error) | | KIP-392 - Allow consumers to fetch from closest replica | 2.4.0 | Supported | | KIP-394 - Consumer: require member.id in JoinGroupRequest | 2.2.0 | Supported | -| KIP-396 - AdminAPI: commit/list offsets | 2.4.0 | Partially supported (remaining APIs available outside Admin client) | +| KIP-396 - AdminAPI: commit/list offsets | 2.4.0 | Supported | | KIP-412 - AdminAPI: adjust log levels | 2.4.0 | Not supported | | KIP-421 - Variables in client config files | 2.3.0 | Not applicable (librdkafka, et.al, does not provide a config file interface, and shouldn't) | | KIP-429 - Consumer: incremental rebalance protocol | 2.4.0 | Supported | -| KIP-430 - AdminAPI: return authorized operations in Describe.. responses | 2.3.0 | Not supported | +| KIP-430 - AdminAPI: return authorized operations in Describe.. responses | 2.3.0 | Supported | | KIP-436 - Start time in stats | 2.3.0 | Supported | | KIP-447 - Producer scalability for EOS | 2.5.0 | Supported | | KIP-455 - AdminAPI: Replica assignment | 2.4.0 (WIP) | Not supported | @@ -1938,6 +1939,7 @@ | KIP-496 - AdminAPI: delete offsets | 2.4.0 | Supported | | KIP-511 - Collect Client's Name and Version | 2.4.0 | Supported | | KIP-514 - Bounded flush() | 2.4.0 | Supported | +| KIP-516 - Topic Identifiers | 2.8.0 (WIP) | Partially Supported | | KIP-517 - Consumer poll() metrics | 2.4.0 | Not supported | | KIP-518 - Allow listing consumer groups per state | 2.6.0 | Supported | | KIP-519 - Make SSL engine configurable | 2.6.0 | Supported | @@ -1945,10 +1947,11 @@ | KIP-526 - Reduce Producer Metadata Lookups for Large Number of Topics | 2.5.0 | Not supported | | KIP-533 - Add default API timeout to AdminClient | 2.5.0 | Not supported | | KIP-546 - Add Client Quota APIs to AdminClient | 2.6.0 | Not supported | +| KIP-554 - Add Broker-side SCRAM Config API | 2.7.0 | Supported | | KIP-559 - Make the Kafka Protocol Friendlier with L7 Proxies | 2.5.0 | Not supported | | KIP-568 - Explicit rebalance triggering on the Consumer | 2.6.0 | Not supported | | KIP-659 - Add metadata to DescribeConfigsResponse | 2.6.0 | Not supported | -| KIP-580 - Exponential backoff for Kafka clients | WIP | Partially supported | +| KIP-580 - Exponential backoff for Kafka clients | 3.7.0 (WIP) | Supported | | KIP-584 - Versioning scheme for features | WIP | Not supported | | KIP-588 - Allow producers to recover gracefully from txn timeouts | 2.8.0 (WIP) | Not supported | | KIP-601 - Configurable socket connection timeout | 2.7.0 | Supported | @@ -1957,49 +1960,56 @@ | KIP-654 - Aborted txns
with non-flushed msgs should not be fatal | 2.7.0 | Supported | | KIP-735 - Increase default consumer session timeout | 3.0.0 | Supported | | KIP-768 - SASL/OAUTHBEARER OIDC support | 3.0 | Supported | +| KIP-881 - Rack-aware Partition Assignment for Kafka Consumers | 3.5.0 (WIP) | Supported | ### Supported protocol versions -"Kafka max" is the maximum ApiVersion supported in Apache Kafka 3.3.1, while +"Kafka max" is the maximum ApiVersion supported in Apache Kafka 3.5.0, while "librdkafka max" is the maximum ApiVersion supported in the latest release of librdkafka. -| ApiKey | Request name | Kafka max | librdkafka max | -| ------- | ------------------- | ----------- | ----------------------- | -| 0 | Produce | 9 | 7 | -| 1 | Fetch | 13 | 11 | -| 2 | ListOffsets | 7 | 2 | -| 3 | Metadata | 12 | 9 | -| 8 | OffsetCommit | 8 | 7 | -| 9 | OffsetFetch | 8 | 7 | -| 10 | FindCoordinator | 4 | 2 | -| 11 | JoinGroup | 9 | 5 | -| 12 | Heartbeat | 4 | 3 | -| 13 | LeaveGroup | 5 | 1 | -| 14 | SyncGroup | 5 | 3 | -| 15 | DescribeGroups | 5 | 4 | -| 16 | ListGroups | 4 | 4 | -| 17 | SaslHandshake | 1 | 1 | -| 18 | ApiVersions | 3 | 3 | -| 19 | CreateTopics | 7 | 4 | -| 20 | DeleteTopics | 6 | 1 | -| 21 | DeleteRecords | 2 | 1 | -| 22 | InitProducerId | 4 | 4 | -| 24 | AddPartitionsToTxn | 3 | 0 | -| 25 | AddOffsetsToTxn | 3 | 0 | -| 26 | EndTxn | 3 | 1 | -| 28 | TxnOffsetCommit | 3 | 3 | -| 32 | DescribeConfigs | 4 | 1 | -| 33 | AlterConfigs | 2 | 1 | -| 36 | SaslAuthenticate | 2 | 0 | -| 37 | CreatePartitions | 3 | 0 | -| 42 | DeleteGroups | 2 | 1 | -| 47 | OffsetDelete | 0 | 0 | - +| ApiKey | Request name | Kafka max | librdkafka max | +| ------- | ------------------------------| ----------- | ----------------------- | +| 0 | Produce | 9 | 7 | +| 1 | Fetch | 15 | 11 | +| 2 | ListOffsets | 8 | 7 | +| 3 | Metadata | 12 | 12 | +| 8 | OffsetCommit | 8 | 7 | +| 9 | OffsetFetch | 8 | 7 | +| 10 | FindCoordinator | 4 | 2 | +| 11 | JoinGroup | 9 | 5 | +| 12 | Heartbeat | 4 | 3 | +| 13 | LeaveGroup | 5 | 1 | +| 14 | SyncGroup | 5 | 3 | +| 15 | DescribeGroups | 5 | 4 | +| 16 | ListGroups | 4 | 4 | +| 17 | SaslHandshake | 1 | 1 | +| 18 | ApiVersions | 3 | 3 | +| 19 | CreateTopics | 7 | 4 | +| 20 | DeleteTopics | 6 | 1 | +| 21 | DeleteRecords | 2 | 1 | +| 22 | InitProducerId | 4 | 4 | +| 23 | OffsetForLeaderEpoch | 4 | 2 | +| 24 | AddPartitionsToTxn | 4 | 0 | +| 25 | AddOffsetsToTxn | 3 | 0 | +| 26 | EndTxn | 3 | 1 | +| 28 | TxnOffsetCommit | 3 | 3 | +| 29 | DescribeAcls | 3 | 1 | +| 30 | CreateAcls | 3 | 1 | +| 31 | DeleteAcls | 3 | 1 | +| 32 | DescribeConfigs | 4 | 1 | +| 33 | AlterConfigs | 2 | 2 | +| 36 | SaslAuthenticate | 2 | 1 | +| 37 | CreatePartitions | 3 | 0 | +| 42 | DeleteGroups | 2 | 1 | +| 44 | IncrementalAlterConfigs | 1 | 1 | +| 47 | OffsetDelete | 0 | 0 | +| 50 | DescribeUserScramCredentials | 0 | 0 | +| 51 | AlterUserScramCredentials | 0 | 0 | # Recommendations for language binding developers @@ -2061,9 +2071,4 @@ librdkafka (file a github pull request). ## Community support -You are welcome to direct your users to -[librdkafka's Gitter chat room](http://gitter.im/edenhill/librdkafka) as long as -you monitor the conversions in there to pick up questions specific to your -bindings. -But for the most part user questions are usually generic enough to apply to all -librdkafka bindings. +Community support is offered through GitHub Issues and Discussions. 
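The KIP and ApiVersion tables above describe what the bundled librdkafka 2.3.0 can negotiate; which librdkafka a binary actually linked against is easy to confirm through the public version API. A minimal standalone sketch (not part of this changeset; the `<librdkafka/rdkafka.h>` include path assumes a regular install rather than the in-tree build used by the bundled examples):

```c
#include <stdio.h>
#include <librdkafka/rdkafka.h>

int main(void) {
        /* RD_KAFKA_VERSION is the compile-time version (0xMMmmrrpp);
         * rd_kafka_version() reports the library linked at runtime.
         * A mismatch usually means a stale shared object on the path. */
        printf("built against: 0x%08x\n", RD_KAFKA_VERSION);
        printf("running with:  %s (0x%08x)\n", rd_kafka_version_str(),
               rd_kafka_version());
        return 0;
}
```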
diff --git a/lib/librdkafka-2.1.0/LICENSE b/lib/librdkafka-2.3.0/LICENSE similarity index 94% rename from lib/librdkafka-2.1.0/LICENSE rename to lib/librdkafka-2.3.0/LICENSE index 193ffaae283..660e3cfb005 100644 --- a/lib/librdkafka-2.1.0/LICENSE +++ b/lib/librdkafka-2.3.0/LICENSE @@ -1,6 +1,7 @@ librdkafka - Apache Kafka C driver library -Copyright (c) 2012-2020, Magnus Edenhill +Copyright (c) 2012-2022, Magnus Edenhill + 2023, Confluent Inc. All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/LICENSE.cjson b/lib/librdkafka-2.3.0/LICENSE.cjson similarity index 100% rename from lib/librdkafka-2.1.0/LICENSE.cjson rename to lib/librdkafka-2.3.0/LICENSE.cjson diff --git a/lib/librdkafka-2.1.0/LICENSE.crc32c b/lib/librdkafka-2.3.0/LICENSE.crc32c similarity index 100% rename from lib/librdkafka-2.1.0/LICENSE.crc32c rename to lib/librdkafka-2.3.0/LICENSE.crc32c diff --git a/lib/librdkafka-2.1.0/LICENSE.fnv1a b/lib/librdkafka-2.3.0/LICENSE.fnv1a similarity index 100% rename from lib/librdkafka-2.1.0/LICENSE.fnv1a rename to lib/librdkafka-2.3.0/LICENSE.fnv1a diff --git a/lib/librdkafka-2.1.0/LICENSE.hdrhistogram b/lib/librdkafka-2.3.0/LICENSE.hdrhistogram similarity index 100% rename from lib/librdkafka-2.1.0/LICENSE.hdrhistogram rename to lib/librdkafka-2.3.0/LICENSE.hdrhistogram diff --git a/lib/librdkafka-2.1.0/LICENSE.lz4 b/lib/librdkafka-2.3.0/LICENSE.lz4 similarity index 100% rename from lib/librdkafka-2.1.0/LICENSE.lz4 rename to lib/librdkafka-2.3.0/LICENSE.lz4 diff --git a/lib/librdkafka-2.1.0/LICENSE.murmur2 b/lib/librdkafka-2.3.0/LICENSE.murmur2 similarity index 100% rename from lib/librdkafka-2.1.0/LICENSE.murmur2 rename to lib/librdkafka-2.3.0/LICENSE.murmur2 diff --git a/lib/librdkafka-2.1.0/LICENSE.pycrc b/lib/librdkafka-2.3.0/LICENSE.pycrc similarity index 100% rename from lib/librdkafka-2.1.0/LICENSE.pycrc rename to lib/librdkafka-2.3.0/LICENSE.pycrc diff --git a/lib/librdkafka-2.1.0/LICENSE.queue b/lib/librdkafka-2.3.0/LICENSE.queue similarity index 100% rename from lib/librdkafka-2.1.0/LICENSE.queue rename to lib/librdkafka-2.3.0/LICENSE.queue diff --git a/lib/librdkafka-2.1.0/LICENSE.regexp b/lib/librdkafka-2.3.0/LICENSE.regexp similarity index 100% rename from lib/librdkafka-2.1.0/LICENSE.regexp rename to lib/librdkafka-2.3.0/LICENSE.regexp diff --git a/lib/librdkafka-2.1.0/LICENSE.snappy b/lib/librdkafka-2.3.0/LICENSE.snappy similarity index 100% rename from lib/librdkafka-2.1.0/LICENSE.snappy rename to lib/librdkafka-2.3.0/LICENSE.snappy diff --git a/lib/librdkafka-2.1.0/LICENSE.tinycthread b/lib/librdkafka-2.3.0/LICENSE.tinycthread similarity index 100% rename from lib/librdkafka-2.1.0/LICENSE.tinycthread rename to lib/librdkafka-2.3.0/LICENSE.tinycthread diff --git a/lib/librdkafka-2.1.0/LICENSE.wingetopt b/lib/librdkafka-2.3.0/LICENSE.wingetopt similarity index 100% rename from lib/librdkafka-2.1.0/LICENSE.wingetopt rename to lib/librdkafka-2.3.0/LICENSE.wingetopt diff --git a/lib/librdkafka-2.1.0/LICENSES.txt b/lib/librdkafka-2.3.0/LICENSES.txt similarity index 99% rename from lib/librdkafka-2.1.0/LICENSES.txt rename to lib/librdkafka-2.3.0/LICENSES.txt index 1ab8a1dd4d7..ed892149192 100644 --- a/lib/librdkafka-2.1.0/LICENSES.txt +++ b/lib/librdkafka-2.3.0/LICENSES.txt @@ -2,7 +2,8 @@ LICENSE -------------------------------------------------------------- librdkafka - Apache Kafka C driver library -Copyright (c) 2012-2020, Magnus Edenhill +Copyright (c) 2012-2022, Magnus Edenhill + 2023, Confluent Inc. 
All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/Makefile b/lib/librdkafka-2.3.0/Makefile similarity index 100% rename from lib/librdkafka-2.1.0/Makefile rename to lib/librdkafka-2.3.0/Makefile diff --git a/lib/librdkafka-2.1.0/README.md b/lib/librdkafka-2.3.0/README.md similarity index 85% rename from lib/librdkafka-2.1.0/README.md rename to lib/librdkafka-2.3.0/README.md index 640b8791c56..06f196bc0ee 100644 --- a/lib/librdkafka-2.1.0/README.md +++ b/lib/librdkafka-2.3.0/README.md @@ -2,8 +2,9 @@ librdkafka - the Apache Kafka C/C++ client library ================================================== Copyright (c) 2012-2022, [Magnus Edenhill](http://www.edenhill.se/). + 2023 [Confluent Inc.](https://www.confluent.io/). -[https://github.com/edenhill/librdkafka](https://github.com/edenhill/librdkafka) +[https://github.com/confluentinc/librdkafka](https://github.com/confluentinc/librdkafka) **librdkafka** is a C library implementation of the [Apache Kafka](https://kafka.apache.org/) protocol, providing Producer, Consumer @@ -25,8 +26,8 @@ affiliation with and is not endorsed by The Apache Software Foundation. * Simple (legacy) consumer * Admin client * Compression: snappy, gzip, lz4, zstd - * [SSL](https://github.com/edenhill/librdkafka/wiki/Using-SSL-with-librdkafka) support - * [SASL](https://github.com/edenhill/librdkafka/wiki/Using-SASL-with-librdkafka) (GSSAPI/Kerberos/SSPI, PLAIN, SCRAM, OAUTHBEARER) support + * [SSL](https://github.com/confluentinc/librdkafka/wiki/Using-SSL-with-librdkafka) support + * [SASL](https://github.com/confluentinc/librdkafka/wiki/Using-SASL-with-librdkafka) (GSSAPI/Kerberos/SSPI, PLAIN, SCRAM, OAUTHBEARER) support * Full list of [supported KIPs](INTRODUCTION.md#supported-kips) * Broker version support: >=0.8 (see [Broker version compatibility](INTRODUCTION.md#broker-version-compatibility)) * Guaranteed API stability for C & C++ APIs (ABI safety guaranteed for C) @@ -39,14 +40,14 @@ affiliation with and is not endorsed by The Apache Software Foundation. # Documentation * Public API in [C header](src/rdkafka.h) and [C++ header](src-cpp/rdkafkacpp.h). - * Introduction and manual in [INTRODUCTION.md](https://github.com/edenhill/librdkafka/blob/master/INTRODUCTION.md). + * Introduction and manual in [INTRODUCTION.md](https://github.com/confluentinc/librdkafka/blob/master/INTRODUCTION.md). * Configuration properties in -[CONFIGURATION.md](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). - * Statistics metrics in [STATISTICS.md](https://github.com/edenhill/librdkafka/blob/master/STATISTICS.md). - * [Frequently asked questions](https://github.com/edenhill/librdkafka/wiki). +[CONFIGURATION.md](https://github.com/confluentinc/librdkafka/blob/master/CONFIGURATION.md). + * Statistics metrics in [STATISTICS.md](https://github.com/confluentinc/librdkafka/blob/master/STATISTICS.md). + * [Frequently asked questions](https://github.com/confluentinc/librdkafka/wiki). * Step-by-step tutorial [Getting Started with Apache Kafka and C/C++](https://developer.confluent.io/get-started/c/). -**NOTE**: The `master` branch is actively developed, use latest [release](https://github.com/edenhill/librdkafka/releases) for production use. +**NOTE**: The `master` branch is actively developed; use the latest [release](https://github.com/confluentinc/librdkafka/releases) for production use.
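The SSL and SASL feature bullets above are driven entirely by configuration properties rather than dedicated APIs. A rough sketch of enabling them on a client (the broker address, mechanism, and credentials below are illustrative placeholders, not values from this changeset):

```c
#include <stdio.h>
#include <librdkafka/rdkafka.h>

int main(void) {
        char errstr[512];
        rd_kafka_conf_t *conf = rd_kafka_conf_new();
        /* Hypothetical endpoint and credentials, for illustration only. */
        const char *props[][2] = {
            {"bootstrap.servers", "broker1:9093"},
            {"security.protocol", "sasl_ssl"},
            {"sasl.mechanisms", "SCRAM-SHA-256"},
            {"sasl.username", "alice"},
            {"sasl.password", "alice-secret"},
        };
        size_t i;

        for (i = 0; i < sizeof(props) / sizeof(props[0]); i++) {
                if (rd_kafka_conf_set(conf, props[i][0], props[i][1], errstr,
                                      sizeof(errstr)) != RD_KAFKA_CONF_OK) {
                        fprintf(stderr, "%s: %s\n", props[i][0], errstr);
                        rd_kafka_conf_destroy(conf);
                        return 1;
                }
        }

        /* rd_kafka_new() takes ownership of conf only on success. */
        rd_kafka_t *rk =
            rd_kafka_new(RD_KAFKA_PRODUCER, conf, errstr, sizeof(errstr));
        if (!rk) {
                fprintf(stderr, "rd_kafka_new: %s\n", errstr);
                rd_kafka_conf_destroy(conf);
                return 1;
        }
        rd_kafka_destroy(rk);
        return 0;
}
```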
# Installation @@ -160,11 +161,11 @@ Commercial support is available from [Confluent Inc](https://www.confluent.io/) ## Community support -**Only the [latest official release](https://github.com/edenhill/librdkafka/releases) is supported for community members.** +**Only the [latest official release](https://github.com/confluentinc/librdkafka/releases) is supported for community members.** -File bug reports and feature requests using [GitHub Issues](https://github.com/edenhill/librdkafka/issues). +File bug reports and feature requests using [GitHub Issues](https://github.com/confluentinc/librdkafka/issues). -Questions and discussions are welcome on the [Discussions](https://github.com/edenhill/librdkafka/discussions) forum, and on the [Confluent Community slack](https://launchpass.com/confluentcommunity) #clients channel. +Questions and discussions are welcome on the [Discussions](https://github.com/confluentinc/librdkafka/discussions) forum, and on the [Confluent Community slack](https://launchpass.com/confluentcommunity) #clients channel. # Language bindings # @@ -195,4 +196,4 @@ Questions and discussions are welcome on the [Discussions](https://github.com/ed * Swift: [Perfect-Kafka](https://github.com/PerfectlySoft/Perfect-Kafka) -See [Powered by librdkafka](https://github.com/edenhill/librdkafka/wiki/Powered-by-librdkafka) for an incomplete list of librdkafka users. +See [Powered by librdkafka](https://github.com/confluentinc/librdkafka/wiki/Powered-by-librdkafka) for an incomplete list of librdkafka users. diff --git a/lib/librdkafka-2.1.0/README.win32 b/lib/librdkafka-2.3.0/README.win32 similarity index 100% rename from lib/librdkafka-2.1.0/README.win32 rename to lib/librdkafka-2.3.0/README.win32 diff --git a/lib/librdkafka-2.1.0/STATISTICS.md b/lib/librdkafka-2.3.0/STATISTICS.md similarity index 100% rename from lib/librdkafka-2.1.0/STATISTICS.md rename to lib/librdkafka-2.3.0/STATISTICS.md diff --git a/lib/librdkafka-2.1.0/configure b/lib/librdkafka-2.3.0/configure similarity index 100% rename from lib/librdkafka-2.1.0/configure rename to lib/librdkafka-2.3.0/configure diff --git a/lib/librdkafka-2.1.0/configure.self b/lib/librdkafka-2.3.0/configure.self similarity index 98% rename from lib/librdkafka-2.1.0/configure.self rename to lib/librdkafka-2.3.0/configure.self index bb0a975c94f..3c49956b46c 100644 --- a/lib/librdkafka-2.1.0/configure.self +++ b/lib/librdkafka-2.3.0/configure.self @@ -4,7 +4,7 @@ mkl_meta_set "description" "name" "librdkafka" mkl_meta_set "description" "oneline" "The Apache Kafka C/C++ library" mkl_meta_set "description" "long" "Full Apache Kafka protocol support, including producer and consumer" -mkl_meta_set "description" "copyright" "Copyright (c) 2012-2019 Magnus Edenhill" +mkl_meta_set "description" "copyright" "Copyright (c) 2012-2022, Magnus Edenhill, 2023, Confluent Inc." 
# Enable generation of pkg-config .pc file mkl_mkvar_set "" GEN_PKG_CONFIG y diff --git a/lib/librdkafka-2.1.0/debian/.gitignore b/lib/librdkafka-2.3.0/debian/.gitignore similarity index 100% rename from lib/librdkafka-2.1.0/debian/.gitignore rename to lib/librdkafka-2.3.0/debian/.gitignore diff --git a/lib/librdkafka-2.1.0/debian/changelog b/lib/librdkafka-2.3.0/debian/changelog similarity index 100% rename from lib/librdkafka-2.1.0/debian/changelog rename to lib/librdkafka-2.3.0/debian/changelog diff --git a/lib/librdkafka-2.1.0/debian/compat b/lib/librdkafka-2.3.0/debian/compat similarity index 100% rename from lib/librdkafka-2.1.0/debian/compat rename to lib/librdkafka-2.3.0/debian/compat diff --git a/lib/librdkafka-2.1.0/debian/control b/lib/librdkafka-2.3.0/debian/control similarity index 98% rename from lib/librdkafka-2.1.0/debian/control rename to lib/librdkafka-2.3.0/debian/control index bddaf4724db..c14b664f3e5 100644 --- a/lib/librdkafka-2.1.0/debian/control +++ b/lib/librdkafka-2.3.0/debian/control @@ -5,7 +5,7 @@ Uploaders: Christos Trochalakis Build-Depends: debhelper (>= 9), zlib1g-dev, libssl-dev, libsasl2-dev, liblz4-dev, python3 Standards-Version: 3.9.7 Section: libs -Homepage: https://github.com/edenhill/librdkafka +Homepage: https://github.com/confluentinc/librdkafka Vcs-Git: https://anonscm.debian.org/cgit/pkg-kafka/librdkafka.git -b debian Vcs-Browser: https://anonscm.debian.org/cgit/pkg-kafka/librdkafka.git diff --git a/lib/librdkafka-2.1.0/debian/copyright b/lib/librdkafka-2.3.0/debian/copyright similarity index 97% rename from lib/librdkafka-2.1.0/debian/copyright rename to lib/librdkafka-2.3.0/debian/copyright index aa6c33cce42..965cbae0587 100644 --- a/lib/librdkafka-2.1.0/debian/copyright +++ b/lib/librdkafka-2.3.0/debian/copyright @@ -1,6 +1,6 @@ Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Upstream-Name: librdkafka -Source: https://github.com/edenhill/librdkafka +Source: https://github.com/confluentinc/librdkafka License: BSD-2-clause Redistribution and use in source and binary forms, with or without @@ -25,7 +25,7 @@ License: BSD-2-clause POSSIBILITY OF SUCH DAMAGE. Files: * -Copyright: 2012-2015, Magnus Edenhill +Copyright: 2012-2022, Magnus Edenhill; 2023, Confluent Inc. License: BSD-2-clause Files: src/rdcrc32.c src/rdcrc32.h @@ -40,7 +40,7 @@ License: MIT . The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - . + . THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE diff --git a/lib/librdkafka-2.1.0/debian/gbp.conf b/lib/librdkafka-2.3.0/debian/gbp.conf similarity index 100% rename from lib/librdkafka-2.1.0/debian/gbp.conf rename to lib/librdkafka-2.3.0/debian/gbp.conf diff --git a/lib/librdkafka-2.1.0/debian/librdkafka++1.install b/lib/librdkafka-2.3.0/debian/librdkafka++1.install similarity index 100% rename from lib/librdkafka-2.1.0/debian/librdkafka++1.install rename to lib/librdkafka-2.3.0/debian/librdkafka++1.install diff --git a/lib/librdkafka-2.1.0/debian/librdkafka-dev.examples b/lib/librdkafka-2.3.0/debian/librdkafka-dev.examples similarity index 100% rename from lib/librdkafka-2.1.0/debian/librdkafka-dev.examples rename to lib/librdkafka-2.3.0/debian/librdkafka-dev.examples diff --git a/lib/librdkafka-2.1.0/debian/librdkafka-dev.install b/lib/librdkafka-2.3.0/debian/librdkafka-dev.install similarity index 100% rename from lib/librdkafka-2.1.0/debian/librdkafka-dev.install rename to lib/librdkafka-2.3.0/debian/librdkafka-dev.install diff --git a/lib/librdkafka-2.1.0/debian/librdkafka1.docs b/lib/librdkafka-2.3.0/debian/librdkafka1.docs similarity index 100% rename from lib/librdkafka-2.1.0/debian/librdkafka1.docs rename to lib/librdkafka-2.3.0/debian/librdkafka1.docs diff --git a/lib/librdkafka-2.1.0/debian/librdkafka1.install b/lib/librdkafka-2.3.0/debian/librdkafka1.install similarity index 100% rename from lib/librdkafka-2.1.0/debian/librdkafka1.install rename to lib/librdkafka-2.3.0/debian/librdkafka1.install diff --git a/lib/librdkafka-2.1.0/debian/librdkafka1.symbols b/lib/librdkafka-2.3.0/debian/librdkafka1.symbols similarity index 100% rename from lib/librdkafka-2.1.0/debian/librdkafka1.symbols rename to lib/librdkafka-2.3.0/debian/librdkafka1.symbols diff --git a/lib/librdkafka-2.1.0/debian/rules b/lib/librdkafka-2.3.0/debian/rules similarity index 100% rename from lib/librdkafka-2.1.0/debian/rules rename to lib/librdkafka-2.3.0/debian/rules diff --git a/lib/librdkafka-2.1.0/debian/source/format b/lib/librdkafka-2.3.0/debian/source/format similarity index 100% rename from lib/librdkafka-2.1.0/debian/source/format rename to lib/librdkafka-2.3.0/debian/source/format diff --git a/lib/librdkafka-2.3.0/debian/watch b/lib/librdkafka-2.3.0/debian/watch new file mode 100644 index 00000000000..ed5855f0c95 --- /dev/null +++ b/lib/librdkafka-2.3.0/debian/watch @@ -0,0 +1,2 @@ +version=3 +https://github.com/confluentinc/librdkafka/tags .*/v?(\d[\d\.]*)\.tar\.gz diff --git a/lib/librdkafka-2.1.0/dev-conf.sh b/lib/librdkafka-2.3.0/dev-conf.sh similarity index 98% rename from lib/librdkafka-2.1.0/dev-conf.sh rename to lib/librdkafka-2.3.0/dev-conf.sh index 23931a77e65..ebc4451b948 100755 --- a/lib/librdkafka-2.1.0/dev-conf.sh +++ b/lib/librdkafka-2.3.0/dev-conf.sh @@ -2,7 +2,7 @@ # # librdkafka - Apache Kafka C library # -# Copyright (c) 2018 Magnus Edenhill +# Copyright (c) 2018-2022, Magnus Edenhill # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/examples/.gitignore b/lib/librdkafka-2.3.0/examples/.gitignore similarity index 82% rename from lib/librdkafka-2.1.0/examples/.gitignore rename to lib/librdkafka-2.3.0/examples/.gitignore index 4190608c421..9b2c65a2f72 100644 --- a/lib/librdkafka-2.1.0/examples/.gitignore +++ b/lib/librdkafka-2.3.0/examples/.gitignore @@ -15,5 +15,10 @@ rdkafka_performance transactions list_consumer_groups describe_consumer_groups +describe_topics +describe_cluster list_consumer_group_offsets alter_consumer_group_offsets +incremental_alter_configs +user_scram +list_offsets \ No newline at end of file diff --git a/lib/librdkafka-2.1.0/examples/CMakeLists.txt b/lib/librdkafka-2.3.0/examples/CMakeLists.txt similarity index 50% rename from lib/librdkafka-2.1.0/examples/CMakeLists.txt rename to lib/librdkafka-2.3.0/examples/CMakeLists.txt index bbbb89ad900..8c0079abeea 100644 --- a/lib/librdkafka-2.1.0/examples/CMakeLists.txt +++ b/lib/librdkafka-2.3.0/examples/CMakeLists.txt @@ -26,6 +26,41 @@ target_link_libraries(openssl_engine_example_cpp PUBLIC rdkafka++) add_executable(misc misc.c ${win32_sources}) target_link_libraries(misc PUBLIC rdkafka) +add_executable(idempotent_producer idempotent_producer.c ${win32_sources}) +target_link_libraries(idempotent_producer PUBLIC rdkafka) + +add_executable(transactions transactions.c ${win32_sources}) +target_link_libraries(transactions PUBLIC rdkafka) + +add_executable(delete_records delete_records.c ${win32_sources}) +target_link_libraries(delete_records PUBLIC rdkafka) + +add_executable(list_consumer_groups list_consumer_groups.c ${win32_sources}) +target_link_libraries(list_consumer_groups PUBLIC rdkafka) + +add_executable(describe_consumer_groups describe_consumer_groups.c ${win32_sources}) +target_link_libraries(describe_consumer_groups PUBLIC rdkafka) + +add_executable(list_consumer_group_offsets list_consumer_group_offsets.c ${win32_sources}) +target_link_libraries(list_consumer_group_offsets PUBLIC rdkafka) + +add_executable(alter_consumer_group_offsets alter_consumer_group_offsets.c ${win32_sources}) +target_link_libraries(alter_consumer_group_offsets PUBLIC rdkafka) + +add_executable(incremental_alter_configs incremental_alter_configs.c ${win32_sources}) +target_link_libraries(incremental_alter_configs PUBLIC rdkafka) + +add_executable(user_scram user_scram.c ${win32_sources}) +target_link_libraries(user_scram PUBLIC rdkafka) + +add_executable(describe_topics describe_topics.c ${win32_sources}) +target_link_libraries(describe_topics PUBLIC rdkafka) + +add_executable(describe_cluster describe_cluster.c ${win32_sources}) +target_link_libraries(describe_cluster PUBLIC rdkafka) + +add_executable(list_offsets list_offsets.c ${win32_sources}) +target_link_libraries(list_offsets PUBLIC rdkafka) # The targets below have Unix include dirs and do not compile on Windows.
if(NOT WIN32) @@ -37,4 +72,5 @@ if(NOT WIN32) add_executable(kafkatest_verifiable_client kafkatest_verifiable_client.cpp) target_link_libraries(kafkatest_verifiable_client PUBLIC rdkafka++) + endif(NOT WIN32) diff --git a/lib/librdkafka-2.1.0/examples/Makefile b/lib/librdkafka-2.3.0/examples/Makefile similarity index 85% rename from lib/librdkafka-2.1.0/examples/Makefile rename to lib/librdkafka-2.3.0/examples/Makefile index 15fba3c2afd..f76702d02c3 100644 --- a/lib/librdkafka-2.1.0/examples/Makefile +++ b/lib/librdkafka-2.3.0/examples/Makefile @@ -6,8 +6,13 @@ EXAMPLES ?= rdkafka_example rdkafka_performance rdkafka_example_cpp \ openssl_engine_example_cpp \ list_consumer_groups \ describe_consumer_groups \ + describe_topics \ + describe_cluster \ list_consumer_group_offsets \ alter_consumer_group_offsets \ + incremental_alter_configs \ + user_scram \ + list_offsets \ misc all: $(EXAMPLES) @@ -72,6 +77,14 @@ describe_consumer_groups: ../src/librdkafka.a describe_consumer_groups.c $(CC) $(CPPFLAGS) $(CFLAGS) $@.c -o $@ $(LDFLAGS) \ ../src/librdkafka.a $(LIBS) +describe_topics: ../src/librdkafka.a describe_topics.c + $(CC) $(CPPFLAGS) $(CFLAGS) $@.c -o $@ $(LDFLAGS) \ + ../src/librdkafka.a $(LIBS) + +describe_cluster: ../src/librdkafka.a describe_cluster.c + $(CC) $(CPPFLAGS) $(CFLAGS) $@.c -o $@ $(LDFLAGS) \ + ../src/librdkafka.a $(LIBS) + list_consumer_group_offsets: ../src/librdkafka.a list_consumer_group_offsets.c $(CC) $(CPPFLAGS) $(CFLAGS) $@.c -o $@ $(LDFLAGS) \ ../src/librdkafka.a $(LIBS) @@ -80,6 +93,10 @@ alter_consumer_group_offsets: ../src/librdkafka.a alter_consumer_group_offsets.c $(CC) $(CPPFLAGS) $(CFLAGS) $@.c -o $@ $(LDFLAGS) \ ../src/librdkafka.a $(LIBS) +incremental_alter_configs: ../src/librdkafka.a incremental_alter_configs.c + $(CC) $(CPPFLAGS) $(CFLAGS) $@.c -o $@ $(LDFLAGS) \ + ../src/librdkafka.a $(LIBS) + rdkafka_complex_consumer_example: ../src/librdkafka.a rdkafka_complex_consumer_example.c $(CC) $(CPPFLAGS) $(CFLAGS) rdkafka_complex_consumer_example.c -o $@ $(LDFLAGS) \ ../src/librdkafka.a $(LIBS) @@ -128,6 +145,14 @@ openssl_engine_example_cpp: ../src-cpp/librdkafka++.a ../src/librdkafka.a openss $(CXX) $(CPPFLAGS) $(CXXFLAGS) openssl_engine_example.cpp -o $@ $(LDFLAGS) \ ../src-cpp/librdkafka++.a ../src/librdkafka.a $(LIBS) +user_scram: ../src/librdkafka.a user_scram.c + $(CC) $(CPPFLAGS) $(CFLAGS) $@.c -o $@ $(LDFLAGS) \ + ../src/librdkafka.a $(LIBS) + +list_offsets: ../src/librdkafka.a list_offsets.c + $(CC) $(CPPFLAGS) $(CFLAGS) $@.c -o $@ $(LDFLAGS) \ + ../src/librdkafka.a $(LIBS) + misc: ../src/librdkafka.a misc.c $(CC) $(CPPFLAGS) $(CFLAGS) $@.c -o $@ $(LDFLAGS) \ ../src/librdkafka.a $(LIBS) diff --git a/lib/librdkafka-2.1.0/examples/README.md b/lib/librdkafka-2.3.0/examples/README.md similarity index 88% rename from lib/librdkafka-2.1.0/examples/README.md rename to lib/librdkafka-2.3.0/examples/README.md index 3caee3b861c..236291c5496 100644 --- a/lib/librdkafka-2.1.0/examples/README.md +++ b/lib/librdkafka-2.3.0/examples/README.md @@ -34,5 +34,9 @@ For more complex uses, see: * [delete_records.c](delete_records.c) - Delete records. * [list_consumer_groups.c](list_consumer_groups.c) - List consumer groups. * [describe_consumer_groups.c](describe_consumer_groups.c) - Describe consumer groups. + * [describe_topics.c](describe_topics.c) - Describe topics. + * [describe_cluster.c](describe_cluster.c) - Describe cluster. * [list_consumer_group_offsets.c](list_consumer_group_offsets.c) - List offsets of a consumer group. 
* [alter_consumer_group_offsets.c](alter_consumer_group_offsets.c) - Alter offsets of a consumer group. + * [incremental_alter_configs.c](incremental_alter_configs.c) - Incrementally alter resource configurations. + * [user_scram.c](user_scram.c) - Describe or alter user SCRAM credentials. diff --git a/lib/librdkafka-2.1.0/examples/alter_consumer_group_offsets.c b/lib/librdkafka-2.3.0/examples/alter_consumer_group_offsets.c similarity index 100% rename from lib/librdkafka-2.1.0/examples/alter_consumer_group_offsets.c rename to lib/librdkafka-2.3.0/examples/alter_consumer_group_offsets.c diff --git a/lib/librdkafka-2.1.0/examples/consumer.c b/lib/librdkafka-2.3.0/examples/consumer.c similarity index 98% rename from lib/librdkafka-2.1.0/examples/consumer.c rename to lib/librdkafka-2.3.0/examples/consumer.c index 21b27ca7827..8ce6f77f4da 100644 --- a/lib/librdkafka-2.1.0/examples/consumer.c +++ b/lib/librdkafka-2.3.0/examples/consumer.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2019, Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,7 +30,7 @@ /** * Simple high-level balanced Apache Kafka consumer * using the Kafka driver from librdkafka - * (https://github.com/edenhill/librdkafka) + * (https://github.com/confluentinc/librdkafka) */ #include diff --git a/lib/librdkafka-2.1.0/examples/delete_records.c b/lib/librdkafka-2.3.0/examples/delete_records.c similarity index 99% rename from lib/librdkafka-2.1.0/examples/delete_records.c rename to lib/librdkafka-2.3.0/examples/delete_records.c index 2660996a577..5a7cc6848ee 100644 --- a/lib/librdkafka-2.1.0/examples/delete_records.c +++ b/lib/librdkafka-2.3.0/examples/delete_records.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020, Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.3.0/examples/describe_cluster.c b/lib/librdkafka-2.3.0/examples/describe_cluster.c new file mode 100644 index 00000000000..c37da17f9fd --- /dev/null +++ b/lib/librdkafka-2.3.0/examples/describe_cluster.c @@ -0,0 +1,322 @@ +/* + * librdkafka - Apache Kafka C library + * + * Copyright (c) 2023, Confluent Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * DescribeCluster usage example. + */ + +#include <stdio.h> +#include <signal.h> +#include <string.h> +#include <stdlib.h> +#include <stdarg.h> + +#ifdef _WIN32 +#include "../win32/wingetopt.h" +#else +#include <getopt.h> +#endif + + +/* Typical include path would be <librdkafka/rdkafka.h>, but this program + * is built from within the librdkafka source tree and thus differs. */ +#include "rdkafka.h" + + +const char *argv0; +static rd_kafka_queue_t *queue = NULL; /** Admin result queue. + * This is a global so we can + * yield in stop() */ +static volatile sig_atomic_t run = 1; + +/** + * @brief Signal termination of program + */ +static void stop(int sig) { + if (!run) { + fprintf(stderr, "%% Forced termination\n"); + exit(2); + } + run = 0; + + if (queue) + rd_kafka_queue_yield(queue); +} + + +static void usage(const char *reason, ...) { + + fprintf(stderr, + "Describe cluster usage examples\n" + "\n" + "Usage: %s <options> " + "<include_cluster_authorized_operations>\n" + "Options:\n" + " -b <brokers> Bootstrap server list to connect to.\n" + " -X <prop=val> Set librdkafka configuration property.\n" + " See CONFIGURATION.md for full list.\n" + " -d <dbg1,...> Enable librdkafka debugging (%s).\n" + "\n", + argv0, rd_kafka_get_debug_contexts()); + + if (reason) { + va_list ap; + char reasonbuf[512]; + + va_start(ap, reason); + vsnprintf(reasonbuf, sizeof(reasonbuf), reason, ap); + va_end(ap); + + fprintf(stderr, "ERROR: %s\n", reasonbuf); + } + + exit(reason ? 1 : 0); +} + + +#define fatal(...) \ + do { \ + fprintf(stderr, "ERROR: "); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + exit(2); \ + } while (0) + + +/** + * @brief Set config property. Exit on failure. + */ +static void conf_set(rd_kafka_conf_t *conf, const char *name, const char *val) { + char errstr[512]; + + if (rd_kafka_conf_set(conf, name, val, errstr, sizeof(errstr)) != + RD_KAFKA_CONF_OK) + fatal("Failed to set %s=%s: %s", name, val, errstr); +} + +/** + * @brief Parse an integer or fail. + */ +int64_t parse_int(const char *what, const char *str) { + char *end; + long n = strtol(str, &end, 0); + + if (end != str + strlen(str)) { + fprintf(stderr, "%% Invalid input for %s: %s: not an integer\n", + what, str); + exit(1); + } + + return (int64_t)n; +} + + +/** + * @brief Print cluster information. + */ +static int +print_cluster_info(const rd_kafka_DescribeCluster_result_t *clusterdesc) { + size_t j; + size_t node_cnt; + size_t authorized_operations_cnt; + const char *cluster_id = + rd_kafka_DescribeCluster_result_cluster_id(clusterdesc); + const rd_kafka_Node_t **nodes = + rd_kafka_DescribeCluster_result_nodes(clusterdesc, &node_cnt); + const rd_kafka_AclOperation_t *authorized_operations = + rd_kafka_DescribeCluster_result_authorized_operations( + clusterdesc, &authorized_operations_cnt); + const rd_kafka_Node_t *controller = + rd_kafka_DescribeCluster_result_controller(clusterdesc); + + printf( + "Cluster id: %s\t Controller id: %d\t authorized operations count " + "allowed: %d\n", + cluster_id, controller ?
rd_kafka_Node_id(controller) : -1, + (int)authorized_operations_cnt); + + for (j = 0; j < authorized_operations_cnt; j++) { + printf("\t%s operation is allowed\n", + rd_kafka_AclOperation_name(authorized_operations[j])); + } + + for (j = 0; j < node_cnt; j++) { + const rd_kafka_Node_t *node = nodes[j]; + printf("Node [id: %" PRId32 + ", host: %s" + ", port: %" PRIu16 ", rack: %s]\n", + rd_kafka_Node_id(node), rd_kafka_Node_host(node), + rd_kafka_Node_port(node), rd_kafka_Node_rack(node)); + } + return 0; +} + + +/** + * @brief Call rd_kafka_DescribeCluster() + */ +static void cmd_describe_cluster(rd_kafka_conf_t *conf, int argc, char **argv) { + rd_kafka_t *rk = NULL; + char errstr[512]; + rd_kafka_AdminOptions_t *options = NULL; + rd_kafka_event_t *event = NULL; + rd_kafka_error_t *error; + int retval = 0; + const int min_argc = 1; + + if (argc < min_argc) + usage("Wrong number of arguments."); + + int include_cluster_authorized_operations = + parse_int("include_cluster_authorized_operations", argv[0]); + if (include_cluster_authorized_operations < 0 || + include_cluster_authorized_operations > 1) + usage("include_cluster_authorized_operations not a 0-1 int"); + + /* + * Create producer instance + * NOTE: rd_kafka_new() takes ownership of the conf object + * and the application must not reference it again after + * this call. + */ + rk = rd_kafka_new(RD_KAFKA_PRODUCER, conf, errstr, sizeof(errstr)); + if (!rk) + fatal("Failed to create new producer: %s", errstr); + + queue = rd_kafka_queue_new(rk); + + /* Signal handler for clean shutdown */ + signal(SIGINT, stop); + + options = + rd_kafka_AdminOptions_new(rk, RD_KAFKA_ADMIN_OP_DESCRIBECLUSTER); + + if (rd_kafka_AdminOptions_set_request_timeout( + options, 10 * 1000 /* 10s */, errstr, sizeof(errstr))) { + fprintf(stderr, "%% Failed to set timeout: %s\n", errstr); + retval = 1; + goto exit; + } + if ((error = rd_kafka_AdminOptions_set_include_authorized_operations( + options, include_cluster_authorized_operations))) { + fprintf(stderr, + "%% Failed to set require cluster authorized " + "operations: %s\n", + rd_kafka_error_string(error)); + rd_kafka_error_destroy(error); + retval = 1; + goto exit; + } + + /* Call DescribeCluster. */ + rd_kafka_DescribeCluster(rk, options, queue); + + /* Wait for results */ + event = rd_kafka_queue_poll(queue, -1 /* indefinitely but limited by + * the request timeout set + * above (10s) */); + + if (!event) { + /* User hit Ctrl-C, + * see yield call in stop() signal handler */ + fprintf(stderr, "%% Cancelled by user\n"); + + } else if (rd_kafka_event_error(event)) { + rd_kafka_resp_err_t err = rd_kafka_event_error(event); + /* DescribeCluster request failed */ + fprintf(stderr, "%% DescribeCluster failed[%" PRId32 "]: %s\n", + err, rd_kafka_event_error_string(event)); + retval = 1; + } else { + /* DescribeCluster request succeeded */ + const rd_kafka_DescribeCluster_result_t *result; + + result = rd_kafka_event_DescribeCluster_result(event); + printf("DescribeCluster results:\n"); + retval = print_cluster_info(result); + } + + +exit: + /* Cleanup. 
*/ + if (event) + rd_kafka_event_destroy(event); + if (options) + rd_kafka_AdminOptions_destroy(options); + if (queue) + rd_kafka_queue_destroy(queue); + if (rk) + rd_kafka_destroy(rk); + + exit(retval); +} + +int main(int argc, char **argv) { + rd_kafka_conf_t *conf; /**< Client configuration object */ + int opt; + argv0 = argv[0]; + + /* + * Create Kafka client configuration place-holder + */ + conf = rd_kafka_conf_new(); + + /* + * Parse common options + */ + while ((opt = getopt(argc, argv, "b:X:d:")) != -1) { + switch (opt) { + case 'b': + conf_set(conf, "bootstrap.servers", optarg); + break; + + case 'X': { + char *name = optarg, *val; + + if (!(val = strchr(name, '='))) + fatal("-X expects a name=value argument"); + + *val = '\0'; + val++; + + conf_set(conf, name, val); + break; + } + + case 'd': + conf_set(conf, "debug", optarg); + break; + + default: + usage("Unknown option %c", (char)opt); + } + } + + cmd_describe_cluster(conf, argc - optind, &argv[optind]); + return 0; +} \ No newline at end of file diff --git a/lib/librdkafka-2.1.0/examples/describe_consumer_groups.c b/lib/librdkafka-2.3.0/examples/describe_consumer_groups.c similarity index 62% rename from lib/librdkafka-2.1.0/examples/describe_consumer_groups.c rename to lib/librdkafka-2.3.0/examples/describe_consumer_groups.c index 45b6b8d0b01..daacc1d0219 100644 --- a/lib/librdkafka-2.1.0/examples/describe_consumer_groups.c +++ b/lib/librdkafka-2.3.0/examples/describe_consumer_groups.c @@ -2,6 +2,7 @@ * librdkafka - Apache Kafka C library * * Copyright (c) 2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,9 +51,9 @@ const char *argv0; -static rd_kafka_queue_t *queue; /** Admin result queue. - * This is a global so we can - * yield in stop() */ +static rd_kafka_queue_t *queue = NULL; /** Admin result queue. + * This is a global so we can + * yield in stop() */ static volatile sig_atomic_t run = 1; /** @@ -64,7 +65,9 @@ static void stop(int sig) { exit(2); } run = 0; - rd_kafka_queue_yield(queue); + + if (queue) + rd_kafka_queue_yield(queue); } @@ -73,7 +76,8 @@ static void usage(const char *reason, ...) { fprintf(stderr, "Describe groups usage examples\n" "\n" - "Usage: %s <group1> <group2> ...\n" + "Usage: %s <options> " + "<include_authorized_operations> <group1> <group2> ...\n" "\n" "Options:\n" " -b <brokers> Bootstrap server list to connect to.\n" @@ -145,9 +149,93 @@ print_partition_list(FILE *fp, fprintf(fp, "\n"); } + +/** + * @brief Print group member information. + */ +static void +print_group_member_info(const rd_kafka_MemberDescription_t *member) { + printf( + " Member \"%s\" with client-id %s," + " group instance id: %s, host %s\n", + rd_kafka_MemberDescription_consumer_id(member), + rd_kafka_MemberDescription_client_id(member), + rd_kafka_MemberDescription_group_instance_id(member), + rd_kafka_MemberDescription_host(member)); + const rd_kafka_MemberAssignment_t *assignment = + rd_kafka_MemberDescription_assignment(member); + const rd_kafka_topic_partition_list_t *topic_partitions = + rd_kafka_MemberAssignment_partitions(assignment); + if (!topic_partitions) { + printf(" No assignment\n"); + } else if (topic_partitions->cnt == 0) { + printf(" Empty assignment\n"); + } else { + printf(" Assignment:\n"); + print_partition_list(stdout, topic_partitions, 0, " "); + } +} + + /** + * @brief Print group information.
*/ +static void print_group_info(const rd_kafka_ConsumerGroupDescription_t *group) { + int member_cnt; + size_t j; + size_t authorized_operations_cnt; + const rd_kafka_AclOperation_t *authorized_operations; + const rd_kafka_error_t *error; + char coordinator_desc[512]; + const rd_kafka_Node_t *coordinator = NULL; + const char *group_id = + rd_kafka_ConsumerGroupDescription_group_id(group); + const char *partition_assignor = + rd_kafka_ConsumerGroupDescription_partition_assignor(group); + rd_kafka_consumer_group_state_t state = + rd_kafka_ConsumerGroupDescription_state(group); + authorized_operations = + rd_kafka_ConsumerGroupDescription_authorized_operations( + group, &authorized_operations_cnt); + member_cnt = rd_kafka_ConsumerGroupDescription_member_count(group); + error = rd_kafka_ConsumerGroupDescription_error(group); + coordinator = rd_kafka_ConsumerGroupDescription_coordinator(group); + *coordinator_desc = '\0'; + + if (coordinator != NULL) { + snprintf(coordinator_desc, sizeof(coordinator_desc), + ", coordinator [id: %" PRId32 + ", host: %s" + ", port: %" PRIu16 "]", + rd_kafka_Node_id(coordinator), + rd_kafka_Node_host(coordinator), + rd_kafka_Node_port(coordinator)); + } + printf( + "Group \"%s\", partition assignor \"%s\", " + " state %s%s, with %" PRId32 " member(s)\n", + group_id, partition_assignor, + rd_kafka_consumer_group_state_name(state), coordinator_desc, + member_cnt); + for (j = 0; j < authorized_operations_cnt; j++) { + printf("%s operation is allowed\n", + rd_kafka_AclOperation_name(authorized_operations[j])); + } + if (error) + printf(" error[%" PRId32 "]: %s", rd_kafka_error_code(error), + rd_kafka_error_string(error)); + printf("\n"); + for (j = 0; j < (size_t)member_cnt; j++) { + const rd_kafka_MemberDescription_t *member = + rd_kafka_ConsumerGroupDescription_member(group, j); + print_group_member_info(member); + } +} + + +/** + * @brief Print groups information. 
+ */ static int print_groups_info(const rd_kafka_DescribeConsumerGroups_result_t *grpdesc, int groups_cnt) { @@ -167,94 +255,56 @@ print_groups_info(const rd_kafka_DescribeConsumerGroups_result_t *grpdesc, } for (i = 0; i < result_groups_cnt; i++) { - int j, member_cnt; - const rd_kafka_error_t *error; - const rd_kafka_ConsumerGroupDescription_t *group = - result_groups[i]; - char coordinator_desc[512]; - const rd_kafka_Node_t *coordinator = NULL; - const char *group_id = - rd_kafka_ConsumerGroupDescription_group_id(group); - const char *partition_assignor = - rd_kafka_ConsumerGroupDescription_partition_assignor(group); - rd_kafka_consumer_group_state_t state = - rd_kafka_ConsumerGroupDescription_state(group); - member_cnt = - rd_kafka_ConsumerGroupDescription_member_count(group); - error = rd_kafka_ConsumerGroupDescription_error(group); - coordinator = - rd_kafka_ConsumerGroupDescription_coordinator(group); - *coordinator_desc = '\0'; - - if (coordinator != NULL) { - snprintf(coordinator_desc, sizeof(coordinator_desc), - ", coordinator [id: %" PRId32 - ", host: %s" - ", port: %" PRIu16 "]", - rd_kafka_Node_id(coordinator), - rd_kafka_Node_host(coordinator), - rd_kafka_Node_port(coordinator)); - } - printf( - "Group \"%s\", partition assignor \"%s\", " - "state %s%s, with %" PRId32 " member(s)", - group_id, partition_assignor, - rd_kafka_consumer_group_state_name(state), coordinator_desc, - member_cnt); - if (error) - printf(" error[%" PRId32 "]: %s", - rd_kafka_error_code(error), - rd_kafka_error_string(error)); + print_group_info(result_groups[i]); printf("\n"); - for (j = 0; j < member_cnt; j++) { - const rd_kafka_MemberDescription_t *member = - rd_kafka_ConsumerGroupDescription_member(group, j); - printf( - " Member \"%s\" with client-id %s," - " group instance id: %s, host %s\n", - rd_kafka_MemberDescription_consumer_id(member), - rd_kafka_MemberDescription_client_id(member), - rd_kafka_MemberDescription_group_instance_id( - member), - rd_kafka_MemberDescription_host(member)); - const rd_kafka_MemberAssignment_t *assignment = - rd_kafka_MemberDescription_assignment(member); - const rd_kafka_topic_partition_list_t - *topic_partitions = - rd_kafka_MemberAssignment_partitions( - assignment); - if (!topic_partitions) { - printf(" No assignment\n"); - } else if (topic_partitions->cnt == 0) { - printf(" Empty assignment\n"); - } else { - printf(" Assignment:\n"); - print_partition_list(stdout, topic_partitions, - 0, " "); - } - } } return 0; } +/** + * @brief Parse an integer or fail. + */ +int64_t parse_int(const char *what, const char *str) { + char *end; + long n = strtol(str, &end, 0); + + if (end != str + strlen(str)) { + fprintf(stderr, "%% Invalid input for %s: %s: not an integer\n", + what, str); + exit(1); + } + + return (int64_t)n; +} + /** * @brief Call rd_kafka_DescribeConsumerGroups() with a list of * groups. 
*/ static void cmd_describe_consumer_groups(rd_kafka_conf_t *conf, int argc, char **argv) { - rd_kafka_t *rk; + rd_kafka_t *rk = NULL; const char **groups = NULL; char errstr[512]; - rd_kafka_AdminOptions_t *options; - rd_kafka_event_t *event = NULL; - int retval = 0; - int groups_cnt = 0; - - if (argc >= 1) { - groups = (const char **)&argv[0]; - groups_cnt = argc; - } + rd_kafka_AdminOptions_t *options = NULL; + rd_kafka_event_t *event = NULL; + rd_kafka_error_t *error; + int retval = 0; + int groups_cnt = 0; + const int min_argc = 2; + int include_authorized_operations; + + if (argc < min_argc) + usage("Wrong number of arguments"); + + include_authorized_operations = + parse_int("include_authorized_operations", argv[0]); + if (include_authorized_operations < 0 || + include_authorized_operations > 1) + usage("include_authorized_operations not a 0-1 int"); + + groups = (const char **)&argv[1]; + groups_cnt = argc - 1; /* * Create consumer instance @@ -280,6 +330,16 @@ cmd_describe_consumer_groups(rd_kafka_conf_t *conf, int argc, char **argv) { if (rd_kafka_AdminOptions_set_request_timeout( options, 10 * 1000 /* 10s */, errstr, sizeof(errstr))) { fprintf(stderr, "%% Failed to set timeout: %s\n", errstr); + retval = 1; + goto exit; + } + if ((error = rd_kafka_AdminOptions_set_include_authorized_operations( + options, include_authorized_operations))) { + fprintf(stderr, + "%% Failed to set require authorized operations: %s\n", + rd_kafka_error_string(error)); + rd_kafka_error_destroy(error); + retval = 1; goto exit; } @@ -301,7 +361,7 @@ cmd_describe_consumer_groups(rd_kafka_conf_t *conf, int argc, char **argv) { fprintf(stderr, "%% DescribeConsumerGroups failed[%" PRId32 "]: %s\n", err, rd_kafka_event_error_string(event)); - goto exit; + retval = 1; } else { /* DescribeConsumerGroups request succeeded, but individual @@ -315,12 +375,15 @@ cmd_describe_consumer_groups(rd_kafka_conf_t *conf, int argc, char **argv) { exit: + /* Cleanup. */ if (event) rd_kafka_event_destroy(event); - rd_kafka_AdminOptions_destroy(options); - rd_kafka_queue_destroy(queue); - /* Destroy the client instance */ - rd_kafka_destroy(rk); + if (options) + rd_kafka_AdminOptions_destroy(options); + if (queue) + rd_kafka_queue_destroy(queue); + if (rk) + rd_kafka_destroy(rk); exit(retval); } diff --git a/lib/librdkafka-2.3.0/examples/describe_topics.c b/lib/librdkafka-2.3.0/examples/describe_topics.c new file mode 100644 index 00000000000..5b7425ef8c1 --- /dev/null +++ b/lib/librdkafka-2.3.0/examples/describe_topics.c @@ -0,0 +1,427 @@ +/* + * librdkafka - Apache Kafka C library + * + * Copyright (c) 2023, Confluent Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * DescribeTopics usage example. + */ + +#include <stdio.h> +#include <signal.h> +#include <string.h> +#include <stdlib.h> +#include <stdarg.h> + +#ifdef _WIN32 +#include "../win32/wingetopt.h" +#else +#include <getopt.h> +#endif + + +/* Typical include path would be <librdkafka/rdkafka.h>, but this program + * is built from within the librdkafka source tree and thus differs. */ +#include "rdkafka.h" + + +const char *argv0; +static rd_kafka_queue_t *queue = NULL; /** Admin result queue. + * This is a global so we can + * yield in stop() */ +static volatile sig_atomic_t run = 1; + +/** + * @brief Signal termination of program + */ +static void stop(int sig) { + if (!run) { + fprintf(stderr, "%% Forced termination\n"); + exit(2); + } + run = 0; + if (queue) + rd_kafka_queue_yield(queue); +} + + +static void usage(const char *reason, ...) { + + fprintf(stderr, + "Describe topics usage examples\n" + "\n" + "Usage: %s <options> " + "<include_topic_authorized_operations> <topic1> <topic2> ...\n" + "\n" + "Options:\n" + " -b <brokers> Bootstrap server list to connect to.\n" + " -X <prop=val> Set librdkafka configuration property.\n" + " See CONFIGURATION.md for full list.\n" + " -d <dbg1,...> Enable librdkafka debugging (%s).\n" + "\n", + argv0, rd_kafka_get_debug_contexts()); + + if (reason) { + va_list ap; + char reasonbuf[512]; + + va_start(ap, reason); + vsnprintf(reasonbuf, sizeof(reasonbuf), reason, ap); + va_end(ap); + + fprintf(stderr, "ERROR: %s\n", reasonbuf); + } + + exit(reason ? 1 : 0); +} + + +#define fatal(...) \ + do { \ + fprintf(stderr, "ERROR: "); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + exit(2); \ + } while (0) + + +/** + * @brief Set config property. Exit on failure. + */ +static void conf_set(rd_kafka_conf_t *conf, const char *name, const char *val) { + char errstr[512]; + + if (rd_kafka_conf_set(conf, name, val, errstr, sizeof(errstr)) != + RD_KAFKA_CONF_OK) + fatal("Failed to set %s=%s: %s", name, val, errstr); +} + + +/** + * @brief Parse an integer or fail. + */ +int64_t parse_int(const char *what, const char *str) { + char *end; + long n = strtol(str, &end, 0); + + if (end != str + strlen(str)) { + fprintf(stderr, "%% Invalid input for %s: %s: not an integer\n", + what, str); + exit(1); + } + + return (int64_t)n; +} + +/** + * @brief Print node information. + */ +static void print_node_info(const rd_kafka_Node_t *node) { + if (!node) { + printf("\t\t(null)\n"); + return; + } + + printf("\t\tNode [id: %" PRId32 + ", host: %s" + ", port: %" PRIu16 ", rack %s]\n", + rd_kafka_Node_id(node), rd_kafka_Node_host(node), + rd_kafka_Node_port(node), rd_kafka_Node_rack(node)); +} + +/** + * @brief Print partition information.
+ */ +static void +print_partition_info(const rd_kafka_TopicPartitionInfo_t *partition) { + size_t k; + int id; + const rd_kafka_Node_t **isr; + size_t isr_cnt; + const rd_kafka_Node_t **replicas; + size_t replica_cnt; + + id = rd_kafka_TopicPartitionInfo_partition(partition); + printf("\tPartition id: %d\n", id); + + printf("\tPartition leader: \n"); + print_node_info(rd_kafka_TopicPartitionInfo_leader(partition)); + + isr = rd_kafka_TopicPartitionInfo_isr(partition, &isr_cnt); + if (isr_cnt) { + printf( + "\tThe in-sync replica count is: %d, they " + "are: \n", + (int)isr_cnt); + for (k = 0; k < isr_cnt; k++) + print_node_info(isr[k]); + } else + printf("\tThe in-sync replica count is 0\n"); + + replicas = rd_kafka_TopicPartitionInfo_replicas(partition, &replica_cnt); + if (replica_cnt) { + printf( + "\tThe replica count is: %d, they " + "are: \n", + (int)replica_cnt); + for (k = 0; k < replica_cnt; k++) + print_node_info(replicas[k]); + } else + printf("\tThe replica count is 0\n"); +} + +/** + * @brief Print topic information. + */ +static void print_topic_info(const rd_kafka_TopicDescription_t *topic) { + size_t j; + const char *topic_name = rd_kafka_TopicDescription_name(topic); + const rd_kafka_error_t *error = rd_kafka_TopicDescription_error(topic); + const rd_kafka_AclOperation_t *authorized_operations; + size_t authorized_operations_cnt; + const rd_kafka_TopicPartitionInfo_t **partitions; + size_t partition_cnt; + const rd_kafka_Uuid_t *topic_id = + rd_kafka_TopicDescription_topic_id(topic); + const char *topic_id_str = rd_kafka_Uuid_base64str(topic_id); + + if (rd_kafka_error_code(error)) { + printf("Topic: %s (Topic Id: %s) has error[%" PRId32 "]: %s\n", + topic_name, topic_id_str, rd_kafka_error_code(error), + rd_kafka_error_string(error)); + return; + } + + authorized_operations = rd_kafka_TopicDescription_authorized_operations( + topic, &authorized_operations_cnt); + + printf( + "Topic: %s (Topic Id: %s) succeeded, has %ld authorized operations " + "allowed, they are:\n", + topic_name, topic_id_str, authorized_operations_cnt); + + for (j = 0; j < authorized_operations_cnt; j++) + printf("\t%s operation is allowed\n", + rd_kafka_AclOperation_name(authorized_operations[j])); + + + partitions = + rd_kafka_TopicDescription_partitions(topic, &partition_cnt); + + printf("partition count is: %d\n", (int)partition_cnt); + for (j = 0; j < partition_cnt; j++) { + print_partition_info(partitions[j]); + printf("\n"); + } +} + + +/** + * @brief Print topics information. + */ +static int print_topics_info(const rd_kafka_DescribeTopics_result_t *topicdesc, + int topic_cnt) { + size_t i; + const rd_kafka_TopicDescription_t **result_topics; + size_t result_topics_cnt; + result_topics = rd_kafka_DescribeTopics_result_topics( + topicdesc, &result_topics_cnt); + + if (result_topics_cnt == 0) { + if (topic_cnt > 0) { + fprintf(stderr, "No matching topics found\n"); + return 1; + } else { + fprintf(stderr, "No topics requested\n"); + } + } + + for (i = 0; i < result_topics_cnt; i++) { + print_topic_info(result_topics[i]); + printf("\n"); + } + return 0; +} + + +/** + * @brief Call rd_kafka_DescribeTopics() with a list of + * topics.
+ */ +static void cmd_describe_topics(rd_kafka_conf_t *conf, int argc, char **argv) { + rd_kafka_t *rk = NULL; + const char **topic_names = NULL; + rd_kafka_TopicCollection_t *topics = NULL; + char errstr[512]; + rd_kafka_AdminOptions_t *options = NULL; + rd_kafka_event_t *event = NULL; + rd_kafka_error_t *error; + int retval = 0; + int topics_cnt = 0; + const int min_argc = 1; + int include_topic_authorized_operations; + + if (argc < min_argc) + usage("Wrong number of arguments"); + + include_topic_authorized_operations = + parse_int("include_topic_authorized_operations", argv[0]); + if (include_topic_authorized_operations < 0 || + include_topic_authorized_operations > 1) + usage("include_topic_authorized_operations not a 0-1 int"); + + topic_names = (const char **)&argv[1]; + topics_cnt = argc - 1; + topics = + rd_kafka_TopicCollection_of_topic_names(topic_names, topics_cnt); + + /* + * Create producer instance + * NOTE: rd_kafka_new() takes ownership of the conf object + * and the application must not reference it again after + * this call. + */ + rk = rd_kafka_new(RD_KAFKA_PRODUCER, conf, errstr, sizeof(errstr)); + if (!rk) + fatal("Failed to create new producer: %s", errstr); + + queue = rd_kafka_queue_new(rk); + + /* Signal handler for clean shutdown */ + signal(SIGINT, stop); + + options = + rd_kafka_AdminOptions_new(rk, RD_KAFKA_ADMIN_OP_DESCRIBETOPICS); + + if (rd_kafka_AdminOptions_set_request_timeout( + options, 10 * 1000 /* 10s */, errstr, sizeof(errstr))) { + fprintf(stderr, "%% Failed to set timeout: %s\n", errstr); + goto exit; + } + if ((error = rd_kafka_AdminOptions_set_include_authorized_operations( + options, include_topic_authorized_operations))) { + fprintf(stderr, + "%% Failed to set require topic authorized operations: " + "%s\n", + rd_kafka_error_string(error)); + rd_kafka_error_destroy(error); + retval = 1; + goto exit; + } + + /* Call DescribeTopics */ + rd_kafka_DescribeTopics(rk, topics, options, queue); + + /* Wait for results */ + event = rd_kafka_queue_poll(queue, -1 /* indefinitely but limited by + * the request timeout set + * above (10s) */); + + if (!event) { + /* User hit Ctrl-C, + * see yield call in stop() signal handler */ + fprintf(stderr, "%% Cancelled by user\n"); + + } else if (rd_kafka_event_error(event)) { + rd_kafka_resp_err_t err = rd_kafka_event_error(event); + /* DescribeTopics request failed */ + fprintf(stderr, "%% DescribeTopics failed[%" PRId32 "]: %s\n", + err, rd_kafka_event_error_string(event)); + retval = 1; + goto exit; + + } else { + /* DescribeTopics request succeeded, but individual + * groups may have errors. */ + const rd_kafka_DescribeTopics_result_t *result; + + result = rd_kafka_event_DescribeTopics_result(event); + printf("DescribeTopics results:\n"); + retval = print_topics_info(result, topics_cnt); + } + + +exit: + /* Cleanup. 
*/ + if (topics) + rd_kafka_TopicCollection_destroy(topics); + if (event) + rd_kafka_event_destroy(event); + if (options) + rd_kafka_AdminOptions_destroy(options); + if (queue) + rd_kafka_queue_destroy(queue); + if (rk) + rd_kafka_destroy(rk); + + exit(retval); +} + + +int main(int argc, char **argv) { + rd_kafka_conf_t *conf; /**< Client configuration object */ + int opt; + argv0 = argv[0]; + + /* + * Create Kafka client configuration place-holder + */ + conf = rd_kafka_conf_new(); + + /* + * Parse common options + */ + while ((opt = getopt(argc, argv, "b:X:d:")) != -1) { + switch (opt) { + case 'b': + conf_set(conf, "bootstrap.servers", optarg); + break; + + case 'X': { + char *name = optarg, *val; + + if (!(val = strchr(name, '='))) + fatal("-X expects a name=value argument"); + + *val = '\0'; + val++; + + conf_set(conf, name, val); + break; + } + + case 'd': + conf_set(conf, "debug", optarg); + break; + + default: + usage("Unknown option %c", (char)opt); + } + } + + cmd_describe_topics(conf, argc - optind, &argv[optind]); + return 0; +} diff --git a/lib/librdkafka-2.1.0/examples/globals.json b/lib/librdkafka-2.3.0/examples/globals.json similarity index 100% rename from lib/librdkafka-2.1.0/examples/globals.json rename to lib/librdkafka-2.3.0/examples/globals.json diff --git a/lib/librdkafka-2.1.0/examples/idempotent_producer.c b/lib/librdkafka-2.3.0/examples/idempotent_producer.c similarity index 99% rename from lib/librdkafka-2.1.0/examples/idempotent_producer.c rename to lib/librdkafka-2.3.0/examples/idempotent_producer.c index 91b42a4b9d1..bb34610c420 100644 --- a/lib/librdkafka-2.1.0/examples/idempotent_producer.c +++ b/lib/librdkafka-2.3.0/examples/idempotent_producer.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2019, Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.3.0/examples/incremental_alter_configs.c b/lib/librdkafka-2.3.0/examples/incremental_alter_configs.c new file mode 100644 index 00000000000..40a16cf8421 --- /dev/null +++ b/lib/librdkafka-2.3.0/examples/incremental_alter_configs.c @@ -0,0 +1,348 @@ +/* + * librdkafka - Apache Kafka C library + * + * Copyright (c) 2023, Confluent Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * IncrementalAlterConfigs usage example. + */ + +#include <stdio.h> +#include <signal.h> +#include <string.h> +#include <stdlib.h> +#include <stdarg.h> + +#ifdef _WIN32 +#include "../win32/wingetopt.h" +#else +#include <getopt.h> +#endif + + +/* Typical include path would be <librdkafka/rdkafka.h>, but this program + * is built from within the librdkafka source tree and thus differs. */ +#include "rdkafka.h" + + +const char *argv0; + +static rd_kafka_queue_t *queue; /** Admin result queue. + * This is a global so we can + * yield in stop() */ +static volatile sig_atomic_t run = 1; + +/** + * @brief Signal termination of program + */ +static void stop(int sig) { + if (!run) { + fprintf(stderr, "%% Forced termination\n"); + exit(2); + } + run = 0; + rd_kafka_queue_yield(queue); +} + + +static void usage(const char *reason, ...) { + + fprintf(stderr, + "Incremental alter config usage examples\n" + "\n" + "Usage: %s <res_type1> <res_name1> <alter_op_type1>" + " <config_name1> <config_value1> ...\n" + "\n" + "Options:\n" + " -b <brokers> Bootstrap server list to connect to.\n" + " -X <prop=val> Set librdkafka configuration property.\n" + " See CONFIGURATION.md for full list.\n" + " -d <dbg,...> Enable librdkafka debugging (%s).\n" + "\n", + argv0, rd_kafka_get_debug_contexts()); + + if (reason) { + va_list ap; + char reasonbuf[512]; + + va_start(ap, reason); + vsnprintf(reasonbuf, sizeof(reasonbuf), reason, ap); + va_end(ap); + + fprintf(stderr, "ERROR: %s\n", reasonbuf); + } + + exit(reason ? 1 : 0); +} + + +#define fatal(...) \ + do { \ + fprintf(stderr, "ERROR: "); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + exit(2); \ + } while (0) + + +/** + * @brief Set config property. Exit on failure. + */ +static void conf_set(rd_kafka_conf_t *conf, const char *name, const char *val) { + char errstr[512]; + + if (rd_kafka_conf_set(conf, name, val, errstr, sizeof(errstr)) != + RD_KAFKA_CONF_OK) + fatal("Failed to set %s=%s: %s", name, val, errstr); +} + + + +static void print_alter_configs_result( + FILE *fp, + const rd_kafka_IncrementalAlterConfigs_result_t *result, + const char *prefix) { + size_t i; + size_t config_cnt; + const rd_kafka_ConfigResource_t **configs = + rd_kafka_IncrementalAlterConfigs_result_resources(result, + &config_cnt); + + for (i = 0; i < config_cnt; i++) { + const rd_kafka_ConfigResource_t *config = configs[i]; + + const char *resname = rd_kafka_ConfigResource_name(config); + rd_kafka_ResourceType_t restype = + rd_kafka_ConfigResource_type(config); + rd_kafka_resp_err_t err = rd_kafka_ConfigResource_error(config); + + fprintf(fp, "%sResource type: %s name: %s error: %s: %s\n", + prefix, rd_kafka_ResourceType_name(restype), resname, + rd_kafka_err2str(err), + rd_kafka_ConfigResource_error_string(config)); + } +} + + +/** + * @brief Call rd_kafka_IncrementalAlterConfigs() with a list of + * configs to alter.
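+ *
+ * An illustrative invocation (broker address, topic and config names are
+ * placeholders; arguments are consumed in groups of five, as parsed below):
+ *   ./incremental_alter_configs -b localhost:9092 \
+ *       TOPIC my-topic SET cleanup.policy compact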
+ */ +static void +cmd_incremental_alter_configs(rd_kafka_conf_t *conf, int argc, char **argv) { + rd_kafka_t *rk; + char errstr[512]; + rd_kafka_AdminOptions_t *options; + rd_kafka_event_t *event = NULL; + rd_kafka_error_t *error; + int retval = 0; + const char *prefix = " "; + int i = 0; + int resources = 0; + int config_cnt; + rd_kafka_ResourceType_t prev_restype = RD_KAFKA_RESOURCE_UNKNOWN; + char *prev_resname = NULL; + rd_kafka_ConfigResource_t **configs; + + if (argc % 5 != 0) { + usage("Invalid number of arguments: %d", argc); + } + + config_cnt = argc / 5; + configs = calloc(config_cnt, sizeof(*configs)); + + for (i = 0; i < config_cnt; i++) { + char *restype_s = argv[i * 5]; + char *resname = argv[i * 5 + 1]; + char *alter_op_type_s = argv[i * 5 + 2]; + char *config_name = argv[i * 5 + 3]; + char *config_value = argv[i * 5 + 4]; + rd_kafka_ConfigResource_t *config; + rd_kafka_AlterConfigOpType_t op_type; + rd_kafka_ResourceType_t restype = + !strcmp(restype_s, "TOPIC") + ? RD_KAFKA_RESOURCE_TOPIC + : !strcmp(restype_s, "BROKER") + ? RD_KAFKA_RESOURCE_BROKER + : RD_KAFKA_RESOURCE_UNKNOWN; + + if (restype == RD_KAFKA_RESOURCE_UNKNOWN) { + usage("Invalid resource type: %s", restype_s); + } + + /* It's not necessary, but cleaner and more efficient to group + * incremental alterations for the same ConfigResource.*/ + if (restype != prev_restype || strcmp(resname, prev_resname)) { + configs[resources++] = + rd_kafka_ConfigResource_new(restype, resname); + } + + config = configs[resources - 1]; + prev_restype = restype; + prev_resname = resname; + + if (!strcmp(alter_op_type_s, "SET")) { + op_type = RD_KAFKA_ALTER_CONFIG_OP_TYPE_SET; + } else if (!strcmp(alter_op_type_s, "APPEND")) { + op_type = RD_KAFKA_ALTER_CONFIG_OP_TYPE_APPEND; + } else if (!strcmp(alter_op_type_s, "SUBTRACT")) { + op_type = RD_KAFKA_ALTER_CONFIG_OP_TYPE_SUBTRACT; + } else if (!strcmp(alter_op_type_s, "DELETE")) { + op_type = RD_KAFKA_ALTER_CONFIG_OP_TYPE_DELETE; + } else { + usage("Invalid alter config operation: %s", + alter_op_type_s); + } + + error = rd_kafka_ConfigResource_add_incremental_config( + config, config_name, op_type, config_value); + + if (error) { + usage( + "Error setting incremental config alteration %s" + " at index %d: %s", + alter_op_type_s, i, rd_kafka_error_string(error)); + } + } + + /* + * Create consumer instance + * NOTE: rd_kafka_new() takes ownership of the conf object + * and the application must not reference it again after + * this call. 
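+ * For example, calling rd_kafka_conf_set(conf, ...) after a successful
+ * rd_kafka_new() would be a use-after-ownership-transfer bug: conf is
+ * now owned by the client instance.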
+ */ + rk = rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr, sizeof(errstr)); + if (!rk) + fatal("Failed to create new consumer: %s", errstr); + + /* + * Incremental alter configs + */ + queue = rd_kafka_queue_new(rk); + + /* Signal handler for clean shutdown */ + signal(SIGINT, stop); + + options = rd_kafka_AdminOptions_new( + rk, RD_KAFKA_ADMIN_OP_INCREMENTALALTERCONFIGS); + + if (rd_kafka_AdminOptions_set_request_timeout( + options, 10 * 1000 /* 10s */, errstr, sizeof(errstr))) { + fprintf(stderr, "%% Failed to set timeout: %s\n", errstr); + goto exit; + } + + rd_kafka_IncrementalAlterConfigs(rk, configs, resources, options, + queue); + + rd_kafka_ConfigResource_destroy_array(configs, resources); + free(configs); + + /* Wait for results */ + event = rd_kafka_queue_poll(queue, -1 /* indefinitely but limited by + * the request timeout set + * above (10s) */); + + if (!event) { + /* User hit Ctrl-C, + * see yield call in stop() signal handler */ + fprintf(stderr, "%% Cancelled by user\n"); + + } else if (rd_kafka_event_error(event)) { + rd_kafka_resp_err_t err = rd_kafka_event_error(event); + /* IncrementalAlterConfigs request failed */ + fprintf(stderr, "%% IncrementalAlterConfigs failed: %s: %s\n", + rd_kafka_err2str(err), + rd_kafka_event_error_string(event)); + goto exit; + + } else { + /* IncrementalAlterConfigs request succeeded, but individual + * configs may have errors. */ + const rd_kafka_IncrementalAlterConfigs_result_t *result = + rd_kafka_event_IncrementalAlterConfigs_result(event); + printf("IncrementalAlterConfigs results:\n"); + print_alter_configs_result(stdout, result, prefix); + } + + +exit: + if (event) + rd_kafka_event_destroy(event); + rd_kafka_AdminOptions_destroy(options); + rd_kafka_queue_destroy(queue); + /* Destroy the client instance */ + rd_kafka_destroy(rk); + + exit(retval); +} + +int main(int argc, char **argv) { + rd_kafka_conf_t *conf; /**< Client configuration object */ + int opt; + argv0 = argv[0]; + + /* + * Create Kafka client configuration place-holder + */ + conf = rd_kafka_conf_new(); + + + /* + * Parse common options + */ + while ((opt = getopt(argc, argv, "b:X:d:")) != -1) { + switch (opt) { + case 'b': + conf_set(conf, "bootstrap.servers", optarg); + break; + + case 'X': { + char *name = optarg, *val; + + if (!(val = strchr(name, '='))) + fatal("-X expects a name=value argument"); + + *val = '\0'; + val++; + + conf_set(conf, name, val); + break; + } + + case 'd': + conf_set(conf, "debug", optarg); + break; + + default: + usage("Unknown option %c", (char)opt); + } + } + + cmd_incremental_alter_configs(conf, argc - optind, &argv[optind]); + + return 0; +} diff --git a/lib/librdkafka-2.1.0/examples/kafkatest_verifiable_client.cpp b/lib/librdkafka-2.3.0/examples/kafkatest_verifiable_client.cpp similarity index 100% rename from lib/librdkafka-2.1.0/examples/kafkatest_verifiable_client.cpp rename to lib/librdkafka-2.3.0/examples/kafkatest_verifiable_client.cpp diff --git a/lib/librdkafka-2.1.0/examples/list_consumer_group_offsets.c b/lib/librdkafka-2.3.0/examples/list_consumer_group_offsets.c similarity index 100% rename from lib/librdkafka-2.1.0/examples/list_consumer_group_offsets.c rename to lib/librdkafka-2.3.0/examples/list_consumer_group_offsets.c diff --git a/lib/librdkafka-2.1.0/examples/list_consumer_groups.c b/lib/librdkafka-2.3.0/examples/list_consumer_groups.c similarity index 100% rename from lib/librdkafka-2.1.0/examples/list_consumer_groups.c rename to lib/librdkafka-2.3.0/examples/list_consumer_groups.c diff --git 
a/lib/librdkafka-2.3.0/examples/list_offsets.c b/lib/librdkafka-2.3.0/examples/list_offsets.c new file mode 100644 index 00000000000..f84c11c121c --- /dev/null +++ b/lib/librdkafka-2.3.0/examples/list_offsets.c @@ -0,0 +1,327 @@ +/* + * librdkafka - Apache Kafka C library + * + * Copyright (c) 2023, Confluent Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Example utility that shows how to use ListOffsets (AdminAPI) + * to list the offsets [EARLIEST, LATEST, ...] for + * one or more topic partitions. + */ + +#include <stdio.h> +#include <signal.h> +#include <string.h> +#include <stdlib.h> + +#ifdef _WIN32 +#include "../win32/wingetopt.h" +#else +#include <getopt.h> +#endif + + +/* Typical include path would be <librdkafka/rdkafka.h>, but this program + * is built from within the librdkafka source tree and thus differs. */ +#include "rdkafka.h" + + +const char *argv0; + +static rd_kafka_queue_t *queue; /** Admin result queue. + * This is a global so we can + * yield in stop() */ +static volatile sig_atomic_t run = 1; + +/** + * @brief Signal termination of program + */ +static void stop(int sig) { + if (!run) { + fprintf(stderr, "%% Forced termination\n"); + exit(2); + } + run = 0; + rd_kafka_queue_yield(queue); +} + + +static void usage(const char *reason, ...) { + + fprintf(stderr, + "List offsets usage examples\n" + "\n" + "Usage: %s [--] <isolation_level> " + "<topic1> <partition1> <offset1> " + "[<topic2> <partition2> <offset2> ...]\n" + "\n" + "Options:\n" + " -b <brokers> Bootstrap server list to connect to.\n" + " -X <prop=val> Set librdkafka configuration property.\n" + " See CONFIGURATION.md for full list.\n" + " -d <dbg,...> Enable librdkafka debugging (%s).\n" + "\n", + argv0, rd_kafka_get_debug_contexts()); + + if (reason) { + va_list ap; + char reasonbuf[512]; + + va_start(ap, reason); + vsnprintf(reasonbuf, sizeof(reasonbuf), reason, ap); + va_end(ap); + + fprintf(stderr, "ERROR: %s\n", reasonbuf); + } + + exit(reason ? 1 : 0); +} + + +#define fatal(...) \ + do { \ + fprintf(stderr, "ERROR: "); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + exit(2); \ + } while (0) + + +/** + * @brief Set config property. Exit on failure.
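+ *
+ * Example (property name and value are illustrative):
+ *   conf_set(conf, "socket.timeout.ms", "30000");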
+ */ +static void conf_set(rd_kafka_conf_t *conf, const char *name, const char *val) { + char errstr[512]; + + if (rd_kafka_conf_set(conf, name, val, errstr, sizeof(errstr)) != + RD_KAFKA_CONF_OK) + fatal("Failed to set %s=%s: %s", name, val, errstr); +} + +/** + * @brief Print list offsets result information. + */ +static int +print_list_offsets_result_info(const rd_kafka_ListOffsets_result_t *result, + int req_cnt) { + const rd_kafka_ListOffsetsResultInfo_t **result_infos; + size_t cnt; + size_t i; + result_infos = rd_kafka_ListOffsets_result_infos(result, &cnt); + printf("ListOffsets results:\n"); + if (cnt == 0) { + if (req_cnt > 0) { + fprintf(stderr, "No matching partitions found\n"); + return 1; + } else { + fprintf(stderr, "No partitions requested\n"); + } + } + for (i = 0; i < cnt; i++) { + const rd_kafka_topic_partition_t *topic_partition = + rd_kafka_ListOffsetsResultInfo_topic_partition( + result_infos[i]); + int64_t timestamp = + rd_kafka_ListOffsetsResultInfo_timestamp(result_infos[i]); + printf( + "Topic: %s Partition: %d Error: %s " + "Offset: %" PRId64 " Leader Epoch: %" PRId32 + " Timestamp: %" PRId64 "\n", + topic_partition->topic, topic_partition->partition, + rd_kafka_err2str(topic_partition->err), + topic_partition->offset, + rd_kafka_topic_partition_get_leader_epoch(topic_partition), + timestamp); + } + return 0; +} + +/** + * @brief Parse an integer or fail. + */ +int64_t parse_int(const char *what, const char *str) { + char *end; + unsigned long n = strtoull(str, &end, 0); + + if (end != str + strlen(str)) { + fprintf(stderr, "%% Invalid input for %s: %s: not an integer\n", + what, str); + exit(1); + } + + return (int64_t)n; +} + +/** + * @brief Call rd_kafka_ListOffsets() with a list of topic partitions. + */ +static void cmd_list_offsets(rd_kafka_conf_t *conf, int argc, char **argv) { + rd_kafka_t *rk; + char errstr[512]; + rd_kafka_AdminOptions_t *options; + rd_kafka_IsolationLevel_t isolation_level; + rd_kafka_event_t *event = NULL; + rd_kafka_error_t *error = NULL; + int i; + int retval = 0; + int partitions = 0; + rd_kafka_topic_partition_list_t *rktpars; + + if ((argc - 1) % 3 != 0) { + usage("Wrong number of arguments: %d", argc); + } + + isolation_level = parse_int("isolation level", argv[0]); + argc--; + argv++; + rktpars = rd_kafka_topic_partition_list_new(argc / 3); + for (i = 0; i < argc; i += 3) { + rd_kafka_topic_partition_list_add( + rktpars, argv[i], parse_int("partition", argv[i + 1])) + ->offset = parse_int("offset", argv[i + 2]); + } + partitions = rktpars->cnt; + + /* + * Create consumer instance + * NOTE: rd_kafka_new() takes ownership of the conf object + * and the application must not reference it again after + * this call. 
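+ *
+ * An illustrative invocation of this example (broker address and topic
+ * are placeholders; offsets -2 and -1 follow the librdkafka convention
+ * for the EARLIEST and LATEST offset specs):
+ *   ./list_offsets -b localhost:9092 1 my-topic 0 -2 my-topic 0 -1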
+ */ + rk = rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr, sizeof(errstr)); + if (!rk) { + usage("Failed to create new consumer: %s", errstr); + } + + /* + * List offsets + */ + queue = rd_kafka_queue_new(rk); + + /* Signal handler for clean shutdown */ + signal(SIGINT, stop); + + options = rd_kafka_AdminOptions_new(rk, RD_KAFKA_ADMIN_OP_LISTOFFSETS); + + if (rd_kafka_AdminOptions_set_request_timeout( + options, 10 * 1000 /* 10s */, errstr, sizeof(errstr))) { + fprintf(stderr, "%% Failed to set timeout: %s\n", errstr); + goto exit; + } + + if ((error = rd_kafka_AdminOptions_set_isolation_level( + options, isolation_level))) { + fprintf(stderr, "%% Failed to set isolation level: %s\n", + rd_kafka_error_string(error)); + rd_kafka_error_destroy(error); + goto exit; + } + + rd_kafka_ListOffsets(rk, rktpars, options, queue); + rd_kafka_topic_partition_list_destroy(rktpars); + rd_kafka_AdminOptions_destroy(options); + + /* Wait for results */ + event = rd_kafka_queue_poll(queue, -1 /* indefinitely but limited by + * the request timeout set + * above (10s) */); + + if (!event) { + /* User hit Ctrl-C, + * see yield call in stop() signal handler */ + fprintf(stderr, "%% Cancelled by user\n"); + + } else if (rd_kafka_event_error(event)) { + rd_kafka_resp_err_t err = rd_kafka_event_error(event); + /* ListOffsets request failed */ + fprintf(stderr, "%% ListOffsets failed[%" PRId32 "]: %s\n", err, + rd_kafka_event_error_string(event)); + goto exit; + } else { + /* ListOffsets request succeeded, but individual + * partitions may have errors. */ + const rd_kafka_ListOffsets_result_t *result; + result = rd_kafka_event_ListOffsets_result(event); + retval = print_list_offsets_result_info(result, partitions); + } + + +exit: + if (event) + rd_kafka_event_destroy(event); + rd_kafka_queue_destroy(queue); + /* Destroy the client instance */ + rd_kafka_destroy(rk); + + exit(retval); +} + +int main(int argc, char **argv) { + rd_kafka_conf_t *conf; /**< Client configuration object */ + int opt; + argv0 = argv[0]; + + /* + * Create Kafka client configuration place-holder + */ + conf = rd_kafka_conf_new(); + + + /* + * Parse common options + */ + while ((opt = getopt(argc, argv, "b:X:d:")) != -1) { + switch (opt) { + case 'b': + conf_set(conf, "bootstrap.servers", optarg); + break; + + case 'X': { + char *name = optarg, *val; + + if (!(val = strchr(name, '='))) + fatal("-X expects a name=value argument"); + + *val = '\0'; + val++; + + conf_set(conf, name, val); + break; + } + + case 'd': + conf_set(conf, "debug", optarg); + break; + + default: + usage("Unknown option %c", (char)opt); + } + } + + cmd_list_offsets(conf, argc - optind, &argv[optind]); + + return 0; +} diff --git a/lib/librdkafka-2.1.0/examples/misc.c b/lib/librdkafka-2.3.0/examples/misc.c similarity index 100% rename from lib/librdkafka-2.1.0/examples/misc.c rename to lib/librdkafka-2.3.0/examples/misc.c diff --git a/lib/librdkafka-2.1.0/examples/openssl_engine_example.cpp b/lib/librdkafka-2.3.0/examples/openssl_engine_example.cpp similarity index 99% rename from lib/librdkafka-2.1.0/examples/openssl_engine_example.cpp rename to lib/librdkafka-2.3.0/examples/openssl_engine_example.cpp index 401857e6b22..72797471762 100644 --- a/lib/librdkafka-2.1.0/examples/openssl_engine_example.cpp +++ b/lib/librdkafka-2.3.0/examples/openssl_engine_example.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2021, Magnus Edenhill + * Copyright (c) 2021-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/examples/producer.c b/lib/librdkafka-2.3.0/examples/producer.c similarity index 99% rename from lib/librdkafka-2.1.0/examples/producer.c rename to lib/librdkafka-2.3.0/examples/producer.c index b6fb7115008..40e77b79eda 100644 --- a/lib/librdkafka-2.1.0/examples/producer.c +++ b/lib/librdkafka-2.3.0/examples/producer.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2017, Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,7 +29,7 @@ /** * Simple Apache Kafka producer * using the Kafka driver from librdkafka - * (https://github.com/edenhill/librdkafka) + * (https://github.com/confluentinc/librdkafka) */ #include diff --git a/lib/librdkafka-2.1.0/examples/producer.cpp b/lib/librdkafka-2.3.0/examples/producer.cpp similarity index 98% rename from lib/librdkafka-2.1.0/examples/producer.cpp rename to lib/librdkafka-2.3.0/examples/producer.cpp index d4a8a0c49ed..76560eb6bee 100755 --- a/lib/librdkafka-2.1.0/examples/producer.cpp +++ b/lib/librdkafka-2.3.0/examples/producer.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2019, Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,7 +29,7 @@ /** * Apache Kafka producer * using the Kafka driver from librdkafka - * (https://github.com/edenhill/librdkafka) + * (https://github.com/confluentinc/librdkafka) */ #include diff --git a/lib/librdkafka-2.1.0/examples/rdkafka_complex_consumer_example.c b/lib/librdkafka-2.3.0/examples/rdkafka_complex_consumer_example.c similarity index 99% rename from lib/librdkafka-2.1.0/examples/rdkafka_complex_consumer_example.c rename to lib/librdkafka-2.3.0/examples/rdkafka_complex_consumer_example.c index 1632b30305f..ac56e659f2e 100644 --- a/lib/librdkafka-2.1.0/examples/rdkafka_complex_consumer_example.c +++ b/lib/librdkafka-2.3.0/examples/rdkafka_complex_consumer_example.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2015, Magnus Edenhill + * Copyright (c) 2015-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,7 +29,7 @@ /** * Apache Kafka high level consumer example program * using the Kafka driver from librdkafka - * (https://github.com/edenhill/librdkafka) + * (https://github.com/confluentinc/librdkafka) */ #include diff --git a/lib/librdkafka-2.1.0/examples/rdkafka_complex_consumer_example.cpp b/lib/librdkafka-2.3.0/examples/rdkafka_complex_consumer_example.cpp similarity index 99% rename from lib/librdkafka-2.1.0/examples/rdkafka_complex_consumer_example.cpp rename to lib/librdkafka-2.3.0/examples/rdkafka_complex_consumer_example.cpp index b4f158cbd9d..dc193df89d7 100644 --- a/lib/librdkafka-2.1.0/examples/rdkafka_complex_consumer_example.cpp +++ b/lib/librdkafka-2.3.0/examples/rdkafka_complex_consumer_example.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2014, Magnus Edenhill + * Copyright (c) 2014-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -29,7 +29,7 @@ /** * Apache Kafka consumer & producer example programs * using the Kafka driver from librdkafka - * (https://github.com/edenhill/librdkafka) + * (https://github.com/confluentinc/librdkafka) */ #include diff --git a/lib/librdkafka-2.1.0/examples/rdkafka_consume_batch.cpp b/lib/librdkafka-2.3.0/examples/rdkafka_consume_batch.cpp similarity index 98% rename from lib/librdkafka-2.1.0/examples/rdkafka_consume_batch.cpp rename to lib/librdkafka-2.3.0/examples/rdkafka_consume_batch.cpp index 576b396f87d..d9166303526 100644 --- a/lib/librdkafka-2.1.0/examples/rdkafka_consume_batch.cpp +++ b/lib/librdkafka-2.3.0/examples/rdkafka_consume_batch.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2018, Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,7 +29,7 @@ /** * Apache Kafka consumer & producer example programs * using the Kafka driver from librdkafka - * (https://github.com/edenhill/librdkafka) + * (https://github.com/confluentinc/librdkafka) * * This example shows how to read batches of messages. * Note that messages are fetched from the broker in batches regardless diff --git a/lib/librdkafka-2.1.0/examples/rdkafka_example.c b/lib/librdkafka-2.3.0/examples/rdkafka_example.c similarity index 99% rename from lib/librdkafka-2.1.0/examples/rdkafka_example.c rename to lib/librdkafka-2.3.0/examples/rdkafka_example.c index 91415318ac8..b4fc4793f4c 100644 --- a/lib/librdkafka-2.1.0/examples/rdkafka_example.c +++ b/lib/librdkafka-2.3.0/examples/rdkafka_example.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,7 +29,7 @@ /** * Apache Kafka consumer & producer example programs * using the Kafka driver from librdkafka - * (https://github.com/edenhill/librdkafka) + * (https://github.com/confluentinc/librdkafka) */ #include diff --git a/lib/librdkafka-2.1.0/examples/rdkafka_example.cpp b/lib/librdkafka-2.3.0/examples/rdkafka_example.cpp similarity index 99% rename from lib/librdkafka-2.1.0/examples/rdkafka_example.cpp rename to lib/librdkafka-2.3.0/examples/rdkafka_example.cpp index 91c3440b3dc..e4c832b0649 100644 --- a/lib/librdkafka-2.1.0/examples/rdkafka_example.cpp +++ b/lib/librdkafka-2.3.0/examples/rdkafka_example.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2014, Magnus Edenhill + * Copyright (c) 2014-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -29,7 +29,7 @@ /** * Apache Kafka consumer & producer example programs * using the Kafka driver from librdkafka - * (https://github.com/edenhill/librdkafka) + * (https://github.com/confluentinc/librdkafka) */ #include diff --git a/lib/librdkafka-2.1.0/examples/rdkafka_performance.c b/lib/librdkafka-2.3.0/examples/rdkafka_performance.c similarity index 99% rename from lib/librdkafka-2.1.0/examples/rdkafka_performance.c rename to lib/librdkafka-2.3.0/examples/rdkafka_performance.c index a12bb747103..dab0b06b8f6 100644 --- a/lib/librdkafka-2.1.0/examples/rdkafka_performance.c +++ b/lib/librdkafka-2.3.0/examples/rdkafka_performance.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,7 +30,7 @@ /** * Apache Kafka consumer & producer performance tester * using the Kafka driver from librdkafka - * (https://github.com/edenhill/librdkafka) + * (https://github.com/confluentinc/librdkafka) */ #ifdef _MSC_VER diff --git a/lib/librdkafka-2.1.0/examples/transactions-older-broker.c b/lib/librdkafka-2.3.0/examples/transactions-older-broker.c similarity index 99% rename from lib/librdkafka-2.1.0/examples/transactions-older-broker.c rename to lib/librdkafka-2.3.0/examples/transactions-older-broker.c index e9f8d06f756..711d51a8a3e 100644 --- a/lib/librdkafka-2.1.0/examples/transactions-older-broker.c +++ b/lib/librdkafka-2.3.0/examples/transactions-older-broker.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020, Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/examples/transactions.c b/lib/librdkafka-2.3.0/examples/transactions.c similarity index 99% rename from lib/librdkafka-2.1.0/examples/transactions.c rename to lib/librdkafka-2.3.0/examples/transactions.c index 0a8b9a8cf0f..705e504e968 100644 --- a/lib/librdkafka-2.1.0/examples/transactions.c +++ b/lib/librdkafka-2.3.0/examples/transactions.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020, Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.3.0/examples/user_scram.c b/lib/librdkafka-2.3.0/examples/user_scram.c new file mode 100644 index 00000000000..95d6809b404 --- /dev/null +++ b/lib/librdkafka-2.3.0/examples/user_scram.c @@ -0,0 +1,492 @@ +/* + * librdkafka - Apache Kafka C library + * + * Copyright (c) 2023, Confluent Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Example utility that shows how to use SCRAM APIs (AdminAPI) + * DescribeUserScramCredentials -> Describe user SCRAM credentials + * AlterUserScramCredentials -> Upsert or delete user SCRAM credentials + */ + +#include <stdio.h> +#include <signal.h> +#include <string.h> +#include <stdlib.h> +#include <stdarg.h> + +#ifdef _WIN32 +#include "../win32/wingetopt.h" +#else +#include <getopt.h> +#endif + +/* Typical include path would be <librdkafka/rdkafka.h>, but this program + * is built from within the librdkafka source tree and thus differs. */ +#include "rdkafka.h" + +const char *argv0; + +static rd_kafka_queue_t *queue; /** Admin result queue. + * This is a global so we can + * yield in stop() */ +static volatile sig_atomic_t run = 1; + +/** + * @brief Signal termination of program + */ +static void stop(int sig) { + if (!run) { + fprintf(stderr, "%% Forced termination\n"); + exit(2); + } + run = 0; + rd_kafka_queue_yield(queue); +} + + +static void usage(const char *reason, ...) { + fprintf(stderr, + "Describe/Alter user SCRAM credentials\n" + "\n" + "Usage: %s <command> [<args> ...]\n" + " DESCRIBE <user1> <user2> ... \n" + " UPSERT <user1> <mechanism1> <iterations1>" + " <password1> <salt1> ... \n" + " DELETE <user1> <mechanism1> ... \n" + "\n" + "Options:\n" + " -b <brokers> Bootstrap server list to connect to.\n" + " -X <prop=val> Set librdkafka configuration property.\n" + " See CONFIGURATION.md for full list.\n" + " -d <dbg,...> Enable librdkafka debugging (%s).\n" + "\n", + argv0, rd_kafka_get_debug_contexts()); + + if (reason) { + va_list ap; + char reasonbuf[512]; + + va_start(ap, reason); + vsnprintf(reasonbuf, sizeof(reasonbuf), reason, ap); + va_end(ap); + + fprintf(stderr, "ERROR: %s\n", reasonbuf); + } + + exit(reason ? 1 : 0); +} + +#define fatal(...) \ + do { \ + fprintf(stderr, "ERROR: "); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + exit(2); \ + } while (0) + + +/** + * @brief Set config property. Exit on failure. + */ +static void conf_set(rd_kafka_conf_t *conf, const char *name, const char *val) { + char errstr[512]; + + if (rd_kafka_conf_set(conf, name, val, errstr, sizeof(errstr)) != + RD_KAFKA_CONF_OK) + fatal("Failed to set %s=%s: %s", name, val, errstr); +} + + +/** + * @brief Parse an integer or fail. + */ +int64_t parse_int(const char *what, const char *str) { + char *end; + unsigned long n = strtoull(str, &end, 0); + + if (end != str + strlen(str)) { + fprintf(stderr, "%% Invalid input for %s: %s: not an integer\n", + what, str); + exit(1); + } + + return (int64_t)n; +} + +rd_kafka_ScramMechanism_t parse_mechanism(const char *arg) { + return !strcmp(arg, "SCRAM-SHA-256") + ? RD_KAFKA_SCRAM_MECHANISM_SHA_256 + : !strcmp(arg, "SCRAM-SHA-512") + ?
RD_KAFKA_SCRAM_MECHANISM_SHA_512 + : RD_KAFKA_SCRAM_MECHANISM_UNKNOWN; +} + +static void print_descriptions( + const rd_kafka_UserScramCredentialsDescription_t **descriptions, + size_t description_cnt) { + size_t i; + printf("DescribeUserScramCredentials descriptions[%zu]\n", + description_cnt); + for (i = 0; i < description_cnt; i++) { + const rd_kafka_UserScramCredentialsDescription_t *description; + description = descriptions[i]; + const char *username; + const rd_kafka_error_t *error; + username = + rd_kafka_UserScramCredentialsDescription_user(description); + error = + rd_kafka_UserScramCredentialsDescription_error(description); + rd_kafka_resp_err_t err = rd_kafka_error_code(error); + printf(" Username: \"%s\" Error: \"%s\"\n", username, + rd_kafka_err2str(err)); + if (err) { + const char *errstr = rd_kafka_error_string(error); + printf(" ErrorMessage: \"%s\"\n", errstr); + } + size_t num_credentials = + rd_kafka_UserScramCredentialsDescription_scramcredentialinfo_count( + description); + size_t itr; + for (itr = 0; itr < num_credentials; itr++) { + const rd_kafka_ScramCredentialInfo_t *scram_credential = + rd_kafka_UserScramCredentialsDescription_scramcredentialinfo( + description, itr); + rd_kafka_ScramMechanism_t mechanism; + int32_t iterations; + mechanism = rd_kafka_ScramCredentialInfo_mechanism( + scram_credential); + iterations = rd_kafka_ScramCredentialInfo_iterations( + scram_credential); + switch (mechanism) { + case RD_KAFKA_SCRAM_MECHANISM_UNKNOWN: + printf( + " Mechanism is " + "UNKNOWN\n"); + break; + case RD_KAFKA_SCRAM_MECHANISM_SHA_256: + printf( + " Mechanism is " + "SCRAM-SHA-256\n"); + break; + case RD_KAFKA_SCRAM_MECHANISM_SHA_512: + printf( + " Mechanism is " + "SCRAM-SHA-512\n"); + break; + default: + printf( + " Mechanism does " + "not match enums\n"); + } + printf(" Iterations are %d\n", iterations); + } + } +} + +static void print_alteration_responses( + const rd_kafka_AlterUserScramCredentials_result_response_t **responses, + size_t responses_cnt) { + size_t i; + printf("AlterUserScramCredentials responses [%zu]:\n", responses_cnt); + for (i = 0; i < responses_cnt; i++) { + const rd_kafka_AlterUserScramCredentials_result_response_t + *response = responses[i]; + const char *username; + const rd_kafka_error_t *error; + username = + rd_kafka_AlterUserScramCredentials_result_response_user( + response); + error = + rd_kafka_AlterUserScramCredentials_result_response_error( + response); + rd_kafka_resp_err_t err = rd_kafka_error_code(error); + if (err) { + const char *errstr = rd_kafka_error_string(error); + printf(" Username: \"%s\", Error: \"%s\"\n", + username, rd_kafka_err2str(err)); + printf(" ErrorMessage: \"%s\"\n", errstr); + } else { + printf(" Username: \"%s\" Success\n", username); + } + } +} + +static void Describe(rd_kafka_t *rk, const char **users, size_t user_cnt) { + rd_kafka_event_t *event; + char errstr[512]; /* librdkafka API error reporting buffer */ + + rd_kafka_AdminOptions_t *options = rd_kafka_AdminOptions_new( + rk, RD_KAFKA_ADMIN_OP_DESCRIBEUSERSCRAMCREDENTIALS); + + if (rd_kafka_AdminOptions_set_request_timeout( + options, 30 * 1000 /* 30s */, errstr, sizeof(errstr))) { + fprintf(stderr, "%% Failed to set timeout: %s\n", errstr); + return; + } + + /* NULL argument gives us all the users*/ + rd_kafka_DescribeUserScramCredentials(rk, users, user_cnt, options, + queue); + rd_kafka_AdminOptions_destroy(options); + + /* Wait for results */ + event = rd_kafka_queue_poll(queue, -1 /*indefinitely*/); + if (!event) { + /* User hit Ctrl-C */ + 
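/* rd_kafka_queue_poll() returns NULL here because stop() called
+ * rd_kafka_queue_yield() on this queue. */
+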
fprintf(stderr, "%% Cancelled by user\n"); + + } else if (rd_kafka_event_error(event)) { + /* Request failed */ + fprintf(stderr, "%% DescribeUserScramCredentials failed: %s\n", + rd_kafka_event_error_string(event)); + + } else { + /* Request succeeded */ + const rd_kafka_DescribeUserScramCredentials_result_t *result; + const rd_kafka_UserScramCredentialsDescription_t **descriptions; + size_t description_cnt; + result = + rd_kafka_event_DescribeUserScramCredentials_result(event); + descriptions = + rd_kafka_DescribeUserScramCredentials_result_descriptions( + result, &description_cnt); + print_descriptions(descriptions, description_cnt); + } + rd_kafka_event_destroy(event); +} + +static void Alter(rd_kafka_t *rk, + rd_kafka_UserScramCredentialAlteration_t **alterations, + size_t alteration_cnt) { + rd_kafka_event_t *event; + char errstr[512]; /* librdkafka API error reporting buffer */ + + /* Set timeout (optional) */ + rd_kafka_AdminOptions_t *options = rd_kafka_AdminOptions_new( + rk, RD_KAFKA_ADMIN_OP_ALTERUSERSCRAMCREDENTIALS); + + if (rd_kafka_AdminOptions_set_request_timeout( + options, 30 * 1000 /* 30s */, errstr, sizeof(errstr))) { + fprintf(stderr, "%% Failed to set timeout: %s\n", errstr); + return; + } + + /* Call the AlterUserScramCredentials function*/ + rd_kafka_AlterUserScramCredentials(rk, alterations, alteration_cnt, + options, queue); + rd_kafka_AdminOptions_destroy(options); + + /* Wait for results */ + event = rd_kafka_queue_poll(queue, -1 /*indefinitely*/); + if (!event) { + /* User hit Ctrl-C */ + fprintf(stderr, "%% Cancelled by user\n"); + + } else if (rd_kafka_event_error(event)) { + /* Request failed */ + fprintf(stderr, "%% AlterUserScramCredentials failed: %s\n", + rd_kafka_event_error_string(event)); + + } else { + /* Request succeeded */ + const rd_kafka_AlterUserScramCredentials_result_t *result = + rd_kafka_event_AlterUserScramCredentials_result(event); + const rd_kafka_AlterUserScramCredentials_result_response_t * + *responses; + size_t responses_cnt; + responses = rd_kafka_AlterUserScramCredentials_result_responses( + result, &responses_cnt); + + print_alteration_responses(responses, responses_cnt); + } + rd_kafka_event_destroy(event); +} + +static void cmd_user_scram(rd_kafka_conf_t *conf, int argc, const char **argv) { + char errstr[512]; /* librdkafka API error reporting buffer */ + rd_kafka_t *rk; /* Admin client instance */ + size_t i; + const int min_argc = 1; + const int args_rest = argc - min_argc; + + int is_describe = 0; + int is_upsert = 0; + int is_delete = 0; + + /* + * Argument validation + */ + int correct_argument_cnt = argc >= min_argc; + + if (!correct_argument_cnt) + usage("Wrong number of arguments"); + + is_describe = !strcmp(argv[0], "DESCRIBE"); + is_upsert = !strcmp(argv[0], "UPSERT"); + is_delete = !strcmp(argv[0], "DELETE"); + + correct_argument_cnt = is_describe || + (is_upsert && (args_rest % 5) == 0) || + (is_delete && (args_rest % 2) == 0) || 0; + + if (!correct_argument_cnt) + usage("Wrong number of arguments"); + + + /* + * Create an admin client, it can be created using any client type, + * so we choose producer since it requires no extra configuration + * and is more light-weight than the consumer. + * + * NOTE: rd_kafka_new() takes ownership of the conf object + * and the application must not reference it again after + * this call. 
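+ *
+ * Illustrative invocations (broker address, user names, passwords and
+ * salts are placeholders):
+ *   ./user_scram -b localhost:9092 DESCRIBE alice
+ *   ./user_scram -b localhost:9092 UPSERT alice SCRAM-SHA-256 8192 pass salt
+ *   ./user_scram -b localhost:9092 DELETE alice SCRAM-SHA-256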
+ */ + rk = rd_kafka_new(RD_KAFKA_PRODUCER, conf, errstr, sizeof(errstr)); + if (!rk) { + fprintf(stderr, "%% Failed to create new producer: %s\n", + errstr); + exit(1); + } + + /* The Admin API is completely asynchronous, results are emitted + * on the result queue that is passed to DescribeUserScramCredentials() + * and AlterUserScramCredentials() */ + queue = rd_kafka_queue_new(rk); + + /* Signal handler for clean shutdown */ + signal(SIGINT, stop); + + if (is_describe) { + + /* Describe the users */ + Describe(rk, &argv[min_argc], argc - min_argc); + + } else if (is_upsert) { + size_t upsert_cnt = args_rest / 5; + const char **upsert_args = &argv[min_argc]; + rd_kafka_UserScramCredentialAlteration_t **upserts = + calloc(upsert_cnt, sizeof(*upserts)); + for (i = 0; i < upsert_cnt; i++) { + const char **upsert_args_curr = &upsert_args[i * 5]; + size_t salt_size = 0; + const char *username = upsert_args_curr[0]; + rd_kafka_ScramMechanism_t mechanism = + parse_mechanism(upsert_args_curr[1]); + int iterations = + parse_int("iterations", upsert_args_curr[2]); + const char *password = upsert_args_curr[3]; + const char *salt = upsert_args_curr[4]; + + if (strlen(salt) == 0) + salt = NULL; + else + salt_size = strlen(salt); + + upserts[i] = rd_kafka_UserScramCredentialUpsertion_new( + username, mechanism, iterations, + (const unsigned char *)password, strlen(password), + (const unsigned char *)salt, salt_size); + } + Alter(rk, upserts, upsert_cnt); + rd_kafka_UserScramCredentialAlteration_destroy_array( + upserts, upsert_cnt); + free(upserts); + } else { + size_t deletion_cnt = args_rest / 2; + const char **delete_args = &argv[min_argc]; + rd_kafka_UserScramCredentialAlteration_t **deletions = + calloc(deletion_cnt, sizeof(*deletions)); + for (i = 0; i < deletion_cnt; i++) { + const char **delete_args_curr = &delete_args[i * 2]; + rd_kafka_ScramMechanism_t mechanism = + parse_mechanism(delete_args_curr[1]); + const char *username = delete_args_curr[0]; + + deletions[i] = rd_kafka_UserScramCredentialDeletion_new( + username, mechanism); + } + Alter(rk, deletions, deletion_cnt); + rd_kafka_UserScramCredentialAlteration_destroy_array( + deletions, deletion_cnt); + free(deletions); + } + + signal(SIGINT, SIG_DFL); + + /* Destroy queue */ + rd_kafka_queue_destroy(queue); + + + /* Destroy the producer instance */ + rd_kafka_destroy(rk); +} + +int main(int argc, char **argv) { + rd_kafka_conf_t *conf; /**< Client configuration object */ + int opt; + argv0 = argv[0]; + + /* + * Create Kafka client configuration place-holder + */ + conf = rd_kafka_conf_new(); + + + /* + * Parse common options + */ + while ((opt = getopt(argc, argv, "b:X:d:")) != -1) { + switch (opt) { + case 'b': + conf_set(conf, "bootstrap.servers", optarg); + break; + + case 'X': { + char *name = optarg, *val; + + if (!(val = strchr(name, '='))) + fatal("-X expects a name=value argument"); + + *val = '\0'; + val++; + + conf_set(conf, name, val); + break; + } + + case 'd': + conf_set(conf, "debug", optarg); + break; + + default: + usage("Unknown option %c", (char)opt); + } + } + + cmd_user_scram(conf, argc - optind, (const char **)&argv[optind]); + return 0; +} diff --git a/lib/librdkafka-2.1.0/examples/win_ssl_cert_store.cpp b/lib/librdkafka-2.3.0/examples/win_ssl_cert_store.cpp similarity index 99% rename from lib/librdkafka-2.1.0/examples/win_ssl_cert_store.cpp rename to lib/librdkafka-2.3.0/examples/win_ssl_cert_store.cpp index a80dfea30cd..5158f961b1a 100644 --- a/lib/librdkafka-2.1.0/examples/win_ssl_cert_store.cpp +++ b/lib/librdkafka-2.3.0/examples/win_ssl_cert_store.cpp @@
-1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2019, Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/lds-gen.py b/lib/librdkafka-2.3.0/lds-gen.py similarity index 98% rename from lib/librdkafka-2.1.0/lds-gen.py rename to lib/librdkafka-2.3.0/lds-gen.py index 44c718d1305..aca163a5594 100755 --- a/lib/librdkafka-2.1.0/lds-gen.py +++ b/lib/librdkafka-2.3.0/lds-gen.py @@ -2,7 +2,7 @@ # # librdkafka - Apache Kafka C library # -# Copyright (c) 2018 Magnus Edenhill +# Copyright (c) 2018-2022, Magnus Edenhill # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/mainpage.doxy b/lib/librdkafka-2.3.0/mainpage.doxy similarity index 100% rename from lib/librdkafka-2.1.0/mainpage.doxy rename to lib/librdkafka-2.3.0/mainpage.doxy diff --git a/lib/librdkafka-2.1.0/mklove/.gitignore b/lib/librdkafka-2.3.0/mklove/.gitignore similarity index 100% rename from lib/librdkafka-2.1.0/mklove/.gitignore rename to lib/librdkafka-2.3.0/mklove/.gitignore diff --git a/lib/librdkafka-2.1.0/mklove/Makefile.base b/lib/librdkafka-2.3.0/mklove/Makefile.base similarity index 100% rename from lib/librdkafka-2.1.0/mklove/Makefile.base rename to lib/librdkafka-2.3.0/mklove/Makefile.base diff --git a/lib/librdkafka-2.1.0/mklove/modules/configure.atomics b/lib/librdkafka-2.3.0/mklove/modules/configure.atomics similarity index 100% rename from lib/librdkafka-2.1.0/mklove/modules/configure.atomics rename to lib/librdkafka-2.3.0/mklove/modules/configure.atomics diff --git a/lib/librdkafka-2.1.0/mklove/modules/configure.base b/lib/librdkafka-2.3.0/mklove/modules/configure.base similarity index 99% rename from lib/librdkafka-2.1.0/mklove/modules/configure.base rename to lib/librdkafka-2.3.0/mklove/modules/configure.base index 1e216692bc2..77cee61a684 100644 --- a/lib/librdkafka-2.1.0/mklove/modules/configure.base +++ b/lib/librdkafka-2.3.0/mklove/modules/configure.base @@ -2208,7 +2208,7 @@ function mkl_require { MKL_USAGE="Usage: ./configure [OPTIONS...] 
mklove configure script - mklove, not autoconf - Copyright (c) 2014-2019 Magnus Edenhill - https://github.com/edenhill/mklove + Copyright (c) 2014-2023, Magnus Edenhill - https://github.com/edenhill/mklove " function mkl_usage { diff --git a/lib/librdkafka-2.1.0/mklove/modules/configure.builtin b/lib/librdkafka-2.3.0/mklove/modules/configure.builtin similarity index 100% rename from lib/librdkafka-2.1.0/mklove/modules/configure.builtin rename to lib/librdkafka-2.3.0/mklove/modules/configure.builtin diff --git a/lib/librdkafka-2.1.0/mklove/modules/configure.cc b/lib/librdkafka-2.3.0/mklove/modules/configure.cc similarity index 100% rename from lib/librdkafka-2.1.0/mklove/modules/configure.cc rename to lib/librdkafka-2.3.0/mklove/modules/configure.cc diff --git a/lib/librdkafka-2.1.0/mklove/modules/configure.cxx b/lib/librdkafka-2.3.0/mklove/modules/configure.cxx similarity index 100% rename from lib/librdkafka-2.1.0/mklove/modules/configure.cxx rename to lib/librdkafka-2.3.0/mklove/modules/configure.cxx diff --git a/lib/librdkafka-2.1.0/mklove/modules/configure.fileversion b/lib/librdkafka-2.3.0/mklove/modules/configure.fileversion similarity index 100% rename from lib/librdkafka-2.1.0/mklove/modules/configure.fileversion rename to lib/librdkafka-2.3.0/mklove/modules/configure.fileversion diff --git a/lib/librdkafka-2.1.0/mklove/modules/configure.gitversion b/lib/librdkafka-2.3.0/mklove/modules/configure.gitversion similarity index 100% rename from lib/librdkafka-2.1.0/mklove/modules/configure.gitversion rename to lib/librdkafka-2.3.0/mklove/modules/configure.gitversion diff --git a/lib/librdkafka-2.1.0/mklove/modules/configure.good_cflags b/lib/librdkafka-2.3.0/mklove/modules/configure.good_cflags similarity index 100% rename from lib/librdkafka-2.1.0/mklove/modules/configure.good_cflags rename to lib/librdkafka-2.3.0/mklove/modules/configure.good_cflags diff --git a/lib/librdkafka-2.1.0/mklove/modules/configure.host b/lib/librdkafka-2.3.0/mklove/modules/configure.host similarity index 100% rename from lib/librdkafka-2.1.0/mklove/modules/configure.host rename to lib/librdkafka-2.3.0/mklove/modules/configure.host diff --git a/lib/librdkafka-2.1.0/mklove/modules/configure.lib b/lib/librdkafka-2.3.0/mklove/modules/configure.lib similarity index 100% rename from lib/librdkafka-2.1.0/mklove/modules/configure.lib rename to lib/librdkafka-2.3.0/mklove/modules/configure.lib diff --git a/lib/librdkafka-2.1.0/mklove/modules/configure.libcurl b/lib/librdkafka-2.3.0/mklove/modules/configure.libcurl similarity index 100% rename from lib/librdkafka-2.1.0/mklove/modules/configure.libcurl rename to lib/librdkafka-2.3.0/mklove/modules/configure.libcurl diff --git a/lib/librdkafka-2.1.0/mklove/modules/configure.libsasl2 b/lib/librdkafka-2.3.0/mklove/modules/configure.libsasl2 similarity index 100% rename from lib/librdkafka-2.1.0/mklove/modules/configure.libsasl2 rename to lib/librdkafka-2.3.0/mklove/modules/configure.libsasl2 diff --git a/lib/librdkafka-2.1.0/mklove/modules/configure.libssl b/lib/librdkafka-2.3.0/mklove/modules/configure.libssl similarity index 97% rename from lib/librdkafka-2.1.0/mklove/modules/configure.libssl rename to lib/librdkafka-2.3.0/mklove/modules/configure.libssl index 8ce58642282..a0aed5e77c0 100644 --- a/lib/librdkafka-2.1.0/mklove/modules/configure.libssl +++ b/lib/librdkafka-2.3.0/mklove/modules/configure.libssl @@ -91,8 +91,8 @@ function manual_checks { function libcrypto_install_source { local name=$1 local destdir=$2 - local ver=3.0.8 - local 
checksum="6c13d2bf38fdf31eac3ce2a347073673f5d63263398f1f69d0df4a41253e4b3e" + local ver=3.0.11 + local checksum="b3425d3bb4a2218d0697eb41f7fc0cdede016ed19ca49d168b78e8d947887f55" local url=https://www.openssl.org/source/openssl-${ver}.tar.gz local conf_args="--prefix=/usr --openssldir=/usr/lib/ssl no-shared no-zlib" diff --git a/lib/librdkafka-2.1.0/mklove/modules/configure.libzstd b/lib/librdkafka-2.3.0/mklove/modules/configure.libzstd similarity index 100% rename from lib/librdkafka-2.1.0/mklove/modules/configure.libzstd rename to lib/librdkafka-2.3.0/mklove/modules/configure.libzstd diff --git a/lib/librdkafka-2.1.0/mklove/modules/configure.parseversion b/lib/librdkafka-2.3.0/mklove/modules/configure.parseversion similarity index 100% rename from lib/librdkafka-2.1.0/mklove/modules/configure.parseversion rename to lib/librdkafka-2.3.0/mklove/modules/configure.parseversion diff --git a/lib/librdkafka-2.1.0/mklove/modules/configure.pic b/lib/librdkafka-2.3.0/mklove/modules/configure.pic similarity index 100% rename from lib/librdkafka-2.1.0/mklove/modules/configure.pic rename to lib/librdkafka-2.3.0/mklove/modules/configure.pic diff --git a/lib/librdkafka-2.1.0/mklove/modules/configure.socket b/lib/librdkafka-2.3.0/mklove/modules/configure.socket similarity index 100% rename from lib/librdkafka-2.1.0/mklove/modules/configure.socket rename to lib/librdkafka-2.3.0/mklove/modules/configure.socket diff --git a/lib/librdkafka-2.1.0/mklove/modules/configure.zlib b/lib/librdkafka-2.3.0/mklove/modules/configure.zlib similarity index 100% rename from lib/librdkafka-2.1.0/mklove/modules/configure.zlib rename to lib/librdkafka-2.3.0/mklove/modules/configure.zlib diff --git a/lib/librdkafka-2.1.0/mklove/modules/patches/README.md b/lib/librdkafka-2.3.0/mklove/modules/patches/README.md similarity index 100% rename from lib/librdkafka-2.1.0/mklove/modules/patches/README.md rename to lib/librdkafka-2.3.0/mklove/modules/patches/README.md diff --git a/lib/librdkafka-2.1.0/mklove/modules/patches/libcurl.0000-no-runtime-linking-check.patch b/lib/librdkafka-2.3.0/mklove/modules/patches/libcurl.0000-no-runtime-linking-check.patch similarity index 100% rename from lib/librdkafka-2.1.0/mklove/modules/patches/libcurl.0000-no-runtime-linking-check.patch rename to lib/librdkafka-2.3.0/mklove/modules/patches/libcurl.0000-no-runtime-linking-check.patch diff --git a/lib/librdkafka-2.1.0/mklove/modules/patches/libssl.0000-osx-rand-include-fix-OpenSSL-PR16409.patch b/lib/librdkafka-2.3.0/mklove/modules/patches/libssl.0000-osx-rand-include-fix-OpenSSL-PR16409.patch similarity index 100% rename from lib/librdkafka-2.1.0/mklove/modules/patches/libssl.0000-osx-rand-include-fix-OpenSSL-PR16409.patch rename to lib/librdkafka-2.3.0/mklove/modules/patches/libssl.0000-osx-rand-include-fix-OpenSSL-PR16409.patch diff --git a/lib/librdkafka-2.1.0/packaging/RELEASE.md b/lib/librdkafka-2.3.0/packaging/RELEASE.md similarity index 99% rename from lib/librdkafka-2.1.0/packaging/RELEASE.md rename to lib/librdkafka-2.3.0/packaging/RELEASE.md index 930636db47d..36cf38198ad 100644 --- a/lib/librdkafka-2.1.0/packaging/RELEASE.md +++ b/lib/librdkafka-2.3.0/packaging/RELEASE.md @@ -149,7 +149,7 @@ is finished, then download the relevant artifacts for further use, see ## Publish release on github -Create a release on github by going to https://github.com/edenhill/librdkafka/releases +Create a release on github by going to https://github.com/confluentinc/librdkafka/releases and Draft a new release. 
Name the release the same as the final release tag (e.g., `v1.9.0`) and set the tag to the same. diff --git a/lib/librdkafka-2.1.0/packaging/alpine/build-alpine.sh b/lib/librdkafka-2.3.0/packaging/alpine/build-alpine.sh similarity index 100% rename from lib/librdkafka-2.1.0/packaging/alpine/build-alpine.sh rename to lib/librdkafka-2.3.0/packaging/alpine/build-alpine.sh diff --git a/lib/librdkafka-2.1.0/packaging/archlinux/PKGBUILD b/lib/librdkafka-2.3.0/packaging/archlinux/PKGBUILD similarity index 83% rename from lib/librdkafka-2.1.0/packaging/archlinux/PKGBUILD rename to lib/librdkafka-2.3.0/packaging/archlinux/PKGBUILD index 7063d5cef81..36fef055b7f 100644 --- a/lib/librdkafka-2.1.0/packaging/archlinux/PKGBUILD +++ b/lib/librdkafka-2.3.0/packaging/archlinux/PKGBUILD @@ -2,10 +2,10 @@ pkgname=librdkafka pkgver=1.0.0.RC5.r11.g3cf68480 pkgrel=1 pkgdesc='The Apache Kafka C/C++ client library' -url='https://github.com/edenhill/librdkafka' +url='https://github.com/confluentinc/librdkafka' license=('BSD') arch=('x86_64') -source=('git+https://github.com/edenhill/librdkafka#branch=master') +source=('git+https://github.com/confluentinc/librdkafka#branch=master') sha256sums=('SKIP') depends=(glibc libsasl lz4 openssl zlib zstd) makedepends=(bash git python3) diff --git a/lib/librdkafka-2.1.0/packaging/cmake/Config.cmake.in b/lib/librdkafka-2.3.0/packaging/cmake/Config.cmake.in similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cmake/Config.cmake.in rename to lib/librdkafka-2.3.0/packaging/cmake/Config.cmake.in diff --git a/lib/librdkafka-2.1.0/packaging/cmake/Modules/FindLZ4.cmake b/lib/librdkafka-2.3.0/packaging/cmake/Modules/FindLZ4.cmake similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cmake/Modules/FindLZ4.cmake rename to lib/librdkafka-2.3.0/packaging/cmake/Modules/FindLZ4.cmake diff --git a/lib/librdkafka-2.1.0/packaging/cmake/Modules/FindZSTD.cmake b/lib/librdkafka-2.3.0/packaging/cmake/Modules/FindZSTD.cmake similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cmake/Modules/FindZSTD.cmake rename to lib/librdkafka-2.3.0/packaging/cmake/Modules/FindZSTD.cmake diff --git a/lib/librdkafka-2.1.0/packaging/cmake/Modules/LICENSE.FindZstd b/lib/librdkafka-2.3.0/packaging/cmake/Modules/LICENSE.FindZstd similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cmake/Modules/LICENSE.FindZstd rename to lib/librdkafka-2.3.0/packaging/cmake/Modules/LICENSE.FindZstd diff --git a/lib/librdkafka-2.1.0/packaging/cmake/README.md b/lib/librdkafka-2.3.0/packaging/cmake/README.md similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cmake/README.md rename to lib/librdkafka-2.3.0/packaging/cmake/README.md diff --git a/lib/librdkafka-2.1.0/packaging/cmake/config.h.in b/lib/librdkafka-2.3.0/packaging/cmake/config.h.in similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cmake/config.h.in rename to lib/librdkafka-2.3.0/packaging/cmake/config.h.in diff --git a/lib/librdkafka-2.1.0/packaging/cmake/parseversion.cmake b/lib/librdkafka-2.3.0/packaging/cmake/parseversion.cmake similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cmake/parseversion.cmake rename to lib/librdkafka-2.3.0/packaging/cmake/parseversion.cmake diff --git a/lib/librdkafka-2.1.0/packaging/cmake/rdkafka.pc.in b/lib/librdkafka-2.3.0/packaging/cmake/rdkafka.pc.in similarity index 85% rename from lib/librdkafka-2.1.0/packaging/cmake/rdkafka.pc.in rename to lib/librdkafka-2.3.0/packaging/cmake/rdkafka.pc.in index 0eb17e8560c..9632cf5134a 100644 --- 
a/lib/librdkafka-2.1.0/packaging/cmake/rdkafka.pc.in +++ b/lib/librdkafka-2.3.0/packaging/cmake/rdkafka.pc.in @@ -6,7 +6,7 @@ libdir=${prefix}/lib Name: @PKG_CONFIG_NAME@ Description: @PKG_CONFIG_DESCRIPTION@ Version: @PKG_CONFIG_VERSION@ -Requires: @PKG_CONFIG_REQUIRES@ +Requires.private: @PKG_CONFIG_REQUIRES_PRIVATE@ Cflags: @PKG_CONFIG_CFLAGS@ Libs: @PKG_CONFIG_LIBS@ Libs.private: @PKG_CONFIG_LIBS_PRIVATE@ diff --git a/lib/librdkafka-2.1.0/packaging/cmake/try_compile/atomic_32_test.c b/lib/librdkafka-2.3.0/packaging/cmake/try_compile/atomic_32_test.c similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cmake/try_compile/atomic_32_test.c rename to lib/librdkafka-2.3.0/packaging/cmake/try_compile/atomic_32_test.c diff --git a/lib/librdkafka-2.1.0/packaging/cmake/try_compile/atomic_64_test.c b/lib/librdkafka-2.3.0/packaging/cmake/try_compile/atomic_64_test.c similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cmake/try_compile/atomic_64_test.c rename to lib/librdkafka-2.3.0/packaging/cmake/try_compile/atomic_64_test.c diff --git a/lib/librdkafka-2.1.0/packaging/cmake/try_compile/c11threads_test.c b/lib/librdkafka-2.3.0/packaging/cmake/try_compile/c11threads_test.c similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cmake/try_compile/c11threads_test.c rename to lib/librdkafka-2.3.0/packaging/cmake/try_compile/c11threads_test.c diff --git a/lib/librdkafka-2.1.0/packaging/cmake/try_compile/crc32c_hw_test.c b/lib/librdkafka-2.3.0/packaging/cmake/try_compile/crc32c_hw_test.c similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cmake/try_compile/crc32c_hw_test.c rename to lib/librdkafka-2.3.0/packaging/cmake/try_compile/crc32c_hw_test.c diff --git a/lib/librdkafka-2.1.0/packaging/cmake/try_compile/dlopen_test.c b/lib/librdkafka-2.3.0/packaging/cmake/try_compile/dlopen_test.c similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cmake/try_compile/dlopen_test.c rename to lib/librdkafka-2.3.0/packaging/cmake/try_compile/dlopen_test.c diff --git a/lib/librdkafka-2.1.0/packaging/cmake/try_compile/libsasl2_test.c b/lib/librdkafka-2.3.0/packaging/cmake/try_compile/libsasl2_test.c similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cmake/try_compile/libsasl2_test.c rename to lib/librdkafka-2.3.0/packaging/cmake/try_compile/libsasl2_test.c diff --git a/lib/librdkafka-2.1.0/packaging/cmake/try_compile/pthread_setname_darwin_test.c b/lib/librdkafka-2.3.0/packaging/cmake/try_compile/pthread_setname_darwin_test.c similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cmake/try_compile/pthread_setname_darwin_test.c rename to lib/librdkafka-2.3.0/packaging/cmake/try_compile/pthread_setname_darwin_test.c diff --git a/lib/librdkafka-2.1.0/packaging/cmake/try_compile/pthread_setname_freebsd_test.c b/lib/librdkafka-2.3.0/packaging/cmake/try_compile/pthread_setname_freebsd_test.c similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cmake/try_compile/pthread_setname_freebsd_test.c rename to lib/librdkafka-2.3.0/packaging/cmake/try_compile/pthread_setname_freebsd_test.c diff --git a/lib/librdkafka-2.1.0/packaging/cmake/try_compile/pthread_setname_gnu_test.c b/lib/librdkafka-2.3.0/packaging/cmake/try_compile/pthread_setname_gnu_test.c similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cmake/try_compile/pthread_setname_gnu_test.c rename to lib/librdkafka-2.3.0/packaging/cmake/try_compile/pthread_setname_gnu_test.c diff --git a/lib/librdkafka-2.1.0/packaging/cmake/try_compile/rand_r_test.c 
b/lib/librdkafka-2.3.0/packaging/cmake/try_compile/rand_r_test.c similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cmake/try_compile/rand_r_test.c rename to lib/librdkafka-2.3.0/packaging/cmake/try_compile/rand_r_test.c diff --git a/lib/librdkafka-2.1.0/packaging/cmake/try_compile/rdkafka_setup.cmake b/lib/librdkafka-2.3.0/packaging/cmake/try_compile/rdkafka_setup.cmake similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cmake/try_compile/rdkafka_setup.cmake rename to lib/librdkafka-2.3.0/packaging/cmake/try_compile/rdkafka_setup.cmake diff --git a/lib/librdkafka-2.1.0/packaging/cmake/try_compile/regex_test.c b/lib/librdkafka-2.3.0/packaging/cmake/try_compile/regex_test.c similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cmake/try_compile/regex_test.c rename to lib/librdkafka-2.3.0/packaging/cmake/try_compile/regex_test.c diff --git a/lib/librdkafka-2.1.0/packaging/cmake/try_compile/strndup_test.c b/lib/librdkafka-2.3.0/packaging/cmake/try_compile/strndup_test.c similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cmake/try_compile/strndup_test.c rename to lib/librdkafka-2.3.0/packaging/cmake/try_compile/strndup_test.c diff --git a/lib/librdkafka-2.1.0/packaging/cmake/try_compile/sync_32_test.c b/lib/librdkafka-2.3.0/packaging/cmake/try_compile/sync_32_test.c similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cmake/try_compile/sync_32_test.c rename to lib/librdkafka-2.3.0/packaging/cmake/try_compile/sync_32_test.c diff --git a/lib/librdkafka-2.1.0/packaging/cmake/try_compile/sync_64_test.c b/lib/librdkafka-2.3.0/packaging/cmake/try_compile/sync_64_test.c similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cmake/try_compile/sync_64_test.c rename to lib/librdkafka-2.3.0/packaging/cmake/try_compile/sync_64_test.c diff --git a/lib/librdkafka-2.1.0/packaging/cp/README.md b/lib/librdkafka-2.3.0/packaging/cp/README.md similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cp/README.md rename to lib/librdkafka-2.3.0/packaging/cp/README.md diff --git a/lib/librdkafka-2.1.0/packaging/cp/check_features.c b/lib/librdkafka-2.3.0/packaging/cp/check_features.c similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cp/check_features.c rename to lib/librdkafka-2.3.0/packaging/cp/check_features.c diff --git a/lib/librdkafka-2.1.0/packaging/cp/verify-deb.sh b/lib/librdkafka-2.3.0/packaging/cp/verify-deb.sh similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cp/verify-deb.sh rename to lib/librdkafka-2.3.0/packaging/cp/verify-deb.sh diff --git a/lib/librdkafka-2.1.0/packaging/cp/verify-packages.sh b/lib/librdkafka-2.3.0/packaging/cp/verify-packages.sh similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cp/verify-packages.sh rename to lib/librdkafka-2.3.0/packaging/cp/verify-packages.sh diff --git a/lib/librdkafka-2.1.0/packaging/cp/verify-rpm.sh b/lib/librdkafka-2.3.0/packaging/cp/verify-rpm.sh similarity index 100% rename from lib/librdkafka-2.1.0/packaging/cp/verify-rpm.sh rename to lib/librdkafka-2.3.0/packaging/cp/verify-rpm.sh diff --git a/lib/librdkafka-2.1.0/packaging/debian/.gitignore b/lib/librdkafka-2.3.0/packaging/debian/.gitignore similarity index 100% rename from lib/librdkafka-2.1.0/packaging/debian/.gitignore rename to lib/librdkafka-2.3.0/packaging/debian/.gitignore diff --git a/lib/librdkafka-2.1.0/packaging/debian/changelog b/lib/librdkafka-2.3.0/packaging/debian/changelog similarity index 100% rename from lib/librdkafka-2.1.0/packaging/debian/changelog rename to 
lib/librdkafka-2.3.0/packaging/debian/changelog diff --git a/lib/librdkafka-2.1.0/packaging/debian/compat b/lib/librdkafka-2.3.0/packaging/debian/compat similarity index 100% rename from lib/librdkafka-2.1.0/packaging/debian/compat rename to lib/librdkafka-2.3.0/packaging/debian/compat diff --git a/lib/librdkafka-2.1.0/packaging/debian/control b/lib/librdkafka-2.3.0/packaging/debian/control similarity index 90% rename from lib/librdkafka-2.1.0/packaging/debian/control rename to lib/librdkafka-2.3.0/packaging/debian/control index 510db8f2314..87f8a8490f1 100644 --- a/lib/librdkafka-2.1.0/packaging/debian/control +++ b/lib/librdkafka-2.3.0/packaging/debian/control @@ -4,9 +4,9 @@ Maintainer: Faidon Liambotis Build-Depends: debhelper (>= 9), zlib1g-dev, libssl-dev, libsasl2-dev, python3 Standards-Version: 3.9.6 Section: libs -Homepage: https://github.com/edenhill/librdkafka -Vcs-Git: git://github.com/edenhill/librdkafka.git -b debian -Vcs-Browser: https://github.com/edenhill/librdkafka/tree/debian +Homepage: https://github.com/confluentinc/librdkafka +Vcs-Git: git://github.com/confluentinc/librdkafka.git -b debian +Vcs-Browser: https://github.com/confluentinc/librdkafka/tree/debian Package: librdkafka1 Architecture: any diff --git a/lib/librdkafka-2.1.0/packaging/debian/copyright b/lib/librdkafka-2.3.0/packaging/debian/copyright similarity index 97% rename from lib/librdkafka-2.1.0/packaging/debian/copyright rename to lib/librdkafka-2.3.0/packaging/debian/copyright index 20885d9f3be..2ee03af7a0a 100644 --- a/lib/librdkafka-2.1.0/packaging/debian/copyright +++ b/lib/librdkafka-2.3.0/packaging/debian/copyright @@ -1,6 +1,6 @@ Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Upstream-Name: librdkafka -Source: https://github.com/edenhill/librdkafka +Source: https://github.com/confluentinc/librdkafka License: BSD-2-clause Redistribution and use in source and binary forms, with or without @@ -25,7 +25,7 @@ License: BSD-2-clause POSSIBILITY OF SUCH DAMAGE. Files: * -Copyright: 2012-2015, Magnus Edenhill +Copyright: 2012-2022, Magnus Edenhill; 2023 Confluent Inc. License: BSD-2-clause Files: src/rdcrc32.c src/rdcrc32.h @@ -40,7 +40,7 @@ License: MIT . The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - . + . THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE diff --git a/lib/librdkafka-2.1.0/packaging/debian/docs b/lib/librdkafka-2.3.0/packaging/debian/docs similarity index 100% rename from lib/librdkafka-2.1.0/packaging/debian/docs rename to lib/librdkafka-2.3.0/packaging/debian/docs diff --git a/lib/librdkafka-2.1.0/packaging/debian/gbp.conf b/lib/librdkafka-2.3.0/packaging/debian/gbp.conf similarity index 100% rename from lib/librdkafka-2.1.0/packaging/debian/gbp.conf rename to lib/librdkafka-2.3.0/packaging/debian/gbp.conf diff --git a/lib/librdkafka-2.1.0/packaging/debian/librdkafka-dev.dirs b/lib/librdkafka-2.3.0/packaging/debian/librdkafka-dev.dirs similarity index 100% rename from lib/librdkafka-2.1.0/packaging/debian/librdkafka-dev.dirs rename to lib/librdkafka-2.3.0/packaging/debian/librdkafka-dev.dirs diff --git a/lib/librdkafka-2.1.0/packaging/debian/librdkafka-dev.examples b/lib/librdkafka-2.3.0/packaging/debian/librdkafka-dev.examples similarity index 100% rename from lib/librdkafka-2.1.0/packaging/debian/librdkafka-dev.examples rename to lib/librdkafka-2.3.0/packaging/debian/librdkafka-dev.examples diff --git a/lib/librdkafka-2.1.0/packaging/debian/librdkafka-dev.install b/lib/librdkafka-2.3.0/packaging/debian/librdkafka-dev.install similarity index 100% rename from lib/librdkafka-2.1.0/packaging/debian/librdkafka-dev.install rename to lib/librdkafka-2.3.0/packaging/debian/librdkafka-dev.install diff --git a/lib/librdkafka-2.1.0/packaging/debian/librdkafka-dev.substvars b/lib/librdkafka-2.3.0/packaging/debian/librdkafka-dev.substvars similarity index 100% rename from lib/librdkafka-2.1.0/packaging/debian/librdkafka-dev.substvars rename to lib/librdkafka-2.3.0/packaging/debian/librdkafka-dev.substvars diff --git a/lib/librdkafka-2.1.0/packaging/debian/librdkafka.dsc b/lib/librdkafka-2.3.0/packaging/debian/librdkafka.dsc similarity index 64% rename from lib/librdkafka-2.1.0/packaging/debian/librdkafka.dsc rename to lib/librdkafka-2.3.0/packaging/debian/librdkafka.dsc index 447b9e656f6..1514513450b 100644 --- a/lib/librdkafka-2.1.0/packaging/debian/librdkafka.dsc +++ b/lib/librdkafka-2.3.0/packaging/debian/librdkafka.dsc @@ -3,11 +3,11 @@ Source: librdkafka Binary: librdkafka1, librdkafka-dev, librdkafka1-dbg Architecture: any Version: 0.9.1-1pre1 -Maintainer: Magnus Edenhill -Homepage: https://github.com/edenhill/librdkafka +Maintainer: Confluent Inc. 
+Homepage: https://github.com/confluentinc/librdkafka Standards-Version: 3.9.6 -Vcs-Browser: https://github.com/edenhill/librdkafka/tree/master -Vcs-Git: git://github.com/edenhill/librdkafka.git -b master +Vcs-Browser: https://github.com/confluentinc/librdkafka/tree/master +Vcs-Git: git://github.com/confluentinc/librdkafka.git -b master Build-Depends: debhelper (>= 9), zlib1g-dev, libssl-dev, libsasl2-dev, python3 Package-List: librdkafka-dev deb libdevel optional arch=any diff --git a/lib/librdkafka-2.1.0/packaging/debian/librdkafka1-dbg.substvars b/lib/librdkafka-2.3.0/packaging/debian/librdkafka1-dbg.substvars similarity index 100% rename from lib/librdkafka-2.1.0/packaging/debian/librdkafka1-dbg.substvars rename to lib/librdkafka-2.3.0/packaging/debian/librdkafka1-dbg.substvars diff --git a/lib/librdkafka-2.1.0/packaging/debian/librdkafka1.dirs b/lib/librdkafka-2.3.0/packaging/debian/librdkafka1.dirs similarity index 100% rename from lib/librdkafka-2.1.0/packaging/debian/librdkafka1.dirs rename to lib/librdkafka-2.3.0/packaging/debian/librdkafka1.dirs diff --git a/lib/librdkafka-2.1.0/packaging/debian/librdkafka1.install b/lib/librdkafka-2.3.0/packaging/debian/librdkafka1.install similarity index 100% rename from lib/librdkafka-2.1.0/packaging/debian/librdkafka1.install rename to lib/librdkafka-2.3.0/packaging/debian/librdkafka1.install diff --git a/lib/librdkafka-2.1.0/packaging/debian/librdkafka1.postinst.debhelper b/lib/librdkafka-2.3.0/packaging/debian/librdkafka1.postinst.debhelper similarity index 100% rename from lib/librdkafka-2.1.0/packaging/debian/librdkafka1.postinst.debhelper rename to lib/librdkafka-2.3.0/packaging/debian/librdkafka1.postinst.debhelper diff --git a/lib/librdkafka-2.1.0/packaging/debian/librdkafka1.postrm.debhelper b/lib/librdkafka-2.3.0/packaging/debian/librdkafka1.postrm.debhelper similarity index 100% rename from lib/librdkafka-2.1.0/packaging/debian/librdkafka1.postrm.debhelper rename to lib/librdkafka-2.3.0/packaging/debian/librdkafka1.postrm.debhelper diff --git a/lib/librdkafka-2.1.0/packaging/debian/librdkafka1.symbols b/lib/librdkafka-2.3.0/packaging/debian/librdkafka1.symbols similarity index 100% rename from lib/librdkafka-2.1.0/packaging/debian/librdkafka1.symbols rename to lib/librdkafka-2.3.0/packaging/debian/librdkafka1.symbols diff --git a/lib/librdkafka-2.1.0/packaging/debian/rules b/lib/librdkafka-2.3.0/packaging/debian/rules similarity index 100% rename from lib/librdkafka-2.1.0/packaging/debian/rules rename to lib/librdkafka-2.3.0/packaging/debian/rules diff --git a/lib/librdkafka-2.1.0/packaging/debian/source/format b/lib/librdkafka-2.3.0/packaging/debian/source/format similarity index 100% rename from lib/librdkafka-2.1.0/packaging/debian/source/format rename to lib/librdkafka-2.3.0/packaging/debian/source/format diff --git a/lib/librdkafka-2.3.0/packaging/debian/watch b/lib/librdkafka-2.3.0/packaging/debian/watch new file mode 100644 index 00000000000..f08e19f895d --- /dev/null +++ b/lib/librdkafka-2.3.0/packaging/debian/watch @@ -0,0 +1,2 @@ +version=3 +http://github.com/confluentinc/librdkafka/tags .*/(\d[\d\.]*)\.tar\.gz diff --git a/lib/librdkafka-2.1.0/packaging/get_version.py b/lib/librdkafka-2.3.0/packaging/get_version.py similarity index 100% rename from lib/librdkafka-2.1.0/packaging/get_version.py rename to lib/librdkafka-2.3.0/packaging/get_version.py diff --git a/lib/librdkafka-2.1.0/packaging/homebrew/README.md b/lib/librdkafka-2.3.0/packaging/homebrew/README.md similarity index 100% rename from 
lib/librdkafka-2.1.0/packaging/homebrew/README.md rename to lib/librdkafka-2.3.0/packaging/homebrew/README.md diff --git a/lib/librdkafka-2.1.0/packaging/homebrew/brew-update-pr.sh b/lib/librdkafka-2.3.0/packaging/homebrew/brew-update-pr.sh similarity index 85% rename from lib/librdkafka-2.1.0/packaging/homebrew/brew-update-pr.sh rename to lib/librdkafka-2.3.0/packaging/homebrew/brew-update-pr.sh index f756159cdab..9c6cd838cfb 100755 --- a/lib/librdkafka-2.1.0/packaging/homebrew/brew-update-pr.sh +++ b/lib/librdkafka-2.3.0/packaging/homebrew/brew-update-pr.sh @@ -27,5 +27,5 @@ fi set -eu brew bump-formula-pr $DRY_RUN --strict \ - --url=https://github.com/edenhill/librdkafka/archive/${TAG}.tar.gz \ + --url=https://github.com/confluentinc/librdkafka/archive/${TAG}.tar.gz \ librdkafka diff --git a/lib/librdkafka-2.1.0/packaging/mingw-w64/configure-build-msys2-mingw-static.sh b/lib/librdkafka-2.3.0/packaging/mingw-w64/configure-build-msys2-mingw-static.sh similarity index 100% rename from lib/librdkafka-2.1.0/packaging/mingw-w64/configure-build-msys2-mingw-static.sh rename to lib/librdkafka-2.3.0/packaging/mingw-w64/configure-build-msys2-mingw-static.sh diff --git a/lib/librdkafka-2.1.0/packaging/mingw-w64/configure-build-msys2-mingw.sh b/lib/librdkafka-2.3.0/packaging/mingw-w64/configure-build-msys2-mingw.sh similarity index 100% rename from lib/librdkafka-2.1.0/packaging/mingw-w64/configure-build-msys2-mingw.sh rename to lib/librdkafka-2.3.0/packaging/mingw-w64/configure-build-msys2-mingw.sh diff --git a/lib/librdkafka-2.1.0/packaging/mingw-w64/run-tests.sh b/lib/librdkafka-2.3.0/packaging/mingw-w64/run-tests.sh similarity index 100% rename from lib/librdkafka-2.1.0/packaging/mingw-w64/run-tests.sh rename to lib/librdkafka-2.3.0/packaging/mingw-w64/run-tests.sh diff --git a/lib/librdkafka-2.1.0/packaging/mingw-w64/semaphoreci-build.sh b/lib/librdkafka-2.3.0/packaging/mingw-w64/semaphoreci-build.sh similarity index 100% rename from lib/librdkafka-2.1.0/packaging/mingw-w64/semaphoreci-build.sh rename to lib/librdkafka-2.3.0/packaging/mingw-w64/semaphoreci-build.sh diff --git a/lib/librdkafka-2.1.0/packaging/mingw-w64/travis-before-install.sh b/lib/librdkafka-2.3.0/packaging/mingw-w64/travis-before-install.sh similarity index 100% rename from lib/librdkafka-2.1.0/packaging/mingw-w64/travis-before-install.sh rename to lib/librdkafka-2.3.0/packaging/mingw-w64/travis-before-install.sh diff --git a/lib/librdkafka-2.1.0/packaging/nuget/.gitignore b/lib/librdkafka-2.3.0/packaging/nuget/.gitignore similarity index 100% rename from lib/librdkafka-2.1.0/packaging/nuget/.gitignore rename to lib/librdkafka-2.3.0/packaging/nuget/.gitignore diff --git a/lib/librdkafka-2.1.0/packaging/nuget/README.md b/lib/librdkafka-2.3.0/packaging/nuget/README.md similarity index 96% rename from lib/librdkafka-2.1.0/packaging/nuget/README.md rename to lib/librdkafka-2.3.0/packaging/nuget/README.md index 87b17693026..d4394afb881 100644 --- a/lib/librdkafka-2.1.0/packaging/nuget/README.md +++ b/lib/librdkafka-2.3.0/packaging/nuget/README.md @@ -27,6 +27,12 @@ The finalized nuget package maybe uploaded manually to NuGet.org 2. 
Wait for CI builds to finish, monitor the builds here: + New builds + + * https://confluentinc.semaphoreci.com/projects/librdkafka + + Previous builds + * https://travis-ci.org/edenhill/librdkafka * https://ci.appveyor.com/project/edenhill/librdkafka diff --git a/lib/librdkafka-2.1.0/packaging/nuget/artifact.py b/lib/librdkafka-2.3.0/packaging/nuget/artifact.py similarity index 100% rename from lib/librdkafka-2.1.0/packaging/nuget/artifact.py rename to lib/librdkafka-2.3.0/packaging/nuget/artifact.py diff --git a/lib/librdkafka-2.1.0/packaging/nuget/cleanup-s3.py b/lib/librdkafka-2.3.0/packaging/nuget/cleanup-s3.py similarity index 100% rename from lib/librdkafka-2.1.0/packaging/nuget/cleanup-s3.py rename to lib/librdkafka-2.3.0/packaging/nuget/cleanup-s3.py diff --git a/lib/librdkafka-2.1.0/packaging/nuget/common/p-common__plat-windows__arch-win32__bldtype-Release/msvcr120.zip b/lib/librdkafka-2.3.0/packaging/nuget/common/p-common__plat-windows__arch-win32__bldtype-Release/msvcr120.zip similarity index 100% rename from lib/librdkafka-2.1.0/packaging/nuget/common/p-common__plat-windows__arch-win32__bldtype-Release/msvcr120.zip rename to lib/librdkafka-2.3.0/packaging/nuget/common/p-common__plat-windows__arch-win32__bldtype-Release/msvcr120.zip diff --git a/lib/librdkafka-2.1.0/packaging/nuget/common/p-common__plat-windows__arch-win32__bldtype-Release/msvcr140.zip b/lib/librdkafka-2.3.0/packaging/nuget/common/p-common__plat-windows__arch-win32__bldtype-Release/msvcr140.zip similarity index 100% rename from lib/librdkafka-2.1.0/packaging/nuget/common/p-common__plat-windows__arch-win32__bldtype-Release/msvcr140.zip rename to lib/librdkafka-2.3.0/packaging/nuget/common/p-common__plat-windows__arch-win32__bldtype-Release/msvcr140.zip diff --git a/lib/librdkafka-2.1.0/packaging/nuget/common/p-common__plat-windows__arch-x64__bldtype-Release/msvcr120.zip b/lib/librdkafka-2.3.0/packaging/nuget/common/p-common__plat-windows__arch-x64__bldtype-Release/msvcr120.zip similarity index 100% rename from lib/librdkafka-2.1.0/packaging/nuget/common/p-common__plat-windows__arch-x64__bldtype-Release/msvcr120.zip rename to lib/librdkafka-2.3.0/packaging/nuget/common/p-common__plat-windows__arch-x64__bldtype-Release/msvcr120.zip diff --git a/lib/librdkafka-2.1.0/packaging/nuget/common/p-common__plat-windows__arch-x64__bldtype-Release/msvcr140.zip b/lib/librdkafka-2.3.0/packaging/nuget/common/p-common__plat-windows__arch-x64__bldtype-Release/msvcr140.zip similarity index 100% rename from lib/librdkafka-2.1.0/packaging/nuget/common/p-common__plat-windows__arch-x64__bldtype-Release/msvcr140.zip rename to lib/librdkafka-2.3.0/packaging/nuget/common/p-common__plat-windows__arch-x64__bldtype-Release/msvcr140.zip diff --git a/lib/librdkafka-2.1.0/packaging/nuget/nuget.sh b/lib/librdkafka-2.3.0/packaging/nuget/nuget.sh similarity index 100% rename from lib/librdkafka-2.1.0/packaging/nuget/nuget.sh rename to lib/librdkafka-2.3.0/packaging/nuget/nuget.sh diff --git a/lib/librdkafka-2.1.0/packaging/nuget/nugetpackage.py b/lib/librdkafka-2.3.0/packaging/nuget/nugetpackage.py similarity index 100% rename from lib/librdkafka-2.1.0/packaging/nuget/nugetpackage.py rename to lib/librdkafka-2.3.0/packaging/nuget/nugetpackage.py diff --git a/lib/librdkafka-2.1.0/packaging/nuget/packaging.py b/lib/librdkafka-2.3.0/packaging/nuget/packaging.py similarity index 100% rename from lib/librdkafka-2.1.0/packaging/nuget/packaging.py rename to lib/librdkafka-2.3.0/packaging/nuget/packaging.py diff --git 
a/lib/librdkafka-2.1.0/packaging/nuget/push-to-nuget.sh b/lib/librdkafka-2.3.0/packaging/nuget/push-to-nuget.sh similarity index 100% rename from lib/librdkafka-2.1.0/packaging/nuget/push-to-nuget.sh rename to lib/librdkafka-2.3.0/packaging/nuget/push-to-nuget.sh diff --git a/lib/librdkafka-2.1.0/packaging/nuget/release.py b/lib/librdkafka-2.3.0/packaging/nuget/release.py similarity index 100% rename from lib/librdkafka-2.1.0/packaging/nuget/release.py rename to lib/librdkafka-2.3.0/packaging/nuget/release.py diff --git a/lib/librdkafka-2.1.0/packaging/nuget/requirements.txt b/lib/librdkafka-2.3.0/packaging/nuget/requirements.txt similarity index 100% rename from lib/librdkafka-2.1.0/packaging/nuget/requirements.txt rename to lib/librdkafka-2.3.0/packaging/nuget/requirements.txt diff --git a/lib/librdkafka-2.1.0/packaging/nuget/staticpackage.py b/lib/librdkafka-2.3.0/packaging/nuget/staticpackage.py similarity index 100% rename from lib/librdkafka-2.1.0/packaging/nuget/staticpackage.py rename to lib/librdkafka-2.3.0/packaging/nuget/staticpackage.py diff --git a/lib/librdkafka-2.1.0/packaging/nuget/templates/librdkafka.redist.nuspec b/lib/librdkafka-2.3.0/packaging/nuget/templates/librdkafka.redist.nuspec similarity index 100% rename from lib/librdkafka-2.1.0/packaging/nuget/templates/librdkafka.redist.nuspec rename to lib/librdkafka-2.3.0/packaging/nuget/templates/librdkafka.redist.nuspec diff --git a/lib/librdkafka-2.1.0/packaging/nuget/templates/librdkafka.redist.props b/lib/librdkafka-2.3.0/packaging/nuget/templates/librdkafka.redist.props similarity index 100% rename from lib/librdkafka-2.1.0/packaging/nuget/templates/librdkafka.redist.props rename to lib/librdkafka-2.3.0/packaging/nuget/templates/librdkafka.redist.props diff --git a/lib/librdkafka-2.1.0/packaging/nuget/templates/librdkafka.redist.targets b/lib/librdkafka-2.3.0/packaging/nuget/templates/librdkafka.redist.targets similarity index 100% rename from lib/librdkafka-2.1.0/packaging/nuget/templates/librdkafka.redist.targets rename to lib/librdkafka-2.3.0/packaging/nuget/templates/librdkafka.redist.targets diff --git a/lib/librdkafka-2.1.0/packaging/nuget/zfile/__init__.py b/lib/librdkafka-2.3.0/packaging/nuget/zfile/__init__.py similarity index 100% rename from lib/librdkafka-2.1.0/packaging/nuget/zfile/__init__.py rename to lib/librdkafka-2.3.0/packaging/nuget/zfile/__init__.py diff --git a/lib/librdkafka-2.1.0/packaging/nuget/zfile/zfile.py b/lib/librdkafka-2.3.0/packaging/nuget/zfile/zfile.py similarity index 100% rename from lib/librdkafka-2.1.0/packaging/nuget/zfile/zfile.py rename to lib/librdkafka-2.3.0/packaging/nuget/zfile/zfile.py diff --git a/lib/librdkafka-2.1.0/packaging/rpm/.gitignore b/lib/librdkafka-2.3.0/packaging/rpm/.gitignore similarity index 100% rename from lib/librdkafka-2.1.0/packaging/rpm/.gitignore rename to lib/librdkafka-2.3.0/packaging/rpm/.gitignore diff --git a/lib/librdkafka-2.1.0/packaging/rpm/Makefile b/lib/librdkafka-2.3.0/packaging/rpm/Makefile similarity index 100% rename from lib/librdkafka-2.1.0/packaging/rpm/Makefile rename to lib/librdkafka-2.3.0/packaging/rpm/Makefile diff --git a/lib/librdkafka-2.1.0/packaging/rpm/README.md b/lib/librdkafka-2.3.0/packaging/rpm/README.md similarity index 100% rename from lib/librdkafka-2.1.0/packaging/rpm/README.md rename to lib/librdkafka-2.3.0/packaging/rpm/README.md diff --git a/lib/librdkafka-2.1.0/packaging/rpm/el7-x86_64.cfg b/lib/librdkafka-2.3.0/packaging/rpm/el7-x86_64.cfg similarity index 100% rename from 
lib/librdkafka-2.1.0/packaging/rpm/el7-x86_64.cfg rename to lib/librdkafka-2.3.0/packaging/rpm/el7-x86_64.cfg diff --git a/lib/librdkafka-2.1.0/packaging/rpm/librdkafka.spec b/lib/librdkafka-2.3.0/packaging/rpm/librdkafka.spec similarity index 98% rename from lib/librdkafka-2.1.0/packaging/rpm/librdkafka.spec rename to lib/librdkafka-2.3.0/packaging/rpm/librdkafka.spec index 4f9e8c0d0ec..ac2ddd01142 100644 --- a/lib/librdkafka-2.1.0/packaging/rpm/librdkafka.spec +++ b/lib/librdkafka-2.3.0/packaging/rpm/librdkafka.spec @@ -6,7 +6,7 @@ Release: %{__release}%{?dist} Summary: The Apache Kafka C library Group: Development/Libraries/C and C++ License: BSD-2-Clause -URL: https://github.com/edenhill/librdkafka +URL: https://github.com/confluentinc/librdkafka Source: librdkafka-%{version}.tar.gz BuildRequires: zlib-devel libstdc++-devel gcc >= 4.1 gcc-c++ cyrus-sasl-devel diff --git a/lib/librdkafka-2.1.0/packaging/rpm/mock-on-docker.sh b/lib/librdkafka-2.3.0/packaging/rpm/mock-on-docker.sh similarity index 100% rename from lib/librdkafka-2.1.0/packaging/rpm/mock-on-docker.sh rename to lib/librdkafka-2.3.0/packaging/rpm/mock-on-docker.sh diff --git a/lib/librdkafka-2.1.0/packaging/rpm/tests/.gitignore b/lib/librdkafka-2.3.0/packaging/rpm/tests/.gitignore similarity index 100% rename from lib/librdkafka-2.1.0/packaging/rpm/tests/.gitignore rename to lib/librdkafka-2.3.0/packaging/rpm/tests/.gitignore diff --git a/lib/librdkafka-2.1.0/packaging/rpm/tests/Makefile b/lib/librdkafka-2.3.0/packaging/rpm/tests/Makefile similarity index 100% rename from lib/librdkafka-2.1.0/packaging/rpm/tests/Makefile rename to lib/librdkafka-2.3.0/packaging/rpm/tests/Makefile diff --git a/lib/librdkafka-2.1.0/packaging/rpm/tests/README.md b/lib/librdkafka-2.3.0/packaging/rpm/tests/README.md similarity index 100% rename from lib/librdkafka-2.1.0/packaging/rpm/tests/README.md rename to lib/librdkafka-2.3.0/packaging/rpm/tests/README.md diff --git a/lib/librdkafka-2.1.0/packaging/rpm/tests/run-test.sh b/lib/librdkafka-2.3.0/packaging/rpm/tests/run-test.sh similarity index 100% rename from lib/librdkafka-2.1.0/packaging/rpm/tests/run-test.sh rename to lib/librdkafka-2.3.0/packaging/rpm/tests/run-test.sh diff --git a/lib/librdkafka-2.1.0/packaging/rpm/tests/test-on-docker.sh b/lib/librdkafka-2.3.0/packaging/rpm/tests/test-on-docker.sh similarity index 100% rename from lib/librdkafka-2.1.0/packaging/rpm/tests/test-on-docker.sh rename to lib/librdkafka-2.3.0/packaging/rpm/tests/test-on-docker.sh diff --git a/lib/librdkafka-2.1.0/packaging/rpm/tests/test.c b/lib/librdkafka-2.3.0/packaging/rpm/tests/test.c similarity index 100% rename from lib/librdkafka-2.1.0/packaging/rpm/tests/test.c rename to lib/librdkafka-2.3.0/packaging/rpm/tests/test.c diff --git a/lib/librdkafka-2.1.0/packaging/rpm/tests/test.cpp b/lib/librdkafka-2.3.0/packaging/rpm/tests/test.cpp similarity index 100% rename from lib/librdkafka-2.1.0/packaging/rpm/tests/test.cpp rename to lib/librdkafka-2.3.0/packaging/rpm/tests/test.cpp diff --git a/lib/librdkafka-2.1.0/packaging/tools/build-deb-package.sh b/lib/librdkafka-2.3.0/packaging/tools/build-deb-package.sh similarity index 92% rename from lib/librdkafka-2.1.0/packaging/tools/build-deb-package.sh rename to lib/librdkafka-2.3.0/packaging/tools/build-deb-package.sh index d9cad6d25ac..86b806ee929 100755 --- a/lib/librdkafka-2.1.0/packaging/tools/build-deb-package.sh +++ b/lib/librdkafka-2.3.0/packaging/tools/build-deb-package.sh @@ -35,8 +35,8 @@ git clone /v librdkafka pushd librdkafka -export 
DEBEMAIL="librdkafka packaging " -git config user.email "rdkafka@edenhill.se" +export DEBEMAIL="librdkafka packaging " +git config user.email "cloud-support@confluent.io" git config user.name "librdkafka packaging" DEB_BRANCH=origin/confluent-debian diff --git a/lib/librdkafka-2.1.0/packaging/tools/build-debian.sh b/lib/librdkafka-2.3.0/packaging/tools/build-debian.sh similarity index 100% rename from lib/librdkafka-2.1.0/packaging/tools/build-debian.sh rename to lib/librdkafka-2.3.0/packaging/tools/build-debian.sh diff --git a/lib/librdkafka-2.1.0/packaging/tools/build-manylinux.sh b/lib/librdkafka-2.3.0/packaging/tools/build-manylinux.sh similarity index 100% rename from lib/librdkafka-2.1.0/packaging/tools/build-manylinux.sh rename to lib/librdkafka-2.3.0/packaging/tools/build-manylinux.sh diff --git a/lib/librdkafka-2.1.0/packaging/tools/build-release-artifacts.sh b/lib/librdkafka-2.3.0/packaging/tools/build-release-artifacts.sh similarity index 100% rename from lib/librdkafka-2.1.0/packaging/tools/build-release-artifacts.sh rename to lib/librdkafka-2.3.0/packaging/tools/build-release-artifacts.sh diff --git a/lib/librdkafka-2.1.0/packaging/tools/distro-build.sh b/lib/librdkafka-2.3.0/packaging/tools/distro-build.sh similarity index 100% rename from lib/librdkafka-2.1.0/packaging/tools/distro-build.sh rename to lib/librdkafka-2.3.0/packaging/tools/distro-build.sh diff --git a/lib/librdkafka-2.1.0/packaging/tools/gh-release-checksums.py b/lib/librdkafka-2.3.0/packaging/tools/gh-release-checksums.py similarity index 88% rename from lib/librdkafka-2.1.0/packaging/tools/gh-release-checksums.py rename to lib/librdkafka-2.3.0/packaging/tools/gh-release-checksums.py index e7259dc202f..5b51f383253 100755 --- a/lib/librdkafka-2.1.0/packaging/tools/gh-release-checksums.py +++ b/lib/librdkafka-2.3.0/packaging/tools/gh-release-checksums.py @@ -24,8 +24,8 @@ print("Release asset checksums:") for ftype in ["zip", "tar.gz"]: - url = "https://github.com/edenhill/librdkafka/archive/{}.{}".format( - tag, ftype) + url = "https://github.com/confluentinc/" + \ + "librdkafka/archive/{}.{}".format(tag, ftype) h = hashlib.sha256() diff --git a/lib/librdkafka-2.1.0/packaging/tools/rdutcoverage.sh b/lib/librdkafka-2.3.0/packaging/tools/rdutcoverage.sh similarity index 100% rename from lib/librdkafka-2.1.0/packaging/tools/rdutcoverage.sh rename to lib/librdkafka-2.3.0/packaging/tools/rdutcoverage.sh diff --git a/lib/librdkafka-2.1.0/packaging/tools/requirements.txt b/lib/librdkafka-2.3.0/packaging/tools/requirements.txt similarity index 100% rename from lib/librdkafka-2.1.0/packaging/tools/requirements.txt rename to lib/librdkafka-2.3.0/packaging/tools/requirements.txt diff --git a/lib/librdkafka-2.1.0/packaging/tools/style-format.sh b/lib/librdkafka-2.3.0/packaging/tools/style-format.sh similarity index 100% rename from lib/librdkafka-2.1.0/packaging/tools/style-format.sh rename to lib/librdkafka-2.3.0/packaging/tools/style-format.sh diff --git a/lib/librdkafka-2.1.0/service.yml b/lib/librdkafka-2.3.0/service.yml similarity index 100% rename from lib/librdkafka-2.1.0/service.yml rename to lib/librdkafka-2.3.0/service.yml diff --git a/lib/librdkafka-2.1.0/src-cpp/CMakeLists.txt b/lib/librdkafka-2.3.0/src-cpp/CMakeLists.txt similarity index 96% rename from lib/librdkafka-2.1.0/src-cpp/CMakeLists.txt rename to lib/librdkafka-2.3.0/src-cpp/CMakeLists.txt index b0a6d51e475..2b496d9f9e6 100644 --- a/lib/librdkafka-2.1.0/src-cpp/CMakeLists.txt +++ b/lib/librdkafka-2.3.0/src-cpp/CMakeLists.txt @@ -41,7 +41,7 @@ 
set(PKG_CONFIG_VERSION "${PROJECT_VERSION}") if(NOT RDKAFKA_BUILD_STATIC) set(PKG_CONFIG_NAME "librdkafka++") set(PKG_CONFIG_DESCRIPTION "The Apache Kafka C/C++ library") - set(PKG_CONFIG_REQUIRES "rdkafka") + set(PKG_CONFIG_REQUIRES_PRIVATE "rdkafka") set(PKG_CONFIG_CFLAGS "-I\${includedir}") set(PKG_CONFIG_LIBS "-L\${libdir} -lrdkafka++") set(PKG_CONFIG_LIBS_PRIVATE "-lrdkafka") @@ -57,7 +57,7 @@ if(NOT RDKAFKA_BUILD_STATIC) else() set(PKG_CONFIG_NAME "librdkafka++-static") set(PKG_CONFIG_DESCRIPTION "The Apache Kafka C/C++ library (static)") - set(PKG_CONFIG_REQUIRES "") + set(PKG_CONFIG_REQUIRES_PRIVATE "") set(PKG_CONFIG_CFLAGS "-I\${includedir} -DLIBRDKAFKA_STATICLIB") set(PKG_CONFIG_LIBS "-L\${libdir} \${libdir}/librdkafka++.a") if(WIN32) diff --git a/lib/librdkafka-2.1.0/src-cpp/ConfImpl.cpp b/lib/librdkafka-2.3.0/src-cpp/ConfImpl.cpp similarity index 98% rename from lib/librdkafka-2.1.0/src-cpp/ConfImpl.cpp rename to lib/librdkafka-2.3.0/src-cpp/ConfImpl.cpp index 53d7b30c568..4f1f7090829 100644 --- a/lib/librdkafka-2.1.0/src-cpp/ConfImpl.cpp +++ b/lib/librdkafka-2.3.0/src-cpp/ConfImpl.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C/C++ library * - * Copyright (c) 2014 Magnus Edenhill + * Copyright (c) 2014-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src-cpp/ConsumerImpl.cpp b/lib/librdkafka-2.3.0/src-cpp/ConsumerImpl.cpp similarity index 99% rename from lib/librdkafka-2.1.0/src-cpp/ConsumerImpl.cpp rename to lib/librdkafka-2.3.0/src-cpp/ConsumerImpl.cpp index b7f5e3b220a..a467acfb0da 100644 --- a/lib/librdkafka-2.1.0/src-cpp/ConsumerImpl.cpp +++ b/lib/librdkafka-2.3.0/src-cpp/ConsumerImpl.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C/C++ library * - * Copyright (c) 2014 Magnus Edenhill + * Copyright (c) 2014-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src-cpp/HandleImpl.cpp b/lib/librdkafka-2.3.0/src-cpp/HandleImpl.cpp similarity index 96% rename from lib/librdkafka-2.1.0/src-cpp/HandleImpl.cpp rename to lib/librdkafka-2.3.0/src-cpp/HandleImpl.cpp index 7aa2f2939b2..8d16c0d1981 100644 --- a/lib/librdkafka-2.1.0/src-cpp/HandleImpl.cpp +++ b/lib/librdkafka-2.3.0/src-cpp/HandleImpl.cpp @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C/C++ library * - * Copyright (c) 2014 Magnus Edenhill + * Copyright (c) 2014-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -391,6 +392,12 @@ rd_kafka_topic_partition_list_t *partitions_to_c_parts( rd_kafka_topic_partition_t *rktpar = rd_kafka_topic_partition_list_add( c_parts, tpi->topic_.c_str(), tpi->partition_); rktpar->offset = tpi->offset_; + if (tpi->metadata_.size()) { + void *metadata_p = mem_malloc(tpi->metadata_.size()); + memcpy(metadata_p, tpi->metadata_.data(), tpi->metadata_.size()); + rktpar->metadata = metadata_p; + rktpar->metadata_size = tpi->metadata_.size(); + } if (tpi->leader_epoch_ != -1) rd_kafka_topic_partition_set_leader_epoch(rktpar, tpi->leader_epoch_); } @@ -417,6 +424,10 @@ void update_partitions_from_c_parts( pp->offset_ = p->offset; pp->err_ = static_cast<RdKafka::ErrorCode>(p->err); pp->leader_epoch_ = rd_kafka_topic_partition_get_leader_epoch(p); + if (p->metadata_size) { + unsigned char *metadata = (unsigned char *)p->metadata; + pp->metadata_.assign(metadata, metadata + p->metadata_size); + } } } } diff --git a/lib/librdkafka-2.1.0/src-cpp/HeadersImpl.cpp b/lib/librdkafka-2.3.0/src-cpp/HeadersImpl.cpp similarity index 97% rename from lib/librdkafka-2.1.0/src-cpp/HeadersImpl.cpp rename to lib/librdkafka-2.3.0/src-cpp/HeadersImpl.cpp index b567ef36c00..2b29488dc53 100644 --- a/lib/librdkafka-2.1.0/src-cpp/HeadersImpl.cpp +++ b/lib/librdkafka-2.3.0/src-cpp/HeadersImpl.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C/C++ library * - * Copyright (c) 2014 Magnus Edenhill + * Copyright (c) 2014-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src-cpp/KafkaConsumerImpl.cpp b/lib/librdkafka-2.3.0/src-cpp/KafkaConsumerImpl.cpp similarity index 99% rename from lib/librdkafka-2.1.0/src-cpp/KafkaConsumerImpl.cpp rename to lib/librdkafka-2.3.0/src-cpp/KafkaConsumerImpl.cpp index 6f3b81c727c..984710b214a 100644 --- a/lib/librdkafka-2.1.0/src-cpp/KafkaConsumerImpl.cpp +++ b/lib/librdkafka-2.3.0/src-cpp/KafkaConsumerImpl.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C/C++ library * - * Copyright (c) 2015 Magnus Edenhill + * Copyright (c) 2015-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src-cpp/Makefile b/lib/librdkafka-2.3.0/src-cpp/Makefile similarity index 100% rename from lib/librdkafka-2.1.0/src-cpp/Makefile rename to lib/librdkafka-2.3.0/src-cpp/Makefile diff --git a/lib/librdkafka-2.1.0/src-cpp/MessageImpl.cpp b/lib/librdkafka-2.3.0/src-cpp/MessageImpl.cpp similarity index 97% rename from lib/librdkafka-2.1.0/src-cpp/MessageImpl.cpp rename to lib/librdkafka-2.3.0/src-cpp/MessageImpl.cpp index c6d83150fd3..8261b1f6e1f 100644 --- a/lib/librdkafka-2.1.0/src-cpp/MessageImpl.cpp +++ b/lib/librdkafka-2.3.0/src-cpp/MessageImpl.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C/C++ library * - * Copyright (c) 2014 Magnus Edenhill + * Copyright (c) 2014-2022, Magnus Edenhill * All rights reserved.
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src-cpp/MetadataImpl.cpp b/lib/librdkafka-2.3.0/src-cpp/MetadataImpl.cpp similarity index 99% rename from lib/librdkafka-2.1.0/src-cpp/MetadataImpl.cpp rename to lib/librdkafka-2.3.0/src-cpp/MetadataImpl.cpp index 62cbf9042ea..df58d4dbd70 100644 --- a/lib/librdkafka-2.1.0/src-cpp/MetadataImpl.cpp +++ b/lib/librdkafka-2.3.0/src-cpp/MetadataImpl.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C/C++ library * - * Copyright (c) 2014 Magnus Edenhill + * Copyright (c) 2014-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src-cpp/ProducerImpl.cpp b/lib/librdkafka-2.3.0/src-cpp/ProducerImpl.cpp similarity index 99% rename from lib/librdkafka-2.1.0/src-cpp/ProducerImpl.cpp rename to lib/librdkafka-2.3.0/src-cpp/ProducerImpl.cpp index 8300dfb3b60..88752156c11 100644 --- a/lib/librdkafka-2.1.0/src-cpp/ProducerImpl.cpp +++ b/lib/librdkafka-2.3.0/src-cpp/ProducerImpl.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C/C++ library * - * Copyright (c) 2014 Magnus Edenhill + * Copyright (c) 2014-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src-cpp/QueueImpl.cpp b/lib/librdkafka-2.3.0/src-cpp/QueueImpl.cpp similarity index 98% rename from lib/librdkafka-2.1.0/src-cpp/QueueImpl.cpp rename to lib/librdkafka-2.3.0/src-cpp/QueueImpl.cpp index 19ebce9d681..7148d72011d 100644 --- a/lib/librdkafka-2.1.0/src-cpp/QueueImpl.cpp +++ b/lib/librdkafka-2.3.0/src-cpp/QueueImpl.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C/C++ library * - * Copyright (c) 2014 Magnus Edenhill + * Copyright (c) 2014-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src-cpp/README.md b/lib/librdkafka-2.3.0/src-cpp/README.md similarity index 100% rename from lib/librdkafka-2.1.0/src-cpp/README.md rename to lib/librdkafka-2.3.0/src-cpp/README.md diff --git a/lib/librdkafka-2.1.0/src-cpp/RdKafka.cpp b/lib/librdkafka-2.3.0/src-cpp/RdKafka.cpp similarity index 97% rename from lib/librdkafka-2.1.0/src-cpp/RdKafka.cpp rename to lib/librdkafka-2.3.0/src-cpp/RdKafka.cpp index b6cb33c288b..c7c41ec9846 100644 --- a/lib/librdkafka-2.1.0/src-cpp/RdKafka.cpp +++ b/lib/librdkafka-2.3.0/src-cpp/RdKafka.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C/C++ library * - * Copyright (c) 2014 Magnus Edenhill + * Copyright (c) 2014-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src-cpp/TopicImpl.cpp b/lib/librdkafka-2.3.0/src-cpp/TopicImpl.cpp similarity index 99% rename from lib/librdkafka-2.1.0/src-cpp/TopicImpl.cpp rename to lib/librdkafka-2.3.0/src-cpp/TopicImpl.cpp index bf9734df944..6868b5932d6 100644 --- a/lib/librdkafka-2.1.0/src-cpp/TopicImpl.cpp +++ b/lib/librdkafka-2.3.0/src-cpp/TopicImpl.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C/C++ library * - * Copyright (c) 2014 Magnus Edenhill + * Copyright (c) 2014-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src-cpp/TopicPartitionImpl.cpp b/lib/librdkafka-2.3.0/src-cpp/TopicPartitionImpl.cpp similarity index 97% rename from lib/librdkafka-2.1.0/src-cpp/TopicPartitionImpl.cpp rename to lib/librdkafka-2.3.0/src-cpp/TopicPartitionImpl.cpp index 90ef820bf66..d453d964257 100644 --- a/lib/librdkafka-2.1.0/src-cpp/TopicPartitionImpl.cpp +++ b/lib/librdkafka-2.3.0/src-cpp/TopicPartitionImpl.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C/C++ library * - * Copyright (c) 2015 Magnus Edenhill + * Copyright (c) 2015-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src-cpp/rdkafkacpp.h b/lib/librdkafka-2.3.0/src-cpp/rdkafkacpp.h similarity index 99% rename from lib/librdkafka-2.1.0/src-cpp/rdkafkacpp.h rename to lib/librdkafka-2.3.0/src-cpp/rdkafkacpp.h index 1df1043c076..f353d064b47 100644 --- a/lib/librdkafka-2.1.0/src-cpp/rdkafkacpp.h +++ b/lib/librdkafka-2.3.0/src-cpp/rdkafkacpp.h @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C/C++ library * - * Copyright (c) 2014-2022 Magnus Edenhill + * Copyright (c) 2014-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -111,7 +112,7 @@ namespace RdKafka { * @remark This value should only be used during compile time, * for runtime checks of version use RdKafka::version() */ -#define RD_KAFKA_VERSION 0x020100ff +#define RD_KAFKA_VERSION 0x020300ff /** * @brief Returns the librdkafka version as integer. @@ -1986,6 +1987,12 @@ class RD_EXPORT TopicPartition { /** @brief Set partition leader epoch. */ virtual void set_leader_epoch(int32_t leader_epoch) = 0; + + /** @brief Get partition metadata. */ + virtual std::vector<unsigned char> get_metadata() = 0; + + /** @brief Set partition metadata. */ + virtual void set_metadata(std::vector<unsigned char> &metadata) = 0; }; diff --git a/lib/librdkafka-2.1.0/src-cpp/rdkafkacpp_int.h b/lib/librdkafka-2.3.0/src-cpp/rdkafkacpp_int.h similarity index 98% rename from lib/librdkafka-2.1.0/src-cpp/rdkafkacpp_int.h rename to lib/librdkafka-2.3.0/src-cpp/rdkafkacpp_int.h index bc024ebe903..167b83a072d 100644 --- a/lib/librdkafka-2.1.0/src-cpp/rdkafkacpp_int.h +++ b/lib/librdkafka-2.3.0/src-cpp/rdkafkacpp_int.h @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C/C++ library * - * Copyright (c) 2014 Magnus Edenhill + * Copyright (c) 2014-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved.
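
The new get_metadata()/set_metadata() virtuals on RdKafka::TopicPartition expose the per-partition commit metadata that the underlying C struct rd_kafka_topic_partition_t has always carried in its public metadata/metadata_size fields; together with the HandleImpl.cpp copying earlier in this diff, metadata attached in C++ now survives the round trip through the C structs (the spot previously marked "// FIXME: metadata"). The bumped RD_KAFKA_VERSION 0x020300ff follows librdkafka's usual 0xMMmmrrxx hex packing (xx = 0xff for a final release), i.e. 2.3.0. A minimal C sketch of the same feature through the public C API, which is what the C++ accessors ultimately populate — the topic name, offset, payload and live consumer handle rk are assumed for illustration:

    #include <string.h>
    #include "rdkafka.h"

    /* Commit an offset together with opaque per-partition metadata. */
    static void commit_with_metadata(rd_kafka_t *rk) {
            const char *note = "checkpoint-42"; /* illustrative payload */
            rd_kafka_topic_partition_list_t *offsets =
                rd_kafka_topic_partition_list_new(1);
            rd_kafka_topic_partition_t *rktpar =
                rd_kafka_topic_partition_list_add(offsets, "mytopic", 0);

            rktpar->offset        = 1234;         /* next offset to consume */
            rktpar->metadata      = strdup(note); /* assumed: ownership passes
                                                   * to the list destructor */
            rktpar->metadata_size = strlen(note);

            rd_kafka_commit(rk, offsets, 0 /* synchronous */);
            rd_kafka_topic_partition_list_destroy(offsets);
    }
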
* * Redistribution and use in source and binary forms, with or without @@ -1260,7 +1261,10 @@ class TopicPartitionImpl : public TopicPartition { offset_ = c_part->offset; err_ = static_cast<ErrorCode>(c_part->err); leader_epoch_ = rd_kafka_topic_partition_get_leader_epoch(c_part); - // FIXME: metadata + if (c_part->metadata_size > 0) { + unsigned char *metadata = (unsigned char *)c_part->metadata; + metadata_.assign(metadata, metadata + c_part->metadata_size); + } } static void destroy(std::vector<TopicPartition *> &partitions); @@ -1289,7 +1293,15 @@ } void set_leader_epoch(int32_t leader_epoch) { - leader_epoch_ = leader_epoch_; + leader_epoch_ = leader_epoch; + } + + std::vector<unsigned char> get_metadata() { + return metadata_; + } + + void set_metadata(std::vector<unsigned char> &metadata) { + metadata_ = metadata; } std::ostream &operator<<(std::ostream &ostrm) const { @@ -1301,6 +1313,7 @@ int64_t offset_; ErrorCode err_; int32_t leader_epoch_; + std::vector<unsigned char> metadata_; }; diff --git a/lib/librdkafka-2.1.0/src/CMakeLists.txt b/lib/librdkafka-2.3.0/src/CMakeLists.txt similarity index 95% rename from lib/librdkafka-2.1.0/src/CMakeLists.txt rename to lib/librdkafka-2.3.0/src/CMakeLists.txt index 37b43c49964..cbcff5000a8 100644 --- a/lib/librdkafka-2.1.0/src/CMakeLists.txt +++ b/lib/librdkafka-2.3.0/src/CMakeLists.txt @@ -8,6 +8,7 @@ set( rdbuf.c rdcrc32.c rdfnv1a.c + rdbase64.c rdkafka.c rdkafka_assignor.c rdkafka_broker.c @@ -199,7 +200,7 @@ target_include_directories(rdkafka PUBLIC "$") if(WITH_CURL) find_package(CURL REQUIRED) - target_include_directories(rdkafka PUBLIC ${CURL_INCLUDE_DIRS}) + target_include_directories(rdkafka PRIVATE ${CURL_INCLUDE_DIRS}) target_link_libraries(rdkafka PUBLIC ${CURL_LIBRARIES}) endif() @@ -272,7 +273,7 @@ endif() # Generate pkg-config file set(PKG_CONFIG_VERSION "${PROJECT_VERSION}") -set(PKG_CONFIG_REQUIRES "") +set(PKG_CONFIG_REQUIRES_PRIVATE "") if (WIN32) set(PKG_CONFIG_LIBS_PRIVATE "-lws2_32 -lsecur32 -lcrypt32") else() @@ -296,27 +297,27 @@ if(NOT RDKAFKA_BUILD_STATIC) set(PKG_CONFIG_DESCRIPTION "The Apache Kafka C/C++ library") if(WITH_CURL) - string(APPEND PKG_CONFIG_REQUIRES "curl ") + string(APPEND PKG_CONFIG_REQUIRES_PRIVATE "libcurl ") endif() if(WITH_ZLIB) - string(APPEND PKG_CONFIG_REQUIRES "zlib ") + string(APPEND PKG_CONFIG_REQUIRES_PRIVATE "zlib ") endif() if(WITH_SSL) - string(APPEND PKG_CONFIG_REQUIRES "libssl ") + string(APPEND PKG_CONFIG_REQUIRES_PRIVATE "libcrypto libssl ") endif() if(WITH_SASL_CYRUS) - string(APPEND PKG_CONFIG_REQUIRES "libsasl2 ") + string(APPEND PKG_CONFIG_REQUIRES_PRIVATE "libsasl2 ") endif() if(WITH_ZSTD) - string(APPEND PKG_CONFIG_REQUIRES "libzstd ") + string(APPEND PKG_CONFIG_REQUIRES_PRIVATE "libzstd ") endif() if(WITH_LZ4_EXT) - string(APPEND PKG_CONFIG_REQUIRES "liblz4 ") + string(APPEND PKG_CONFIG_REQUIRES_PRIVATE "liblz4 ") endif() set(PKG_CONFIG_CFLAGS "-I\${includedir}") diff --git a/lib/librdkafka-2.1.0/src/Makefile b/lib/librdkafka-2.3.0/src/Makefile similarity index 98% rename from lib/librdkafka-2.1.0/src/Makefile rename to lib/librdkafka-2.3.0/src/Makefile index 26df5723b8c..1c43f0b0178 100644 --- a/lib/librdkafka-2.1.0/src/Makefile +++ b/lib/librdkafka-2.3.0/src/Makefile @@ -52,7 +52,7 @@ SRCS= rdkafka.c rdkafka_broker.c rdkafka_msg.c rdkafka_topic.c \ rdkafka_msgset_writer.c rdkafka_msgset_reader.c \ rdkafka_header.c rdkafka_admin.c rdkafka_aux.c \ rdkafka_background.c rdkafka_idempotence.c rdkafka_cert.c \ -
rdkafka_txnmgr.c rdkafka_coord.c rdbase64.c \ rdvarint.c rdbuf.c rdmap.c rdunittest.c \ rdkafka_mock.c rdkafka_mock_handlers.c rdkafka_mock_cgrp.c \ rdkafka_error.c rdkafka_fetcher.c \ diff --git a/lib/librdkafka-2.1.0/src/cJSON.c b/lib/librdkafka-2.3.0/src/cJSON.c similarity index 100% rename from lib/librdkafka-2.1.0/src/cJSON.c rename to lib/librdkafka-2.3.0/src/cJSON.c diff --git a/lib/librdkafka-2.1.0/src/cJSON.h b/lib/librdkafka-2.3.0/src/cJSON.h similarity index 100% rename from lib/librdkafka-2.1.0/src/cJSON.h rename to lib/librdkafka-2.3.0/src/cJSON.h diff --git a/lib/librdkafka-2.1.0/src/crc32c.c b/lib/librdkafka-2.3.0/src/crc32c.c similarity index 100% rename from lib/librdkafka-2.1.0/src/crc32c.c rename to lib/librdkafka-2.3.0/src/crc32c.c diff --git a/lib/librdkafka-2.1.0/src/crc32c.h b/lib/librdkafka-2.3.0/src/crc32c.h similarity index 97% rename from lib/librdkafka-2.1.0/src/crc32c.h rename to lib/librdkafka-2.3.0/src/crc32c.h index 21c7badc7f1..d768afc6763 100644 --- a/lib/librdkafka-2.1.0/src/crc32c.h +++ b/lib/librdkafka-2.3.0/src/crc32c.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2017 Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/generate_proto.sh b/lib/librdkafka-2.3.0/src/generate_proto.sh similarity index 98% rename from lib/librdkafka-2.1.0/src/generate_proto.sh rename to lib/librdkafka-2.3.0/src/generate_proto.sh index c7023f47ab8..4402022607c 100755 --- a/lib/librdkafka-2.1.0/src/generate_proto.sh +++ b/lib/librdkafka-2.3.0/src/generate_proto.sh @@ -2,7 +2,7 @@ # # librdkafka - Apache Kafka C library # -# Copyright (c) 2020 Magnus Edenhill +# Copyright (c) 2020-2022, Magnus Edenhill # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/librdkafka_cgrp_synch.png b/lib/librdkafka-2.3.0/src/librdkafka_cgrp_synch.png similarity index 100% rename from lib/librdkafka-2.1.0/src/librdkafka_cgrp_synch.png rename to lib/librdkafka-2.3.0/src/librdkafka_cgrp_synch.png diff --git a/lib/librdkafka-2.1.0/src/lz4.c b/lib/librdkafka-2.3.0/src/lz4.c similarity index 100% rename from lib/librdkafka-2.1.0/src/lz4.c rename to lib/librdkafka-2.3.0/src/lz4.c diff --git a/lib/librdkafka-2.1.0/src/lz4.h b/lib/librdkafka-2.3.0/src/lz4.h similarity index 100% rename from lib/librdkafka-2.1.0/src/lz4.h rename to lib/librdkafka-2.3.0/src/lz4.h diff --git a/lib/librdkafka-2.1.0/src/lz4frame.c b/lib/librdkafka-2.3.0/src/lz4frame.c similarity index 100% rename from lib/librdkafka-2.1.0/src/lz4frame.c rename to lib/librdkafka-2.3.0/src/lz4frame.c diff --git a/lib/librdkafka-2.1.0/src/lz4frame.h b/lib/librdkafka-2.3.0/src/lz4frame.h similarity index 100% rename from lib/librdkafka-2.1.0/src/lz4frame.h rename to lib/librdkafka-2.3.0/src/lz4frame.h diff --git a/lib/librdkafka-2.1.0/src/lz4frame_static.h b/lib/librdkafka-2.3.0/src/lz4frame_static.h similarity index 100% rename from lib/librdkafka-2.1.0/src/lz4frame_static.h rename to lib/librdkafka-2.3.0/src/lz4frame_static.h diff --git a/lib/librdkafka-2.1.0/src/lz4hc.c b/lib/librdkafka-2.3.0/src/lz4hc.c similarity index 100% rename from lib/librdkafka-2.1.0/src/lz4hc.c rename to lib/librdkafka-2.3.0/src/lz4hc.c diff --git a/lib/librdkafka-2.1.0/src/lz4hc.h b/lib/librdkafka-2.3.0/src/lz4hc.h similarity index 100% rename from lib/librdkafka-2.1.0/src/lz4hc.h rename to lib/librdkafka-2.3.0/src/lz4hc.h diff --git a/lib/librdkafka-2.1.0/src/queue.h b/lib/librdkafka-2.3.0/src/queue.h similarity index 100% rename from lib/librdkafka-2.1.0/src/queue.h rename to lib/librdkafka-2.3.0/src/queue.h diff --git a/lib/librdkafka-2.1.0/src/rd.h b/lib/librdkafka-2.3.0/src/rd.h similarity index 99% rename from lib/librdkafka-2.1.0/src/rd.h rename to lib/librdkafka-2.3.0/src/rd.h index 670605de441..fd6c307fd0b 100644 --- a/lib/librdkafka-2.1.0/src/rd.h +++ b/lib/librdkafka-2.3.0/src/rd.h @@ -1,7 +1,7 @@ /* * librd - Rapid Development C library * - * Copyright (c) 2012, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdaddr.c b/lib/librdkafka-2.3.0/src/rdaddr.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdaddr.c rename to lib/librdkafka-2.3.0/src/rdaddr.c index 092406233b2..6fb2c66ca56 100644 --- a/lib/librdkafka-2.1.0/src/rdaddr.c +++ b/lib/librdkafka-2.3.0/src/rdaddr.c @@ -1,7 +1,7 @@ /* * librd - Rapid Development C library * - * Copyright (c) 2012, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdaddr.h b/lib/librdkafka-2.3.0/src/rdaddr.h similarity index 98% rename from lib/librdkafka-2.1.0/src/rdaddr.h rename to lib/librdkafka-2.3.0/src/rdaddr.h index c8574d01941..7e86a549a81 100644 --- a/lib/librdkafka-2.1.0/src/rdaddr.h +++ b/lib/librdkafka-2.3.0/src/rdaddr.h @@ -1,7 +1,7 @@ /* * librd - Rapid Development C library * - * Copyright (c) 2012, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -139,7 +139,7 @@ rd_sockaddr_list_next(rd_sockaddr_list_t *rsal) { #define RD_SOCKADDR_LIST_FOREACH(sinx, rsal) \ for ((sinx) = &(rsal)->rsal_addr[0]; \ - (sinx) < &(rsal)->rsal_addr[(rsal)->rsal_len]; (sinx)++) + (sinx) < &(rsal)->rsal_addr[(rsal)->rsal_cnt]; (sinx)++) /** * Wrapper for getaddrinfo(3) that performs these additional tasks: diff --git a/lib/librdkafka-2.1.0/src/rdatomic.h b/lib/librdkafka-2.3.0/src/rdatomic.h similarity index 99% rename from lib/librdkafka-2.1.0/src/rdatomic.h rename to lib/librdkafka-2.3.0/src/rdatomic.h index aa7d3d7705c..4b97dd7d087 100644 --- a/lib/librdkafka-2.1.0/src/rdatomic.h +++ b/lib/librdkafka-2.3.0/src/rdatomic.h @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2014-2016 Magnus Edenhill + * Copyright (c) 2014-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdavg.h b/lib/librdkafka-2.3.0/src/rdavg.h similarity index 99% rename from lib/librdkafka-2.1.0/src/rdavg.h rename to lib/librdkafka-2.3.0/src/rdavg.h index a170e8da537..55469e24661 100644 --- a/lib/librdkafka-2.1.0/src/rdavg.h +++ b/lib/librdkafka-2.3.0/src/rdavg.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2018 Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdavl.c b/lib/librdkafka-2.3.0/src/rdavl.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdavl.c rename to lib/librdkafka-2.3.0/src/rdavl.c index f25251de8e3..0bb41180966 100644 --- a/lib/librdkafka-2.1.0/src/rdavl.c +++ b/lib/librdkafka-2.3.0/src/rdavl.c @@ -1,7 +1,7 @@ /* * librd - Rapid Development C library * - * Copyright (c) 2012-2016, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdavl.h b/lib/librdkafka-2.3.0/src/rdavl.h similarity index 99% rename from lib/librdkafka-2.1.0/src/rdavl.h rename to lib/librdkafka-2.3.0/src/rdavl.h index f3e539242b9..dc6fe2e2c9b 100644 --- a/lib/librdkafka-2.1.0/src/rdavl.h +++ b/lib/librdkafka-2.3.0/src/rdavl.h @@ -1,7 +1,7 @@ /* * librd - Rapid Development C library * - * Copyright (c) 2012-2016, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.3.0/src/rdbase64.c b/lib/librdkafka-2.3.0/src/rdbase64.c new file mode 100644 index 00000000000..aaf2fb138e4 --- /dev/null +++ b/lib/librdkafka-2.3.0/src/rdbase64.c @@ -0,0 +1,169 @@ +/* + * librdkafka - The Apache Kafka C/C++ library + * + * Copyright (c) 2023 Confluent Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
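
The rdaddr.h hunk above is a genuine bug fix rather than a rename artifact: the RD_SOCKADDR_LIST_FOREACH upper bound previously indexed rsal_addr[] with rsal_len, and now uses rsal_cnt, the address-count field used elsewhere in rdaddr.h. A short sketch of the intended iteration, assuming a populated rd_sockaddr_list_t *rsal (e.g. from the library's resolver) and the rd_sockaddr_inx_t element type declared in rdaddr.h:

    const rd_sockaddr_inx_t *sinx;

    /* With the corrected bound the loop visits exactly rsal->rsal_cnt
     * resolved addresses, one entry per iteration. */
    RD_SOCKADDR_LIST_FOREACH(sinx, rsal) {
            try_connect(sinx); /* hypothetical per-address consumer */
    }
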
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "rdbase64.h" + +#if WITH_SSL +#include <openssl/evp.h> +#else + +#define conv_bin2ascii(a, table) ((table)[(a)&0x3f]) + +static const unsigned char data_bin2ascii[65] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +static int base64_encoding_conversion(unsigned char *out, + const unsigned char *in, + int dlen) { + int i, ret = 0; + unsigned long l; + + for (i = dlen; i > 0; i -= 3) { + if (i >= 3) { + l = (((unsigned long)in[0]) << 16L) | + (((unsigned long)in[1]) << 8L) | in[2]; + *(out++) = conv_bin2ascii(l >> 18L, data_bin2ascii); + *(out++) = conv_bin2ascii(l >> 12L, data_bin2ascii); + *(out++) = conv_bin2ascii(l >> 6L, data_bin2ascii); + *(out++) = conv_bin2ascii(l, data_bin2ascii); + } else { + l = ((unsigned long)in[0]) << 16L; + if (i == 2) + l |= ((unsigned long)in[1] << 8L); + + *(out++) = conv_bin2ascii(l >> 18L, data_bin2ascii); + *(out++) = conv_bin2ascii(l >> 12L, data_bin2ascii); + *(out++) = + (i == 1) ? '=' + : conv_bin2ascii(l >> 6L, data_bin2ascii); + *(out++) = '='; + } + ret += 4; + in += 3; + } + + *out = '\0'; + return ret; +} + +#endif + +/** + * @brief Base64 encode binary input \p in, and write base64-encoded string + * and it's size to \p out. out->ptr will be NULL in case of some issue + * with the conversion or the conversion is not supported. + * + * @remark out->ptr must be freed after use. + */ +void rd_base64_encode(const rd_chariov_t *in, rd_chariov_t *out) { + + size_t max_len; + + /* OpenSSL takes an |int| argument so the input cannot exceed that. */ + if (in->size > INT_MAX) { + out->ptr = NULL; + return; + } + + max_len = (((in->size + 2) / 3) * 4) + 1; + out->ptr = rd_malloc(max_len); + +#if WITH_SSL + out->size = EVP_EncodeBlock((unsigned char *)out->ptr, + (unsigned char *)in->ptr, (int)in->size); +#else + out->size = base64_encoding_conversion( + (unsigned char *)out->ptr, (unsigned char *)in->ptr, (int)in->size); +#endif + + rd_assert(out->size < max_len); + out->ptr[out->size] = 0; +} + + +/** + * @brief Base64 encode binary input \p in. + * @returns a newly allocated, base64-encoded string or NULL in case of some + * issue with the conversion or the conversion is not supported. + * + * @remark Returned string must be freed after use. + */ +char *rd_base64_encode_str(const rd_chariov_t *in) { + rd_chariov_t out; + rd_base64_encode(in, &out); + return out.ptr; +} + + +/** + * @brief Base64 decode input string \p in. Ignores leading and trailing + * whitespace. + * @returns * 0 on successes in which case a newly allocated binary string is + * set in \p out (and size). + * * -1 on invalid Base64. + * * -2 on conversion not supported.
+ */ +int rd_base64_decode(const rd_chariov_t *in, rd_chariov_t *out) { + +#if WITH_SSL + size_t ret_len; + + /* OpenSSL takes an |int| argument, so |in->size| must not exceed + * that. */ + if (in->size % 4 != 0 || in->size > INT_MAX) { + return -1; + } + + ret_len = ((in->size / 4) * 3); + out->ptr = rd_malloc(ret_len + 1); + + if (EVP_DecodeBlock((unsigned char *)out->ptr, (unsigned char *)in->ptr, + (int)in->size) == -1) { + rd_free(out->ptr); + out->ptr = NULL; + return -1; + } + + /* EVP_DecodeBlock will pad the output with trailing NULs and count + * them in the return value. */ + if (in->size > 1 && in->ptr[in->size - 1] == '=') { + if (in->size > 2 && in->ptr[in->size - 2] == '=') { + ret_len -= 2; + } else { + ret_len -= 1; + } + } + + out->ptr[ret_len] = 0; + out->size = ret_len; + + return 0; +#else + return -2; +#endif +} \ No newline at end of file diff --git a/lib/librdkafka-2.3.0/src/rdbase64.h b/lib/librdkafka-2.3.0/src/rdbase64.h new file mode 100644 index 00000000000..fd9e7a209fe --- /dev/null +++ b/lib/librdkafka-2.3.0/src/rdbase64.h @@ -0,0 +1,41 @@ +/* + * librdkafka - The Apache Kafka C/C++ library + * + * Copyright (c) 2023 Confluent Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef _RDBASE64_H_ +#define _RDBASE64_H_ + +#include "rd.h" + +void rd_base64_encode(const rd_chariov_t *in, rd_chariov_t *out); + +char *rd_base64_encode_str(const rd_chariov_t *in); + +int rd_base64_decode(const rd_chariov_t *in, rd_chariov_t *out); + +#endif /* _RDBASE64_H_ */ \ No newline at end of file diff --git a/lib/librdkafka-2.1.0/src/rdbuf.c b/lib/librdkafka-2.3.0/src/rdbuf.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdbuf.c rename to lib/librdkafka-2.3.0/src/rdbuf.c index 1392cf7b18c..6df64a9dee2 100644 --- a/lib/librdkafka-2.1.0/src/rdbuf.c +++ b/lib/librdkafka-2.3.0/src/rdbuf.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2017 Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill * All rights reserved. 
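
Taken together, the new rdbase64.{c,h} give the library a self-contained Base64 helper: encoding uses OpenSSL's EVP_EncodeBlock() when built WITH_SSL and falls back to the bundled table-driven converter otherwise, while decoding is implemented only on top of OpenSSL and reports -2 when the build lacks it. A sketch of how the internal API is called — these are library-internal symbols, not public API, and rd_chariov_t is the pointer+size pair from rd.h:

    #include "rdbase64.h"

    /* Round-trip a small buffer through the new helpers. */
    static int base64_roundtrip_example(void) {
            char data[] = "hello";
            rd_chariov_t in = {data, 5}, enc, dec;

            rd_base64_encode(&in, &enc); /* enc.ptr == "aGVsbG8=" */
            if (!enc.ptr)
                    return -1; /* input exceeded INT_MAX, not encodable */

            /* 0 on success, -1 on malformed input, -2 without OpenSSL. */
            if (rd_base64_decode(&enc, &dec) == 0)
                    rd_free(dec.ptr); /* dec held the original bytes */

            rd_free(enc.ptr);
            return 0;
    }
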
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdbuf.h b/lib/librdkafka-2.3.0/src/rdbuf.h similarity index 99% rename from lib/librdkafka-2.1.0/src/rdbuf.h rename to lib/librdkafka-2.3.0/src/rdbuf.h index 1ef30e4a95e..90d61401b04 100644 --- a/lib/librdkafka-2.1.0/src/rdbuf.h +++ b/lib/librdkafka-2.3.0/src/rdbuf.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2017 Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdcrc32.c b/lib/librdkafka-2.3.0/src/rdcrc32.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdcrc32.c rename to lib/librdkafka-2.3.0/src/rdcrc32.c index 2a6e126c142..f7a68855046 100644 --- a/lib/librdkafka-2.1.0/src/rdcrc32.c +++ b/lib/librdkafka-2.3.0/src/rdcrc32.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2018 Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdcrc32.h b/lib/librdkafka-2.3.0/src/rdcrc32.h similarity index 99% rename from lib/librdkafka-2.1.0/src/rdcrc32.h rename to lib/librdkafka-2.3.0/src/rdcrc32.h index c3195fca62d..676cd7d236b 100644 --- a/lib/librdkafka-2.1.0/src/rdcrc32.h +++ b/lib/librdkafka-2.3.0/src/rdcrc32.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2018 Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rddl.c b/lib/librdkafka-2.3.0/src/rddl.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rddl.c rename to lib/librdkafka-2.3.0/src/rddl.c index 785e28c486f..826d0a79127 100644 --- a/lib/librdkafka-2.1.0/src/rddl.c +++ b/lib/librdkafka-2.3.0/src/rddl.c @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2017 Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rddl.h b/lib/librdkafka-2.3.0/src/rddl.h similarity index 97% rename from lib/librdkafka-2.1.0/src/rddl.h rename to lib/librdkafka-2.3.0/src/rddl.h index eaf6eb6d5ec..d1176c3e527 100644 --- a/lib/librdkafka-2.1.0/src/rddl.h +++ b/lib/librdkafka-2.3.0/src/rddl.h @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2017 Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdendian.h b/lib/librdkafka-2.3.0/src/rdendian.h similarity index 99% rename from lib/librdkafka-2.1.0/src/rdendian.h rename to lib/librdkafka-2.3.0/src/rdendian.h index 613d44bfaf8..8a1c4148ced 100644 --- a/lib/librdkafka-2.1.0/src/rdendian.h +++ b/lib/librdkafka-2.3.0/src/rdendian.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015 Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdfloat.h b/lib/librdkafka-2.3.0/src/rdfloat.h similarity index 98% rename from lib/librdkafka-2.1.0/src/rdfloat.h rename to lib/librdkafka-2.3.0/src/rdfloat.h index 310045f0ea1..3868d35f5d9 100644 --- a/lib/librdkafka-2.1.0/src/rdfloat.h +++ b/lib/librdkafka-2.3.0/src/rdfloat.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2018, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdfnv1a.c b/lib/librdkafka-2.3.0/src/rdfnv1a.c similarity index 98% rename from lib/librdkafka-2.1.0/src/rdfnv1a.c rename to lib/librdkafka-2.3.0/src/rdfnv1a.c index e951ec59f2e..c412348c2a7 100644 --- a/lib/librdkafka-2.1.0/src/rdfnv1a.c +++ b/lib/librdkafka-2.3.0/src/rdfnv1a.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2020, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdfnv1a.h b/lib/librdkafka-2.3.0/src/rdfnv1a.h similarity index 97% rename from lib/librdkafka-2.1.0/src/rdfnv1a.h rename to lib/librdkafka-2.3.0/src/rdfnv1a.h index 8df66b0d62e..8d956ab68cd 100644 --- a/lib/librdkafka-2.1.0/src/rdfnv1a.h +++ b/lib/librdkafka-2.3.0/src/rdfnv1a.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020 Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdgz.c b/lib/librdkafka-2.3.0/src/rdgz.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdgz.c rename to lib/librdkafka-2.3.0/src/rdgz.c index 794bd9cc1c5..d820bcfcacc 100644 --- a/lib/librdkafka-2.1.0/src/rdgz.c +++ b/lib/librdkafka-2.3.0/src/rdgz.c @@ -1,7 +1,7 @@ /* * librd - Rapid Development C library * - * Copyright (c) 2012, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdgz.h b/lib/librdkafka-2.3.0/src/rdgz.h similarity index 97% rename from lib/librdkafka-2.1.0/src/rdgz.h rename to lib/librdkafka-2.3.0/src/rdgz.h index 10d661cb3b2..1161091f298 100644 --- a/lib/librdkafka-2.1.0/src/rdgz.h +++ b/lib/librdkafka-2.3.0/src/rdgz.h @@ -1,7 +1,7 @@ /* * librd - Rapid Development C library * - * Copyright (c) 2012, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdhdrhistogram.c b/lib/librdkafka-2.3.0/src/rdhdrhistogram.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdhdrhistogram.c rename to lib/librdkafka-2.3.0/src/rdhdrhistogram.c index 3f2b6758b53..08240ac7a3b 100644 --- a/lib/librdkafka-2.1.0/src/rdhdrhistogram.c +++ b/lib/librdkafka-2.3.0/src/rdhdrhistogram.c @@ -31,7 +31,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2018, Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdhdrhistogram.h b/lib/librdkafka-2.3.0/src/rdhdrhistogram.h similarity index 98% rename from lib/librdkafka-2.1.0/src/rdhdrhistogram.h rename to lib/librdkafka-2.3.0/src/rdhdrhistogram.h index 868614b7b0b..7bfae84f4b2 100644 --- a/lib/librdkafka-2.1.0/src/rdhdrhistogram.h +++ b/lib/librdkafka-2.3.0/src/rdhdrhistogram.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2018, Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdhttp.c b/lib/librdkafka-2.3.0/src/rdhttp.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdhttp.c rename to lib/librdkafka-2.3.0/src/rdhttp.c index 7457a7fbe4e..cea2d1c97d8 100644 --- a/lib/librdkafka-2.1.0/src/rdhttp.c +++ b/lib/librdkafka-2.3.0/src/rdhttp.c @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2021 Magnus Edenhill + * Copyright (c) 2021-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdhttp.h b/lib/librdkafka-2.3.0/src/rdhttp.h similarity index 98% rename from lib/librdkafka-2.1.0/src/rdhttp.h rename to lib/librdkafka-2.3.0/src/rdhttp.h index 80512e5ac28..9691cc800e4 100644 --- a/lib/librdkafka-2.1.0/src/rdhttp.h +++ b/lib/librdkafka-2.3.0/src/rdhttp.h @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2021 Magnus Edenhill + * Copyright (c) 2021-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdinterval.h b/lib/librdkafka-2.3.0/src/rdinterval.h similarity index 87% rename from lib/librdkafka-2.1.0/src/rdinterval.h rename to lib/librdkafka-2.3.0/src/rdinterval.h index 4283376462f..95cdf3c2d7f 100644 --- a/lib/librdkafka-2.1.0/src/rdinterval.h +++ b/lib/librdkafka-2.3.0/src/rdinterval.h @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2018 Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill + * 2023 Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,6 +31,7 @@ #define _RDINTERVAL_H_ #include "rd.h" +#include "rdrand.h" typedef struct rd_interval_s { rd_ts_t ri_ts_last; /* last interval timestamp */ @@ -109,6 +111,22 @@ static RD_INLINE RD_UNUSED void rd_interval_reset_to_now(rd_interval_t *ri, ri->ri_backoff = 0; } +/** + * Reset the interval to 'now' with the given backoff ms and max_jitter as + * percentage. The backoff is given just for absolute jitter calculation. If now + * is 0, the time will be gathered automatically. + */ +static RD_INLINE RD_UNUSED void +rd_interval_reset_to_now_with_jitter(rd_interval_t *ri, + rd_ts_t now, + int64_t backoff_ms, + int max_jitter) { + rd_interval_reset_to_now(ri, now); + /* We are multiplying by 10 as (backoff_ms * percent * 1000)/100 -> + * backoff_ms * jitter * 10 */ + ri->ri_backoff = backoff_ms * rd_jitter(-max_jitter, max_jitter) * 10; +} + /** * Back off the next interval by `backoff_us` microseconds. 
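The factor of 10 in the new jitter helper is plain unit conversion: ri_backoff is kept in microseconds, so backoff_ms * (jitter_percent / 100) * 1000 us/ms reduces to backoff_ms * jitter_percent * 10. A hypothetical call with illustrative numbers (rd_interval_init() is assumed to be the zeroing initializer this header already provides):

```c
#include "rdinterval.h"

static void jitter_backoff_example(void) {
        rd_interval_t ri;
        rd_interval_init(&ri);

        /* 1000 ms base backoff with up to +/-20% jitter: if
         * rd_jitter(-20, 20) happens to return 13, ri_backoff becomes
         * 1000 * 13 * 10 = 130000 us, so the next interval fires
         * ~130 ms later than it otherwise would. */
        rd_interval_reset_to_now_with_jitter(&ri, 0 /* now: fetch time */,
                                             1000, 20);
}
```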
*/ diff --git a/lib/librdkafka-2.1.0/src/rdkafka.c b/lib/librdkafka-2.3.0/src/rdkafka.c similarity index 97% rename from lib/librdkafka-2.1.0/src/rdkafka.c rename to lib/librdkafka-2.3.0/src/rdkafka.c index b254748eb60..99d9c17449c 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka.c +++ b/lib/librdkafka-2.3.0/src/rdkafka.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -699,7 +700,6 @@ static const struct rd_kafka_err_desc rd_kafka_err_descs[] = { _ERR_DESC(RD_KAFKA_RESP_ERR_PRINCIPAL_DESERIALIZATION_FAILURE, "Broker: Request principal deserialization failed during " "forwarding"), - _ERR_DESC(RD_KAFKA_RESP_ERR__END, NULL)}; @@ -990,7 +990,7 @@ void rd_kafka_destroy_final(rd_kafka_t *rk) { mtx_destroy(&rk->rk_init_lock); if (rk->rk_full_metadata) - rd_kafka_metadata_destroy(rk->rk_full_metadata); + rd_kafka_metadata_destroy(&rk->rk_full_metadata->metadata); rd_kafkap_str_destroy(rk->rk_client_id); rd_kafkap_str_destroy(rk->rk_group_id); rd_kafkap_str_destroy(rk->rk_eos.transactional_id); @@ -2523,7 +2523,8 @@ rd_kafka_t *rd_kafka_new(rd_kafka_type_t type, /* Add initial list of brokers from configuration */ if (rk->rk_conf.brokerlist) { - if (rd_kafka_brokers_add0(rk, rk->rk_conf.brokerlist) == 0) + if (rd_kafka_brokers_add0(rk, rk->rk_conf.brokerlist, + rd_true) == 0) rd_kafka_op_err(rk, RD_KAFKA_RESP_ERR__ALL_BROKERS_DOWN, "No brokers configured"); } @@ -3479,6 +3480,7 @@ static void rd_kafka_query_wmark_offsets_resp_cb(rd_kafka_t *rk, struct _query_wmark_offsets_state *state; rd_kafka_topic_partition_list_t *offsets; rd_kafka_topic_partition_t *rktpar; + int actions = 0; if (err == RD_KAFKA_RESP_ERR__DESTROY) { /* 'state' has gone out of scope when query_watermark..() @@ -3490,7 +3492,15 @@ static void rd_kafka_query_wmark_offsets_resp_cb(rd_kafka_t *rk, offsets = rd_kafka_topic_partition_list_new(1); err = rd_kafka_handle_ListOffsets(rk, rkb, err, rkbuf, request, offsets, - NULL); + &actions); + + if (actions & RD_KAFKA_ERR_ACTION_REFRESH) { + /* Remove its cache in case the topic isn't a known topic. */ + rd_kafka_wrlock(rk); + rd_kafka_metadata_cache_delete_by_name(rk, state->topic); + rd_kafka_wrunlock(rk); + } + if (err == RD_KAFKA_RESP_ERR__IN_PROGRESS) { rd_kafka_topic_partition_list_destroy(offsets); return; /* Retrying */ @@ -3511,14 +3521,18 @@ static void rd_kafka_query_wmark_offsets_resp_cb(rd_kafka_t *rk, /* FALLTHRU */ } - /* Partition not seen in response. */ - if (!(rktpar = rd_kafka_topic_partition_list_find(offsets, state->topic, - state->partition))) + rktpar = rd_kafka_topic_partition_list_find(offsets, state->topic, + state->partition); + if (!rktpar && err > RD_KAFKA_RESP_ERR__END) { + /* Partition not seen in response, + * not a local error. 
*/ err = RD_KAFKA_RESP_ERR__BAD_MSG; - else if (rktpar->err) - err = rktpar->err; - else - state->offsets[state->offidx] = rktpar->offset; + } else if (rktpar) { + if (rktpar->err) + err = rktpar->err; + else + state->offsets[state->offidx] = rktpar->offset; + } state->offidx++; @@ -3574,26 +3588,25 @@ rd_kafka_resp_err_t rd_kafka_query_watermark_offsets(rd_kafka_t *rk, state.ts_end = ts_end; state.state_version = rd_kafka_brokers_get_state_version(rk); - rktpar->offset = RD_KAFKA_OFFSET_BEGINNING; rd_kafka_ListOffsetsRequest( leader->rkb, partitions, RD_KAFKA_REPLYQ(rkq, 0), - rd_kafka_query_wmark_offsets_resp_cb, &state); + rd_kafka_query_wmark_offsets_resp_cb, timeout_ms, &state); rktpar->offset = RD_KAFKA_OFFSET_END; rd_kafka_ListOffsetsRequest( leader->rkb, partitions, RD_KAFKA_REPLYQ(rkq, 0), - rd_kafka_query_wmark_offsets_resp_cb, &state); + rd_kafka_query_wmark_offsets_resp_cb, timeout_ms, &state); rd_kafka_topic_partition_list_destroy(partitions); rd_list_destroy(&leaders); /* Wait for reply (or timeout) */ - while (state.err == RD_KAFKA_RESP_ERR__IN_PROGRESS && - rd_kafka_q_serve(rkq, 100, 0, RD_KAFKA_Q_CB_CALLBACK, - rd_kafka_poll_cb, - NULL) != RD_KAFKA_OP_RES_YIELD) - ; + while (state.err == RD_KAFKA_RESP_ERR__IN_PROGRESS) { + rd_kafka_q_serve(rkq, RD_POLL_INFINITE, 0, + RD_KAFKA_Q_CB_CALLBACK, rd_kafka_poll_cb, + NULL); + } rd_kafka_q_destroy_owner(rkq); @@ -3733,7 +3746,7 @@ rd_kafka_offsets_for_times(rd_kafka_t *rk, state.wait_reply++; rd_kafka_ListOffsetsRequest( leader->rkb, leader->partitions, RD_KAFKA_REPLYQ(rkq, 0), - rd_kafka_get_offsets_for_times_resp_cb, &state); + rd_kafka_get_offsets_for_times_resp_cb, timeout_ms, &state); } rd_list_destroy(&leaders); @@ -3949,6 +3962,7 @@ rd_kafka_op_res_t rd_kafka_poll_cb(rd_kafka_t *rk, case RD_KAFKA_OP_DELETETOPICS: case RD_KAFKA_OP_CREATEPARTITIONS: case RD_KAFKA_OP_ALTERCONFIGS: + case RD_KAFKA_OP_INCREMENTALALTERCONFIGS: case RD_KAFKA_OP_DESCRIBECONFIGS: case RD_KAFKA_OP_DELETERECORDS: case RD_KAFKA_OP_DELETEGROUPS: @@ -3956,6 +3970,7 @@ rd_kafka_op_res_t rd_kafka_poll_cb(rd_kafka_t *rk, case RD_KAFKA_OP_CREATEACLS: case RD_KAFKA_OP_DESCRIBEACLS: case RD_KAFKA_OP_DELETEACLS: + case RD_KAFKA_OP_LISTOFFSETS: /* Calls op_destroy() from worker callback, * when the time comes. 
*/ res = rd_kafka_op_call(rk, rkq, rko); @@ -4006,7 +4021,6 @@ int rd_kafka_poll(rd_kafka_t *rk, int timeout_ms) { r = rd_kafka_q_serve(rk->rk_rep, timeout_ms, 0, RD_KAFKA_Q_CB_CALLBACK, rd_kafka_poll_cb, NULL); - return r; } @@ -4017,6 +4031,7 @@ rd_kafka_event_t *rd_kafka_queue_poll(rd_kafka_queue_t *rkqu, int timeout_ms) { rko = rd_kafka_q_pop_serve(rkqu->rkqu_q, rd_timeout_us(timeout_ms), 0, RD_KAFKA_Q_CB_EVENT, rd_kafka_poll_cb, NULL); + if (!rko) return NULL; @@ -4028,7 +4043,6 @@ int rd_kafka_queue_poll_callback(rd_kafka_queue_t *rkqu, int timeout_ms) { r = rd_kafka_q_serve(rkqu->rkqu_q, timeout_ms, 0, RD_KAFKA_Q_CB_CALLBACK, rd_kafka_poll_cb, NULL); - return r; } @@ -4666,8 +4680,8 @@ static void rd_kafka_DescribeGroups_resp_cb(rd_kafka_t *rk, rd_kafka_buf_read_str(reply, &MemberId); rd_kafka_buf_read_str(reply, &ClientId); rd_kafka_buf_read_str(reply, &ClientHost); - rd_kafka_buf_read_bytes(reply, &Meta); - rd_kafka_buf_read_bytes(reply, &Assignment); + rd_kafka_buf_read_kbytes(reply, &Meta); + rd_kafka_buf_read_kbytes(reply, &Assignment); mi->member_id = RD_KAFKAP_STR_DUP(&MemberId); mi->client_id = RD_KAFKAP_STR_DUP(&ClientId); @@ -4769,7 +4783,9 @@ static void rd_kafka_ListGroups_resp_cb(rd_kafka_t *rk, state->wait_cnt++; error = rd_kafka_DescribeGroupsRequest( - rkb, 0, grps, i, RD_KAFKA_REPLYQ(state->q, 0), + rkb, 0, grps, i, + rd_false /* don't include authorized operations */, + RD_KAFKA_REPLYQ(state->q, 0), rd_kafka_DescribeGroups_resp_cb, state); if (error) { rd_kafka_DescribeGroups_resp_cb( @@ -5024,3 +5040,77 @@ int rd_kafka_errno(void) { int rd_kafka_unittest(void) { return rd_unittest(); } + + +/** + * Creates a new UUID. + * + * @return A newly allocated UUID. + */ +rd_kafka_Uuid_t *rd_kafka_Uuid_new(int64_t most_significant_bits, + int64_t least_significant_bits) { + rd_kafka_Uuid_t *uuid = rd_calloc(1, sizeof(rd_kafka_Uuid_t)); + uuid->most_significant_bits = most_significant_bits; + uuid->least_significant_bits = least_significant_bits; + return uuid; +} + +/** + * Returns a newly allocated copy of the given UUID. + * + * @param uuid UUID to copy. + * @return Copy of the provided UUID. + * + * @remark Dynamically allocated. Deallocate (free) after use. + */ +rd_kafka_Uuid_t *rd_kafka_Uuid_copy(const rd_kafka_Uuid_t *uuid) { + rd_kafka_Uuid_t *copy_uuid = rd_kafka_Uuid_new( + uuid->most_significant_bits, uuid->least_significant_bits); + if (*uuid->base64str) + memcpy(copy_uuid->base64str, uuid->base64str, 23); + return copy_uuid; +} + +/** + * @brief Destroy the provided uuid. 
+ * + * @param uuid UUID + */ +void rd_kafka_Uuid_destroy(rd_kafka_Uuid_t *uuid) { + rd_free(uuid); +} + +const char *rd_kafka_Uuid_base64str(const rd_kafka_Uuid_t *uuid) { + if (*uuid->base64str) + return uuid->base64str; + + rd_chariov_t in_base64; + char *out_base64_str; + char *uuid_bytes; + uint64_t input_uuid[2]; + + input_uuid[0] = htobe64(uuid->most_significant_bits); + input_uuid[1] = htobe64(uuid->least_significant_bits); + uuid_bytes = (char *)input_uuid; + in_base64.ptr = uuid_bytes; + in_base64.size = sizeof(uuid->most_significant_bits) + + sizeof(uuid->least_significant_bits); + + out_base64_str = rd_base64_encode_str(&in_base64); + if (!out_base64_str) + return NULL; + + rd_strlcpy((char *)uuid->base64str, out_base64_str, + 23 /* Removing extra ('=') padding */); + rd_free(out_base64_str); + return uuid->base64str; +} + +int64_t rd_kafka_Uuid_least_significant_bits(const rd_kafka_Uuid_t *uuid) { + return uuid->least_significant_bits; +} + + +int64_t rd_kafka_Uuid_most_significant_bits(const rd_kafka_Uuid_t *uuid) { + return uuid->most_significant_bits; +} \ No newline at end of file diff --git a/lib/librdkafka-2.1.0/src/rdkafka.h b/lib/librdkafka-2.3.0/src/rdkafka.h similarity index 91% rename from lib/librdkafka-2.1.0/src/rdkafka.h rename to lib/librdkafka-2.3.0/src/rdkafka.h index e3474e50ffa..de620284f0d 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka.h +++ b/lib/librdkafka-2.3.0/src/rdkafka.h @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2022 Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -166,7 +167,7 @@ typedef SSIZE_T ssize_t; * @remark This value should only be used during compile time, * for runtime checks of version use rd_kafka_version() */ -#define RD_KAFKA_VERSION 0x020100ff +#define RD_KAFKA_VERSION 0x020300ff /** * @brief Returns the librdkafka version as integer. @@ -261,6 +262,7 @@ typedef struct rd_kafka_error_s rd_kafka_error_t; typedef struct rd_kafka_headers_s rd_kafka_headers_t; typedef struct rd_kafka_group_result_s rd_kafka_group_result_t; typedef struct rd_kafka_acl_result_s rd_kafka_acl_result_t; +typedef struct rd_kafka_Uuid_s rd_kafka_Uuid_t; /* @endcond */ @@ -908,7 +910,6 @@ typedef struct rd_kafka_topic_partition_s { * rd_kafka_t INSTANCES. */ } rd_kafka_topic_partition_t; - /** * @brief Destroy a rd_kafka_topic_partition_t. * @remark This must not be called for elements in a topic partition list. @@ -952,7 +953,6 @@ typedef struct rd_kafka_topic_partition_list_s { rd_kafka_topic_partition_t *elems; /**< Element array[] */ } rd_kafka_topic_partition_list_t; - /** * @brief Create a new list/vector Topic+Partition container. * @@ -970,7 +970,6 @@ typedef struct rd_kafka_topic_partition_list_s { RD_EXPORT rd_kafka_topic_partition_list_t *rd_kafka_topic_partition_list_new(int size); - /** * @brief Free all resources used by the list and the list itself. */ @@ -1633,6 +1632,75 @@ rd_kafka_message_leader_epoch(const rd_kafka_message_t *rkmessage); /**@}*/ +/** + * @name UUID + * @{ + * + */ + +/** + * @brief Computes base64 encoding for the given uuid string. + * @param uuid UUID for which base64 encoding is required. + * + * @return base64 encoded string for the given UUID or NULL in case of some + * issue with the conversion or the conversion is not supported. 
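A 128-bit UUID encodes to 24 base64 characters ending in "==", and rd_kafka_Uuid_base64str() keeps only the first 22 to drop that padding, caching the string inside the struct. A small sketch with arbitrary bit patterns:

```c
#include <stdio.h>
#include "rdkafka.h"

static void uuid_example(void) {
        rd_kafka_Uuid_t *uuid =
            rd_kafka_Uuid_new(0x0123456789abcdefLL, 0x0fedcba987654321LL);
        const char *b64 = rd_kafka_Uuid_base64str(uuid); /* cached in uuid */

        if (b64)
                printf("uuid: %s\n", b64); /* 22 chars, no '=' padding */

        rd_kafka_Uuid_destroy(uuid);
}
```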
+ */ +RD_EXPORT const char *rd_kafka_Uuid_base64str(const rd_kafka_Uuid_t *uuid); + +/** + * @brief Gets least significant 64 bits for the given UUID. + * + * @param uuid UUID + * + * @return least significant 64 bits for the given UUID. + */ +RD_EXPORT int64_t +rd_kafka_Uuid_least_significant_bits(const rd_kafka_Uuid_t *uuid); + + +/** + * @brief Gets most significant 64 bits for the given UUID. + * + * @param uuid UUID + * + * @return most significant 64 bits for the given UUID. + */ +RD_EXPORT int64_t +rd_kafka_Uuid_most_significant_bits(const rd_kafka_Uuid_t *uuid); + + +/** + * @brief Creates a new UUID. + * + * @param most_significant_bits most significant 64 bits of the 128 bits UUID. + * @param least_significant_bits least significant 64 bits of the 128 bits UUID. + * + * @return A newly allocated UUID. + * @remark Must be freed after use using rd_kafka_Uuid_destroy() + */ +RD_EXPORT rd_kafka_Uuid_t *rd_kafka_Uuid_new(int64_t most_significant_bits, + int64_t least_significant_bits); + +/** + * @brief Copies the given UUID. + * + * @param uuid UUID to be copied. + * + * @return A newly allocated copy of the provided UUID. + * @remark Must be freed after use using rd_kafka_Uuid_destroy() + */ +RD_EXPORT rd_kafka_Uuid_t *rd_kafka_Uuid_copy(const rd_kafka_Uuid_t *uuid); + +/** + * @brief Destroy the provided uuid. + * + * @param uuid UUID + */ +RD_EXPORT void rd_kafka_Uuid_destroy(rd_kafka_Uuid_t *uuid); + +/**@}*/ + + /** * @name Configuration interface * @{ @@ -2101,7 +2169,7 @@ void rd_kafka_conf_set_log_cb(rd_kafka_conf_t *conf, * rd_kafka_conf_set_opaque(). * * For more information on the format of \p json, see - * https://github.com/edenhill/librdkafka/wiki/Statistics + * https://github.com/confluentinc/librdkafka/wiki/Statistics * * If the application wishes to hold on to the \p json pointer and free * it at a later time it must return 1 from the \p stats_cb. @@ -3431,6 +3499,12 @@ rd_kafka_error_t *rd_kafka_sasl_set_credentials(rd_kafka_t *rk, * * @remark rd_kafka_queue_destroy() MUST be called on this queue * prior to calling rd_kafka_consumer_close(). + * @remark Polling the returned queue counts as a consumer poll, and will reset + * the timer for max.poll.interval.ms. If this queue is forwarded to a + * "destq", polling destq also counts as a consumer poll (this works + * for any number of forwards). However, even if this queue is + * unforwarded or forwarded elsewhere, polling destq will continue + * to count as a consumer poll. */ RD_EXPORT rd_kafka_queue_t *rd_kafka_queue_get_consumer(rd_kafka_t *rk); @@ -4969,6 +5043,16 @@ const char *rd_kafka_Node_host(const rd_kafka_Node_t *node); RD_EXPORT uint16_t rd_kafka_Node_port(const rd_kafka_Node_t *node); +/** + * @brief Get the rack of \p node. + * + * @param node The Node instance + * + * @return The node rack id. May be NULL. 
+ */ +RD_EXPORT +const char *rd_kafka_Node_rack(const rd_kafka_Node_t *node); + /**@}*/ @@ -5360,7 +5444,18 @@ typedef int rd_kafka_event_type_t; #define RD_KAFKA_EVENT_LISTCONSUMERGROUPOFFSETS_RESULT 0x8000 /** AlterConsumerGroupOffsets_result_t */ #define RD_KAFKA_EVENT_ALTERCONSUMERGROUPOFFSETS_RESULT 0x10000 - +/** IncrementalAlterConfigs_result_t */ +#define RD_KAFKA_EVENT_INCREMENTALALTERCONFIGS_RESULT 0x20000 +/** DescribeUserScramCredentials_result_t */ +#define RD_KAFKA_EVENT_DESCRIBEUSERSCRAMCREDENTIALS_RESULT 0x40000 +/** AlterUserScramCredentials_result_t */ +#define RD_KAFKA_EVENT_ALTERUSERSCRAMCREDENTIALS_RESULT 0x80000 +/** DescribeTopics_result_t */ +#define RD_KAFKA_EVENT_DESCRIBETOPICS_RESULT 0x100000 +/** DescribeCluster_result_t */ +#define RD_KAFKA_EVENT_DESCRIBECLUSTER_RESULT 0x200000 +/** ListOffsets_result_t */ +#define RD_KAFKA_EVENT_LISTOFFSETS_RESULT 0x400000 /** * @returns the event type for the given event. @@ -5507,6 +5602,7 @@ int rd_kafka_event_error_is_fatal(rd_kafka_event_t *rkev); * - RD_KAFKA_EVENT_DESCRIBEACLS_RESULT * - RD_KAFKA_EVENT_DELETEACLS_RESULT * - RD_KAFKA_EVENT_ALTERCONFIGS_RESULT + * - RD_KAFKA_EVENT_INCREMENTAL_ALTERCONFIGS_RESULT * - RD_KAFKA_EVENT_DESCRIBECONFIGS_RESULT * - RD_KAFKA_EVENT_DELETEGROUPS_RESULT * - RD_KAFKA_EVENT_DELETECONSUMERGROUPOFFSETS_RESULT @@ -5515,6 +5611,9 @@ int rd_kafka_event_error_is_fatal(rd_kafka_event_t *rkev); * - RD_KAFKA_EVENT_DESCRIBECONSUMERGROUPS_RESULT * - RD_KAFKA_EVENT_LISTCONSUMERGROUPOFFSETS_RESULT * - RD_KAFKA_EVENT_ALTERCONSUMERGROUPOFFSETS_RESULT + * - RD_KAFKA_EVENT_DESCRIBETOPICS_RESULT + * - RD_KAFKA_EVENT_DESCRIBECLUSTER_RESULT + * - RD_KAFKA_EVENT_LISTOFFSETS_RESULT */ RD_EXPORT void *rd_kafka_event_opaque(rd_kafka_event_t *rkev); @@ -5610,6 +5709,8 @@ typedef rd_kafka_event_t rd_kafka_DeleteAcls_result_t; typedef rd_kafka_event_t rd_kafka_CreatePartitions_result_t; /*! AlterConfigs result type */ typedef rd_kafka_event_t rd_kafka_AlterConfigs_result_t; +/*! IncrementalAlterConfigs result type */ +typedef rd_kafka_event_t rd_kafka_IncrementalAlterConfigs_result_t; /*! CreateTopics result type */ typedef rd_kafka_event_t rd_kafka_DescribeConfigs_result_t; /*! DeleteRecords result type */ @@ -5626,6 +5727,16 @@ typedef rd_kafka_event_t rd_kafka_DeleteConsumerGroupOffsets_result_t; typedef rd_kafka_event_t rd_kafka_AlterConsumerGroupOffsets_result_t; /*! ListConsumerGroupOffsets result type */ typedef rd_kafka_event_t rd_kafka_ListConsumerGroupOffsets_result_t; +/*! DescribeTopics result type */ +typedef rd_kafka_event_t rd_kafka_DescribeTopics_result_t; +/*! DescribeCluster result type */ +typedef rd_kafka_event_t rd_kafka_DescribeCluster_result_t; +/*! DescribeUserScramCredentials result type */ +typedef rd_kafka_event_t rd_kafka_DescribeUserScramCredentials_result_t; +/*! AlterUserScramCredentials result type */ +typedef rd_kafka_event_t rd_kafka_AlterUserScramCredentials_result_t; +/*! ListOffsets result type */ +typedef rd_kafka_event_t rd_kafka_ListOffsets_result_t; /** * @brief Get CreateTopics result. @@ -5675,6 +5786,18 @@ rd_kafka_event_CreatePartitions_result(rd_kafka_event_t *rkev); RD_EXPORT const rd_kafka_AlterConfigs_result_t * rd_kafka_event_AlterConfigs_result(rd_kafka_event_t *rkev); +/** + * @brief Get IncrementalAlterConfigs result. + * + * @returns the result of a IncrementalAlterConfigs request, or NULL if event is + * of different type. 
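Each of the new admin results follows the established event pattern: fire the request at a queue, poll, and branch on the event type. A dispatch sketch, assuming rkqu is the queue that was passed to the admin call:

```c
#include "rdkafka.h"

static void admin_event_dispatch_example(rd_kafka_queue_t *rkqu) {
        rd_kafka_event_t *ev = rd_kafka_queue_poll(rkqu, 10000 /* ms */);

        switch (ev ? rd_kafka_event_type(ev) : RD_KAFKA_EVENT_NONE) {
        case RD_KAFKA_EVENT_INCREMENTALALTERCONFIGS_RESULT: {
                size_t cnt;
                const rd_kafka_ConfigResource_t **res =
                    rd_kafka_IncrementalAlterConfigs_result_resources(
                        rd_kafka_event_IncrementalAlterConfigs_result(ev),
                        &cnt);
                /* Inspect res[0..cnt-1] with rd_kafka_ConfigResource_error() */
                (void)res;
                break;
        }
        case RD_KAFKA_EVENT_LISTOFFSETS_RESULT:
                /* Unpack with rd_kafka_ListOffsets_result_infos() */
                break;
        default:
                break;
        }

        rd_kafka_event_destroy(ev); /* NULL-safe */
}
```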
+ * + * Event types: + * RD_KAFKA_EVENT_INCREMENTALALTERCONFIGS_RESULT + */ +RD_EXPORT const rd_kafka_IncrementalAlterConfigs_result_t * +rd_kafka_event_IncrementalAlterConfigs_result(rd_kafka_event_t *rkev); + /** * @brief Get DescribeConfigs result. * @@ -5727,6 +5850,35 @@ rd_kafka_event_ListConsumerGroups_result(rd_kafka_event_t *rkev); RD_EXPORT const rd_kafka_DescribeConsumerGroups_result_t * rd_kafka_event_DescribeConsumerGroups_result(rd_kafka_event_t *rkev); +/** + * @brief Get DescribeTopics result. + * + * @returns the result of a DescribeTopics request, or NULL if event is + * of different type. + * + * @remark The lifetime of the returned memory is the same + * as the lifetime of the \p rkev object. + * + * Event types: + * RD_KAFKA_EVENT_DESCRIBETOPICS_RESULT + */ +RD_EXPORT const rd_kafka_DescribeTopics_result_t * +rd_kafka_event_DescribeTopics_result(rd_kafka_event_t *rkev); + +/** + * @brief Get DescribeCluster result. + * + * @returns the result of a DescribeCluster request, or NULL if event is + * of different type. + * + * @remark The lifetime of the returned memory is the same + * as the lifetime of the \p rkev object. + * + * Event types: + * RD_KAFKA_EVENT_DESCRIBECLUSTER_RESULT + */ +RD_EXPORT const rd_kafka_DescribeCluster_result_t * +rd_kafka_event_DescribeCluster_result(rd_kafka_event_t *rkev); /** * @brief Get DeleteGroups result. * @@ -5781,6 +5933,21 @@ rd_kafka_event_DescribeAcls_result(rd_kafka_event_t *rkev); RD_EXPORT const rd_kafka_DeleteAcls_result_t * rd_kafka_event_DeleteAcls_result(rd_kafka_event_t *rkev); +/** + * @brief Get ListConsumerGroupOffsets result. + * + * @returns the result of a ListConsumerGroupOffsets request, or NULL if + * event is of different type. + * + * @remark The lifetime of the returned memory is the same + * as the lifetime of the \p rkev object. + * + * Event types: + * RD_KAFKA_EVENT_LISTCONSUMERGROUPOFFSETS_RESULT + */ +RD_EXPORT const rd_kafka_ListConsumerGroupOffsets_result_t * +rd_kafka_event_ListConsumerGroupOffsets_result(rd_kafka_event_t *rkev); + /** * @brief Get AlterConsumerGroupOffsets result. * @@ -5797,19 +5964,50 @@ RD_EXPORT const rd_kafka_AlterConsumerGroupOffsets_result_t * rd_kafka_event_AlterConsumerGroupOffsets_result(rd_kafka_event_t *rkev); /** - * @brief Get ListConsumerGroupOffsets result. + * @brief Get ListOffsets result. * - * @returns the result of a ListConsumerGroupOffsets request, or NULL if + * @returns the result of a ListOffsets request, or NULL if * event is of different type. * * @remark The lifetime of the returned memory is the same * as the lifetime of the \p rkev object. * * Event types: - * RD_KAFKA_EVENT_LISTCONSUMERGROUPOFFSETS_RESULT + * RD_KAFKA_EVENT_LISTOFFSETS_RESULT */ -RD_EXPORT const rd_kafka_ListConsumerGroupOffsets_result_t * -rd_kafka_event_ListConsumerGroupOffsets_result(rd_kafka_event_t *rkev); +RD_EXPORT const rd_kafka_ListOffsets_result_t * +rd_kafka_event_ListOffsets_result(rd_kafka_event_t *rkev); + + +/** + * @brief Get DescribeUserScramCredentials result. + * + * @returns the result of a DescribeUserScramCredentials request, or NULL if + * event is of different type. + * + * @remark The lifetime of the returned memory is the same + * as the lifetime of the \p rkev object. + * + * Event types: + * RD_KAFKA_EVENT_DESCRIBEUSERSCRAMCREDENTIALS_RESULT + */ +RD_EXPORT const rd_kafka_DescribeUserScramCredentials_result_t * +rd_kafka_event_DescribeUserScramCredentials_result(rd_kafka_event_t *rkev); + +/** + * @brief Get AlterUserScramCredentials result. 
+ * + * @returns the result of a AlterUserScramCredentials request, or NULL if + * event is of different type. + * + * @remark The lifetime of the returned memory is the same + * as the lifetime of the \p rkev object. + * + * Event types: + * RD_KAFKA_EVENT_ALTERUSERSCRAMCREDENTIALS_RESULT + */ +RD_EXPORT const rd_kafka_AlterUserScramCredentials_result_t * +rd_kafka_event_AlterUserScramCredentials_result(rd_kafka_event_t *rkev); /** * @brief Poll a queue for an event for max \p timeout_ms. @@ -6714,7 +6912,16 @@ typedef enum rd_kafka_admin_op_t { RD_KAFKA_ADMIN_OP_LISTCONSUMERGROUPOFFSETS, /** AlterConsumerGroupOffsets */ RD_KAFKA_ADMIN_OP_ALTERCONSUMERGROUPOFFSETS, - RD_KAFKA_ADMIN_OP__CNT /**< Number of ops defined */ + /** IncrementalAlterConfigs */ + RD_KAFKA_ADMIN_OP_INCREMENTALALTERCONFIGS, + /** DescribeUserScramCredentials */ + RD_KAFKA_ADMIN_OP_DESCRIBEUSERSCRAMCREDENTIALS, + /** AlterUserScramCredentials */ + RD_KAFKA_ADMIN_OP_ALTERUSERSCRAMCREDENTIALS, + RD_KAFKA_ADMIN_OP_DESCRIBETOPICS, /**< DescribeTopics */ + RD_KAFKA_ADMIN_OP_DESCRIBECLUSTER, /**< DescribeCluster */ + RD_KAFKA_ADMIN_OP_LISTOFFSETS, /**< ListOffsets */ + RD_KAFKA_ADMIN_OP__CNT /**< Number of ops defined */ } rd_kafka_admin_op_t; /** @@ -6731,6 +6938,18 @@ typedef enum rd_kafka_admin_op_t { typedef struct rd_kafka_AdminOptions_s rd_kafka_AdminOptions_t; +/** + * @enum rd_kafka_IsolationLevel_t + * + * @brief IsolationLevel enum name for use with rd_kafka_AdminOptions_new() + * + * @sa rd_kafka_AdminOptions_new() + */ +typedef enum rd_kafka_IsolationLevel_t { + RD_KAFKA_ISOLATION_LEVEL_READ_UNCOMMITTED = 0, + RD_KAFKA_ISOLATION_LEVEL_READ_COMMITTED = 1 +} rd_kafka_IsolationLevel_t; + /** * @brief Create a new AdminOptions object. * @@ -6765,8 +6984,7 @@ RD_EXPORT void rd_kafka_AdminOptions_destroy(rd_kafka_AdminOptions_t *options); * request transmission, operation time on broker, and response. * * @param options Admin options. - * @param timeout_ms Timeout in milliseconds, use -1 for indefinite timeout. - * Defaults to `socket.timeout.ms`. + * @param timeout_ms Timeout in milliseconds. Defaults to `socket.timeout.ms`. * @param errstr A human readable error string (nul-terminated) is written to * this location that must be of at least \p errstr_size bytes. * The \p errstr is only written in case of error. @@ -6850,6 +7068,8 @@ rd_kafka_AdminOptions_set_validate_only(rd_kafka_AdminOptions_t *options, * the following exceptions: * - AlterConfigs with a BROKER resource are sent to the broker id set * as the resource name. + * - IncrementalAlterConfigs with a BROKER resource are sent to the broker id + * set as the resource name. * - DescribeConfigs with a BROKER resource are sent to the broker id set * as the resource name. * @@ -6892,6 +7112,25 @@ rd_kafka_error_t *rd_kafka_AdminOptions_set_require_stable_offsets( rd_kafka_AdminOptions_t *options, int true_or_false); +/** + * @brief Whether broker should return authorized operations for the given + * resource in the DescribeConsumerGroups, DescribeTopics, or + * DescribeCluster calls. + * + * @param options Admin options. + * @param true_or_false Defaults to false. + * + * @return NULL on success, a new error instance that must be + * released with rd_kafka_error_destroy() in case of error. + * + * @remark This option is valid for DescribeConsumerGroups, DescribeTopics, + * DescribeCluster. 
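Like the other per-call knobs, this is configured on an AdminOptions object before issuing the request. A sketch for DescribeTopics, where rk stands for an existing client instance:

```c
#include <stdio.h>
#include "rdkafka.h"

static rd_kafka_AdminOptions_t *describe_options_example(rd_kafka_t *rk) {
        char errstr[512];
        rd_kafka_AdminOptions_t *options =
            rd_kafka_AdminOptions_new(rk, RD_KAFKA_ADMIN_OP_DESCRIBETOPICS);
        rd_kafka_error_t *error;

        rd_kafka_AdminOptions_set_request_timeout(options, 10 * 1000, errstr,
                                                  sizeof(errstr));

        error = rd_kafka_AdminOptions_set_include_authorized_operations(
            options, 1);
        if (error) {
                fprintf(stderr, "%s\n", rd_kafka_error_string(error));
                rd_kafka_error_destroy(error);
        }

        return options; /* caller passes it to rd_kafka_DescribeTopics() and
                         * frees it with rd_kafka_AdminOptions_destroy() */
}
```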
+ */ +RD_EXPORT +rd_kafka_error_t *rd_kafka_AdminOptions_set_include_authorized_operations( + rd_kafka_AdminOptions_t *options, + int true_or_false); + /** * @brief Set consumer groups states to query for. * @@ -6910,6 +7149,14 @@ rd_kafka_error_t *rd_kafka_AdminOptions_set_match_consumer_group_states( const rd_kafka_consumer_group_state_t *consumer_group_states, size_t consumer_group_states_cnt); +/** + * @brief Set Isolation Level to an allowed `rd_kafka_IsolationLevel_t` value. + */ +RD_EXPORT +rd_kafka_error_t * +rd_kafka_AdminOptions_set_isolation_level(rd_kafka_AdminOptions_t *options, + rd_kafka_IsolationLevel_t value); + /** * @brief Set application opaque value that can be extracted from the * result event using rd_kafka_event_opaque() @@ -6918,6 +7165,35 @@ RD_EXPORT void rd_kafka_AdminOptions_set_opaque(rd_kafka_AdminOptions_t *options, void *ev_opaque); + + +/** + * @enum rd_kafka_AclOperation_t + * @brief Apache Kafka ACL operation types. Common type for multiple Admin API + * functions. + */ +typedef enum rd_kafka_AclOperation_t { + RD_KAFKA_ACL_OPERATION_UNKNOWN = 0, /**< Unknown */ + RD_KAFKA_ACL_OPERATION_ANY = + 1, /**< In a filter, matches any AclOperation */ + RD_KAFKA_ACL_OPERATION_ALL = 2, /**< ALL operation */ + RD_KAFKA_ACL_OPERATION_READ = 3, /**< READ operation */ + RD_KAFKA_ACL_OPERATION_WRITE = 4, /**< WRITE operation */ + RD_KAFKA_ACL_OPERATION_CREATE = 5, /**< CREATE operation */ + RD_KAFKA_ACL_OPERATION_DELETE = 6, /**< DELETE operation */ + RD_KAFKA_ACL_OPERATION_ALTER = 7, /**< ALTER operation */ + RD_KAFKA_ACL_OPERATION_DESCRIBE = 8, /**< DESCRIBE operation */ + RD_KAFKA_ACL_OPERATION_CLUSTER_ACTION = + 9, /**< CLUSTER_ACTION operation */ + RD_KAFKA_ACL_OPERATION_DESCRIBE_CONFIGS = + 10, /**< DESCRIBE_CONFIGS operation */ + RD_KAFKA_ACL_OPERATION_ALTER_CONFIGS = + 11, /**< ALTER_CONFIGS operation */ + RD_KAFKA_ACL_OPERATION_IDEMPOTENT_WRITE = + 12, /**< IDEMPOTENT_WRITE operation */ + RD_KAFKA_ACL_OPERATION__CNT +} rd_kafka_AclOperation_t; + /**@}*/ /** @@ -7410,6 +7686,18 @@ typedef enum rd_kafka_ResourcePatternType_t { RD_KAFKA_RESOURCE_PATTERN_TYPE__CNT, } rd_kafka_ResourcePatternType_t; +/** + * @enum rd_kafka_AlterConfigOpType_t + * @brief Incremental alter configs operations. + */ +typedef enum rd_kafka_AlterConfigOpType_t { + RD_KAFKA_ALTER_CONFIG_OP_TYPE_SET = 0, + RD_KAFKA_ALTER_CONFIG_OP_TYPE_DELETE = 1, + RD_KAFKA_ALTER_CONFIG_OP_TYPE_APPEND = 2, + RD_KAFKA_ALTER_CONFIG_OP_TYPE_SUBTRACT = 3, + RD_KAFKA_ALTER_CONFIG_OP_TYPE__CNT, +} rd_kafka_AlterConfigOpType_t; + /** * @returns a string representation of the \p resource_pattern_type */ @@ -7475,6 +7763,31 @@ rd_kafka_ConfigResource_set_config(rd_kafka_ConfigResource_t *config, const char *value); +/** + * @brief Add the value of the configuration entry for a subsequent + * incremental alter config operation. APPEND and SUBTRACT are + * possible for list-type configuration entries only. + * + * @param config ConfigResource to add config property to. + * @param name Configuration name, depends on resource type. + * @param op_type Operation type, one of rd_kafka_AlterConfigOpType_t. + * @param value Configuration value, depends on resource type and \p name. + * Set to \c NULL, only with with op_type set to DELETE, + * to revert configuration value to default. + * + * @returns NULL on success, or an rd_kafka_error_t * + * with the corresponding error code and string. + * Error ownership belongs to the caller. + * Possible error codes: + * - RD_KAFKA_RESP_ERR__INVALID_ARG on invalid input. 
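The declaration follows just below. In use, a ConfigResource accumulates one or more incremental operations and is then submitted as a batch; a sketch with hypothetical topic and option names:

```c
#include "rdkafka.h"

static void incremental_alter_example(rd_kafka_t *rk, rd_kafka_queue_t *rkqu) {
        rd_kafka_ConfigResource_t *res =
            rd_kafka_ConfigResource_new(RD_KAFKA_RESOURCE_TOPIC, "mytopic");
        rd_kafka_error_t *error =
            rd_kafka_ConfigResource_add_incremental_config(
                res, "cleanup.policy", RD_KAFKA_ALTER_CONFIG_OP_TYPE_APPEND,
                "compact");

        if (!error)
                rd_kafka_IncrementalAlterConfigs(rk, &res, 1,
                                                 NULL /* options */, rkqu);
        else
                rd_kafka_error_destroy(error);

        /* Inputs are copied into the op, so it is safe to free here. */
        rd_kafka_ConfigResource_destroy(res);
}
```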
+ */ +RD_EXPORT rd_kafka_error_t *rd_kafka_ConfigResource_add_incremental_config( + rd_kafka_ConfigResource_t *config, + const char *name, + rd_kafka_AlterConfigOpType_t op_type, + const char *value); + + /** * @brief Get an array of config entries from a ConfigResource object. * @@ -7540,6 +7853,8 @@ rd_kafka_ConfigResource_error_string(const rd_kafka_ConfigResource_t *config); * since these resource requests must be sent to the broker specified * in the resource. * + * @deprecated Use rd_kafka_IncrementalAlterConfigs(). + * */ RD_EXPORT void rd_kafka_AlterConfigs(rd_kafka_t *rk, @@ -7574,6 +7889,66 @@ rd_kafka_AlterConfigs_result_resources( +/* + * IncrementalAlterConfigs - alter cluster configuration incrementally. + * + */ + + +/** + * @brief Incrementally update the configuration for the specified resources. + * Updates are not transactional so they may succeed for some resources + * while fail for others. The configs for a particular resource are + * updated atomically, executing the corresponding incremental operations + * on the provided configurations. + * + * @remark Requires broker version >=2.3.0 + * + * @remark Multiple resources and resource types may be set, but at most one + * resource of type \c RD_KAFKA_RESOURCE_BROKER is allowed per call + * since these resource requests must be sent to the broker specified + * in the resource. Broker option will be ignored in this case. + * + * @param rk Client instance. + * @param configs Array of config entries to alter. + * @param config_cnt Number of elements in \p configs array. + * @param options Optional admin options, or NULL for defaults. + * @param rkqu Queue to emit result on. + */ +RD_EXPORT +void rd_kafka_IncrementalAlterConfigs(rd_kafka_t *rk, + rd_kafka_ConfigResource_t **configs, + size_t config_cnt, + const rd_kafka_AdminOptions_t *options, + rd_kafka_queue_t *rkqu); + + +/* + * IncrementalAlterConfigs result type and methods + */ + +/** + * @brief Get an array of resource results from a IncrementalAlterConfigs + * result. + * + * Use \c rd_kafka_ConfigResource_error() and + * \c rd_kafka_ConfigResource_error_string() to extract per-resource error + * results on the returned array elements. + * + * The returned object life-times are the same as the \p result object. + * + * @param result Result object to get resource results from. + * @param cntp is updated to the number of elements in the array. + * + * @returns an array of ConfigResource elements, or NULL if not available. + */ +RD_EXPORT const rd_kafka_ConfigResource_t ** +rd_kafka_IncrementalAlterConfigs_result_resources( + const rd_kafka_IncrementalAlterConfigs_result_t *result, + size_t *cntp); + + + /* * DescribeConfigs - retrieve cluster configuration. * @@ -7726,46 +8101,350 @@ rd_kafka_DeleteRecords_result_offsets( /**@}*/ /** - * @name Admin API - ListConsumerGroups + * @name Admin API - DescribeTopics * @{ */ +/** + * @brief Represents a collection of topics, to be passed to DescribeTopics. + * + */ +typedef struct rd_kafka_TopicCollection_s rd_kafka_TopicCollection_t; + +/** + * @brief TopicPartition represents a partition in the DescribeTopics result. + * + */ +typedef struct rd_kafka_TopicPartitionInfo_s rd_kafka_TopicPartitionInfo_t; /** - * @brief ListConsumerGroups result for a single group + * @brief DescribeTopics result type. + * */ +typedef struct rd_kafka_TopicDescription_s rd_kafka_TopicDescription_t; -/**! 
ListConsumerGroups result for a single group */ -typedef struct rd_kafka_ConsumerGroupListing_s rd_kafka_ConsumerGroupListing_t; +/** + * @brief Creates a new TopicCollection for passing to rd_kafka_DescribeTopics. + * + * @param topics A list of topics. + * @param topics_cnt Count of topics. + * + * @return a newly allocated TopicCollection object. Must be freed using + * rd_kafka_TopicCollection_destroy when done. + */ +RD_EXPORT +rd_kafka_TopicCollection_t * +rd_kafka_TopicCollection_of_topic_names(const char **topics, size_t topics_cnt); -/**! ListConsumerGroups results and errors */ -typedef struct rd_kafka_ListConsumerGroupsResult_s - rd_kafka_ListConsumerGroupsResult_t; +/** + * @brief Destroy and free a TopicCollection object created with + * rd_kafka_TopicCollection_new_* methods. + */ +RD_EXPORT void +rd_kafka_TopicCollection_destroy(rd_kafka_TopicCollection_t *topics); /** - * @brief List the consumer groups available in the cluster. + * @brief Describe topics as specified by the \p topics + * array of size \p topics_cnt elements. * * @param rk Client instance. + * @param topics Collection of topics to describe. * @param options Optional admin options, or NULL for defaults. + * Valid options: + * - include_authorized_operations * @param rkqu Queue to emit result on. * * @remark The result event type emitted on the supplied queue is of type - * \c RD_KAFKA_EVENT_LISTCONSUMERGROUPS_RESULT + * \c RD_KAFKA_EVENT_DESCRIBETOPICS_RESULT */ RD_EXPORT -void rd_kafka_ListConsumerGroups(rd_kafka_t *rk, - const rd_kafka_AdminOptions_t *options, - rd_kafka_queue_t *rkqu); +void rd_kafka_DescribeTopics(rd_kafka_t *rk, + const rd_kafka_TopicCollection_t *topics, + const rd_kafka_AdminOptions_t *options, + rd_kafka_queue_t *rkqu); /** - * @brief Gets the group id for the \p grplist group. - * - * @param grplist The group listing. + * @brief Get an array of topic results from a DescribeTopics result. * - * @return The group id. + * @param result Result to get topics results from. + * @param cntp is updated to the number of elements in the array. * * @remark The lifetime of the returned memory is the same - * as the lifetime of the \p grplist object. + * as the lifetime of the \p result object. + */ +RD_EXPORT +const rd_kafka_TopicDescription_t **rd_kafka_DescribeTopics_result_topics( + const rd_kafka_DescribeTopics_result_t *result, + size_t *cntp); + + +/** + * @brief Gets an array of partitions for the \p topicdesc topic. + * + * @param topicdesc The topic description. + * @param cntp is updated to the number of partitions in the array. + * + * @return An array of TopicPartitionInfos. + * @remark The lifetime of the returned memory is the same + * as the lifetime of the \p topicdesc object. + */ +RD_EXPORT +const rd_kafka_TopicPartitionInfo_t **rd_kafka_TopicDescription_partitions( + const rd_kafka_TopicDescription_t *topicdesc, + size_t *cntp); + + +/** + * @brief Gets the partition id for \p partition. + * + * @param partition The partition info. + * + * @return The partition id. + */ +RD_EXPORT +const int rd_kafka_TopicPartitionInfo_partition( + const rd_kafka_TopicPartitionInfo_t *partition); + + +/** + * @brief Gets the partition leader for \p partition. + * + * @param partition The partition info. + * + * @return The partition leader. + * + * @remark The lifetime of the returned memory is the same + * as the lifetime of the \p partition object. 
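Putting the pieces together, DescribeTopics is: build a TopicCollection, fire the request, unpack the result event. A sketch with hypothetical topic names and trimmed error handling:

```c
#include <stdio.h>
#include "rdkafka.h"

static void describe_topics_example(rd_kafka_t *rk, rd_kafka_queue_t *rkqu) {
        const char *names[] = {"logs", "metrics"};
        rd_kafka_TopicCollection_t *topics =
            rd_kafka_TopicCollection_of_topic_names(names, 2);
        rd_kafka_event_t *ev;

        rd_kafka_DescribeTopics(rk, topics, NULL /* default options */, rkqu);

        ev = rd_kafka_queue_poll(rkqu, 10000);
        if (ev &&
            rd_kafka_event_type(ev) == RD_KAFKA_EVENT_DESCRIBETOPICS_RESULT) {
                size_t cnt, i, pcnt;
                const rd_kafka_TopicDescription_t **tds =
                    rd_kafka_DescribeTopics_result_topics(
                        rd_kafka_event_DescribeTopics_result(ev), &cnt);

                for (i = 0; i < cnt; i++) {
                        rd_kafka_TopicDescription_partitions(tds[i], &pcnt);
                        printf("%s: %zu partition(s)\n",
                               rd_kafka_TopicDescription_name(tds[i]), pcnt);
                }
        }

        rd_kafka_event_destroy(ev); /* NULL-safe */
        rd_kafka_TopicCollection_destroy(topics);
}
```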
+ */ +RD_EXPORT +const rd_kafka_Node_t *rd_kafka_TopicPartitionInfo_leader( + const rd_kafka_TopicPartitionInfo_t *partition); + +/** + * @brief Gets the partition in-sync replicas for \p partition. + * + * @param partition The partition info. + * @param cntp is updated with in-sync replicas count. + * + * @return The in-sync replica nodes. + * + * @remark The lifetime of the returned memory is the same + * as the lifetime of the \p partition object. + */ +RD_EXPORT +const rd_kafka_Node_t ** +rd_kafka_TopicPartitionInfo_isr(const rd_kafka_TopicPartitionInfo_t *partition, + size_t *cntp); + +/** + * @brief Gets the partition replicas for \p partition. + * + * @param partition The partition info. + * @param cntp is updated with partition replicas count. + * + * @return The partition replicas nodes. + * + * @remark The lifetime of the returned memory is the same + * as the lifetime of the \p partition object. + */ +RD_EXPORT +const rd_kafka_Node_t **rd_kafka_TopicPartitionInfo_replicas( + const rd_kafka_TopicPartitionInfo_t *partition, + size_t *cntp); + +/** + * @brief Gets the topic authorized ACL operations for the \p topicdesc topic. + * + * @param topicdesc The topic description. + * @param cntp is updated with authorized ACL operations count. + * + * @return The topic authorized operations. Is NULL if operations were not + * requested. + * + * @remark The lifetime of the returned memory is the same + * as the lifetime of the \p topicdesc object. + */ +RD_EXPORT +const rd_kafka_AclOperation_t *rd_kafka_TopicDescription_authorized_operations( + const rd_kafka_TopicDescription_t *topicdesc, + size_t *cntp); + +/** + * @brief Gets the topic name for the \p topicdesc topic. + * + * @param topicdesc The topic description. + * + * @return The topic name. + * + * @remark The lifetime of the returned memory is the same + * as the lifetime of the \p topicdesc object. + */ +RD_EXPORT +const char * +rd_kafka_TopicDescription_name(const rd_kafka_TopicDescription_t *topicdesc); + +/** + * @brief Gets the topic id for the \p topicdesc topic. + * + * @param topicdesc The topic description. + * @return The topic id + * + * @remark The lifetime of the returned memory is the same + * as the lifetime of the \p topicdesc object. + */ +RD_EXPORT const rd_kafka_Uuid_t *rd_kafka_TopicDescription_topic_id( + const rd_kafka_TopicDescription_t *topicdesc); + +/** + * @brief Gets if the \p topicdesc topic is internal. + * + * @param topicdesc The topic description. + * + * @return 1 if the topic is internal to Kafka, 0 otherwise. + */ +RD_EXPORT +int rd_kafka_TopicDescription_is_internal( + const rd_kafka_TopicDescription_t *topicdesc); + +/** + * @brief Gets the error for the \p topicdesc topic. + * + * @param topicdesc The topic description. + * + * @return The topic description error. + * + * @remark The lifetime of the returned memory is the same + * as the lifetime of the \p topicdesc object. + */ +RD_EXPORT +const rd_kafka_error_t * +rd_kafka_TopicDescription_error(const rd_kafka_TopicDescription_t *topicdesc); + + +/**@}*/ + +/** + * @name Admin API - DescribeCluster + * @{ + */ + +/** + * @brief Describes the cluster. + * + * @param rk Client instance. + * @param options Optional admin options, or NULL for defaults. + * Valid options: + * - include_authorized_operations + * @param rkqu Queue to emit result on. 
+ * + * @remark The result event type emitted on the supplied queue is of type + * \c RD_KAFKA_EVENT_DESCRIBECLUSTER_RESULT + */ +RD_EXPORT +void rd_kafka_DescribeCluster(rd_kafka_t *rk, + const rd_kafka_AdminOptions_t *options, + rd_kafka_queue_t *rkqu); + +/** + * @brief Gets the broker nodes for the \p result cluster. + * + * @param result The result of DescribeCluster. + * @param cntp is updated with the count of broker nodes. + * + * @return An array of broker nodes. + * @remark The lifetime of the returned memory is the same + * as the lifetime of the \p result object. + */ +RD_EXPORT +const rd_kafka_Node_t **rd_kafka_DescribeCluster_result_nodes( + const rd_kafka_DescribeCluster_result_t *result, + size_t *cntp); + +/** + * @brief Gets the authorized ACL operations for the \p result cluster. + * + * @param result The result of DescribeCluster. + * @param cntp is updated with authorized ACL operations count. + * + * @return The cluster authorized operations. Is NULL if operations were not + * requested. + * @remark The lifetime of the returned memory is the same + * as the lifetime of the \p result object. + */ +RD_EXPORT +const rd_kafka_AclOperation_t * +rd_kafka_DescribeCluster_result_authorized_operations( + const rd_kafka_DescribeCluster_result_t *result, + size_t *cntp); + +/** + * @brief Gets the current controller for the \p result cluster. + * + * @param result The result of DescribeCluster. + * + * @return The cluster current controller. + */ +RD_EXPORT +const rd_kafka_Node_t *rd_kafka_DescribeCluster_result_controller( + const rd_kafka_DescribeCluster_result_t *result); + +/** + * @brief Gets the cluster id for the \p result cluster. + * + * @param result The result of DescribeCluster. + * + * @return The cluster id. + * @remark The lifetime of the returned memory is the same + * as the lifetime of the \p result object. + */ +RD_EXPORT +const char *rd_kafka_DescribeCluster_result_cluster_id( + const rd_kafka_DescribeCluster_result_t *result); + +/**@}*/ + + +/** + * @name Admin API - ListConsumerGroups + * @{ + */ + + +/** + * @brief ListConsumerGroups result for a single group + */ + +/**! ListConsumerGroups result for a single group */ +typedef struct rd_kafka_ConsumerGroupListing_s rd_kafka_ConsumerGroupListing_t; + +/**! ListConsumerGroups results and errors */ +typedef struct rd_kafka_ListConsumerGroupsResult_s + rd_kafka_ListConsumerGroupsResult_t; + +/** + * @brief List the consumer groups available in the cluster. + * + * @param rk Client instance. + * @param options Optional admin options, or NULL for defaults. + * @param rkqu Queue to emit result on. + * + * @remark The result event type emitted on the supplied queue is of type + * \c RD_KAFKA_EVENT_LISTCONSUMERGROUPS_RESULT + */ +RD_EXPORT +void rd_kafka_ListConsumerGroups(rd_kafka_t *rk, + const rd_kafka_AdminOptions_t *options, + rd_kafka_queue_t *rkqu); + +/** + * @brief Gets the group id for the \p grplist group. + * + * @param grplist The group listing. + * + * @return The group id. + * + * @remark The lifetime of the returned memory is the same + * as the lifetime of the \p grplist object. */ RD_EXPORT const char *rd_kafka_ConsumerGroupListing_group_id( @@ -7863,6 +8542,8 @@ typedef struct rd_kafka_MemberAssignment_s rd_kafka_MemberAssignment_t; * @param groups Array of groups to describe. * @param groups_cnt Number of elements in \p groups array. * @param options Optional admin options, or NULL for defaults. + * Valid options: + * - include_authorized_operations * @param rkqu Queue to emit result on. 
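DescribeCluster follows the same shape and is a natural consumer of the new rd_kafka_Node_rack() accessor. A sketch:

```c
#include <stdio.h>
#include "rdkafka.h"

static void describe_cluster_example(rd_kafka_t *rk, rd_kafka_queue_t *rkqu) {
        rd_kafka_event_t *ev;

        rd_kafka_DescribeCluster(rk, NULL /* default options */, rkqu);

        ev = rd_kafka_queue_poll(rkqu, 10000);
        if (ev &&
            rd_kafka_event_type(ev) == RD_KAFKA_EVENT_DESCRIBECLUSTER_RESULT) {
                const rd_kafka_DescribeCluster_result_t *res =
                    rd_kafka_event_DescribeCluster_result(ev);
                size_t cnt, i;
                const rd_kafka_Node_t **nodes =
                    rd_kafka_DescribeCluster_result_nodes(res, &cnt);

                for (i = 0; i < cnt; i++) {
                        const char *rack = rd_kafka_Node_rack(nodes[i]);
                        printf("broker %d at %s:%u rack %s\n",
                               rd_kafka_Node_id(nodes[i]),
                               rd_kafka_Node_host(nodes[i]),
                               (unsigned)rd_kafka_Node_port(nodes[i]),
                               rack ? rack : "(none)");
                }
        }

        rd_kafka_event_destroy(ev); /* NULL-safe */
}
```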
* * @remark The result event type emitted on the supplied queue is of type @@ -7947,6 +8628,23 @@ RD_EXPORT const char *rd_kafka_ConsumerGroupDescription_partition_assignor( const rd_kafka_ConsumerGroupDescription_t *grpdesc); +/** + * @brief Gets the authorized ACL operations for the \p grpdesc group. + * + * @param grpdesc The group description. + * @param cntp is updated with authorized ACL operations count. + * + * @return The group authorized operations. Is NULL if operations were not + * requested. + * + * @remark The lifetime of the returned memory is the same + * as the lifetime of the \p grpdesc object. + */ +RD_EXPORT +const rd_kafka_AclOperation_t * +rd_kafka_ConsumerGroupDescription_authorized_operations( + const rd_kafka_ConsumerGroupDescription_t *grpdesc, + size_t *cntp); /** * @brief Gets state for the \p grpdesc group. @@ -8450,6 +9148,331 @@ rd_kafka_DeleteConsumerGroupOffsets_result_groups( /**@}*/ +/** + * @name Admin API - ListOffsets + * @brief Given a topic_partition list, provides the offset information. + * @{ + */ + +/** + * @enum rd_kafka_OffsetSpec_t + * @brief Allows to specify the desired offsets when using ListOffsets. + */ +typedef enum rd_kafka_OffsetSpec_t { + /* Used to retrieve the offset with the largest timestamp of a partition + * as message timestamps can be specified client side this may not match + * the log end offset returned by SPEC_LATEST. + */ + RD_KAFKA_OFFSET_SPEC_MAX_TIMESTAMP = -3, + /* Used to retrieve the offset with the earliest timestamp of a + partition. */ + RD_KAFKA_OFFSET_SPEC_EARLIEST = -2, + /* Used to retrieve the offset with the latest timestamp of a partition. + */ + RD_KAFKA_OFFSET_SPEC_LATEST = -1, +} rd_kafka_OffsetSpec_t; + +/** + * @brief Information returned from a ListOffsets call for a specific + * `rd_kafka_topic_partition_t`. + */ +typedef struct rd_kafka_ListOffsetsResultInfo_s + rd_kafka_ListOffsetsResultInfo_t; + +/** + * @brief Returns the topic partition of the passed \p result_info. + */ +RD_EXPORT +const rd_kafka_topic_partition_t * +rd_kafka_ListOffsetsResultInfo_topic_partition( + const rd_kafka_ListOffsetsResultInfo_t *result_info); + +/** + * @brief Returns the timestamp corresponding to the offset in \p result_info. + */ +RD_EXPORT +int64_t rd_kafka_ListOffsetsResultInfo_timestamp( + const rd_kafka_ListOffsetsResultInfo_t *result_info); + +/** + * @brief Returns the array of ListOffsetsResultInfo in \p result + * and populates the size of the array in \p cntp. + */ +RD_EXPORT +const rd_kafka_ListOffsetsResultInfo_t ** +rd_kafka_ListOffsets_result_infos(const rd_kafka_ListOffsets_result_t *result, + size_t *cntp); + +/** + * @brief List offsets for the specified \p topic_partitions. + * This operation enables to find the beginning offset, + * end offset as well as the offset matching a timestamp in partitions + * or the offset with max timestamp. + * + * @param rk Client instance. + * @param topic_partitions topic_partition_list_t with the partitions and + * offsets to list. Each topic partition offset can be + * a value of the `rd_kafka_OffsetSpec_t` enum or + * a non-negative value, representing a timestamp, + * to query for the first offset after the + * given timestamp. + * @param options Optional admin options, or NULL for defaults. + * @param rkqu Queue to emit result on. 
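SPEC_MAX_TIMESTAMP differs from SPEC_LATEST because producers may assign arbitrary client-side timestamps, so the newest timestamp need not sit at the log end offset. Specs are requested through the list's offset field, as sketched here with a hypothetical topic:

```c
#include "rdkafka.h"

static void list_offsets_example(rd_kafka_t *rk, rd_kafka_queue_t *rkqu) {
        rd_kafka_topic_partition_list_t *parts =
            rd_kafka_topic_partition_list_new(2);

        rd_kafka_topic_partition_list_add(parts, "logs", 0)->offset =
            RD_KAFKA_OFFSET_SPEC_EARLIEST;
        rd_kafka_topic_partition_list_add(parts, "logs", 1)->offset =
            RD_KAFKA_OFFSET_SPEC_LATEST;

        rd_kafka_ListOffsets(rk, parts, NULL /* default options */, rkqu);
        rd_kafka_topic_partition_list_destroy(parts);

        /* The RD_KAFKA_EVENT_LISTOFFSETS_RESULT event then carries one
         * rd_kafka_ListOffsetsResultInfo_t per requested partition. */
}
```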
+ * + * Supported admin options: + * - rd_kafka_AdminOptions_set_isolation_level() - default \c + * RD_KAFKA_ISOLATION_LEVEL_READ_UNCOMMITTED + * - rd_kafka_AdminOptions_set_request_timeout() - default socket.timeout.ms + * + * @remark The result event type emitted on the supplied queue is of type + * \c RD_KAFKA_EVENT_LISTOFFSETS_RESULT + */ +RD_EXPORT +void rd_kafka_ListOffsets(rd_kafka_t *rk, + rd_kafka_topic_partition_list_t *topic_partitions, + const rd_kafka_AdminOptions_t *options, + rd_kafka_queue_t *rkqu); + +/**@}*/ + +/** + * @name Admin API - User SCRAM credentials + * @{ + */ + +/** + * @enum rd_kafka_ScramMechanism_t + * @brief Apache Kafka ScramMechanism values. + */ +typedef enum rd_kafka_ScramMechanism_t { + RD_KAFKA_SCRAM_MECHANISM_UNKNOWN = 0, + RD_KAFKA_SCRAM_MECHANISM_SHA_256 = 1, + RD_KAFKA_SCRAM_MECHANISM_SHA_512 = 2, + RD_KAFKA_SCRAM_MECHANISM__CNT +} rd_kafka_ScramMechanism_t; + +/** + * @brief Scram credential info. + * Mechanism and iterations for a SASL/SCRAM + * credential associated with a user. + */ +typedef struct rd_kafka_ScramCredentialInfo_s rd_kafka_ScramCredentialInfo_t; + +/** + * @brief Returns the mechanism of a given ScramCredentialInfo. + */ +RD_EXPORT +rd_kafka_ScramMechanism_t rd_kafka_ScramCredentialInfo_mechanism( + const rd_kafka_ScramCredentialInfo_t *scram_credential_info); + +/** + * @brief Returns the iterations of a given ScramCredentialInfo. + */ +RD_EXPORT +int32_t rd_kafka_ScramCredentialInfo_iterations( + const rd_kafka_ScramCredentialInfo_t *scram_credential_info); + +/** + * @brief Representation of all SASL/SCRAM credentials associated + * with a user that can be retrieved, + * or an error indicating why credentials + * could not be retrieved. + */ +typedef struct rd_kafka_UserScramCredentialsDescription_s + rd_kafka_UserScramCredentialsDescription_t; + +/** + * @brief Returns the username of a UserScramCredentialsDescription. + */ +RD_EXPORT +const char *rd_kafka_UserScramCredentialsDescription_user( + const rd_kafka_UserScramCredentialsDescription_t *description); + +/** + * @brief Returns the error associated with a UserScramCredentialsDescription. + */ +RD_EXPORT +const rd_kafka_error_t *rd_kafka_UserScramCredentialsDescription_error( + const rd_kafka_UserScramCredentialsDescription_t *description); + +/** + * @brief Returns the count of ScramCredentialInfos of a + * UserScramCredentialsDescription. + */ +RD_EXPORT +size_t rd_kafka_UserScramCredentialsDescription_scramcredentialinfo_count( + const rd_kafka_UserScramCredentialsDescription_t *description); + +/** + * @brief Returns the ScramCredentialInfo at index idx of + * UserScramCredentialsDescription. + */ +RD_EXPORT +const rd_kafka_ScramCredentialInfo_t * +rd_kafka_UserScramCredentialsDescription_scramcredentialinfo( + const rd_kafka_UserScramCredentialsDescription_t *description, + size_t idx); + +/** + * @brief Get an array of descriptions from a DescribeUserScramCredentials + * result. + * + * The returned value life-time is the same as the \p result object. + * + * @param result Result to get descriptions from. + * @param cntp is updated to the number of elements in the array. + */ +RD_EXPORT +const rd_kafka_UserScramCredentialsDescription_t ** +rd_kafka_DescribeUserScramCredentials_result_descriptions( + const rd_kafka_DescribeUserScramCredentials_result_t *result, + size_t *cntp); + +/** + * @brief Describe SASL/SCRAM credentials. + * This operation is supported by brokers with version 2.7.0 or higher. + * + * @param rk Client instance. 
+ * @param users The users for which credentials are to be described. + * All users' credentials are described if NULL. + * @param user_cnt Number of elements in \p users array. + * @param options Optional admin options, or NULL for defaults. + * @param rkqu Queue to emit result on. + */ +RD_EXPORT +void rd_kafka_DescribeUserScramCredentials( + rd_kafka_t *rk, + const char **users, + size_t user_cnt, + const rd_kafka_AdminOptions_t *options, + rd_kafka_queue_t *rkqu); + +/** + * @brief A request to alter a user's SASL/SCRAM credentials. + */ +typedef struct rd_kafka_UserScramCredentialAlteration_s + rd_kafka_UserScramCredentialAlteration_t; + +/** + * @brief Allocates a new UserScramCredentialUpsertion given its fields. + * If salt isn't given a 64 B salt is generated using OpenSSL + * RAND_priv_bytes, if available. + * + * @param username The username (not empty). + * @param mechanism SASL/SCRAM mechanism. + * @param iterations SASL/SCRAM iterations. + * @param password Password bytes (not empty). + * @param password_size Size of \p password (greater than 0). + * @param salt Salt bytes (optional). + * @param salt_size Size of \p salt (optional). + * + * @remark A random salt is generated, when NULL, only if OpenSSL >= 1.1.1. + * Otherwise it's a required param. + * + * @return A newly created instance of rd_kafka_UserScramCredentialAlteration_t. + * Ownership belongs to the caller, use + * rd_kafka_UserScramCredentialAlteration_destroy to destroy. + */ +RD_EXPORT +rd_kafka_UserScramCredentialAlteration_t * +rd_kafka_UserScramCredentialUpsertion_new(const char *username, + rd_kafka_ScramMechanism_t mechanism, + int32_t iterations, + const unsigned char *password, + size_t password_size, + const unsigned char *salt, + size_t salt_size); + +/** + * @brief Allocates a new UserScramCredentialDeletion given its fields. + * + * @param username The username (not empty). + * @param mechanism SASL/SCRAM mechanism. + * @return A newly created instance of rd_kafka_UserScramCredentialAlteration_t. + * Ownership belongs to the caller, use + * rd_kafka_UserScramCredentialAlteration_destroy to destroy. + */ +RD_EXPORT +rd_kafka_UserScramCredentialAlteration_t * +rd_kafka_UserScramCredentialDeletion_new(const char *username, + rd_kafka_ScramMechanism_t mechanism); + + +/** + * @brief Destroys a UserScramCredentialAlteration given its pointer + */ +RD_EXPORT +void rd_kafka_UserScramCredentialAlteration_destroy( + rd_kafka_UserScramCredentialAlteration_t *alteration); + +/** + * @brief Destroys an array of UserScramCredentialAlteration + */ +RD_EXPORT +void rd_kafka_UserScramCredentialAlteration_destroy_array( + rd_kafka_UserScramCredentialAlteration_t **alterations, + size_t alteration_cnt); + +/** + * @brief Result of a single user SCRAM alteration. + */ +typedef struct rd_kafka_AlterUserScramCredentials_result_response_s + rd_kafka_AlterUserScramCredentials_result_response_t; + +/** + * @brief Returns the username for a + * rd_kafka_AlterUserScramCredentials_result_response. + */ +RD_EXPORT +const char *rd_kafka_AlterUserScramCredentials_result_response_user( + const rd_kafka_AlterUserScramCredentials_result_response_t *response); + +/** + * @brief Returns the error of a + * rd_kafka_AlterUserScramCredentials_result_response. + */ +RD_EXPORT +const rd_kafka_error_t * +rd_kafka_AlterUserScramCredentials_result_response_error( + const rd_kafka_AlterUserScramCredentials_result_response_t *response); + +/** + * @brief Get an array of responses from a AlterUserScramCredentials result. 
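+ *
+ * For example (a minimal sketch, assuming \c result was obtained from an
+ * \c RD_KAFKA_EVENT_ALTERUSERSCRAMCREDENTIALS_RESULT event polled off the
+ * supplied queue):
+ * @code
+ * size_t cnt, i;
+ * const rd_kafka_AlterUserScramCredentials_result_response_t **responses =
+ *     rd_kafka_AlterUserScramCredentials_result_responses(result, &cnt);
+ * for (i = 0; i < cnt; i++) {
+ *     const rd_kafka_error_t *error =
+ *         rd_kafka_AlterUserScramCredentials_result_response_error(
+ *             responses[i]);
+ *     if (error && rd_kafka_error_code(error))
+ *         fprintf(stderr, "%s: %s\n",
+ *                 rd_kafka_AlterUserScramCredentials_result_response_user(
+ *                     responses[i]),
+ *                 rd_kafka_error_string(error));
+ * }
+ * @endcode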
+ * + * The returned value life-time is the same as the \p result object. + * + * @param result Result to get responses from. + * @param cntp is updated to the number of elements in the array. + */ +RD_EXPORT +const rd_kafka_AlterUserScramCredentials_result_response_t ** +rd_kafka_AlterUserScramCredentials_result_responses( + const rd_kafka_AlterUserScramCredentials_result_t *result, + size_t *cntp); + +/** + * @brief Alter SASL/SCRAM credentials. + * This operation is supported by brokers with version 2.7.0 or higher. + * + * @remark For upsertions to be processed, librdkfka must be build with + * OpenSSL support. It's needed to calculate the HMAC. + * + * @param rk Client instance. + * @param alterations The alterations to be applied. + * @param alteration_cnt Number of elements in \p alterations array. + * @param options Optional admin options, or NULL for defaults. + * @param rkqu Queue to emit result on. + */ +RD_EXPORT +void rd_kafka_AlterUserScramCredentials( + rd_kafka_t *rk, + rd_kafka_UserScramCredentialAlteration_t **alterations, + size_t alteration_cnt, + const rd_kafka_AdminOptions_t *options, + rd_kafka_queue_t *rkqu); + +/**@}*/ + /** * @name Admin API - ACL operations * @{ @@ -8475,32 +9498,6 @@ RD_EXPORT const rd_kafka_error_t * rd_kafka_acl_result_error(const rd_kafka_acl_result_t *aclres); -/** - * @enum rd_kafka_AclOperation_t - * @brief Apache Kafka ACL operation types. - */ -typedef enum rd_kafka_AclOperation_t { - RD_KAFKA_ACL_OPERATION_UNKNOWN = 0, /**< Unknown */ - RD_KAFKA_ACL_OPERATION_ANY = - 1, /**< In a filter, matches any AclOperation */ - RD_KAFKA_ACL_OPERATION_ALL = 2, /**< ALL operation */ - RD_KAFKA_ACL_OPERATION_READ = 3, /**< READ operation */ - RD_KAFKA_ACL_OPERATION_WRITE = 4, /**< WRITE operation */ - RD_KAFKA_ACL_OPERATION_CREATE = 5, /**< CREATE operation */ - RD_KAFKA_ACL_OPERATION_DELETE = 6, /**< DELETE operation */ - RD_KAFKA_ACL_OPERATION_ALTER = 7, /**< ALTER operation */ - RD_KAFKA_ACL_OPERATION_DESCRIBE = 8, /**< DESCRIBE operation */ - RD_KAFKA_ACL_OPERATION_CLUSTER_ACTION = - 9, /**< CLUSTER_ACTION operation */ - RD_KAFKA_ACL_OPERATION_DESCRIBE_CONFIGS = - 10, /**< DESCRIBE_CONFIGS operation */ - RD_KAFKA_ACL_OPERATION_ALTER_CONFIGS = - 11, /**< ALTER_CONFIGS operation */ - RD_KAFKA_ACL_OPERATION_IDEMPOTENT_WRITE = - 12, /**< IDEMPOTENT_WRITE operation */ - RD_KAFKA_ACL_OPERATION__CNT -} rd_kafka_AclOperation_t; - /** * @returns a string representation of the \p acl_operation */ diff --git a/lib/librdkafka-2.1.0/src/rdkafka_admin.c b/lib/librdkafka-2.3.0/src/rdkafka_admin.c similarity index 72% rename from lib/librdkafka-2.1.0/src/rdkafka_admin.c rename to lib/librdkafka-2.3.0/src/rdkafka_admin.c index 6aaec636d53..4184d1cdc6e 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_admin.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_admin.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2018 Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -427,6 +428,8 @@ static RD_UNUSED RD_FORMAT(printf, 3, 4) void rd_kafka_admin_result_set_err( */ static RD_INLINE void rd_kafka_admin_result_enq(rd_kafka_op_t *rko_req, rd_kafka_op_t *rko_result) { + if (rko_req->rko_u.admin_result.result_cb) + rko_req->rko_u.admin_result.result_cb(rko_result); rd_kafka_replyq_enq(&rko_req->rko_u.admin_request.replyq, rko_result, rko_req->rko_u.admin_request.replyq.version); } @@ -528,7 +531,8 @@ rd_kafka_admin_result_ret_resources(const rd_kafka_op_t *rko, size_t *cntp) { rd_kafka_op_type_t reqtype = rko->rko_u.admin_result.reqtype & ~RD_KAFKA_OP_FLAGMASK; rd_assert(reqtype == RD_KAFKA_OP_ALTERCONFIGS || - reqtype == RD_KAFKA_OP_DESCRIBECONFIGS); + reqtype == RD_KAFKA_OP_DESCRIBECONFIGS || + reqtype == RD_KAFKA_OP_INCREMENTALALTERCONFIGS); *cntp = rd_list_cnt(&rko->rko_u.admin_result.results); return (const rd_kafka_ConfigResource_t **) @@ -658,6 +662,12 @@ rd_kafka_admin_request_op_new(rd_kafka_t *rk, return rko; } +static void +rd_kafka_admin_request_op_result_cb_set(rd_kafka_op_t *op, + void (*result_cb)(rd_kafka_op_t *)) { + op->rko_u.admin_result.result_cb = result_cb; +} + /** * @returns the remaining request timeout in milliseconds. @@ -1426,8 +1436,7 @@ static rd_kafka_op_res_t rd_kafka_admin_fanout_worker(rd_kafka_t *rk, NULL); /* Enqueue result on application queue, we're done. */ - rd_kafka_replyq_enq(&rko_fanout->rko_u.admin_request.replyq, rko_result, - rko_fanout->rko_u.admin_request.replyq.version); + rd_kafka_admin_result_enq(rko_fanout, rko_result); /* FALLTHRU */ if (rko_fanout->rko_u.admin_request.fanout.outstanding == 0) @@ -1480,6 +1489,34 @@ static rd_kafka_op_t *rd_kafka_admin_request_op_target_all_new( return rko; } + +/** + * @brief Construct MetadataRequest for use with AdminAPI (does not send). + * Common for DescribeTopics and DescribeCluster. + * + * @sa rd_kafka_MetadataRequest_resp_cb. + */ +static rd_kafka_resp_err_t +rd_kafka_admin_MetadataRequest(rd_kafka_broker_t *rkb, + const rd_list_t *topics, + const char *reason, + rd_bool_t include_cluster_authorized_operations, + rd_bool_t include_topic_authorized_operations, + rd_bool_t force_racks, + rd_kafka_resp_cb_t *resp_cb, + rd_kafka_replyq_t replyq, + void *opaque) { + return rd_kafka_MetadataRequest_resp_cb( + rkb, topics, reason, + rd_false /* No admin operation requires topic creation. */, + include_cluster_authorized_operations, + include_topic_authorized_operations, + rd_false /* No admin operation should update cgrp. */, force_racks, + resp_cb, replyq, + rd_true /* Admin operation metadata requests are always forced. 
*/, + opaque); +} + /**@}*/ @@ -1522,20 +1559,6 @@ rd_kafka_AdminOptions_set_validate_only(rd_kafka_AdminOptions_t *options, errstr, errstr_size); } -rd_kafka_resp_err_t -rd_kafka_AdminOptions_set_incremental(rd_kafka_AdminOptions_t *options, - int true_or_false, - char *errstr, - size_t errstr_size) { - rd_snprintf(errstr, errstr_size, - "Incremental updates currently not supported, see KIP-248"); - return RD_KAFKA_RESP_ERR__NOT_IMPLEMENTED; - - return rd_kafka_confval_set_type(&options->incremental, - RD_KAFKA_CONFVAL_INT, &true_or_false, - errstr, errstr_size); -} - rd_kafka_resp_err_t rd_kafka_AdminOptions_set_broker(rd_kafka_AdminOptions_t *options, int32_t broker_id, @@ -1547,6 +1570,16 @@ rd_kafka_AdminOptions_set_broker(rd_kafka_AdminOptions_t *options, &ibroker_id, errstr, errstr_size); } +rd_kafka_error_t * +rd_kafka_AdminOptions_set_isolation_level(rd_kafka_AdminOptions_t *options, + rd_kafka_IsolationLevel_t value) { + char errstr[512]; + rd_kafka_resp_err_t err = rd_kafka_confval_set_type( + &options->isolation_level, RD_KAFKA_CONFVAL_INT, &value, errstr, + sizeof(errstr)); + return !err ? NULL : rd_kafka_error_new(err, "%s", errstr); +} + rd_kafka_error_t *rd_kafka_AdminOptions_set_require_stable_offsets( rd_kafka_AdminOptions_t *options, int true_or_false) { @@ -1557,6 +1590,16 @@ rd_kafka_error_t *rd_kafka_AdminOptions_set_require_stable_offsets( return !err ? NULL : rd_kafka_error_new(err, "%s", errstr); } +rd_kafka_error_t *rd_kafka_AdminOptions_set_include_authorized_operations( + rd_kafka_AdminOptions_t *options, + int true_or_false) { + char errstr[512]; + rd_kafka_resp_err_t err = rd_kafka_confval_set_type( + &options->include_authorized_operations, RD_KAFKA_CONFVAL_INT, + &true_or_false, errstr, sizeof(errstr)); + return !err ? 
NULL : rd_kafka_error_new(err, "%s", errstr); +} + rd_kafka_error_t *rd_kafka_AdminOptions_set_match_consumer_group_states( rd_kafka_AdminOptions_t *options, const rd_kafka_consumer_group_state_t *consumer_group_states, @@ -1624,7 +1667,8 @@ static void rd_kafka_AdminOptions_init(rd_kafka_t *rk, options->for_api == RD_KAFKA_ADMIN_OP_CREATETOPICS || options->for_api == RD_KAFKA_ADMIN_OP_DELETETOPICS || options->for_api == RD_KAFKA_ADMIN_OP_CREATEPARTITIONS || - options->for_api == RD_KAFKA_ADMIN_OP_DELETERECORDS) + options->for_api == RD_KAFKA_ADMIN_OP_DELETERECORDS || + options->for_api == RD_KAFKA_ADMIN_OP_LISTOFFSETS) rd_kafka_confval_init_int(&options->operation_timeout, "operation_timeout", -1, 3600 * 1000, rk->rk_conf.admin.request_timeout_ms); @@ -1635,20 +1679,14 @@ static void rd_kafka_AdminOptions_init(rd_kafka_t *rk, if (options->for_api == RD_KAFKA_ADMIN_OP_ANY || options->for_api == RD_KAFKA_ADMIN_OP_CREATETOPICS || options->for_api == RD_KAFKA_ADMIN_OP_CREATEPARTITIONS || - options->for_api == RD_KAFKA_ADMIN_OP_ALTERCONFIGS) + options->for_api == RD_KAFKA_ADMIN_OP_ALTERCONFIGS || + options->for_api == RD_KAFKA_ADMIN_OP_INCREMENTALALTERCONFIGS) rd_kafka_confval_init_int(&options->validate_only, "validate_only", 0, 1, 0); else rd_kafka_confval_disable(&options->validate_only, "validate_only"); - if (options->for_api == RD_KAFKA_ADMIN_OP_ANY || - options->for_api == RD_KAFKA_ADMIN_OP_ALTERCONFIGS) - rd_kafka_confval_init_int(&options->incremental, "incremental", - 0, 1, 0); - else - rd_kafka_confval_disable(&options->incremental, "incremental"); - if (options->for_api == RD_KAFKA_ADMIN_OP_ANY || options->for_api == RD_KAFKA_ADMIN_OP_LISTCONSUMERGROUPOFFSETS) rd_kafka_confval_init_int(&options->require_stable_offsets, @@ -1657,6 +1695,18 @@ static void rd_kafka_AdminOptions_init(rd_kafka_t *rk, rd_kafka_confval_disable(&options->require_stable_offsets, "require_stable_offsets"); + if (options->for_api == RD_KAFKA_ADMIN_OP_ANY || + options->for_api == RD_KAFKA_ADMIN_OP_DESCRIBECONSUMERGROUPS || + options->for_api == RD_KAFKA_ADMIN_OP_DESCRIBECLUSTER || + options->for_api == RD_KAFKA_ADMIN_OP_DESCRIBETOPICS) + rd_kafka_confval_init_int( + &options->include_authorized_operations, + "include_authorized_operations", 0, 1, 0); + else + rd_kafka_confval_disable( + &options->include_authorized_operations, + "include_authorized_operations"); + if (options->for_api == RD_KAFKA_ADMIN_OP_ANY || options->for_api == RD_KAFKA_ADMIN_OP_LISTCONSUMERGROUPS) rd_kafka_confval_init_ptr(&options->match_consumer_group_states, @@ -1665,6 +1715,14 @@ static void rd_kafka_AdminOptions_init(rd_kafka_t *rk, rd_kafka_confval_disable(&options->match_consumer_group_states, "match_consumer_group_states"); + if (options->for_api == RD_KAFKA_ADMIN_OP_ANY || + options->for_api == RD_KAFKA_ADMIN_OP_LISTOFFSETS) + rd_kafka_confval_init_int(&options->isolation_level, + "isolation_level", 0, 1, 0); + else + rd_kafka_confval_disable(&options->isolation_level, + "isolation_level"); + rd_kafka_confval_init_int(&options->broker, "broker", 0, INT32_MAX, -1); rd_kafka_confval_init_ptr(&options->opaque, "opaque"); } @@ -1883,18 +1941,14 @@ rd_kafka_NewTopic_set_replica_assignment(rd_kafka_NewTopic_t *new_topic, * @brief Generic constructor of ConfigEntry which is also added to \p rl */ static rd_kafka_resp_err_t -rd_kafka_admin_add_config0(rd_list_t *rl, - const char *name, - const char *value, - rd_kafka_AlterOperation_t operation) { +rd_kafka_admin_add_config0(rd_list_t *rl, const char *name, const char *value) { 
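+        /* The per-entry alter operation type was dropped from this classic
+         * AlterConfigs path; for KIP-339 incremental alterations it is
+         * recorded in entry->a.op_type by
+         * rd_kafka_admin_incremental_add_config0() below. */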
rd_kafka_ConfigEntry_t *entry; if (!name) return RD_KAFKA_RESP_ERR__INVALID_ARG; - entry = rd_calloc(1, sizeof(*entry)); - entry->kv = rd_strtup_new(name, value); - entry->a.operation = operation; + entry = rd_calloc(1, sizeof(*entry)); + entry->kv = rd_strtup_new(name, value); rd_list_add(rl, entry); @@ -1902,11 +1956,36 @@ rd_kafka_admin_add_config0(rd_list_t *rl, } +/** + * @brief Generic constructor of ConfigEntry for Incremental Alter Operations + * which is also added to \p rl + */ +static rd_kafka_error_t * +rd_kafka_admin_incremental_add_config0(rd_list_t *rl, + const char *name, + rd_kafka_AlterConfigOpType_t op_type, + const char *value) { + rd_kafka_ConfigEntry_t *entry; + + if (!name) { + return rd_kafka_error_new(RD_KAFKA_RESP_ERR__INVALID_ARG, + "Config name is required"); + } + + entry = rd_calloc(1, sizeof(*entry)); + entry->kv = rd_strtup_new(name, value); + entry->a.op_type = op_type; + + rd_list_add(rl, entry); + + return NULL; +} + + rd_kafka_resp_err_t rd_kafka_NewTopic_set_config(rd_kafka_NewTopic_t *new_topic, const char *name, const char *value) { - return rd_kafka_admin_add_config0(&new_topic->config, name, value, - RD_KAFKA_ALTER_OP_ADD); + return rd_kafka_admin_add_config0(&new_topic->config, name, value); } @@ -2831,37 +2910,42 @@ rd_kafka_ConfigResource_add_ConfigEntry(rd_kafka_ConfigResource_t *config, rd_list_add(&config->config, entry); } - rd_kafka_resp_err_t -rd_kafka_ConfigResource_add_config(rd_kafka_ConfigResource_t *config, +rd_kafka_ConfigResource_set_config(rd_kafka_ConfigResource_t *config, const char *name, const char *value) { if (!name || !*name || !value) return RD_KAFKA_RESP_ERR__INVALID_ARG; - return rd_kafka_admin_add_config0(&config->config, name, value, - RD_KAFKA_ALTER_OP_ADD); + return rd_kafka_admin_add_config0(&config->config, name, value); } -rd_kafka_resp_err_t -rd_kafka_ConfigResource_set_config(rd_kafka_ConfigResource_t *config, - const char *name, - const char *value) { - if (!name || !*name || !value) - return RD_KAFKA_RESP_ERR__INVALID_ARG; - return rd_kafka_admin_add_config0(&config->config, name, value, - RD_KAFKA_ALTER_OP_SET); -} +rd_kafka_error_t *rd_kafka_ConfigResource_add_incremental_config( + rd_kafka_ConfigResource_t *config, + const char *name, + rd_kafka_AlterConfigOpType_t op_type, + const char *value) { + if (op_type < 0 || op_type >= RD_KAFKA_ALTER_CONFIG_OP_TYPE__CNT) { + return rd_kafka_error_new( + RD_KAFKA_RESP_ERR__INVALID_ARG, + "Invalid alter config operation type"); + } -rd_kafka_resp_err_t -rd_kafka_ConfigResource_delete_config(rd_kafka_ConfigResource_t *config, - const char *name) { - if (!name || !*name) - return RD_KAFKA_RESP_ERR__INVALID_ARG; + if (!name || !*name) { + return rd_kafka_error_new(RD_KAFKA_RESP_ERR__INVALID_ARG, + !name + ? 
"Config name is required" + : "Config name mustn't be empty"); + } + + if (op_type != RD_KAFKA_ALTER_CONFIG_OP_TYPE_DELETE && !value) { + return rd_kafka_error_new(RD_KAFKA_RESP_ERR__INVALID_ARG, + "Config value is required"); + } - return rd_kafka_admin_add_config0(&config->config, name, NULL, - RD_KAFKA_ALTER_OP_DELETE); + return rd_kafka_admin_incremental_add_config0(&config->config, name, + op_type, value); } @@ -2995,7 +3079,7 @@ rd_kafka_AlterConfigsResponse_parse(rd_kafka_op_t *rko_req, rd_kafka_buf_read_i32(reply, &Throttle_Time); rd_kafka_op_throttle_time(rkb, rk->rk_rep, Throttle_Time); - rd_kafka_buf_read_i32(reply, &res_cnt); + rd_kafka_buf_read_arraycnt(reply, &res_cnt, RD_KAFKAP_CONFIGS_MAX); if (res_cnt > rd_list_cnt(&rko_req->rko_u.admin_request.args)) { rd_snprintf(errstr, errstr_size, @@ -3028,6 +3112,7 @@ rd_kafka_AlterConfigsResponse_parse(rd_kafka_op_t *rko_req, rd_kafka_buf_read_i8(reply, &res_type); rd_kafka_buf_read_str(reply, &kres_name); RD_KAFKAP_STR_DUPA(&res_name, &kres_name); + rd_kafka_buf_skip_tags(reply); if (error_code) { if (RD_KAFKAP_STR_IS_NULL(&error_msg) || @@ -3158,7 +3243,7 @@ const rd_kafka_ConfigResource_t **rd_kafka_AlterConfigs_result_resources( /** - * @name DescribeConfigs + * @name IncrementalAlterConfigs * @{ * * @@ -3166,15 +3251,16 @@ const rd_kafka_ConfigResource_t **rd_kafka_AlterConfigs_result_resources( */ + /** - * @brief Parse DescribeConfigsResponse and create ADMIN_RESULT op. + * @brief Parse IncrementalAlterConfigsResponse and create ADMIN_RESULT op. */ static rd_kafka_resp_err_t -rd_kafka_DescribeConfigsResponse_parse(rd_kafka_op_t *rko_req, - rd_kafka_op_t **rko_resultp, - rd_kafka_buf_t *reply, - char *errstr, - size_t errstr_size) { +rd_kafka_IncrementalAlterConfigsResponse_parse(rd_kafka_op_t *rko_req, + rd_kafka_op_t **rko_resultp, + rd_kafka_buf_t *reply, + char *errstr, + size_t errstr_size) { const int log_decode_errors = LOG_ERR; rd_kafka_broker_t *rkb = reply->rkbuf_rkb; rd_kafka_t *rk = rkb->rkb_rk; @@ -3182,22 +3268,21 @@ rd_kafka_DescribeConfigsResponse_parse(rd_kafka_op_t *rko_req, int32_t res_cnt; int i; int32_t Throttle_Time; - rd_kafka_ConfigResource_t *config = NULL; - rd_kafka_ConfigEntry_t *entry = NULL; rd_kafka_buf_read_i32(reply, &Throttle_Time); rd_kafka_op_throttle_time(rkb, rk->rk_rep, Throttle_Time); - /* #resources */ - rd_kafka_buf_read_i32(reply, &res_cnt); + rd_kafka_buf_read_arraycnt(reply, &res_cnt, RD_KAFKAP_CONFIGS_MAX); - if (res_cnt > rd_list_cnt(&rko_req->rko_u.admin_request.args)) - rd_kafka_buf_parse_fail( - reply, - "Received %" PRId32 - " ConfigResources in response " - "when only %d were requested", - res_cnt, rd_list_cnt(&rko_req->rko_u.admin_request.args)); + if (res_cnt != rd_list_cnt(&rko_req->rko_u.admin_request.args)) { + rd_snprintf(errstr, errstr_size, + "Received %" PRId32 + " ConfigResources in response " + "when %d were requested", + res_cnt, + rd_list_cnt(&rko_req->rko_u.admin_request.args)); + return RD_KAFKA_RESP_ERR__BAD_MSG; + } rko_result = rd_kafka_admin_result_new(rko_req); @@ -3211,16 +3296,16 @@ rd_kafka_DescribeConfigsResponse_parse(rd_kafka_op_t *rko_req, rd_kafkap_str_t kres_name; char *res_name; char *this_errstr = NULL; + rd_kafka_ConfigResource_t *config; rd_kafka_ConfigResource_t skel; int orig_pos; - int32_t entry_cnt; - int ci; rd_kafka_buf_read_i16(reply, &error_code); rd_kafka_buf_read_str(reply, &error_msg); rd_kafka_buf_read_i8(reply, &res_type); rd_kafka_buf_read_str(reply, &kres_name); RD_KAFKAP_STR_DUPA(&res_name, &kres_name); + 
rd_kafka_buf_skip_tags(reply); if (error_code) { if (RD_KAFKAP_STR_IS_NULL(&error_msg) || @@ -3234,7 +3319,7 @@ rd_kafka_DescribeConfigsResponse_parse(rd_kafka_op_t *rko_req, config = rd_kafka_ConfigResource_new(res_type, res_name); if (!config) { rd_kafka_log(rko_req->rko_rk, LOG_ERR, "ADMIN", - "DescribeConfigs returned " + "IncrementalAlterConfigs returned " "unsupported ConfigResource #%d with " "type %d and name \"%s\": ignoring", i, res_type, res_name); @@ -3245,108 +3330,6 @@ rd_kafka_DescribeConfigsResponse_parse(rd_kafka_op_t *rko_req, if (this_errstr) config->errstr = rd_strdup(this_errstr); - /* #config_entries */ - rd_kafka_buf_read_i32(reply, &entry_cnt); - - for (ci = 0; ci < (int)entry_cnt; ci++) { - rd_kafkap_str_t config_name, config_value; - int32_t syn_cnt; - int si; - - rd_kafka_buf_read_str(reply, &config_name); - rd_kafka_buf_read_str(reply, &config_value); - - entry = rd_kafka_ConfigEntry_new0( - config_name.str, RD_KAFKAP_STR_LEN(&config_name), - config_value.str, RD_KAFKAP_STR_LEN(&config_value)); - - rd_kafka_buf_read_bool(reply, &entry->a.is_readonly); - - /* ApiVersion 0 has is_default field, while - * ApiVersion 1 has source field. - * Convert between the two so they look the same - * to the caller. */ - if (rd_kafka_buf_ApiVersion(reply) == 0) { - rd_kafka_buf_read_bool(reply, - &entry->a.is_default); - if (entry->a.is_default) - entry->a.source = - RD_KAFKA_CONFIG_SOURCE_DEFAULT_CONFIG; - } else { - int8_t config_source; - rd_kafka_buf_read_i8(reply, &config_source); - entry->a.source = config_source; - - if (entry->a.source == - RD_KAFKA_CONFIG_SOURCE_DEFAULT_CONFIG) - entry->a.is_default = 1; - } - - rd_kafka_buf_read_bool(reply, &entry->a.is_sensitive); - - - if (rd_kafka_buf_ApiVersion(reply) == 1) { - /* #config_synonyms (ApiVersion 1) */ - rd_kafka_buf_read_i32(reply, &syn_cnt); - - if (syn_cnt > 100000) - rd_kafka_buf_parse_fail( - reply, - "Broker returned %" PRId32 - " config synonyms for " - "ConfigResource %d,%s: " - "limit is 100000", - syn_cnt, config->restype, - config->name); - - if (syn_cnt > 0) - rd_list_grow(&entry->synonyms, syn_cnt); - - } else { - /* No synonyms in ApiVersion 0 */ - syn_cnt = 0; - } - - - - /* Read synonyms (ApiVersion 1) */ - for (si = 0; si < (int)syn_cnt; si++) { - rd_kafkap_str_t syn_name, syn_value; - int8_t syn_source; - rd_kafka_ConfigEntry_t *syn_entry; - - rd_kafka_buf_read_str(reply, &syn_name); - rd_kafka_buf_read_str(reply, &syn_value); - rd_kafka_buf_read_i8(reply, &syn_source); - - syn_entry = rd_kafka_ConfigEntry_new0( - syn_name.str, RD_KAFKAP_STR_LEN(&syn_name), - syn_value.str, - RD_KAFKAP_STR_LEN(&syn_value)); - if (!syn_entry) - rd_kafka_buf_parse_fail( - reply, - "Broker returned invalid " - "synonym #%d " - "for ConfigEntry #%d (%s) " - "and ConfigResource %d,%s: " - "syn_name.len %d, " - "syn_value.len %d", - si, ci, entry->kv->name, - config->restype, config->name, - (int)syn_name.len, - (int)syn_value.len); - - syn_entry->a.source = syn_source; - syn_entry->a.is_synonym = 1; - - rd_list_add(&entry->synonyms, syn_entry); - } - - rd_kafka_ConfigResource_add_ConfigEntry(config, entry); - entry = NULL; - } - /* As a convenience to the application we insert result * in the same order as they were requested. The broker * does not maintain ordering unfortunately. 
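          * (The original request order is taken from admin_result.args and
          * each parsed resource is slotted back into its position via
          * rd_kafka_ConfigResource_cmp, as done below.)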
*/ @@ -3354,25 +3337,28 @@ rd_kafka_DescribeConfigsResponse_parse(rd_kafka_op_t *rko_req, skel.name = config->name; orig_pos = rd_list_index(&rko_result->rko_u.admin_result.args, &skel, rd_kafka_ConfigResource_cmp); - if (orig_pos == -1) + if (orig_pos == -1) { + rd_kafka_ConfigResource_destroy(config); rd_kafka_buf_parse_fail( reply, "Broker returned ConfigResource %d,%s " "that was not " "included in the original request", res_type, res_name); + } if (rd_list_elem(&rko_result->rko_u.admin_result.results, - orig_pos) != NULL) + orig_pos) != NULL) { + rd_kafka_ConfigResource_destroy(config); rd_kafka_buf_parse_fail( reply, "Broker returned ConfigResource %d,%s " "multiple times", res_type, res_name); + } rd_list_set(&rko_result->rko_u.admin_result.results, orig_pos, config); - config = NULL; } *rko_resultp = rko_result; @@ -3380,55 +3366,113 @@ rd_kafka_DescribeConfigsResponse_parse(rd_kafka_op_t *rko_req, return RD_KAFKA_RESP_ERR_NO_ERROR; err_parse: - if (entry) - rd_kafka_ConfigEntry_destroy(entry); - if (config) - rd_kafka_ConfigResource_destroy(config); - if (rko_result) rd_kafka_op_destroy(rko_result); - rd_snprintf(errstr, errstr_size, - "DescribeConfigs response protocol parse failure: %s", - rd_kafka_err2str(reply->rkbuf_err)); + rd_snprintf( + errstr, errstr_size, + "IncrementalAlterConfigs response protocol parse failure: %s", + rd_kafka_err2str(reply->rkbuf_err)); return reply->rkbuf_err; } +typedef RD_MAP_TYPE(const char *, const rd_bool_t *) map_str_bool; -void rd_kafka_DescribeConfigs(rd_kafka_t *rk, - rd_kafka_ConfigResource_t **configs, - size_t config_cnt, - const rd_kafka_AdminOptions_t *options, - rd_kafka_queue_t *rkqu) { +void rd_kafka_IncrementalAlterConfigs(rd_kafka_t *rk, + rd_kafka_ConfigResource_t **configs, + size_t config_cnt, + const rd_kafka_AdminOptions_t *options, + rd_kafka_queue_t *rkqu) { rd_kafka_op_t *rko; size_t i; rd_kafka_resp_err_t err; char errstr[256]; + rd_bool_t value = rd_true; + static const struct rd_kafka_admin_worker_cbs cbs = { - rd_kafka_DescribeConfigsRequest, - rd_kafka_DescribeConfigsResponse_parse, + rd_kafka_IncrementalAlterConfigsRequest, + rd_kafka_IncrementalAlterConfigsResponse_parse, }; rd_assert(rkqu); rko = rd_kafka_admin_request_op_new( - rk, RD_KAFKA_OP_DESCRIBECONFIGS, - RD_KAFKA_EVENT_DESCRIBECONFIGS_RESULT, &cbs, options, rkqu->rkqu_q); + rk, RD_KAFKA_OP_INCREMENTALALTERCONFIGS, + RD_KAFKA_EVENT_INCREMENTALALTERCONFIGS_RESULT, &cbs, options, + rkqu->rkqu_q); rd_list_init(&rko->rko_u.admin_request.args, (int)config_cnt, rd_kafka_ConfigResource_free); - for (i = 0; i < config_cnt; i++) + /* Check duplicate ConfigResource */ + map_str_bool configs_map = RD_MAP_INITIALIZER( + config_cnt, rd_map_str_cmp, rd_map_str_hash, NULL, NULL); + + for (i = 0; i < config_cnt; i++) { + /* 2 chars for the decimal restype + 1 for the comma + * + 1 for the trailing zero. 
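+                 * E.g. restype RD_KAFKA_RESOURCE_TOPIC (2) and name
+                 * "mytopic" yield the key "2,mytopic".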
*/ + size_t len = 4 + strlen(configs[i]->name); + char *key = rd_alloca(len); + const rd_kafka_ConfigEntry_t **entries; + size_t entry_cnt, j; + + rd_snprintf(key, len - 1, "%d,%s", configs[i]->restype, + configs[i]->name); + if (RD_MAP_GET(&configs_map, key)) { + /* Duplicate ConfigResource found */ + break; + } + RD_MAP_SET(&configs_map, key, &value); + entries = + rd_kafka_ConfigResource_configs(configs[i], &entry_cnt); + + /* Check duplicate ConfigEntry */ + map_str_bool entries_map = RD_MAP_INITIALIZER( + entry_cnt, rd_map_str_cmp, rd_map_str_hash, NULL, NULL); + + for (j = 0; j < entry_cnt; j++) { + const rd_kafka_ConfigEntry_t *entry = entries[j]; + const char *key = rd_kafka_ConfigEntry_name(entry); + + if (RD_MAP_GET(&entries_map, key)) { + /* Duplicate ConfigEntry found */ + break; + } + RD_MAP_SET(&entries_map, key, &value); + } + RD_MAP_DESTROY(&entries_map); + + if (j != entry_cnt) { + RD_MAP_DESTROY(&configs_map); + rd_kafka_admin_result_fail( + rko, RD_KAFKA_RESP_ERR__INVALID_ARG, + "Duplicate ConfigEntry found"); + rd_kafka_admin_common_worker_destroy( + rk, rko, rd_true /*destroy*/); + return; + } + rd_list_add(&rko->rko_u.admin_request.args, rd_kafka_ConfigResource_copy(configs[i])); + } - /* If there's a BROKER resource in the list we need to - * speak directly to that broker rather than the controller. - * - * Multiple BROKER resources are not allowed. - */ + RD_MAP_DESTROY(&configs_map); + + if (i != config_cnt) { + rd_kafka_admin_result_fail(rko, RD_KAFKA_RESP_ERR__INVALID_ARG, + "Duplicate ConfigResource found"); + rd_kafka_admin_common_worker_destroy(rk, rko, + rd_true /*destroy*/); + return; + } + + /* If there's a BROKER resource in the list we need to + * speak directly to that broker rather than the controller. + * + * Multiple BROKER resources are not allowed. + */ err = rd_kafka_ConfigResource_get_single_broker_id( &rko->rko_u.admin_request.args, &rko->rko_u.admin_request.broker_id, errstr, sizeof(errstr)); @@ -3438,14 +3482,28 @@ void rd_kafka_DescribeConfigs(rd_kafka_t *rk, rd_true /*destroy*/); return; } + if (rko->rko_u.admin_request.broker_id != + RD_KAFKA_ADMIN_TARGET_CONTROLLER) { + /* Revert broker option to default if altering + * broker configs. 
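+                 * (The destination broker was already captured in
+                 * rko->rko_u.admin_request.broker_id above; the
+                 * user-settable broker option is reset to its default
+                 * here.)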
*/ + err = rd_kafka_confval_set_type( + &rko->rko_u.admin_request.options.broker, + RD_KAFKA_CONFVAL_INT, NULL, errstr, sizeof(errstr)); + if (err) { + rd_kafka_admin_result_fail(rko, err, "%s", errstr); + rd_kafka_admin_common_worker_destroy( + rk, rko, rd_true /*destroy*/); + return; + } + } rd_kafka_q_enq(rk->rk_ops, rko); } - -const rd_kafka_ConfigResource_t **rd_kafka_DescribeConfigs_result_resources( - const rd_kafka_DescribeConfigs_result_t *result, +const rd_kafka_ConfigResource_t ** +rd_kafka_IncrementalAlterConfigs_result_resources( + const rd_kafka_IncrementalAlterConfigs_result_t *result, size_t *cntp) { return rd_kafka_admin_result_ret_resources( (const rd_kafka_op_t *)result, cntp); @@ -3453,1598 +3511,1830 @@ const rd_kafka_ConfigResource_t **rd_kafka_DescribeConfigs_result_resources( /**@}*/ + + /** - * @name Delete Records + * @name DescribeConfigs * @{ * * * - * */ -rd_kafka_DeleteRecords_t *rd_kafka_DeleteRecords_new( - const rd_kafka_topic_partition_list_t *before_offsets) { - rd_kafka_DeleteRecords_t *del_records; - - del_records = rd_calloc(1, sizeof(*del_records)); - del_records->offsets = - rd_kafka_topic_partition_list_copy(before_offsets); - - return del_records; -} - -void rd_kafka_DeleteRecords_destroy(rd_kafka_DeleteRecords_t *del_records) { - rd_kafka_topic_partition_list_destroy(del_records->offsets); - rd_free(del_records); -} - -void rd_kafka_DeleteRecords_destroy_array( - rd_kafka_DeleteRecords_t **del_records, - size_t del_record_cnt) { - size_t i; - for (i = 0; i < del_record_cnt; i++) - rd_kafka_DeleteRecords_destroy(del_records[i]); -} - - -/** @brief Merge the DeleteRecords response from a single broker - * into the user response list. +/** + * @brief Parse DescribeConfigsResponse and create ADMIN_RESULT op. */ -static void -rd_kafka_DeleteRecords_response_merge(rd_kafka_op_t *rko_fanout, - const rd_kafka_op_t *rko_partial) { - rd_kafka_t *rk = rko_fanout->rko_rk; - const rd_kafka_topic_partition_list_t *partitions; - rd_kafka_topic_partition_list_t *respartitions; - const rd_kafka_topic_partition_t *partition; +static rd_kafka_resp_err_t +rd_kafka_DescribeConfigsResponse_parse(rd_kafka_op_t *rko_req, + rd_kafka_op_t **rko_resultp, + rd_kafka_buf_t *reply, + char *errstr, + size_t errstr_size) { + const int log_decode_errors = LOG_ERR; + rd_kafka_broker_t *rkb = reply->rkbuf_rkb; + rd_kafka_t *rk = rkb->rkb_rk; + rd_kafka_op_t *rko_result = NULL; + int32_t res_cnt; + int i; + int32_t Throttle_Time; + rd_kafka_ConfigResource_t *config = NULL; + rd_kafka_ConfigEntry_t *entry = NULL; - rd_assert(rko_partial->rko_evtype == - RD_KAFKA_EVENT_DELETERECORDS_RESULT); + rd_kafka_buf_read_i32(reply, &Throttle_Time); + rd_kafka_op_throttle_time(rkb, rk->rk_rep, Throttle_Time); - /* All partitions (offsets) from the DeleteRecords() call */ - respartitions = - rd_list_elem(&rko_fanout->rko_u.admin_request.fanout.results, 0); + /* #resources */ + rd_kafka_buf_read_i32(reply, &res_cnt); - if (rko_partial->rko_err) { - /* If there was a request-level error, set the error on - * all requested partitions for this request. 
*/ - const rd_kafka_topic_partition_list_t *reqpartitions; - rd_kafka_topic_partition_t *reqpartition; + if (res_cnt > rd_list_cnt(&rko_req->rko_u.admin_request.args)) + rd_kafka_buf_parse_fail( + reply, + "Received %" PRId32 + " ConfigResources in response " + "when only %d were requested", + res_cnt, rd_list_cnt(&rko_req->rko_u.admin_request.args)); - /* Partitions (offsets) from this DeleteRecordsRequest */ - reqpartitions = - rd_list_elem(&rko_partial->rko_u.admin_result.args, 0); + rko_result = rd_kafka_admin_result_new(rko_req); - RD_KAFKA_TPLIST_FOREACH(reqpartition, reqpartitions) { - rd_kafka_topic_partition_t *respart; + rd_list_init(&rko_result->rko_u.admin_result.results, res_cnt, + rd_kafka_ConfigResource_free); - /* Find result partition */ - respart = rd_kafka_topic_partition_list_find( - respartitions, reqpartition->topic, - reqpartition->partition); + for (i = 0; i < (int)res_cnt; i++) { + int16_t error_code; + rd_kafkap_str_t error_msg; + int8_t res_type; + rd_kafkap_str_t kres_name; + char *res_name; + char *this_errstr = NULL; + rd_kafka_ConfigResource_t skel; + int orig_pos; + int32_t entry_cnt; + int ci; - rd_assert(respart || !*"respart not found"); + rd_kafka_buf_read_i16(reply, &error_code); + rd_kafka_buf_read_str(reply, &error_msg); + rd_kafka_buf_read_i8(reply, &res_type); + rd_kafka_buf_read_str(reply, &kres_name); + RD_KAFKAP_STR_DUPA(&res_name, &kres_name); - respart->err = rko_partial->rko_err; + if (error_code) { + if (RD_KAFKAP_STR_IS_NULL(&error_msg) || + RD_KAFKAP_STR_LEN(&error_msg) == 0) + this_errstr = + (char *)rd_kafka_err2str(error_code); + else + RD_KAFKAP_STR_DUPA(&this_errstr, &error_msg); } - return; - } + config = rd_kafka_ConfigResource_new(res_type, res_name); + if (!config) { + rd_kafka_log(rko_req->rko_rk, LOG_ERR, "ADMIN", + "DescribeConfigs returned " + "unsupported ConfigResource #%d with " + "type %d and name \"%s\": ignoring", + i, res_type, res_name); + continue; + } - /* Partitions from the DeleteRecordsResponse */ - partitions = rd_list_elem(&rko_partial->rko_u.admin_result.results, 0); + config->err = error_code; + if (this_errstr) + config->errstr = rd_strdup(this_errstr); - RD_KAFKA_TPLIST_FOREACH(partition, partitions) { - rd_kafka_topic_partition_t *respart; + /* #config_entries */ + rd_kafka_buf_read_i32(reply, &entry_cnt); + for (ci = 0; ci < (int)entry_cnt; ci++) { + rd_kafkap_str_t config_name, config_value; + int32_t syn_cnt; + int si; - /* Find result partition */ - respart = rd_kafka_topic_partition_list_find( - respartitions, partition->topic, partition->partition); - if (unlikely(!respart)) { - rd_dassert(!*"partition not found"); + rd_kafka_buf_read_str(reply, &config_name); + rd_kafka_buf_read_str(reply, &config_value); - rd_kafka_log(rk, LOG_WARNING, "DELETERECORDS", - "DeleteRecords response contains " - "unexpected %s [%" PRId32 - "] which " - "was not in the request list: ignored", - partition->topic, partition->partition); - continue; - } + entry = rd_kafka_ConfigEntry_new0( + config_name.str, RD_KAFKAP_STR_LEN(&config_name), + config_value.str, RD_KAFKAP_STR_LEN(&config_value)); - respart->offset = partition->offset; - respart->err = partition->err; - } -} + rd_kafka_buf_read_bool(reply, &entry->a.is_readonly); + /* ApiVersion 0 has is_default field, while + * ApiVersion 1 has source field. + * Convert between the two so they look the same + * to the caller. 
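+                         * E.g. a v0 reply with is_default=true is surfaced
+                         * exactly like a v1 reply with source =
+                         * RD_KAFKA_CONFIG_SOURCE_DEFAULT_CONFIG.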
*/ + if (rd_kafka_buf_ApiVersion(reply) == 0) { + rd_kafka_buf_read_bool(reply, + &entry->a.is_default); + if (entry->a.is_default) + entry->a.source = + RD_KAFKA_CONFIG_SOURCE_DEFAULT_CONFIG; + } else { + int8_t config_source; + rd_kafka_buf_read_i8(reply, &config_source); + entry->a.source = config_source; + if (entry->a.source == + RD_KAFKA_CONFIG_SOURCE_DEFAULT_CONFIG) + entry->a.is_default = 1; + } -/** - * @brief Parse DeleteRecordsResponse and create ADMIN_RESULT op. - */ -static rd_kafka_resp_err_t -rd_kafka_DeleteRecordsResponse_parse(rd_kafka_op_t *rko_req, - rd_kafka_op_t **rko_resultp, - rd_kafka_buf_t *reply, - char *errstr, - size_t errstr_size) { - const int log_decode_errors = LOG_ERR; - rd_kafka_op_t *rko_result; - rd_kafka_topic_partition_list_t *offsets; + rd_kafka_buf_read_bool(reply, &entry->a.is_sensitive); - rd_kafka_buf_read_throttle_time(reply); + if (rd_kafka_buf_ApiVersion(reply) == 1) { + /* #config_synonyms (ApiVersion 1) */ + rd_kafka_buf_read_i32(reply, &syn_cnt); - const rd_kafka_topic_partition_field_t fields[] = { - RD_KAFKA_TOPIC_PARTITION_FIELD_PARTITION, - RD_KAFKA_TOPIC_PARTITION_FIELD_OFFSET, - RD_KAFKA_TOPIC_PARTITION_FIELD_ERR, - RD_KAFKA_TOPIC_PARTITION_FIELD_END}; - offsets = rd_kafka_buf_read_topic_partitions(reply, 0, fields); - if (!offsets) - rd_kafka_buf_parse_fail(reply, - "Failed to parse topic partitions"); + if (syn_cnt > 100000) + rd_kafka_buf_parse_fail( + reply, + "Broker returned %" PRId32 + " config synonyms for " + "ConfigResource %d,%s: " + "limit is 100000", + syn_cnt, config->restype, + config->name); + if (syn_cnt > 0) + rd_list_grow(&entry->synonyms, syn_cnt); - rko_result = rd_kafka_admin_result_new(rko_req); - rd_list_init(&rko_result->rko_u.admin_result.results, 1, - rd_kafka_topic_partition_list_destroy_free); - rd_list_add(&rko_result->rko_u.admin_result.results, offsets); - *rko_resultp = rko_result; - return RD_KAFKA_RESP_ERR_NO_ERROR; + } else { + /* No synonyms in ApiVersion 0 */ + syn_cnt = 0; + } -err_parse: - rd_snprintf(errstr, errstr_size, - "DeleteRecords response protocol parse failure: %s", - rd_kafka_err2str(reply->rkbuf_err)); - return reply->rkbuf_err; -} + /* Read synonyms (ApiVersion 1) */ + for (si = 0; si < (int)syn_cnt; si++) { + rd_kafkap_str_t syn_name, syn_value; + int8_t syn_source; + rd_kafka_ConfigEntry_t *syn_entry; -/** - * @brief Call when leaders have been queried to progress the DeleteRecords - * admin op to its next phase, sending DeleteRecords to partition - * leaders. - * - * @param rko Reply op (RD_KAFKA_OP_LEADERS). 
- */ -static rd_kafka_op_res_t -rd_kafka_DeleteRecords_leaders_queried_cb(rd_kafka_t *rk, - rd_kafka_q_t *rkq, - rd_kafka_op_t *reply) { - rd_kafka_resp_err_t err = reply->rko_err; - const rd_list_t *leaders = - reply->rko_u.leaders.leaders; /* Possibly NULL (on err) */ - rd_kafka_topic_partition_list_t *partitions = - reply->rko_u.leaders.partitions; /* Possibly NULL (on err) */ - rd_kafka_op_t *rko_fanout = reply->rko_u.leaders.opaque; - rd_kafka_topic_partition_t *rktpar; - rd_kafka_topic_partition_list_t *offsets; - const struct rd_kafka_partition_leader *leader; - static const struct rd_kafka_admin_worker_cbs cbs = { - rd_kafka_DeleteRecordsRequest, - rd_kafka_DeleteRecordsResponse_parse, - }; - int i; - - rd_assert((rko_fanout->rko_type & ~RD_KAFKA_OP_FLAGMASK) == - RD_KAFKA_OP_ADMIN_FANOUT); + rd_kafka_buf_read_str(reply, &syn_name); + rd_kafka_buf_read_str(reply, &syn_value); + rd_kafka_buf_read_i8(reply, &syn_source); - if (err == RD_KAFKA_RESP_ERR__DESTROY) - goto err; + syn_entry = rd_kafka_ConfigEntry_new0( + syn_name.str, RD_KAFKAP_STR_LEN(&syn_name), + syn_value.str, + RD_KAFKAP_STR_LEN(&syn_value)); + if (!syn_entry) + rd_kafka_buf_parse_fail( + reply, + "Broker returned invalid " + "synonym #%d " + "for ConfigEntry #%d (%s) " + "and ConfigResource %d,%s: " + "syn_name.len %d, " + "syn_value.len %d", + si, ci, entry->kv->name, + config->restype, config->name, + (int)syn_name.len, + (int)syn_value.len); - /* Requested offsets */ - offsets = rd_list_elem(&rko_fanout->rko_u.admin_request.args, 0); + syn_entry->a.source = syn_source; + syn_entry->a.is_synonym = 1; - /* Update the error field of each partition from the - * leader-queried partition list so that ERR_UNKNOWN_TOPIC_OR_PART - * and similar are propagated, since those partitions are not - * included in the leaders list. */ - RD_KAFKA_TPLIST_FOREACH(rktpar, partitions) { - rd_kafka_topic_partition_t *rktpar2; + rd_list_add(&entry->synonyms, syn_entry); + } - if (!rktpar->err) - continue; + rd_kafka_ConfigResource_add_ConfigEntry(config, entry); + entry = NULL; + } - rktpar2 = rd_kafka_topic_partition_list_find( - offsets, rktpar->topic, rktpar->partition); - rd_assert(rktpar2); - rktpar2->err = rktpar->err; - } + /* As a convenience to the application we insert result + * in the same order as they were requested. The broker + * does not maintain ordering unfortunately. */ + skel.restype = config->restype; + skel.name = config->name; + orig_pos = rd_list_index(&rko_result->rko_u.admin_result.args, + &skel, rd_kafka_ConfigResource_cmp); + if (orig_pos == -1) + rd_kafka_buf_parse_fail( + reply, + "Broker returned ConfigResource %d,%s " + "that was not " + "included in the original request", + res_type, res_name); + if (rd_list_elem(&rko_result->rko_u.admin_result.results, + orig_pos) != NULL) + rd_kafka_buf_parse_fail( + reply, + "Broker returned ConfigResource %d,%s " + "multiple times", + res_type, res_name); - if (err) { - err: - rd_kafka_admin_result_fail( - rko_fanout, err, "Failed to query partition leaders: %s", - err == RD_KAFKA_RESP_ERR__NOENT ? "No leaders found" - : rd_kafka_err2str(err)); - rd_kafka_admin_common_worker_destroy(rk, rko_fanout, - rd_true /*destroy*/); - return RD_KAFKA_OP_RES_HANDLED; + rd_list_set(&rko_result->rko_u.admin_result.results, orig_pos, + config); + config = NULL; } - /* The response lists is one element deep and that element is a - * rd_kafka_topic_partition_list_t with the results of the deletes. 
*/ - rd_list_init(&rko_fanout->rko_u.admin_request.fanout.results, 1, - rd_kafka_topic_partition_list_destroy_free); - rd_list_add(&rko_fanout->rko_u.admin_request.fanout.results, - rd_kafka_topic_partition_list_copy(offsets)); - - rko_fanout->rko_u.admin_request.fanout.outstanding = - rd_list_cnt(leaders); - - rd_assert(rd_list_cnt(leaders) > 0); + *rko_resultp = rko_result; - /* For each leader send a request for its partitions */ - RD_LIST_FOREACH(leader, leaders, i) { - rd_kafka_op_t *rko = rd_kafka_admin_request_op_new( - rk, RD_KAFKA_OP_DELETERECORDS, - RD_KAFKA_EVENT_DELETERECORDS_RESULT, &cbs, - &rko_fanout->rko_u.admin_request.options, rk->rk_ops); - rko->rko_u.admin_request.fanout_parent = rko_fanout; - rko->rko_u.admin_request.broker_id = leader->rkb->rkb_nodeid; + return RD_KAFKA_RESP_ERR_NO_ERROR; - rd_kafka_topic_partition_list_sort_by_topic(leader->partitions); +err_parse: + if (entry) + rd_kafka_ConfigEntry_destroy(entry); + if (config) + rd_kafka_ConfigResource_destroy(config); - rd_list_init(&rko->rko_u.admin_request.args, 1, - rd_kafka_topic_partition_list_destroy_free); - rd_list_add( - &rko->rko_u.admin_request.args, - rd_kafka_topic_partition_list_copy(leader->partitions)); + if (rko_result) + rd_kafka_op_destroy(rko_result); - /* Enqueue op for admin_worker() to transition to next state */ - rd_kafka_q_enq(rk->rk_ops, rko); - } + rd_snprintf(errstr, errstr_size, + "DescribeConfigs response protocol parse failure: %s", + rd_kafka_err2str(reply->rkbuf_err)); - return RD_KAFKA_OP_RES_HANDLED; + return reply->rkbuf_err; } -void rd_kafka_DeleteRecords(rd_kafka_t *rk, - rd_kafka_DeleteRecords_t **del_records, - size_t del_record_cnt, - const rd_kafka_AdminOptions_t *options, - rd_kafka_queue_t *rkqu) { - rd_kafka_op_t *rko_fanout; - static const struct rd_kafka_admin_fanout_worker_cbs fanout_cbs = { - rd_kafka_DeleteRecords_response_merge, - rd_kafka_topic_partition_list_copy_opaque, + +void rd_kafka_DescribeConfigs(rd_kafka_t *rk, + rd_kafka_ConfigResource_t **configs, + size_t config_cnt, + const rd_kafka_AdminOptions_t *options, + rd_kafka_queue_t *rkqu) { + rd_kafka_op_t *rko; + size_t i; + rd_kafka_resp_err_t err; + char errstr[256]; + static const struct rd_kafka_admin_worker_cbs cbs = { + rd_kafka_DescribeConfigsRequest, + rd_kafka_DescribeConfigsResponse_parse, }; - const rd_kafka_topic_partition_list_t *offsets; - rd_kafka_topic_partition_list_t *copied_offsets; rd_assert(rkqu); - rko_fanout = rd_kafka_admin_fanout_op_new( - rk, RD_KAFKA_OP_DELETERECORDS, RD_KAFKA_EVENT_DELETERECORDS_RESULT, - &fanout_cbs, options, rkqu->rkqu_q); + rko = rd_kafka_admin_request_op_new( + rk, RD_KAFKA_OP_DESCRIBECONFIGS, + RD_KAFKA_EVENT_DESCRIBECONFIGS_RESULT, &cbs, options, rkqu->rkqu_q); - if (del_record_cnt != 1) { - /* We only support one DeleteRecords per call since there - * is no point in passing multiples, but the API still - * needs to be extensible/future-proof. 
*/ - rd_kafka_admin_result_fail(rko_fanout, - RD_KAFKA_RESP_ERR__INVALID_ARG, - "Exactly one DeleteRecords must be " - "passed"); - rd_kafka_admin_common_worker_destroy(rk, rko_fanout, - rd_true /*destroy*/); - return; - } + rd_list_init(&rko->rko_u.admin_request.args, (int)config_cnt, + rd_kafka_ConfigResource_free); - offsets = del_records[0]->offsets; + for (i = 0; i < config_cnt; i++) + rd_list_add(&rko->rko_u.admin_request.args, + rd_kafka_ConfigResource_copy(configs[i])); - if (offsets == NULL || offsets->cnt == 0) { - rd_kafka_admin_result_fail(rko_fanout, - RD_KAFKA_RESP_ERR__INVALID_ARG, - "No records to delete"); - rd_kafka_admin_common_worker_destroy(rk, rko_fanout, + /* If there's a BROKER resource in the list we need to + * speak directly to that broker rather than the controller. + * + * Multiple BROKER resources are not allowed. + */ + err = rd_kafka_ConfigResource_get_single_broker_id( + &rko->rko_u.admin_request.args, &rko->rko_u.admin_request.broker_id, + errstr, sizeof(errstr)); + if (err) { + rd_kafka_admin_result_fail(rko, err, "%s", errstr); + rd_kafka_admin_common_worker_destroy(rk, rko, rd_true /*destroy*/); return; } - /* Copy offsets list and store it on the request op */ - copied_offsets = rd_kafka_topic_partition_list_copy(offsets); - if (rd_kafka_topic_partition_list_has_duplicates( - copied_offsets, rd_false /*check partition*/)) { - rd_kafka_topic_partition_list_destroy(copied_offsets); - rd_kafka_admin_result_fail(rko_fanout, - RD_KAFKA_RESP_ERR__INVALID_ARG, - "Duplicate partitions not allowed"); - rd_kafka_admin_common_worker_destroy(rk, rko_fanout, - rd_true /*destroy*/); - return; - } + rd_kafka_q_enq(rk->rk_ops, rko); +} - /* Set default error on each partition so that if any of the partitions - * never get a request sent we have an error to indicate it. */ - rd_kafka_topic_partition_list_set_err(copied_offsets, - RD_KAFKA_RESP_ERR__NOOP); - rd_list_init(&rko_fanout->rko_u.admin_request.args, 1, - rd_kafka_topic_partition_list_destroy_free); - rd_list_add(&rko_fanout->rko_u.admin_request.args, copied_offsets); - /* Async query for partition leaders */ - rd_kafka_topic_partition_list_query_leaders_async( - rk, copied_offsets, rd_kafka_admin_timeout_remains(rko_fanout), - RD_KAFKA_REPLYQ(rk->rk_ops, 0), - rd_kafka_DeleteRecords_leaders_queried_cb, rko_fanout); +const rd_kafka_ConfigResource_t **rd_kafka_DescribeConfigs_result_resources( + const rd_kafka_DescribeConfigs_result_t *result, + size_t *cntp) { + return rd_kafka_admin_result_ret_resources( + (const rd_kafka_op_t *)result, cntp); } +/**@}*/ /** - * @brief Get the list of offsets from a DeleteRecords result. + * @name Delete Records + * @{ + * + * + * * - * The returned \p offsets life-time is the same as the \p result object. 
*/ -const rd_kafka_topic_partition_list_t *rd_kafka_DeleteRecords_result_offsets( - const rd_kafka_DeleteRecords_result_t *result) { - const rd_kafka_topic_partition_list_t *offsets; - const rd_kafka_op_t *rko = (const rd_kafka_op_t *)result; - size_t cnt; - rd_kafka_op_type_t reqtype = - rko->rko_u.admin_result.reqtype & ~RD_KAFKA_OP_FLAGMASK; - rd_assert(reqtype == RD_KAFKA_OP_DELETERECORDS); - - cnt = rd_list_cnt(&rko->rko_u.admin_result.results); +rd_kafka_DeleteRecords_t *rd_kafka_DeleteRecords_new( + const rd_kafka_topic_partition_list_t *before_offsets) { + rd_kafka_DeleteRecords_t *del_records; - rd_assert(cnt == 1); + del_records = rd_calloc(1, sizeof(*del_records)); + del_records->offsets = + rd_kafka_topic_partition_list_copy(before_offsets); - offsets = (const rd_kafka_topic_partition_list_t *)rd_list_elem( - &rko->rko_u.admin_result.results, 0); + return del_records; +} - rd_assert(offsets); +void rd_kafka_DeleteRecords_destroy(rd_kafka_DeleteRecords_t *del_records) { + rd_kafka_topic_partition_list_destroy(del_records->offsets); + rd_free(del_records); +} - return offsets; +void rd_kafka_DeleteRecords_destroy_array( + rd_kafka_DeleteRecords_t **del_records, + size_t del_record_cnt) { + size_t i; + for (i = 0; i < del_record_cnt; i++) + rd_kafka_DeleteRecords_destroy(del_records[i]); } -/**@}*/ -/** - * @name Delete groups - * @{ - * - * - * - * + +/** @brief Merge the DeleteRecords response from a single broker + * into the user response list. */ +static void +rd_kafka_DeleteRecords_response_merge(rd_kafka_op_t *rko_fanout, + const rd_kafka_op_t *rko_partial) { + rd_kafka_t *rk = rko_fanout->rko_rk; + const rd_kafka_topic_partition_list_t *partitions; + rd_kafka_topic_partition_list_t *respartitions; + const rd_kafka_topic_partition_t *partition; -rd_kafka_DeleteGroup_t *rd_kafka_DeleteGroup_new(const char *group) { - size_t tsize = strlen(group) + 1; - rd_kafka_DeleteGroup_t *del_group; + rd_assert(rko_partial->rko_evtype == + RD_KAFKA_EVENT_DELETERECORDS_RESULT); - /* Single allocation */ - del_group = rd_malloc(sizeof(*del_group) + tsize); - del_group->group = del_group->data; - memcpy(del_group->group, group, tsize); + /* All partitions (offsets) from the DeleteRecords() call */ + respartitions = + rd_list_elem(&rko_fanout->rko_u.admin_request.fanout.results, 0); - return del_group; -} + if (rko_partial->rko_err) { + /* If there was a request-level error, set the error on + * all requested partitions for this request. 
*/ + const rd_kafka_topic_partition_list_t *reqpartitions; + rd_kafka_topic_partition_t *reqpartition; -void rd_kafka_DeleteGroup_destroy(rd_kafka_DeleteGroup_t *del_group) { - rd_free(del_group); -} + /* Partitions (offsets) from this DeleteRecordsRequest */ + reqpartitions = + rd_list_elem(&rko_partial->rko_u.admin_result.args, 0); -static void rd_kafka_DeleteGroup_free(void *ptr) { - rd_kafka_DeleteGroup_destroy(ptr); -} + RD_KAFKA_TPLIST_FOREACH(reqpartition, reqpartitions) { + rd_kafka_topic_partition_t *respart; -void rd_kafka_DeleteGroup_destroy_array(rd_kafka_DeleteGroup_t **del_groups, - size_t del_group_cnt) { - size_t i; - for (i = 0; i < del_group_cnt; i++) - rd_kafka_DeleteGroup_destroy(del_groups[i]); -} + /* Find result partition */ + respart = rd_kafka_topic_partition_list_find( + respartitions, reqpartition->topic, + reqpartition->partition); -/** - * @brief Group name comparator for DeleteGroup_t - */ -static int rd_kafka_DeleteGroup_cmp(const void *_a, const void *_b) { - const rd_kafka_DeleteGroup_t *a = _a, *b = _b; - return strcmp(a->group, b->group); -} + rd_assert(respart || !*"respart not found"); -/** - * @brief Allocate a new DeleteGroup and make a copy of \p src - */ -static rd_kafka_DeleteGroup_t * -rd_kafka_DeleteGroup_copy(const rd_kafka_DeleteGroup_t *src) { - return rd_kafka_DeleteGroup_new(src->group); + respart->err = rko_partial->rko_err; + } + + return; + } + + /* Partitions from the DeleteRecordsResponse */ + partitions = rd_list_elem(&rko_partial->rko_u.admin_result.results, 0); + + RD_KAFKA_TPLIST_FOREACH(partition, partitions) { + rd_kafka_topic_partition_t *respart; + + + /* Find result partition */ + respart = rd_kafka_topic_partition_list_find( + respartitions, partition->topic, partition->partition); + if (unlikely(!respart)) { + rd_dassert(!*"partition not found"); + + rd_kafka_log(rk, LOG_WARNING, "DELETERECORDS", + "DeleteRecords response contains " + "unexpected %s [%" PRId32 + "] which " + "was not in the request list: ignored", + partition->topic, partition->partition); + continue; + } + + respart->offset = partition->offset; + respart->err = partition->err; + } } + /** - * @brief Parse DeleteGroupsResponse and create ADMIN_RESULT op. + * @brief Parse DeleteRecordsResponse and create ADMIN_RESULT op. 
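+ *
+ * The single result element is a topic+partition list in which each
+ * partition's \c offset field holds the partition's low watermark after
+ * the deletion, and \c err holds the per-partition error, if any.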
*/ static rd_kafka_resp_err_t -rd_kafka_DeleteGroupsResponse_parse(rd_kafka_op_t *rko_req, - rd_kafka_op_t **rko_resultp, - rd_kafka_buf_t *reply, - char *errstr, - size_t errstr_size) { +rd_kafka_DeleteRecordsResponse_parse(rd_kafka_op_t *rko_req, + rd_kafka_op_t **rko_resultp, + rd_kafka_buf_t *reply, + char *errstr, + size_t errstr_size) { const int log_decode_errors = LOG_ERR; - int32_t group_cnt; - int i; - rd_kafka_op_t *rko_result = NULL; + rd_kafka_op_t *rko_result; + rd_kafka_topic_partition_list_t *offsets; rd_kafka_buf_read_throttle_time(reply); - /* #group_error_codes */ - rd_kafka_buf_read_i32(reply, &group_cnt); - - if (group_cnt > rd_list_cnt(&rko_req->rko_u.admin_request.args)) - rd_kafka_buf_parse_fail( - reply, - "Received %" PRId32 - " groups in response " - "when only %d were requested", - group_cnt, rd_list_cnt(&rko_req->rko_u.admin_request.args)); - - rko_result = rd_kafka_admin_result_new(rko_req); - rd_list_init(&rko_result->rko_u.admin_result.results, group_cnt, - rd_kafka_group_result_free); - - for (i = 0; i < (int)group_cnt; i++) { - rd_kafkap_str_t kgroup; - int16_t error_code; - rd_kafka_group_result_t *groupres; - - rd_kafka_buf_read_str(reply, &kgroup); - rd_kafka_buf_read_i16(reply, &error_code); - groupres = rd_kafka_group_result_new( - kgroup.str, RD_KAFKAP_STR_LEN(&kgroup), NULL, - error_code ? rd_kafka_error_new(error_code, NULL) : NULL); + const rd_kafka_topic_partition_field_t fields[] = { + RD_KAFKA_TOPIC_PARTITION_FIELD_PARTITION, + RD_KAFKA_TOPIC_PARTITION_FIELD_OFFSET, + RD_KAFKA_TOPIC_PARTITION_FIELD_ERR, + RD_KAFKA_TOPIC_PARTITION_FIELD_END}; + offsets = rd_kafka_buf_read_topic_partitions(reply, 0, fields); + if (!offsets) + rd_kafka_buf_parse_fail(reply, + "Failed to parse topic partitions"); - rd_list_add(&rko_result->rko_u.admin_result.results, groupres); - } + rko_result = rd_kafka_admin_result_new(rko_req); + rd_list_init(&rko_result->rko_u.admin_result.results, 1, + rd_kafka_topic_partition_list_destroy_free); + rd_list_add(&rko_result->rko_u.admin_result.results, offsets); *rko_resultp = rko_result; return RD_KAFKA_RESP_ERR_NO_ERROR; err_parse: - if (rko_result) - rd_kafka_op_destroy(rko_result); - rd_snprintf(errstr, errstr_size, - "DeleteGroups response protocol parse failure: %s", + "DeleteRecords response protocol parse failure: %s", rd_kafka_err2str(reply->rkbuf_err)); return reply->rkbuf_err; } -/** @brief Merge the DeleteGroups response from a single broker - * into the user response list. +/** + * @brief Creates a ListOffsetsResultInfo with the topic and parition and + * returns the ListOffsetsResultInfo. */ -void rd_kafka_DeleteGroups_response_merge(rd_kafka_op_t *rko_fanout, - const rd_kafka_op_t *rko_partial) { - const rd_kafka_group_result_t *groupres = NULL; - rd_kafka_group_result_t *newgroupres; - const rd_kafka_DeleteGroup_t *grp = - rko_partial->rko_u.admin_result.opaque; - int orig_pos; +rd_kafka_ListOffsetsResultInfo_t * +rd_kafka_ListOffsetsResultInfo_new(rd_kafka_topic_partition_t *rktpar, + rd_ts_t timestamp) { + rd_kafka_ListOffsetsResultInfo_t *result_info; + result_info = rd_calloc(1, sizeof(*result_info)); + result_info->timestamp = timestamp; + result_info->topic_partition = rd_kafka_topic_partition_copy(rktpar); + return result_info; +} - rd_assert(rko_partial->rko_evtype == - RD_KAFKA_EVENT_DELETEGROUPS_RESULT); +/** + * @brief Copies the ListOffsetsResultInfo. 
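+ * (A deep copy: the contained topic partition is duplicated via
+ * rd_kafka_topic_partition_copy() in the constructor.)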
+ */ +static rd_kafka_ListOffsetsResultInfo_t *rd_kafka_ListOffsetsResultInfo_copy( + const rd_kafka_ListOffsetsResultInfo_t *result_info) { + return rd_kafka_ListOffsetsResultInfo_new(result_info->topic_partition, + result_info->timestamp); +} - if (!rko_partial->rko_err) { - /* Proper results. - * We only send one group per request, make sure it matches */ - groupres = - rd_list_elem(&rko_partial->rko_u.admin_result.results, 0); - rd_assert(groupres); - rd_assert(!strcmp(groupres->group, grp->group)); - newgroupres = rd_kafka_group_result_copy(groupres); - } else { - /* Op errored, e.g. timeout */ - newgroupres = rd_kafka_group_result_new( - grp->group, -1, NULL, - rd_kafka_error_new(rko_partial->rko_err, NULL)); - } +/** + * @brief Same as rd_kafka_ListOffsetsResultInfo_copy() but suitable for + * rd_list_copy(). The \p opaque is ignored. + */ +static void *rd_kafka_ListOffsetsResultInfo_copy_opaque(const void *element, + void *opaque) { + return rd_kafka_ListOffsetsResultInfo_copy(element); +} - /* As a convenience to the application we insert group result - * in the same order as they were requested. */ - orig_pos = rd_list_index(&rko_fanout->rko_u.admin_request.args, grp, - rd_kafka_DeleteGroup_cmp); - rd_assert(orig_pos != -1); +/** + * @brief Returns the topic partition of the passed \p result_info. + */ +const rd_kafka_topic_partition_t * +rd_kafka_ListOffsetsResultInfo_topic_partition( + const rd_kafka_ListOffsetsResultInfo_t *result_info) { + return result_info->topic_partition; +} - /* Make sure result is not already set */ - rd_assert(rd_list_elem(&rko_fanout->rko_u.admin_request.fanout.results, - orig_pos) == NULL); +/** + * @brief Returns the timestamp specified for the offset of the + * rd_kafka_ListOffsetsResultInfo_t. + */ +int64_t rd_kafka_ListOffsetsResultInfo_timestamp( + const rd_kafka_ListOffsetsResultInfo_t *result_info) { + return result_info->timestamp; +} - rd_list_set(&rko_fanout->rko_u.admin_request.fanout.results, orig_pos, - newgroupres); +static void rd_kafka_ListOffsetsResultInfo_destroy( + rd_kafka_ListOffsetsResultInfo_t *element) { + rd_kafka_topic_partition_destroy(element->topic_partition); + rd_free(element); } -void rd_kafka_DeleteGroups(rd_kafka_t *rk, - rd_kafka_DeleteGroup_t **del_groups, - size_t del_group_cnt, - const rd_kafka_AdminOptions_t *options, - rd_kafka_queue_t *rkqu) { - rd_kafka_op_t *rko_fanout; - rd_list_t dup_list; - size_t i; - static const struct rd_kafka_admin_fanout_worker_cbs fanout_cbs = { - rd_kafka_DeleteGroups_response_merge, - rd_kafka_group_result_copy_opaque, - }; +static void rd_kafka_ListOffsetsResultInfo_destroy_free(void *element) { + rd_kafka_ListOffsetsResultInfo_destroy(element); +} - rd_assert(rkqu); +/** + * @brief Merges the response of the partial request made for ListOffsets via + * the \p rko_partial into the \p rko_fanout responsible for the + * ListOffsets request. + * @param rko_fanout The rd_kafka_op_t corresponding to the whole original + * ListOffsets request. + * @param rko_partial The rd_kafka_op_t corresponding to the leader specific + * ListOffset request sent after leaders querying. 
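+ *
+ * Partial results are matched into the fanout result list by topic and
+ * partition, since each leader-specific request covers a disjoint subset
+ * of the originally requested partitions.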
+/** + * @brief Merges the response of the partial request made for ListOffsets via + * the \p rko_partial into the \p rko_fanout responsible for the + * ListOffsets request. + * @param rko_fanout The rd_kafka_op_t corresponding to the whole original + * ListOffsets request. + * @param rko_partial The rd_kafka_op_t corresponding to the leader-specific + * ListOffsets request sent after querying the leaders. + */ +static void +rd_kafka_ListOffsets_response_merge(rd_kafka_op_t *rko_fanout, + const rd_kafka_op_t *rko_partial) { + size_t partition_cnt; + size_t total_partitions; + size_t i, j; + rd_assert(rko_partial->rko_evtype == RD_KAFKA_EVENT_LISTOFFSETS_RESULT); + + partition_cnt = rd_list_cnt(&rko_partial->rko_u.admin_result.results); + total_partitions = + rd_list_cnt(&rko_fanout->rko_u.admin_request.fanout.results); + + for (i = 0; i < partition_cnt; i++) { + rd_kafka_ListOffsetsResultInfo_t *partial_result_info = + rd_list_elem(&rko_partial->rko_u.admin_result.results, i); + for (j = 0; j < total_partitions; j++) { + rd_kafka_ListOffsetsResultInfo_t *result_info = + rd_list_elem( + &rko_fanout->rko_u.admin_request.fanout.results, + j); + if (rd_kafka_topic_partition_cmp( + result_info->topic_partition, + partial_result_info->topic_partition) == 0) { + result_info->timestamp = + partial_result_info->timestamp; + rd_kafka_topic_partition_destroy( + result_info->topic_partition); + result_info->topic_partition = + rd_kafka_topic_partition_copy( + partial_result_info->topic_partition); + break; + } + } + } +} - rko_fanout = rd_kafka_admin_fanout_op_new( - rk, RD_KAFKA_OP_DELETEGROUPS, RD_KAFKA_EVENT_DELETEGROUPS_RESULT, - &fanout_cbs, options, rkqu->rkqu_q); +/** + * @brief Returns the array of rd_kafka_ListOffsetsResultInfo_t pointers in the + * given rd_kafka_ListOffsets_result_t and sets \p cntp to the array size. + */ +const rd_kafka_ListOffsetsResultInfo_t ** +rd_kafka_ListOffsets_result_infos(const rd_kafka_ListOffsets_result_t *result, + size_t *cntp) { + *cntp = rd_list_cnt(&result->rko_u.admin_result.results); + return (const rd_kafka_ListOffsetsResultInfo_t **) + result->rko_u.admin_result.results.rl_elems; +} - if (del_group_cnt == 0) { - rd_kafka_admin_result_fail(rko_fanout, - RD_KAFKA_RESP_ERR__INVALID_ARG, - "No groups to delete"); - rd_kafka_admin_common_worker_destroy(rk, rko_fanout, - rd_true /*destroy*/); +/** + * @brief Admin compatible API to parse the ListOffsetsResponse buffer + * provided in \p reply. + */ +static rd_kafka_resp_err_t +rd_kafka_ListOffsetsResponse_parse(rd_kafka_op_t *rko_req, + rd_kafka_op_t **rko_resultp, + rd_kafka_buf_t *reply, + char *errstr, + size_t errstr_size) { + rd_list_t *result_list = + rd_list_new(1, rd_kafka_ListOffsetsResultInfo_destroy_free); + rd_kafka_op_t *rko_result; + rd_kafka_parse_ListOffsets(reply, NULL, result_list); + if (reply->rkbuf_err) { + rd_snprintf(errstr, errstr_size, + "Error parsing ListOffsets response: %s", + rd_kafka_err2str(reply->rkbuf_err)); + rd_list_destroy(result_list); /* don't leak on parse failure */ + return reply->rkbuf_err; + } + + rko_result = rd_kafka_admin_result_new(rko_req); + rd_list_init_copy(&rko_result->rko_u.admin_result.results, result_list); + rd_list_copy_to(&rko_result->rko_u.admin_result.results, result_list, + rd_kafka_ListOffsetsResultInfo_copy_opaque, NULL); + rd_list_destroy(result_list); + + *rko_resultp = rko_result; + return RD_KAFKA_RESP_ERR_NO_ERROR; +} + +/** + * @brief Should the received error code cause a metadata refresh? + */ +static rd_bool_t rd_kafka_admin_result_err_refresh(rd_kafka_resp_err_t err) { + switch (err) { + case RD_KAFKA_RESP_ERR_NOT_LEADER_OR_FOLLOWER: + case RD_KAFKA_RESP_ERR_LEADER_NOT_AVAILABLE: + return rd_true; + default: + return rd_false; + } +} + +/** + * @brief ListOffsets result handler for internal side effects.
+ */ +static void rd_kafka_ListOffsets_handle_result(rd_kafka_op_t *rko_result) { + rd_kafka_topic_partition_list_t *rktpars; + rd_kafka_ListOffsetsResultInfo_t *result_info; + rd_kafka_t *rk; + rd_kafka_resp_err_t err, rktpar_err; + rd_kafka_topic_partition_t *rktpar; + size_t i; + + err = rko_result->rko_err; + if (rd_list_empty(&rko_result->rko_u.admin_result.args) || + rd_list_empty(&rko_result->rko_u.admin_result.results)) return; + + rk = rko_result->rko_rk; + rktpars = rd_list_elem(&rko_result->rko_u.admin_result.args, 0); + rd_kafka_wrlock(rk); + i = 0; + RD_KAFKA_TPLIST_FOREACH(rktpar, rktpars) { + result_info = + rd_list_elem(&rko_result->rko_u.admin_result.results, i); + rktpar_err = err ? err : result_info->topic_partition->err; + + if (rd_kafka_admin_result_err_refresh(rktpar_err)) { + rd_kafka_metadata_cache_delete_by_name(rk, + rktpar->topic); + } + i++; } + rd_kafka_wrunlock(rk); +} - /* Copy group list and store it on the request op. - * Maintain original ordering. */ - rd_list_init(&rko_fanout->rko_u.admin_request.args, (int)del_group_cnt, - rd_kafka_DeleteGroup_free); - for (i = 0; i < del_group_cnt; i++) - rd_list_add(&rko_fanout->rko_u.admin_request.args, - rd_kafka_DeleteGroup_copy(del_groups[i])); +/** + * @brief Call when leaders have been queried to progress the ListOffsets + * admin op to its next phase, sending ListOffsets to partition + * leaders. + */ +static rd_kafka_op_res_t +rd_kafka_ListOffsets_leaders_queried_cb(rd_kafka_t *rk, + rd_kafka_q_t *rkq, + rd_kafka_op_t *reply) { - /* Check for duplicates. - * Make a temporary copy of the group list and sort it to check for - * duplicates, we don't want the original list sorted since we want - * to maintain ordering. */ - rd_list_init(&dup_list, - rd_list_cnt(&rko_fanout->rko_u.admin_request.args), NULL); - rd_list_copy_to(&dup_list, &rko_fanout->rko_u.admin_request.args, NULL, - NULL); - rd_list_sort(&dup_list, rd_kafka_DeleteGroup_cmp); - if (rd_list_find_duplicate(&dup_list, rd_kafka_DeleteGroup_cmp)) { - rd_list_destroy(&dup_list); - rd_kafka_admin_result_fail(rko_fanout, - RD_KAFKA_RESP_ERR__INVALID_ARG, - "Duplicate groups not allowed"); + rd_kafka_resp_err_t err = reply->rko_err; + const rd_list_t *leaders = + reply->rko_u.leaders.leaders; /* Possibly NULL (on err) */ + rd_kafka_topic_partition_list_t *partitions = + reply->rko_u.leaders.partitions; /* Possibly NULL (on err) */ + rd_kafka_op_t *rko_fanout = reply->rko_u.leaders.opaque; + rd_kafka_topic_partition_list_t *topic_partitions; + rd_kafka_topic_partition_t *rktpar; + size_t partition_cnt; + const struct rd_kafka_partition_leader *leader; + size_t i; + static const struct rd_kafka_admin_worker_cbs cbs = { + rd_kafka_ListOffsetsRequest_admin, + rd_kafka_ListOffsetsResponse_parse, + }; + + rd_assert((rko_fanout->rko_type & ~RD_KAFKA_OP_FLAGMASK) == + RD_KAFKA_OP_ADMIN_FANOUT); + + if (err) { + rd_kafka_admin_result_fail( + rko_fanout, err, "Failed to query partition leaders: %s", + err == RD_KAFKA_RESP_ERR__NOENT ? "No leaders found" + : rd_kafka_err2str(err)); rd_kafka_admin_common_worker_destroy(rk, rko_fanout, rd_true /*destroy*/); - return; + return RD_KAFKA_OP_RES_HANDLED; } - rd_list_destroy(&dup_list); - - /* Prepare results list where fanned out op's results will be - * accumulated. 
*/ + /* Create fanout results */ + topic_partitions = + rd_list_elem(&rko_fanout->rko_u.admin_request.args, 0); + partition_cnt = topic_partitions->cnt; rd_list_init(&rko_fanout->rko_u.admin_request.fanout.results, - (int)del_group_cnt, rd_kafka_group_result_free); - rko_fanout->rko_u.admin_request.fanout.outstanding = (int)del_group_cnt; + partition_cnt, + rd_kafka_ListOffsetsResultInfo_destroy_free); + + for (i = 0; i < partition_cnt; i++) { + rd_kafka_topic_partition_t *topic_partition = + &topic_partitions->elems[i]; + rd_kafka_ListOffsetsResultInfo_t *result_element = + rd_kafka_ListOffsetsResultInfo_new(topic_partition, -1); + rd_kafka_topic_partition_set_from_fetch_pos( + result_element->topic_partition, + RD_KAFKA_FETCH_POS(RD_KAFKA_OFFSET_INVALID, -1)); + result_element->topic_partition->err = + RD_KAFKA_RESP_ERR_NO_ERROR; + rd_list_add(&rko_fanout->rko_u.admin_request.fanout.results, + result_element); + } - /* Create individual request ops for each group. - * FIXME: A future optimization is to coalesce all groups for a single - * coordinator into one op. */ - for (i = 0; i < del_group_cnt; i++) { - static const struct rd_kafka_admin_worker_cbs cbs = { - rd_kafka_DeleteGroupsRequest, - rd_kafka_DeleteGroupsResponse_parse, - }; - rd_kafka_DeleteGroup_t *grp = - rd_list_elem(&rko_fanout->rko_u.admin_request.args, (int)i); + /* Set errors to corresponding result partitions */ + RD_KAFKA_TPLIST_FOREACH(rktpar, partitions) { + rd_kafka_ListOffsetsResultInfo_t *result_element; + if (!rktpar->err) + continue; + result_element = NULL; + for (i = 0; i < partition_cnt; i++) { + result_element = rd_list_elem( + &rko_fanout->rko_u.admin_request.fanout.results, i); + if (rd_kafka_topic_partition_cmp( + result_element->topic_partition, rktpar) == 0) + break; + } + result_element->topic_partition->err = rktpar->err; + } + + /* For each leader send a request for its partitions */ + rko_fanout->rko_u.admin_request.fanout.outstanding = + rd_list_cnt(leaders); + + RD_LIST_FOREACH(leader, leaders, i) { rd_kafka_op_t *rko = rd_kafka_admin_request_op_new( - rk, RD_KAFKA_OP_DELETEGROUPS, - RD_KAFKA_EVENT_DELETEGROUPS_RESULT, &cbs, options, - rk->rk_ops); + rk, RD_KAFKA_OP_LISTOFFSETS, + RD_KAFKA_EVENT_LISTOFFSETS_RESULT, &cbs, + &rko_fanout->rko_u.admin_request.options, rk->rk_ops); rko->rko_u.admin_request.fanout_parent = rko_fanout; - rko->rko_u.admin_request.broker_id = - RD_KAFKA_ADMIN_TARGET_COORDINATOR; - rko->rko_u.admin_request.coordtype = RD_KAFKA_COORD_GROUP; - rko->rko_u.admin_request.coordkey = rd_strdup(grp->group); - - /* Set the group name as the opaque so the fanout worker use it - * to fill in errors. - * References rko_fanout's memory, which will always outlive - * the fanned out op. */ - rd_kafka_AdminOptions_set_opaque( - &rko->rko_u.admin_request.options, grp); + rko->rko_u.admin_request.broker_id = leader->rkb->rkb_nodeid; + rd_kafka_topic_partition_list_sort_by_topic(leader->partitions); rd_list_init(&rko->rko_u.admin_request.args, 1, - rd_kafka_DeleteGroup_free); - rd_list_add(&rko->rko_u.admin_request.args, - rd_kafka_DeleteGroup_copy(del_groups[i])); + rd_kafka_topic_partition_list_destroy_free); + rd_list_add( + &rko->rko_u.admin_request.args, + rd_kafka_topic_partition_list_copy(leader->partitions)); + /* Enqueue op for admin_worker() to transition to next state */ rd_kafka_q_enq(rk->rk_ops, rko); } -} + return RD_KAFKA_OP_RES_HANDLED; +} /** - * @brief Get an array of group results from a DeleteGroups result. 
- * - * The returned \p groups life-time is the same as the \p result object. - * @param cntp is updated to the number of elements in the array. + * @brief Call when leaders have been queried to progress the DeleteRecords + * admin op to its next phase, sending DeleteRecords to partition + * leaders. */ -const rd_kafka_group_result_t **rd_kafka_DeleteGroups_result_groups( - const rd_kafka_DeleteGroups_result_t *result, - size_t *cntp) { - return rd_kafka_admin_result_ret_groups((const rd_kafka_op_t *)result, - cntp); -} +static rd_kafka_op_res_t +rd_kafka_DeleteRecords_leaders_queried_cb(rd_kafka_t *rk, + rd_kafka_q_t *rkq, + rd_kafka_op_t *reply) { + rd_kafka_resp_err_t err = reply->rko_err; + const rd_list_t *leaders = + reply->rko_u.leaders.leaders; /* Possibly NULL (on err) */ + rd_kafka_topic_partition_list_t *partitions = + reply->rko_u.leaders.partitions; /* Possibly NULL (on err) */ + rd_kafka_op_t *rko_fanout = reply->rko_u.leaders.opaque; + rd_kafka_topic_partition_t *rktpar; + rd_kafka_topic_partition_list_t *offsets; + const struct rd_kafka_partition_leader *leader; + static const struct rd_kafka_admin_worker_cbs cbs = { + rd_kafka_DeleteRecordsRequest, + rd_kafka_DeleteRecordsResponse_parse, + }; + int i; + rd_assert((rko_fanout->rko_type & ~RD_KAFKA_OP_FLAGMASK) == + RD_KAFKA_OP_ADMIN_FANOUT); -/**@}*/ + if (err == RD_KAFKA_RESP_ERR__DESTROY) + goto err; + /* Requested offsets */ + offsets = rd_list_elem(&rko_fanout->rko_u.admin_request.args, 0); -/** - * @name Delete consumer group offsets (committed offsets) - * @{ - * - * - * - * - */ + /* Update the error field of each partition from the + * leader-queried partition list so that ERR_UNKNOWN_TOPIC_OR_PART + * and similar are propagated, since those partitions are not + * included in the leaders list. */ + RD_KAFKA_TPLIST_FOREACH(rktpar, partitions) { + rd_kafka_topic_partition_t *rktpar2; -rd_kafka_DeleteConsumerGroupOffsets_t *rd_kafka_DeleteConsumerGroupOffsets_new( - const char *group, - const rd_kafka_topic_partition_list_t *partitions) { - size_t tsize = strlen(group) + 1; - rd_kafka_DeleteConsumerGroupOffsets_t *del_grpoffsets; + if (!rktpar->err) + continue; - rd_assert(partitions); + rktpar2 = rd_kafka_topic_partition_list_find( + offsets, rktpar->topic, rktpar->partition); + rd_assert(rktpar2); + rktpar2->err = rktpar->err; + } - /* Single allocation */ - del_grpoffsets = rd_malloc(sizeof(*del_grpoffsets) + tsize); - del_grpoffsets->group = del_grpoffsets->data; - memcpy(del_grpoffsets->group, group, tsize); - del_grpoffsets->partitions = - rd_kafka_topic_partition_list_copy(partitions); - return del_grpoffsets; -} + if (err) { + err: + rd_kafka_admin_result_fail( + rko_fanout, err, "Failed to query partition leaders: %s", + err == RD_KAFKA_RESP_ERR__NOENT ? "No leaders found" + : rd_kafka_err2str(err)); + rd_kafka_admin_common_worker_destroy(rk, rko_fanout, + rd_true /*destroy*/); + return RD_KAFKA_OP_RES_HANDLED; + } -void rd_kafka_DeleteConsumerGroupOffsets_destroy( - rd_kafka_DeleteConsumerGroupOffsets_t *del_grpoffsets) { - rd_kafka_topic_partition_list_destroy(del_grpoffsets->partitions); - rd_free(del_grpoffsets); -} + /* The response list is one element deep and that element is a + * rd_kafka_topic_partition_list_t with the results of the deletes.
*/ + rd_list_init(&rko_fanout->rko_u.admin_request.fanout.results, 1, + rd_kafka_topic_partition_list_destroy_free); + rd_list_add(&rko_fanout->rko_u.admin_request.fanout.results, + rd_kafka_topic_partition_list_copy(offsets)); -static void rd_kafka_DeleteConsumerGroupOffsets_free(void *ptr) { - rd_kafka_DeleteConsumerGroupOffsets_destroy(ptr); -} + rko_fanout->rko_u.admin_request.fanout.outstanding = + rd_list_cnt(leaders); -void rd_kafka_DeleteConsumerGroupOffsets_destroy_array( - rd_kafka_DeleteConsumerGroupOffsets_t **del_grpoffsets, - size_t del_grpoffsets_cnt) { - size_t i; - for (i = 0; i < del_grpoffsets_cnt; i++) - rd_kafka_DeleteConsumerGroupOffsets_destroy(del_grpoffsets[i]); -} + rd_assert(rd_list_cnt(leaders) > 0); + /* For each leader send a request for its partitions */ + RD_LIST_FOREACH(leader, leaders, i) { + rd_kafka_op_t *rko = rd_kafka_admin_request_op_new( + rk, RD_KAFKA_OP_DELETERECORDS, + RD_KAFKA_EVENT_DELETERECORDS_RESULT, &cbs, + &rko_fanout->rko_u.admin_request.options, rk->rk_ops); + rko->rko_u.admin_request.fanout_parent = rko_fanout; + rko->rko_u.admin_request.broker_id = leader->rkb->rkb_nodeid; -/** - * @brief Allocate a new DeleteGroup and make a copy of \p src - */ -static rd_kafka_DeleteConsumerGroupOffsets_t * -rd_kafka_DeleteConsumerGroupOffsets_copy( - const rd_kafka_DeleteConsumerGroupOffsets_t *src) { - return rd_kafka_DeleteConsumerGroupOffsets_new(src->group, - src->partitions); + rd_kafka_topic_partition_list_sort_by_topic(leader->partitions); + + rd_list_init(&rko->rko_u.admin_request.args, 1, + rd_kafka_topic_partition_list_destroy_free); + rd_list_add( + &rko->rko_u.admin_request.args, + rd_kafka_topic_partition_list_copy(leader->partitions)); + + /* Enqueue op for admin_worker() to transition to next state */ + rd_kafka_q_enq(rk->rk_ops, rko); + } + + return RD_KAFKA_OP_RES_HANDLED; } -/** - * @brief Parse OffsetDeleteResponse and create ADMIN_RESULT op. - */ -static rd_kafka_resp_err_t -rd_kafka_OffsetDeleteResponse_parse(rd_kafka_op_t *rko_req, - rd_kafka_op_t **rko_resultp, - rd_kafka_buf_t *reply, - char *errstr, - size_t errstr_size) { - const int log_decode_errors = LOG_ERR; - rd_kafka_op_t *rko_result; - int16_t ErrorCode; - rd_kafka_topic_partition_list_t *partitions = NULL; - const rd_kafka_DeleteConsumerGroupOffsets_t *del_grpoffsets; +void rd_kafka_DeleteRecords(rd_kafka_t *rk, + rd_kafka_DeleteRecords_t **del_records, + size_t del_record_cnt, + const rd_kafka_AdminOptions_t *options, + rd_kafka_queue_t *rkqu) { + rd_kafka_op_t *rko_fanout; + static const struct rd_kafka_admin_fanout_worker_cbs fanout_cbs = { + rd_kafka_DeleteRecords_response_merge, + rd_kafka_topic_partition_list_copy_opaque, + }; + const rd_kafka_topic_partition_list_t *offsets; + rd_kafka_topic_partition_list_t *copied_offsets; - rd_kafka_buf_read_i16(reply, &ErrorCode); - if (ErrorCode) { - rd_snprintf(errstr, errstr_size, - "OffsetDelete response error: %s", - rd_kafka_err2str(ErrorCode)); - return ErrorCode; + rd_assert(rkqu); + + rko_fanout = rd_kafka_admin_fanout_op_new( + rk, RD_KAFKA_OP_DELETERECORDS, RD_KAFKA_EVENT_DELETERECORDS_RESULT, + &fanout_cbs, options, rkqu->rkqu_q); + + if (del_record_cnt != 1) { + /* We only support one DeleteRecords per call since there + * is no point in passing multiples, but the API still + * needs to be extensible/future-proof. 
*/ + rd_kafka_admin_result_fail(rko_fanout, + RD_KAFKA_RESP_ERR__INVALID_ARG, + "Exactly one DeleteRecords must be " + "passed"); + rd_kafka_admin_common_worker_destroy(rk, rko_fanout, + rd_true /*destroy*/); + return; } - rd_kafka_buf_read_throttle_time(reply); + offsets = del_records[0]->offsets; + if (offsets == NULL || offsets->cnt == 0) { + rd_kafka_admin_result_fail(rko_fanout, + RD_KAFKA_RESP_ERR__INVALID_ARG, + "No records to delete"); + rd_kafka_admin_common_worker_destroy(rk, rko_fanout, + rd_true /*destroy*/); + return; + } - const rd_kafka_topic_partition_field_t fields[] = { - RD_KAFKA_TOPIC_PARTITION_FIELD_PARTITION, - RD_KAFKA_TOPIC_PARTITION_FIELD_ERR, - RD_KAFKA_TOPIC_PARTITION_FIELD_END}; - partitions = rd_kafka_buf_read_topic_partitions(reply, 16, fields); - if (!partitions) { - rd_snprintf(errstr, errstr_size, - "Failed to parse OffsetDeleteResponse partitions"); - return RD_KAFKA_RESP_ERR__BAD_MSG; + /* Copy offsets list and store it on the request op */ + copied_offsets = rd_kafka_topic_partition_list_copy(offsets); + if (rd_kafka_topic_partition_list_has_duplicates( + copied_offsets, rd_false /*check partition*/)) { + rd_kafka_topic_partition_list_destroy(copied_offsets); + rd_kafka_admin_result_fail(rko_fanout, + RD_KAFKA_RESP_ERR__INVALID_ARG, + "Duplicate partitions not allowed"); + rd_kafka_admin_common_worker_destroy(rk, rko_fanout, + rd_true /*destroy*/); + return; } + /* Set default error on each partition so that if any of the partitions + * never get a request sent we have an error to indicate it. */ + rd_kafka_topic_partition_list_set_err(copied_offsets, + RD_KAFKA_RESP_ERR__NOOP); - /* Create result op and group_result_t */ - rko_result = rd_kafka_admin_result_new(rko_req); - del_grpoffsets = rd_list_elem(&rko_result->rko_u.admin_result.args, 0); + rd_list_init(&rko_fanout->rko_u.admin_request.args, 1, + rd_kafka_topic_partition_list_destroy_free); + rd_list_add(&rko_fanout->rko_u.admin_request.args, copied_offsets); - rd_list_init(&rko_result->rko_u.admin_result.results, 1, - rd_kafka_group_result_free); - rd_list_add(&rko_result->rko_u.admin_result.results, - rd_kafka_group_result_new(del_grpoffsets->group, -1, - partitions, NULL)); - rd_kafka_topic_partition_list_destroy(partitions); + /* Async query for partition leaders */ + rd_kafka_topic_partition_list_query_leaders_async( + rk, copied_offsets, rd_kafka_admin_timeout_remains(rko_fanout), + RD_KAFKA_REPLYQ(rk->rk_ops, 0), + rd_kafka_DeleteRecords_leaders_queried_cb, rko_fanout); +} - *rko_resultp = rko_result; - return RD_KAFKA_RESP_ERR_NO_ERROR; +void rd_kafka_ListOffsets(rd_kafka_t *rk, + rd_kafka_topic_partition_list_t *topic_partitions, + const rd_kafka_AdminOptions_t *options, + rd_kafka_queue_t *rkqu) { + int i; + rd_kafka_op_t *rko_fanout; + rd_kafka_topic_partition_list_t *copied_topic_partitions; + rd_list_t *topic_partitions_sorted = NULL; -err_parse: - rd_snprintf(errstr, errstr_size, - "OffsetDelete response protocol parse failure: %s", - rd_kafka_err2str(reply->rkbuf_err)); - return reply->rkbuf_err; -} + static const struct rd_kafka_admin_fanout_worker_cbs fanout_cbs = { + rd_kafka_ListOffsets_response_merge, + rd_kafka_ListOffsetsResultInfo_copy_opaque, + rd_kafka_topic_partition_list_copy_opaque}; + rko_fanout = rd_kafka_admin_fanout_op_new( + rk, RD_KAFKA_OP_LISTOFFSETS, RD_KAFKA_EVENT_LISTOFFSETS_RESULT, + &fanout_cbs, options, rkqu->rkqu_q); -void rd_kafka_DeleteConsumerGroupOffsets( - rd_kafka_t *rk, - rd_kafka_DeleteConsumerGroupOffsets_t **del_grpoffsets, - size_t 
del_grpoffsets_cnt, - const rd_kafka_AdminOptions_t *options, - rd_kafka_queue_t *rkqu) { - static const struct rd_kafka_admin_worker_cbs cbs = { - rd_kafka_OffsetDeleteRequest, - rd_kafka_OffsetDeleteResponse_parse, - }; - rd_kafka_op_t *rko; + rd_kafka_admin_request_op_result_cb_set( + rko_fanout, rd_kafka_ListOffsets_handle_result); + + if (topic_partitions->cnt) { + for (i = 0; i < topic_partitions->cnt; i++) { + if (!topic_partitions->elems[i].topic[0]) { + rd_kafka_admin_result_fail( + rko_fanout, RD_KAFKA_RESP_ERR__INVALID_ARG, + "Partition topic name at index %d must be " + "non-empty", + i); + goto err; + } + if (topic_partitions->elems[i].partition < 0) { + rd_kafka_admin_result_fail( + rko_fanout, RD_KAFKA_RESP_ERR__INVALID_ARG, + "Partition at index %d cannot be negative", + i); + goto err; + } + } - rd_assert(rkqu); - rko = rd_kafka_admin_request_op_new( - rk, RD_KAFKA_OP_DELETECONSUMERGROUPOFFSETS, - RD_KAFKA_EVENT_DELETECONSUMERGROUPOFFSETS_RESULT, &cbs, options, - rkqu->rkqu_q); + topic_partitions_sorted = + rd_list_new(topic_partitions->cnt, + rd_kafka_topic_partition_destroy_free); + for (i = 0; i < topic_partitions->cnt; i++) + rd_list_add(topic_partitions_sorted, + rd_kafka_topic_partition_copy( + &topic_partitions->elems[i])); - if (del_grpoffsets_cnt != 1) { - /* For simplicity we only support one single group for now */ - rd_kafka_admin_result_fail(rko, RD_KAFKA_RESP_ERR__INVALID_ARG, - "Exactly one " - "DeleteConsumerGroupOffsets must " - "be passed"); - rd_kafka_admin_common_worker_destroy(rk, rko, - rd_true /*destroy*/); - return; - } + rd_list_sort(topic_partitions_sorted, + rd_kafka_topic_partition_cmp); + if (rd_list_find_duplicate(topic_partitions_sorted, + rd_kafka_topic_partition_cmp)) { + rd_kafka_admin_result_fail( + rko_fanout, RD_KAFKA_RESP_ERR__INVALID_ARG, + "Partitions must not contain duplicates"); + goto err; + } + } - rko->rko_u.admin_request.broker_id = RD_KAFKA_ADMIN_TARGET_COORDINATOR; - rko->rko_u.admin_request.coordtype = RD_KAFKA_COORD_GROUP; - rko->rko_u.admin_request.coordkey = rd_strdup(del_grpoffsets[0]->group); + for (i = 0; i < topic_partitions->cnt; i++) { + rd_kafka_topic_partition_t *partition = + &topic_partitions->elems[i]; + if (partition->offset < RD_KAFKA_OFFSET_SPEC_MAX_TIMESTAMP) { + rd_kafka_admin_result_fail( + rko_fanout, RD_KAFKA_RESP_ERR__INVALID_ARG, + "Partition %d has an invalid offset %" PRId64, i, + partition->offset); + goto err; + } + } - /* Store copy of group on request so the group name can be reached - * from the response parser. */ - rd_list_init(&rko->rko_u.admin_request.args, 1, - rd_kafka_DeleteConsumerGroupOffsets_free); - rd_list_add( - &rko->rko_u.admin_request.args, - rd_kafka_DeleteConsumerGroupOffsets_copy(del_grpoffsets[0])); + copied_topic_partitions = + rd_kafka_topic_partition_list_copy(topic_partitions); + rd_list_init(&rko_fanout->rko_u.admin_request.args, 1, + rd_kafka_topic_partition_list_destroy_free); + rd_list_add(&rko_fanout->rko_u.admin_request.args, + copied_topic_partitions); + + if (topic_partitions->cnt) { + /* Async query for partition leaders */ + rd_kafka_topic_partition_list_query_leaders_async( + rk, copied_topic_partitions, + rd_kafka_admin_timeout_remains(rko_fanout), + RD_KAFKA_REPLYQ(rk->rk_ops, 0), + rd_kafka_ListOffsets_leaders_queried_cb, rko_fanout); + } else { + /* Empty list */ + rd_kafka_op_t *rko_result = + rd_kafka_admin_result_new(rko_fanout); + /* Enqueue empty result on application queue, we're done. 
*/ + rd_kafka_admin_result_enq(rko_fanout, rko_result); + rd_kafka_admin_common_worker_destroy(rk, rko_fanout, + rd_true /*destroy*/); + } - rd_kafka_q_enq(rk->rk_ops, rko); + RD_IF_FREE(topic_partitions_sorted, rd_list_destroy); + return; +err: + RD_IF_FREE(topic_partitions_sorted, rd_list_destroy); + rd_kafka_admin_common_worker_destroy(rk, rko_fanout, + rd_true /*destroy*/); } - /** - * @brief Get an array of group results from a DeleteGroups result. + * @brief Get the list of offsets from a DeleteRecords result. * - * The returned \p groups life-time is the same as the \p result object. - * @param cntp is updated to the number of elements in the array. + * The returned \p offsets life-time is the same as the \p result object. */ -const rd_kafka_group_result_t ** -rd_kafka_DeleteConsumerGroupOffsets_result_groups( - const rd_kafka_DeleteConsumerGroupOffsets_result_t *result, - size_t *cntp) { - return rd_kafka_admin_result_ret_groups((const rd_kafka_op_t *)result, - cntp); -} +const rd_kafka_topic_partition_list_t *rd_kafka_DeleteRecords_result_offsets( + const rd_kafka_DeleteRecords_result_t *result) { + const rd_kafka_topic_partition_list_t *offsets; + const rd_kafka_op_t *rko = (const rd_kafka_op_t *)result; + size_t cnt; -void rd_kafka_DeleteConsumerGroupOffsets( - rd_kafka_t *rk, - rd_kafka_DeleteConsumerGroupOffsets_t **del_grpoffsets, - size_t del_grpoffsets_cnt, - const rd_kafka_AdminOptions_t *options, - rd_kafka_queue_t *rkqu); + rd_kafka_op_type_t reqtype = + rko->rko_u.admin_result.reqtype & ~RD_KAFKA_OP_FLAGMASK; + rd_assert(reqtype == RD_KAFKA_OP_DELETERECORDS); + + cnt = rd_list_cnt(&rko->rko_u.admin_result.results); + + rd_assert(cnt == 1); + + offsets = (const rd_kafka_topic_partition_list_t *)rd_list_elem( + &rko->rko_u.admin_result.results, 0); + + rd_assert(offsets); + + return offsets; +} /**@}*/ + /** - * @name CreateAcls + * @name Delete groups * @{ * * * + * */ -const char *rd_kafka_AclOperation_name(rd_kafka_AclOperation_t operation) { - static const char *names[] = {"UNKNOWN", - "ANY", - "ALL", - "READ", - "WRITE", - "CREATE", - "DELETE", - "ALTER", - "DESCRIBE", - "CLUSTER_ACTION", - "DESCRIBE_CONFIGS", - "ALTER_CONFIGS", - "IDEMPOTENT_WRITE"}; +rd_kafka_DeleteGroup_t *rd_kafka_DeleteGroup_new(const char *group) { + size_t tsize = strlen(group) + 1; + rd_kafka_DeleteGroup_t *del_group; - if ((unsigned int)operation >= - (unsigned int)RD_KAFKA_ACL_OPERATION__CNT) - return "UNSUPPORTED"; + /* Single allocation */ + del_group = rd_malloc(sizeof(*del_group) + tsize); + del_group->group = del_group->data; + memcpy(del_group->group, group, tsize); - return names[operation]; + return del_group; } -const char * -rd_kafka_AclPermissionType_name(rd_kafka_AclPermissionType_t permission_type) { - static const char *names[] = {"UNKNOWN", "ANY", "DENY", "ALLOW"}; - - if ((unsigned int)permission_type >= - (unsigned int)RD_KAFKA_ACL_PERMISSION_TYPE__CNT) - return "UNSUPPORTED"; +void rd_kafka_DeleteGroup_destroy(rd_kafka_DeleteGroup_t *del_group) { + rd_free(del_group); +} - return names[permission_type]; +static void rd_kafka_DeleteGroup_free(void *ptr) { + rd_kafka_DeleteGroup_destroy(ptr); } -static rd_kafka_AclBinding_t * -rd_kafka_AclBinding_new0(rd_kafka_ResourceType_t restype, - const char *name, - rd_kafka_ResourcePatternType_t resource_pattern_type, - const char *principal, - const char *host, - rd_kafka_AclOperation_t operation, - rd_kafka_AclPermissionType_t permission_type, - rd_kafka_resp_err_t err, - const char *errstr) { - rd_kafka_AclBinding_t *acl_binding; 
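rd_kafka_DeleteRecords() and its result accessor rd_kafka_DeleteRecords_result_offsets() are now fully in place above. A minimal usage sketch follows, under the same assumptions as before (initialized rd_kafka_t *rk; topic name and cutoff offset are placeholders; error handling trimmed):

#include <inttypes.h>
#include <stdio.h>
#include <librdkafka/rdkafka.h>

/* Truncate one partition with DeleteRecords and read the per-partition
 * outcome back from the result's offsets list. */
static void delete_records_before(rd_kafka_t *rk) {
        rd_kafka_queue_t *rkqu = rd_kafka_queue_new(rk);
        rd_kafka_topic_partition_list_t *before =
            rd_kafka_topic_partition_list_new(1);
        rd_kafka_DeleteRecords_t *del;
        rd_kafka_event_t *rkev;
        const rd_kafka_DeleteRecords_result_t *result;

        /* Delete everything before offset 1000 in partition 0. */
        rd_kafka_topic_partition_list_add(before, "mytopic", 0)->offset = 1000;
        del = rd_kafka_DeleteRecords_new(before);

        /* Exactly one DeleteRecords_t per call, as enforced above. */
        rd_kafka_DeleteRecords(rk, &del, 1, NULL /*default options*/, rkqu);

        rkev   = rd_kafka_queue_poll(rkqu, 10 * 1000);
        result = rkev ? rd_kafka_event_DeleteRecords_result(rkev) : NULL;
        if (result) {
                const rd_kafka_topic_partition_list_t *offsets =
                    rd_kafka_DeleteRecords_result_offsets(result);
                int i;
                for (i = 0; i < offsets->cnt; i++)
                        /* On success, .offset is the new low watermark. */
                        printf("%s [%" PRId32 "]: low watermark %" PRId64
                               " (%s)\n",
                               offsets->elems[i].topic,
                               offsets->elems[i].partition,
                               offsets->elems[i].offset,
                               rd_kafka_err2str(offsets->elems[i].err));
        }

        if (rkev)
                rd_kafka_event_destroy(rkev);
        rd_kafka_DeleteRecords_destroy(del);
        rd_kafka_topic_partition_list_destroy(before);
        rd_kafka_queue_destroy(rkqu);
}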
+void rd_kafka_DeleteGroup_destroy_array(rd_kafka_DeleteGroup_t **del_groups, + size_t del_group_cnt) { + size_t i; + for (i = 0; i < del_group_cnt; i++) + rd_kafka_DeleteGroup_destroy(del_groups[i]); +} - acl_binding = rd_calloc(1, sizeof(*acl_binding)); - acl_binding->name = name != NULL ? rd_strdup(name) : NULL; - acl_binding->principal = - principal != NULL ? rd_strdup(principal) : NULL; - acl_binding->host = host != NULL ? rd_strdup(host) : NULL; - acl_binding->restype = restype; - acl_binding->resource_pattern_type = resource_pattern_type; - acl_binding->operation = operation; - acl_binding->permission_type = permission_type; - if (err) - acl_binding->error = rd_kafka_error_new(err, "%s", errstr); +/** + * @brief Group name comparator for DeleteGroup_t + */ +static int rd_kafka_DeleteGroup_cmp(const void *_a, const void *_b) { + const rd_kafka_DeleteGroup_t *a = _a, *b = _b; + return strcmp(a->group, b->group); +} - return acl_binding; +/** + * @brief Allocate a new DeleteGroup and make a copy of \p src + */ +static rd_kafka_DeleteGroup_t * +rd_kafka_DeleteGroup_copy(const rd_kafka_DeleteGroup_t *src) { + return rd_kafka_DeleteGroup_new(src->group); } -rd_kafka_AclBinding_t * -rd_kafka_AclBinding_new(rd_kafka_ResourceType_t restype, - const char *name, - rd_kafka_ResourcePatternType_t resource_pattern_type, - const char *principal, - const char *host, - rd_kafka_AclOperation_t operation, - rd_kafka_AclPermissionType_t permission_type, - char *errstr, - size_t errstr_size) { - if (!name) { - rd_snprintf(errstr, errstr_size, "Invalid resource name"); - return NULL; - } - if (!principal) { - rd_snprintf(errstr, errstr_size, "Invalid principal"); - return NULL; - } - if (!host) { - rd_snprintf(errstr, errstr_size, "Invalid host"); - return NULL; - } - if (restype == RD_KAFKA_RESOURCE_ANY || - restype <= RD_KAFKA_RESOURCE_UNKNOWN || - restype >= RD_KAFKA_RESOURCE__CNT) { - rd_snprintf(errstr, errstr_size, "Invalid resource type"); - return NULL; - } +/** + * @brief Parse DeleteGroupsResponse and create ADMIN_RESULT op. 
+ */ +static rd_kafka_resp_err_t +rd_kafka_DeleteGroupsResponse_parse(rd_kafka_op_t *rko_req, + rd_kafka_op_t **rko_resultp, + rd_kafka_buf_t *reply, + char *errstr, + size_t errstr_size) { + const int log_decode_errors = LOG_ERR; + int32_t group_cnt; + int i; + rd_kafka_op_t *rko_result = NULL; - if (resource_pattern_type == RD_KAFKA_RESOURCE_PATTERN_ANY || - resource_pattern_type == RD_KAFKA_RESOURCE_PATTERN_MATCH || - resource_pattern_type <= RD_KAFKA_RESOURCE_PATTERN_UNKNOWN || - resource_pattern_type >= RD_KAFKA_RESOURCE_PATTERN_TYPE__CNT) { - rd_snprintf(errstr, errstr_size, - "Invalid resource pattern type"); - return NULL; - } + rd_kafka_buf_read_throttle_time(reply); - if (operation == RD_KAFKA_ACL_OPERATION_ANY || - operation <= RD_KAFKA_ACL_OPERATION_UNKNOWN || - operation >= RD_KAFKA_ACL_OPERATION__CNT) { - rd_snprintf(errstr, errstr_size, "Invalid operation"); - return NULL; - } + /* #group_error_codes */ + rd_kafka_buf_read_i32(reply, &group_cnt); - if (permission_type == RD_KAFKA_ACL_PERMISSION_TYPE_ANY || - permission_type <= RD_KAFKA_ACL_PERMISSION_TYPE_UNKNOWN || - permission_type >= RD_KAFKA_ACL_PERMISSION_TYPE__CNT) { - rd_snprintf(errstr, errstr_size, "Invalid permission type"); - return NULL; - } + if (group_cnt > rd_list_cnt(&rko_req->rko_u.admin_request.args)) + rd_kafka_buf_parse_fail( + reply, + "Received %" PRId32 + " groups in response " + "when only %d were requested", + group_cnt, rd_list_cnt(&rko_req->rko_u.admin_request.args)); - return rd_kafka_AclBinding_new0( - restype, name, resource_pattern_type, principal, host, operation, - permission_type, RD_KAFKA_RESP_ERR_NO_ERROR, NULL); -} + rko_result = rd_kafka_admin_result_new(rko_req); + rd_list_init(&rko_result->rko_u.admin_result.results, group_cnt, + rd_kafka_group_result_free); -rd_kafka_AclBindingFilter_t *rd_kafka_AclBindingFilter_new( - rd_kafka_ResourceType_t restype, - const char *name, - rd_kafka_ResourcePatternType_t resource_pattern_type, - const char *principal, - const char *host, - rd_kafka_AclOperation_t operation, - rd_kafka_AclPermissionType_t permission_type, - char *errstr, - size_t errstr_size) { + for (i = 0; i < (int)group_cnt; i++) { + rd_kafkap_str_t kgroup; + int16_t error_code; + rd_kafka_group_result_t *groupres; + rd_kafka_buf_read_str(reply, &kgroup); + rd_kafka_buf_read_i16(reply, &error_code); - if (restype <= RD_KAFKA_RESOURCE_UNKNOWN || - restype >= RD_KAFKA_RESOURCE__CNT) { - rd_snprintf(errstr, errstr_size, "Invalid resource type"); - return NULL; - } + groupres = rd_kafka_group_result_new( + kgroup.str, RD_KAFKAP_STR_LEN(&kgroup), NULL, + error_code ? 
rd_kafka_error_new(error_code, NULL) : NULL); - if (resource_pattern_type <= RD_KAFKA_RESOURCE_PATTERN_UNKNOWN || - resource_pattern_type >= RD_KAFKA_RESOURCE_PATTERN_TYPE__CNT) { - rd_snprintf(errstr, errstr_size, - "Invalid resource pattern type"); - return NULL; + rd_list_add(&rko_result->rko_u.admin_result.results, groupres); } - if (operation <= RD_KAFKA_ACL_OPERATION_UNKNOWN || - operation >= RD_KAFKA_ACL_OPERATION__CNT) { - rd_snprintf(errstr, errstr_size, "Invalid operation"); - return NULL; - } + *rko_resultp = rko_result; + return RD_KAFKA_RESP_ERR_NO_ERROR; - if (permission_type <= RD_KAFKA_ACL_PERMISSION_TYPE_UNKNOWN || - permission_type >= RD_KAFKA_ACL_PERMISSION_TYPE__CNT) { - rd_snprintf(errstr, errstr_size, "Invalid permission type"); - return NULL; - } +err_parse: + if (rko_result) + rd_kafka_op_destroy(rko_result); - return rd_kafka_AclBinding_new0( - restype, name, resource_pattern_type, principal, host, operation, - permission_type, RD_KAFKA_RESP_ERR_NO_ERROR, NULL); -} + rd_snprintf(errstr, errstr_size, + "DeleteGroups response protocol parse failure: %s", + rd_kafka_err2str(reply->rkbuf_err)); -rd_kafka_ResourceType_t -rd_kafka_AclBinding_restype(const rd_kafka_AclBinding_t *acl) { - return acl->restype; + return reply->rkbuf_err; } -const char *rd_kafka_AclBinding_name(const rd_kafka_AclBinding_t *acl) { - return acl->name; -} +/** @brief Merge the DeleteGroups response from a single broker + * into the user response list. + */ +void rd_kafka_DeleteGroups_response_merge(rd_kafka_op_t *rko_fanout, + const rd_kafka_op_t *rko_partial) { + const rd_kafka_group_result_t *groupres = NULL; + rd_kafka_group_result_t *newgroupres; + const rd_kafka_DeleteGroup_t *grp = + rko_partial->rko_u.admin_result.opaque; + int orig_pos; -const char *rd_kafka_AclBinding_principal(const rd_kafka_AclBinding_t *acl) { - return acl->principal; -} + rd_assert(rko_partial->rko_evtype == + RD_KAFKA_EVENT_DELETEGROUPS_RESULT); -const char *rd_kafka_AclBinding_host(const rd_kafka_AclBinding_t *acl) { - return acl->host; -} + if (!rko_partial->rko_err) { + /* Proper results. + * We only send one group per request, make sure it matches */ + groupres = + rd_list_elem(&rko_partial->rko_u.admin_result.results, 0); + rd_assert(groupres); + rd_assert(!strcmp(groupres->group, grp->group)); + newgroupres = rd_kafka_group_result_copy(groupres); + } else { + /* Op errored, e.g. timeout */ + newgroupres = rd_kafka_group_result_new( + grp->group, -1, NULL, + rd_kafka_error_new(rko_partial->rko_err, NULL)); + } -rd_kafka_AclOperation_t -rd_kafka_AclBinding_operation(const rd_kafka_AclBinding_t *acl) { - return acl->operation; -} + /* As a convenience to the application we insert group result + * in the same order as they were requested. 
*/ + orig_pos = rd_list_index(&rko_fanout->rko_u.admin_request.args, grp, + rd_kafka_DeleteGroup_cmp); + rd_assert(orig_pos != -1); -rd_kafka_AclPermissionType_t -rd_kafka_AclBinding_permission_type(const rd_kafka_AclBinding_t *acl) { - return acl->permission_type; + /* Make sure result is not already set */ + rd_assert(rd_list_elem(&rko_fanout->rko_u.admin_request.fanout.results, + orig_pos) == NULL); + + rd_list_set(&rko_fanout->rko_u.admin_request.fanout.results, orig_pos, + newgroupres); } -rd_kafka_ResourcePatternType_t -rd_kafka_AclBinding_resource_pattern_type(const rd_kafka_AclBinding_t *acl) { - return acl->resource_pattern_type; -} - -const rd_kafka_error_t * -rd_kafka_AclBinding_error(const rd_kafka_AclBinding_t *acl) { - return acl->error; -} - -/** - * @brief Allocate a new AclBinding and make a copy of \p src - */ -static rd_kafka_AclBinding_t * -rd_kafka_AclBinding_copy(const rd_kafka_AclBinding_t *src) { - rd_kafka_AclBinding_t *dst; - - dst = rd_kafka_AclBinding_new( - src->restype, src->name, src->resource_pattern_type, src->principal, - src->host, src->operation, src->permission_type, NULL, 0); - rd_assert(dst); - return dst; -} - -/** - * @brief Allocate a new AclBindingFilter and make a copy of \p src - */ -static rd_kafka_AclBindingFilter_t * -rd_kafka_AclBindingFilter_copy(const rd_kafka_AclBindingFilter_t *src) { - rd_kafka_AclBindingFilter_t *dst; - - dst = rd_kafka_AclBindingFilter_new( - src->restype, src->name, src->resource_pattern_type, src->principal, - src->host, src->operation, src->permission_type, NULL, 0); - rd_assert(dst); - return dst; -} - -void rd_kafka_AclBinding_destroy(rd_kafka_AclBinding_t *acl_binding) { - if (acl_binding->name) - rd_free(acl_binding->name); - if (acl_binding->principal) - rd_free(acl_binding->principal); - if (acl_binding->host) - rd_free(acl_binding->host); - if (acl_binding->error) - rd_kafka_error_destroy(acl_binding->error); - rd_free(acl_binding); -} - -static void rd_kafka_AclBinding_free(void *ptr) { - rd_kafka_AclBinding_destroy(ptr); -} - - -void rd_kafka_AclBinding_destroy_array(rd_kafka_AclBinding_t **acl_bindings, - size_t acl_bindings_cnt) { +void rd_kafka_DeleteGroups(rd_kafka_t *rk, + rd_kafka_DeleteGroup_t **del_groups, + size_t del_group_cnt, + const rd_kafka_AdminOptions_t *options, + rd_kafka_queue_t *rkqu) { + rd_kafka_op_t *rko_fanout; + rd_list_t dup_list; size_t i; - for (i = 0; i < acl_bindings_cnt; i++) - rd_kafka_AclBinding_destroy(acl_bindings[i]); -} - -/** - * @brief Parse CreateAclsResponse and create ADMIN_RESULT op. 
- */ -static rd_kafka_resp_err_t -rd_kafka_CreateAclsResponse_parse(rd_kafka_op_t *rko_req, - rd_kafka_op_t **rko_resultp, - rd_kafka_buf_t *reply, - char *errstr, - size_t errstr_size) { - const int log_decode_errors = LOG_ERR; - rd_kafka_resp_err_t err = RD_KAFKA_RESP_ERR_NO_ERROR; - rd_kafka_op_t *rko_result = NULL; - int32_t acl_cnt; - int i; - - rd_kafka_buf_read_throttle_time(reply); - - rd_kafka_buf_read_arraycnt(reply, &acl_cnt, 100000); - - if (acl_cnt != rd_list_cnt(&rko_req->rko_u.admin_request.args)) - rd_kafka_buf_parse_fail( - reply, - "Received %" PRId32 - " acls in response, but %d were requested", - acl_cnt, rd_list_cnt(&rko_req->rko_u.admin_request.args)); - - rko_result = rd_kafka_admin_result_new(rko_req); - - rd_list_init(&rko_result->rko_u.admin_result.results, acl_cnt, - rd_kafka_acl_result_free); - - for (i = 0; i < (int)acl_cnt; i++) { - int16_t error_code; - rd_kafkap_str_t error_msg = RD_KAFKAP_STR_INITIALIZER; - rd_kafka_acl_result_t *acl_res; - char *errstr = NULL; - - rd_kafka_buf_read_i16(reply, &error_code); - - rd_kafka_buf_read_str(reply, &error_msg); + static const struct rd_kafka_admin_fanout_worker_cbs fanout_cbs = { + rd_kafka_DeleteGroups_response_merge, + rd_kafka_group_result_copy_opaque, + }; - if (error_code) { - if (RD_KAFKAP_STR_LEN(&error_msg) == 0) - errstr = (char *)rd_kafka_err2str(error_code); - else - RD_KAFKAP_STR_DUPA(&errstr, &error_msg); - } + rd_assert(rkqu); - acl_res = rd_kafka_acl_result_new( - error_code ? rd_kafka_error_new(error_code, "%s", errstr) - : NULL); + rko_fanout = rd_kafka_admin_fanout_op_new( + rk, RD_KAFKA_OP_DELETEGROUPS, RD_KAFKA_EVENT_DELETEGROUPS_RESULT, + &fanout_cbs, options, rkqu->rkqu_q); - rd_list_set(&rko_result->rko_u.admin_result.results, i, - acl_res); + if (del_group_cnt == 0) { + rd_kafka_admin_result_fail(rko_fanout, + RD_KAFKA_RESP_ERR__INVALID_ARG, + "No groups to delete"); + rd_kafka_admin_common_worker_destroy(rk, rko_fanout, + rd_true /*destroy*/); + return; } - *rko_resultp = rko_result; - - return RD_KAFKA_RESP_ERR_NO_ERROR; + /* Copy group list and store it on the request op. + * Maintain original ordering. */ + rd_list_init(&rko_fanout->rko_u.admin_request.args, (int)del_group_cnt, + rd_kafka_DeleteGroup_free); + for (i = 0; i < del_group_cnt; i++) + rd_list_add(&rko_fanout->rko_u.admin_request.args, + rd_kafka_DeleteGroup_copy(del_groups[i])); -err_parse: - if (rko_result) - rd_kafka_op_destroy(rko_result); + /* Check for duplicates. + * Make a temporary copy of the group list and sort it to check for + * duplicates, we don't want the original list sorted since we want + * to maintain ordering. */ + rd_list_init(&dup_list, + rd_list_cnt(&rko_fanout->rko_u.admin_request.args), NULL); + rd_list_copy_to(&dup_list, &rko_fanout->rko_u.admin_request.args, NULL, + NULL); + rd_list_sort(&dup_list, rd_kafka_DeleteGroup_cmp); + if (rd_list_find_duplicate(&dup_list, rd_kafka_DeleteGroup_cmp)) { + rd_list_destroy(&dup_list); + rd_kafka_admin_result_fail(rko_fanout, + RD_KAFKA_RESP_ERR__INVALID_ARG, + "Duplicate groups not allowed"); + rd_kafka_admin_common_worker_destroy(rk, rko_fanout, + rd_true /*destroy*/); + return; + } - rd_snprintf(errstr, errstr_size, - "CreateAcls response protocol parse failure: %s", - rd_kafka_err2str(err)); + rd_list_destroy(&dup_list); - return err; -} + /* Prepare results list where fanned out op's results will be + * accumulated. 
 */ + rd_list_init(&rko_fanout->rko_u.admin_request.fanout.results, + (int)del_group_cnt, rd_kafka_group_result_free); + rko_fanout->rko_u.admin_request.fanout.outstanding = (int)del_group_cnt; -void rd_kafka_CreateAcls(rd_kafka_t *rk, - rd_kafka_AclBinding_t **new_acls, - size_t new_acls_cnt, - const rd_kafka_AdminOptions_t *options, - rd_kafka_queue_t *rkqu) { - rd_kafka_op_t *rko; - size_t i; - static const struct rd_kafka_admin_worker_cbs cbs = { - rd_kafka_CreateAclsRequest, rd_kafka_CreateAclsResponse_parse}; + /* Create individual request ops for each group. + * FIXME: A future optimization is to coalesce all groups for a single + * coordinator into one op. */ + for (i = 0; i < del_group_cnt; i++) { + static const struct rd_kafka_admin_worker_cbs cbs = { + rd_kafka_DeleteGroupsRequest, + rd_kafka_DeleteGroupsResponse_parse, + }; + rd_kafka_DeleteGroup_t *grp = + rd_list_elem(&rko_fanout->rko_u.admin_request.args, (int)i); + rd_kafka_op_t *rko = rd_kafka_admin_request_op_new( + rk, RD_KAFKA_OP_DELETEGROUPS, + RD_KAFKA_EVENT_DELETEGROUPS_RESULT, &cbs, options, + rk->rk_ops); - rko = rd_kafka_admin_request_op_new(rk, RD_KAFKA_OP_CREATEACLS, - RD_KAFKA_EVENT_CREATEACLS_RESULT, - &cbs, options, rkqu->rkqu_q); + rko->rko_u.admin_request.fanout_parent = rko_fanout; + rko->rko_u.admin_request.broker_id = + RD_KAFKA_ADMIN_TARGET_COORDINATOR; + rko->rko_u.admin_request.coordtype = RD_KAFKA_COORD_GROUP; + rko->rko_u.admin_request.coordkey = rd_strdup(grp->group); - rd_list_init(&rko->rko_u.admin_request.args, (int)new_acls_cnt, - rd_kafka_AclBinding_free); + /* Set the group name as the opaque so the fanout worker can use it + * to fill in errors. + * References rko_fanout's memory, which will always outlive + * the fanned out op. */ + rd_kafka_AdminOptions_set_opaque( + &rko->rko_u.admin_request.options, grp); - for (i = 0; i < new_acls_cnt; i++) + rd_list_init(&rko->rko_u.admin_request.args, 1, + rd_kafka_DeleteGroup_free); rd_list_add(&rko->rko_u.admin_request.args, - rd_kafka_AclBinding_copy(new_acls[i])); + rd_kafka_DeleteGroup_copy(del_groups[i])); - rd_kafka_q_enq(rk->rk_ops, rko); + rd_kafka_q_enq(rk->rk_ops, rko); + } } + /** - * @brief Get an array of rd_kafka_acl_result_t from a CreateAcls result. + * @brief Get an array of group results from a DeleteGroups result. * - * The returned \p rd_kafka_acl_result_t life-time is the same as the \p result - * object. + * The returned \p groups life-time is the same as the \p result object. * @param cntp is updated to the number of elements in the array. */ -const rd_kafka_acl_result_t ** -rd_kafka_CreateAcls_result_acls(const rd_kafka_CreateAcls_result_t *result, - size_t *cntp) { - return rd_kafka_admin_result_ret_acl_results( - (const rd_kafka_op_t *)result, cntp); +const rd_kafka_group_result_t **rd_kafka_DeleteGroups_result_groups( + const rd_kafka_DeleteGroups_result_t *result, + size_t *cntp) { + return rd_kafka_admin_result_ret_groups((const rd_kafka_op_t *)result, + cntp); } + /**@}*/ + /** - * @name DescribeAcls + * @name Delete consumer group offsets (committed offsets) * @{ * * * + * */
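With the DeleteGroups fanout wired up above, a brief sketch of driving it from application code. It is illustrative only: group names are placeholders, error handling is trimmed, and the request-order guarantee on results comes from the merge step shown earlier in this diff:

#include <stdio.h>
#include <librdkafka/rdkafka.h>

/* Delete two consumer groups and report the per-group results. */
static void delete_groups(rd_kafka_t *rk) {
        rd_kafka_queue_t *rkqu = rd_kafka_queue_new(rk);
        rd_kafka_DeleteGroup_t *dels[2];
        rd_kafka_event_t *rkev;
        const rd_kafka_DeleteGroups_result_t *result;

        dels[0] = rd_kafka_DeleteGroup_new("group-a");
        dels[1] = rd_kafka_DeleteGroup_new("group-b");

        rd_kafka_DeleteGroups(rk, dels, 2, NULL /*default options*/, rkqu);

        rkev   = rd_kafka_queue_poll(rkqu, 10 * 1000);
        result = rkev ? rd_kafka_event_DeleteGroups_result(rkev) : NULL;
        if (result) {
                size_t cnt, i;
                const rd_kafka_group_result_t **groups =
                    rd_kafka_DeleteGroups_result_groups(result, &cnt);
                /* Results arrive in the same order as requested. */
                for (i = 0; i < cnt; i++) {
                        const rd_kafka_error_t *error =
                            rd_kafka_group_result_error(groups[i]);
                        printf("%s: %s\n",
                               rd_kafka_group_result_name(groups[i]),
                               error ? rd_kafka_error_string(error)
                                     : "deleted");
                }
        }

        if (rkev)
                rd_kafka_event_destroy(rkev);
        rd_kafka_DeleteGroup_destroy_array(dels, 2);
        rd_kafka_queue_destroy(rkqu);
}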
-/** - * @brief Parse DescribeAclsResponse and create ADMIN_RESULT op. - */ -static rd_kafka_resp_err_t -rd_kafka_DescribeAclsResponse_parse(rd_kafka_op_t *rko_req, - rd_kafka_op_t **rko_resultp, - rd_kafka_buf_t *reply, - char *errstr, - size_t errstr_size) { - const int log_decode_errors = LOG_ERR; - rd_kafka_broker_t *rkb = reply->rkbuf_rkb; - rd_kafka_resp_err_t err = RD_KAFKA_RESP_ERR_NO_ERROR; - rd_kafka_op_t *rko_result = NULL; - int32_t res_cnt; - int i; - int j; - rd_kafka_AclBinding_t *acl = NULL; - int16_t error_code; - rd_kafkap_str_t error_msg; +rd_kafka_DeleteConsumerGroupOffsets_t *rd_kafka_DeleteConsumerGroupOffsets_new( + const char *group, + const rd_kafka_topic_partition_list_t *partitions) { + size_t tsize = strlen(group) + 1; + rd_kafka_DeleteConsumerGroupOffsets_t *del_grpoffsets; - rd_kafka_buf_read_throttle_time(reply); + rd_assert(partitions); - rd_kafka_buf_read_i16(reply, &error_code); - rd_kafka_buf_read_str(reply, &error_msg); + /* Single allocation */ + del_grpoffsets = rd_malloc(sizeof(*del_grpoffsets) + tsize); + del_grpoffsets->group = del_grpoffsets->data; + memcpy(del_grpoffsets->group, group, tsize); + del_grpoffsets->partitions = + rd_kafka_topic_partition_list_copy(partitions); - if (error_code) { - if (RD_KAFKAP_STR_LEN(&error_msg) == 0) - errstr = (char *)rd_kafka_err2str(error_code); - else - RD_KAFKAP_STR_DUPA(&errstr, &error_msg); - } + return del_grpoffsets; +} - /* #resources */ - rd_kafka_buf_read_arraycnt(reply, &res_cnt, 100000); +void rd_kafka_DeleteConsumerGroupOffsets_destroy( + rd_kafka_DeleteConsumerGroupOffsets_t *del_grpoffsets) { + rd_kafka_topic_partition_list_destroy(del_grpoffsets->partitions); + rd_free(del_grpoffsets); +} - rko_result = rd_kafka_admin_result_new(rko_req); +static void rd_kafka_DeleteConsumerGroupOffsets_free(void *ptr) { + rd_kafka_DeleteConsumerGroupOffsets_destroy(ptr); +} - rd_list_init(&rko_result->rko_u.admin_result.results, res_cnt, - rd_kafka_AclBinding_free); +void rd_kafka_DeleteConsumerGroupOffsets_destroy_array( + rd_kafka_DeleteConsumerGroupOffsets_t **del_grpoffsets, + size_t del_grpoffsets_cnt) { + size_t i; + for (i = 0; i < del_grpoffsets_cnt; i++) + rd_kafka_DeleteConsumerGroupOffsets_destroy(del_grpoffsets[i]); +} - for (i = 0; i < (int)res_cnt; i++) { - int8_t res_type = RD_KAFKA_RESOURCE_UNKNOWN; - rd_kafkap_str_t kres_name; - char *res_name; - int8_t resource_pattern_type = - RD_KAFKA_RESOURCE_PATTERN_LITERAL; - int32_t acl_cnt; - rd_kafka_buf_read_i8(reply, &res_type); - rd_kafka_buf_read_str(reply, &kres_name); - RD_KAFKAP_STR_DUPA(&res_name, &kres_name); +/** + * @brief Allocate a new DeleteConsumerGroupOffsets and make a copy of \p src + */ +static rd_kafka_DeleteConsumerGroupOffsets_t * +rd_kafka_DeleteConsumerGroupOffsets_copy( + const rd_kafka_DeleteConsumerGroupOffsets_t *src) { + return rd_kafka_DeleteConsumerGroupOffsets_new(src->group, + src->partitions); +} - if (rd_kafka_buf_ApiVersion(reply) >= 1) { - rd_kafka_buf_read_i8(reply, &resource_pattern_type); - } - if (res_type <= RD_KAFKA_RESOURCE_UNKNOWN || - res_type >= RD_KAFKA_RESOURCE__CNT) { - rd_rkb_log(rkb, LOG_WARNING, "DESCRIBEACLSRESPONSE", - "DescribeAclsResponse returned unknown " - "resource type %d", - res_type); - res_type = RD_KAFKA_RESOURCE_UNKNOWN; - } - if (resource_pattern_type <= - RD_KAFKA_RESOURCE_PATTERN_UNKNOWN || - resource_pattern_type >= - RD_KAFKA_RESOURCE_PATTERN_TYPE__CNT) { - rd_rkb_log(rkb, LOG_WARNING, "DESCRIBEACLSRESPONSE", - "DescribeAclsResponse returned unknown " - "resource pattern type %d", - resource_pattern_type); - resource_pattern_type = -
RD_KAFKA_RESOURCE_PATTERN_UNKNOWN; - } +/** + * @brief Parse OffsetDeleteResponse and create ADMIN_RESULT op. + */ +static rd_kafka_resp_err_t +rd_kafka_OffsetDeleteResponse_parse(rd_kafka_op_t *rko_req, + rd_kafka_op_t **rko_resultp, + rd_kafka_buf_t *reply, + char *errstr, + size_t errstr_size) { + const int log_decode_errors = LOG_ERR; + rd_kafka_op_t *rko_result; + int16_t ErrorCode; + rd_kafka_topic_partition_list_t *partitions = NULL; + const rd_kafka_DeleteConsumerGroupOffsets_t *del_grpoffsets; - /* #resources */ - rd_kafka_buf_read_arraycnt(reply, &acl_cnt, 100000); + rd_kafka_buf_read_i16(reply, &ErrorCode); + if (ErrorCode) { + rd_snprintf(errstr, errstr_size, + "OffsetDelete response error: %s", + rd_kafka_err2str(ErrorCode)); + return ErrorCode; + } - for (j = 0; j < (int)acl_cnt; j++) { - rd_kafkap_str_t kprincipal; - rd_kafkap_str_t khost; - int8_t operation = RD_KAFKA_ACL_OPERATION_UNKNOWN; - int8_t permission_type = - RD_KAFKA_ACL_PERMISSION_TYPE_UNKNOWN; - char *principal; - char *host; + rd_kafka_buf_read_throttle_time(reply); - rd_kafka_buf_read_str(reply, &kprincipal); - rd_kafka_buf_read_str(reply, &khost); - rd_kafka_buf_read_i8(reply, &operation); - rd_kafka_buf_read_i8(reply, &permission_type); - RD_KAFKAP_STR_DUPA(&principal, &kprincipal); - RD_KAFKAP_STR_DUPA(&host, &khost); - if (operation <= RD_KAFKA_ACL_OPERATION_UNKNOWN || - operation >= RD_KAFKA_ACL_OPERATION__CNT) { - rd_rkb_log(rkb, LOG_WARNING, - "DESCRIBEACLSRESPONSE", - "DescribeAclsResponse returned " - "unknown acl operation %d", - operation); - operation = RD_KAFKA_ACL_OPERATION_UNKNOWN; - } - if (permission_type <= - RD_KAFKA_ACL_PERMISSION_TYPE_UNKNOWN || - permission_type >= - RD_KAFKA_ACL_PERMISSION_TYPE__CNT) { - rd_rkb_log(rkb, LOG_WARNING, - "DESCRIBEACLSRESPONSE", - "DescribeAclsResponse returned " - "unknown acl permission type %d", - permission_type); - permission_type = - RD_KAFKA_ACL_PERMISSION_TYPE_UNKNOWN; - } + const rd_kafka_topic_partition_field_t fields[] = { + RD_KAFKA_TOPIC_PARTITION_FIELD_PARTITION, + RD_KAFKA_TOPIC_PARTITION_FIELD_ERR, + RD_KAFKA_TOPIC_PARTITION_FIELD_END}; + partitions = rd_kafka_buf_read_topic_partitions(reply, 16, fields); + if (!partitions) { + rd_snprintf(errstr, errstr_size, + "Failed to parse OffsetDeleteResponse partitions"); + return RD_KAFKA_RESP_ERR__BAD_MSG; + } - acl = rd_kafka_AclBinding_new0( - res_type, res_name, resource_pattern_type, - principal, host, operation, permission_type, - RD_KAFKA_RESP_ERR_NO_ERROR, NULL); - rd_list_add(&rko_result->rko_u.admin_result.results, - acl); - } - } + /* Create result op and group_result_t */ + rko_result = rd_kafka_admin_result_new(rko_req); + del_grpoffsets = rd_list_elem(&rko_result->rko_u.admin_result.args, 0); + + rd_list_init(&rko_result->rko_u.admin_result.results, 1, + rd_kafka_group_result_free); + rd_list_add(&rko_result->rko_u.admin_result.results, + rd_kafka_group_result_new(del_grpoffsets->group, -1, + partitions, NULL)); + rd_kafka_topic_partition_list_destroy(partitions); *rko_resultp = rko_result; return RD_KAFKA_RESP_ERR_NO_ERROR; err_parse: - if (rko_result) - rd_kafka_op_destroy(rko_result); - rd_snprintf(errstr, errstr_size, - "DescribeAcls response protocol parse failure: %s", - rd_kafka_err2str(err)); - - return err; + "OffsetDelete response protocol parse failure: %s", + rd_kafka_err2str(reply->rkbuf_err)); + return reply->rkbuf_err; } -void rd_kafka_DescribeAcls(rd_kafka_t *rk, - rd_kafka_AclBindingFilter_t *acl_filter, - const rd_kafka_AdminOptions_t *options, - rd_kafka_queue_t 
*rkqu) { - rd_kafka_op_t *rko; +void rd_kafka_DeleteConsumerGroupOffsets( + rd_kafka_t *rk, + rd_kafka_DeleteConsumerGroupOffsets_t **del_grpoffsets, + size_t del_grpoffsets_cnt, + const rd_kafka_AdminOptions_t *options, + rd_kafka_queue_t *rkqu) { static const struct rd_kafka_admin_worker_cbs cbs = { - rd_kafka_DescribeAclsRequest, - rd_kafka_DescribeAclsResponse_parse, + rd_kafka_OffsetDeleteRequest, + rd_kafka_OffsetDeleteResponse_parse, }; + rd_kafka_op_t *rko; - rko = rd_kafka_admin_request_op_new(rk, RD_KAFKA_OP_DESCRIBEACLS, - RD_KAFKA_EVENT_DESCRIBEACLS_RESULT, - &cbs, options, rkqu->rkqu_q); + rd_assert(rkqu); - rd_list_init(&rko->rko_u.admin_request.args, 1, - rd_kafka_AclBinding_free); + rko = rd_kafka_admin_request_op_new( + rk, RD_KAFKA_OP_DELETECONSUMERGROUPOFFSETS, + RD_KAFKA_EVENT_DELETECONSUMERGROUPOFFSETS_RESULT, &cbs, options, + rkqu->rkqu_q); - rd_list_add(&rko->rko_u.admin_request.args, - rd_kafka_AclBindingFilter_copy(acl_filter)); + if (del_grpoffsets_cnt != 1) { + /* For simplicity we only support one single group for now */ + rd_kafka_admin_result_fail(rko, RD_KAFKA_RESP_ERR__INVALID_ARG, + "Exactly one " + "DeleteConsumerGroupOffsets must " + "be passed"); + rd_kafka_admin_common_worker_destroy(rk, rko, + rd_true /*destroy*/); + return; + } + + + rko->rko_u.admin_request.broker_id = RD_KAFKA_ADMIN_TARGET_COORDINATOR; + rko->rko_u.admin_request.coordtype = RD_KAFKA_COORD_GROUP; + rko->rko_u.admin_request.coordkey = rd_strdup(del_grpoffsets[0]->group); + + /* Store copy of group on request so the group name can be reached + * from the response parser. */ + rd_list_init(&rko->rko_u.admin_request.args, 1, + rd_kafka_DeleteConsumerGroupOffsets_free); + rd_list_add( + &rko->rko_u.admin_request.args, + rd_kafka_DeleteConsumerGroupOffsets_copy(del_grpoffsets[0])); rd_kafka_q_enq(rk->rk_ops, rko); } + /** - * @brief Get an array of rd_kafka_AclBinding_t from a DescribeAcls result. + * @brief Get an array of group results from a DeleteGroups result. * - * The returned \p rd_kafka_AclBinding_t life-time is the same as the \p result - * object. + * The returned \p groups life-time is the same as the \p result object. * @param cntp is updated to the number of elements in the array. */ -const rd_kafka_AclBinding_t ** -rd_kafka_DescribeAcls_result_acls(const rd_kafka_DescribeAcls_result_t *result, - size_t *cntp) { - return rd_kafka_admin_result_ret_acl_bindings( - (const rd_kafka_op_t *)result, cntp); +const rd_kafka_group_result_t ** +rd_kafka_DeleteConsumerGroupOffsets_result_groups( + const rd_kafka_DeleteConsumerGroupOffsets_result_t *result, + size_t *cntp) { + return rd_kafka_admin_result_ret_groups((const rd_kafka_op_t *)result, + cntp); } -/**@}*/ +void rd_kafka_DeleteConsumerGroupOffsets( + rd_kafka_t *rk, + rd_kafka_DeleteConsumerGroupOffsets_t **del_grpoffsets, + size_t del_grpoffsets_cnt, + const rd_kafka_AdminOptions_t *options, + rd_kafka_queue_t *rkqu); +/**@}*/ /** - * @name DeleteAcls + * @name CreateAcls * @{ * * * */ -/** - * @brief Allocate a new DeleteAcls result response with the given - * \p err error code and \p errstr error message. - */ -const rd_kafka_DeleteAcls_result_response_t * -rd_kafka_DeleteAcls_result_response_new(rd_kafka_resp_err_t err, char *errstr) { - rd_kafka_DeleteAcls_result_response_t *result_response; - - result_response = rd_calloc(1, sizeof(*result_response)); - if (err) - result_response->error = rd_kafka_error_new( - err, "%s", errstr ? 
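The relocated DeleteConsumerGroupOffsets implementation above keeps its one-request-per-call restriction. A usage sketch under the usual assumptions (initialized rd_kafka_t *rk; group, topic and partition are placeholders; error handling trimmed):

#include <inttypes.h>
#include <stdio.h>
#include <librdkafka/rdkafka.h>

/* Delete the committed offset of a single partition for one group.
 * Exactly one rd_kafka_DeleteConsumerGroupOffsets_t may be passed per
 * call, matching the guard above. */
static void delete_committed_offset(rd_kafka_t *rk) {
        rd_kafka_queue_t *rkqu = rd_kafka_queue_new(rk);
        rd_kafka_topic_partition_list_t *parts =
            rd_kafka_topic_partition_list_new(1);
        rd_kafka_DeleteConsumerGroupOffsets_t *dgo;
        rd_kafka_event_t *rkev;
        const rd_kafka_DeleteConsumerGroupOffsets_result_t *result;

        rd_kafka_topic_partition_list_add(parts, "mytopic", 0);
        dgo = rd_kafka_DeleteConsumerGroupOffsets_new("group-a", parts);

        rd_kafka_DeleteConsumerGroupOffsets(rk, &dgo, 1, NULL, rkqu);

        rkev   = rd_kafka_queue_poll(rkqu, 10 * 1000);
        result = rkev ? rd_kafka_event_DeleteConsumerGroupOffsets_result(rkev)
                      : NULL;
        if (result) {
                size_t cnt;
                const rd_kafka_group_result_t **groups =
                    rd_kafka_DeleteConsumerGroupOffsets_result_groups(result,
                                                                      &cnt);
                /* Per-partition errors are on the result partition list. */
                if (cnt == 1) {
                        const rd_kafka_topic_partition_list_t *perr =
                            rd_kafka_group_result_partitions(groups[0]);
                        int i;
                        for (i = 0; i < perr->cnt; i++)
                                printf("%s [%" PRId32 "]: %s\n",
                                       perr->elems[i].topic,
                                       perr->elems[i].partition,
                                       rd_kafka_err2str(perr->elems[i].err));
                }
        }

        if (rkev)
                rd_kafka_event_destroy(rkev);
        rd_kafka_DeleteConsumerGroupOffsets_destroy(dgo);
        rd_kafka_topic_partition_list_destroy(parts);
        rd_kafka_queue_destroy(rkqu);
}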
errstr : rd_kafka_err2str(err)); +const char *rd_kafka_AclOperation_name(rd_kafka_AclOperation_t operation) { + static const char *names[] = {"UNKNOWN", + "ANY", + "ALL", + "READ", + "WRITE", + "CREATE", + "DELETE", + "ALTER", + "DESCRIBE", + "CLUSTER_ACTION", + "DESCRIBE_CONFIGS", + "ALTER_CONFIGS", + "IDEMPOTENT_WRITE"}; - /* List of int32 lists */ - rd_list_init(&result_response->matching_acls, 0, - rd_kafka_AclBinding_free); + if ((unsigned int)operation >= + (unsigned int)RD_KAFKA_ACL_OPERATION__CNT) + return "UNSUPPORTED"; - return result_response; + return names[operation]; } -static void rd_kafka_DeleteAcls_result_response_destroy( - rd_kafka_DeleteAcls_result_response_t *resp) { - if (resp->error) - rd_kafka_error_destroy(resp->error); - rd_list_destroy(&resp->matching_acls); - rd_free(resp); -} +const char * +rd_kafka_AclPermissionType_name(rd_kafka_AclPermissionType_t permission_type) { + static const char *names[] = {"UNKNOWN", "ANY", "DENY", "ALLOW"}; -static void rd_kafka_DeleteAcls_result_response_free(void *ptr) { - rd_kafka_DeleteAcls_result_response_destroy( - (rd_kafka_DeleteAcls_result_response_t *)ptr); -} + if ((unsigned int)permission_type >= + (unsigned int)RD_KAFKA_ACL_PERMISSION_TYPE__CNT) + return "UNSUPPORTED"; -/** - * @brief Get an array of rd_kafka_AclBinding_t from a DescribeAcls result. - * - * The returned \p rd_kafka_AclBinding_t life-time is the same as the \p result - * object. - * @param cntp is updated to the number of elements in the array. - */ -const rd_kafka_DeleteAcls_result_response_t ** -rd_kafka_DeleteAcls_result_responses(const rd_kafka_DeleteAcls_result_t *result, - size_t *cntp) { - return rd_kafka_admin_result_ret_delete_acl_result_responses( - (const rd_kafka_op_t *)result, cntp); + return names[permission_type]; } -const rd_kafka_error_t *rd_kafka_DeleteAcls_result_response_error( - const rd_kafka_DeleteAcls_result_response_t *result_response) { - return result_response->error; -} +static rd_kafka_AclBinding_t * +rd_kafka_AclBinding_new0(rd_kafka_ResourceType_t restype, + const char *name, + rd_kafka_ResourcePatternType_t resource_pattern_type, + const char *principal, + const char *host, + rd_kafka_AclOperation_t operation, + rd_kafka_AclPermissionType_t permission_type, + rd_kafka_resp_err_t err, + const char *errstr) { + rd_kafka_AclBinding_t *acl_binding; -const rd_kafka_AclBinding_t **rd_kafka_DeleteAcls_result_response_matching_acls( - const rd_kafka_DeleteAcls_result_response_t *result_response, - size_t *matching_acls_cntp) { - *matching_acls_cntp = result_response->matching_acls.rl_cnt; - return (const rd_kafka_AclBinding_t **) - result_response->matching_acls.rl_elems; + acl_binding = rd_calloc(1, sizeof(*acl_binding)); + acl_binding->name = name != NULL ? rd_strdup(name) : NULL; + acl_binding->principal = + principal != NULL ? rd_strdup(principal) : NULL; + acl_binding->host = host != NULL ? rd_strdup(host) : NULL; + acl_binding->restype = restype; + acl_binding->resource_pattern_type = resource_pattern_type; + acl_binding->operation = operation; + acl_binding->permission_type = permission_type; + if (err) + acl_binding->error = rd_kafka_error_new(err, "%s", errstr); + + return acl_binding; } -/** - * @brief Parse DeleteAclsResponse and create ADMIN_RESULT op. 
- */ -static rd_kafka_resp_err_t -rd_kafka_DeleteAclsResponse_parse(rd_kafka_op_t *rko_req, - rd_kafka_op_t **rko_resultp, - rd_kafka_buf_t *reply, - char *errstr, - size_t errstr_size) { - const int log_decode_errors = LOG_ERR; - rd_kafka_broker_t *rkb = reply->rkbuf_rkb; - rd_kafka_op_t *rko_result = NULL; - rd_kafka_resp_err_t err = RD_KAFKA_RESP_ERR_NO_ERROR; - int32_t res_cnt; - int i; - int j; +rd_kafka_AclBinding_t * +rd_kafka_AclBinding_new(rd_kafka_ResourceType_t restype, + const char *name, + rd_kafka_ResourcePatternType_t resource_pattern_type, + const char *principal, + const char *host, + rd_kafka_AclOperation_t operation, + rd_kafka_AclPermissionType_t permission_type, + char *errstr, + size_t errstr_size) { + if (!name) { + rd_snprintf(errstr, errstr_size, "Invalid resource name"); + return NULL; + } + if (!principal) { + rd_snprintf(errstr, errstr_size, "Invalid principal"); + return NULL; + } + if (!host) { + rd_snprintf(errstr, errstr_size, "Invalid host"); + return NULL; + } - rd_kafka_buf_read_throttle_time(reply); + if (restype == RD_KAFKA_RESOURCE_ANY || + restype <= RD_KAFKA_RESOURCE_UNKNOWN || + restype >= RD_KAFKA_RESOURCE__CNT) { + rd_snprintf(errstr, errstr_size, "Invalid resource type"); + return NULL; + } - /* #responses */ - rd_kafka_buf_read_arraycnt(reply, &res_cnt, 100000); + if (resource_pattern_type == RD_KAFKA_RESOURCE_PATTERN_ANY || + resource_pattern_type == RD_KAFKA_RESOURCE_PATTERN_MATCH || + resource_pattern_type <= RD_KAFKA_RESOURCE_PATTERN_UNKNOWN || + resource_pattern_type >= RD_KAFKA_RESOURCE_PATTERN_TYPE__CNT) { + rd_snprintf(errstr, errstr_size, + "Invalid resource pattern type"); + return NULL; + } - rko_result = rd_kafka_admin_result_new(rko_req); + if (operation == RD_KAFKA_ACL_OPERATION_ANY || + operation <= RD_KAFKA_ACL_OPERATION_UNKNOWN || + operation >= RD_KAFKA_ACL_OPERATION__CNT) { + rd_snprintf(errstr, errstr_size, "Invalid operation"); + return NULL; + } - rd_list_init(&rko_result->rko_u.admin_result.results, res_cnt, - rd_kafka_DeleteAcls_result_response_free); + if (permission_type == RD_KAFKA_ACL_PERMISSION_TYPE_ANY || + permission_type <= RD_KAFKA_ACL_PERMISSION_TYPE_UNKNOWN || + permission_type >= RD_KAFKA_ACL_PERMISSION_TYPE__CNT) { + rd_snprintf(errstr, errstr_size, "Invalid permission type"); + return NULL; + } - for (i = 0; i < (int)res_cnt; i++) { - int16_t error_code; - rd_kafkap_str_t error_msg = RD_KAFKAP_STR_INITIALIZER; - char *errstr = NULL; - const rd_kafka_DeleteAcls_result_response_t *result_response; - int32_t matching_acls_cnt; + return rd_kafka_AclBinding_new0( + restype, name, resource_pattern_type, principal, host, operation, + permission_type, RD_KAFKA_RESP_ERR_NO_ERROR, NULL); +} - rd_kafka_buf_read_i16(reply, &error_code); - rd_kafka_buf_read_str(reply, &error_msg); +rd_kafka_AclBindingFilter_t *rd_kafka_AclBindingFilter_new( + rd_kafka_ResourceType_t restype, + const char *name, + rd_kafka_ResourcePatternType_t resource_pattern_type, + const char *principal, + const char *host, + rd_kafka_AclOperation_t operation, + rd_kafka_AclPermissionType_t permission_type, + char *errstr, + size_t errstr_size) { - if (error_code) { - if (RD_KAFKAP_STR_IS_NULL(&error_msg) || - RD_KAFKAP_STR_LEN(&error_msg) == 0) - errstr = (char *)rd_kafka_err2str(error_code); - else - RD_KAFKAP_STR_DUPA(&errstr, &error_msg); - } - result_response = - rd_kafka_DeleteAcls_result_response_new(error_code, errstr); + if (restype <= RD_KAFKA_RESOURCE_UNKNOWN || + restype >= RD_KAFKA_RESOURCE__CNT) { + rd_snprintf(errstr, errstr_size, 
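/* A sketch of how these validating constructors are meant to be used with
 * the rd_kafka_CreateAcls() entry point added further below (assumed
 * context: <librdkafka/rdkafka.h>, <stdio.h>, an initialized rd_kafka_t *rk
 * inside a void caller function; principal/host/topic values are
 * hypothetical). */
#if 0
char errstr[256];
rd_kafka_AclBinding_t *acl = rd_kafka_AclBinding_new(
        RD_KAFKA_RESOURCE_TOPIC, "mytopic",
        RD_KAFKA_RESOURCE_PATTERN_LITERAL, "User:alice", "*",
        RD_KAFKA_ACL_OPERATION_READ, RD_KAFKA_ACL_PERMISSION_TYPE_ALLOW,
        errstr, sizeof(errstr));
if (!acl) {
        fprintf(stderr, "invalid binding: %s\n", errstr);
        return;
}

rd_kafka_queue_t *q = rd_kafka_queue_new(rk);
rd_kafka_CreateAcls(rk, &acl, 1, NULL /*options*/, q);

rd_kafka_event_t *ev = rd_kafka_queue_poll(q, 10 * 1000);
const rd_kafka_CreateAcls_result_t *res = rd_kafka_event_CreateAcls_result(ev);
if (res) {
        size_t cnt;
        const rd_kafka_acl_result_t **results =
            rd_kafka_CreateAcls_result_acls(res, &cnt);
        if (cnt > 0 && rd_kafka_acl_result_error(results[0]))
                fprintf(stderr, "ACL creation failed\n");
}
rd_kafka_event_destroy(ev);
rd_kafka_AclBinding_destroy(acl);
rd_kafka_queue_destroy(q);
#endif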
"Invalid resource type"); + return NULL; + } - /* #maching_acls */ - rd_kafka_buf_read_arraycnt(reply, &matching_acls_cnt, 100000); - for (j = 0; j < (int)matching_acls_cnt; j++) { - int16_t acl_error_code; - int8_t res_type = RD_KAFKA_RESOURCE_UNKNOWN; - rd_kafkap_str_t acl_error_msg = - RD_KAFKAP_STR_INITIALIZER; - rd_kafkap_str_t kres_name; - rd_kafkap_str_t khost; - rd_kafkap_str_t kprincipal; - int8_t resource_pattern_type = - RD_KAFKA_RESOURCE_PATTERN_LITERAL; - int8_t operation = RD_KAFKA_ACL_OPERATION_UNKNOWN; - int8_t permission_type = - RD_KAFKA_ACL_PERMISSION_TYPE_UNKNOWN; - rd_kafka_AclBinding_t *matching_acl; - char *acl_errstr = NULL; - char *res_name; - char *principal; - char *host; + if (resource_pattern_type <= RD_KAFKA_RESOURCE_PATTERN_UNKNOWN || + resource_pattern_type >= RD_KAFKA_RESOURCE_PATTERN_TYPE__CNT) { + rd_snprintf(errstr, errstr_size, + "Invalid resource pattern type"); + return NULL; + } - rd_kafka_buf_read_i16(reply, &acl_error_code); - rd_kafka_buf_read_str(reply, &acl_error_msg); - if (acl_error_code) { - if (RD_KAFKAP_STR_IS_NULL(&acl_error_msg) || - RD_KAFKAP_STR_LEN(&acl_error_msg) == 0) - acl_errstr = (char *)rd_kafka_err2str( - acl_error_code); - else - RD_KAFKAP_STR_DUPA(&acl_errstr, - &acl_error_msg); - } + if (operation <= RD_KAFKA_ACL_OPERATION_UNKNOWN || + operation >= RD_KAFKA_ACL_OPERATION__CNT) { + rd_snprintf(errstr, errstr_size, "Invalid operation"); + return NULL; + } - rd_kafka_buf_read_i8(reply, &res_type); - rd_kafka_buf_read_str(reply, &kres_name); + if (permission_type <= RD_KAFKA_ACL_PERMISSION_TYPE_UNKNOWN || + permission_type >= RD_KAFKA_ACL_PERMISSION_TYPE__CNT) { + rd_snprintf(errstr, errstr_size, "Invalid permission type"); + return NULL; + } - if (rd_kafka_buf_ApiVersion(reply) >= 1) { - rd_kafka_buf_read_i8(reply, - &resource_pattern_type); - } + return rd_kafka_AclBinding_new0( + restype, name, resource_pattern_type, principal, host, operation, + permission_type, RD_KAFKA_RESP_ERR_NO_ERROR, NULL); +} - rd_kafka_buf_read_str(reply, &kprincipal); - rd_kafka_buf_read_str(reply, &khost); - rd_kafka_buf_read_i8(reply, &operation); - rd_kafka_buf_read_i8(reply, &permission_type); - RD_KAFKAP_STR_DUPA(&res_name, &kres_name); - RD_KAFKAP_STR_DUPA(&principal, &kprincipal); - RD_KAFKAP_STR_DUPA(&host, &khost); +rd_kafka_ResourceType_t +rd_kafka_AclBinding_restype(const rd_kafka_AclBinding_t *acl) { + return acl->restype; +} - if (res_type <= RD_KAFKA_RESOURCE_UNKNOWN || - res_type >= RD_KAFKA_RESOURCE__CNT) { - rd_rkb_log(rkb, LOG_WARNING, - "DELETEACLSRESPONSE", - "DeleteAclsResponse returned " - "unknown resource type %d", - res_type); - res_type = RD_KAFKA_RESOURCE_UNKNOWN; - } - if (resource_pattern_type <= - RD_KAFKA_RESOURCE_PATTERN_UNKNOWN || - resource_pattern_type >= - RD_KAFKA_RESOURCE_PATTERN_TYPE__CNT) { - rd_rkb_log(rkb, LOG_WARNING, - "DELETEACLSRESPONSE", - "DeleteAclsResponse returned " - "unknown resource pattern type %d", - resource_pattern_type); - resource_pattern_type = - RD_KAFKA_RESOURCE_PATTERN_UNKNOWN; - } - if (operation <= RD_KAFKA_ACL_OPERATION_UNKNOWN || - operation >= RD_KAFKA_ACL_OPERATION__CNT) { - rd_rkb_log(rkb, LOG_WARNING, - "DELETEACLSRESPONSE", - "DeleteAclsResponse returned " - "unknown acl operation %d", - operation); - operation = RD_KAFKA_ACL_OPERATION_UNKNOWN; - } - if (permission_type <= - RD_KAFKA_ACL_PERMISSION_TYPE_UNKNOWN || - permission_type >= - RD_KAFKA_ACL_PERMISSION_TYPE__CNT) { - rd_rkb_log(rkb, LOG_WARNING, - "DELETEACLSRESPONSE", - "DeleteAclsResponse returned " - "unknown acl 
permission type %d", - permission_type); - permission_type = - RD_KAFKA_ACL_PERMISSION_TYPE_UNKNOWN; - } +const char *rd_kafka_AclBinding_name(const rd_kafka_AclBinding_t *acl) { + return acl->name; +} - matching_acl = rd_kafka_AclBinding_new0( - res_type, res_name, resource_pattern_type, - principal, host, operation, permission_type, - acl_error_code, acl_errstr); +const char *rd_kafka_AclBinding_principal(const rd_kafka_AclBinding_t *acl) { + return acl->principal; +} - rd_list_add( - (rd_list_t *)&result_response->matching_acls, - (void *)matching_acl); +const char *rd_kafka_AclBinding_host(const rd_kafka_AclBinding_t *acl) { + return acl->host; +} + +rd_kafka_AclOperation_t +rd_kafka_AclBinding_operation(const rd_kafka_AclBinding_t *acl) { + return acl->operation; +} + +rd_kafka_AclPermissionType_t +rd_kafka_AclBinding_permission_type(const rd_kafka_AclBinding_t *acl) { + return acl->permission_type; +} + +rd_kafka_ResourcePatternType_t +rd_kafka_AclBinding_resource_pattern_type(const rd_kafka_AclBinding_t *acl) { + return acl->resource_pattern_type; +} + +const rd_kafka_error_t * +rd_kafka_AclBinding_error(const rd_kafka_AclBinding_t *acl) { + return acl->error; +} + +/** + * @brief Allocate a new AclBinding and make a copy of \p src + */ +static rd_kafka_AclBinding_t * +rd_kafka_AclBinding_copy(const rd_kafka_AclBinding_t *src) { + rd_kafka_AclBinding_t *dst; + + dst = rd_kafka_AclBinding_new( + src->restype, src->name, src->resource_pattern_type, src->principal, + src->host, src->operation, src->permission_type, NULL, 0); + rd_assert(dst); + return dst; +} + +/** + * @brief Allocate a new AclBindingFilter and make a copy of \p src + */ +static rd_kafka_AclBindingFilter_t * +rd_kafka_AclBindingFilter_copy(const rd_kafka_AclBindingFilter_t *src) { + rd_kafka_AclBindingFilter_t *dst; + + dst = rd_kafka_AclBindingFilter_new( + src->restype, src->name, src->resource_pattern_type, src->principal, + src->host, src->operation, src->permission_type, NULL, 0); + rd_assert(dst); + return dst; +} + +void rd_kafka_AclBinding_destroy(rd_kafka_AclBinding_t *acl_binding) { + if (acl_binding->name) + rd_free(acl_binding->name); + if (acl_binding->principal) + rd_free(acl_binding->principal); + if (acl_binding->host) + rd_free(acl_binding->host); + if (acl_binding->error) + rd_kafka_error_destroy(acl_binding->error); + rd_free(acl_binding); +} + +static void rd_kafka_AclBinding_free(void *ptr) { + rd_kafka_AclBinding_destroy(ptr); +} + + +void rd_kafka_AclBinding_destroy_array(rd_kafka_AclBinding_t **acl_bindings, + size_t acl_bindings_cnt) { + size_t i; + for (i = 0; i < acl_bindings_cnt; i++) + rd_kafka_AclBinding_destroy(acl_bindings[i]); +} + +/** + * @brief Parse CreateAclsResponse and create ADMIN_RESULT op. 
+ */ +static rd_kafka_resp_err_t +rd_kafka_CreateAclsResponse_parse(rd_kafka_op_t *rko_req, + rd_kafka_op_t **rko_resultp, + rd_kafka_buf_t *reply, + char *errstr, + size_t errstr_size) { + const int log_decode_errors = LOG_ERR; + rd_kafka_resp_err_t err = RD_KAFKA_RESP_ERR_NO_ERROR; + rd_kafka_op_t *rko_result = NULL; + int32_t acl_cnt; + int i; + + rd_kafka_buf_read_throttle_time(reply); + + rd_kafka_buf_read_arraycnt(reply, &acl_cnt, 100000); + + if (acl_cnt != rd_list_cnt(&rko_req->rko_u.admin_request.args)) + rd_kafka_buf_parse_fail( + reply, + "Received %" PRId32 + " acls in response, but %d were requested", + acl_cnt, rd_list_cnt(&rko_req->rko_u.admin_request.args)); + + rko_result = rd_kafka_admin_result_new(rko_req); + + rd_list_init(&rko_result->rko_u.admin_result.results, acl_cnt, + rd_kafka_acl_result_free); + + for (i = 0; i < (int)acl_cnt; i++) { + int16_t error_code; + rd_kafkap_str_t error_msg = RD_KAFKAP_STR_INITIALIZER; + rd_kafka_acl_result_t *acl_res; + char *errstr = NULL; + + rd_kafka_buf_read_i16(reply, &error_code); + + rd_kafka_buf_read_str(reply, &error_msg); + + if (error_code) { + if (RD_KAFKAP_STR_LEN(&error_msg) == 0) + errstr = (char *)rd_kafka_err2str(error_code); + else + RD_KAFKAP_STR_DUPA(&errstr, &error_msg); } - rd_list_add(&rko_result->rko_u.admin_result.results, - (void *)result_response); + acl_res = rd_kafka_acl_result_new( + error_code ? rd_kafka_error_new(error_code, "%s", errstr) + : NULL); + + rd_list_set(&rko_result->rko_u.admin_result.results, i, + acl_res); } *rko_resultp = rko_result; @@ -5056,278 +5346,2180 @@ rd_kafka_DeleteAclsResponse_parse(rd_kafka_op_t *rko_req, rd_kafka_op_destroy(rko_result); rd_snprintf(errstr, errstr_size, - "DeleteAcls response protocol parse failure: %s", + "CreateAcls response protocol parse failure: %s", rd_kafka_err2str(err)); return err; } - -void rd_kafka_DeleteAcls(rd_kafka_t *rk, - rd_kafka_AclBindingFilter_t **del_acls, - size_t del_acls_cnt, +void rd_kafka_CreateAcls(rd_kafka_t *rk, + rd_kafka_AclBinding_t **new_acls, + size_t new_acls_cnt, const rd_kafka_AdminOptions_t *options, rd_kafka_queue_t *rkqu) { rd_kafka_op_t *rko; size_t i; static const struct rd_kafka_admin_worker_cbs cbs = { - rd_kafka_DeleteAclsRequest, rd_kafka_DeleteAclsResponse_parse}; + rd_kafka_CreateAclsRequest, rd_kafka_CreateAclsResponse_parse}; - rko = rd_kafka_admin_request_op_new(rk, RD_KAFKA_OP_DELETEACLS, - RD_KAFKA_EVENT_DELETEACLS_RESULT, + rko = rd_kafka_admin_request_op_new(rk, RD_KAFKA_OP_CREATEACLS, + RD_KAFKA_EVENT_CREATEACLS_RESULT, &cbs, options, rkqu->rkqu_q); - rd_list_init(&rko->rko_u.admin_request.args, (int)del_acls_cnt, + rd_list_init(&rko->rko_u.admin_request.args, (int)new_acls_cnt, rd_kafka_AclBinding_free); - for (i = 0; i < del_acls_cnt; i++) + for (i = 0; i < new_acls_cnt; i++) rd_list_add(&rko->rko_u.admin_request.args, - rd_kafka_AclBindingFilter_copy(del_acls[i])); + rd_kafka_AclBinding_copy(new_acls[i])); rd_kafka_q_enq(rk->rk_ops, rko); } +/** + * @brief Get an array of rd_kafka_acl_result_t from a CreateAcls result. + * + * The returned \p rd_kafka_acl_result_t life-time is the same as the \p result + * object. + * @param cntp is updated to the number of elements in the array. 
+ */ +const rd_kafka_acl_result_t ** +rd_kafka_CreateAcls_result_acls(const rd_kafka_CreateAcls_result_t *result, + size_t *cntp) { + return rd_kafka_admin_result_ret_acl_results( + (const rd_kafka_op_t *)result, cntp); +} + /**@}*/ /** - * @name Alter consumer group offsets (committed offsets) + * @name DescribeAcls * @{ * * * - * */ -rd_kafka_AlterConsumerGroupOffsets_t *rd_kafka_AlterConsumerGroupOffsets_new( - const char *group_id, - const rd_kafka_topic_partition_list_t *partitions) { - rd_assert(group_id && partitions); - - size_t tsize = strlen(group_id) + 1; - rd_kafka_AlterConsumerGroupOffsets_t *alter_grpoffsets; - - /* Single allocation */ - alter_grpoffsets = rd_malloc(sizeof(*alter_grpoffsets) + tsize); - alter_grpoffsets->group_id = alter_grpoffsets->data; - memcpy(alter_grpoffsets->group_id, group_id, tsize); - alter_grpoffsets->partitions = - rd_kafka_topic_partition_list_copy(partitions); +/** + * @brief Parse DescribeAclsResponse and create ADMIN_RESULT op. + */ +static rd_kafka_resp_err_t +rd_kafka_DescribeAclsResponse_parse(rd_kafka_op_t *rko_req, + rd_kafka_op_t **rko_resultp, + rd_kafka_buf_t *reply, + char *errstr, + size_t errstr_size) { + const int log_decode_errors = LOG_ERR; + rd_kafka_broker_t *rkb = reply->rkbuf_rkb; + rd_kafka_resp_err_t err = RD_KAFKA_RESP_ERR_NO_ERROR; + rd_kafka_op_t *rko_result = NULL; + int32_t res_cnt; + int i; + int j; + rd_kafka_AclBinding_t *acl = NULL; + int16_t error_code; + rd_kafkap_str_t error_msg; - return alter_grpoffsets; -} + rd_kafka_buf_read_throttle_time(reply); -void rd_kafka_AlterConsumerGroupOffsets_destroy( - rd_kafka_AlterConsumerGroupOffsets_t *alter_grpoffsets) { - rd_kafka_topic_partition_list_destroy(alter_grpoffsets->partitions); - rd_free(alter_grpoffsets); -} + rd_kafka_buf_read_i16(reply, &error_code); + rd_kafka_buf_read_str(reply, &error_msg); -static void rd_kafka_AlterConsumerGroupOffsets_free(void *ptr) { - rd_kafka_AlterConsumerGroupOffsets_destroy(ptr); -} + if (error_code) { + if (RD_KAFKAP_STR_LEN(&error_msg) == 0) + errstr = (char *)rd_kafka_err2str(error_code); + else + RD_KAFKAP_STR_DUPA(&errstr, &error_msg); + } -void rd_kafka_AlterConsumerGroupOffsets_destroy_array( - rd_kafka_AlterConsumerGroupOffsets_t **alter_grpoffsets, - size_t alter_grpoffsets_cnt) { - size_t i; - for (i = 0; i < alter_grpoffsets_cnt; i++) - rd_kafka_AlterConsumerGroupOffsets_destroy(alter_grpoffsets[i]); -} + /* #resources */ + rd_kafka_buf_read_arraycnt(reply, &res_cnt, 100000); -/** - * @brief Allocate a new AlterGroup and make a copy of \p src - */ -static rd_kafka_AlterConsumerGroupOffsets_t * -rd_kafka_AlterConsumerGroupOffsets_copy( - const rd_kafka_AlterConsumerGroupOffsets_t *src) { - return rd_kafka_AlterConsumerGroupOffsets_new(src->group_id, - src->partitions); -} + rko_result = rd_kafka_admin_result_new(rko_req); -/** - * @brief Send a OffsetCommitRequest to \p rkb with the partitions - * in alter_grpoffsets (AlterConsumerGroupOffsets_t*) using - * \p options. 
- * - */ -static rd_kafka_resp_err_t rd_kafka_AlterConsumerGroupOffsetsRequest( - rd_kafka_broker_t *rkb, - /* (rd_kafka_AlterConsumerGroupOffsets_t*) */ - const rd_list_t *alter_grpoffsets, - rd_kafka_AdminOptions_t *options, - char *errstr, - size_t errstr_size, - rd_kafka_replyq_t replyq, - rd_kafka_resp_cb_t *resp_cb, - void *opaque) { - const rd_kafka_AlterConsumerGroupOffsets_t *grpoffsets = - rd_list_elem(alter_grpoffsets, 0); + rd_list_init(&rko_result->rko_u.admin_result.results, res_cnt, + rd_kafka_AclBinding_free); - rd_assert(rd_list_cnt(alter_grpoffsets) == 1); + for (i = 0; i < (int)res_cnt; i++) { + int8_t res_type = RD_KAFKA_RESOURCE_UNKNOWN; + rd_kafkap_str_t kres_name; + char *res_name; + int8_t resource_pattern_type = + RD_KAFKA_RESOURCE_PATTERN_LITERAL; + int32_t acl_cnt; - rd_kafka_topic_partition_list_t *offsets = grpoffsets->partitions; - rd_kafka_consumer_group_metadata_t *cgmetadata = - rd_kafka_consumer_group_metadata_new(grpoffsets->group_id); + rd_kafka_buf_read_i8(reply, &res_type); + rd_kafka_buf_read_str(reply, &kres_name); + RD_KAFKAP_STR_DUPA(&res_name, &kres_name); - int ret = rd_kafka_OffsetCommitRequest( - rkb, cgmetadata, offsets, replyq, resp_cb, opaque, - "rd_kafka_AlterConsumerGroupOffsetsRequest"); - rd_kafka_consumer_group_metadata_destroy(cgmetadata); - if (ret == 0) { - rd_snprintf(errstr, errstr_size, - "At least one topic-partition offset must " - "be >= 0"); - return RD_KAFKA_RESP_ERR__NO_OFFSET; - } - return RD_KAFKA_RESP_ERR_NO_ERROR; -} + if (rd_kafka_buf_ApiVersion(reply) >= 1) { + rd_kafka_buf_read_i8(reply, &resource_pattern_type); + } -/** - * @brief Parse OffsetCommitResponse and create ADMIN_RESULT op. - */ -static rd_kafka_resp_err_t -rd_kafka_AlterConsumerGroupOffsetsResponse_parse(rd_kafka_op_t *rko_req, - rd_kafka_op_t **rko_resultp, - rd_kafka_buf_t *reply, - char *errstr, - size_t errstr_size) { - rd_kafka_t *rk; - rd_kafka_broker_t *rkb; - rd_kafka_op_t *rko_result; - rd_kafka_topic_partition_list_t *partitions = NULL; - rd_kafka_resp_err_t err = RD_KAFKA_RESP_ERR_NO_ERROR; - const rd_kafka_AlterConsumerGroupOffsets_t *alter_grpoffsets = - rd_list_elem(&rko_req->rko_u.admin_request.args, 0); - partitions = - rd_kafka_topic_partition_list_copy(alter_grpoffsets->partitions); + if (res_type <= RD_KAFKA_RESOURCE_UNKNOWN || + res_type >= RD_KAFKA_RESOURCE__CNT) { + rd_rkb_log(rkb, LOG_WARNING, "DESCRIBEACLSRESPONSE", + "DescribeAclsResponse returned unknown " + "resource type %d", + res_type); + res_type = RD_KAFKA_RESOURCE_UNKNOWN; + } + if (resource_pattern_type <= + RD_KAFKA_RESOURCE_PATTERN_UNKNOWN || + resource_pattern_type >= + RD_KAFKA_RESOURCE_PATTERN_TYPE__CNT) { + rd_rkb_log(rkb, LOG_WARNING, "DESCRIBEACLSRESPONSE", + "DescribeAclsResponse returned unknown " + "resource pattern type %d", + resource_pattern_type); + resource_pattern_type = + RD_KAFKA_RESOURCE_PATTERN_UNKNOWN; + } - rk = rko_req->rko_rk; - rkb = reply->rkbuf_rkb; - err = rd_kafka_handle_OffsetCommit(rk, rkb, err, reply, NULL, - partitions, rd_true); + /* #resources */ + rd_kafka_buf_read_arraycnt(reply, &acl_cnt, 100000); + + for (j = 0; j < (int)acl_cnt; j++) { + rd_kafkap_str_t kprincipal; + rd_kafkap_str_t khost; + int8_t operation = RD_KAFKA_ACL_OPERATION_UNKNOWN; + int8_t permission_type = + RD_KAFKA_ACL_PERMISSION_TYPE_UNKNOWN; + char *principal; + char *host; + + rd_kafka_buf_read_str(reply, &kprincipal); + rd_kafka_buf_read_str(reply, &khost); + rd_kafka_buf_read_i8(reply, &operation); + rd_kafka_buf_read_i8(reply, &permission_type); + 
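/* For reference, the DescribeAcls response layout this parser walks is:
 *   ThrottleTimeMs, ErrorCode, ErrorMessage,
 *   #Resources { ResourceType, ResourceName, PatternType (v1+),
 *                #Acls { Principal, Host, Operation, PermissionType } }
 * Out-of-range enum values coming off the wire are logged and coerced to
 * the corresponding *_UNKNOWN value rather than failing the response. */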
RD_KAFKAP_STR_DUPA(&principal, &kprincipal); + RD_KAFKAP_STR_DUPA(&host, &khost); + + if (operation <= RD_KAFKA_ACL_OPERATION_UNKNOWN || + operation >= RD_KAFKA_ACL_OPERATION__CNT) { + rd_rkb_log(rkb, LOG_WARNING, + "DESCRIBEACLSRESPONSE", + "DescribeAclsResponse returned " + "unknown acl operation %d", + operation); + operation = RD_KAFKA_ACL_OPERATION_UNKNOWN; + } + if (permission_type <= + RD_KAFKA_ACL_PERMISSION_TYPE_UNKNOWN || + permission_type >= + RD_KAFKA_ACL_PERMISSION_TYPE__CNT) { + rd_rkb_log(rkb, LOG_WARNING, + "DESCRIBEACLSRESPONSE", + "DescribeAclsResponse returned " + "unknown acl permission type %d", + permission_type); + permission_type = + RD_KAFKA_ACL_PERMISSION_TYPE_UNKNOWN; + } + + acl = rd_kafka_AclBinding_new0( + res_type, res_name, resource_pattern_type, + principal, host, operation, permission_type, + RD_KAFKA_RESP_ERR_NO_ERROR, NULL); + + rd_list_add(&rko_result->rko_u.admin_result.results, + acl); + } + } - /* Create result op and group_result_t */ - rko_result = rd_kafka_admin_result_new(rko_req); - rd_list_init(&rko_result->rko_u.admin_result.results, 1, - rd_kafka_group_result_free); - rd_list_add(&rko_result->rko_u.admin_result.results, - rd_kafka_group_result_new(alter_grpoffsets->group_id, -1, - partitions, NULL)); - rd_kafka_topic_partition_list_destroy(partitions); *rko_resultp = rko_result; - if (reply->rkbuf_err) - rd_snprintf( - errstr, errstr_size, - "AlterConsumerGroupOffset response parse failure: %s", - rd_kafka_err2str(reply->rkbuf_err)); + return RD_KAFKA_RESP_ERR_NO_ERROR; - return reply->rkbuf_err; +err_parse: + if (rko_result) + rd_kafka_op_destroy(rko_result); + + rd_snprintf(errstr, errstr_size, + "DescribeAcls response protocol parse failure: %s", + rd_kafka_err2str(err)); + + return err; } -void rd_kafka_AlterConsumerGroupOffsets( - rd_kafka_t *rk, - rd_kafka_AlterConsumerGroupOffsets_t **alter_grpoffsets, - size_t alter_grpoffsets_cnt, - const rd_kafka_AdminOptions_t *options, - rd_kafka_queue_t *rkqu) { - int i; +void rd_kafka_DescribeAcls(rd_kafka_t *rk, + rd_kafka_AclBindingFilter_t *acl_filter, + const rd_kafka_AdminOptions_t *options, + rd_kafka_queue_t *rkqu) { + rd_kafka_op_t *rko; + static const struct rd_kafka_admin_worker_cbs cbs = { - rd_kafka_AlterConsumerGroupOffsetsRequest, - rd_kafka_AlterConsumerGroupOffsetsResponse_parse, + rd_kafka_DescribeAclsRequest, + rd_kafka_DescribeAclsResponse_parse, }; - rd_kafka_op_t *rko; - rd_kafka_topic_partition_list_t *copied_offsets; - rd_assert(rkqu); + rko = rd_kafka_admin_request_op_new(rk, RD_KAFKA_OP_DESCRIBEACLS, + RD_KAFKA_EVENT_DESCRIBEACLS_RESULT, + &cbs, options, rkqu->rkqu_q); - rko = rd_kafka_admin_request_op_new( - rk, RD_KAFKA_OP_ALTERCONSUMERGROUPOFFSETS, - RD_KAFKA_EVENT_ALTERCONSUMERGROUPOFFSETS_RESULT, &cbs, options, - rkqu->rkqu_q); + rd_list_init(&rko->rko_u.admin_request.args, 1, + rd_kafka_AclBinding_free); - if (alter_grpoffsets_cnt != 1) { - /* For simplicity we only support one single group for now */ - rd_kafka_admin_result_fail(rko, RD_KAFKA_RESP_ERR__INVALID_ARG, - "Exactly one " - "AlterConsumerGroupOffsets must " - "be passed"); - goto fail; + rd_list_add(&rko->rko_u.admin_request.args, + rd_kafka_AclBindingFilter_copy(acl_filter)); + + rd_kafka_q_enq(rk->rk_ops, rko); +} + +struct rd_kafka_ScramCredentialInfo_s { + rd_kafka_ScramMechanism_t mechanism; + int32_t iterations; +}; + +rd_kafka_ScramMechanism_t rd_kafka_ScramCredentialInfo_mechanism( + const rd_kafka_ScramCredentialInfo_t *scram_credential_info) { + return scram_credential_info->mechanism; +} + 
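/* Illustrative query sketch for the rd_kafka_DescribeAcls() entry point
 * above: a filter with NULL name/principal/host matches any value for that
 * field (assumed context: initialized rd_kafka_t *rk, <stdio.h>; error
 * handling elided). */
#if 0
char errstr[256];
rd_kafka_AclBindingFilter_t *filter = rd_kafka_AclBindingFilter_new(
        RD_KAFKA_RESOURCE_TOPIC, NULL /*any name*/,
        RD_KAFKA_RESOURCE_PATTERN_LITERAL, NULL /*any principal*/,
        NULL /*any host*/, RD_KAFKA_ACL_OPERATION_ANY,
        RD_KAFKA_ACL_PERMISSION_TYPE_ANY, errstr, sizeof(errstr));

rd_kafka_queue_t *q = rd_kafka_queue_new(rk);
rd_kafka_DescribeAcls(rk, filter, NULL /*options*/, q);

rd_kafka_event_t *ev = rd_kafka_queue_poll(q, 10 * 1000);
const rd_kafka_DescribeAcls_result_t *res =
    rd_kafka_event_DescribeAcls_result(ev);
if (res) {
        size_t cnt, i;
        const rd_kafka_AclBinding_t **acls =
            rd_kafka_DescribeAcls_result_acls(res, &cnt);
        for (i = 0; i < cnt; i++)
                printf("%s on %s: operation %d\n",
                       rd_kafka_AclBinding_principal(acls[i]),
                       rd_kafka_AclBinding_name(acls[i]),
                       (int)rd_kafka_AclBinding_operation(acls[i]));
}
rd_kafka_event_destroy(ev);
rd_kafka_AclBinding_destroy(filter);
rd_kafka_queue_destroy(q);
#endif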
+int32_t rd_kafka_ScramCredentialInfo_iterations( + const rd_kafka_ScramCredentialInfo_t *scram_credential_info) { + return scram_credential_info->iterations; +} + +struct rd_kafka_UserScramCredentialsDescription_s { + char *user; + rd_kafka_error_t *error; + size_t credential_info_cnt; + rd_kafka_ScramCredentialInfo_t *credential_infos; +}; + +rd_kafka_UserScramCredentialsDescription_t * +rd_kafka_UserScramCredentialsDescription_new(const char *username, + size_t num_credentials) { + rd_kafka_UserScramCredentialsDescription_t *description; + description = rd_calloc(1, sizeof(*description)); + description->user = rd_strdup(username); + description->error = NULL; + description->credential_info_cnt = num_credentials; + description->credential_infos = NULL; + if (num_credentials > 0) { + rd_kafka_ScramCredentialInfo_t *credentialinfo; + description->credential_infos = + rd_calloc(num_credentials, sizeof(*credentialinfo)); } + return description; +} - if (alter_grpoffsets[0]->partitions->cnt == 0) { - rd_kafka_admin_result_fail(rko, RD_KAFKA_RESP_ERR__INVALID_ARG, - "Non-empty topic partition list " - "must be present"); - goto fail; +void rd_kafka_UserScramCredentialsDescription_destroy( + rd_kafka_UserScramCredentialsDescription_t *description) { + if (!description) + return; + rd_free(description->user); + rd_kafka_error_destroy(description->error); + if (description->credential_infos) + rd_free(description->credential_infos); + rd_free(description); +} + +void rd_kafka_UserScramCredentialsDescription_destroy_free(void *description) { + rd_kafka_UserScramCredentialsDescription_destroy(description); +} + +void rd_kafka_UserScramCredentailsDescription_set_error( + rd_kafka_UserScramCredentialsDescription_t *description, + rd_kafka_resp_err_t errorcode, + const char *err) { + rd_kafka_error_destroy(description->error); + description->error = rd_kafka_error_new(errorcode, "%s", err); +} + +const char *rd_kafka_UserScramCredentialsDescription_user( + const rd_kafka_UserScramCredentialsDescription_t *description) { + return description->user; +} + +const rd_kafka_error_t *rd_kafka_UserScramCredentialsDescription_error( + const rd_kafka_UserScramCredentialsDescription_t *description) { + return description->error; +} + +size_t rd_kafka_UserScramCredentialsDescription_scramcredentialinfo_count( + const rd_kafka_UserScramCredentialsDescription_t *description) { + return description->credential_info_cnt; +} + +const rd_kafka_ScramCredentialInfo_t * +rd_kafka_UserScramCredentialsDescription_scramcredentialinfo( + const rd_kafka_UserScramCredentialsDescription_t *description, + size_t idx) { + return &description->credential_infos[idx]; +} + +const rd_kafka_UserScramCredentialsDescription_t ** +rd_kafka_DescribeUserScramCredentials_result_descriptions( + const rd_kafka_DescribeUserScramCredentials_result_t *result, + size_t *cntp) { + *cntp = rd_list_cnt(&result->rko_u.admin_result.results); + return (const rd_kafka_UserScramCredentialsDescription_t **) + result->rko_u.admin_result.results.rl_elems; +} + +rd_kafka_resp_err_t +rd_kafka_DescribeUserScramCredentialsRequest(rd_kafka_broker_t *rkb, + const rd_list_t *userlist, + rd_kafka_AdminOptions_t *options, + char *errstr, + size_t errstr_size, + rd_kafka_replyq_t replyq, + rd_kafka_resp_cb_t *resp_cb, + void *opaque) { + rd_kafka_buf_t *rkbuf; + int16_t ApiVersion = 0; + int features; + size_t i; + size_t num_users; + + ApiVersion = rd_kafka_broker_ApiVersion_supported( + rkb, RD_KAFKAP_DescribeUserScramCredentials, 0, 0, &features); + if (ApiVersion 
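/* Sketch of the consumer side of the accessors above, using the
 * rd_kafka_DescribeUserScramCredentials() entry point defined further below
 * (assumed context: initialized rd_kafka_t *rk, <stdio.h>; usernames are
 * hypothetical; error handling elided). */
#if 0
const char *users[] = {"alice", "bob"};
rd_kafka_queue_t *q = rd_kafka_queue_new(rk);
rd_kafka_DescribeUserScramCredentials(rk, users, 2, NULL /*options*/, q);

rd_kafka_event_t *ev = rd_kafka_queue_poll(q, 10 * 1000);
const rd_kafka_DescribeUserScramCredentials_result_t *res =
    rd_kafka_event_DescribeUserScramCredentials_result(ev);
if (res) {
        size_t cnt, i, j;
        const rd_kafka_UserScramCredentialsDescription_t **descs =
            rd_kafka_DescribeUserScramCredentials_result_descriptions(res,
                                                                      &cnt);
        for (i = 0; i < cnt; i++)
                for (j = 0;
                     j < rd_kafka_UserScramCredentialsDescription_scramcredentialinfo_count(
                             descs[i]);
                     j++) {
                        const rd_kafka_ScramCredentialInfo_t *ci =
                            rd_kafka_UserScramCredentialsDescription_scramcredentialinfo(
                                descs[i], j);
                        printf("%s: mechanism %d, %d iterations\n",
                               rd_kafka_UserScramCredentialsDescription_user(
                                   descs[i]),
                               (int)rd_kafka_ScramCredentialInfo_mechanism(ci),
                               rd_kafka_ScramCredentialInfo_iterations(ci));
                }
}
rd_kafka_event_destroy(ev);
rd_kafka_queue_destroy(q);
#endif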
== -1) { + rd_snprintf( + errstr, errstr_size, + "DescribeUserScramCredentials API (KIP-554) not supported " + "by broker"); + return RD_KAFKA_RESP_ERR__UNSUPPORTED_FEATURE; } - for (i = 0; i < alter_grpoffsets[0]->partitions->cnt; i++) { - if (alter_grpoffsets[0]->partitions->elems[i].offset < 0) { - rd_kafka_admin_result_fail( - rko, RD_KAFKA_RESP_ERR__INVALID_ARG, - "All topic-partition offsets " - "must be >= 0"); - goto fail; - } + num_users = rd_list_cnt(userlist); + + rkbuf = rd_kafka_buf_new_flexver_request( + rkb, RD_KAFKAP_DescribeUserScramCredentials, 1, num_users * 25, + rd_true); + /* #Users */ + rd_kafka_buf_write_arraycnt(rkbuf, num_users); + for (i = 0; i < num_users; i++) { + rd_kafkap_str_t *user = rd_list_elem(userlist, i); + /* Name */ + rd_kafka_buf_write_str(rkbuf, user->str, user->len); + rd_kafka_buf_write_tags(rkbuf); } + rd_kafka_buf_ApiVersion_set(rkbuf, ApiVersion, 0); + /* Last Tag buffer included automatically*/ + rd_kafka_broker_buf_enq_replyq(rkb, rkbuf, replyq, resp_cb, opaque); + return RD_KAFKA_RESP_ERR_NO_ERROR; +} - /* TODO: add group id duplication check if in future more than one - * AlterConsumerGroupOffsets can be passed */ +static rd_kafka_resp_err_t +rd_kafka_DescribeUserScramCredentialsResponse_parse(rd_kafka_op_t *rko_req, + rd_kafka_op_t **rko_resultp, + rd_kafka_buf_t *reply, + char *errstr, + size_t errstr_size) { + const int log_decode_errors = LOG_ERR; + rd_kafka_op_t *rko_result = NULL; + int32_t num_users; + int16_t ErrorCode; + rd_kafkap_str_t ErrorMessage = RD_KAFKAP_STR_INITIALIZER; + int32_t i; - /* Copy offsets list for checking duplicated */ - copied_offsets = - rd_kafka_topic_partition_list_copy(alter_grpoffsets[0]->partitions); - if (rd_kafka_topic_partition_list_has_duplicates( - copied_offsets, rd_false /*check partition*/)) { - rd_kafka_topic_partition_list_destroy(copied_offsets); - rd_kafka_admin_result_fail(rko, RD_KAFKA_RESP_ERR__INVALID_ARG, - "Duplicate partitions not allowed"); - goto fail; + rko_result = rd_kafka_admin_result_new(rko_req); + + /* ThrottleTimeMs */ + rd_kafka_buf_read_throttle_time(reply); + + /* ErrorCode */ + rd_kafka_buf_read_i16(reply, &ErrorCode); + rko_result->rko_err = ErrorCode; /*Request Level Error Code */ + + /* ErrorMessage */ + rd_kafka_buf_read_str(reply, &ErrorMessage); + if (ErrorCode) { + if (RD_KAFKAP_STR_LEN(&ErrorMessage) == 0) + errstr = (char *)rd_kafka_err2str(ErrorCode); + else + RD_KAFKAP_STR_DUPA(&errstr, &ErrorMessage); + rko_result->rko_u.admin_result.errstr = + errstr; /* Request Level Error string*/ } - rd_kafka_topic_partition_list_destroy(copied_offsets); - rko->rko_u.admin_request.broker_id = RD_KAFKA_ADMIN_TARGET_COORDINATOR; - rko->rko_u.admin_request.coordtype = RD_KAFKA_COORD_GROUP; - rko->rko_u.admin_request.coordkey = - rd_strdup(alter_grpoffsets[0]->group_id); + /* #Results */ + rd_kafka_buf_read_arraycnt(reply, &num_users, 10000); + rd_list_init(&rko_result->rko_u.admin_result.results, num_users, + rd_kafka_UserScramCredentialsDescription_destroy_free); + + for (i = 0; i < num_users; i++) { + rd_kafkap_str_t User; + int16_t ErrorCode; + rd_kafkap_str_t ErrorMessage = RD_KAFKAP_STR_INITIALIZER; + size_t itr; + /* User */ + rd_kafka_buf_read_str(reply, &User); + /* ErrorCode */ + rd_kafka_buf_read_i16(reply, &ErrorCode); + /* ErrorMessage */ + rd_kafka_buf_read_str(reply, &ErrorMessage); + + int32_t num_credentials; + /* #CredentialInfos */ + rd_kafka_buf_read_arraycnt(reply, &num_credentials, 10000); + rd_kafka_UserScramCredentialsDescription_t *description = + 
rd_kafka_UserScramCredentialsDescription_new( + User.str, num_credentials); + rd_kafka_UserScramCredentailsDescription_set_error( + description, ErrorCode, ErrorMessage.str); + for (itr = 0; itr < (size_t)num_credentials; itr++) { + int8_t Mechanism; + int32_t Iterations; + /* Mechanism */ + rd_kafka_buf_read_i8(reply, &Mechanism); + /* Iterations */ + rd_kafka_buf_read_i32(reply, &Iterations); + rd_kafka_buf_skip_tags(reply); + rd_kafka_ScramCredentialInfo_t *scram_credential = + &description->credential_infos[itr]; + scram_credential->mechanism = Mechanism; + scram_credential->iterations = Iterations; + } + rd_kafka_buf_skip_tags(reply); + rd_list_add(&rko_result->rko_u.admin_result.results, + description); + } + *rko_resultp = rko_result; - /* Store copy of group on request so the group name can be reached - * from the response parser. */ - rd_list_init(&rko->rko_u.admin_request.args, 1, - rd_kafka_AlterConsumerGroupOffsets_free); - rd_list_add(&rko->rko_u.admin_request.args, - (void *)rd_kafka_AlterConsumerGroupOffsets_copy( - alter_grpoffsets[0])); + return RD_KAFKA_RESP_ERR_NO_ERROR; + +err_parse: + if (rko_result) + rd_kafka_op_destroy(rko_result); + + rd_snprintf( + errstr, errstr_size, + "DescribeUserScramCredentials response protocol parse failure: %s", + rd_kafka_err2str(reply->rkbuf_err)); + + return reply->rkbuf_err; +} + +void rd_kafka_DescribeUserScramCredentials( + rd_kafka_t *rk, + const char **users, + size_t user_cnt, + const rd_kafka_AdminOptions_t *options, + rd_kafka_queue_t *rkqu) { + + rd_kafka_op_t *rko; + size_t i; + rd_list_t *userlist = NULL; + + static const struct rd_kafka_admin_worker_cbs cbs = { + rd_kafka_DescribeUserScramCredentialsRequest, + rd_kafka_DescribeUserScramCredentialsResponse_parse, + }; + + rko = rd_kafka_admin_request_op_new( + rk, RD_KAFKA_OP_DESCRIBEUSERSCRAMCREDENTIALS, + RD_KAFKA_EVENT_DESCRIBEUSERSCRAMCREDENTIALS_RESULT, &cbs, options, + rkqu->rkqu_q); + /* Check empty strings */ + for (i = 0; i < user_cnt; i++) { + if (!*users[i]) { + rd_kafka_admin_result_fail( + rko, RD_KAFKA_RESP_ERR__INVALID_ARG, + "Empty users aren't allowed, " + "index %" PRIusz, + i); + goto err; + } + } + + /* Check Duplicates */ + if (user_cnt > 1) { + userlist = rd_list_new(user_cnt, rd_free); + for (i = 0; i < user_cnt; i++) { + rd_list_add(userlist, rd_strdup(users[i])); + } + rd_list_sort(userlist, rd_strcmp2); + if (rd_list_find_duplicate(userlist, rd_strcmp2)) { + rd_kafka_admin_result_fail( + rko, RD_KAFKA_RESP_ERR__INVALID_ARG, + "Duplicate users aren't allowed " + "in the same request"); + goto err; + } + rd_list_destroy(userlist); + } + + rd_list_init(&rko->rko_u.admin_request.args, user_cnt, rd_free); + for (i = 0; i < user_cnt; i++) { + rd_list_add(&rko->rko_u.admin_request.args, + rd_kafkap_str_new(users[i], -1)); + } rd_kafka_q_enq(rk->rk_ops, rko); return; -fail: +err: + RD_IF_FREE(userlist, rd_list_destroy); rd_kafka_admin_common_worker_destroy(rk, rko, rd_true /*destroy*/); } +/** + * @enum rd_kafka_UserScramCredentialAlteration_type_t + * @brief Types of user SCRAM alterations. 
+ */ +typedef enum rd_kafka_UserScramCredentialAlteration_type_s { + RD_KAFKA_USER_SCRAM_CREDENTIAL_ALTERATION_TYPE_UPSERT = 0, + RD_KAFKA_USER_SCRAM_CREDENTIAL_ALTERATION_TYPE_DELETE = 1, + RD_KAFKA_USER_SCRAM_CREDENTIAL_ALTERATION_TYPE__CNT +} rd_kafka_UserScramCredentialAlteration_type_t; + +struct rd_kafka_UserScramCredentialAlteration_s { + char *user; + rd_kafka_UserScramCredentialAlteration_type_t alteration_type; + union { + struct { + rd_kafka_ScramCredentialInfo_t credential_info; + rd_kafkap_bytes_t *salt; + rd_kafkap_bytes_t *password; + } upsertion; + struct { + rd_kafka_ScramMechanism_t mechanism; + } deletion; + } alteration; +}; + +rd_kafka_UserScramCredentialAlteration_t * +rd_kafka_UserScramCredentialUpsertion_new(const char *username, + rd_kafka_ScramMechanism_t mechanism, + int32_t iterations, + const unsigned char *password, + size_t password_size, + const unsigned char *salt, + size_t salt_size) { + rd_kafka_UserScramCredentialAlteration_t *alteration; + alteration = rd_calloc(1, sizeof(*alteration)); + alteration->user = rd_strdup(username); + alteration->alteration_type = + RD_KAFKA_USER_SCRAM_CREDENTIAL_ALTERATION_TYPE_UPSERT; + alteration->alteration.upsertion.credential_info.mechanism = mechanism; + alteration->alteration.upsertion.credential_info.iterations = + iterations; + + alteration->alteration.upsertion.password = + rd_kafkap_bytes_new(password, password_size); + if (salt_size != 0) { + alteration->alteration.upsertion.salt = + rd_kafkap_bytes_new(salt, salt_size); + } else { +#if WITH_SSL && OPENSSL_VERSION_NUMBER >= 0x10101000L + unsigned char random_salt[64]; + if (RAND_priv_bytes(random_salt, sizeof(random_salt)) == 1) { + alteration->alteration.upsertion.salt = + rd_kafkap_bytes_new(random_salt, + sizeof(random_salt)); + } +#endif + } + return alteration; +} + +rd_kafka_UserScramCredentialAlteration_t * +rd_kafka_UserScramCredentialDeletion_new(const char *username, + rd_kafka_ScramMechanism_t mechanism) { + rd_kafka_UserScramCredentialAlteration_t *alteration; + alteration = rd_calloc(1, sizeof(*alteration)); + alteration->user = rd_strdup(username); + alteration->alteration_type = + RD_KAFKA_USER_SCRAM_CREDENTIAL_ALTERATION_TYPE_DELETE; + alteration->alteration.deletion.mechanism = mechanism; + return alteration; +} + +void rd_kafka_UserScramCredentialAlteration_destroy( + rd_kafka_UserScramCredentialAlteration_t *alteration) { + if (!alteration) + return; + rd_free(alteration->user); + if (alteration->alteration_type == + RD_KAFKA_USER_SCRAM_CREDENTIAL_ALTERATION_TYPE_UPSERT) { + rd_kafkap_bytes_destroy(alteration->alteration.upsertion.salt); + rd_kafkap_bytes_destroy( + alteration->alteration.upsertion.password); + } + rd_free(alteration); +} + +void rd_kafka_UserScramCredentialAlteration_destroy_free(void *alteration) { + rd_kafka_UserScramCredentialAlteration_destroy(alteration); +} + +void rd_kafka_UserScramCredentialAlteration_destroy_array( + rd_kafka_UserScramCredentialAlteration_t **alterations, + size_t alteration_cnt) { + size_t i; + for (i = 0; i < alteration_cnt; i++) + rd_kafka_UserScramCredentialAlteration_destroy(alterations[i]); +} + +static rd_kafka_UserScramCredentialAlteration_t * +rd_kafka_UserScramCredentialAlteration_copy( + const rd_kafka_UserScramCredentialAlteration_t *alteration) { + rd_kafka_UserScramCredentialAlteration_t *copied_alteration = + rd_calloc(1, sizeof(*alteration)); + copied_alteration->user = rd_strdup(alteration->user); + copied_alteration->alteration_type = alteration->alteration_type; + + if 
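/* Sketch: building one upsertion and one deletion with the constructors
 * above. Passing salt_size == 0 makes the upsertion constructor draw a
 * random 64-byte salt itself (the RAND_priv_bytes() branch, OpenSSL >=
 * 1.1.1). Usernames, password and iteration count are illustrative. */
#if 0
rd_kafka_UserScramCredentialAlteration_t *alts[2];

alts[0] = rd_kafka_UserScramCredentialUpsertion_new(
        "alice", RD_KAFKA_SCRAM_MECHANISM_SHA_256, 8192 /*iterations*/,
        (const unsigned char *)"secret", 6, NULL /*salt*/,
        0 /*generate random salt*/);
alts[1] = rd_kafka_UserScramCredentialDeletion_new(
        "bob", RD_KAFKA_SCRAM_MECHANISM_SHA_512);

/* ... hand alts to rd_kafka_AlterUserScramCredentials(), then ... */
rd_kafka_UserScramCredentialAlteration_destroy_array(alts, 2);
#endif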
(alteration->alteration_type == + RD_KAFKA_USER_SCRAM_CREDENTIAL_ALTERATION_TYPE_UPSERT /*Upsert*/) { + copied_alteration->alteration.upsertion.salt = + rd_kafkap_bytes_copy(alteration->alteration.upsertion.salt); + copied_alteration->alteration.upsertion.password = + rd_kafkap_bytes_copy( + alteration->alteration.upsertion.password); + copied_alteration->alteration.upsertion.credential_info + .mechanism = + alteration->alteration.upsertion.credential_info.mechanism; + copied_alteration->alteration.upsertion.credential_info + .iterations = + alteration->alteration.upsertion.credential_info.iterations; + } else if ( + alteration->alteration_type == + RD_KAFKA_USER_SCRAM_CREDENTIAL_ALTERATION_TYPE_DELETE /*Delete*/) { + copied_alteration->alteration.deletion.mechanism = + alteration->alteration.deletion.mechanism; + } + + return copied_alteration; +} + +struct rd_kafka_AlterUserScramCredentials_result_response_s { + char *user; + rd_kafka_error_t *error; +}; + +rd_kafka_AlterUserScramCredentials_result_response_t * +rd_kafka_AlterUserScramCredentials_result_response_new(const char *username) { + rd_kafka_AlterUserScramCredentials_result_response_t *response; + response = rd_calloc(1, sizeof(*response)); + response->user = rd_strdup(username); + response->error = NULL; + return response; +} + +void rd_kafka_AlterUserScramCredentials_result_response_destroy( + rd_kafka_AlterUserScramCredentials_result_response_t *response) { + if (response->user) + rd_free(response->user); + rd_kafka_error_destroy(response->error); + rd_free(response); +} + +void rd_kafka_AlterUserScramCredentials_result_response_destroy_free( + void *response) { + rd_kafka_AlterUserScramCredentials_result_response_destroy(response); +} + +void rd_kafka_AlterUserScramCredentials_result_response_set_error( + rd_kafka_AlterUserScramCredentials_result_response_t *response, + rd_kafka_resp_err_t errorcode, + const char *errstr) { + rd_kafka_error_destroy(response->error); + response->error = rd_kafka_error_new(errorcode, "%s", errstr); +} + +const char *rd_kafka_AlterUserScramCredentials_result_response_user( + const rd_kafka_AlterUserScramCredentials_result_response_t *response) { + return response->user; +} + +const rd_kafka_error_t * +rd_kafka_AlterUserScramCredentials_result_response_error( + const rd_kafka_AlterUserScramCredentials_result_response_t *response) { + return response->error; +} + +const rd_kafka_AlterUserScramCredentials_result_response_t ** +rd_kafka_AlterUserScramCredentials_result_responses( + const rd_kafka_AlterUserScramCredentials_result_t *result, + size_t *cntp) { + *cntp = rd_list_cnt(&result->rko_u.admin_result.results); + return (const rd_kafka_AlterUserScramCredentials_result_response_t **) + result->rko_u.admin_result.results.rl_elems; +} + + +#if WITH_SSL +static rd_kafkap_bytes_t * +rd_kafka_AlterUserScramCredentialsRequest_salted_password( + rd_kafka_broker_t *rkb, + rd_kafkap_bytes_t *salt, + rd_kafkap_bytes_t *password, + rd_kafka_ScramMechanism_t mechanism, + int32_t iterations) { + rd_chariov_t saltedpassword_chariov = {.ptr = + rd_alloca(EVP_MAX_MD_SIZE)}; + + rd_chariov_t salt_chariov; + salt_chariov.ptr = (char *)salt->data; + salt_chariov.size = RD_KAFKAP_BYTES_LEN(salt); + + rd_chariov_t password_chariov; + password_chariov.ptr = (char *)password->data; + password_chariov.size = RD_KAFKAP_BYTES_LEN(password); + + const EVP_MD *evp = NULL; + if (mechanism == RD_KAFKA_SCRAM_MECHANISM_SHA_256) + evp = EVP_sha256(); + else if (mechanism == RD_KAFKA_SCRAM_MECHANISM_SHA_512) + evp = 
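/* Note: this helper derives the SCRAM salted password (an iterated-HMAC,
 * PBKDF2-style derivation performed by rd_kafka_ssl_hmac()), so the request
 * builder below only ever serializes the salt and the derived value; the
 * cleartext password itself is never put on the wire. */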
EVP_sha512(); + rd_assert(evp != NULL); + + rd_kafka_ssl_hmac(rkb, evp, &password_chariov, &salt_chariov, + iterations, &saltedpassword_chariov); + + return rd_kafkap_bytes_new( + (const unsigned char *)saltedpassword_chariov.ptr, + saltedpassword_chariov.size); +} +#endif + +rd_kafka_resp_err_t rd_kafka_AlterUserScramCredentialsRequest( + rd_kafka_broker_t *rkb, + const rd_list_t *user_scram_credential_alterations, + rd_kafka_AdminOptions_t *options, + char *errstr, + size_t errstr_size, + rd_kafka_replyq_t replyq, + rd_kafka_resp_cb_t *resp_cb, + void *opaque) { + + rd_kafka_buf_t *rkbuf; + int16_t ApiVersion = 0; + int features; + size_t num_deletions = 0; + size_t i; + size_t num_alterations; + size_t of_deletions; + ApiVersion = rd_kafka_broker_ApiVersion_supported( + rkb, RD_KAFKAP_DescribeUserScramCredentials, 0, 0, &features); + if (ApiVersion == -1) { + rd_snprintf( + errstr, errstr_size, + "AlterUserScramCredentials API (KIP-554) not supported " + "by broker"); + return RD_KAFKA_RESP_ERR__UNSUPPORTED_FEATURE; + } + + num_alterations = rd_list_cnt(user_scram_credential_alterations); + + rkbuf = rd_kafka_buf_new_flexver_request( + rkb, RD_KAFKAP_AlterUserScramCredentials, 1, num_alterations * 100, + rd_true); + + /* Deletion scram requests*/ + + /* #Deletions */ + of_deletions = rd_kafka_buf_write_arraycnt_pos(rkbuf); + + for (i = 0; i < num_alterations; i++) { + rd_kafka_UserScramCredentialAlteration_t *alteration = + rd_list_elem(user_scram_credential_alterations, i); + if (alteration->alteration_type != + RD_KAFKA_USER_SCRAM_CREDENTIAL_ALTERATION_TYPE_DELETE) + continue; + + num_deletions++; + /* Name */ + rd_kafka_buf_write_str(rkbuf, alteration->user, + strlen(alteration->user)); + /* Mechanism */ + rd_kafka_buf_write_i8( + rkbuf, alteration->alteration.deletion.mechanism); + rd_kafka_buf_write_tags(rkbuf); + } + rd_kafka_buf_finalize_arraycnt(rkbuf, of_deletions, num_deletions); + + /* Upsertion scram request*/ + + /* #Upsertions */ + rd_kafka_buf_write_arraycnt(rkbuf, num_alterations - num_deletions); + for (i = 0; i < num_alterations; i++) { + rd_kafka_UserScramCredentialAlteration_t *alteration = + rd_list_elem(user_scram_credential_alterations, i); + if (alteration->alteration_type != + RD_KAFKA_USER_SCRAM_CREDENTIAL_ALTERATION_TYPE_UPSERT) + continue; + +#if !WITH_SSL + rd_assert(!*"OpenSSL is required for upsertions"); +#else + char *user = alteration->user; + size_t usersize = strlen(user); + rd_kafka_ScramMechanism_t mechanism = + alteration->alteration.upsertion.credential_info.mechanism; + int32_t iterations = + alteration->alteration.upsertion.credential_info.iterations; + /* Name */ + rd_kafka_buf_write_str(rkbuf, user, usersize); + + /* Mechanism */ + rd_kafka_buf_write_i8(rkbuf, mechanism); + + /* Iterations */ + rd_kafka_buf_write_i32(rkbuf, iterations); + + /* Salt */ + rd_kafka_buf_write_kbytes( + rkbuf, alteration->alteration.upsertion.salt); + + rd_kafkap_bytes_t *password_bytes = + rd_kafka_AlterUserScramCredentialsRequest_salted_password( + rkb, alteration->alteration.upsertion.salt, + alteration->alteration.upsertion.password, mechanism, + iterations); + + /* SaltedPassword */ + rd_kafka_buf_write_kbytes(rkbuf, password_bytes); + rd_kafkap_bytes_destroy(password_bytes); + rd_kafka_buf_write_tags(rkbuf); +#endif + } + + rd_kafka_buf_write_tags(rkbuf); + rd_kafka_buf_ApiVersion_set(rkbuf, ApiVersion, 0); + rd_kafka_broker_buf_enq_replyq(rkb, rkbuf, replyq, resp_cb, opaque); + return RD_KAFKA_RESP_ERR_NO_ERROR; +} + +rd_kafka_resp_err_t 
+rd_kafka_AlterUserScramCredentialsResponse_parse(rd_kafka_op_t *rko_req, + rd_kafka_op_t **rko_resultp, + rd_kafka_buf_t *reply, + char *errstr, + size_t errstr_size) { + const int log_decode_errors = LOG_ERR; + rd_kafka_op_t *rko_result = NULL; + int32_t num_results; + int32_t i; + + rko_result = rd_kafka_admin_result_new(rko_req); + + /* ThrottleTimeMs */ + rd_kafka_buf_read_throttle_time(reply); + + /* #Results */ + rd_kafka_buf_read_arraycnt(reply, &num_results, 10000); + + rd_list_init( + &rko_result->rko_u.admin_result.results, num_results, + rd_kafka_AlterUserScramCredentials_result_response_destroy_free); + for (i = 0; i < num_results; i++) { + rd_kafkap_str_t User; + int16_t ErrorCode; + rd_kafkap_str_t ErrorMessage = RD_KAFKAP_STR_INITIALIZER; + + /* User */ + rd_kafka_buf_read_str(reply, &User); + + /* ErrorCode */ + rd_kafka_buf_read_i16(reply, &ErrorCode); + + /* ErrorMessage */ + rd_kafka_buf_read_str(reply, &ErrorMessage); + + rd_kafka_buf_skip_tags(reply); + + rd_kafka_AlterUserScramCredentials_result_response_t *response = + rd_kafka_AlterUserScramCredentials_result_response_new( + User.str); + rd_kafka_AlterUserScramCredentials_result_response_set_error( + response, ErrorCode, ErrorMessage.str); + rd_list_add(&rko_result->rko_u.admin_result.results, response); + } + *rko_resultp = rko_result; + + return RD_KAFKA_RESP_ERR_NO_ERROR; + +err_parse: + if (rko_result) + rd_kafka_op_destroy(rko_result); + + rd_snprintf( + errstr, errstr_size, + "AlterUserScramCredentials response protocol parse failure: %s", + rd_kafka_err2str(reply->rkbuf_err)); + + return reply->rkbuf_err; +} + +void rd_kafka_AlterUserScramCredentials( + rd_kafka_t *rk, + rd_kafka_UserScramCredentialAlteration_t **alterations, + size_t alteration_cnt, + const rd_kafka_AdminOptions_t *options, + rd_kafka_queue_t *rkqu) { + + rd_kafka_op_t *rko; + size_t i; + + static const struct rd_kafka_admin_worker_cbs cbs = { + rd_kafka_AlterUserScramCredentialsRequest, + rd_kafka_AlterUserScramCredentialsResponse_parse, + }; + + rko = rd_kafka_admin_request_op_new( + rk, RD_KAFKA_OP_ALTERUSERSCRAMCREDENTIALS, + RD_KAFKA_EVENT_ALTERUSERSCRAMCREDENTIALS_RESULT, &cbs, options, + rkqu->rkqu_q); + + if (alteration_cnt > 0) { + const char *errstr = NULL; + for (i = 0; i < alteration_cnt; i++) { + rd_bool_t is_upsert = + alterations[i]->alteration_type == + RD_KAFKA_USER_SCRAM_CREDENTIAL_ALTERATION_TYPE_UPSERT; + rd_bool_t is_delete = + alterations[i]->alteration_type == + RD_KAFKA_USER_SCRAM_CREDENTIAL_ALTERATION_TYPE_DELETE; + + if ((is_upsert || is_delete) && + alterations[i] + ->alteration.upsertion.credential_info + .mechanism == + RD_KAFKA_SCRAM_MECHANISM_UNKNOWN) { + errstr = + "SCRAM mechanism must be specified at " + "index %" PRIusz; + break; + } + + + if (!alterations[i]->user || !*alterations[i]->user) { + errstr = "Empty user at index %" PRIusz; + break; + } + + if (is_upsert) { +#if !WITH_SSL + errstr = + "OpenSSL required for upsertion at index " + "%" PRIusz; + break; +#endif + if (RD_KAFKAP_BYTES_LEN( + alterations[i] + ->alteration.upsertion.password) == + 0) { + errstr = + "Empty password at index %" PRIusz; + break; + } + + if (!alterations[i] + ->alteration.upsertion.salt || + RD_KAFKAP_BYTES_LEN( + alterations[i] + ->alteration.upsertion.salt) == 0) { + errstr = "Empty salt at index %" PRIusz; + break; + } + + if (alterations[i] + ->alteration.upsertion.credential_info + .iterations <= 0) { + errstr = + "Non-positive iterations at index " + "%" PRIusz; + break; + } + } + } + + if (errstr) { + 
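/* Caller-side sketch for this AlterUserScramCredentials flow, reusing the
 * alts[] array from the constructor sketch earlier (assumed context:
 * initialized rd_kafka_t *rk, <stdio.h>; error handling elided). */
#if 0
rd_kafka_queue_t *q = rd_kafka_queue_new(rk);
rd_kafka_AlterUserScramCredentials(rk, alts, 2, NULL /*options*/, q);

rd_kafka_event_t *ev = rd_kafka_queue_poll(q, 10 * 1000);
const rd_kafka_AlterUserScramCredentials_result_t *res =
    rd_kafka_event_AlterUserScramCredentials_result(ev);
if (res) {
        size_t cnt, i;
        const rd_kafka_AlterUserScramCredentials_result_response_t **resps =
            rd_kafka_AlterUserScramCredentials_result_responses(res, &cnt);
        for (i = 0; i < cnt; i++)
                if (rd_kafka_AlterUserScramCredentials_result_response_error(
                        resps[i]))
                        fprintf(stderr, "alteration for %s failed\n",
                                rd_kafka_AlterUserScramCredentials_result_response_user(
                                    resps[i]));
}
rd_kafka_event_destroy(ev);
rd_kafka_queue_destroy(q);
#endif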
rd_kafka_admin_result_fail( + rko, RD_KAFKA_RESP_ERR__INVALID_ARG, errstr, i); + rd_kafka_admin_common_worker_destroy( + rk, rko, rd_true /*destroy*/); + return; + } + } else { + rd_kafka_admin_result_fail( + rko, RD_KAFKA_RESP_ERR__INVALID_ARG, + "At least one alteration is required"); + rd_kafka_admin_common_worker_destroy(rk, rko, + rd_true /*destroy*/); + return; + } + + rd_list_init(&rko->rko_u.admin_request.args, alteration_cnt, + rd_kafka_UserScramCredentialAlteration_destroy_free); + + for (i = 0; i < alteration_cnt; i++) { + rd_list_add(&rko->rko_u.admin_request.args, + rd_kafka_UserScramCredentialAlteration_copy( + alterations[i])); + } + rd_kafka_q_enq(rk->rk_ops, rko); + return; +} + +/** + * @brief Get an array of rd_kafka_AclBinding_t from a DescribeAcls result. + * + * The returned \p rd_kafka_AclBinding_t life-time is the same as the \p result + * object. + * @param cntp is updated to the number of elements in the array. + */ +const rd_kafka_AclBinding_t ** +rd_kafka_DescribeAcls_result_acls(const rd_kafka_DescribeAcls_result_t *result, + size_t *cntp) { + return rd_kafka_admin_result_ret_acl_bindings( + (const rd_kafka_op_t *)result, cntp); +} + +/**@}*/ + +/** + * @name DeleteAcls + * @{ + * + * + * + */ + +/** + * @brief Allocate a new DeleteAcls result response with the given + * \p err error code and \p errstr error message. + */ +const rd_kafka_DeleteAcls_result_response_t * +rd_kafka_DeleteAcls_result_response_new(rd_kafka_resp_err_t err, char *errstr) { + rd_kafka_DeleteAcls_result_response_t *result_response; + + result_response = rd_calloc(1, sizeof(*result_response)); + if (err) + result_response->error = rd_kafka_error_new( + err, "%s", errstr ? errstr : rd_kafka_err2str(err)); + + /* List of matching rd_kafka_AclBinding_t */ + rd_list_init(&result_response->matching_acls, 0, + rd_kafka_AclBinding_free); + + return result_response; +} + +static void rd_kafka_DeleteAcls_result_response_destroy( + rd_kafka_DeleteAcls_result_response_t *resp) { + if (resp->error) + rd_kafka_error_destroy(resp->error); + rd_list_destroy(&resp->matching_acls); + rd_free(resp); +} + +static void rd_kafka_DeleteAcls_result_response_free(void *ptr) { + rd_kafka_DeleteAcls_result_response_destroy( + (rd_kafka_DeleteAcls_result_response_t *)ptr); +} + +/** + * @brief Get an array of rd_kafka_DeleteAcls_result_response_t from a + * DeleteAcls result. + * + * The returned \p rd_kafka_DeleteAcls_result_response_t life-time is the same + * as the \p result object. + * @param cntp is updated to the number of elements in the array. + */ +const rd_kafka_DeleteAcls_result_response_t ** +rd_kafka_DeleteAcls_result_responses(const rd_kafka_DeleteAcls_result_t *result, + size_t *cntp) { + return rd_kafka_admin_result_ret_delete_acl_result_responses( + (const rd_kafka_op_t *)result, cntp); +} + +const rd_kafka_error_t *rd_kafka_DeleteAcls_result_response_error( + const rd_kafka_DeleteAcls_result_response_t *result_response) { + return result_response->error; +} + +const rd_kafka_AclBinding_t **rd_kafka_DeleteAcls_result_response_matching_acls( + const rd_kafka_DeleteAcls_result_response_t *result_response, + size_t *matching_acls_cntp) { + *matching_acls_cntp = result_response->matching_acls.rl_cnt; + return (const rd_kafka_AclBinding_t **) + result_response->matching_acls.rl_elems; +} + +/** + * @brief Parse DeleteAclsResponse and create ADMIN_RESULT op.
+ */ +static rd_kafka_resp_err_t +rd_kafka_DeleteAclsResponse_parse(rd_kafka_op_t *rko_req, + rd_kafka_op_t **rko_resultp, + rd_kafka_buf_t *reply, + char *errstr, + size_t errstr_size) { + const int log_decode_errors = LOG_ERR; + rd_kafka_broker_t *rkb = reply->rkbuf_rkb; + rd_kafka_op_t *rko_result = NULL; + rd_kafka_resp_err_t err = RD_KAFKA_RESP_ERR_NO_ERROR; + int32_t res_cnt; + int i; + int j; + + rd_kafka_buf_read_throttle_time(reply); + + /* #responses */ + rd_kafka_buf_read_arraycnt(reply, &res_cnt, 100000); + + rko_result = rd_kafka_admin_result_new(rko_req); + + rd_list_init(&rko_result->rko_u.admin_result.results, res_cnt, + rd_kafka_DeleteAcls_result_response_free); + + for (i = 0; i < (int)res_cnt; i++) { + int16_t error_code; + rd_kafkap_str_t error_msg = RD_KAFKAP_STR_INITIALIZER; + char *errstr = NULL; + const rd_kafka_DeleteAcls_result_response_t *result_response; + int32_t matching_acls_cnt; + + rd_kafka_buf_read_i16(reply, &error_code); + rd_kafka_buf_read_str(reply, &error_msg); + + if (error_code) { + if (RD_KAFKAP_STR_IS_NULL(&error_msg) || + RD_KAFKAP_STR_LEN(&error_msg) == 0) + errstr = (char *)rd_kafka_err2str(error_code); + else + RD_KAFKAP_STR_DUPA(&errstr, &error_msg); + } + + result_response = + rd_kafka_DeleteAcls_result_response_new(error_code, errstr); + + /* #maching_acls */ + rd_kafka_buf_read_arraycnt(reply, &matching_acls_cnt, 100000); + for (j = 0; j < (int)matching_acls_cnt; j++) { + int16_t acl_error_code; + int8_t res_type = RD_KAFKA_RESOURCE_UNKNOWN; + rd_kafkap_str_t acl_error_msg = + RD_KAFKAP_STR_INITIALIZER; + rd_kafkap_str_t kres_name; + rd_kafkap_str_t khost; + rd_kafkap_str_t kprincipal; + int8_t resource_pattern_type = + RD_KAFKA_RESOURCE_PATTERN_LITERAL; + int8_t operation = RD_KAFKA_ACL_OPERATION_UNKNOWN; + int8_t permission_type = + RD_KAFKA_ACL_PERMISSION_TYPE_UNKNOWN; + rd_kafka_AclBinding_t *matching_acl; + char *acl_errstr = NULL; + char *res_name; + char *principal; + char *host; + + rd_kafka_buf_read_i16(reply, &acl_error_code); + rd_kafka_buf_read_str(reply, &acl_error_msg); + if (acl_error_code) { + if (RD_KAFKAP_STR_IS_NULL(&acl_error_msg) || + RD_KAFKAP_STR_LEN(&acl_error_msg) == 0) + acl_errstr = (char *)rd_kafka_err2str( + acl_error_code); + else + RD_KAFKAP_STR_DUPA(&acl_errstr, + &acl_error_msg); + } + + rd_kafka_buf_read_i8(reply, &res_type); + rd_kafka_buf_read_str(reply, &kres_name); + + if (rd_kafka_buf_ApiVersion(reply) >= 1) { + rd_kafka_buf_read_i8(reply, + &resource_pattern_type); + } + + rd_kafka_buf_read_str(reply, &kprincipal); + rd_kafka_buf_read_str(reply, &khost); + rd_kafka_buf_read_i8(reply, &operation); + rd_kafka_buf_read_i8(reply, &permission_type); + RD_KAFKAP_STR_DUPA(&res_name, &kres_name); + RD_KAFKAP_STR_DUPA(&principal, &kprincipal); + RD_KAFKAP_STR_DUPA(&host, &khost); + + if (res_type <= RD_KAFKA_RESOURCE_UNKNOWN || + res_type >= RD_KAFKA_RESOURCE__CNT) { + rd_rkb_log(rkb, LOG_WARNING, + "DELETEACLSRESPONSE", + "DeleteAclsResponse returned " + "unknown resource type %d", + res_type); + res_type = RD_KAFKA_RESOURCE_UNKNOWN; + } + if (resource_pattern_type <= + RD_KAFKA_RESOURCE_PATTERN_UNKNOWN || + resource_pattern_type >= + RD_KAFKA_RESOURCE_PATTERN_TYPE__CNT) { + rd_rkb_log(rkb, LOG_WARNING, + "DELETEACLSRESPONSE", + "DeleteAclsResponse returned " + "unknown resource pattern type %d", + resource_pattern_type); + resource_pattern_type = + RD_KAFKA_RESOURCE_PATTERN_UNKNOWN; + } + if (operation <= RD_KAFKA_ACL_OPERATION_UNKNOWN || + operation >= RD_KAFKA_ACL_OPERATION__CNT) { + rd_rkb_log(rkb, 
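/* Usage sketch for the rd_kafka_DeleteAcls() entry point that follows this
 * parser: each filter yields one result response whose matching_acls list
 * holds the bindings the broker actually removed (assumed context:
 * initialized rd_kafka_t *rk, <stdio.h>; names are hypothetical; error
 * handling elided). */
#if 0
char errstr[256];
rd_kafka_AclBindingFilter_t *filter = rd_kafka_AclBindingFilter_new(
        RD_KAFKA_RESOURCE_TOPIC, "mytopic",
        RD_KAFKA_RESOURCE_PATTERN_LITERAL, NULL /*any principal*/,
        NULL /*any host*/, RD_KAFKA_ACL_OPERATION_ANY,
        RD_KAFKA_ACL_PERMISSION_TYPE_ANY, errstr, sizeof(errstr));

rd_kafka_queue_t *q = rd_kafka_queue_new(rk);
rd_kafka_DeleteAcls(rk, &filter, 1, NULL /*options*/, q);

rd_kafka_event_t *ev = rd_kafka_queue_poll(q, 10 * 1000);
const rd_kafka_DeleteAcls_result_t *res = rd_kafka_event_DeleteAcls_result(ev);
if (res) {
        size_t rcnt, acnt;
        const rd_kafka_DeleteAcls_result_response_t **resps =
            rd_kafka_DeleteAcls_result_responses(res, &rcnt);
        if (rcnt > 0 &&
            !rd_kafka_DeleteAcls_result_response_error(resps[0])) {
                rd_kafka_DeleteAcls_result_response_matching_acls(resps[0],
                                                                  &acnt);
                printf("deleted %u matching acls\n", (unsigned)acnt);
        }
}
rd_kafka_event_destroy(ev);
rd_kafka_AclBinding_destroy(filter);
rd_kafka_queue_destroy(q);
#endif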
LOG_WARNING, + "DELETEACLSRESPONSE", + "DeleteAclsResponse returned " + "unknown acl operation %d", + operation); + operation = RD_KAFKA_ACL_OPERATION_UNKNOWN; + } + if (permission_type <= + RD_KAFKA_ACL_PERMISSION_TYPE_UNKNOWN || + permission_type >= + RD_KAFKA_ACL_PERMISSION_TYPE__CNT) { + rd_rkb_log(rkb, LOG_WARNING, + "DELETEACLSRESPONSE", + "DeleteAclsResponse returned " + "unknown acl permission type %d", + permission_type); + permission_type = + RD_KAFKA_ACL_PERMISSION_TYPE_UNKNOWN; + } + + matching_acl = rd_kafka_AclBinding_new0( + res_type, res_name, resource_pattern_type, + principal, host, operation, permission_type, + acl_error_code, acl_errstr); + + rd_list_add( + (rd_list_t *)&result_response->matching_acls, + (void *)matching_acl); + } + + rd_list_add(&rko_result->rko_u.admin_result.results, + (void *)result_response); + } + + *rko_resultp = rko_result; + + return RD_KAFKA_RESP_ERR_NO_ERROR; + +err_parse: + if (rko_result) + rd_kafka_op_destroy(rko_result); + + rd_snprintf(errstr, errstr_size, + "DeleteAcls response protocol parse failure: %s", + rd_kafka_err2str(err)); + + return err; +} + + +void rd_kafka_DeleteAcls(rd_kafka_t *rk, + rd_kafka_AclBindingFilter_t **del_acls, + size_t del_acls_cnt, + const rd_kafka_AdminOptions_t *options, + rd_kafka_queue_t *rkqu) { + rd_kafka_op_t *rko; + size_t i; + static const struct rd_kafka_admin_worker_cbs cbs = { + rd_kafka_DeleteAclsRequest, rd_kafka_DeleteAclsResponse_parse}; + + rko = rd_kafka_admin_request_op_new(rk, RD_KAFKA_OP_DELETEACLS, + RD_KAFKA_EVENT_DELETEACLS_RESULT, + &cbs, options, rkqu->rkqu_q); + + rd_list_init(&rko->rko_u.admin_request.args, (int)del_acls_cnt, + rd_kafka_AclBinding_free); + + for (i = 0; i < del_acls_cnt; i++) + rd_list_add(&rko->rko_u.admin_request.args, + rd_kafka_AclBindingFilter_copy(del_acls[i])); + + rd_kafka_q_enq(rk->rk_ops, rko); +} + +/**@}*/ + +/** + * @name Alter consumer group offsets (committed offsets) + * @{ + * + * + * + * + */ + +rd_kafka_AlterConsumerGroupOffsets_t *rd_kafka_AlterConsumerGroupOffsets_new( + const char *group_id, + const rd_kafka_topic_partition_list_t *partitions) { + rd_assert(group_id && partitions); + + size_t tsize = strlen(group_id) + 1; + rd_kafka_AlterConsumerGroupOffsets_t *alter_grpoffsets; + + /* Single allocation */ + alter_grpoffsets = rd_malloc(sizeof(*alter_grpoffsets) + tsize); + alter_grpoffsets->group_id = alter_grpoffsets->data; + memcpy(alter_grpoffsets->group_id, group_id, tsize); + alter_grpoffsets->partitions = + rd_kafka_topic_partition_list_copy(partitions); + + return alter_grpoffsets; +} + +void rd_kafka_AlterConsumerGroupOffsets_destroy( + rd_kafka_AlterConsumerGroupOffsets_t *alter_grpoffsets) { + rd_kafka_topic_partition_list_destroy(alter_grpoffsets->partitions); + rd_free(alter_grpoffsets); +} + +static void rd_kafka_AlterConsumerGroupOffsets_free(void *ptr) { + rd_kafka_AlterConsumerGroupOffsets_destroy(ptr); +} + +void rd_kafka_AlterConsumerGroupOffsets_destroy_array( + rd_kafka_AlterConsumerGroupOffsets_t **alter_grpoffsets, + size_t alter_grpoffsets_cnt) { + size_t i; + for (i = 0; i < alter_grpoffsets_cnt; i++) + rd_kafka_AlterConsumerGroupOffsets_destroy(alter_grpoffsets[i]); +} + +/** + * @brief Allocate a new AlterGroup and make a copy of \p src + */ +static rd_kafka_AlterConsumerGroupOffsets_t * +rd_kafka_AlterConsumerGroupOffsets_copy( + const rd_kafka_AlterConsumerGroupOffsets_t *src) { + return rd_kafka_AlterConsumerGroupOffsets_new(src->group_id, + src->partitions); +} + +/** + * @brief Send a OffsetCommitRequest to \p 
rkb with the partitions + * in alter_grpoffsets (AlterConsumerGroupOffsets_t*) using + * \p options. + * + */ +static rd_kafka_resp_err_t rd_kafka_AlterConsumerGroupOffsetsRequest( + rd_kafka_broker_t *rkb, + /* (rd_kafka_AlterConsumerGroupOffsets_t*) */ + const rd_list_t *alter_grpoffsets, + rd_kafka_AdminOptions_t *options, + char *errstr, + size_t errstr_size, + rd_kafka_replyq_t replyq, + rd_kafka_resp_cb_t *resp_cb, + void *opaque) { + const rd_kafka_AlterConsumerGroupOffsets_t *grpoffsets = + rd_list_elem(alter_grpoffsets, 0); + + rd_assert(rd_list_cnt(alter_grpoffsets) == 1); + + rd_kafka_topic_partition_list_t *offsets = grpoffsets->partitions; + rd_kafka_consumer_group_metadata_t *cgmetadata = + rd_kafka_consumer_group_metadata_new(grpoffsets->group_id); + + int ret = rd_kafka_OffsetCommitRequest( + rkb, cgmetadata, offsets, replyq, resp_cb, opaque, + "rd_kafka_AlterConsumerGroupOffsetsRequest"); + rd_kafka_consumer_group_metadata_destroy(cgmetadata); + if (ret == 0) { + rd_snprintf(errstr, errstr_size, + "At least one topic-partition offset must " + "be >= 0"); + return RD_KAFKA_RESP_ERR__NO_OFFSET; + } + return RD_KAFKA_RESP_ERR_NO_ERROR; +} + +/** + * @brief Parse OffsetCommitResponse and create ADMIN_RESULT op. + */ +static rd_kafka_resp_err_t +rd_kafka_AlterConsumerGroupOffsetsResponse_parse(rd_kafka_op_t *rko_req, + rd_kafka_op_t **rko_resultp, + rd_kafka_buf_t *reply, + char *errstr, + size_t errstr_size) { + rd_kafka_t *rk; + rd_kafka_broker_t *rkb; + rd_kafka_op_t *rko_result; + rd_kafka_topic_partition_list_t *partitions = NULL; + rd_kafka_resp_err_t err = RD_KAFKA_RESP_ERR_NO_ERROR; + const rd_kafka_AlterConsumerGroupOffsets_t *alter_grpoffsets = + rd_list_elem(&rko_req->rko_u.admin_request.args, 0); + partitions = + rd_kafka_topic_partition_list_copy(alter_grpoffsets->partitions); + + rk = rko_req->rko_rk; + rkb = reply->rkbuf_rkb; + err = rd_kafka_handle_OffsetCommit(rk, rkb, err, reply, NULL, + partitions, rd_true); + + /* Create result op and group_result_t */ + rko_result = rd_kafka_admin_result_new(rko_req); + rd_list_init(&rko_result->rko_u.admin_result.results, 1, + rd_kafka_group_result_free); + rd_list_add(&rko_result->rko_u.admin_result.results, + rd_kafka_group_result_new(alter_grpoffsets->group_id, -1, + partitions, NULL)); + rd_kafka_topic_partition_list_destroy(partitions); + *rko_resultp = rko_result; + + if (reply->rkbuf_err) + rd_snprintf( + errstr, errstr_size, + "AlterConsumerGroupOffset response parse failure: %s", + rd_kafka_err2str(reply->rkbuf_err)); + + return reply->rkbuf_err; +} + +void rd_kafka_AlterConsumerGroupOffsets( + rd_kafka_t *rk, + rd_kafka_AlterConsumerGroupOffsets_t **alter_grpoffsets, + size_t alter_grpoffsets_cnt, + const rd_kafka_AdminOptions_t *options, + rd_kafka_queue_t *rkqu) { + int i; + static const struct rd_kafka_admin_worker_cbs cbs = { + rd_kafka_AlterConsumerGroupOffsetsRequest, + rd_kafka_AlterConsumerGroupOffsetsResponse_parse, + }; + rd_kafka_op_t *rko; + rd_kafka_topic_partition_list_t *copied_offsets; + + rd_assert(rkqu); + + rko = rd_kafka_admin_request_op_new( + rk, RD_KAFKA_OP_ALTERCONSUMERGROUPOFFSETS, + RD_KAFKA_EVENT_ALTERCONSUMERGROUPOFFSETS_RESULT, &cbs, options, + rkqu->rkqu_q); + + if (alter_grpoffsets_cnt != 1) { + /* For simplicity we only support one single group for now */ + rd_kafka_admin_result_fail(rko, RD_KAFKA_RESP_ERR__INVALID_ARG, + "Exactly one " + "AlterConsumerGroupOffsets must " + "be passed"); + goto fail; + } + + if (alter_grpoffsets[0]->partitions->cnt == 0) { + 
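+                /* At least one partition is required for the alteration
+                 * to be meaningful, so an empty list is rejected. */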
+                rd_kafka_admin_result_fail(rko, RD_KAFKA_RESP_ERR__INVALID_ARG,
+                                           "Non-empty topic partition list "
+                                           "must be present");
+                goto fail;
+        }
+
+        for (i = 0; i < alter_grpoffsets[0]->partitions->cnt; i++) {
+                if (alter_grpoffsets[0]->partitions->elems[i].offset < 0) {
+                        rd_kafka_admin_result_fail(
+                            rko, RD_KAFKA_RESP_ERR__INVALID_ARG,
+                            "All topic-partition offsets "
+                            "must be >= 0");
+                        goto fail;
+                }
+        }
+
+        /* TODO: add a group id duplication check if, in the future, more
+         * than one AlterConsumerGroupOffsets can be passed */
+
+        /* Copy the offsets list to check for duplicates */
+        copied_offsets =
+            rd_kafka_topic_partition_list_copy(alter_grpoffsets[0]->partitions);
+        if (rd_kafka_topic_partition_list_has_duplicates(
+                copied_offsets, rd_false /*check partition*/)) {
+                rd_kafka_topic_partition_list_destroy(copied_offsets);
+                rd_kafka_admin_result_fail(rko, RD_KAFKA_RESP_ERR__INVALID_ARG,
+                                           "Duplicate partitions not allowed");
+                goto fail;
+        }
+        rd_kafka_topic_partition_list_destroy(copied_offsets);
+
+        rko->rko_u.admin_request.broker_id = RD_KAFKA_ADMIN_TARGET_COORDINATOR;
+        rko->rko_u.admin_request.coordtype = RD_KAFKA_COORD_GROUP;
+        rko->rko_u.admin_request.coordkey =
+            rd_strdup(alter_grpoffsets[0]->group_id);
+
+        /* Store a copy of the group on the request so the group name can be
+         * reached from the response parser. */
+        rd_list_init(&rko->rko_u.admin_request.args, 1,
+                     rd_kafka_AlterConsumerGroupOffsets_free);
+        rd_list_add(&rko->rko_u.admin_request.args,
+                    (void *)rd_kafka_AlterConsumerGroupOffsets_copy(
+                        alter_grpoffsets[0]));
+
+        rd_kafka_q_enq(rk->rk_ops, rko);
+        return;
+fail:
+        rd_kafka_admin_common_worker_destroy(rk, rko, rd_true /*destroy*/);
+}
+
+/**
+ * @brief Get an array of group results from an AlterConsumerGroupOffsets
+ *        result.
+ *
+ * The returned \p groups life-time is the same as the \p result object.
+ * @param cntp is updated to the number of elements in the array.
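+ *
+ * Illustrative usage sketch (not part of this patch; assumes the public
+ * admin event API, with \p rkev being an
+ * RD_KAFKA_EVENT_ALTERCONSUMERGROUPOFFSETS_RESULT event):
+ * @code
+ * const rd_kafka_AlterConsumerGroupOffsets_result_t *res =
+ *     rd_kafka_event_AlterConsumerGroupOffsets_result(rkev);
+ * size_t cnt, i;
+ * const rd_kafka_group_result_t **groups =
+ *     rd_kafka_AlterConsumerGroupOffsets_result_groups(res, &cnt);
+ * for (i = 0; i < cnt; i++) {
+ *         const rd_kafka_error_t *error =
+ *             rd_kafka_group_result_error(groups[i]);
+ *         printf("%s: %s\n", rd_kafka_group_result_name(groups[i]),
+ *                error ? rd_kafka_error_string(error) : "success");
+ * }
+ * @endcode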
+ */ +const rd_kafka_group_result_t ** +rd_kafka_AlterConsumerGroupOffsets_result_groups( + const rd_kafka_AlterConsumerGroupOffsets_result_t *result, + size_t *cntp) { + return rd_kafka_admin_result_ret_groups((const rd_kafka_op_t *)result, + cntp); +} + +/**@}*/ + + +/**@}*/ + +/** + * @name List consumer group offsets (committed offsets) + * @{ + * + * + * + * + */ + +rd_kafka_ListConsumerGroupOffsets_t *rd_kafka_ListConsumerGroupOffsets_new( + const char *group_id, + const rd_kafka_topic_partition_list_t *partitions) { + size_t tsize = strlen(group_id) + 1; + rd_kafka_ListConsumerGroupOffsets_t *list_grpoffsets; + + rd_assert(group_id); + + /* Single allocation */ + list_grpoffsets = rd_calloc(1, sizeof(*list_grpoffsets) + tsize); + list_grpoffsets->group_id = list_grpoffsets->data; + memcpy(list_grpoffsets->group_id, group_id, tsize); + if (partitions) { + list_grpoffsets->partitions = + rd_kafka_topic_partition_list_copy(partitions); + } + + return list_grpoffsets; +} + +void rd_kafka_ListConsumerGroupOffsets_destroy( + rd_kafka_ListConsumerGroupOffsets_t *list_grpoffsets) { + if (list_grpoffsets->partitions != NULL) { + rd_kafka_topic_partition_list_destroy( + list_grpoffsets->partitions); + } + rd_free(list_grpoffsets); +} + +static void rd_kafka_ListConsumerGroupOffsets_free(void *ptr) { + rd_kafka_ListConsumerGroupOffsets_destroy(ptr); +} + +void rd_kafka_ListConsumerGroupOffsets_destroy_array( + rd_kafka_ListConsumerGroupOffsets_t **list_grpoffsets, + size_t list_grpoffsets_cnt) { + size_t i; + for (i = 0; i < list_grpoffsets_cnt; i++) + rd_kafka_ListConsumerGroupOffsets_destroy(list_grpoffsets[i]); +} + +/** + * @brief Allocate a new ListGroup and make a copy of \p src + */ +static rd_kafka_ListConsumerGroupOffsets_t * +rd_kafka_ListConsumerGroupOffsets_copy( + const rd_kafka_ListConsumerGroupOffsets_t *src) { + return rd_kafka_ListConsumerGroupOffsets_new(src->group_id, + src->partitions); +} + +/** + * @brief Send a OffsetFetchRequest to \p rkb with the partitions + * in list_grpoffsets (ListConsumerGroupOffsets_t*) using + * \p options. + * + */ +static rd_kafka_resp_err_t rd_kafka_ListConsumerGroupOffsetsRequest( + rd_kafka_broker_t *rkb, + /* (rd_kafka_ListConsumerGroupOffsets_t*) */ + const rd_list_t *list_grpoffsets, + rd_kafka_AdminOptions_t *options, + char *errstr, + size_t errstr_size, + rd_kafka_replyq_t replyq, + rd_kafka_resp_cb_t *resp_cb, + void *opaque) { + int op_timeout; + rd_bool_t require_stable_offsets; + const rd_kafka_ListConsumerGroupOffsets_t *grpoffsets = + rd_list_elem(list_grpoffsets, 0); + + rd_assert(rd_list_cnt(list_grpoffsets) == 1); + + op_timeout = rd_kafka_confval_get_int(&options->request_timeout); + require_stable_offsets = + rd_kafka_confval_get_int(&options->require_stable_offsets); + rd_kafka_OffsetFetchRequest( + rkb, grpoffsets->group_id, grpoffsets->partitions, + require_stable_offsets, op_timeout, replyq, resp_cb, opaque); + return RD_KAFKA_RESP_ERR_NO_ERROR; +} + +/** + * @brief Parse OffsetFetchResponse and create ADMIN_RESULT op. 
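+ *
+ * The resulting ADMIN_RESULT op carries a single group_result_t with the
+ * group id and the committed offsets fetched from the coordinator.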
+ */ +static rd_kafka_resp_err_t +rd_kafka_ListConsumerGroupOffsetsResponse_parse(rd_kafka_op_t *rko_req, + rd_kafka_op_t **rko_resultp, + rd_kafka_buf_t *reply, + char *errstr, + size_t errstr_size) { + const rd_kafka_ListConsumerGroupOffsets_t *list_grpoffsets = + rd_list_elem(&rko_req->rko_u.admin_request.args, 0); + rd_kafka_t *rk; + rd_kafka_broker_t *rkb; + rd_kafka_topic_partition_list_t *offsets = NULL; + rd_kafka_op_t *rko_result; + rd_kafka_resp_err_t err; + + rk = rko_req->rko_rk; + rkb = reply->rkbuf_rkb; + err = rd_kafka_handle_OffsetFetch(rk, rkb, RD_KAFKA_RESP_ERR_NO_ERROR, + reply, NULL, &offsets, rd_false, + rd_true, rd_false); + + if (unlikely(err != RD_KAFKA_RESP_ERR_NO_ERROR)) { + reply->rkbuf_err = err; + goto err; + } + + /* Create result op and group_result_t */ + rko_result = rd_kafka_admin_result_new(rko_req); + rd_list_init(&rko_result->rko_u.admin_result.results, 1, + rd_kafka_group_result_free); + rd_list_add(&rko_result->rko_u.admin_result.results, + rd_kafka_group_result_new(list_grpoffsets->group_id, -1, + offsets, NULL)); + + if (likely(offsets != NULL)) + rd_kafka_topic_partition_list_destroy(offsets); + + *rko_resultp = rko_result; + + return RD_KAFKA_RESP_ERR_NO_ERROR; +err: + if (likely(offsets != NULL)) + rd_kafka_topic_partition_list_destroy(offsets); + + rd_snprintf(errstr, errstr_size, + "ListConsumerGroupOffsetsResponse response failure: %s", + rd_kafka_err2str(reply->rkbuf_err)); + + return reply->rkbuf_err; +} + +void rd_kafka_ListConsumerGroupOffsets( + rd_kafka_t *rk, + rd_kafka_ListConsumerGroupOffsets_t **list_grpoffsets, + size_t list_grpoffsets_cnt, + const rd_kafka_AdminOptions_t *options, + rd_kafka_queue_t *rkqu) { + static const struct rd_kafka_admin_worker_cbs cbs = { + rd_kafka_ListConsumerGroupOffsetsRequest, + rd_kafka_ListConsumerGroupOffsetsResponse_parse, + }; + rd_kafka_op_t *rko; + rd_kafka_topic_partition_list_t *copied_offsets; + + rd_assert(rkqu); + + rko = rd_kafka_admin_request_op_new( + rk, RD_KAFKA_OP_LISTCONSUMERGROUPOFFSETS, + RD_KAFKA_EVENT_LISTCONSUMERGROUPOFFSETS_RESULT, &cbs, options, + rkqu->rkqu_q); + + if (list_grpoffsets_cnt != 1) { + /* For simplicity we only support one single group for now */ + rd_kafka_admin_result_fail(rko, RD_KAFKA_RESP_ERR__INVALID_ARG, + "Exactly one " + "ListConsumerGroupOffsets must " + "be passed"); + goto fail; + } + + if (list_grpoffsets[0]->partitions != NULL && + list_grpoffsets[0]->partitions->cnt == 0) { + /* Either pass NULL for all the partitions or a non-empty list + */ + rd_kafka_admin_result_fail( + rko, RD_KAFKA_RESP_ERR__INVALID_ARG, + "NULL or " + "non-empty topic partition list must " + "be passed"); + goto fail; + } + + /* TODO: add group id duplication check when implementing KIP-709 */ + if (list_grpoffsets[0]->partitions != NULL) { + /* Copy offsets list for checking duplicated */ + copied_offsets = rd_kafka_topic_partition_list_copy( + list_grpoffsets[0]->partitions); + if (rd_kafka_topic_partition_list_has_duplicates( + copied_offsets, rd_false /*check partition*/)) { + rd_kafka_topic_partition_list_destroy(copied_offsets); + rd_kafka_admin_result_fail( + rko, RD_KAFKA_RESP_ERR__INVALID_ARG, + "Duplicate partitions not allowed"); + goto fail; + } + rd_kafka_topic_partition_list_destroy(copied_offsets); + } + + rko->rko_u.admin_request.broker_id = RD_KAFKA_ADMIN_TARGET_COORDINATOR; + rko->rko_u.admin_request.coordtype = RD_KAFKA_COORD_GROUP; + rko->rko_u.admin_request.coordkey = + rd_strdup(list_grpoffsets[0]->group_id); + + /* Store copy of group on request so the 
group name can be reached + * from the response parser. */ + rd_list_init(&rko->rko_u.admin_request.args, 1, + rd_kafka_ListConsumerGroupOffsets_free); + rd_list_add(&rko->rko_u.admin_request.args, + rd_kafka_ListConsumerGroupOffsets_copy(list_grpoffsets[0])); + + rd_kafka_q_enq(rk->rk_ops, rko); + return; +fail: + rd_kafka_admin_common_worker_destroy(rk, rko, rd_true /*destroy*/); +} + + +/** + * @brief Get an array of group results from a ListConsumerGroups result. + * + * The returned \p groups life-time is the same as the \p result object. + * @param cntp is updated to the number of elements in the array. + */ +const rd_kafka_group_result_t **rd_kafka_ListConsumerGroupOffsets_result_groups( + const rd_kafka_ListConsumerGroupOffsets_result_t *result, + size_t *cntp) { + return rd_kafka_admin_result_ret_groups((const rd_kafka_op_t *)result, + cntp); +} + +/**@}*/ + +/** + * @name List consumer groups + * @{ + * + * + * + * + */ + +#define CONSUMER_PROTOCOL_TYPE "consumer" + +/** + * @brief Create a new ConsumerGroupListing object. + * + * @param group_id The group id. + * @param is_simple_consumer_group Is the group simple? + * @param state Group state. + */ +static rd_kafka_ConsumerGroupListing_t * +rd_kafka_ConsumerGroupListing_new(const char *group_id, + rd_bool_t is_simple_consumer_group, + rd_kafka_consumer_group_state_t state) { + rd_kafka_ConsumerGroupListing_t *grplist; + grplist = rd_calloc(1, sizeof(*grplist)); + grplist->group_id = rd_strdup(group_id); + grplist->is_simple_consumer_group = is_simple_consumer_group; + grplist->state = state; + return grplist; +} + +/** + * @brief Copy \p grplist ConsumerGroupListing. + * + * @param grplist The group listing to copy. + * @return A new allocated copy of the passed ConsumerGroupListing. + */ +static rd_kafka_ConsumerGroupListing_t *rd_kafka_ConsumerGroupListing_copy( + const rd_kafka_ConsumerGroupListing_t *grplist) { + return rd_kafka_ConsumerGroupListing_new( + grplist->group_id, grplist->is_simple_consumer_group, + grplist->state); +} + +/** + * @brief Same as rd_kafka_ConsumerGroupListing_copy() but suitable for + * rd_list_copy(). The \p opaque is ignored. + */ +static void *rd_kafka_ConsumerGroupListing_copy_opaque(const void *grplist, + void *opaque) { + return rd_kafka_ConsumerGroupListing_copy(grplist); +} + +static void rd_kafka_ConsumerGroupListing_destroy( + rd_kafka_ConsumerGroupListing_t *grplist) { + RD_IF_FREE(grplist->group_id, rd_free); + rd_free(grplist); +} + +static void rd_kafka_ConsumerGroupListing_free(void *ptr) { + rd_kafka_ConsumerGroupListing_destroy(ptr); +} + +const char *rd_kafka_ConsumerGroupListing_group_id( + const rd_kafka_ConsumerGroupListing_t *grplist) { + return grplist->group_id; +} + +int rd_kafka_ConsumerGroupListing_is_simple_consumer_group( + const rd_kafka_ConsumerGroupListing_t *grplist) { + return grplist->is_simple_consumer_group; +} + +rd_kafka_consumer_group_state_t rd_kafka_ConsumerGroupListing_state( + const rd_kafka_ConsumerGroupListing_t *grplist) { + return grplist->state; +} + +/** + * @brief Create a new ListConsumerGroupsResult object. 
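+ *        Both input lists are deep-copied; the caller retains
+ *        ownership of \p valid and \p errors.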
+ *
+ * @param valid List of valid groups (rd_kafka_ConsumerGroupListing_t *).
+ * @param errors List of errors (rd_kafka_error_t *).
+ */
+static rd_kafka_ListConsumerGroupsResult_t *
+rd_kafka_ListConsumerGroupsResult_new(const rd_list_t *valid,
+                                      const rd_list_t *errors) {
+        rd_kafka_ListConsumerGroupsResult_t *res;
+        res = rd_calloc(1, sizeof(*res));
+        rd_list_init_copy(&res->valid, valid);
+        rd_list_copy_to(&res->valid, valid,
+                        rd_kafka_ConsumerGroupListing_copy_opaque, NULL);
+        rd_list_init_copy(&res->errors, errors);
+        rd_list_copy_to(&res->errors, errors, rd_kafka_error_copy_opaque, NULL);
+        return res;
+}
+
+static void rd_kafka_ListConsumerGroupsResult_destroy(
+    rd_kafka_ListConsumerGroupsResult_t *res) {
+        rd_list_destroy(&res->valid);
+        rd_list_destroy(&res->errors);
+        rd_free(res);
+}
+
+static void rd_kafka_ListConsumerGroupsResult_free(void *ptr) {
+        rd_kafka_ListConsumerGroupsResult_destroy(ptr);
+}
+
+/**
+ * @brief Copy the passed ListConsumerGroupsResult.
+ *
+ * @param res The ListConsumerGroupsResult to copy.
+ * @return A newly allocated ListConsumerGroupsResult object.
+ *
+ * @sa Release the object with rd_kafka_ListConsumerGroupsResult_destroy().
+ */
+static rd_kafka_ListConsumerGroupsResult_t *
+rd_kafka_ListConsumerGroupsResult_copy(
+    const rd_kafka_ListConsumerGroupsResult_t *res) {
+        return rd_kafka_ListConsumerGroupsResult_new(&res->valid, &res->errors);
+}
+
+/**
+ * @brief Same as rd_kafka_ListConsumerGroupsResult_copy() but suitable for
+ *        rd_list_copy(). The \p opaque is ignored.
+ */
+static void *rd_kafka_ListConsumerGroupsResult_copy_opaque(const void *list,
+                                                           void *opaque) {
+        return rd_kafka_ListConsumerGroupsResult_copy(list);
+}
+
+/**
+ * @brief Send ListConsumerGroupsRequest. Admin worker compatible callback.
+ */
+static rd_kafka_resp_err_t
+rd_kafka_admin_ListConsumerGroupsRequest(rd_kafka_broker_t *rkb,
+                                         const rd_list_t *groups /*(char*)*/,
+                                         rd_kafka_AdminOptions_t *options,
+                                         char *errstr,
+                                         size_t errstr_size,
+                                         rd_kafka_replyq_t replyq,
+                                         rd_kafka_resp_cb_t *resp_cb,
+                                         void *opaque) {
+        int i;
+        rd_kafka_resp_err_t err;
+        rd_kafka_error_t *error;
+        const char **states_str = NULL;
+        int states_str_cnt      = 0;
+        rd_list_t *states =
+            rd_kafka_confval_get_ptr(&options->match_consumer_group_states);
+
+        /* Prepare list_options */
+        if (states && rd_list_cnt(states) > 0) {
+                states_str_cnt = rd_list_cnt(states);
+                states_str = rd_calloc(states_str_cnt, sizeof(*states_str));
+                for (i = 0; i < states_str_cnt; i++) {
+                        states_str[i] = rd_kafka_consumer_group_state_name(
+                            rd_list_get_int32(states, i));
+                }
+        }
+
+        error = rd_kafka_ListGroupsRequest(rkb, -1, states_str, states_str_cnt,
+                                           replyq, resp_cb, opaque);
+
+        if (states_str) {
+                rd_free(states_str);
+        }
+
+        if (error) {
+                rd_snprintf(errstr, errstr_size, "%s",
+                            rd_kafka_error_string(error));
+                err = rd_kafka_error_code(error);
+                rd_kafka_error_destroy(error);
+                return err;
+        }
+
+        return RD_KAFKA_RESP_ERR_NO_ERROR;
+}
+
+/**
+ * @brief Parse ListConsumerGroupsResponse and create ADMIN_RESULT op.
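+ *
+ * Broker-level errors do not fail the parse: they are collected into
+ * the errors list of the single ListConsumerGroupsResult instead.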
+ */ +static rd_kafka_resp_err_t +rd_kafka_ListConsumerGroupsResponse_parse(rd_kafka_op_t *rko_req, + rd_kafka_op_t **rko_resultp, + rd_kafka_buf_t *reply, + char *errstr, + size_t errstr_size) { + const int log_decode_errors = LOG_ERR; + int i, cnt; + int16_t error_code, api_version; + rd_kafka_op_t *rko_result = NULL; + rd_kafka_error_t *error = NULL; + rd_kafka_broker_t *rkb = reply->rkbuf_rkb; + rd_list_t valid, errors; + rd_kafka_ListConsumerGroupsResult_t *list_result; + char *group_id = NULL, *group_state = NULL, *proto_type = NULL; + + api_version = rd_kafka_buf_ApiVersion(reply); + if (api_version >= 1) { + rd_kafka_buf_read_throttle_time(reply); + } + rd_kafka_buf_read_i16(reply, &error_code); + if (error_code) { + error = rd_kafka_error_new(error_code, + "Broker [%d" + "] " + "ListConsumerGroups: %s", + rd_kafka_broker_id(rkb), + rd_kafka_err2str(error_code)); + } + + rd_kafka_buf_read_arraycnt(reply, &cnt, RD_KAFKAP_GROUPS_MAX); + rd_list_init(&valid, cnt, rd_kafka_ConsumerGroupListing_free); + rd_list_init(&errors, 8, rd_free); + if (error) + rd_list_add(&errors, error); + + rko_result = rd_kafka_admin_result_new(rko_req); + rd_list_init(&rko_result->rko_u.admin_result.results, 1, + rd_kafka_ListConsumerGroupsResult_free); + + for (i = 0; i < cnt; i++) { + rd_kafkap_str_t GroupId, ProtocolType, + GroupState = RD_ZERO_INIT; + rd_kafka_ConsumerGroupListing_t *group_listing; + rd_bool_t is_simple_consumer_group, is_consumer_protocol_type; + rd_kafka_consumer_group_state_t state = + RD_KAFKA_CONSUMER_GROUP_STATE_UNKNOWN; + + rd_kafka_buf_read_str(reply, &GroupId); + rd_kafka_buf_read_str(reply, &ProtocolType); + if (api_version >= 4) { + rd_kafka_buf_read_str(reply, &GroupState); + } + rd_kafka_buf_skip_tags(reply); + + group_id = RD_KAFKAP_STR_DUP(&GroupId); + proto_type = RD_KAFKAP_STR_DUP(&ProtocolType); + if (api_version >= 4) { + group_state = RD_KAFKAP_STR_DUP(&GroupState); + state = rd_kafka_consumer_group_state_code(group_state); + } + + is_simple_consumer_group = *proto_type == '\0'; + is_consumer_protocol_type = + !strcmp(proto_type, CONSUMER_PROTOCOL_TYPE); + if (is_simple_consumer_group || is_consumer_protocol_type) { + group_listing = rd_kafka_ConsumerGroupListing_new( + group_id, is_simple_consumer_group, state); + rd_list_add(&valid, group_listing); + } + + rd_free(group_id); + rd_free(group_state); + rd_free(proto_type); + group_id = NULL; + group_state = NULL; + proto_type = NULL; + } + rd_kafka_buf_skip_tags(reply); + +err_parse: + if (group_id) + rd_free(group_id); + if (group_state) + rd_free(group_state); + if (proto_type) + rd_free(proto_type); + + if (reply->rkbuf_err) { + error_code = reply->rkbuf_err; + error = rd_kafka_error_new( + error_code, + "Broker [%d" + "] " + "ListConsumerGroups response protocol parse failure: %s", + rd_kafka_broker_id(rkb), rd_kafka_err2str(error_code)); + rd_list_add(&errors, error); + } + + list_result = rd_kafka_ListConsumerGroupsResult_new(&valid, &errors); + rd_list_add(&rko_result->rko_u.admin_result.results, list_result); + + *rko_resultp = rko_result; + rd_list_destroy(&valid); + rd_list_destroy(&errors); + return RD_KAFKA_RESP_ERR_NO_ERROR; +} + +/** @brief Merge the ListConsumerGroups response from a single broker + * into the user response list. 
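+ *
+ * ListConsumerGroups is fanned out to all brokers; each partial
+ * result's valid groups and errors are appended to the single
+ * ListConsumerGroupsResult kept at index 0 of the fanout results.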
+ */ +static void +rd_kafka_ListConsumerGroups_response_merge(rd_kafka_op_t *rko_fanout, + const rd_kafka_op_t *rko_partial) { + int cnt; + rd_kafka_ListConsumerGroupsResult_t *res = NULL; + rd_kafka_ListConsumerGroupsResult_t *newres; + rd_list_t new_valid, new_errors; + + rd_assert(rko_partial->rko_evtype == + RD_KAFKA_EVENT_LISTCONSUMERGROUPS_RESULT); + + cnt = rd_list_cnt(&rko_fanout->rko_u.admin_request.fanout.results); + if (cnt) { + res = rd_list_elem( + &rko_fanout->rko_u.admin_request.fanout.results, 0); + } else { + rd_list_init(&new_valid, 0, rd_kafka_ConsumerGroupListing_free); + rd_list_init(&new_errors, 0, rd_free); + res = rd_kafka_ListConsumerGroupsResult_new(&new_valid, + &new_errors); + rd_list_set(&rko_fanout->rko_u.admin_request.fanout.results, 0, + res); + rd_list_destroy(&new_valid); + rd_list_destroy(&new_errors); + } + if (!rko_partial->rko_err) { + int new_valid_count, new_errors_count; + const rd_list_t *new_valid_list, *new_errors_list; + /* Read the partial result and merge the valid groups + * and the errors into the fanout parent result. */ + newres = + rd_list_elem(&rko_partial->rko_u.admin_result.results, 0); + rd_assert(newres); + new_valid_count = rd_list_cnt(&newres->valid); + new_errors_count = rd_list_cnt(&newres->errors); + if (new_valid_count) { + new_valid_list = &newres->valid; + rd_list_grow(&res->valid, new_valid_count); + rd_list_copy_to( + &res->valid, new_valid_list, + rd_kafka_ConsumerGroupListing_copy_opaque, NULL); + } + if (new_errors_count) { + new_errors_list = &newres->errors; + rd_list_grow(&res->errors, new_errors_count); + rd_list_copy_to(&res->errors, new_errors_list, + rd_kafka_error_copy_opaque, NULL); + } + } else { + /* Op errored, e.g. timeout */ + rd_list_add(&res->errors, + rd_kafka_error_new(rko_partial->rko_err, NULL)); + } +} + +void rd_kafka_ListConsumerGroups(rd_kafka_t *rk, + const rd_kafka_AdminOptions_t *options, + rd_kafka_queue_t *rkqu) { + rd_kafka_op_t *rko; + static const struct rd_kafka_admin_worker_cbs cbs = { + rd_kafka_admin_ListConsumerGroupsRequest, + rd_kafka_ListConsumerGroupsResponse_parse}; + static const struct rd_kafka_admin_fanout_worker_cbs fanout_cbs = { + rd_kafka_ListConsumerGroups_response_merge, + rd_kafka_ListConsumerGroupsResult_copy_opaque, + }; + + rko = rd_kafka_admin_request_op_target_all_new( + rk, RD_KAFKA_OP_LISTCONSUMERGROUPS, + RD_KAFKA_EVENT_LISTCONSUMERGROUPS_RESULT, &cbs, &fanout_cbs, + rd_kafka_ListConsumerGroupsResult_free, options, rkqu->rkqu_q); + rd_kafka_q_enq(rk->rk_ops, rko); +} + +const rd_kafka_ConsumerGroupListing_t ** +rd_kafka_ListConsumerGroups_result_valid( + const rd_kafka_ListConsumerGroups_result_t *result, + size_t *cntp) { + int list_result_cnt; + const rd_kafka_ListConsumerGroupsResult_t *list_result; + const rd_kafka_op_t *rko = (const rd_kafka_op_t *)result; + rd_kafka_op_type_t reqtype = + rko->rko_u.admin_result.reqtype & ~RD_KAFKA_OP_FLAGMASK; + rd_assert(reqtype == RD_KAFKA_OP_LISTCONSUMERGROUPS); + + list_result_cnt = rd_list_cnt(&rko->rko_u.admin_result.results); + rd_assert(list_result_cnt == 1); + list_result = rd_list_elem(&rko->rko_u.admin_result.results, 0); + *cntp = rd_list_cnt(&list_result->valid); -/** - * @brief Get an array of group results from a AlterGroups result. - * - * The returned \p groups life-time is the same as the \p result object. - * @param cntp is updated to the number of elements in the array. 
- */ -const rd_kafka_group_result_t ** -rd_kafka_AlterConsumerGroupOffsets_result_groups( - const rd_kafka_AlterConsumerGroupOffsets_result_t *result, - size_t *cntp) { - return rd_kafka_admin_result_ret_groups((const rd_kafka_op_t *)result, - cntp); + return (const rd_kafka_ConsumerGroupListing_t **) + list_result->valid.rl_elems; } -/**@}*/ +const rd_kafka_error_t **rd_kafka_ListConsumerGroups_result_errors( + const rd_kafka_ListConsumerGroups_result_t *result, + size_t *cntp) { + int list_result_cnt, error_cnt; + const rd_kafka_ListConsumerGroupsResult_t *list_result; + const rd_kafka_op_t *rko = (const rd_kafka_op_t *)result; + rd_kafka_op_type_t reqtype = + rko->rko_u.admin_result.reqtype & ~RD_KAFKA_OP_FLAGMASK; + rd_assert(reqtype == RD_KAFKA_OP_LISTCONSUMERGROUPS); + list_result_cnt = rd_list_cnt(&rko->rko_u.admin_result.results); + rd_assert(list_result_cnt == 1); + list_result = rko->rko_u.admin_result.results.rl_elems[0]; + error_cnt = rd_list_cnt(&list_result->errors); + if (error_cnt == 0) { + *cntp = 0; + return NULL; + } + *cntp = error_cnt; + return (const rd_kafka_error_t **)list_result->errors.rl_elems; +} /**@}*/ /** - * @name List consumer group offsets (committed offsets) + * @name Describe consumer groups * @{ * * @@ -5335,399 +7527,460 @@ rd_kafka_AlterConsumerGroupOffsets_result_groups( * */ -rd_kafka_ListConsumerGroupOffsets_t *rd_kafka_ListConsumerGroupOffsets_new( - const char *group_id, - const rd_kafka_topic_partition_list_t *partitions) { - size_t tsize = strlen(group_id) + 1; - rd_kafka_ListConsumerGroupOffsets_t *list_grpoffsets; - - rd_assert(group_id); +/** + * @brief Parse authorized_operations returned in + * - DescribeConsumerGroups + * - DescribeTopics + * - DescribeCluster + * + * @param authorized_operations returned by RPC, containing operations encoded + * per-bit. + * @param cntp is set to the count of the operations, or -1 if the operations + * were not requested. + * @returns rd_kafka_AclOperation_t *. May be NULL. + */ +static rd_kafka_AclOperation_t * +rd_kafka_AuthorizedOperations_parse(int32_t authorized_operations, int *cntp) { + rd_kafka_AclOperation_t i; + int j = 0; + int count = 0; + rd_kafka_AclOperation_t *operations = NULL; + + /* In case of authorized_operations not requested, return NULL. */ + if (authorized_operations < 0) { + *cntp = -1; + return NULL; + } - /* Single allocation */ - list_grpoffsets = rd_calloc(1, sizeof(*list_grpoffsets) + tsize); - list_grpoffsets->group_id = list_grpoffsets->data; - memcpy(list_grpoffsets->group_id, group_id, tsize); - if (partitions) { - list_grpoffsets->partitions = - rd_kafka_topic_partition_list_copy(partitions); + /* Count number of bits set. ALL, ANY and UNKNOWN bits are skipped as + * they are always unset as per KIP-430. */ + for (i = RD_KAFKA_ACL_OPERATION_READ; i < RD_KAFKA_ACL_OPERATION__CNT; + i++) + count += ((authorized_operations >> i) & 1); + *cntp = count; + + /* In case no operations exist, allocate 1 byte so that the returned + * pointer is non-NULL. A NULL pointer implies that authorized + * operations were not requested. 
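+         *
+         * Illustrative example (not from this patch, relying only on the
+         * enum order used by the loop above): an \p authorized_operations
+         * value with the bits (1 << RD_KAFKA_ACL_OPERATION_READ) and
+         * (1 << RD_KAFKA_ACL_OPERATION_WRITE) set yields *cntp == 2 and
+         * an operations array of { READ, WRITE }.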
*/ + if (count == 0) + return rd_malloc(1); + + operations = rd_malloc(sizeof(rd_kafka_AclOperation_t) * count); + j = 0; + for (i = RD_KAFKA_ACL_OPERATION_READ; i < RD_KAFKA_ACL_OPERATION__CNT; + i++) { + if ((authorized_operations >> i) & 1) { + operations[j] = i; + j++; + } } - return list_grpoffsets; + return operations; } -void rd_kafka_ListConsumerGroupOffsets_destroy( - rd_kafka_ListConsumerGroupOffsets_t *list_grpoffsets) { - if (list_grpoffsets->partitions != NULL) { - rd_kafka_topic_partition_list_destroy( - list_grpoffsets->partitions); - } - rd_free(list_grpoffsets); -} +/** + * @brief Copy a list of rd_kafka_AclOperation_t. + * + * @param src Array of rd_kafka_AclOperation_t to copy from. May be NULL if + * authorized operations were not requested. + * @param authorized_operations_cnt Count of \p src. May be -1 if authorized + * operations were not requested. + * @returns Copy of \p src. May be NULL. + */ +static rd_kafka_AclOperation_t * +rd_kafka_AuthorizedOperations_copy(const rd_kafka_AclOperation_t *src, + int authorized_operations_cnt) { + size_t copy_bytes = 0; + rd_kafka_AclOperation_t *dst = NULL; -static void rd_kafka_ListConsumerGroupOffsets_free(void *ptr) { - rd_kafka_ListConsumerGroupOffsets_destroy(ptr); -} + if (authorized_operations_cnt == -1 || src == NULL) + return NULL; -void rd_kafka_ListConsumerGroupOffsets_destroy_array( - rd_kafka_ListConsumerGroupOffsets_t **list_grpoffsets, - size_t list_grpoffsets_cnt) { - size_t i; - for (i = 0; i < list_grpoffsets_cnt; i++) - rd_kafka_ListConsumerGroupOffsets_destroy(list_grpoffsets[i]); + /* Allocate and copy 1 byte so that the returned pointer + * is non-NULL. A NULL pointer implies that authorized operations were + * not requested. */ + if (authorized_operations_cnt == 0) + copy_bytes = 1; + else + copy_bytes = + sizeof(rd_kafka_AclOperation_t) * authorized_operations_cnt; + + dst = rd_malloc(copy_bytes); + memcpy(dst, src, copy_bytes); + return dst; } /** - * @brief Allocate a new ListGroup and make a copy of \p src + * @brief Create a new MemberDescription object. This object is used for + * creating a ConsumerGroupDescription. + * + * @param client_id The client id. + * @param consumer_id The consumer id (or member id). + * @param group_instance_id (optional) The group instance id + * for static membership. + * @param host The consumer host. + * @param assignment The member's assigned partitions, or NULL if none. + * + * @return A new allocated MemberDescription object. + * Use rd_kafka_MemberDescription_destroy() to free when done. 
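+ *
+ * All arguments are copied; the caller retains ownership of the
+ * passed strings and of \p assignment.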
*/ -static rd_kafka_ListConsumerGroupOffsets_t * -rd_kafka_ListConsumerGroupOffsets_copy( - const rd_kafka_ListConsumerGroupOffsets_t *src) { - return rd_kafka_ListConsumerGroupOffsets_new(src->group_id, - src->partitions); +static rd_kafka_MemberDescription_t *rd_kafka_MemberDescription_new( + const char *client_id, + const char *consumer_id, + const char *group_instance_id, + const char *host, + const rd_kafka_topic_partition_list_t *assignment) { + rd_kafka_MemberDescription_t *member; + member = rd_calloc(1, sizeof(*member)); + member->client_id = rd_strdup(client_id); + member->consumer_id = rd_strdup(consumer_id); + if (group_instance_id) + member->group_instance_id = rd_strdup(group_instance_id); + member->host = rd_strdup(host); + if (assignment) + member->assignment.partitions = + rd_kafka_topic_partition_list_copy(assignment); + else + member->assignment.partitions = + rd_kafka_topic_partition_list_new(0); + return member; } /** - * @brief Send a OffsetFetchRequest to \p rkb with the partitions - * in list_grpoffsets (ListConsumerGroupOffsets_t*) using - * \p options. + * @brief Allocate a new MemberDescription, copy of \p src + * and return it. * + * @param src The MemberDescription to copy. + * @return A new allocated MemberDescription object, + * Use rd_kafka_MemberDescription_destroy() to free when done. */ -static rd_kafka_resp_err_t rd_kafka_ListConsumerGroupOffsetsRequest( - rd_kafka_broker_t *rkb, - /* (rd_kafka_ListConsumerGroupOffsets_t*) */ - const rd_list_t *list_grpoffsets, - rd_kafka_AdminOptions_t *options, - char *errstr, - size_t errstr_size, - rd_kafka_replyq_t replyq, - rd_kafka_resp_cb_t *resp_cb, - void *opaque) { - int op_timeout; - rd_bool_t require_stable_offsets; - const rd_kafka_ListConsumerGroupOffsets_t *grpoffsets = - rd_list_elem(list_grpoffsets, 0); - - rd_assert(rd_list_cnt(list_grpoffsets) == 1); - - op_timeout = rd_kafka_confval_get_int(&options->request_timeout); - require_stable_offsets = - rd_kafka_confval_get_int(&options->require_stable_offsets); - rd_kafka_OffsetFetchRequest( - rkb, grpoffsets->group_id, grpoffsets->partitions, - require_stable_offsets, op_timeout, replyq, resp_cb, opaque); - return RD_KAFKA_RESP_ERR_NO_ERROR; +static rd_kafka_MemberDescription_t * +rd_kafka_MemberDescription_copy(const rd_kafka_MemberDescription_t *src) { + return rd_kafka_MemberDescription_new(src->client_id, src->consumer_id, + src->group_instance_id, src->host, + src->assignment.partitions); } /** - * @brief Parse OffsetFetchResponse and create ADMIN_RESULT op. + * @brief MemberDescription copy, compatible with rd_list_copy_to. + * + * @param elem The MemberDescription to copy- + * @param opaque Not used. 
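+ *
+ * Used as the rd_list copy callback when duplicating the members list
+ * of a ConsumerGroupDescription.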
*/ -static rd_kafka_resp_err_t -rd_kafka_ListConsumerGroupOffsetsResponse_parse(rd_kafka_op_t *rko_req, - rd_kafka_op_t **rko_resultp, - rd_kafka_buf_t *reply, - char *errstr, - size_t errstr_size) { - const rd_kafka_ListConsumerGroupOffsets_t *list_grpoffsets = - rd_list_elem(&rko_req->rko_u.admin_request.args, 0); - rd_kafka_t *rk; - rd_kafka_broker_t *rkb; - rd_kafka_topic_partition_list_t *offsets = NULL; - rd_kafka_op_t *rko_result; - rd_kafka_resp_err_t err; - - rk = rko_req->rko_rk; - rkb = reply->rkbuf_rkb; - err = rd_kafka_handle_OffsetFetch(rk, rkb, RD_KAFKA_RESP_ERR_NO_ERROR, - reply, NULL, &offsets, rd_false, - rd_true, rd_false); - - if (unlikely(err != RD_KAFKA_RESP_ERR_NO_ERROR)) { - reply->rkbuf_err = err; - goto err; - } - - /* Create result op and group_result_t */ - rko_result = rd_kafka_admin_result_new(rko_req); - rd_list_init(&rko_result->rko_u.admin_result.results, 1, - rd_kafka_group_result_free); - rd_list_add(&rko_result->rko_u.admin_result.results, - rd_kafka_group_result_new(list_grpoffsets->group_id, -1, - offsets, NULL)); - - if (likely(offsets != NULL)) - rd_kafka_topic_partition_list_destroy(offsets); - - *rko_resultp = rko_result; - - return RD_KAFKA_RESP_ERR_NO_ERROR; -err: - if (likely(offsets != NULL)) - rd_kafka_topic_partition_list_destroy(offsets); - - rd_snprintf(errstr, errstr_size, - "ListConsumerGroupOffsetsResponse response failure: %s", - rd_kafka_err2str(reply->rkbuf_err)); - - return reply->rkbuf_err; +static void *rd_kafka_MemberDescription_list_copy(const void *elem, + void *opaque) { + return rd_kafka_MemberDescription_copy(elem); } -void rd_kafka_ListConsumerGroupOffsets( - rd_kafka_t *rk, - rd_kafka_ListConsumerGroupOffsets_t **list_grpoffsets, - size_t list_grpoffsets_cnt, - const rd_kafka_AdminOptions_t *options, - rd_kafka_queue_t *rkqu) { - static const struct rd_kafka_admin_worker_cbs cbs = { - rd_kafka_ListConsumerGroupOffsetsRequest, - rd_kafka_ListConsumerGroupOffsetsResponse_parse, - }; - rd_kafka_op_t *rko; - rd_kafka_topic_partition_list_t *copied_offsets; - - rd_assert(rkqu); - - rko = rd_kafka_admin_request_op_new( - rk, RD_KAFKA_OP_LISTCONSUMERGROUPOFFSETS, - RD_KAFKA_EVENT_LISTCONSUMERGROUPOFFSETS_RESULT, &cbs, options, - rkqu->rkqu_q); - - if (list_grpoffsets_cnt != 1) { - /* For simplicity we only support one single group for now */ - rd_kafka_admin_result_fail(rko, RD_KAFKA_RESP_ERR__INVALID_ARG, - "Exactly one " - "ListConsumerGroupOffsets must " - "be passed"); - goto fail; - } +static void +rd_kafka_MemberDescription_destroy(rd_kafka_MemberDescription_t *member) { + rd_free(member->client_id); + rd_free(member->consumer_id); + rd_free(member->host); + if (member->group_instance_id != NULL) + rd_free(member->group_instance_id); + if (member->assignment.partitions) + rd_kafka_topic_partition_list_destroy( + member->assignment.partitions); + rd_free(member); +} - if (list_grpoffsets[0]->partitions != NULL && - list_grpoffsets[0]->partitions->cnt == 0) { - /* Either pass NULL for all the partitions or a non-empty list - */ - rd_kafka_admin_result_fail( - rko, RD_KAFKA_RESP_ERR__INVALID_ARG, - "NULL or " - "non-empty topic partition list must " - "be passed"); - goto fail; - } +static void rd_kafka_MemberDescription_free(void *member) { + rd_kafka_MemberDescription_destroy(member); +} - /* TODO: add group id duplication check when implementing KIP-709 */ - if (list_grpoffsets[0]->partitions != NULL) { - /* Copy offsets list for checking duplicated */ - copied_offsets = rd_kafka_topic_partition_list_copy( - 
list_grpoffsets[0]->partitions); - if (rd_kafka_topic_partition_list_has_duplicates( - copied_offsets, rd_false /*check partition*/)) { - rd_kafka_topic_partition_list_destroy(copied_offsets); - rd_kafka_admin_result_fail( - rko, RD_KAFKA_RESP_ERR__INVALID_ARG, - "Duplicate partitions not allowed"); - goto fail; - } - rd_kafka_topic_partition_list_destroy(copied_offsets); - } +const char *rd_kafka_MemberDescription_client_id( + const rd_kafka_MemberDescription_t *member) { + return member->client_id; +} - rko->rko_u.admin_request.broker_id = RD_KAFKA_ADMIN_TARGET_COORDINATOR; - rko->rko_u.admin_request.coordtype = RD_KAFKA_COORD_GROUP; - rko->rko_u.admin_request.coordkey = - rd_strdup(list_grpoffsets[0]->group_id); +const char *rd_kafka_MemberDescription_group_instance_id( + const rd_kafka_MemberDescription_t *member) { + return member->group_instance_id; +} - /* Store copy of group on request so the group name can be reached - * from the response parser. */ - rd_list_init(&rko->rko_u.admin_request.args, 1, - rd_kafka_ListConsumerGroupOffsets_free); - rd_list_add(&rko->rko_u.admin_request.args, - rd_kafka_ListConsumerGroupOffsets_copy(list_grpoffsets[0])); +const char *rd_kafka_MemberDescription_consumer_id( + const rd_kafka_MemberDescription_t *member) { + return member->consumer_id; +} - rd_kafka_q_enq(rk->rk_ops, rko); - return; -fail: - rd_kafka_admin_common_worker_destroy(rk, rko, rd_true /*destroy*/); +const char * +rd_kafka_MemberDescription_host(const rd_kafka_MemberDescription_t *member) { + return member->host; } +const rd_kafka_MemberAssignment_t *rd_kafka_MemberDescription_assignment( + const rd_kafka_MemberDescription_t *member) { + return &member->assignment; +} -/** - * @brief Get an array of group results from a ListConsumerGroups result. - * - * The returned \p groups life-time is the same as the \p result object. - * @param cntp is updated to the number of elements in the array. - */ -const rd_kafka_group_result_t **rd_kafka_ListConsumerGroupOffsets_result_groups( - const rd_kafka_ListConsumerGroupOffsets_result_t *result, - size_t *cntp) { - return rd_kafka_admin_result_ret_groups((const rd_kafka_op_t *)result, - cntp); +const rd_kafka_topic_partition_list_t *rd_kafka_MemberAssignment_partitions( + const rd_kafka_MemberAssignment_t *assignment) { + return assignment->partitions; } -/**@}*/ /** - * @name List consumer groups - * @{ - * - * - * + * @brief Create a new ConsumerGroupDescription object. * + * @param group_id The group id. + * @param is_simple_consumer_group Is the group simple? + * @param members List of members (rd_kafka_MemberDescription_t) of this + * group. + * @param partition_assignor (optional) Chosen assignor. + * @param authorized_operations (optional) authorized operations. + * @param state Group state. + * @param coordinator (optional) Group coordinator. + * @param error (optional) Error received for this group. + * @return A new allocated ConsumerGroupDescription object. + * Use rd_kafka_ConsumerGroupDescription_destroy() to free when done. 
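+ *
+ * All arguments are copied: a NULL \p members results in an empty
+ * member list, and a non-NULL \p error is duplicated rather than
+ * adopted.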
*/ +static rd_kafka_ConsumerGroupDescription_t * +rd_kafka_ConsumerGroupDescription_new( + const char *group_id, + rd_bool_t is_simple_consumer_group, + const rd_list_t *members, + const char *partition_assignor, + const rd_kafka_AclOperation_t *authorized_operations, + int authorized_operations_cnt, + rd_kafka_consumer_group_state_t state, + const rd_kafka_Node_t *coordinator, + rd_kafka_error_t *error) { + rd_kafka_ConsumerGroupDescription_t *grpdesc; + grpdesc = rd_calloc(1, sizeof(*grpdesc)); + grpdesc->group_id = rd_strdup(group_id); + grpdesc->is_simple_consumer_group = is_simple_consumer_group; + if (members == NULL) { + rd_list_init(&grpdesc->members, 0, + rd_kafka_MemberDescription_free); + } else { + rd_list_init_copy(&grpdesc->members, members); + rd_list_copy_to(&grpdesc->members, members, + rd_kafka_MemberDescription_list_copy, NULL); + } + grpdesc->partition_assignor = !partition_assignor + ? (char *)partition_assignor + : rd_strdup(partition_assignor); -#define CONSUMER_PROTOCOL_TYPE "consumer" + grpdesc->authorized_operations_cnt = authorized_operations_cnt; + grpdesc->authorized_operations = rd_kafka_AuthorizedOperations_copy( + authorized_operations, authorized_operations_cnt); + + grpdesc->state = state; + if (coordinator != NULL) + grpdesc->coordinator = rd_kafka_Node_copy(coordinator); + grpdesc->error = + error != NULL ? rd_kafka_error_new(rd_kafka_error_code(error), "%s", + rd_kafka_error_string(error)) + : NULL; + return grpdesc; +} /** - * @brief Create a new ConsumerGroupListing object. + * @brief New instance of ConsumerGroupDescription from an error. * * @param group_id The group id. - * @param is_simple_consumer_group Is the group simple? - * @param state Group state. + * @param error Error received for this group. + * @return A new allocated ConsumerGroupDescription with the passed error. + * Use rd_kafka_ConsumerGroupDescription_destroy() to free when done. */ -static rd_kafka_ConsumerGroupListing_t * -rd_kafka_ConsumerGroupListing_new(const char *group_id, - rd_bool_t is_simple_consumer_group, - rd_kafka_consumer_group_state_t state) { - rd_kafka_ConsumerGroupListing_t *grplist; - grplist = rd_calloc(1, sizeof(*grplist)); - grplist->group_id = rd_strdup(group_id); - grplist->is_simple_consumer_group = is_simple_consumer_group; - grplist->state = state; - return grplist; +static rd_kafka_ConsumerGroupDescription_t * +rd_kafka_ConsumerGroupDescription_new_error(const char *group_id, + rd_kafka_error_t *error) { + return rd_kafka_ConsumerGroupDescription_new( + group_id, rd_false, NULL, NULL, NULL, 0, + RD_KAFKA_CONSUMER_GROUP_STATE_UNKNOWN, NULL, error); } /** - * @brief Copy \p grplist ConsumerGroupListing. + * @brief Copy \p desc ConsumerGroupDescription. * - * @param grplist The group listing to copy. - * @return A new allocated copy of the passed ConsumerGroupListing. + * @param desc The group description to copy. + * @return A new allocated copy of the passed ConsumerGroupDescription. 
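+ *
+ * Members, authorized operations, coordinator and error are deep-copied
+ * by delegating to rd_kafka_ConsumerGroupDescription_new().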
*/ -static rd_kafka_ConsumerGroupListing_t *rd_kafka_ConsumerGroupListing_copy( - const rd_kafka_ConsumerGroupListing_t *grplist) { - return rd_kafka_ConsumerGroupListing_new( - grplist->group_id, grplist->is_simple_consumer_group, - grplist->state); +static rd_kafka_ConsumerGroupDescription_t * +rd_kafka_ConsumerGroupDescription_copy( + const rd_kafka_ConsumerGroupDescription_t *grpdesc) { + return rd_kafka_ConsumerGroupDescription_new( + grpdesc->group_id, grpdesc->is_simple_consumer_group, + &grpdesc->members, grpdesc->partition_assignor, + grpdesc->authorized_operations, grpdesc->authorized_operations_cnt, + grpdesc->state, grpdesc->coordinator, grpdesc->error); } /** - * @brief Same as rd_kafka_ConsumerGroupListing_copy() but suitable for + * @brief Same as rd_kafka_ConsumerGroupDescription_copy() but suitable for * rd_list_copy(). The \p opaque is ignored. */ -static void *rd_kafka_ConsumerGroupListing_copy_opaque(const void *grplist, - void *opaque) { - return rd_kafka_ConsumerGroupListing_copy(grplist); +static void *rd_kafka_ConsumerGroupDescription_copy_opaque(const void *grpdesc, + void *opaque) { + return rd_kafka_ConsumerGroupDescription_copy(grpdesc); } -static void rd_kafka_ConsumerGroupListing_destroy( - rd_kafka_ConsumerGroupListing_t *grplist) { - RD_IF_FREE(grplist->group_id, rd_free); - rd_free(grplist); +static void rd_kafka_ConsumerGroupDescription_destroy( + rd_kafka_ConsumerGroupDescription_t *grpdesc) { + if (likely(grpdesc->group_id != NULL)) + rd_free(grpdesc->group_id); + rd_list_destroy(&grpdesc->members); + if (likely(grpdesc->partition_assignor != NULL)) + rd_free(grpdesc->partition_assignor); + if (likely(grpdesc->error != NULL)) + rd_kafka_error_destroy(grpdesc->error); + if (grpdesc->coordinator) + rd_kafka_Node_destroy(grpdesc->coordinator); + if (grpdesc->authorized_operations_cnt) + rd_free(grpdesc->authorized_operations); + rd_free(grpdesc); } -static void rd_kafka_ConsumerGroupListing_free(void *ptr) { - rd_kafka_ConsumerGroupListing_destroy(ptr); +static void rd_kafka_ConsumerGroupDescription_free(void *ptr) { + rd_kafka_ConsumerGroupDescription_destroy(ptr); } -const char *rd_kafka_ConsumerGroupListing_group_id( - const rd_kafka_ConsumerGroupListing_t *grplist) { - return grplist->group_id; +const char *rd_kafka_ConsumerGroupDescription_group_id( + const rd_kafka_ConsumerGroupDescription_t *grpdesc) { + return grpdesc->group_id; } -int rd_kafka_ConsumerGroupListing_is_simple_consumer_group( - const rd_kafka_ConsumerGroupListing_t *grplist) { - return grplist->is_simple_consumer_group; +const rd_kafka_error_t *rd_kafka_ConsumerGroupDescription_error( + const rd_kafka_ConsumerGroupDescription_t *grpdesc) { + return grpdesc->error; } -rd_kafka_consumer_group_state_t rd_kafka_ConsumerGroupListing_state( - const rd_kafka_ConsumerGroupListing_t *grplist) { - return grplist->state; + +int rd_kafka_ConsumerGroupDescription_is_simple_consumer_group( + const rd_kafka_ConsumerGroupDescription_t *grpdesc) { + return grpdesc->is_simple_consumer_group; } -/** - * @brief Create a new ListConsumerGroupsResult object. 
- * - * @param valid - * @param errors - */ -static rd_kafka_ListConsumerGroupsResult_t * -rd_kafka_ListConsumerGroupsResult_new(const rd_list_t *valid, - const rd_list_t *errors) { - rd_kafka_ListConsumerGroupsResult_t *res; - res = rd_calloc(1, sizeof(*res)); - rd_list_init_copy(&res->valid, valid); - rd_list_copy_to(&res->valid, valid, - rd_kafka_ConsumerGroupListing_copy_opaque, NULL); - rd_list_init_copy(&res->errors, errors); - rd_list_copy_to(&res->errors, errors, rd_kafka_error_copy_opaque, NULL); - return res; + +const char *rd_kafka_ConsumerGroupDescription_partition_assignor( + const rd_kafka_ConsumerGroupDescription_t *grpdesc) { + return grpdesc->partition_assignor; } -static void rd_kafka_ListConsumerGroupsResult_destroy( - rd_kafka_ListConsumerGroupsResult_t *res) { - rd_list_destroy(&res->valid); - rd_list_destroy(&res->errors); - rd_free(res); +const rd_kafka_AclOperation_t * +rd_kafka_ConsumerGroupDescription_authorized_operations( + const rd_kafka_ConsumerGroupDescription_t *grpdesc, + size_t *cntp) { + *cntp = RD_MAX(grpdesc->authorized_operations_cnt, 0); + return grpdesc->authorized_operations; } -static void rd_kafka_ListConsumerGroupsResult_free(void *ptr) { - rd_kafka_ListConsumerGroupsResult_destroy(ptr); +rd_kafka_consumer_group_state_t rd_kafka_ConsumerGroupDescription_state( + const rd_kafka_ConsumerGroupDescription_t *grpdesc) { + return grpdesc->state; +} + +const rd_kafka_Node_t *rd_kafka_ConsumerGroupDescription_coordinator( + const rd_kafka_ConsumerGroupDescription_t *grpdesc) { + return grpdesc->coordinator; +} + +size_t rd_kafka_ConsumerGroupDescription_member_count( + const rd_kafka_ConsumerGroupDescription_t *grpdesc) { + return rd_list_cnt(&grpdesc->members); +} + +const rd_kafka_MemberDescription_t *rd_kafka_ConsumerGroupDescription_member( + const rd_kafka_ConsumerGroupDescription_t *grpdesc, + size_t idx) { + return (rd_kafka_MemberDescription_t *)rd_list_elem(&grpdesc->members, + idx); } /** - * @brief Copy the passed ListConsumerGroupsResult. - * - * @param res the ListConsumerGroupsResult to copy - * @return a newly allocated ListConsumerGroupsResult object. - * - * @sa Release the object with rd_kafka_ListConsumerGroupsResult_destroy(). + * @brief Group arguments comparator for DescribeConsumerGroups args */ -static rd_kafka_ListConsumerGroupsResult_t * -rd_kafka_ListConsumerGroupsResult_copy( - const rd_kafka_ListConsumerGroupsResult_t *res) { - return rd_kafka_ListConsumerGroupsResult_new(&res->valid, &res->errors); +static int rd_kafka_DescribeConsumerGroups_cmp(const void *a, const void *b) { + return strcmp(a, b); } -/** - * @brief Same as rd_kafka_ListConsumerGroupsResult_copy() but suitable for - * rd_list_copy(). The \p opaque is ignored. +/** @brief Merge the DescribeConsumerGroups response from a single broker + * into the user response list. */ -static void *rd_kafka_ListConsumerGroupsResult_copy_opaque(const void *list, - void *opaque) { - return rd_kafka_ListConsumerGroupsResult_copy(list); +static void rd_kafka_DescribeConsumerGroups_response_merge( + rd_kafka_op_t *rko_fanout, + const rd_kafka_op_t *rko_partial) { + rd_kafka_ConsumerGroupDescription_t *groupres = NULL; + rd_kafka_ConsumerGroupDescription_t *newgroupres; + const char *grp = rko_partial->rko_u.admin_result.opaque; + int orig_pos; + + rd_assert(rko_partial->rko_evtype == + RD_KAFKA_EVENT_DESCRIBECONSUMERGROUPS_RESULT); + + if (!rko_partial->rko_err) { + /* Proper results. 
+ * We only send one group per request, make sure it matches */ + groupres = + rd_list_elem(&rko_partial->rko_u.admin_result.results, 0); + rd_assert(groupres); + rd_assert(!strcmp(groupres->group_id, grp)); + newgroupres = rd_kafka_ConsumerGroupDescription_copy(groupres); + } else { + /* Op errored, e.g. timeout */ + rd_kafka_error_t *error = + rd_kafka_error_new(rko_partial->rko_err, NULL); + newgroupres = + rd_kafka_ConsumerGroupDescription_new_error(grp, error); + rd_kafka_error_destroy(error); + } + + /* As a convenience to the application we insert group result + * in the same order as they were requested. */ + orig_pos = rd_list_index(&rko_fanout->rko_u.admin_request.args, grp, + rd_kafka_DescribeConsumerGroups_cmp); + rd_assert(orig_pos != -1); + + /* Make sure result is not already set */ + rd_assert(rd_list_elem(&rko_fanout->rko_u.admin_request.fanout.results, + orig_pos) == NULL); + + rd_list_set(&rko_fanout->rko_u.admin_request.fanout.results, orig_pos, + newgroupres); } + /** - * @brief Send ListConsumerGroupsRequest. Admin worker compatible callback. + * @brief Construct and send DescribeConsumerGroupsRequest to \p rkb + * with the groups (char *) in \p groups, using + * \p options. + * + * The response (unparsed) will be enqueued on \p replyq + * for handling by \p resp_cb (with \p opaque passed). + * + * @returns RD_KAFKA_RESP_ERR_NO_ERROR if the request was enqueued for + * transmission, otherwise an error code and errstr will be + * updated with a human readable error string. */ -static rd_kafka_resp_err_t -rd_kafka_admin_ListConsumerGroupsRequest(rd_kafka_broker_t *rkb, - const rd_list_t *groups /*(char*)*/, - rd_kafka_AdminOptions_t *options, - char *errstr, - size_t errstr_size, - rd_kafka_replyq_t replyq, - rd_kafka_resp_cb_t *resp_cb, - void *opaque) { - int i; +static rd_kafka_resp_err_t rd_kafka_admin_DescribeConsumerGroupsRequest( + rd_kafka_broker_t *rkb, + const rd_list_t *groups /*(char*)*/, + rd_kafka_AdminOptions_t *options, + char *errstr, + size_t errstr_size, + rd_kafka_replyq_t replyq, + rd_kafka_resp_cb_t *resp_cb, + void *opaque) { + int i, include_authorized_operations; + char *group; rd_kafka_resp_err_t err; - rd_kafka_error_t *error; - const char **states_str = NULL; - int states_str_cnt = 0; - rd_list_t *states = - rd_kafka_confval_get_ptr(&options->match_consumer_group_states); + int groups_cnt = rd_list_cnt(groups); + rd_kafka_error_t *error = NULL; + char **groups_arr = rd_calloc(groups_cnt, sizeof(*groups_arr)); - /* Prepare list_options */ - if (states && rd_list_cnt(states) > 0) { - states_str_cnt = rd_list_cnt(states); - states_str = rd_calloc(states_str_cnt, sizeof(*states_str)); - for (i = 0; i < states_str_cnt; i++) { - states_str[i] = rd_kafka_consumer_group_state_name( - rd_list_get_int32(states, i)); - } + RD_LIST_FOREACH(group, groups, i) { + groups_arr[i] = rd_list_elem(groups, i); } - error = rd_kafka_ListGroupsRequest(rkb, -1, states_str, states_str_cnt, - replyq, resp_cb, opaque); + include_authorized_operations = + rd_kafka_confval_get_int(&options->include_authorized_operations); - if (states_str) { - rd_free(states_str); - } + error = rd_kafka_DescribeGroupsRequest(rkb, -1, groups_arr, groups_cnt, + include_authorized_operations, + replyq, resp_cb, opaque); + rd_free(groups_arr); if (error) { rd_snprintf(errstr, errstr_size, "%s", @@ -5741,87 +7994,200 @@ rd_kafka_admin_ListConsumerGroupsRequest(rd_kafka_broker_t *rkb, } /** - * @brief Parse ListConsumerGroupsResponse and create ADMIN_RESULT op. 
+ * @brief Parse DescribeConsumerGroupsResponse and create ADMIN_RESULT op. */ static rd_kafka_resp_err_t -rd_kafka_ListConsumerGroupsResponse_parse(rd_kafka_op_t *rko_req, - rd_kafka_op_t **rko_resultp, - rd_kafka_buf_t *reply, - char *errstr, - size_t errstr_size) { +rd_kafka_DescribeConsumerGroupsResponse_parse(rd_kafka_op_t *rko_req, + rd_kafka_op_t **rko_resultp, + rd_kafka_buf_t *reply, + char *errstr, + size_t errstr_size) { const int log_decode_errors = LOG_ERR; - int i, cnt; - int16_t error_code, api_version; + int32_t nodeid; + uint16_t port; + int16_t api_version; + int32_t cnt; rd_kafka_op_t *rko_result = NULL; - rd_kafka_error_t *error = NULL; rd_kafka_broker_t *rkb = reply->rkbuf_rkb; - rd_list_t valid, errors; - rd_kafka_ListConsumerGroupsResult_t *list_result; - char *group_id = NULL, *group_state = NULL, *proto_type = NULL; + rd_kafka_Node_t *node = NULL; + rd_kafka_error_t *error = NULL; + char *group_id = NULL, *group_state = NULL, *proto_type = NULL, + *proto = NULL, *host = NULL; + rd_kafka_AclOperation_t *operations = NULL; + int operation_cnt = -1; api_version = rd_kafka_buf_ApiVersion(reply); if (api_version >= 1) { rd_kafka_buf_read_throttle_time(reply); } - rd_kafka_buf_read_i16(reply, &error_code); - if (error_code) { - error = rd_kafka_error_new(error_code, - "Broker [%d" - "] " - "ListConsumerGroups: %s", - rd_kafka_broker_id(rkb), - rd_kafka_err2str(error_code)); - } - rd_kafka_buf_read_arraycnt(reply, &cnt, RD_KAFKAP_GROUPS_MAX); - rd_list_init(&valid, cnt, rd_kafka_ConsumerGroupListing_free); - rd_list_init(&errors, 8, rd_free); - if (error) - rd_list_add(&errors, error); + rd_kafka_buf_read_arraycnt(reply, &cnt, 100000); rko_result = rd_kafka_admin_result_new(rko_req); - rd_list_init(&rko_result->rko_u.admin_result.results, 1, - rd_kafka_ListConsumerGroupsResult_free); + rd_list_init(&rko_result->rko_u.admin_result.results, cnt, + rd_kafka_ConsumerGroupDescription_free); - for (i = 0; i < cnt; i++) { - rd_kafkap_str_t GroupId, ProtocolType, - GroupState = RD_ZERO_INIT; - rd_kafka_ConsumerGroupListing_t *group_listing; + rd_kafka_broker_lock(rkb); + nodeid = rkb->rkb_nodeid; + host = rd_strdup(rkb->rkb_origname); + port = rkb->rkb_port; + rd_kafka_broker_unlock(rkb); + + node = rd_kafka_Node_new(nodeid, host, port, NULL); + while (cnt-- > 0) { + int16_t error_code; + int32_t authorized_operations = -1; + rd_kafkap_str_t GroupId, GroupState, ProtocolType, ProtocolData; rd_bool_t is_simple_consumer_group, is_consumer_protocol_type; - rd_kafka_consumer_group_state_t state = - RD_KAFKA_CONSUMER_GROUP_STATE_UNKNOWN; + int32_t member_cnt; + rd_list_t members; + rd_kafka_ConsumerGroupDescription_t *grpdesc = NULL; + + rd_kafka_buf_read_i16(reply, &error_code); + rd_kafka_buf_read_str(reply, &GroupId); + rd_kafka_buf_read_str(reply, &GroupState); + rd_kafka_buf_read_str(reply, &ProtocolType); + rd_kafka_buf_read_str(reply, &ProtocolData); + rd_kafka_buf_read_arraycnt(reply, &member_cnt, 100000); + + group_id = RD_KAFKAP_STR_DUP(&GroupId); + group_state = RD_KAFKAP_STR_DUP(&GroupState); + proto_type = RD_KAFKAP_STR_DUP(&ProtocolType); + proto = RD_KAFKAP_STR_DUP(&ProtocolData); + + if (error_code) { + error = rd_kafka_error_new( + error_code, "DescribeConsumerGroups: %s", + rd_kafka_err2str(error_code)); + } + + is_simple_consumer_group = *proto_type == '\0'; + is_consumer_protocol_type = + !strcmp(proto_type, CONSUMER_PROTOCOL_TYPE); + if (error == NULL && !is_simple_consumer_group && + !is_consumer_protocol_type) { + error = rd_kafka_error_new( + 
RD_KAFKA_RESP_ERR__INVALID_ARG, + "GroupId %s is not a consumer group (%s).", + group_id, proto_type); + } + + rd_list_init(&members, 0, rd_kafka_MemberDescription_free); + + while (member_cnt-- > 0) { + rd_kafkap_str_t MemberId, ClientId, ClientHost, + GroupInstanceId = RD_KAFKAP_STR_INITIALIZER; + char *member_id, *client_id, *client_host, + *group_instance_id = NULL; + rd_kafkap_bytes_t MemberMetadata, MemberAssignment; + rd_kafka_MemberDescription_t *member; + rd_kafka_topic_partition_list_t *partitions = NULL; + rd_kafka_buf_t *rkbuf; + + rd_kafka_buf_read_str(reply, &MemberId); + if (api_version >= 4) { + rd_kafka_buf_read_str(reply, &GroupInstanceId); + } + rd_kafka_buf_read_str(reply, &ClientId); + rd_kafka_buf_read_str(reply, &ClientHost); + rd_kafka_buf_read_kbytes(reply, &MemberMetadata); + rd_kafka_buf_read_kbytes(reply, &MemberAssignment); + if (error != NULL) + continue; + + if (RD_KAFKAP_BYTES_LEN(&MemberAssignment) != 0) { + int16_t version; + /* Parse assignment */ + rkbuf = rd_kafka_buf_new_shadow( + MemberAssignment.data, + RD_KAFKAP_BYTES_LEN(&MemberAssignment), + NULL); + /* Protocol parser needs a broker handle + * to log errors on. */ + rkbuf->rkbuf_rkb = rkb; + /* Decreased in rd_kafka_buf_destroy */ + rd_kafka_broker_keep(rkb); + rd_kafka_buf_read_i16(rkbuf, &version); + const rd_kafka_topic_partition_field_t fields[] = + {RD_KAFKA_TOPIC_PARTITION_FIELD_PARTITION, + RD_KAFKA_TOPIC_PARTITION_FIELD_END}; + partitions = rd_kafka_buf_read_topic_partitions( + rkbuf, 0, fields); + rd_kafka_buf_destroy(rkbuf); + if (!partitions) + rd_kafka_buf_parse_fail( + reply, + "Error reading topic partitions"); + } - rd_kafka_buf_read_str(reply, &GroupId); - rd_kafka_buf_read_str(reply, &ProtocolType); - if (api_version >= 4) { - rd_kafka_buf_read_str(reply, &GroupState); - } - rd_kafka_buf_skip_tags(reply); + member_id = RD_KAFKAP_STR_DUP(&MemberId); + if (!RD_KAFKAP_STR_IS_NULL(&GroupInstanceId)) { + group_instance_id = + RD_KAFKAP_STR_DUP(&GroupInstanceId); + } + client_id = RD_KAFKAP_STR_DUP(&ClientId); + client_host = RD_KAFKAP_STR_DUP(&ClientHost); - group_id = RD_KAFKAP_STR_DUP(&GroupId); - proto_type = RD_KAFKAP_STR_DUP(&ProtocolType); - if (api_version >= 4) { - group_state = RD_KAFKAP_STR_DUP(&GroupState); - state = rd_kafka_consumer_group_state_code(group_state); + member = rd_kafka_MemberDescription_new( + client_id, member_id, group_instance_id, + client_host, partitions); + if (partitions) + rd_kafka_topic_partition_list_destroy( + partitions); + rd_list_add(&members, member); + rd_free(member_id); + rd_free(group_instance_id); + rd_free(client_id); + rd_free(client_host); + member_id = NULL; + group_instance_id = NULL; + client_id = NULL; + client_host = NULL; } - is_simple_consumer_group = *proto_type == '\0'; - is_consumer_protocol_type = - !strcmp(proto_type, CONSUMER_PROTOCOL_TYPE); - if (is_simple_consumer_group || is_consumer_protocol_type) { - group_listing = rd_kafka_ConsumerGroupListing_new( - group_id, is_simple_consumer_group, state); - rd_list_add(&valid, group_listing); + if (api_version >= 3) { + rd_kafka_buf_read_i32(reply, &authorized_operations); + /* Authorized_operations is INT_MIN + * in case of not being requested, and the list is NULL + * that case. 
*/ + operations = rd_kafka_AuthorizedOperations_parse( + authorized_operations, &operation_cnt); } + if (error == NULL) { + grpdesc = rd_kafka_ConsumerGroupDescription_new( + group_id, is_simple_consumer_group, &members, proto, + operations, operation_cnt, + rd_kafka_consumer_group_state_code(group_state), + node, error); + } else + grpdesc = rd_kafka_ConsumerGroupDescription_new_error( + group_id, error); + + rd_list_add(&rko_result->rko_u.admin_result.results, grpdesc); + + rd_list_destroy(&members); rd_free(group_id); rd_free(group_state); rd_free(proto_type); + rd_free(proto); + RD_IF_FREE(error, rd_kafka_error_destroy); + RD_IF_FREE(operations, rd_free); + + error = NULL; group_id = NULL; group_state = NULL; proto_type = NULL; + proto = NULL; + operations = NULL; } - rd_kafka_buf_skip_tags(reply); + + if (host) + rd_free(host); + if (node) + rd_kafka_Node_destroy(node); + *rko_resultp = rko_result; + return RD_KAFKA_RESP_ERR_NO_ERROR; err_parse: if (group_id) @@ -5830,150 +8196,144 @@ rd_kafka_ListConsumerGroupsResponse_parse(rd_kafka_op_t *rko_req, rd_free(group_state); if (proto_type) rd_free(proto_type); + if (proto) + rd_free(proto); + if (error) + rd_kafka_error_destroy(error); + if (host) + rd_free(host); + if (node) + rd_kafka_Node_destroy(node); + if (rko_result) + rd_kafka_op_destroy(rko_result); + RD_IF_FREE(operations, rd_free); - if (reply->rkbuf_err) { - error_code = reply->rkbuf_err; - error = rd_kafka_error_new( - error_code, - "Broker [%d" - "] " - "ListConsumerGroups response protocol parse failure: %s", - rd_kafka_broker_id(rkb), rd_kafka_err2str(error_code)); - rd_list_add(&errors, error); - } - - list_result = rd_kafka_ListConsumerGroupsResult_new(&valid, &errors); - rd_list_add(&rko_result->rko_u.admin_result.results, list_result); + rd_snprintf( + errstr, errstr_size, + "DescribeConsumerGroups response protocol parse failure: %s", + rd_kafka_err2str(reply->rkbuf_err)); - *rko_resultp = rko_result; - rd_list_destroy(&valid); - rd_list_destroy(&errors); - return RD_KAFKA_RESP_ERR_NO_ERROR; + return reply->rkbuf_err; } -/** @brief Merge the ListConsumerGroups response from a single broker - * into the user response list. 
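For reference, the operations array assembled here is what the public accessor added in this version exposes. A minimal reader-side sketch, not part of this changeset (`grpdesc` is assumed to come from a DescribeConsumerGroups result that was requested with authorized operations included):

#include <librdkafka/rdkafka.h>
#include <stdio.h>

/* Print the ACL operations the caller may perform on a described group.
 * The getter returns NULL with *cntp == 0 when the operations were not
 * requested, matching the INT_MIN sentinel handled above. */
static void
print_group_operations(const rd_kafka_ConsumerGroupDescription_t *grpdesc) {
        size_t i, cnt;
        const rd_kafka_AclOperation_t *ops =
            rd_kafka_ConsumerGroupDescription_authorized_operations(grpdesc,
                                                                    &cnt);

        for (i = 0; i < cnt; i++)
                printf("authorized: %s\n",
                       rd_kafka_AclOperation_name(ops[i]));
}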
- */ -static void -rd_kafka_ListConsumerGroups_response_merge(rd_kafka_op_t *rko_fanout, - const rd_kafka_op_t *rko_partial) { - int cnt; - rd_kafka_ListConsumerGroupsResult_t *res = NULL; - rd_kafka_ListConsumerGroupsResult_t *newres; - rd_list_t new_valid, new_errors; +void rd_kafka_DescribeConsumerGroups(rd_kafka_t *rk, + const char **groups, + size_t groups_cnt, + const rd_kafka_AdminOptions_t *options, + rd_kafka_queue_t *rkqu) { + rd_kafka_op_t *rko_fanout; + rd_list_t dup_list; + size_t i; + static const struct rd_kafka_admin_fanout_worker_cbs fanout_cbs = { + rd_kafka_DescribeConsumerGroups_response_merge, + rd_kafka_ConsumerGroupDescription_copy_opaque}; - rd_assert(rko_partial->rko_evtype == - RD_KAFKA_EVENT_LISTCONSUMERGROUPS_RESULT); + rd_assert(rkqu); - cnt = rd_list_cnt(&rko_fanout->rko_u.admin_request.fanout.results); - if (cnt) { - res = rd_list_elem( - &rko_fanout->rko_u.admin_request.fanout.results, 0); - } else { - rd_list_init(&new_valid, 0, rd_kafka_ConsumerGroupListing_free); - rd_list_init(&new_errors, 0, rd_free); - res = rd_kafka_ListConsumerGroupsResult_new(&new_valid, - &new_errors); - rd_list_set(&rko_fanout->rko_u.admin_request.fanout.results, 0, - res); - rd_list_destroy(&new_valid); - rd_list_destroy(&new_errors); + rko_fanout = rd_kafka_admin_fanout_op_new( + rk, RD_KAFKA_OP_DESCRIBECONSUMERGROUPS, + RD_KAFKA_EVENT_DESCRIBECONSUMERGROUPS_RESULT, &fanout_cbs, options, + rkqu->rkqu_q); + + if (groups_cnt == 0) { + rd_kafka_admin_result_fail(rko_fanout, + RD_KAFKA_RESP_ERR__INVALID_ARG, + "No groups to describe"); + rd_kafka_admin_common_worker_destroy(rk, rko_fanout, + rd_true /*destroy*/); + return; } - if (!rko_partial->rko_err) { - int new_valid_count, new_errors_count; - const rd_list_t *new_valid_list, *new_errors_list; - /* Read the partial result and merge the valid groups - * and the errors into the fanout parent result. */ - newres = - rd_list_elem(&rko_partial->rko_u.admin_result.results, 0); - rd_assert(newres); - new_valid_count = rd_list_cnt(&newres->valid); - new_errors_count = rd_list_cnt(&newres->errors); - if (new_valid_count) { - new_valid_list = &newres->valid; - rd_list_grow(&res->valid, new_valid_count); - rd_list_copy_to( - &res->valid, new_valid_list, - rd_kafka_ConsumerGroupListing_copy_opaque, NULL); - } - if (new_errors_count) { - new_errors_list = &newres->errors; - rd_list_grow(&res->errors, new_errors_count); - rd_list_copy_to(&res->errors, new_errors_list, - rd_kafka_error_copy_opaque, NULL); - } - } else { - /* Op errored, e.g. timeout */ - rd_list_add(&res->errors, - rd_kafka_error_new(rko_partial->rko_err, NULL)); + + /* Copy group list and store it on the request op. + * Maintain original ordering. */ + rd_list_init(&rko_fanout->rko_u.admin_request.args, (int)groups_cnt, + rd_free); + for (i = 0; i < groups_cnt; i++) + rd_list_add(&rko_fanout->rko_u.admin_request.args, + rd_strdup(groups[i])); + + /* Check for duplicates. + * Make a temporary copy of the group list and sort it to check for + * duplicates, we don't want the original list sorted since we want + * to maintain ordering. 
*/ + rd_list_init(&dup_list, + rd_list_cnt(&rko_fanout->rko_u.admin_request.args), NULL); + rd_list_copy_to(&dup_list, &rko_fanout->rko_u.admin_request.args, NULL, + NULL); + rd_list_sort(&dup_list, rd_kafka_DescribeConsumerGroups_cmp); + if (rd_list_find_duplicate(&dup_list, + rd_kafka_DescribeConsumerGroups_cmp)) { + rd_list_destroy(&dup_list); + rd_kafka_admin_result_fail(rko_fanout, + RD_KAFKA_RESP_ERR__INVALID_ARG, + "Duplicate groups not allowed"); + rd_kafka_admin_common_worker_destroy(rk, rko_fanout, + rd_true /*destroy*/); + return; } -} -void rd_kafka_ListConsumerGroups(rd_kafka_t *rk, - const rd_kafka_AdminOptions_t *options, - rd_kafka_queue_t *rkqu) { - rd_kafka_op_t *rko; - static const struct rd_kafka_admin_worker_cbs cbs = { - rd_kafka_admin_ListConsumerGroupsRequest, - rd_kafka_ListConsumerGroupsResponse_parse}; - static const struct rd_kafka_admin_fanout_worker_cbs fanout_cbs = { - rd_kafka_ListConsumerGroups_response_merge, - rd_kafka_ListConsumerGroupsResult_copy_opaque, - }; + rd_list_destroy(&dup_list); - rko = rd_kafka_admin_request_op_target_all_new( - rk, RD_KAFKA_OP_LISTCONSUMERGROUPS, - RD_KAFKA_EVENT_LISTCONSUMERGROUPS_RESULT, &cbs, &fanout_cbs, - rd_kafka_ListConsumerGroupsResult_free, options, rkqu->rkqu_q); - rd_kafka_q_enq(rk->rk_ops, rko); -} + /* Prepare results list where fanned out op's results will be + * accumulated. */ + rd_list_init(&rko_fanout->rko_u.admin_request.fanout.results, + (int)groups_cnt, rd_kafka_ConsumerGroupDescription_free); + rko_fanout->rko_u.admin_request.fanout.outstanding = (int)groups_cnt; -const rd_kafka_ConsumerGroupListing_t ** -rd_kafka_ListConsumerGroups_result_valid( - const rd_kafka_ListConsumerGroups_result_t *result, - size_t *cntp) { - int list_result_cnt; - const rd_kafka_ListConsumerGroupsResult_t *list_result; - const rd_kafka_op_t *rko = (const rd_kafka_op_t *)result; - rd_kafka_op_type_t reqtype = - rko->rko_u.admin_result.reqtype & ~RD_KAFKA_OP_FLAGMASK; - rd_assert(reqtype == RD_KAFKA_OP_LISTCONSUMERGROUPS); + /* Create individual request ops for each group. + * FIXME: A future optimization is to coalesce all groups for a single + * coordinator into one op. */ + for (i = 0; i < groups_cnt; i++) { + static const struct rd_kafka_admin_worker_cbs cbs = { + rd_kafka_admin_DescribeConsumerGroupsRequest, + rd_kafka_DescribeConsumerGroupsResponse_parse, + }; + char *grp = + rd_list_elem(&rko_fanout->rko_u.admin_request.args, (int)i); + rd_kafka_op_t *rko = rd_kafka_admin_request_op_new( + rk, RD_KAFKA_OP_DESCRIBECONSUMERGROUPS, + RD_KAFKA_EVENT_DESCRIBECONSUMERGROUPS_RESULT, &cbs, options, + rk->rk_ops); + + rko->rko_u.admin_request.fanout_parent = rko_fanout; + rko->rko_u.admin_request.broker_id = + RD_KAFKA_ADMIN_TARGET_COORDINATOR; + rko->rko_u.admin_request.coordtype = RD_KAFKA_COORD_GROUP; + rko->rko_u.admin_request.coordkey = rd_strdup(grp); - list_result_cnt = rd_list_cnt(&rko->rko_u.admin_result.results); - rd_assert(list_result_cnt == 1); - list_result = rd_list_elem(&rko->rko_u.admin_result.results, 0); - *cntp = rd_list_cnt(&list_result->valid); + /* Set the group name as the opaque so the fanout worker can use it + * to fill in errors. + * References rko_fanout's memory, which will always outlive + * the fanned out op.
*/ + rd_kafka_AdminOptions_set_opaque( + &rko->rko_u.admin_request.options, grp); - return (const rd_kafka_ConsumerGroupListing_t **) - list_result->valid.rl_elems; + rd_list_init(&rko->rko_u.admin_request.args, 1, rd_free); + rd_list_add(&rko->rko_u.admin_request.args, + rd_strdup(groups[i])); + + rd_kafka_q_enq(rk->rk_ops, rko); + } } -const rd_kafka_error_t **rd_kafka_ListConsumerGroups_result_errors( - const rd_kafka_ListConsumerGroups_result_t *result, +const rd_kafka_ConsumerGroupDescription_t ** +rd_kafka_DescribeConsumerGroups_result_groups( + const rd_kafka_DescribeConsumerGroups_result_t *result, size_t *cntp) { - int list_result_cnt, error_cnt; - const rd_kafka_ListConsumerGroupsResult_t *list_result; const rd_kafka_op_t *rko = (const rd_kafka_op_t *)result; rd_kafka_op_type_t reqtype = rko->rko_u.admin_result.reqtype & ~RD_KAFKA_OP_FLAGMASK; - rd_assert(reqtype == RD_KAFKA_OP_LISTCONSUMERGROUPS); + rd_assert(reqtype == RD_KAFKA_OP_DESCRIBECONSUMERGROUPS); - list_result_cnt = rd_list_cnt(&rko->rko_u.admin_result.results); - rd_assert(list_result_cnt == 1); - list_result = rko->rko_u.admin_result.results.rl_elems[0]; - error_cnt = rd_list_cnt(&list_result->errors); - if (error_cnt == 0) { - *cntp = 0; - return NULL; - } - *cntp = error_cnt; - return (const rd_kafka_error_t **)list_result->errors.rl_elems; + *cntp = rd_list_cnt(&rko->rko_u.admin_result.results); + return (const rd_kafka_ConsumerGroupDescription_t **) + rko->rko_u.admin_result.results.rl_elems; } /**@}*/ /** - * @name Describe consumer groups + * @name Describe Topic * @{ * * @@ -5981,321 +8341,266 @@ const rd_kafka_error_t **rd_kafka_ListConsumerGroups_result_errors( * */ -/** - * @brief Create a new MemberDescription object. This object is used for - * creating a ConsumerGroupDescription. - * - * @param client_id The client id. - * @param consumer_id The consumer id (or member id). - * @param group_instance_id (optional) The group instance id - * for static membership. - * @param host The consumer host. - * @param assignment The member's assigned partitions, or NULL if none. - * - * @return A new allocated MemberDescription object. - * Use rd_kafka_MemberDescription_destroy() to free when done. - */ -static rd_kafka_MemberDescription_t *rd_kafka_MemberDescription_new( - const char *client_id, - const char *consumer_id, - const char *group_instance_id, - const char *host, - const rd_kafka_topic_partition_list_t *assignment) { - rd_kafka_MemberDescription_t *member; - member = rd_calloc(1, sizeof(*member)); - member->client_id = rd_strdup(client_id); - member->consumer_id = rd_strdup(consumer_id); - if (group_instance_id) - member->group_instance_id = rd_strdup(group_instance_id); - member->host = rd_strdup(host); - if (assignment) - member->assignment.partitions = - rd_kafka_topic_partition_list_copy(assignment); - else - member->assignment.partitions = - rd_kafka_topic_partition_list_new(0); - return member; -} - -/** - * @brief Allocate a new MemberDescription, copy of \p src - * and return it. - * - * @param src The MemberDescription to copy. - * @return A new allocated MemberDescription object, - * Use rd_kafka_MemberDescription_destroy() to free when done. 
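Taken together, the fan-out above is driven from application code roughly as follows. This is an illustrative sketch, not part of the diff: the `rk` handle is assumed to exist elsewhere, the group name and timeout are placeholders, and error handling is abbreviated.

#include <librdkafka/rdkafka.h>
#include <stdio.h>

static void describe_groups_example(rd_kafka_t *rk) {
        const char *groups[] = {"example-group"};
        rd_kafka_queue_t *rkqu = rd_kafka_queue_new(rk);
        rd_kafka_AdminOptions_t *options = rd_kafka_AdminOptions_new(
            rk, RD_KAFKA_ADMIN_OP_DESCRIBECONSUMERGROUPS);
        rd_kafka_event_t *rkev;
        const rd_kafka_DescribeConsumerGroups_result_t *res;

        rd_kafka_DescribeConsumerGroups(rk, groups, 1, options, rkqu);

        /* A single event carries the merged result for all requested
         * groups, in request order. */
        rkev = rd_kafka_queue_poll(rkqu, 10 * 1000);
        if (rkev &&
            (res = rd_kafka_event_DescribeConsumerGroups_result(rkev))) {
                size_t i, cnt;
                const rd_kafka_ConsumerGroupDescription_t **descs =
                    rd_kafka_DescribeConsumerGroups_result_groups(res, &cnt);

                for (i = 0; i < cnt; i++) {
                        const rd_kafka_error_t *error =
                            rd_kafka_ConsumerGroupDescription_error(descs[i]);
                        printf("%s: %zu member(s)%s%s\n",
                               rd_kafka_ConsumerGroupDescription_group_id(
                                   descs[i]),
                               rd_kafka_ConsumerGroupDescription_member_count(
                                   descs[i]),
                               error ? ", error: " : "",
                               error ? rd_kafka_error_string(error) : "");
                }
        }
        if (rkev)
                rd_kafka_event_destroy(rkev);
        rd_kafka_AdminOptions_destroy(options);
        rd_kafka_queue_destroy(rkqu);
}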
- */ -static rd_kafka_MemberDescription_t * -rd_kafka_MemberDescription_copy(const rd_kafka_MemberDescription_t *src) { - return rd_kafka_MemberDescription_new(src->client_id, src->consumer_id, - src->group_instance_id, src->host, - src->assignment.partitions); -} +rd_kafka_TopicCollection_t * +rd_kafka_TopicCollection_of_topic_names(const char **topics, + size_t topics_cnt) { + size_t i; + rd_kafka_TopicCollection_t *ret = + rd_calloc(1, sizeof(rd_kafka_TopicCollection_t)); -/** - * @brief MemberDescription copy, compatible with rd_list_copy_to. - * - * @param elem The MemberDescription to copy- - * @param opaque Not used. - */ -static void *rd_kafka_MemberDescription_list_copy(const void *elem, - void *opaque) { - return rd_kafka_MemberDescription_copy(elem); -} + ret->topics_cnt = topics_cnt; + if (!ret->topics_cnt) + return ret; -static void -rd_kafka_MemberDescription_destroy(rd_kafka_MemberDescription_t *member) { - rd_free(member->client_id); - rd_free(member->consumer_id); - rd_free(member->host); - if (member->group_instance_id != NULL) - rd_free(member->group_instance_id); - if (member->assignment.partitions) - rd_kafka_topic_partition_list_destroy( - member->assignment.partitions); - rd_free(member); -} + ret->topics = rd_calloc(topics_cnt, sizeof(char *)); + for (i = 0; i < topics_cnt; i++) + ret->topics[i] = rd_strdup(topics[i]); -static void rd_kafka_MemberDescription_free(void *member) { - rd_kafka_MemberDescription_destroy(member); + return ret; } -const char *rd_kafka_MemberDescription_client_id( - const rd_kafka_MemberDescription_t *member) { - return member->client_id; -} +void rd_kafka_TopicCollection_destroy(rd_kafka_TopicCollection_t *topics) { + size_t i; -const char *rd_kafka_MemberDescription_group_instance_id( - const rd_kafka_MemberDescription_t *member) { - return member->group_instance_id; -} + for (i = 0; i < topics->topics_cnt; i++) + rd_free(topics->topics[i]); -const char *rd_kafka_MemberDescription_consumer_id( - const rd_kafka_MemberDescription_t *member) { - return member->consumer_id; + RD_IF_FREE(topics->topics, rd_free); + rd_free(topics); } -const char * -rd_kafka_MemberDescription_host(const rd_kafka_MemberDescription_t *member) { - return member->host; -} +/** + * @brief Create a new TopicPartitionInfo object. + * + * @return A newly allocated TopicPartitionInfo. Use + * rd_kafka_TopicPartitionInfo_destroy() to free when done. 
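The collection constructor above duplicates every name, so the caller's array may live on the stack. A small sketch under that assumption (the topic names are illustrative):

#include <librdkafka/rdkafka.h>

static rd_kafka_TopicCollection_t *build_topic_collection(void) {
        const char *names[] = {"orders", "payments"};

        /* Names are copied into the collection; free the returned object
         * with rd_kafka_TopicCollection_destroy() once it is no longer
         * needed. */
        return rd_kafka_TopicCollection_of_topic_names(
            names, sizeof(names) / sizeof(names[0]));
}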
+ */ +static rd_kafka_TopicPartitionInfo_t *rd_kafka_TopicPartitionInfo_new( + const struct rd_kafka_metadata_partition *partition, + const struct rd_kafka_metadata_broker *brokers_sorted, + const rd_kafka_metadata_broker_internal_t *brokers_internal, + int broker_cnt) { + size_t i; + rd_kafka_TopicPartitionInfo_t *pinfo = + rd_calloc(1, sizeof(rd_kafka_TopicPartitionInfo_t)); -const rd_kafka_MemberAssignment_t *rd_kafka_MemberDescription_assignment( - const rd_kafka_MemberDescription_t *member) { - return &member->assignment; -} + pinfo->partition = partition->id; + pinfo->isr_cnt = partition->isr_cnt; + pinfo->replica_cnt = partition->replica_cnt; -const rd_kafka_topic_partition_list_t *rd_kafka_MemberAssignment_partitions( - const rd_kafka_MemberAssignment_t *assignment) { - return assignment->partitions; -} + if (partition->leader >= 0) { + pinfo->leader = rd_kafka_Node_new_from_brokers( + partition->leader, brokers_sorted, brokers_internal, + broker_cnt); + } + if (pinfo->isr_cnt > 0) { + pinfo->isr = + rd_calloc(pinfo->isr_cnt, sizeof(rd_kafka_Node_t *)); + for (i = 0; i < pinfo->isr_cnt; i++) + pinfo->isr[i] = rd_kafka_Node_new_from_brokers( + partition->isrs[i], brokers_sorted, + brokers_internal, broker_cnt); + } -/** - * @brief Create a new ConsumerGroupDescription object. - * - * @param group_id The group id. - * @param is_simple_consumer_group Is the group simple? - * @param members List of members (rd_kafka_MemberDescription_t) of this - * group. - * @param partition_assignor (optional) Chosen assignor. - * @param state Group state. - * @param coordinator (optional) Group coordinator. - * @param error (optional) Error received for this group. - * @return A new allocated ConsumerGroupDescription object. - * Use rd_kafka_ConsumerGroupDescription_destroy() to free when done. - */ -static rd_kafka_ConsumerGroupDescription_t * -rd_kafka_ConsumerGroupDescription_new(const char *group_id, - rd_bool_t is_simple_consumer_group, - const rd_list_t *members, - const char *partition_assignor, - rd_kafka_consumer_group_state_t state, - const rd_kafka_Node_t *coordinator, - rd_kafka_error_t *error) { - rd_kafka_ConsumerGroupDescription_t *grpdesc; - grpdesc = rd_calloc(1, sizeof(*grpdesc)); - grpdesc->group_id = rd_strdup(group_id); - grpdesc->is_simple_consumer_group = is_simple_consumer_group; - if (members == NULL) { - rd_list_init(&grpdesc->members, 0, - rd_kafka_MemberDescription_free); - } else { - rd_list_init_copy(&grpdesc->members, members); - rd_list_copy_to(&grpdesc->members, members, - rd_kafka_MemberDescription_list_copy, NULL); + if (pinfo->replica_cnt > 0) { + pinfo->replicas = + rd_calloc(pinfo->replica_cnt, sizeof(rd_kafka_Node_t *)); + for (i = 0; i < pinfo->replica_cnt; i++) + pinfo->replicas[i] = rd_kafka_Node_new_from_brokers( + partition->replicas[i], brokers_sorted, + brokers_internal, broker_cnt); } - grpdesc->partition_assignor = !partition_assignor - ? (char *)partition_assignor - : rd_strdup(partition_assignor); - grpdesc->state = state; - if (coordinator != NULL) - grpdesc->coordinator = rd_kafka_Node_copy(coordinator); - grpdesc->error = - error != NULL ? rd_kafka_error_new(rd_kafka_error_code(error), "%s", - rd_kafka_error_string(error)) - : NULL; - return grpdesc; -} -/** - * @brief New instance of ConsumerGroupDescription from an error. - * - * @param group_id The group id. - * @param error The error. - * @return A new allocated ConsumerGroupDescription with the passed error. 
- */ -static rd_kafka_ConsumerGroupDescription_t * -rd_kafka_ConsumerGroupDescription_new_error(const char *group_id, - rd_kafka_error_t *error) { - return rd_kafka_ConsumerGroupDescription_new( - group_id, rd_false, NULL, NULL, - RD_KAFKA_CONSUMER_GROUP_STATE_UNKNOWN, NULL, error); + return pinfo; } /** - * @brief Copy \p desc ConsumerGroupDescription. - * - * @param desc The group description to copy. - * @return A new allocated copy of the passed ConsumerGroupDescription. + * @brief Destroy and deallocate a TopicPartitionInfo. */ -static rd_kafka_ConsumerGroupDescription_t * -rd_kafka_ConsumerGroupDescription_copy( - const rd_kafka_ConsumerGroupDescription_t *grpdesc) { - return rd_kafka_ConsumerGroupDescription_new( - grpdesc->group_id, grpdesc->is_simple_consumer_group, - &grpdesc->members, grpdesc->partition_assignor, grpdesc->state, - grpdesc->coordinator, grpdesc->error); +static void +rd_kafka_TopicPartitionInfo_destroy(rd_kafka_TopicPartitionInfo_t *pinfo) { + size_t i; + RD_IF_FREE(pinfo->leader, rd_kafka_Node_destroy); + + for (i = 0; i < pinfo->isr_cnt; i++) + rd_kafka_Node_destroy(pinfo->isr[i]); + RD_IF_FREE(pinfo->isr, rd_free); + + for (i = 0; i < pinfo->replica_cnt; i++) + rd_kafka_Node_destroy(pinfo->replicas[i]); + RD_IF_FREE(pinfo->replicas, rd_free); + + rd_free(pinfo); } /** - * @brief Same as rd_kafka_ConsumerGroupDescription_copy() but suitable for - * rd_list_copy(). The \p opaque is ignored. + * @brief Create a new TopicDescription object. + * + * @param topic topic name + * @param topic_id topic id + * @param partitions Array of partition metadata (rd_kafka_metadata_partition). + * @param partition_cnt Number of partitions in partition metadata. + * @param authorized_operations acl operations allowed for topic. + * @param error Topic error reported by the broker. + * @return A newly allocated TopicDescription object. + * @remark Use rd_kafka_TopicDescription_destroy() to free when done. 
*/ -static void *rd_kafka_ConsumerGroupDescription_copy_opaque(const void *grpdesc, - void *opaque) { - return rd_kafka_ConsumerGroupDescription_copy(grpdesc); -} +static rd_kafka_TopicDescription_t *rd_kafka_TopicDescription_new( + const char *topic, + rd_kafka_Uuid_t topic_id, + const struct rd_kafka_metadata_partition *partitions, + int partition_cnt, + const struct rd_kafka_metadata_broker *brokers_sorted, + const rd_kafka_metadata_broker_internal_t *brokers_internal, + int broker_cnt, + const rd_kafka_AclOperation_t *authorized_operations, + int authorized_operations_cnt, + rd_bool_t is_internal, + rd_kafka_error_t *error) { + rd_kafka_TopicDescription_t *topicdesc; + int i; + topicdesc = rd_calloc(1, sizeof(*topicdesc)); + topicdesc->topic = rd_strdup(topic); + topicdesc->topic_id = topic_id; + topicdesc->partition_cnt = partition_cnt; + topicdesc->is_internal = is_internal; + if (error) + topicdesc->error = rd_kafka_error_copy(error); -static void rd_kafka_ConsumerGroupDescription_destroy( - rd_kafka_ConsumerGroupDescription_t *grpdesc) { - if (likely(grpdesc->group_id != NULL)) - rd_free(grpdesc->group_id); - rd_list_destroy(&grpdesc->members); - if (likely(grpdesc->partition_assignor != NULL)) - rd_free(grpdesc->partition_assignor); - if (likely(grpdesc->error != NULL)) - rd_kafka_error_destroy(grpdesc->error); - if (grpdesc->coordinator) - rd_kafka_Node_destroy(grpdesc->coordinator); - rd_free(grpdesc); -} + topicdesc->authorized_operations_cnt = authorized_operations_cnt; + topicdesc->authorized_operations = rd_kafka_AuthorizedOperations_copy( + authorized_operations, authorized_operations_cnt); -static void rd_kafka_ConsumerGroupDescription_free(void *ptr) { - rd_kafka_ConsumerGroupDescription_destroy(ptr); + if (partitions) { + topicdesc->partitions = + rd_calloc(partition_cnt, sizeof(*partitions)); + for (i = 0; i < partition_cnt; i++) + topicdesc->partitions[i] = + rd_kafka_TopicPartitionInfo_new( + &partitions[i], brokers_sorted, + brokers_internal, broker_cnt); + } + return topicdesc; } -const char *rd_kafka_ConsumerGroupDescription_group_id( - const rd_kafka_ConsumerGroupDescription_t *grpdesc) { - return grpdesc->group_id; +/** + * @brief Create a new TopicDescription object from an error. + * + * @param topic topic name + * @param error Topic error reported by the broker. + * @return A newly allocated TopicDescription with the passed error. + * @remark Use rd_kafka_TopicDescription_destroy() to free when done. 
+ */ +static rd_kafka_TopicDescription_t * +rd_kafka_TopicDescription_new_error(const char *topic, + rd_kafka_Uuid_t topic_id, + rd_kafka_error_t *error) { + return rd_kafka_TopicDescription_new(topic, topic_id, NULL, 0, NULL, + NULL, 0, NULL, 0, rd_false, error); } -const rd_kafka_error_t *rd_kafka_ConsumerGroupDescription_error( - const rd_kafka_ConsumerGroupDescription_t *grpdesc) { - return grpdesc->error; -} +static void +rd_kafka_TopicDescription_destroy(rd_kafka_TopicDescription_t *topicdesc) { + int i; + RD_IF_FREE(topicdesc->topic, rd_free); + RD_IF_FREE(topicdesc->error, rd_kafka_error_destroy); + RD_IF_FREE(topicdesc->authorized_operations, rd_free); + for (i = 0; i < topicdesc->partition_cnt; i++) + rd_kafka_TopicPartitionInfo_destroy(topicdesc->partitions[i]); + rd_free(topicdesc->partitions); -int rd_kafka_ConsumerGroupDescription_is_simple_consumer_group( - const rd_kafka_ConsumerGroupDescription_t *grpdesc) { - return grpdesc->is_simple_consumer_group; + rd_free(topicdesc); } - - -const char *rd_kafka_ConsumerGroupDescription_partition_assignor( - const rd_kafka_ConsumerGroupDescription_t *grpdesc) { - return grpdesc->partition_assignor; + +static void rd_kafka_TopicDescription_free(void *ptr) { + rd_kafka_TopicDescription_destroy(ptr); } +const int rd_kafka_TopicPartitionInfo_partition( + const rd_kafka_TopicPartitionInfo_t *partition) { + return partition->partition; +} -rd_kafka_consumer_group_state_t rd_kafka_ConsumerGroupDescription_state( - const rd_kafka_ConsumerGroupDescription_t *grpdesc) { - return grpdesc->state; +const rd_kafka_Node_t *rd_kafka_TopicPartitionInfo_leader( + const rd_kafka_TopicPartitionInfo_t *partition) { + return partition->leader; } -const rd_kafka_Node_t *rd_kafka_ConsumerGroupDescription_coordinator( - const rd_kafka_ConsumerGroupDescription_t *grpdesc) { - return grpdesc->coordinator; + +const rd_kafka_Node_t ** +rd_kafka_TopicPartitionInfo_isr(const rd_kafka_TopicPartitionInfo_t *partition, + size_t *cntp) { + *cntp = partition->isr_cnt; + return (const rd_kafka_Node_t **)partition->isr; } -size_t rd_kafka_ConsumerGroupDescription_member_count( - const rd_kafka_ConsumerGroupDescription_t *grpdesc) { - return rd_list_cnt(&grpdesc->members); +const rd_kafka_Node_t **rd_kafka_TopicPartitionInfo_replicas( + const rd_kafka_TopicPartitionInfo_t *partition, + size_t *cntp) { + *cntp = partition->replica_cnt; + return (const rd_kafka_Node_t **)partition->replicas; } -const rd_kafka_MemberDescription_t *rd_kafka_ConsumerGroupDescription_member( - const rd_kafka_ConsumerGroupDescription_t *grpdesc, - size_t idx) { - return (rd_kafka_MemberDescription_t *)rd_list_elem(&grpdesc->members, - idx); +const rd_kafka_TopicPartitionInfo_t **rd_kafka_TopicDescription_partitions( + const rd_kafka_TopicDescription_t *topicdesc, + size_t *cntp) { + *cntp = topicdesc->partition_cnt; + return (const rd_kafka_TopicPartitionInfo_t **)topicdesc->partitions; } -/** - * @brief Group arguments comparator for DescribeConsumerGroups args - */ -static int rd_kafka_DescribeConsumerGroups_cmp(const void *a, const void *b) { - return strcmp(a, b); +const rd_kafka_AclOperation_t *rd_kafka_TopicDescription_authorized_operations( + const rd_kafka_TopicDescription_t *topicdesc, + size_t *cntp) { + *cntp = RD_MAX(topicdesc->authorized_operations_cnt, 0); + return topicdesc->authorized_operations; } -/** @brief Merge the DescribeConsumerGroups response from a single broker - * into the user response list. 
- */ -static void rd_kafka_DescribeConsumerGroups_response_merge( - rd_kafka_op_t *rko_fanout, - const rd_kafka_op_t *rko_partial) { - rd_kafka_ConsumerGroupDescription_t *groupres = NULL; - rd_kafka_ConsumerGroupDescription_t *newgroupres; - const char *grp = rko_partial->rko_u.admin_result.opaque; - int orig_pos; - rd_assert(rko_partial->rko_evtype == - RD_KAFKA_EVENT_DESCRIBECONSUMERGROUPS_RESULT); +const char * +rd_kafka_TopicDescription_name(const rd_kafka_TopicDescription_t *topicdesc) { + return topicdesc->topic; +} - if (!rko_partial->rko_err) { - /* Proper results. - * We only send one group per request, make sure it matches */ - groupres = - rd_list_elem(&rko_partial->rko_u.admin_result.results, 0); - rd_assert(groupres); - rd_assert(!strcmp(groupres->group_id, grp)); - newgroupres = rd_kafka_ConsumerGroupDescription_copy(groupres); - } else { - /* Op errored, e.g. timeout */ - rd_kafka_error_t *error = - rd_kafka_error_new(rko_partial->rko_err, NULL); - newgroupres = - rd_kafka_ConsumerGroupDescription_new_error(grp, error); - rd_kafka_error_destroy(error); - } +int rd_kafka_TopicDescription_is_internal( + const rd_kafka_TopicDescription_t *topicdesc) { + return topicdesc->is_internal; +} - /* As a convenience to the application we insert group result - * in the same order as they were requested. */ - orig_pos = rd_list_index(&rko_fanout->rko_u.admin_request.args, grp, - rd_kafka_DescribeConsumerGroups_cmp); - rd_assert(orig_pos != -1); +const rd_kafka_error_t * +rd_kafka_TopicDescription_error(const rd_kafka_TopicDescription_t *topicdesc) { + return topicdesc->error; +} - /* Make sure result is not already set */ - rd_assert(rd_list_elem(&rko_fanout->rko_u.admin_request.fanout.results, - orig_pos) == NULL); +const rd_kafka_Uuid_t *rd_kafka_TopicDescription_topic_id( + const rd_kafka_TopicDescription_t *topicdesc) { + return &topicdesc->topic_id; +} - rd_list_set(&rko_fanout->rko_u.admin_request.fanout.results, orig_pos, - newgroupres); +const rd_kafka_TopicDescription_t **rd_kafka_DescribeTopics_result_topics( + const rd_kafka_DescribeTopics_result_t *result, + size_t *cntp) { + const rd_kafka_op_t *rko = (const rd_kafka_op_t *)result; + rd_kafka_op_type_t reqtype = + rko->rko_u.admin_result.reqtype & ~RD_KAFKA_OP_FLAGMASK; + rd_assert(reqtype == RD_KAFKA_OP_DESCRIBETOPICS); + + *cntp = rd_list_cnt(&rko->rko_u.admin_result.results); + return (const rd_kafka_TopicDescription_t **) + rko->rko_u.admin_result.results.rl_elems; } +/** + * @brief Topics arguments comparator for DescribeTopics args + */ +static int rd_kafka_DescribeTopics_cmp(const void *a, const void *b) { + return strcmp(a, b); +} /** - * @brief Construct and send DescribeConsumerGroupsRequest to \p rkb - * with the groups (char *) in \p groups, using + * @brief Construct and send DescribeTopicsRequest to \p rkb + * with the topics (char *) in \p topics, using * \p options. * * The response (unparsed) will be enqueued on \p replyq @@ -6305,34 +8610,27 @@ static void rd_kafka_DescribeConsumerGroups_response_merge( * transmission, otherwise an error code and errstr will be * updated with a human readable error string. 
*/ -static rd_kafka_resp_err_t rd_kafka_admin_DescribeConsumerGroupsRequest( - rd_kafka_broker_t *rkb, - const rd_list_t *groups /*(char*)*/, - rd_kafka_AdminOptions_t *options, - char *errstr, - size_t errstr_size, - rd_kafka_replyq_t replyq, - rd_kafka_resp_cb_t *resp_cb, - void *opaque) { - int i; - char *group; +static rd_kafka_resp_err_t +rd_kafka_admin_DescribeTopicsRequest(rd_kafka_broker_t *rkb, + const rd_list_t *topics /*(char*)*/, + rd_kafka_AdminOptions_t *options, + char *errstr, + size_t errstr_size, + rd_kafka_replyq_t replyq, + rd_kafka_resp_cb_t *resp_cb, + void *opaque) { rd_kafka_resp_err_t err; - int groups_cnt = rd_list_cnt(groups); - rd_kafka_error_t *error = NULL; - char **groups_arr = rd_calloc(groups_cnt, sizeof(*groups_arr)); + int include_topic_authorized_operations = + rd_kafka_confval_get_int(&options->include_authorized_operations); - RD_LIST_FOREACH(group, groups, i) { - groups_arr[i] = rd_list_elem(groups, i); - } - error = rd_kafka_DescribeGroupsRequest(rkb, -1, groups_arr, groups_cnt, - replyq, resp_cb, opaque); - rd_free(groups_arr); + err = rd_kafka_admin_MetadataRequest( + rkb, topics, "describe topics", + rd_false /* don't include_topic_authorized_operations */, + include_topic_authorized_operations, + rd_false /* don't force_racks */, resp_cb, replyq, opaque); - if (error) { - rd_snprintf(errstr, errstr_size, "%s", - rd_kafka_error_string(error)); - err = rd_kafka_error_code(error); - rd_kafka_error_destroy(error); + if (err) { + rd_snprintf(errstr, errstr_size, "%s", rd_kafka_err2str(err)); return err; } @@ -6340,329 +8638,371 @@ static rd_kafka_resp_err_t rd_kafka_admin_DescribeConsumerGroupsRequest( } /** - * @brief Parse DescribeConsumerGroupsResponse and create ADMIN_RESULT op. + * @brief Parse DescribeTopicsResponse and create ADMIN_RESULT op. 
*/ static rd_kafka_resp_err_t -rd_kafka_DescribeConsumerGroupsResponse_parse(rd_kafka_op_t *rko_req, - rd_kafka_op_t **rko_resultp, - rd_kafka_buf_t *reply, - char *errstr, - size_t errstr_size) { +rd_kafka_DescribeTopicsResponse_parse(rd_kafka_op_t *rko_req, + rd_kafka_op_t **rko_resultp, + rd_kafka_buf_t *reply, + char *errstr, + size_t errstr_size) { + rd_kafka_metadata_internal_t *mdi = NULL; + struct rd_kafka_metadata *md = NULL; + rd_kafka_resp_err_t err; + rd_list_t topics = rko_req->rko_u.admin_request.args; + rd_kafka_broker_t *rkb = reply->rkbuf_rkb; + int i; const int log_decode_errors = LOG_ERR; - int nodeid; - uint16_t port; - int16_t api_version; - int32_t cnt; - rd_kafka_op_t *rko_result = NULL; - rd_kafka_broker_t *rkb = reply->rkbuf_rkb; - rd_kafka_Node_t *node = NULL; - rd_kafka_error_t *error = NULL; - char *group_id = NULL, *group_state = NULL, *proto_type = NULL, - *proto = NULL, *host = NULL; - - api_version = rd_kafka_buf_ApiVersion(reply); - if (api_version >= 1) { - rd_kafka_buf_read_throttle_time(reply); - } + rd_kafka_op_t *rko_result = NULL; - rd_kafka_buf_read_arraycnt(reply, &cnt, 100000); + err = rd_kafka_parse_Metadata_admin(rkb, reply, &topics, &mdi); + if (err) + goto err_parse; rko_result = rd_kafka_admin_result_new(rko_req); - rd_list_init(&rko_result->rko_u.admin_result.results, cnt, - rd_kafka_ConsumerGroupDescription_free); - - rd_kafka_broker_lock(rkb); - nodeid = rkb->rkb_nodeid; - host = rd_strdup(rkb->rkb_origname); - port = rkb->rkb_port; - rd_kafka_broker_unlock(rkb); - - node = rd_kafka_Node_new(nodeid, host, port, NULL); - while (cnt-- > 0) { - int16_t error_code; - rd_kafkap_str_t GroupId, GroupState, ProtocolType, ProtocolData; - rd_bool_t is_simple_consumer_group, is_consumer_protocol_type; - int32_t member_cnt; - rd_list_t members; - rd_kafka_ConsumerGroupDescription_t *grpdesc = NULL; - - rd_kafka_buf_read_i16(reply, &error_code); - rd_kafka_buf_read_str(reply, &GroupId); - rd_kafka_buf_read_str(reply, &GroupState); - rd_kafka_buf_read_str(reply, &ProtocolType); - rd_kafka_buf_read_str(reply, &ProtocolData); - rd_kafka_buf_read_arraycnt(reply, &member_cnt, 100000); - - group_id = RD_KAFKAP_STR_DUP(&GroupId); - group_state = RD_KAFKAP_STR_DUP(&GroupState); - proto_type = RD_KAFKAP_STR_DUP(&ProtocolType); - proto = RD_KAFKAP_STR_DUP(&ProtocolData); + md = &mdi->metadata; + rd_list_init(&rko_result->rko_u.admin_result.results, md->topic_cnt, + rd_kafka_TopicDescription_free); - if (error_code) { - error = rd_kafka_error_new( - error_code, "DescribeConsumerGroups: %s", - rd_kafka_err2str(error_code)); - } + for (i = 0; i < md->topic_cnt; i++) { + rd_kafka_TopicDescription_t *topicdesc = NULL; + int orig_pos; - is_simple_consumer_group = *proto_type == '\0'; - is_consumer_protocol_type = - !strcmp(proto_type, CONSUMER_PROTOCOL_TYPE); - if (error == NULL && !is_simple_consumer_group && - !is_consumer_protocol_type) { - error = rd_kafka_error_new( - RD_KAFKA_RESP_ERR__INVALID_ARG, - "GroupId %s is not a consumer group (%s).", - group_id, proto_type); + if (md->topics[i].err == RD_KAFKA_RESP_ERR_NO_ERROR) { + rd_kafka_AclOperation_t *authorized_operations; + int authorized_operation_cnt; + authorized_operations = + rd_kafka_AuthorizedOperations_parse( + mdi->topics[i].topic_authorized_operations, + &authorized_operation_cnt); + topicdesc = rd_kafka_TopicDescription_new( + md->topics[i].topic, mdi->topics[i].topic_id, + md->topics[i].partitions, + md->topics[i].partition_cnt, mdi->brokers_sorted, + mdi->brokers, md->broker_cnt, 
authorized_operations, + authorized_operation_cnt, + mdi->topics[i].is_internal, NULL); + RD_IF_FREE(authorized_operations, rd_free); + } else { + rd_kafka_error_t *error = rd_kafka_error_new( + md->topics[i].err, "%s", + rd_kafka_err2str(md->topics[i].err)); + topicdesc = rd_kafka_TopicDescription_new_error( + md->topics[i].topic, mdi->topics[i].topic_id, + error); + rd_kafka_error_destroy(error); } - - rd_list_init(&members, 0, rd_kafka_MemberDescription_free); - - while (member_cnt-- > 0) { - rd_kafkap_str_t MemberId, ClientId, ClientHost, - GroupInstanceId = RD_KAFKAP_STR_INITIALIZER; - char *member_id, *client_id, *client_host, - *group_instance_id = NULL; - rd_kafkap_bytes_t MemberMetadata, MemberAssignment; - rd_kafka_MemberDescription_t *member; - rd_kafka_topic_partition_list_t *partitions = NULL; - rd_kafka_buf_t *rkbuf; - - rd_kafka_buf_read_str(reply, &MemberId); - if (api_version >= 4) { - rd_kafka_buf_read_str(reply, &GroupInstanceId); - } - rd_kafka_buf_read_str(reply, &ClientId); - rd_kafka_buf_read_str(reply, &ClientHost); - rd_kafka_buf_read_bytes(reply, &MemberMetadata); - rd_kafka_buf_read_bytes(reply, &MemberAssignment); - if (error != NULL) - continue; - - if (RD_KAFKAP_BYTES_LEN(&MemberAssignment) != 0) { - int16_t version; - /* Parse assignment */ - rkbuf = rd_kafka_buf_new_shadow( - MemberAssignment.data, - RD_KAFKAP_BYTES_LEN(&MemberAssignment), - NULL); - /* Protocol parser needs a broker handle - * to log errors on. */ - rkbuf->rkbuf_rkb = rkb; - /* Decreased in rd_kafka_buf_destroy */ - rd_kafka_broker_keep(rkb); - rd_kafka_buf_read_i16(rkbuf, &version); - const rd_kafka_topic_partition_field_t fields[] = - {RD_KAFKA_TOPIC_PARTITION_FIELD_PARTITION, - RD_KAFKA_TOPIC_PARTITION_FIELD_END}; - partitions = rd_kafka_buf_read_topic_partitions( - rkbuf, 0, fields); - rd_kafka_buf_destroy(rkbuf); - if (!partitions) - rd_kafka_buf_parse_fail( - reply, - "Error reading topic partitions"); - } - - member_id = RD_KAFKAP_STR_DUP(&MemberId); - if (!RD_KAFKAP_STR_IS_NULL(&GroupInstanceId)) { - group_instance_id = - RD_KAFKAP_STR_DUP(&GroupInstanceId); - } - client_id = RD_KAFKAP_STR_DUP(&ClientId); - client_host = RD_KAFKAP_STR_DUP(&ClientHost); - - member = rd_kafka_MemberDescription_new( - client_id, member_id, group_instance_id, - client_host, partitions); - if (partitions) - rd_kafka_topic_partition_list_destroy( - partitions); - rd_list_add(&members, member); - rd_free(member_id); - rd_free(group_instance_id); - rd_free(client_id); - rd_free(client_host); - member_id = NULL; - group_instance_id = NULL; - client_id = NULL; - client_host = NULL; + orig_pos = rd_list_index(&rko_result->rko_u.admin_result.args, + topicdesc->topic, + rd_kafka_DescribeTopics_cmp); + if (orig_pos == -1) { + rd_kafka_TopicDescription_destroy(topicdesc); + rd_kafka_buf_parse_fail( + reply, + "Broker returned topic %s that was not " + "included in the original request", + topicdesc->topic); } - if (api_version >= 3) { - /* TODO: implement KIP-430 */ - int32_t authorized_operations; - rd_kafka_buf_read_i32(reply, &authorized_operations); + if (rd_list_elem(&rko_result->rko_u.admin_result.results, + orig_pos) != NULL) { + rd_kafka_TopicDescription_destroy(topicdesc); + rd_kafka_buf_parse_fail( + reply, "Broker returned topic %s multiple times", + topicdesc->topic); } - if (error == NULL) { - grpdesc = rd_kafka_ConsumerGroupDescription_new( - group_id, is_simple_consumer_group, &members, proto, - rd_kafka_consumer_group_state_code(group_state), - node, error); - } else { - grpdesc = 
rd_kafka_ConsumerGroupDescription_new_error( - group_id, error); - } - rd_list_add(&rko_result->rko_u.admin_result.results, grpdesc); - if (error) - rd_kafka_error_destroy(error); - rd_list_destroy(&members); - rd_free(group_id); - rd_free(group_state); - rd_free(proto_type); - rd_free(proto); - error = NULL; - group_id = NULL; - group_state = NULL; - proto_type = NULL; - proto = NULL; + rd_list_set(&rko_result->rko_u.admin_result.results, orig_pos, + topicdesc); } + rd_free(mdi); - if (host) - rd_free(host); - if (node) - rd_kafka_Node_destroy(node); *rko_resultp = rko_result; return RD_KAFKA_RESP_ERR_NO_ERROR; err_parse: - if (group_id) - rd_free(group_id); - if (group_state) - rd_free(group_state); - if (proto_type) - rd_free(proto_type); - if (proto) - rd_free(proto); - if (error) - rd_kafka_error_destroy(error); - if (host) - rd_free(host); - if (node) - rd_kafka_Node_destroy(node); - if (rko_result) - rd_kafka_op_destroy(rko_result); - - rd_snprintf( - errstr, errstr_size, - "DescribeConsumerGroups response protocol parse failure: %s", - rd_kafka_err2str(reply->rkbuf_err)); - + RD_IF_FREE(rko_result, rd_kafka_op_destroy); + rd_snprintf(errstr, errstr_size, + "DescribeTopics response protocol parse failure: %s", + rd_kafka_err2str(reply->rkbuf_err)); return reply->rkbuf_err; } -void rd_kafka_DescribeConsumerGroups(rd_kafka_t *rk, - const char **groups, - size_t groups_cnt, - const rd_kafka_AdminOptions_t *options, - rd_kafka_queue_t *rkqu) { - rd_kafka_op_t *rko_fanout; +void rd_kafka_DescribeTopics(rd_kafka_t *rk, + const rd_kafka_TopicCollection_t *topics, + const rd_kafka_AdminOptions_t *options, + rd_kafka_queue_t *rkqu) { + rd_kafka_op_t *rko; rd_list_t dup_list; size_t i; - static const struct rd_kafka_admin_fanout_worker_cbs fanout_cbs = { - rd_kafka_DescribeConsumerGroups_response_merge, - rd_kafka_ConsumerGroupDescription_copy_opaque}; - rd_assert(rkqu); + static const struct rd_kafka_admin_worker_cbs cbs = { + rd_kafka_admin_DescribeTopicsRequest, + rd_kafka_DescribeTopicsResponse_parse, + }; - rko_fanout = rd_kafka_admin_fanout_op_new( - rk, RD_KAFKA_OP_DESCRIBECONSUMERGROUPS, - RD_KAFKA_EVENT_DESCRIBECONSUMERGROUPS_RESULT, &fanout_cbs, options, - rkqu->rkqu_q); + rd_assert(rkqu); - if (groups_cnt == 0) { - rd_kafka_admin_result_fail(rko_fanout, - RD_KAFKA_RESP_ERR__INVALID_ARG, - "No groups to describe"); - rd_kafka_admin_common_worker_destroy(rk, rko_fanout, - rd_true /*destroy*/); - return; - } + rko = rd_kafka_admin_request_op_new( + rk, RD_KAFKA_OP_DESCRIBETOPICS, + RD_KAFKA_EVENT_DESCRIBETOPICS_RESULT, &cbs, options, rkqu->rkqu_q); - /* Copy group list and store it on the request op. - * Maintain original ordering. */ - rd_list_init(&rko_fanout->rko_u.admin_request.args, (int)groups_cnt, + rd_list_init(&rko->rko_u.admin_request.args, (int)topics->topics_cnt, rd_free); - for (i = 0; i < groups_cnt; i++) - rd_list_add(&rko_fanout->rko_u.admin_request.args, - rd_strdup(groups[i])); + for (i = 0; i < topics->topics_cnt; i++) + rd_list_add(&rko->rko_u.admin_request.args, + rd_strdup(topics->topics[i])); + + if (rd_list_cnt(&rko->rko_u.admin_request.args)) { + int j; + char *topic_name; + /* Check for duplicates. + * Make a temporary copy of the topic list and sort it to check + * for duplicates, we don't want the original list sorted since + * we want to maintain ordering. 
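The request path above pairs with the public entry point as follows. A hedged end-to-end sketch (handle, names and timeout are illustrative), also requesting topic ACL operations (KIP-430):

#include <librdkafka/rdkafka.h>

static void describe_topics_example(rd_kafka_t *rk) {
        const char *names[] = {"orders"};
        rd_kafka_TopicCollection_t *topics =
            rd_kafka_TopicCollection_of_topic_names(names, 1);
        rd_kafka_queue_t *rkqu = rd_kafka_queue_new(rk);
        rd_kafka_AdminOptions_t *options =
            rd_kafka_AdminOptions_new(rk, RD_KAFKA_ADMIN_OP_DESCRIBETOPICS);
        rd_kafka_error_t *error;
        rd_kafka_event_t *rkev;

        /* Ask the brokers to also return per-topic authorized operations. */
        error = rd_kafka_AdminOptions_set_include_authorized_operations(
            options, 1);
        if (error)
                rd_kafka_error_destroy(error);

        rd_kafka_DescribeTopics(rk, topics, options, rkqu);

        rkev = rd_kafka_queue_poll(rkqu, 10 * 1000);
        if (rkev) {
                const rd_kafka_DescribeTopics_result_t *res =
                    rd_kafka_event_DescribeTopics_result(rkev);
                if (res) {
                        size_t cnt;
                        const rd_kafka_TopicDescription_t **tds =
                            rd_kafka_DescribeTopics_result_topics(res, &cnt);
                        /* Results keep the request order; inspect each
                         * entry with the rd_kafka_TopicDescription_*()
                         * accessors shown earlier. */
                        (void)tds;
                }
                rd_kafka_event_destroy(rkev);
        }
        rd_kafka_TopicCollection_destroy(topics);
        rd_kafka_AdminOptions_destroy(options);
        rd_kafka_queue_destroy(rkqu);
}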
*/ + rd_list_init(&dup_list, + rd_list_cnt(&rko->rko_u.admin_request.args), NULL); + rd_list_copy_to(&dup_list, &rko->rko_u.admin_request.args, NULL, + NULL); + rd_list_sort(&dup_list, rd_kafka_DescribeTopics_cmp); + if (rd_list_find_duplicate(&dup_list, + rd_kafka_DescribeTopics_cmp)) { + rd_list_destroy(&dup_list); + rd_kafka_admin_result_fail( + rko, RD_KAFKA_RESP_ERR__INVALID_ARG, + "Duplicate topics not allowed"); + rd_kafka_admin_common_worker_destroy( + rk, rko, rd_true /*destroy*/); + return; + } + + /* Check for empty topics. */ + RD_LIST_FOREACH(topic_name, &rko->rko_u.admin_request.args, j) { + if (!topic_name[0]) { + rd_list_destroy(&dup_list); + rd_kafka_admin_result_fail( + rko, RD_KAFKA_RESP_ERR__INVALID_ARG, + "Empty topic name at index %d isn't " + "allowed", + j); + rd_kafka_admin_common_worker_destroy( + rk, rko, rd_true /*destroy*/); + return; + } + } - /* Check for duplicates. - * Make a temporary copy of the group list and sort it to check for - * duplicates, we don't want the original list sorted since we want - * to maintain ordering. */ - rd_list_init(&dup_list, - rd_list_cnt(&rko_fanout->rko_u.admin_request.args), NULL); - rd_list_copy_to(&dup_list, &rko_fanout->rko_u.admin_request.args, NULL, - NULL); - rd_list_sort(&dup_list, rd_kafka_DescribeConsumerGroups_cmp); - if (rd_list_find_duplicate(&dup_list, - rd_kafka_DescribeConsumerGroups_cmp)) { rd_list_destroy(&dup_list); - rd_kafka_admin_result_fail(rko_fanout, - RD_KAFKA_RESP_ERR__INVALID_ARG, - "Duplicate groups not allowed"); - rd_kafka_admin_common_worker_destroy(rk, rko_fanout, + rd_kafka_q_enq(rk->rk_ops, rko); + } else { + /* Empty list */ + rd_kafka_op_t *rko_result = rd_kafka_admin_result_new(rko); + /* Enqueue empty result on application queue, we're done. */ + rd_kafka_admin_result_enq(rko, rko_result); + rd_kafka_admin_common_worker_destroy(rk, rko, rd_true /*destroy*/); - return; } +} - rd_list_destroy(&dup_list); +/**@}*/ - /* Prepare results list where fanned out op's results will be - * accumulated. */ - rd_list_init(&rko_fanout->rko_u.admin_request.fanout.results, - (int)groups_cnt, rd_kafka_ConsumerGroupDescription_free); - rko_fanout->rko_u.admin_request.fanout.outstanding = (int)groups_cnt; +/** + * @name Describe cluster + * @{ + * + * + * + * + */ - /* Create individual request ops for each group. - * FIXME: A future optimization is to coalesce all groups for a single - * coordinator into one op. 
*/ - for (i = 0; i < groups_cnt; i++) { - static const struct rd_kafka_admin_worker_cbs cbs = { - rd_kafka_admin_DescribeConsumerGroupsRequest, - rd_kafka_DescribeConsumerGroupsResponse_parse, - }; - char *grp = - rd_list_elem(&rko_fanout->rko_u.admin_request.args, (int)i); - rd_kafka_op_t *rko = rd_kafka_admin_request_op_new( - rk, RD_KAFKA_OP_DESCRIBECONSUMERGROUPS, - RD_KAFKA_EVENT_DESCRIBECONSUMERGROUPS_RESULT, &cbs, options, - rk->rk_ops); +static const rd_kafka_ClusterDescription_t * +rd_kafka_DescribeCluster_result_description( + const rd_kafka_DescribeCluster_result_t *result) { + int cluster_result_cnt; + const rd_kafka_ClusterDescription_t *clusterdesc; + const rd_kafka_op_t *rko = (const rd_kafka_op_t *)result; + rd_kafka_op_type_t reqtype = + rko->rko_u.admin_result.reqtype & ~RD_KAFKA_OP_FLAGMASK; + rd_assert(reqtype == RD_KAFKA_OP_DESCRIBECLUSTER); - rko->rko_u.admin_request.fanout_parent = rko_fanout; - rko->rko_u.admin_request.broker_id = - RD_KAFKA_ADMIN_TARGET_COORDINATOR; - rko->rko_u.admin_request.coordtype = RD_KAFKA_COORD_GROUP; - rko->rko_u.admin_request.coordkey = rd_strdup(grp); + cluster_result_cnt = rd_list_cnt(&rko->rko_u.admin_result.results); + rd_assert(cluster_result_cnt == 1); + clusterdesc = rd_list_elem(&rko->rko_u.admin_result.results, 0); - /* Set the group name as the opaque so the fanout worker use it - * to fill in errors. - * References rko_fanout's memory, which will always outlive - * the fanned out op. */ - rd_kafka_AdminOptions_set_opaque( - &rko->rko_u.admin_request.options, grp); + return clusterdesc; +} - rd_list_init(&rko->rko_u.admin_request.args, 1, rd_free); - rd_list_add(&rko->rko_u.admin_request.args, - rd_strdup(groups[i])); - rd_kafka_q_enq(rk->rk_ops, rko); - } +const rd_kafka_Node_t **rd_kafka_DescribeCluster_result_nodes( + const rd_kafka_DescribeCluster_result_t *result, + size_t *cntp) { + const rd_kafka_ClusterDescription_t *clusterdesc = + rd_kafka_DescribeCluster_result_description(result); + *cntp = clusterdesc->node_cnt; + return (const rd_kafka_Node_t **)clusterdesc->nodes; } -const rd_kafka_ConsumerGroupDescription_t ** -rd_kafka_DescribeConsumerGroups_result_groups( - const rd_kafka_DescribeConsumerGroups_result_t *result, +const rd_kafka_AclOperation_t * +rd_kafka_DescribeCluster_result_authorized_operations( + const rd_kafka_DescribeCluster_result_t *result, size_t *cntp) { - const rd_kafka_op_t *rko = (const rd_kafka_op_t *)result; - rd_kafka_op_type_t reqtype = - rko->rko_u.admin_result.reqtype & ~RD_KAFKA_OP_FLAGMASK; - rd_assert(reqtype == RD_KAFKA_OP_DESCRIBECONSUMERGROUPS); + const rd_kafka_ClusterDescription_t *clusterdesc = + rd_kafka_DescribeCluster_result_description(result); + *cntp = RD_MAX(clusterdesc->authorized_operations_cnt, 0); + return clusterdesc->authorized_operations; +} - *cntp = rd_list_cnt(&rko->rko_u.admin_result.results); - return (const rd_kafka_ConsumerGroupDescription_t **) - rko->rko_u.admin_result.results.rl_elems; +const char *rd_kafka_DescribeCluster_result_cluster_id( + const rd_kafka_DescribeCluster_result_t *result) { + return rd_kafka_DescribeCluster_result_description(result)->cluster_id; +} + +const rd_kafka_Node_t *rd_kafka_DescribeCluster_result_controller( + const rd_kafka_DescribeCluster_result_t *result) { + return rd_kafka_DescribeCluster_result_description(result)->controller; +} + +/** + * @brief Create a new ClusterDescription object. + * + * @param cluster_id current cluster_id + * @param controller_id current controller_id. 
+ * @param md metadata struct returned by parse_metadata(). + * + * @returns newly allocated ClusterDescription object. + * @remark Use rd_kafka_ClusterDescription_destroy() to free when done. + */ +static rd_kafka_ClusterDescription_t * +rd_kafka_ClusterDescription_new(const rd_kafka_metadata_internal_t *mdi) { + const rd_kafka_metadata_t *md = &mdi->metadata; + rd_kafka_ClusterDescription_t *clusterdesc = + rd_calloc(1, sizeof(*clusterdesc)); + int i; + + clusterdesc->cluster_id = rd_strdup(mdi->cluster_id); + + if (mdi->controller_id >= 0) + clusterdesc->controller = rd_kafka_Node_new_from_brokers( + mdi->controller_id, mdi->brokers_sorted, mdi->brokers, + md->broker_cnt); + + clusterdesc->authorized_operations = + rd_kafka_AuthorizedOperations_parse( + mdi->cluster_authorized_operations, + &clusterdesc->authorized_operations_cnt); + + clusterdesc->node_cnt = md->broker_cnt; + clusterdesc->nodes = + rd_calloc(clusterdesc->node_cnt, sizeof(rd_kafka_Node_t *)); + + for (i = 0; i < md->broker_cnt; i++) + clusterdesc->nodes[i] = rd_kafka_Node_new_from_brokers( + md->brokers[i].id, mdi->brokers_sorted, mdi->brokers, + md->broker_cnt); + + return clusterdesc; +} + +static void rd_kafka_ClusterDescription_destroy( + rd_kafka_ClusterDescription_t *clusterdesc) { + RD_IF_FREE(clusterdesc->cluster_id, rd_free); + RD_IF_FREE(clusterdesc->controller, rd_kafka_Node_free); + RD_IF_FREE(clusterdesc->authorized_operations, rd_free); + + if (clusterdesc->node_cnt) { + size_t i; + for (i = 0; i < clusterdesc->node_cnt; i++) + rd_kafka_Node_free(clusterdesc->nodes[i]); + rd_free(clusterdesc->nodes); + } + rd_free(clusterdesc); +} + +static void rd_kafka_ClusterDescription_free(void *ptr) { + rd_kafka_ClusterDescription_destroy(ptr); +} +/** + * @brief Send DescribeClusterRequest. Admin worker compatible callback. + */ +static rd_kafka_resp_err_t rd_kafka_admin_DescribeClusterRequest( + rd_kafka_broker_t *rkb, + const rd_list_t *ignored /* We don't use any arguments set here. */, + rd_kafka_AdminOptions_t *options, + char *errstr, + size_t errstr_size, + rd_kafka_replyq_t replyq, + rd_kafka_resp_cb_t *resp_cb, + void *opaque) { + rd_kafka_resp_err_t err; + int include_cluster_authorized_operations = + rd_kafka_confval_get_int(&options->include_authorized_operations); + + err = rd_kafka_admin_MetadataRequest( + rkb, NULL /* topics */, "describe cluster", + include_cluster_authorized_operations, + rd_false /* don't include_topic_authorized_operations */, + rd_false /* don't force racks */, resp_cb, replyq, opaque); + + if (err) { + rd_snprintf(errstr, errstr_size, "%s", rd_kafka_err2str(err)); + return err; + } + + return RD_KAFKA_RESP_ERR_NO_ERROR; +} + +/** + * @brief Parse DescribeCluster and create ADMIN_RESULT op. 
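For completeness, the cluster counterpart is driven the same way from application code; a sketch under the same assumptions (existing handle, illustrative timeout):

#include <librdkafka/rdkafka.h>
#include <stdio.h>

static void describe_cluster_example(rd_kafka_t *rk) {
        rd_kafka_queue_t *rkqu = rd_kafka_queue_new(rk);
        rd_kafka_AdminOptions_t *options =
            rd_kafka_AdminOptions_new(rk, RD_KAFKA_ADMIN_OP_DESCRIBECLUSTER);
        rd_kafka_event_t *rkev;

        rd_kafka_DescribeCluster(rk, options, rkqu);

        rkev = rd_kafka_queue_poll(rkqu, 10 * 1000);
        if (rkev) {
                const rd_kafka_DescribeCluster_result_t *res =
                    rd_kafka_event_DescribeCluster_result(rkev);
                if (res) {
                        size_t i, cnt;
                        const rd_kafka_Node_t **nodes =
                            rd_kafka_DescribeCluster_result_nodes(res, &cnt);
                        const rd_kafka_Node_t *ctrl =
                            rd_kafka_DescribeCluster_result_controller(res);

                        printf("cluster %s, controller %d, %zu broker(s)\n",
                               rd_kafka_DescribeCluster_result_cluster_id(res),
                               ctrl ? rd_kafka_Node_id(ctrl) : -1, cnt);
                        for (i = 0; i < cnt; i++)
                                printf("  broker %d at %s:%d\n",
                                       rd_kafka_Node_id(nodes[i]),
                                       rd_kafka_Node_host(nodes[i]),
                                       (int)rd_kafka_Node_port(nodes[i]));
                }
                rd_kafka_event_destroy(rkev);
        }
        rd_kafka_AdminOptions_destroy(options);
        rd_kafka_queue_destroy(rkqu);
}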
+ */ +static rd_kafka_resp_err_t +rd_kafka_DescribeClusterResponse_parse(rd_kafka_op_t *rko_req, + rd_kafka_op_t **rko_resultp, + rd_kafka_buf_t *reply, + char *errstr, + size_t errstr_size) { + rd_kafka_metadata_internal_t *mdi = NULL; + rd_kafka_resp_err_t err; + rd_kafka_ClusterDescription_t *clusterdesc = NULL; + rd_list_t topics = rko_req->rko_u.admin_request.args; + rd_kafka_broker_t *rkb = reply->rkbuf_rkb; + rd_kafka_op_t *rko_result = NULL; + + err = rd_kafka_parse_Metadata_admin(rkb, reply, &topics, &mdi); + if (err) + goto err; + + rko_result = rd_kafka_admin_result_new(rko_req); + rd_list_init(&rko_result->rko_u.admin_result.results, 1, + rd_kafka_ClusterDescription_free); + + clusterdesc = rd_kafka_ClusterDescription_new(mdi); + + rd_free(mdi); + + rd_list_add(&rko_result->rko_u.admin_result.results, clusterdesc); + *rko_resultp = rko_result; + return RD_KAFKA_RESP_ERR_NO_ERROR; + +err: + RD_IF_FREE(rko_result, rd_kafka_op_destroy); + rd_snprintf(errstr, errstr_size, + "DescribeCluster response protocol parse failure: %s", + rd_kafka_err2str(reply->rkbuf_err)); + return reply->rkbuf_err; +} + +void rd_kafka_DescribeCluster(rd_kafka_t *rk, + const rd_kafka_AdminOptions_t *options, + rd_kafka_queue_t *rkqu) { + rd_kafka_op_t *rko; + static const struct rd_kafka_admin_worker_cbs cbs = { + rd_kafka_admin_DescribeClusterRequest, + rd_kafka_DescribeClusterResponse_parse}; + + rko = rd_kafka_admin_request_op_new( + rk, RD_KAFKA_OP_DESCRIBECLUSTER, + RD_KAFKA_EVENT_DESCRIBECLUSTER_RESULT, &cbs, options, rkqu->rkqu_q); + + rd_kafka_q_enq(rk->rk_ops, rko); } /**@}*/ diff --git a/lib/librdkafka-2.1.0/src/rdkafka_admin.h b/lib/librdkafka-2.3.0/src/rdkafka_admin.h similarity index 73% rename from lib/librdkafka-2.1.0/src/rdkafka_admin.h rename to lib/librdkafka-2.3.0/src/rdkafka_admin.h index 62fe9e87a38..62b2e7244cc 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_admin.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_admin.h @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2018 Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,10 +32,18 @@ #include "rdstring.h" +#include "rdmap.h" #include "rdkafka_error.h" #include "rdkafka_confval.h" - - +#if WITH_SSL +typedef struct rd_kafka_broker_s rd_kafka_broker_t; +extern int rd_kafka_ssl_hmac(rd_kafka_broker_t *rkb, + const EVP_MD *evp, + const rd_chariov_t *in, + const rd_chariov_t *salt, + int itcnt, + rd_chariov_t *out); +#endif /** * @brief Common AdminOptions type used for all admin APIs. @@ -69,15 +78,9 @@ struct rd_kafka_AdminOptions_s { * CreateTopics * CreatePartitions * AlterConfigs + * IncrementalAlterConfigs */ - rd_kafka_confval_t incremental; /**< BOOL: Incremental rather than - * absolute application - * of config. - * Valid for: - * AlterConfigs - */ - rd_kafka_confval_t broker; /**< INT: Explicitly override * broker id to send * requests to. @@ -91,6 +94,14 @@ struct rd_kafka_AdminOptions_s { * Valid for: * ListConsumerGroupOffsets */ + rd_kafka_confval_t + include_authorized_operations; /**< BOOL: Whether broker should + * return authorized operations. + * Valid for: + * DescribeConsumerGroups + * DescribeCluster + * DescribeTopics + */ rd_kafka_confval_t match_consumer_group_states; /**< PTR: list of consumer group states @@ -98,6 +109,13 @@ struct rd_kafka_AdminOptions_s { * Valid for: ListConsumerGroups. 
*/ + rd_kafka_confval_t + isolation_level; /**< INT:Isolation Level needed for list Offset + * to query for. + * Default Set to + * RD_KAFKA_ISOLATION_LEVEL_READ_UNCOMMITTED + */ + rd_kafka_confval_t opaque; /**< PTR: Application opaque. * Valid for all. */ }; @@ -188,13 +206,6 @@ struct rd_kafka_NewPartitions_s { * @{ */ -/* KIP-248 */ -typedef enum rd_kafka_AlterOperation_t { - RD_KAFKA_ALTER_OP_ADD = 0, - RD_KAFKA_ALTER_OP_SET = 1, - RD_KAFKA_ALTER_OP_DELETE = 2, -} rd_kafka_AlterOperation_t; - struct rd_kafka_ConfigEntry_s { rd_strtup_t *kv; /**< Name/Value pair */ @@ -202,8 +213,9 @@ struct rd_kafka_ConfigEntry_s { /* Attributes: this is a struct for easy copying */ struct { - rd_kafka_AlterOperation_t operation; /**< Operation */ - rd_kafka_ConfigSource_t source; /**< Config source */ + /** Operation type, used for IncrementalAlterConfigs */ + rd_kafka_AlterConfigOpType_t op_type; + rd_kafka_ConfigSource_t source; /**< Config source */ rd_bool_t is_readonly; /**< Value is read-only (on broker) */ rd_bool_t is_default; /**< Value is at its default */ rd_bool_t is_sensitive; /**< Value is sensitive */ @@ -250,6 +262,10 @@ struct rd_kafka_AlterConfigs_result_s { rd_list_t resources; /**< Type (rd_kafka_ConfigResource_t *) */ }; +struct rd_kafka_IncrementalAlterConfigs_result_s { + rd_list_t resources; /**< Type (rd_kafka_ConfigResource_t *) */ +}; + struct rd_kafka_ConfigResource_result_s { rd_list_t resources; /**< Type (struct rd_kafka_ConfigResource *): * List of config resources, sans config @@ -298,6 +314,47 @@ struct rd_kafka_DeleteRecords_s { /**@}*/ +/** + * @name ListConsumerGroupOffsets + * @{ + */ + +/** + * @brief ListConsumerGroupOffsets result + */ +struct rd_kafka_ListConsumerGroupOffsets_result_s { + rd_list_t groups; /**< Type (rd_kafka_group_result_t *) */ +}; + +struct rd_kafka_ListConsumerGroupOffsets_s { + char *group_id; /**< Points to data */ + rd_kafka_topic_partition_list_t *partitions; + char data[1]; /**< The group id is allocated along with + * the struct here. */ +}; + +/**@}*/ + +/** + * @name AlterConsumerGroupOffsets + * @{ + */ + +/** + * @brief AlterConsumerGroupOffsets result + */ +struct rd_kafka_AlterConsumerGroupOffsets_result_s { + rd_list_t groups; /**< Type (rd_kafka_group_result_t *) */ +}; + +struct rd_kafka_AlterConsumerGroupOffsets_s { + char *group_id; /**< Points to data */ + rd_kafka_topic_partition_list_t *partitions; + char data[1]; /**< The group id is allocated along with + * the struct here. */ +}; + +/**@}*/ /** * @name DeleteConsumerGroupOffsets @@ -320,6 +377,24 @@ struct rd_kafka_DeleteConsumerGroupOffsets_s { /**@}*/ +/** + * @name ListOffsets + * @{ + */ + +/** + * @struct ListOffsets result about a single partition + */ +struct rd_kafka_ListOffsetsResultInfo_s { + rd_kafka_topic_partition_t *topic_partition; + int64_t timestamp; +}; + +rd_kafka_ListOffsetsResultInfo_t * +rd_kafka_ListOffsetsResultInfo_new(rd_kafka_topic_partition_t *rktpar, + rd_ts_t timestamp); +/**@}*/ + /** * @name CreateAcls * @{ @@ -357,50 +432,6 @@ struct rd_kafka_DeleteAcls_result_response_s { /**@}*/ - -/** - * @name AlterConsumerGroupOffsets - * @{ - */ - -/** - * @brief AlterConsumerGroupOffsets result - */ -struct rd_kafka_AlterConsumerGroupOffsets_result_s { - rd_list_t groups; /**< Type (rd_kafka_group_result_t *) */ -}; - -struct rd_kafka_AlterConsumerGroupOffsets_s { - char *group_id; /**< Points to data */ - rd_kafka_topic_partition_list_t *partitions; - char data[1]; /**< The group id is allocated along with - * the struct here. 
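The new isolation_level option pairs with the ListOffsets API whose result type is declared above. A hedged sketch (topic name, partition and timeout are placeholders; the offset field carries either an rd_kafka_OffsetSpec_t value or an absolute timestamp in milliseconds):

#include <librdkafka/rdkafka.h>
#include <stdio.h>

static void list_offsets_example(rd_kafka_t *rk) {
        rd_kafka_queue_t *rkqu = rd_kafka_queue_new(rk);
        rd_kafka_AdminOptions_t *options =
            rd_kafka_AdminOptions_new(rk, RD_KAFKA_ADMIN_OP_LISTOFFSETS);
        rd_kafka_topic_partition_list_t *parts =
            rd_kafka_topic_partition_list_new(1);
        rd_kafka_error_t *error;
        rd_kafka_event_t *rkev;

        /* Only consider data from committed transactions. */
        error = rd_kafka_AdminOptions_set_isolation_level(
            options, RD_KAFKA_ISOLATION_LEVEL_READ_COMMITTED);
        if (error)
                rd_kafka_error_destroy(error);

        rd_kafka_topic_partition_list_add(parts, "orders", 0)->offset =
            RD_KAFKA_OFFSET_SPEC_LATEST;

        rd_kafka_ListOffsets(rk, parts, options, rkqu);

        rkev = rd_kafka_queue_poll(rkqu, 10 * 1000);
        if (rkev) {
                const rd_kafka_ListOffsets_result_t *res =
                    rd_kafka_event_ListOffsets_result(rkev);
                if (res) {
                        size_t i, cnt;
                        const rd_kafka_ListOffsetsResultInfo_t **infos =
                            rd_kafka_ListOffsets_result_infos(res, &cnt);
                        for (i = 0; i < cnt; i++) {
                                const rd_kafka_topic_partition_t *tp =
                                    rd_kafka_ListOffsetsResultInfo_topic_partition(
                                        infos[i]);
                                printf("%s [%d] offset %lld\n", tp->topic,
                                       tp->partition, (long long)tp->offset);
                        }
                }
                rd_kafka_event_destroy(rkev);
        }
        rd_kafka_topic_partition_list_destroy(parts);
        rd_kafka_AdminOptions_destroy(options);
        rd_kafka_queue_destroy(rkqu);
}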
*/ -}; - -/**@}*/ - - -/** - * @name ListConsumerGroupOffsets - * @{ - */ - -/** - * @brief ListConsumerGroupOffsets result - */ -struct rd_kafka_ListConsumerGroupOffsets_result_s { - rd_list_t groups; /**< Type (rd_kafka_group_result_t *) */ -}; - -struct rd_kafka_ListConsumerGroupOffsets_s { - char *group_id; /**< Points to data */ - rd_kafka_topic_partition_list_t *partitions; - char data[1]; /**< The group id is allocated along with - * the struct here. */ -}; - -/**@}*/ - /** * @name ListConsumerGroups * @{ @@ -473,10 +504,86 @@ struct rd_kafka_ConsumerGroupDescription_s { rd_kafka_consumer_group_state_t state; /** Consumer group coordinator. */ rd_kafka_Node_t *coordinator; + /** Count of operations allowed for topic. -1 indicates operations not + * requested.*/ + int authorized_operations_cnt; + /** Operations allowed for topic. May be NULL if operations were not + * requested */ + rd_kafka_AclOperation_t *authorized_operations; /** Group specific error. */ rd_kafka_error_t *error; }; /**@}*/ +/** + * @name DescribeTopics + * @{ + */ + +/** + * @brief TopicCollection contains a list of topics. + * + */ +struct rd_kafka_TopicCollection_s { + char **topics; /**< List of topic names. */ + size_t topics_cnt; /**< Count of topic names. */ +}; + +/** + * @brief TopicPartition result type in DescribeTopics result. + * + */ +struct rd_kafka_TopicPartitionInfo_s { + int partition; /**< Partition id. */ + rd_kafka_Node_t *leader; /**< Leader of the partition. */ + size_t isr_cnt; /**< Count of insync replicas. */ + rd_kafka_Node_t **isr; /**< List of in sync replica nodes. */ + size_t replica_cnt; /**< Count of partition replicas. */ + rd_kafka_Node_t **replicas; /**< List of replica nodes. */ +}; + +/** + * @struct DescribeTopics result + */ +struct rd_kafka_TopicDescription_s { + char *topic; /**< Topic name */ + rd_kafka_Uuid_t topic_id; /**< Topic Id */ + int partition_cnt; /**< Number of partitions in \p partitions*/ + rd_bool_t is_internal; /**< Is the topic is internal to Kafka? */ + rd_kafka_TopicPartitionInfo_t **partitions; /**< Partitions */ + rd_kafka_error_t *error; /**< Topic error reported by broker */ + int authorized_operations_cnt; /**< Count of operations allowed for + * topic. -1 indicates operations not + * requested. */ + rd_kafka_AclOperation_t + *authorized_operations; /**< Operations allowed for topic. May be + * NULL if operations were not requested */ +}; + +/**@}*/ + +/** + * @name DescribeCluster + * @{ + */ +/** + * @struct DescribeCluster result - internal type. + */ +typedef struct rd_kafka_ClusterDescription_s { + char *cluster_id; /**< Cluster id */ + rd_kafka_Node_t *controller; /**< Current controller. */ + size_t node_cnt; /**< Count of brokers in the cluster. */ + rd_kafka_Node_t **nodes; /**< Brokers in the cluster. */ + int authorized_operations_cnt; /**< Count of operations allowed for + * cluster. -1 indicates operations not + * requested. */ + rd_kafka_AclOperation_t + *authorized_operations; /**< Operations allowed for cluster. 
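The authorized_operations arrays declared in these structs use a count of -1 and a NULL pointer to mean "not requested", so membership checks need a guard. A small helper sketch (the function name is hypothetical, not part of the library):

#include <librdkafka/rdkafka.h>

/* Return 1 if the described topic permits the wanted ACL operation,
 * 0 if it does not or if operations were not requested (NULL list,
 * count reported as 0 by the accessor). */
static int
topic_operation_allowed(const rd_kafka_TopicDescription_t *topicdesc,
                        rd_kafka_AclOperation_t wanted) {
        size_t i, cnt;
        const rd_kafka_AclOperation_t *ops =
            rd_kafka_TopicDescription_authorized_operations(topicdesc, &cnt);

        for (i = 0; i < cnt; i++)
                if (ops[i] == wanted)
                        return 1;
        return 0;
}

/* e.g.: topic_operation_allowed(topicdesc, RD_KAFKA_ACL_OPERATION_WRITE) */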
May be + * NULL if operations were not requested */ + +} rd_kafka_ClusterDescription_t; + +/**@}*/ + #endif /* _RDKAFKA_ADMIN_H_ */ diff --git a/lib/librdkafka-2.1.0/src/rdkafka_assignment.c b/lib/librdkafka-2.3.0/src/rdkafka_assignment.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_assignment.c rename to lib/librdkafka-2.3.0/src/rdkafka_assignment.c index dc4bdae9477..3b0d7e83d77 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_assignment.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_assignment.c @@ -1,7 +1,8 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2020 Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill + * 2023 Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -341,6 +342,8 @@ static int rd_kafka_assignment_serve_removals(rd_kafka_t *rk) { * so it will be committed below. */ rd_kafka_topic_partition_set_from_fetch_pos( rktpar, rktp->rktp_stored_pos); + rd_kafka_topic_partition_set_metadata_from_rktp_stored(rktpar, + rktp); valid_offsets += !RD_KAFKA_OFFSET_IS_LOGICAL(rktpar->offset); /* Reset the stored offset to invalid so that @@ -348,8 +351,8 @@ static int rd_kafka_assignment_serve_removals(rd_kafka_t *rk) { * will not commit a stored offset from a previous * assignment (issue #2782). */ rd_kafka_offset_store0( - rktp, RD_KAFKA_FETCH_POS(RD_KAFKA_OFFSET_INVALID, -1), - rd_true, RD_DONT_LOCK); + rktp, RD_KAFKA_FETCH_POS(RD_KAFKA_OFFSET_INVALID, -1), NULL, + 0, rd_true, RD_DONT_LOCK); /* Partition is no longer desired */ rd_kafka_toppar_desired_del(rktp); @@ -745,8 +748,8 @@ rd_kafka_assignment_add(rd_kafka_t *rk, /* Reset the stored offset to INVALID to avoid the race * condition described in rdkafka_offset.h */ rd_kafka_offset_store0( - rktp, RD_KAFKA_FETCH_POS(RD_KAFKA_OFFSET_INVALID, -1), - rd_true /* force */, RD_DONT_LOCK); + rktp, RD_KAFKA_FETCH_POS(RD_KAFKA_OFFSET_INVALID, -1), NULL, + 0, rd_true /* force */, RD_DONT_LOCK); rd_kafka_toppar_unlock(rktp); } diff --git a/lib/librdkafka-2.1.0/src/rdkafka_assignment.h b/lib/librdkafka-2.3.0/src/rdkafka_assignment.h similarity index 98% rename from lib/librdkafka-2.1.0/src/rdkafka_assignment.h rename to lib/librdkafka-2.3.0/src/rdkafka_assignment.h index fa51bb10c30..1f73c4ede8b 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_assignment.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_assignment.h @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2020 Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_assignor.c b/lib/librdkafka-2.3.0/src/rdkafka_assignor.c similarity index 57% rename from lib/librdkafka-2.1.0/src/rdkafka_assignor.c rename to lib/librdkafka-2.3.0/src/rdkafka_assignor.c index 79257384550..607a7bfd5aa 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_assignor.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_assignor.c @@ -1,7 +1,8 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2015 Magnus Edenhill + * Copyright (c) 2015-2022, Magnus Edenhill + * 2023 Confluent Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -59,6 +60,9 @@ void rd_kafka_group_member_clear(rd_kafka_group_member_t *rkgm) { if (rkgm->rkgm_member_metadata) rd_kafkap_bytes_destroy(rkgm->rkgm_member_metadata); + if (rkgm->rkgm_rack_id) + rd_kafkap_str_destroy(rkgm->rkgm_rack_id); + memset(rkgm, 0, sizeof(*rkgm)); } @@ -106,7 +110,9 @@ rd_kafkap_bytes_t *rd_kafka_consumer_protocol_member_metadata_new( const rd_list_t *topics, const void *userdata, size_t userdata_size, - const rd_kafka_topic_partition_list_t *owned_partitions) { + const rd_kafka_topic_partition_list_t *owned_partitions, + int generation, + const rd_kafkap_str_t *rack_id) { rd_kafka_buf_t *rkbuf; rd_kafkap_bytes_t *kbytes; @@ -124,12 +130,14 @@ rd_kafkap_bytes_t *rd_kafka_consumer_protocol_member_metadata_new( * OwnedPartitions => [Topic Partitions] // added in v1 * Topic => string * Partitions => [int32] + * GenerationId => int32 // added in v2 + * RackId => string // added in v3 */ rkbuf = rd_kafka_buf_new(1, 100 + (topic_cnt * 100) + userdata_size); /* Version */ - rd_kafka_buf_write_i16(rkbuf, 1); + rd_kafka_buf_write_i16(rkbuf, 3); rd_kafka_buf_write_i32(rkbuf, topic_cnt); RD_LIST_FOREACH(tinfo, topics, i) rd_kafka_buf_write_str(rkbuf, tinfo->topic, -1); @@ -154,6 +162,12 @@ rd_kafkap_bytes_t *rd_kafka_consumer_protocol_member_metadata_new( rd_false /*any offset*/, fields); } + /* Following data is ignored by consumer version < 2 */ + rd_kafka_buf_write_i32(rkbuf, generation); + + /* Following data is ignored by consumer version < 3 */ + rd_kafka_buf_write_kstr(rkbuf, rack_id); + /* Get binary buffer and allocate a new Kafka Bytes with a copy. */ rd_slice_init_full(&rkbuf->rkbuf_reader, &rkbuf->rkbuf_buf); len = rd_slice_remains(&rkbuf->rkbuf_reader); @@ -170,9 +184,13 @@ rd_kafkap_bytes_t *rd_kafka_assignor_get_metadata_with_empty_userdata( const rd_kafka_assignor_t *rkas, void *assignor_state, const rd_list_t *topics, - const rd_kafka_topic_partition_list_t *owned_partitions) { - return rd_kafka_consumer_protocol_member_metadata_new(topics, NULL, 0, - owned_partitions); + const rd_kafka_topic_partition_list_t *owned_partitions, + const rd_kafkap_str_t *rack_id) { + /* Generation was earlier populated inside userData, and older versions + * of clients still expect that. 
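The version bump to 3 above relies on the append-only layout of the ConsumerProtocolMember metadata: GenerationId and RackId are written after the v1 payload, so a v1 parser simply stops before them while newer parsers read on. A rough illustration of that append-only idea with a hypothetical fixed-size big-endian writer (not librdkafka's rd_kafka_buf API):

```c
#include <stdint.h>
#include <stddef.h>

struct wbuf {
        uint8_t buf[256];
        size_t of;
};

static void w_i16(struct wbuf *b, int16_t v) { /* big-endian int16 */
        b->buf[b->of++] = (uint8_t)(v >> 8);
        b->buf[b->of++] = (uint8_t)v;
}

static void w_i32(struct wbuf *b, int32_t v) { /* big-endian int32 */
        int i;
        for (i = 3; i >= 0; i--)
                b->buf[b->of++] = (uint8_t)(v >> (8 * i));
}

/* v3 layout: Version, <v0/v1 fields...>, GenerationId (v2+), RackId (v3+).
 * A v1 reader stops after the fields it knows about and never sees
 * the trailer, which is what keeps old consumers compatible. */
static void write_member_metadata(struct wbuf *b, int32_t generation) {
        w_i16(b, 3);          /* Version */
        /* ... v0/v1 fields: subscription, userdata, owned partitions ... */
        w_i32(b, generation); /* Ignored by readers of version < 2 */
        /* ... RackId string would follow, ignored by version < 3 ... */
}
```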
So, in case the userData is empty, we + * set the explicit generation field to the default value, -1 */ + return rd_kafka_consumer_protocol_member_metadata_new( + topics, NULL, 0, owned_partitions, -1 /* generation */, rack_id); } @@ -244,6 +262,8 @@ rd_kafka_member_subscriptions_map(rd_kafka_cgrp_t *rkcg, int member_cnt) { int ti; rd_kafka_assignor_topic_t *eligible_topic = NULL; + rd_kafka_metadata_internal_t *mdi = + rd_kafka_metadata_get_internal(metadata); rd_list_init(eligible_topics, RD_MIN(metadata->topic_cnt, 10), (void *)rd_kafka_assignor_topic_destroy); @@ -285,7 +305,8 @@ rd_kafka_member_subscriptions_map(rd_kafka_cgrp_t *rkcg, continue; } - eligible_topic->metadata = &metadata->topics[ti]; + eligible_topic->metadata = &metadata->topics[ti]; + eligible_topic->metadata_internal = &mdi->topics[ti]; rd_list_add(eligible_topics, eligible_topic); eligible_topic = NULL; } @@ -485,7 +506,8 @@ rd_kafka_resp_err_t rd_kafka_assignor_add( const struct rd_kafka_assignor_s *rkas, void *assignor_state, const rd_list_t *topics, - const rd_kafka_topic_partition_list_t *owned_partitions), + const rd_kafka_topic_partition_list_t *owned_partitions, + const rd_kafkap_str_t *rack_id), void (*on_assignment_cb)(const struct rd_kafka_assignor_s *rkas, void **assignor_state, const rd_kafka_topic_partition_list_t *assignment, @@ -636,6 +658,676 @@ void rd_kafka_assignors_term(rd_kafka_t *rk) { rd_list_destroy(&rk->rk_conf.partition_assignors); } +/** + * @brief Computes whether rack-aware assignment needs to be used, or not. + */ +rd_bool_t +rd_kafka_use_rack_aware_assignment(rd_kafka_assignor_topic_t **topics, + size_t topic_cnt, + const rd_kafka_metadata_internal_t *mdi) { + /* Computing needs_rack_aware_assignment requires the evaluation of + three criteria: + + 1. At least one of the members has a non-null rack. + 2. At least one common rack exists between members and partitions. + 3. There is a partition which doesn't have replicas on all possible + racks, or in other words, all partitions don't have replicas on all + racks. Note that 'all racks' here means racks across all replicas of + all partitions, not including consumer racks. Also note that 'all + racks' are computed per-topic for range assignor, and across topics + for sticky assignor. + */ + + int i; + size_t t; + rd_kafka_group_member_t *member; + rd_list_t *all_consumer_racks = NULL; /* Contained Type: char* */ + rd_list_t *all_partition_racks = NULL; /* Contained Type: char* */ + char *rack_id = NULL; + rd_bool_t needs_rack_aware_assignment = rd_true; /* assume true */ + + /* Criteria 1 */ + /* We don't copy racks, so the free function is NULL. */ + all_consumer_racks = rd_list_new(0, NULL); + + for (t = 0; t < topic_cnt; t++) { + RD_LIST_FOREACH(member, &topics[t]->members, i) { + if (member->rkgm_rack_id && + RD_KAFKAP_STR_LEN(member->rkgm_rack_id)) { + /* Repetitions are fine, we will dedup it later. + */ + rd_list_add( + all_consumer_racks, + /* The const qualifier has to be discarded + because of how rd_list_t and + rd_kafkap_str_t are, but we never modify + items in all_consumer_racks. */ + (char *)member->rkgm_rack_id->str); + } + } + } + if (rd_list_cnt(all_consumer_racks) == 0) { + needs_rack_aware_assignment = rd_false; + goto done; + } + + + /* Criteria 2 */ + /* We don't copy racks, so the free function is NULL. 
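Criteria 2, handled next, reduces to a set-intersection test between the consumer racks collected above and the partition racks. Because both lists are sorted and deduplicated before the comparison, the same check can also be phrased as a linear merge scan; a standalone sketch over plain C string arrays (rd_list_t is not used here):

```c
#include <string.h>
#include <stddef.h>

/* Returns 1 if two sorted, deduplicated string arrays share any
 * element; the merge scan is O(an + bn). */
static int racks_intersect(const char **a, size_t an,
                           const char **b, size_t bn) {
        size_t i = 0, j = 0;

        while (i < an && j < bn) {
                int c = strcmp(a[i], b[j]);
                if (c == 0)
                        return 1; /* A single match is enough */
                else if (c < 0)
                        i++;
                else
                        j++;
        }
        return 0;
}

/* Usage: with a = {"rack1","rack2"} and b = {"rack2","rack3"},
 * racks_intersect(a, 2, b, 2) returns 1. */
```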
*/ + all_partition_racks = rd_list_new(0, NULL); + + for (t = 0; t < topic_cnt; t++) { + const int partition_cnt = topics[t]->metadata->partition_cnt; + for (i = 0; i < partition_cnt; i++) { + size_t j; + for (j = 0; j < topics[t] + ->metadata_internal->partitions[i] + .racks_cnt; + j++) { + char *rack = + topics[t] + ->metadata_internal->partitions[i] + .racks[j]; + rd_list_add(all_partition_racks, rack); + } + } + } + + /* If there are no partition racks, Criteria 2 cannot possibly be met. + */ + if (rd_list_cnt(all_partition_racks) == 0) { + needs_rack_aware_assignment = rd_false; + goto done; + } + + /* Sort and dedup the racks. */ + rd_list_deduplicate(&all_consumer_racks, rd_strcmp2); + rd_list_deduplicate(&all_partition_racks, rd_strcmp2); + + + /* Iterate through each list in order, and see if there's anything in + * common */ + RD_LIST_FOREACH(rack_id, all_consumer_racks, i) { + /* Break if there's even a single match. */ + if (rd_list_find(all_partition_racks, rack_id, rd_strcmp2)) { + break; + } + } + if (i == rd_list_cnt(all_consumer_racks)) { + needs_rack_aware_assignment = rd_false; + goto done; + } + + /* Criteria 3 */ + for (t = 0; t < topic_cnt; t++) { + const int partition_cnt = topics[t]->metadata->partition_cnt; + for (i = 0; i < partition_cnt; i++) { + /* Since partition_racks[i] is a subset of + * all_partition_racks, and both of them are deduped, + * the same size indicates that they're equal. */ + if ((size_t)(rd_list_cnt(all_partition_racks)) != + topics[t] + ->metadata_internal->partitions[i] + .racks_cnt) { + break; + } + } + if (i < partition_cnt) { + /* Break outer loop if inner loop was broken. */ + break; + } + } + + /* Implies that all partitions have replicas on all racks. */ + if (t == topic_cnt) + needs_rack_aware_assignment = rd_false; + +done: + RD_IF_FREE(all_consumer_racks, rd_list_destroy); + RD_IF_FREE(all_partition_racks, rd_list_destroy); + + return needs_rack_aware_assignment; +} + + +/* Helper to populate the racks for brokers in the metadata for unit tests. + * Passing num_broker_racks = 0 will return NULL racks. */ +void ut_populate_internal_broker_metadata(rd_kafka_metadata_internal_t *mdi, + int num_broker_racks, + rd_kafkap_str_t *all_racks[], + size_t all_racks_cnt) { + int i; + + rd_assert(num_broker_racks < (int)all_racks_cnt); + + for (i = 0; i < mdi->metadata.broker_cnt; i++) { + mdi->brokers[i].id = i; + /* Cast from const to non-const. We don't intend to modify it, + * but unfortunately neither implementation of rd_kafkap_str_t + * or rd_kafka_metadata_broker_internal_t can be changed. So, + * this cast is used - in unit tests only. */ + mdi->brokers[i].rack_id = + (char *)(num_broker_racks + ? all_racks[i % num_broker_racks]->str + : NULL); + } +} + +/* Helper to populate the deduplicated racks inside each partition. It's assumed + * that `mdi->brokers` is set, maybe using + * `ut_populate_internal_broker_metadata`. 
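For reference, the `i % num_broker_racks` indexing in `ut_populate_internal_broker_metadata` above spreads brokers over racks round-robin; a tiny illustration with made-up rack names:

```c
#include <stdio.h>

int main(void) {
        const char *racks[] = {"rackA", "rackB", "rackC"};
        int num_broker_racks = 3, i;

        /* Broker 0 -> rackA, 1 -> rackB, 2 -> rackC, 3 -> rackA, ... */
        for (i = 0; i < 6; i++)
                printf("broker %d -> %s\n", i, racks[i % num_broker_racks]);
        return 0;
}
```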
*/ +void ut_populate_internal_topic_metadata(rd_kafka_metadata_internal_t *mdi) { + int ti; + rd_kafka_metadata_broker_internal_t *brokers_internal; + size_t broker_cnt; + + rd_assert(mdi->brokers); + + brokers_internal = mdi->brokers; + broker_cnt = mdi->metadata.broker_cnt; + + for (ti = 0; ti < mdi->metadata.topic_cnt; ti++) { + int i; + rd_kafka_metadata_topic_t *mdt = &mdi->metadata.topics[ti]; + rd_kafka_metadata_topic_internal_t *mdti = &mdi->topics[ti]; + + for (i = 0; i < mdt->partition_cnt; i++) { + int j; + rd_kafka_metadata_partition_t *partition = + &mdt->partitions[i]; + rd_kafka_metadata_partition_internal_t + *partition_internal = &mdti->partitions[i]; + + rd_list_t *curr_list; + char *rack; + + if (partition->replica_cnt == 0) + continue; + + curr_list = rd_list_new( + 0, NULL); /* use a list for de-duplication */ + for (j = 0; j < partition->replica_cnt; j++) { + rd_kafka_metadata_broker_internal_t key = { + .id = partition->replicas[j]}; + rd_kafka_metadata_broker_internal_t *broker = + bsearch( + &key, brokers_internal, broker_cnt, + sizeof( + rd_kafka_metadata_broker_internal_t), + rd_kafka_metadata_broker_internal_cmp); + if (!broker || !broker->rack_id) + continue; + rd_list_add(curr_list, broker->rack_id); + } + rd_list_deduplicate(&curr_list, rd_strcmp2); + + partition_internal->racks_cnt = rd_list_cnt(curr_list); + partition_internal->racks = rd_malloc( + sizeof(char *) * partition_internal->racks_cnt); + RD_LIST_FOREACH(rack, curr_list, j) { + partition_internal->racks[j] = + rack; /* no duplication */ + } + rd_list_destroy(curr_list); + } + } +} + +/* Helper to destroy test metadata. Destroying the metadata has some additional + * steps in case of tests. */ +void ut_destroy_metadata(rd_kafka_metadata_t *md) { + int ti; + rd_kafka_metadata_internal_t *mdi = rd_kafka_metadata_get_internal(md); + + for (ti = 0; ti < md->topic_cnt; ti++) { + int i; + rd_kafka_metadata_topic_t *mdt = &md->topics[ti]; + rd_kafka_metadata_topic_internal_t *mdti = &mdi->topics[ti]; + + for (i = 0; mdti && i < mdt->partition_cnt; i++) { + rd_free(mdti->partitions[i].racks); + } + } + + rd_kafka_metadata_destroy(md); +} + + +/** + * @brief Set a member's owned partitions based on its assignment. + * + * For use between assignor_run() calls. This is mimicking a consumer receiving + * its new assignment and including it in the next rebalance as its + * owned-partitions. + */ +void ut_set_owned(rd_kafka_group_member_t *rkgm) { + if (rkgm->rkgm_owned) + rd_kafka_topic_partition_list_destroy(rkgm->rkgm_owned); + + rkgm->rkgm_owned = + rd_kafka_topic_partition_list_copy(rkgm->rkgm_assignment); +} + + +void ut_print_toppar_list(const rd_kafka_topic_partition_list_t *partitions) { + int i; + + for (i = 0; i < partitions->cnt; i++) + RD_UT_SAY(" %s [%" PRId32 "]", partitions->elems[i].topic, + partitions->elems[i].partition); +} + + +/* Implementation for ut_init_member and ut_init_member_with_rackv. */ +static void ut_init_member_internal(rd_kafka_group_member_t *rkgm, + const char *member_id, + const rd_kafkap_str_t *rack_id, + va_list ap) { + const char *topic; + + memset(rkgm, 0, sizeof(*rkgm)); + + rkgm->rkgm_member_id = rd_kafkap_str_new(member_id, -1); + rkgm->rkgm_group_instance_id = rd_kafkap_str_new(member_id, -1); + rkgm->rkgm_rack_id = rack_id ? 
rd_kafkap_str_copy(rack_id) : NULL; + + rd_list_init(&rkgm->rkgm_eligible, 0, NULL); + + rkgm->rkgm_subscription = rd_kafka_topic_partition_list_new(4); + + while ((topic = va_arg(ap, const char *))) + rd_kafka_topic_partition_list_add(rkgm->rkgm_subscription, + topic, RD_KAFKA_PARTITION_UA); + + rkgm->rkgm_assignment = + rd_kafka_topic_partition_list_new(rkgm->rkgm_subscription->size); + + rkgm->rkgm_generation = 1; +} + +/** + * @brief Initialize group member struct for testing. + * + * va-args is a NULL-terminated list of (const char *) topics. + * + * Use rd_kafka_group_member_clear() to free fields. + */ +void ut_init_member(rd_kafka_group_member_t *rkgm, const char *member_id, ...) { + va_list ap; + va_start(ap, member_id); + ut_init_member_internal(rkgm, member_id, NULL, ap); + va_end(ap); +} + +/** + * @brief Initialize group member struct for testing with a rackid. + * + * va-args is a NULL-terminated list of (const char *) topics. + * + * Use rd_kafka_group_member_clear() to free fields. + */ +void ut_init_member_with_rackv(rd_kafka_group_member_t *rkgm, + const char *member_id, + const rd_kafkap_str_t *rack_id, + ...) { + va_list ap; + va_start(ap, rack_id); + ut_init_member_internal(rkgm, member_id, rack_id, ap); + va_end(ap); +} + +/** + * @brief Initialize group member struct for testing with a rackid. + * + * Topics that the member is subscribed to are specified in an array with the + * size specified separately. + * + * Use rd_kafka_group_member_clear() to free fields. + */ +void ut_init_member_with_rack(rd_kafka_group_member_t *rkgm, + const char *member_id, + const rd_kafkap_str_t *rack_id, + char *topics[], + size_t topic_cnt) { + size_t i; + + memset(rkgm, 0, sizeof(*rkgm)); + + rkgm->rkgm_member_id = rd_kafkap_str_new(member_id, -1); + rkgm->rkgm_group_instance_id = rd_kafkap_str_new(member_id, -1); + rkgm->rkgm_rack_id = rack_id ? rd_kafkap_str_copy(rack_id) : NULL; + rd_list_init(&rkgm->rkgm_eligible, 0, NULL); + + rkgm->rkgm_subscription = rd_kafka_topic_partition_list_new(4); + + for (i = 0; i < topic_cnt; i++) { + rd_kafka_topic_partition_list_add( + rkgm->rkgm_subscription, topics[i], RD_KAFKA_PARTITION_UA); + } + rkgm->rkgm_assignment = + rd_kafka_topic_partition_list_new(rkgm->rkgm_subscription->size); +} + +/** + * @brief Verify that member's assignment matches the expected partitions. + * + * The va-list is a NULL-terminated list of (const char *topic, int partition) + * tuples. + * + * @returns 0 on success, else raises a unittest error and returns 1. + */ +int verifyAssignment0(const char *function, + int line, + rd_kafka_group_member_t *rkgm, + ...) 
{ + va_list ap; + int cnt = 0; + const char *topic; + int fails = 0; + + va_start(ap, rkgm); + while ((topic = va_arg(ap, const char *))) { + int partition = va_arg(ap, int); + cnt++; + + if (!rd_kafka_topic_partition_list_find(rkgm->rkgm_assignment, + topic, partition)) { + RD_UT_WARN( + "%s:%d: Expected %s [%d] not found in %s's " + "assignment (%d partition(s))", + function, line, topic, partition, + rkgm->rkgm_member_id->str, + rkgm->rkgm_assignment->cnt); + fails++; + } + } + va_end(ap); + + if (cnt != rkgm->rkgm_assignment->cnt) { + RD_UT_WARN( + "%s:%d: " + "Expected %d assigned partition(s) for %s, not %d", + function, line, cnt, rkgm->rkgm_member_id->str, + rkgm->rkgm_assignment->cnt); + fails++; + } + + if (fails) + ut_print_toppar_list(rkgm->rkgm_assignment); + + RD_UT_ASSERT(!fails, "%s:%d: See previous errors", function, line); + + return 0; +} + +/** + * @brief Verify that all members' assignment matches the expected partitions. + * + * The va-list is a list of (const char *topic, int partition) + * tuples, and NULL to demarcate different members' assignment. + * + * @returns 0 on success, else raises a unittest error and returns 1. + */ +int verifyMultipleAssignment0(const char *function, + int line, + rd_kafka_group_member_t *rkgms, + size_t member_cnt, + ...) { + va_list ap; + const char *topic; + int fails = 0; + size_t i = 0; + + if (member_cnt == 0) { + return 0; + } + + va_start(ap, member_cnt); + for (i = 0; i < member_cnt; i++) { + rd_kafka_group_member_t *rkgm = &rkgms[i]; + int cnt = 0; + int local_fails = 0; + + while ((topic = va_arg(ap, const char *))) { + int partition = va_arg(ap, int); + cnt++; + + if (!rd_kafka_topic_partition_list_find( + rkgm->rkgm_assignment, topic, partition)) { + RD_UT_WARN( + "%s:%d: Expected %s [%d] not found in %s's " + "assignment (%d partition(s))", + function, line, topic, partition, + rkgm->rkgm_member_id->str, + rkgm->rkgm_assignment->cnt); + local_fails++; + } + } + + if (cnt != rkgm->rkgm_assignment->cnt) { + RD_UT_WARN( + "%s:%d: " + "Expected %d assigned partition(s) for %s, not %d", + function, line, cnt, rkgm->rkgm_member_id->str, + rkgm->rkgm_assignment->cnt); + fails++; + } + + if (local_fails) + ut_print_toppar_list(rkgm->rkgm_assignment); + fails += local_fails; + } + va_end(ap); + + RD_UT_ASSERT(!fails, "%s:%d: See previous errors", function, line); + + return 0; +} + + +#define verifyNumPartitionsWithRackMismatchPartition(rktpar, metadata, \ + increase) \ + do { \ + if (!rktpar) \ + break; \ + int i; \ + rd_bool_t noneMatch = rd_true; \ + rd_kafka_metadata_internal_t *metadata_internal = \ + rd_kafka_metadata_get_internal(metadata); \ + \ + for (i = 0; i < metadata->topics[j].partitions[k].replica_cnt; \ + i++) { \ + int32_t replica_id = \ + metadata->topics[j].partitions[k].replicas[i]; \ + rd_kafka_metadata_broker_internal_t *broker; \ + rd_kafka_metadata_broker_internal_find( \ + metadata_internal, replica_id, broker); \ + \ + if (broker && !strcmp(rack_id, broker->rack_id)) { \ + noneMatch = rd_false; \ + break; \ + } \ + } \ + \ + if (noneMatch) \ + increase++; \ + } while (0); + +/** + * @brief Verify number of partitions with rack mismatch. 
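The `verifyAssignment0`/`verifyMultipleAssignment0` helpers above share one va_arg convention: (topic, partition) pairs are consumed until a NULL topic terminates the list. A minimal sketch of that traversal, with a hypothetical `print_expected` helper in place of the unit-test assertions:

```c
#include <stdarg.h>
#include <stdio.h>

/* Walks a NULL-terminated list of (const char *topic, int partition)
 * tuples, the same shape the verify helpers expect. */
static void print_expected(const char *first, ...) {
        va_list ap;
        const char *topic = first;

        va_start(ap, first);
        while (topic) {
                int partition = va_arg(ap, int);
                printf("expect %s [%d]\n", topic, partition);
                topic = va_arg(ap, const char *);
        }
        va_end(ap);
}

/* Usage: print_expected("t1", 0, "t1", 1, "t2", 0, NULL); */
```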
+ */ +int verifyNumPartitionsWithRackMismatch0(const char *function, + int line, + rd_kafka_metadata_t *metadata, + rd_kafka_group_member_t *rkgms, + size_t member_cnt, + int expectedNumMismatch) { + size_t i; + int j, k; + + int numMismatched = 0; + for (i = 0; i < member_cnt; i++) { + rd_kafka_group_member_t *rkgm = &rkgms[i]; + const char *rack_id = rkgm->rkgm_rack_id->str; + if (rack_id) { + for (j = 0; j < metadata->topic_cnt; j++) { + for (k = 0; + k < metadata->topics[j].partition_cnt; + k++) { + rd_kafka_topic_partition_t *rktpar = + rd_kafka_topic_partition_list_find( + rkgm->rkgm_assignment, + metadata->topics[j].topic, k); + verifyNumPartitionsWithRackMismatchPartition( + rktpar, metadata, numMismatched); + } + } + } + } + + RD_UT_ASSERT(expectedNumMismatch == numMismatched, + "%s:%d: Expected %d mismatches, got %d", function, line, + expectedNumMismatch, numMismatched); + + return 0; +} + + +int verifyValidityAndBalance0(const char *func, + int line, + rd_kafka_group_member_t *members, + size_t member_cnt, + const rd_kafka_metadata_t *metadata) { + int fails = 0; + int i; + rd_bool_t verbose = rd_false; /* Enable for troubleshooting */ + + RD_UT_SAY("%s:%d: verifying assignment for %d member(s):", func, line, + (int)member_cnt); + + for (i = 0; i < (int)member_cnt; i++) { + const char *consumer = members[i].rkgm_member_id->str; + const rd_kafka_topic_partition_list_t *partitions = + members[i].rkgm_assignment; + int p, j; + + if (verbose) + RD_UT_SAY( + "%s:%d: " + "consumer \"%s\", %d subscribed topic(s), " + "%d assigned partition(s):", + func, line, consumer, + members[i].rkgm_subscription->cnt, partitions->cnt); + + for (p = 0; p < partitions->cnt; p++) { + const rd_kafka_topic_partition_t *partition = + &partitions->elems[p]; + + if (verbose) + RD_UT_SAY("%s:%d: %s [%" PRId32 "]", func, + line, partition->topic, + partition->partition); + + if (!rd_kafka_topic_partition_list_find( + members[i].rkgm_subscription, partition->topic, + RD_KAFKA_PARTITION_UA)) { + RD_UT_WARN("%s [%" PRId32 + "] is assigned to " + "%s but it is not subscribed to " + "that topic", + partition->topic, + partition->partition, consumer); + fails++; + } + } + + /* Update the member's owned partitions to match + * the assignment. */ + ut_set_owned(&members[i]); + + if (i == (int)member_cnt - 1) + continue; + + for (j = i + 1; j < (int)member_cnt; j++) { + const char *otherConsumer = + members[j].rkgm_member_id->str; + const rd_kafka_topic_partition_list_t *otherPartitions = + members[j].rkgm_assignment; + rd_bool_t balanced = + abs(partitions->cnt - otherPartitions->cnt) <= 1; + + for (p = 0; p < partitions->cnt; p++) { + const rd_kafka_topic_partition_t *partition = + &partitions->elems[p]; + + if (rd_kafka_topic_partition_list_find( + otherPartitions, partition->topic, + partition->partition)) { + RD_UT_WARN( + "Consumer %s and %s are both " + "assigned %s [%" PRId32 "]", + consumer, otherConsumer, + partition->topic, + partition->partition); + fails++; + } + + + /* If assignment is imbalanced and this topic + * is also subscribed by the other consumer + * it means the assignment strategy failed to + * properly balance the partitions. 
*/ + if (!balanced && + rd_kafka_topic_partition_list_find_topic( + otherPartitions, partition->topic)) { + RD_UT_WARN( + "Some %s partition(s) can be " + "moved from " + "%s (%d partition(s)) to " + "%s (%d partition(s)) to " + "achieve a better balance", + partition->topic, consumer, + partitions->cnt, otherConsumer, + otherPartitions->cnt); + fails++; + } + } + } + } + + RD_UT_ASSERT(!fails, "%s:%d: See %d previous errors", func, line, + fails); + + return 0; +} + +/** + * @brief Checks that all assigned partitions are fully balanced. + * + * Only works for symmetrical subscriptions. + */ +int isFullyBalanced0(const char *function, + int line, + const rd_kafka_group_member_t *members, + size_t member_cnt) { + int min_assignment = INT_MAX; + int max_assignment = -1; + size_t i; + + for (i = 0; i < member_cnt; i++) { + int size = members[i].rkgm_assignment->cnt; + if (size < min_assignment) + min_assignment = size; + if (size > max_assignment) + max_assignment = size; + } + + RD_UT_ASSERT(max_assignment - min_assignment <= 1, + "%s:%d: Assignment not balanced: min %d, max %d", function, + line, min_assignment, max_assignment); + + return 0; +} /** @@ -881,6 +1573,7 @@ static int ut_assignors(void) { /* Run through test cases */ for (i = 0; tests[i].name; i++) { int ie, it, im; + rd_kafka_metadata_internal_t metadata_internal; rd_kafka_metadata_t metadata; rd_kafka_group_member_t *members; @@ -888,14 +1581,38 @@ static int ut_assignors(void) { metadata.topic_cnt = tests[i].topic_cnt; metadata.topics = rd_alloca(sizeof(*metadata.topics) * metadata.topic_cnt); + metadata_internal.topics = rd_alloca( + sizeof(*metadata_internal.topics) * metadata.topic_cnt); + memset(metadata.topics, 0, sizeof(*metadata.topics) * metadata.topic_cnt); + memset(metadata_internal.topics, 0, + sizeof(*metadata_internal.topics) * metadata.topic_cnt); + for (it = 0; it < metadata.topic_cnt; it++) { + int pt; metadata.topics[it].topic = (char *)tests[i].topics[it].name; metadata.topics[it].partition_cnt = tests[i].topics[it].partition_cnt; - metadata.topics[it].partitions = NULL; /* Not used */ + metadata.topics[it].partitions = + rd_alloca(metadata.topics[it].partition_cnt * + sizeof(rd_kafka_metadata_partition_t)); + metadata_internal.topics[it].partitions = rd_alloca( + metadata.topics[it].partition_cnt * + sizeof(rd_kafka_metadata_partition_internal_t)); + for (pt = 0; pt < metadata.topics[it].partition_cnt; + pt++) { + metadata.topics[it].partitions[pt].id = pt; + metadata.topics[it].partitions[pt].replica_cnt = + 0; + metadata_internal.topics[it] + .partitions[pt] + .racks_cnt = 0; + metadata_internal.topics[it] + .partitions[pt] + .racks = NULL; + } } /* Create members */ @@ -946,9 +1663,12 @@ static int ut_assignors(void) { } /* Run assignor */ - err = rd_kafka_assignor_run( - rk->rk_cgrp, rkas, &metadata, members, - tests[i].member_cnt, errstr, sizeof(errstr)); + metadata_internal.metadata = metadata; + err = rd_kafka_assignor_run( + rk->rk_cgrp, rkas, + (rd_kafka_metadata_t *)(&metadata_internal), + members, tests[i].member_cnt, errstr, + sizeof(errstr)); RD_UT_ASSERT(!err, "Assignor case %s for %s failed: %s", tests[i].name, diff --git a/lib/librdkafka-2.3.0/src/rdkafka_assignor.h b/lib/librdkafka-2.3.0/src/rdkafka_assignor.h new file mode 100644 index 00000000000..6797e70b118 --- /dev/null +++ b/lib/librdkafka-2.3.0/src/rdkafka_assignor.h @@ -0,0 +1,402 @@ +/* + * librdkafka - The Apache Kafka C/C++ library + * + * Copyright (c) 2015-2022, Magnus Edenhill + * 2023 Confluent Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _RDKAFKA_ASSIGNOR_H_ +#define _RDKAFKA_ASSIGNOR_H_ + +#include "rdkafka_metadata.h" + +/*! + * Enumerates the different rebalance protocol types. + * + * @sa rd_kafka_rebalance_protocol() + */ +typedef enum rd_kafka_rebalance_protocol_t { + RD_KAFKA_REBALANCE_PROTOCOL_NONE, /**< Rebalance protocol is + unknown */ + RD_KAFKA_REBALANCE_PROTOCOL_EAGER, /**< Eager rebalance + protocol */ + RD_KAFKA_REBALANCE_PROTOCOL_COOPERATIVE /**< Cooperative + rebalance protocol*/ +} rd_kafka_rebalance_protocol_t; + + + +typedef struct rd_kafka_group_member_s { + /** Subscribed topics (partition field is ignored). */ + rd_kafka_topic_partition_list_t *rkgm_subscription; + /** Partitions assigned to this member after running the assignor. + * E.g., the current assignment coming out of the rebalance. */ + rd_kafka_topic_partition_list_t *rkgm_assignment; + /** Partitions reported as currently owned by the member, read + * from consumer metadata. E.g., the current assignment going into + * the rebalance. */ + rd_kafka_topic_partition_list_t *rkgm_owned; + /** List of eligible topics in subscription. E.g., subscribed topics + * that exist. */ + rd_list_t rkgm_eligible; + /** Member id (e.g., client.id-some-uuid). */ + rd_kafkap_str_t *rkgm_member_id; + /** Group instance id. */ + rd_kafkap_str_t *rkgm_group_instance_id; + /** Member-specific opaque userdata. */ + rd_kafkap_bytes_t *rkgm_userdata; + /** Member metadata, e.g., the currently owned partitions. */ + rd_kafkap_bytes_t *rkgm_member_metadata; + /** Group generation id. */ + int rkgm_generation; + /** Member rack id. */ + rd_kafkap_str_t *rkgm_rack_id; +} rd_kafka_group_member_t; + + +int rd_kafka_group_member_cmp(const void *_a, const void *_b); + +int rd_kafka_group_member_find_subscription(rd_kafka_t *rk, + const rd_kafka_group_member_t *rkgm, + const char *topic); + +/** + * Structure to hold metadata for a single topic and all its + * subscribing members. 
+ */ +typedef struct rd_kafka_assignor_topic_s { + const rd_kafka_metadata_topic_t *metadata; + const rd_kafka_metadata_topic_internal_t *metadata_internal; + rd_list_t members; /* rd_kafka_group_member_t * */ +} rd_kafka_assignor_topic_t; + + +int rd_kafka_assignor_topic_cmp(const void *_a, const void *_b); + + +typedef struct rd_kafka_assignor_s { + rd_kafkap_str_t *rkas_protocol_type; + rd_kafkap_str_t *rkas_protocol_name; + + int rkas_enabled; + + /** Order for strategies. */ + int rkas_index; + + rd_kafka_rebalance_protocol_t rkas_protocol; + + rd_kafka_resp_err_t (*rkas_assign_cb)( + rd_kafka_t *rk, + const struct rd_kafka_assignor_s *rkas, + const char *member_id, + const rd_kafka_metadata_t *metadata, + rd_kafka_group_member_t *members, + size_t member_cnt, + rd_kafka_assignor_topic_t **eligible_topics, + size_t eligible_topic_cnt, + char *errstr, + size_t errstr_size, + void *opaque); + + rd_kafkap_bytes_t *(*rkas_get_metadata_cb)( + const struct rd_kafka_assignor_s *rkas, + void *assignor_state, + const rd_list_t *topics, + const rd_kafka_topic_partition_list_t *owned_partitions, + const rd_kafkap_str_t *rack_id); + + void (*rkas_on_assignment_cb)( + const struct rd_kafka_assignor_s *rkas, + void **assignor_state, + const rd_kafka_topic_partition_list_t *assignment, + const rd_kafkap_bytes_t *assignment_userdata, + const rd_kafka_consumer_group_metadata_t *rkcgm); + + void (*rkas_destroy_state_cb)(void *assignor_state); + + int (*rkas_unittest)(void); + + void *rkas_opaque; +} rd_kafka_assignor_t; + + +rd_kafka_resp_err_t rd_kafka_assignor_add( + rd_kafka_t *rk, + const char *protocol_type, + const char *protocol_name, + rd_kafka_rebalance_protocol_t rebalance_protocol, + rd_kafka_resp_err_t (*assign_cb)( + rd_kafka_t *rk, + const struct rd_kafka_assignor_s *rkas, + const char *member_id, + const rd_kafka_metadata_t *metadata, + rd_kafka_group_member_t *members, + size_t member_cnt, + rd_kafka_assignor_topic_t **eligible_topics, + size_t eligible_topic_cnt, + char *errstr, + size_t errstr_size, + void *opaque), + rd_kafkap_bytes_t *(*get_metadata_cb)( + const struct rd_kafka_assignor_s *rkas, + void *assignor_state, + const rd_list_t *topics, + const rd_kafka_topic_partition_list_t *owned_partitions, + const rd_kafkap_str_t *rack_id), + void (*on_assignment_cb)(const struct rd_kafka_assignor_s *rkas, + void **assignor_state, + const rd_kafka_topic_partition_list_t *assignment, + const rd_kafkap_bytes_t *userdata, + const rd_kafka_consumer_group_metadata_t *rkcgm), + void (*destroy_state_cb)(void *assignor_state), + int (*unittest_cb)(void), + void *opaque); + +rd_kafkap_bytes_t *rd_kafka_consumer_protocol_member_metadata_new( + const rd_list_t *topics, + const void *userdata, + size_t userdata_size, + const rd_kafka_topic_partition_list_t *owned_partitions, + int generation, + const rd_kafkap_str_t *rack_id); + +rd_kafkap_bytes_t *rd_kafka_assignor_get_metadata_with_empty_userdata( + const rd_kafka_assignor_t *rkas, + void *assignor_state, + const rd_list_t *topics, + const rd_kafka_topic_partition_list_t *owned_partitions, + const rd_kafkap_str_t *rack_id); + + +void rd_kafka_assignor_update_subscription( + const rd_kafka_assignor_t *rkas, + const rd_kafka_topic_partition_list_t *subscription); + + +rd_kafka_resp_err_t rd_kafka_assignor_run(struct rd_kafka_cgrp_s *rkcg, + const rd_kafka_assignor_t *rkas, + rd_kafka_metadata_t *metadata, + rd_kafka_group_member_t *members, + int member_cnt, + char *errstr, + size_t errstr_size); + +rd_kafka_assignor_t 
*rd_kafka_assignor_find(rd_kafka_t *rk, + const char *protocol); + +int rd_kafka_assignors_init(rd_kafka_t *rk, char *errstr, size_t errstr_size); +void rd_kafka_assignors_term(rd_kafka_t *rk); + + + +void rd_kafka_group_member_clear(rd_kafka_group_member_t *rkgm); + + +rd_kafka_resp_err_t rd_kafka_range_assignor_init(rd_kafka_t *rk); +rd_kafka_resp_err_t rd_kafka_roundrobin_assignor_init(rd_kafka_t *rk); +rd_kafka_resp_err_t rd_kafka_sticky_assignor_init(rd_kafka_t *rk); +rd_bool_t +rd_kafka_use_rack_aware_assignment(rd_kafka_assignor_topic_t **topics, + size_t topic_cnt, + const rd_kafka_metadata_internal_t *mdi); + +/** + * @name Common unit test functions, macros, and enums to use across assignors. + * + * + * + */ + +/* Tests can be parametrized to contain either only broker racks, only consumer + * racks or both.*/ +typedef enum { + RD_KAFKA_RANGE_ASSIGNOR_UT_NO_BROKER_RACK = 0, + RD_KAFKA_RANGE_ASSIGNOR_UT_NO_CONSUMER_RACK = 1, + RD_KAFKA_RANGE_ASSIGNOR_UT_BROKER_AND_CONSUMER_RACK = 2, + RD_KAFKA_RANGE_ASSIGNOR_UT_CONFIG_CNT = 3, +} rd_kafka_assignor_ut_rack_config_t; + + +void ut_populate_internal_broker_metadata(rd_kafka_metadata_internal_t *mdi, + int num_broker_racks, + rd_kafkap_str_t *all_racks[], + size_t all_racks_cnt); + +void ut_populate_internal_topic_metadata(rd_kafka_metadata_internal_t *mdi); + +void ut_destroy_metadata(rd_kafka_metadata_t *md); + +void ut_set_owned(rd_kafka_group_member_t *rkgm); + +void ut_print_toppar_list(const rd_kafka_topic_partition_list_t *partitions); + +void ut_init_member(rd_kafka_group_member_t *rkgm, const char *member_id, ...); + +void ut_init_member_with_rackv(rd_kafka_group_member_t *rkgm, + const char *member_id, + const rd_kafkap_str_t *rack_id, + ...); + +void ut_init_member_with_rack(rd_kafka_group_member_t *rkgm, + const char *member_id, + const rd_kafkap_str_t *rack_id, + char *topics[], + size_t topic_cnt); + +int verifyAssignment0(const char *function, + int line, + rd_kafka_group_member_t *rkgm, + ...); + +int verifyMultipleAssignment0(const char *function, + int line, + rd_kafka_group_member_t *rkgms, + size_t member_cnt, + ...); + +int verifyNumPartitionsWithRackMismatch0(const char *function, + int line, + rd_kafka_metadata_t *metadata, + rd_kafka_group_member_t *rkgms, + size_t member_cnt, + int expectedNumMismatch); + +#define verifyAssignment(rkgm, ...) \ + do { \ + if (verifyAssignment0(__FUNCTION__, __LINE__, rkgm, \ + __VA_ARGS__)) \ + return 1; \ + } while (0) + +#define verifyMultipleAssignment(rkgms, member_cnt, ...) 
\ + do { \ + if (verifyMultipleAssignment0(__FUNCTION__, __LINE__, rkgms, \ + member_cnt, __VA_ARGS__)) \ + return 1; \ + } while (0) + +#define verifyNumPartitionsWithRackMismatch(metadata, rkgms, member_cnt, \ + expectedNumMismatch) \ + do { \ + if (verifyNumPartitionsWithRackMismatch0( \ + __FUNCTION__, __LINE__, metadata, rkgms, member_cnt, \ + expectedNumMismatch)) \ + return 1; \ + } while (0) + +int verifyValidityAndBalance0(const char *func, + int line, + rd_kafka_group_member_t *members, + size_t member_cnt, + const rd_kafka_metadata_t *metadata); + +#define verifyValidityAndBalance(members, member_cnt, metadata) \ + do { \ + if (verifyValidityAndBalance0(__FUNCTION__, __LINE__, members, \ + member_cnt, metadata)) \ + return 1; \ + } while (0) + +int isFullyBalanced0(const char *function, + int line, + const rd_kafka_group_member_t *members, + size_t member_cnt); + +#define isFullyBalanced(members, member_cnt) \ + do { \ + if (isFullyBalanced0(__FUNCTION__, __LINE__, members, \ + member_cnt)) \ + return 1; \ + } while (0) + +/* Helper macro to initialize a consumer with or without a rack depending on the + * value of parametrization. */ +#define ut_initMemberConditionalRack(member_ptr, member_id, rack, \ + parametrization, ...) \ + do { \ + if (parametrization == \ + RD_KAFKA_RANGE_ASSIGNOR_UT_NO_CONSUMER_RACK) { \ + ut_init_member(member_ptr, member_id, __VA_ARGS__); \ + } else { \ + ut_init_member_with_rackv(member_ptr, member_id, rack, \ + __VA_ARGS__); \ + } \ + } while (0) + +/* Helper macro to initialize rd_kafka_metadata_t* with or without replicas + * depending on the value of parametrization. This accepts variadic arguments + * for topics. */ +#define ut_initMetadataConditionalRack(metadataPtr, replication_factor, \ + num_broker_racks, all_racks, \ + all_racks_cnt, parametrization, ...) \ + do { \ + int num_brokers = num_broker_racks > 0 \ + ? replication_factor * num_broker_racks \ + : replication_factor; \ + if (parametrization == \ + RD_KAFKA_RANGE_ASSIGNOR_UT_NO_BROKER_RACK) { \ + *(metadataPtr) = \ + rd_kafka_metadata_new_topic_mockv(__VA_ARGS__); \ + } else { \ + *(metadataPtr) = \ + rd_kafka_metadata_new_topic_with_partition_replicas_mockv( \ + replication_factor, num_brokers, __VA_ARGS__); \ + ut_populate_internal_broker_metadata( \ + rd_kafka_metadata_get_internal(*(metadataPtr)), \ + num_broker_racks, all_racks, all_racks_cnt); \ + ut_populate_internal_topic_metadata( \ + rd_kafka_metadata_get_internal(*(metadataPtr))); \ + } \ + } while (0) + + +/* Helper macro to initialize rd_kafka_metadata_t* with or without replicas + * depending on the value of parametrization. This accepts a list of topics, + * rather than being variadic. + */ +#define ut_initMetadataConditionalRack0( \ + metadataPtr, replication_factor, num_broker_racks, all_racks, \ + all_racks_cnt, parametrization, topics, topic_cnt) \ + do { \ + int num_brokers = num_broker_racks > 0 \ + ? 
replication_factor * num_broker_racks \ : replication_factor; \ if (parametrization == \ RD_KAFKA_RANGE_ASSIGNOR_UT_NO_BROKER_RACK) { \ *(metadataPtr) = rd_kafka_metadata_new_topic_mock( \ topics, topic_cnt, -1, 0); \ } else { \ *(metadataPtr) = rd_kafka_metadata_new_topic_mock( \ topics, topic_cnt, replication_factor, \ num_brokers); \ ut_populate_internal_broker_metadata( \ rd_kafka_metadata_get_internal(*(metadataPtr)), \ num_broker_racks, all_racks, all_racks_cnt); \ ut_populate_internal_topic_metadata( \ rd_kafka_metadata_get_internal(*(metadataPtr))); \ } \ } while (0) + + +#endif /* _RDKAFKA_ASSIGNOR_H_ */ diff --git a/lib/librdkafka-2.1.0/src/rdkafka_aux.c b/lib/librdkafka-2.3.0/src/rdkafka_aux.c similarity index 79% rename from lib/librdkafka-2.1.0/src/rdkafka_aux.c rename to lib/librdkafka-2.3.0/src/rdkafka_aux.c index 753f03d6782..d327b6c8b09 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_aux.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_aux.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2018 Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill + * 2023 Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -234,19 +235,60 @@ void rd_kafka_acl_result_free(void *ptr) { * @return A new allocated Node object. * Use rd_kafka_Node_destroy() to free when done. */ -rd_kafka_Node_t *rd_kafka_Node_new(int id, +rd_kafka_Node_t *rd_kafka_Node_new(int32_t id, const char *host, uint16_t port, - const char *rack_id) { + const char *rack) { rd_kafka_Node_t *ret = rd_calloc(1, sizeof(*ret)); ret->id = id; ret->port = port; ret->host = rd_strdup(host); - if (rack_id != NULL) - ret->rack_id = rd_strdup(rack_id); + if (rack != NULL) + ret->rack = rd_strdup(rack); return ret; } +/** + * @brief Create a new Node object given a node id, and use broker information + * to populate other fields. + * + * @return A new allocated Node object. + * Use rd_kafka_Node_destroy() to free when done. + * @remark The \p brokers_sorted and \p brokers_internal arrays are assumed to be + * sorted by id. + */ +rd_kafka_Node_t *rd_kafka_Node_new_from_brokers( + int32_t id, + const struct rd_kafka_metadata_broker *brokers_sorted, + const rd_kafka_metadata_broker_internal_t *brokers_internal, + int broker_cnt) { + rd_kafka_Node_t *node = rd_calloc(1, sizeof(*node)); + struct rd_kafka_metadata_broker key_sorted = {.id = id}; + rd_kafka_metadata_broker_internal_t key_internal = {.id = id}; + + struct rd_kafka_metadata_broker *broker = + bsearch(&key_sorted, brokers_sorted, broker_cnt, + sizeof(struct rd_kafka_metadata_broker), + rd_kafka_metadata_broker_cmp); + + rd_kafka_metadata_broker_internal_t *broker_internal = + bsearch(&key_internal, brokers_internal, broker_cnt, + sizeof(rd_kafka_metadata_broker_internal_t), + rd_kafka_metadata_broker_internal_cmp); + + node->id = id; + + if (!broker) + return node; + + node->host = rd_strdup(broker->host); + node->port = broker->port; + if (broker_internal && broker_internal->rack_id) + node->rack = rd_strdup(broker_internal->rack_id); + + return node; +} + /** * @brief Copy \p src Node object * * @return A new allocated Node object. * Use rd_kafka_Node_destroy() to free when done. 
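`rd_kafka_Node_new_from_brokers()` above performs two `bsearch()` lookups against broker arrays that are pre-sorted by id, using a stack key struct. A self-contained sketch of that lookup shape, with a hypothetical `broker` struct standing in for the metadata types:

```c
#include <stdlib.h>
#include <stdint.h>

struct broker {
        int32_t id;
        const char *host;
};

/* Comparator over the id field; also usable with qsort() to
 * establish the ordering bsearch() relies on. */
static int broker_cmp(const void *a, const void *b) {
        const struct broker *ba = a, *bb = b;
        return (ba->id > bb->id) - (ba->id < bb->id);
}

/* Returns the broker with the given id, or NULL if absent.
 * The array must already be sorted by id. */
static const struct broker *find_broker(const struct broker *arr,
                                        size_t cnt, int32_t id) {
        struct broker key = {.id = id};
        return bsearch(&key, arr, cnt, sizeof(*arr), broker_cmp);
}
```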
*/ rd_kafka_Node_t *rd_kafka_Node_copy(const rd_kafka_Node_t *src) { - return rd_kafka_Node_new(src->id, src->host, src->port, src->rack_id); + return rd_kafka_Node_new(src->id, src->host, src->port, src->rack); } void rd_kafka_Node_destroy(rd_kafka_Node_t *node) { rd_free(node->host); - if (node->rack_id) - rd_free(node->rack_id); + if (node->rack) + rd_free(node->rack); rd_free(node); } +/** + * @brief Same as rd_kafka_Node_destroy, but for use as callback which accepts + * (void *) arguments. + * + * @param node + */ +void rd_kafka_Node_free(void *node) { + rd_kafka_Node_destroy((rd_kafka_Node_t *)node); +} + int rd_kafka_Node_id(const rd_kafka_Node_t *node) { return node->id; } @@ -276,3 +328,7 @@ const char *rd_kafka_Node_host(const rd_kafka_Node_t *node) { uint16_t rd_kafka_Node_port(const rd_kafka_Node_t *node) { return node->port; } + +const char *rd_kafka_Node_rack(const rd_kafka_Node_t *node) { + return node->rack; +} diff --git a/lib/librdkafka-2.1.0/src/rdkafka_aux.h b/lib/librdkafka-2.3.0/src/rdkafka_aux.h similarity index 87% rename from lib/librdkafka-2.1.0/src/rdkafka_aux.h rename to lib/librdkafka-2.3.0/src/rdkafka_aux.h index ccf18e91e7c..fec88cb2ad9 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_aux.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_aux.h @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2018 Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill + * 2023 Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -107,14 +108,24 @@ typedef struct rd_kafka_Node_s { int id; /*< Node id */ char *host; /*< Node host */ uint16_t port; /*< Node port */ - char *rack_id; /*< (optional) Node rack id */ + char *rack; /*< (optional) Node rack id */ } rd_kafka_Node_t; -rd_kafka_Node_t * -rd_kafka_Node_new(int id, const char *host, uint16_t port, const char *rack_id); +rd_kafka_Node_t *rd_kafka_Node_new(int32_t id, + const char *host, + uint16_t port, + const char *rack_id); + +rd_kafka_Node_t *rd_kafka_Node_new_from_brokers( + int32_t id, + const struct rd_kafka_metadata_broker *brokers_sorted, + const rd_kafka_metadata_broker_internal_t *brokers_internal, + int broker_cnt); rd_kafka_Node_t *rd_kafka_Node_copy(const rd_kafka_Node_t *src); void rd_kafka_Node_destroy(rd_kafka_Node_t *node); +void rd_kafka_Node_free(void *node); + #endif /* _RDKAFKA_AUX_H_ */ diff --git a/lib/librdkafka-2.1.0/src/rdkafka_background.c b/lib/librdkafka-2.3.0/src/rdkafka_background.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_background.c rename to lib/librdkafka-2.3.0/src/rdkafka_background.c index c69ec1767dd..a9c96606c0d 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_background.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_background.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2018 Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_broker.c b/lib/librdkafka-2.3.0/src/rdkafka_broker.c similarity index 96% rename from lib/librdkafka-2.1.0/src/rdkafka_broker.c rename to lib/librdkafka-2.3.0/src/rdkafka_broker.c index e8fc27b1115..e92f008bfc2 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_broker.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_broker.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023 Confluent Inc. 
* All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -49,6 +50,7 @@ #include #include "rd.h" +#include "rdaddr.h" #include "rdkafka_int.h" #include "rdkafka_msg.h" #include "rdkafka_msgset.h" @@ -79,9 +81,9 @@ static const int rd_kafka_max_block_ms = 1000; const char *rd_kafka_broker_state_names[] = { - "INIT", "DOWN", "TRY_CONNECT", "CONNECT", "SSL_HANDSHAKE", - "AUTH_LEGACY", "UP", "UPDATE", "APIVERSION_QUERY", "AUTH_HANDSHAKE", - "AUTH_REQ"}; + "INIT", "DOWN", "TRY_CONNECT", "CONNECT", "SSL_HANDSHAKE", + "AUTH_LEGACY", "UP", "UPDATE", "APIVERSION_QUERY", "AUTH_HANDSHAKE", + "AUTH_REQ", "REAUTH"}; const char *rd_kafka_secproto_names[] = { [RD_KAFKA_PROTO_PLAINTEXT] = "plaintext", @@ -573,6 +575,8 @@ void rd_kafka_broker_fail(rd_kafka_broker_t *rkb, rkb->rkb_recv_buf = NULL; } + rkb->rkb_reauth_in_progress = rd_false; + va_start(ap, fmt); rd_kafka_broker_set_error(rkb, level, err, fmt, ap); va_end(ap); @@ -591,6 +595,11 @@ void rd_kafka_broker_fail(rd_kafka_broker_t *rkb, old_state = rkb->rkb_state; rd_kafka_broker_set_state(rkb, RD_KAFKA_BROKER_STATE_DOWN); + /* Stop any pending reauth timer, since a teardown/reconnect will + * require a new timer. */ + rd_kafka_timer_stop(&rkb->rkb_rk->rk_timers, &rkb->rkb_sasl_reauth_tmr, + 1 /*lock*/); + /* Unlock broker since a requeue will try to lock it. */ rd_kafka_broker_unlock(rkb); @@ -1834,7 +1843,7 @@ static rd_kafka_buf_t *rd_kafka_waitresp_find(rd_kafka_broker_t *rkb, */ static int rd_kafka_req_response(rd_kafka_broker_t *rkb, rd_kafka_buf_t *rkbuf) { - rd_kafka_buf_t *req; + rd_kafka_buf_t *req = NULL; int log_decode_errors = LOG_ERR; rd_kafka_assert(rkb->rkb_rk, thrd_is_current(rkb->rkb_thread)); @@ -2237,7 +2246,8 @@ static int rd_kafka_broker_connect(rd_kafka_broker_t *rkb) { */ void rd_kafka_broker_connect_up(rd_kafka_broker_t *rkb) { - rkb->rkb_max_inflight = rkb->rkb_rk->rk_conf.max_inflight; + rkb->rkb_max_inflight = rkb->rkb_rk->rk_conf.max_inflight; + rkb->rkb_reauth_in_progress = rd_false; rd_kafka_broker_lock(rkb); rd_kafka_broker_set_state(rkb, RD_KAFKA_BROKER_STATE_UP); @@ -2813,6 +2823,7 @@ int rd_kafka_send(rd_kafka_broker_t *rkb) { */ void rd_kafka_broker_buf_retry(rd_kafka_broker_t *rkb, rd_kafka_buf_t *rkbuf) { + int64_t backoff = 0; /* Restore original replyq since replyq.q will have been NULLed * by buf_callback()/replyq_enq(). */ if (!rkbuf->rkbuf_replyq.q && rkbuf->rkbuf_orig_replyq.q) { @@ -2840,9 +2851,24 @@ void rd_kafka_broker_buf_retry(rd_kafka_broker_t *rkb, rd_kafka_buf_t *rkbuf) { rkb->rkb_rk->rk_conf.retry_backoff_ms); rd_atomic64_add(&rkb->rkb_c.tx_retries, 1); + /* In some cases, failed Produce requests do not increment the retry + * count, see rd_kafka_handle_Produce_error. 
*/ + if (rkbuf->rkbuf_retries > 0) + backoff = (1 << (rkbuf->rkbuf_retries - 1)) * + (rkb->rkb_rk->rk_conf.retry_backoff_ms); + else + backoff = rkb->rkb_rk->rk_conf.retry_backoff_ms; + + /* We are multiplying by 10 as (backoff_ms * percent * 1000)/100 -> + * backoff_ms * jitter * 10 */ + backoff = rd_jitter(100 - RD_KAFKA_RETRY_JITTER_PERCENT, + 100 + RD_KAFKA_RETRY_JITTER_PERCENT) * + backoff * 10; - rkbuf->rkbuf_ts_retry = - rd_clock() + (rkb->rkb_rk->rk_conf.retry_backoff_ms * 1000); + if (backoff > rkb->rkb_rk->rk_conf.retry_backoff_max_ms * 1000) + backoff = rkb->rkb_rk->rk_conf.retry_backoff_max_ms * 1000; + + rkbuf->rkbuf_ts_retry = rd_clock() + backoff; /* Precaution: time out the request if it hasn't moved from the * retry queue within the retry interval (such as when the broker is * down). */ @@ -3451,6 +3477,20 @@ rd_kafka_broker_op_serve(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko) { wakeup = rd_true; break; + case RD_KAFKA_OP_SASL_REAUTH: + rd_rkb_dbg(rkb, BROKER, "REAUTH", "Received REAUTH op"); + + /* We don't need a lock for rkb_max_inflight. It's changed only + * on the broker thread. */ + rkb->rkb_max_inflight = 1; + + rd_kafka_broker_lock(rkb); + rd_kafka_broker_set_state(rkb, RD_KAFKA_BROKER_STATE_REAUTH); + rd_kafka_broker_unlock(rkb); + + wakeup = rd_true; + break; + default: rd_kafka_assert(rkb->rkb_rk, !*"unhandled op type"); break; @@ -4528,8 +4568,15 @@ static int rd_kafka_broker_thread_main(void *arg) { rd_kafka_broker_addresses_exhausted(rkb)) rd_kafka_broker_update_reconnect_backoff( rkb, &rkb->rkb_rk->rk_conf, rd_clock()); + /* If we haven't made progress from the last state, and + * if we have exceeded + * socket_connection_setup_timeout_ms, then error out. + * Don't error out in case this is a reauth, for which + * socket_connection_setup_timeout_ms is not + * applicable. */ else if ( rkb->rkb_state == orig_state && + !rkb->rkb_reauth_in_progress && rd_clock() >= (rkb->rkb_ts_connect + (rd_ts_t)rk->rk_conf @@ -4544,6 +4591,22 @@ static int rd_kafka_broker_thread_main(void *arg) { break; + case RD_KAFKA_BROKER_STATE_REAUTH: + /* Since we've already authenticated once, the provider + * should be ready. */ + rd_assert(rd_kafka_sasl_ready(rkb->rkb_rk)); + + /* Since we aren't disconnecting, the transport isn't + * destroyed, and as a consequence, some of the SASL + * state leaks unless we destroy it before the reauth. + */ + rd_kafka_sasl_close(rkb->rkb_transport); + + rkb->rkb_reauth_in_progress = rd_true; + + rd_kafka_broker_connect_auth(rkb); + break; + case RD_KAFKA_BROKER_STATE_UPDATE: /* FALLTHRU */ case RD_KAFKA_BROKER_STATE_UP: @@ -4672,6 +4735,9 @@ void rd_kafka_broker_destroy_final(rd_kafka_broker_t *rkb) { mtx_unlock(&rkb->rkb_logname_lock); mtx_destroy(&rkb->rkb_logname_lock); + rd_kafka_timer_stop(&rkb->rkb_rk->rk_timers, &rkb->rkb_sasl_reauth_tmr, + 1 /*lock*/); + mtx_destroy(&rkb->rkb_lock); rd_refcnt_destroy(&rkb->rkb_refcnt); @@ -5208,6 +5274,31 @@ static int rd_kafka_broker_name_parse(rd_kafka_t *rk, return 0; } +/** + * @brief Add a broker from a string of type "[proto://]host[:port]" to the list + * of brokers. *cnt is increased by one if a broker was added, else not. 
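The retry path above computes `(1 << (retries - 1)) * retry.backoff.ms`, applies a jitter of ±RD_KAFKA_RETRY_JITTER_PERCENT, converts to microseconds (the `* 10` combined with the percentage works out to `* 1000 / 100`), and caps the result at retry.backoff.max.ms. A standalone sketch of the same arithmetic; the jitter percentage value and the rand()-based jitter are assumptions standing in for rd_jitter():

```c
#include <stdint.h>
#include <stdlib.h>

#define RETRY_JITTER_PERCENT 20 /* assumed value, for illustration only */

/* Returns the retry backoff in microseconds for a given retry count
 * (assumed small enough that the shift does not overflow). */
static int64_t retry_backoff_us(int retries, int64_t backoff_ms,
                                int64_t backoff_max_ms) {
        int64_t backoff = retries > 0
                              ? (1LL << (retries - 1)) * backoff_ms
                              : backoff_ms;
        /* Jitter factor in [100 - p, 100 + p]; multiplying the percent
         * by the millisecond value and by 10 yields microseconds:
         * (backoff_ms * percent * 1000) / 100 == backoff_ms * percent * 10 */
        int64_t jitter = 100 - RETRY_JITTER_PERCENT +
                         rand() % (2 * RETRY_JITTER_PERCENT + 1);

        backoff = jitter * backoff * 10;
        if (backoff > backoff_max_ms * 1000)
                backoff = backoff_max_ms * 1000;
        return backoff;
}
```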
+ */ +static void rd_kafka_find_or_add_broker(rd_kafka_t *rk, + rd_kafka_secproto_t proto, + const char *host, + uint16_t port, + int *cnt) { + rd_kafka_broker_t *rkb = NULL; + + if ((rkb = rd_kafka_broker_find(rk, proto, host, port)) && + rkb->rkb_source == RD_KAFKA_CONFIGURED) { + (*cnt)++; + } else if (rd_kafka_broker_add(rk, RD_KAFKA_CONFIGURED, proto, host, + port, RD_KAFKA_NODEID_UA) != NULL) + (*cnt)++; + + /* If rd_kafka_broker_find returned a broker its + * reference needs to be released + * See issue #193 */ + if (rkb) + rd_kafka_broker_destroy(rkb); +} + /** * @brief Adds a (csv list of) broker(s). * Returns the number of brokers successfully added. @@ -5215,17 +5306,22 @@ static int rd_kafka_broker_name_parse(rd_kafka_t *rk, * @locality any thread * @locks none */ -int rd_kafka_brokers_add0(rd_kafka_t *rk, const char *brokerlist) { +int rd_kafka_brokers_add0(rd_kafka_t *rk, + const char *brokerlist, + rd_bool_t is_bootstrap_server_list) { char *s_copy = rd_strdup(brokerlist); char *s = s_copy; int cnt = 0; - rd_kafka_broker_t *rkb; - int pre_cnt = rd_atomic32_get(&rk->rk_broker_cnt); + int pre_cnt = rd_atomic32_get(&rk->rk_broker_cnt); + rd_sockaddr_inx_t *sinx; + rd_sockaddr_list_t *sockaddr_list; /* Parse comma-separated list of brokers. */ while (*s) { uint16_t port; const char *host; + const char *err_str; + const char *resolved_FQDN; rd_kafka_secproto_t proto; if (*s == ',' || *s == ' ') { @@ -5238,20 +5334,43 @@ int rd_kafka_brokers_add0(rd_kafka_t *rk, const char *brokerlist) { break; rd_kafka_wrlock(rk); + if (is_bootstrap_server_list && + rk->rk_conf.client_dns_lookup == + RD_KAFKA_RESOLVE_CANONICAL_BOOTSTRAP_SERVERS_ONLY) { + rd_kafka_dbg(rk, ALL, "INIT", + "Canonicalizing bootstrap broker %s:%d", + host, port); + sockaddr_list = rd_getaddrinfo( + host, RD_KAFKA_PORT_STR, AI_ADDRCONFIG, + rk->rk_conf.broker_addr_family, SOCK_STREAM, + IPPROTO_TCP, rk->rk_conf.resolve_cb, + rk->rk_conf.opaque, &err_str); + + if (!sockaddr_list) { + rd_kafka_log(rk, LOG_WARNING, "BROKER", + "Failed to resolve '%s': %s", host, + err_str); + rd_kafka_wrunlock(rk); + continue; + } - if ((rkb = rd_kafka_broker_find(rk, proto, host, port)) && - rkb->rkb_source == RD_KAFKA_CONFIGURED) { - cnt++; - } else if (rd_kafka_broker_add(rk, RD_KAFKA_CONFIGURED, proto, - host, port, - RD_KAFKA_NODEID_UA) != NULL) - cnt++; - - /* If rd_kafka_broker_find returned a broker its - * reference needs to be released - * See issue #193 */ - if (rkb) - rd_kafka_broker_destroy(rkb) + RD_SOCKADDR_LIST_FOREACH(sinx, sockaddr_list) { + resolved_FQDN = rd_sockaddr2str( + sinx, RD_SOCKADDR2STR_F_RESOLVE); + rd_kafka_dbg( + rk, ALL, "INIT", + "Adding broker with resolved hostname %s", + resolved_FQDN); + + rd_kafka_find_or_add_broker( + rk, proto, resolved_FQDN, port, &cnt); + }; + + rd_sockaddr_list_destroy(sockaddr_list); + } else { + rd_kafka_find_or_add_broker(rk, proto, host, port, + &cnt); + } rd_kafka_wrunlock(rk); } @@ -5273,7 +5392,7 @@ int rd_kafka_brokers_add0(rd_kafka_t *rk, const char *brokerlist) { int rd_kafka_brokers_add(rd_kafka_t *rk, const char *brokerlist) { - return rd_kafka_brokers_add0(rk, brokerlist); + return rd_kafka_brokers_add0(rk, brokerlist, rd_false); } @@ -5851,6 +5970,46 @@ void rd_kafka_broker_monitor_del(rd_kafka_broker_monitor_t *rkbmon) { rd_kafka_broker_destroy(rkb); } +/** + * @brief Starts the reauth timer for this broker. + * If connections_max_reauth_ms=0, then no timer is set. 
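With `client.dns.lookup=resolve_canonical_bootstrap_servers_only`, the branch above expands each bootstrap name into one broker entry per resolved address. A rough POSIX sketch of that expansion using getaddrinfo() directly; librdkafka itself goes through rd_getaddrinfo() and the configured resolve callback instead:

```c
#include <string.h>
#include <netdb.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

/* Returns the number of resolved addresses for one bootstrap entry;
 * a real client would register each one as its own broker. */
static int count_canonical_brokers(const char *host, const char *port) {
        struct addrinfo hints, *res, *ai;
        int cnt = 0;

        memset(&hints, 0, sizeof(hints));
        hints.ai_family   = AF_UNSPEC;
        hints.ai_socktype = SOCK_STREAM;
        hints.ai_flags    = AI_ADDRCONFIG;

        if (getaddrinfo(host, port, &hints, &res) != 0)
                return 0; /* Unresolvable entry: log and skip it */

        for (ai = res; ai; ai = ai->ai_next) {
                char addr[INET6_ADDRSTRLEN];
                const void *src;

                if (ai->ai_family == AF_INET)
                        src = &((struct sockaddr_in *)ai->ai_addr)->sin_addr;
                else if (ai->ai_family == AF_INET6)
                        src = &((struct sockaddr_in6 *)ai->ai_addr)->sin6_addr;
                else
                        continue;

                if (inet_ntop(ai->ai_family, src, addr, sizeof(addr)))
                        cnt++; /* "addr" is what gets added as a broker */
        }
        freeaddrinfo(res);
        return cnt;
}
```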
+ * + * @locks none + * @locality broker thread + */ +void rd_kafka_broker_start_reauth_timer(rd_kafka_broker_t *rkb, + int64_t connections_max_reauth_ms) { + /* Timer should not already be started. It indicates that we're about to + * schedule an extra reauth, but this shouldn't be a cause for failure + * in production use cases, so, clear the timer. */ + if (rd_kafka_timer_is_started(&rkb->rkb_rk->rk_timers, + &rkb->rkb_sasl_reauth_tmr)) + rd_kafka_timer_stop(&rkb->rkb_rk->rk_timers, + &rkb->rkb_sasl_reauth_tmr, 1 /*lock*/); + + if (connections_max_reauth_ms == 0) + return; + + rd_kafka_timer_start_oneshot( + &rkb->rkb_rk->rk_timers, &rkb->rkb_sasl_reauth_tmr, rd_false, + connections_max_reauth_ms * 900 /* 90% * microsecond*/, + rd_kafka_broker_start_reauth_cb, (void *)rkb); +} + +/** + * @brief Starts the reauth process for the broker rkb. + * + * @locks none + * @locality main thread + */ +void rd_kafka_broker_start_reauth_cb(rd_kafka_timers_t *rkts, void *_rkb) { + rd_kafka_op_t *rko = NULL; + rd_kafka_broker_t *rkb = (rd_kafka_broker_t *)_rkb; + rd_dassert(rkb); + rko = rd_kafka_op_new(RD_KAFKA_OP_SASL_REAUTH); + rd_kafka_q_enq(rkb->rkb_ops, rko); +} + /** * @name Unit tests * @{ diff --git a/lib/librdkafka-2.1.0/src/rdkafka_broker.h b/lib/librdkafka-2.3.0/src/rdkafka_broker.h similarity index 97% rename from lib/librdkafka-2.1.0/src/rdkafka_broker.h rename to lib/librdkafka-2.3.0/src/rdkafka_broker.h index 1e454d4d718..30f66b25c9d 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_broker.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_broker.h @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012,2013 Magnus Edenhill + * Copyright (c) 2012,2022, Magnus Edenhill + * 2023 Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -54,6 +55,7 @@ typedef enum { RD_KAFKA_BROKER_STATE_APIVERSION_QUERY, RD_KAFKA_BROKER_STATE_AUTH_HANDSHAKE, RD_KAFKA_BROKER_STATE_AUTH_REQ, + RD_KAFKA_BROKER_STATE_REAUTH, } rd_kafka_broker_state_t; /** @@ -252,6 +254,9 @@ struct rd_kafka_broker_s { /* rd_kafka_broker_t */ /** Absolute time of last connection attempt. */ rd_ts_t rkb_ts_connect; + /** True if a reauthentication is in progress. */ + rd_bool_t rkb_reauth_in_progress; + /**< Persistent connection demand is tracked by * a counter for each type of demand. 
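The `connections_max_reauth_ms * 900` above is the whole scheduling rule: the timer interval is in microseconds, so 900 = 1000 µs/ms × 90%, leaving 10% headroom to finish the SASL handshake before the broker closes the connection. As a tiny sketch of that conversion:

```c
#include <stdint.h>

/* connections_max_reauth_ms arrives in milliseconds from the broker;
 * the timer API takes microseconds. Fire at 90% of the session
 * lifetime: 900 = 1000 us/ms * 90%. */
static int64_t reauth_interval_us(int64_t connections_max_reauth_ms) {
        if (connections_max_reauth_ms == 0)
                return 0; /* Broker imposes no reauth: schedule nothing */
        return connections_max_reauth_ms * 900;
}
```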
* The broker thread will maintain a persistent connection @@ -323,6 +328,9 @@ struct rd_kafka_broker_s { /* rd_kafka_broker_t */ rd_kafka_resp_err_t err; /**< Last error code */ int cnt; /**< Number of identical errors */ } rkb_last_err; + + + rd_kafka_timer_t rkb_sasl_reauth_tmr; }; #define rd_kafka_broker_keep(rkb) rd_refcnt_add(&(rkb)->rkb_refcnt) @@ -461,7 +469,9 @@ rd_kafka_broker_t *rd_kafka_broker_controller_async(rd_kafka_t *rk, int state, rd_kafka_enq_once_t *eonce); -int rd_kafka_brokers_add0(rd_kafka_t *rk, const char *brokerlist); +int rd_kafka_brokers_add0(rd_kafka_t *rk, + const char *brokerlist, + rd_bool_t is_bootstrap_server_list); void rd_kafka_broker_set_state(rd_kafka_broker_t *rkb, int state); void rd_kafka_broker_fail(rd_kafka_broker_t *rkb, @@ -602,6 +612,11 @@ void rd_kafka_broker_monitor_add(rd_kafka_broker_monitor_t *rkbmon, void rd_kafka_broker_monitor_del(rd_kafka_broker_monitor_t *rkbmon); +void rd_kafka_broker_start_reauth_timer(rd_kafka_broker_t *rkb, + int64_t connections_max_reauth_ms); + +void rd_kafka_broker_start_reauth_cb(rd_kafka_timers_t *rkts, void *rkb); + int unittest_broker(void); #endif /* _RDKAFKA_BROKER_H_ */ diff --git a/lib/librdkafka-2.1.0/src/rdkafka_buf.c b/lib/librdkafka-2.3.0/src/rdkafka_buf.c similarity index 98% rename from lib/librdkafka-2.1.0/src/rdkafka_buf.c rename to lib/librdkafka-2.3.0/src/rdkafka_buf.c index 5a0e131e8b9..362f57a27d2 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_buf.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_buf.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -120,6 +121,18 @@ rd_kafka_buf_t *rd_kafka_buf_new0(int segcnt, size_t size, int flags) { return rkbuf; } +/** + * @brief Upgrade request header to flexver by writing header tags. + */ +void rd_kafka_buf_upgrade_flexver_request(rd_kafka_buf_t *rkbuf) { + if (likely(!(rkbuf->rkbuf_flags & RD_KAFKA_OP_F_FLEXVER))) { + rkbuf->rkbuf_flags |= RD_KAFKA_OP_F_FLEXVER; + + /* Empty request header tags */ + rd_kafka_buf_write_i8(rkbuf, 0); + } +} + /** * @brief Create new request buffer with the request-header written (will @@ -165,12 +178,7 @@ rd_kafka_buf_t *rd_kafka_buf_new_request0(rd_kafka_broker_t *rkb, rd_kafka_buf_write_kstr(rkbuf, rkb->rkb_rk->rk_client_id); if (is_flexver) { - /* Must set flexver after writing the client id since - * it is still a standard non-compact string. */ - rkbuf->rkbuf_flags |= RD_KAFKA_OP_F_FLEXVER; - - /* Empty request header tags */ - rd_kafka_buf_write_i8(rkbuf, 0); + rd_kafka_buf_upgrade_flexver_request(rkbuf); } return rkbuf; diff --git a/lib/librdkafka-2.1.0/src/rdkafka_buf.h b/lib/librdkafka-2.3.0/src/rdkafka_buf.h similarity index 92% rename from lib/librdkafka-2.1.0/src/rdkafka_buf.h rename to lib/librdkafka-2.3.0/src/rdkafka_buf.h index b4f606317b0..099f705018a 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_buf.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_buf.h @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023 Confluent Inc. * All rights reserved. 
 * * Redistribution and use in source and binary forms, with or without @@ -48,21 +49,36 @@ typedef struct rd_tmpabuf_s { size_t of; char *buf; int failed; - int assert_on_fail; + rd_bool_t assert_on_fail; } rd_tmpabuf_t; /** - * @brief Allocate new tmpabuf with \p size bytes pre-allocated. + * @brief Initialize a new tmpabuf with a non-final size of \p size bytes. */ static RD_UNUSED void -rd_tmpabuf_new(rd_tmpabuf_t *tab, size_t size, int assert_on_fail) { - tab->buf = rd_malloc(size); - tab->size = size; +rd_tmpabuf_new(rd_tmpabuf_t *tab, size_t size, rd_bool_t assert_on_fail) { + tab->buf = NULL; + tab->size = RD_ROUNDUP(size, 8); tab->of = 0; tab->failed = 0; tab->assert_on_fail = assert_on_fail; } +/** + * @brief Add \p _times new allocations of \p _size bytes each, + * rounded up to the maximum word size. + */ +#define rd_tmpabuf_add_alloc_times(_tab, _size, _times) \ + (_tab)->size += RD_ROUNDUP(_size, 8) * _times + +#define rd_tmpabuf_add_alloc(_tab, _size) \ + rd_tmpabuf_add_alloc_times(_tab, _size, 1) +/** + * @brief Finalize the tmpabuf, pre-allocating tab->size bytes. + */ +#define rd_tmpabuf_finalize(_tab) (_tab)->buf = rd_malloc((_tab)->size) + /** * @brief Free memory allocated by tmpabuf */ @@ -366,6 +382,9 @@ struct rd_kafka_buf_s { /* rd_kafka_buf_t */ rd_bool_t all_topics; /**< Full/All topics requested */ rd_bool_t cgrp_update; /**< Update cgrp with topic * status from response. */ + rd_bool_t force_racks; /**< Force the returned metadata + * to contain partition to + * rack mapping. */ int *decr; /* Decrement this integer by one * when request is complete: @@ -682,6 +701,10 @@ struct rd_kafka_buf_s { /* rd_kafka_buf_t */ size_t _slen; \ char *_dst; \ rd_kafka_buf_read_str(rkbuf, &_kstr); \ + if (RD_KAFKAP_STR_IS_NULL(&_kstr)) { \ + dst = NULL; \ + break; \ + } \ _slen = RD_KAFKAP_STR_LEN(&_kstr); \ if (!(_dst = rd_tmpabuf_write(tmpabuf, _kstr.str, _slen + 1))) \ rd_kafka_buf_parse_fail( \ @@ -703,12 +726,21 @@ struct rd_kafka_buf_s { /* rd_kafka_buf_t */ rd_kafka_buf_skip(rkbuf, RD_KAFKAP_STR_LEN0(_slen)); \ } while (0) -/* Read Kafka Bytes representation (4+N). - * The 'kbytes' will be updated to point to rkbuf data */ -#define rd_kafka_buf_read_bytes(rkbuf, kbytes) \ +/** + * Read Kafka COMPACT_BYTES representation (VARINT+N) or + * standard BYTES representation (4+N). + * The 'kbytes' will be updated to point to rkbuf data. + */ +#define rd_kafka_buf_read_kbytes(rkbuf, kbytes) \ do { \ - int _klen; \ - rd_kafka_buf_read_i32a(rkbuf, _klen); \ + int32_t _klen; \ + if (!(rkbuf->rkbuf_flags & RD_KAFKA_OP_F_FLEXVER)) { \ + rd_kafka_buf_read_i32a(rkbuf, _klen); \ + } else { \ + uint64_t _uva; \ + rd_kafka_buf_read_uvarint(rkbuf, &_uva); \ + _klen = ((int32_t)_uva) - 1; \ + } \ (kbytes)->len = _klen; \ if (RD_KAFKAP_BYTES_IS_NULL(kbytes)) { \ (kbytes)->data = NULL; \ @@ -720,7 +752,6 @@ struct rd_kafka_buf_s { /* rd_kafka_buf_t */ rd_kafka_buf_check_len(rkbuf, _klen); \ } while (0) - /** * @brief Read \p size bytes from buffer, setting \p *ptr to the start * of the memory region.
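The rd_tmpabuf rework in the hunk above turns the old allocate-on-init helper into a declare/finalize pattern: each planned allocation is registered first, and a single buffer of the rounded-up total is allocated once. A minimal sketch of that call sequence (the `src` and `name` variables and the two allocations are illustrative assumptions, not taken from this patch):

    rd_tmpabuf_t tab;
    struct rd_kafka_metadata *md;

    rd_tmpabuf_new(&tab, 0, rd_true /* assert_on_fail */);
    /* Declare all allocations up front; each is rounded up to 8 bytes. */
    rd_tmpabuf_add_alloc(&tab, sizeof(*md));
    rd_tmpabuf_add_alloc(&tab, strlen(name) + 1);
    /* One rd_malloc() of the accumulated total. */
    rd_tmpabuf_finalize(&tab);
    /* Writes then carve from the finalized buffer, as before. */
    md                   = rd_tmpabuf_write(&tab, src, sizeof(*md));
    md->orig_broker_name = rd_tmpabuf_write_str(&tab, name);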
@@ -737,7 +768,7 @@ struct rd_kafka_buf_s { /* rd_kafka_buf_t */ /** * @brief Read varint-length Kafka Bytes representation */ -#define rd_kafka_buf_read_bytes_varint(rkbuf, kbytes) \ +#define rd_kafka_buf_read_kbytes_varint(rkbuf, kbytes) \ do { \ int64_t _len2; \ size_t _r = \ @@ -784,9 +815,8 @@ struct rd_kafka_buf_s { /* rd_kafka_buf_t */ uint64_t _tagtype, _taglen; \ rd_kafka_buf_read_uvarint(rkbuf, &_tagtype); \ rd_kafka_buf_read_uvarint(rkbuf, &_taglen); \ - if (_taglen > 1) \ - rd_kafka_buf_skip(rkbuf, \ - (size_t)(_taglen - 1)); \ + if (_taglen > 0) \ + rd_kafka_buf_skip(rkbuf, (size_t)(_taglen)); \ } \ } while (0) @@ -918,6 +948,7 @@ rd_kafka_buf_t *rd_kafka_buf_new_request0(rd_kafka_broker_t *rkb, #define rd_kafka_buf_new_flexver_request(rkb, ApiKey, segcnt, size, \ is_flexver) \ rd_kafka_buf_new_request0(rkb, ApiKey, segcnt, size, is_flexver) +void rd_kafka_buf_upgrade_flexver_request(rd_kafka_buf_t *rkbuf); rd_kafka_buf_t * rd_kafka_buf_new_shadow(const void *ptr, size_t size, void (*free_cb)(void *)); @@ -1191,7 +1222,6 @@ rd_kafka_buf_update_i64(rd_kafka_buf_t *rkbuf, size_t of, int64_t v) { rd_kafka_buf_update(rkbuf, of, &v, sizeof(v)); } - /** * @brief Write standard (2-byte header) or KIP-482 COMPACT_STRING to buffer. * @@ -1297,30 +1327,40 @@ static RD_INLINE void rd_kafka_buf_push_kstr(rd_kafka_buf_t *rkbuf, static RD_INLINE size_t rd_kafka_buf_write_kbytes(rd_kafka_buf_t *rkbuf, const rd_kafkap_bytes_t *kbytes) { - size_t len; + size_t len, r; - if (!kbytes || RD_KAFKAP_BYTES_IS_NULL(kbytes)) - return rd_kafka_buf_write_i32(rkbuf, -1); + if (!(rkbuf->rkbuf_flags & RD_KAFKA_OP_F_FLEXVER)) { + if (!kbytes || RD_KAFKAP_BYTES_IS_NULL(kbytes)) + return rd_kafka_buf_write_i32(rkbuf, -1); - if (RD_KAFKAP_BYTES_IS_SERIALIZED(kbytes)) - return rd_kafka_buf_write(rkbuf, RD_KAFKAP_BYTES_SER(kbytes), - RD_KAFKAP_BYTES_SIZE(kbytes)); + if (RD_KAFKAP_BYTES_IS_SERIALIZED(kbytes)) + return rd_kafka_buf_write(rkbuf, + RD_KAFKAP_BYTES_SER(kbytes), + RD_KAFKAP_BYTES_SIZE(kbytes)); - len = RD_KAFKAP_BYTES_LEN(kbytes); - rd_kafka_buf_write_i32(rkbuf, (int32_t)len); - rd_kafka_buf_write(rkbuf, kbytes->data, len); + len = RD_KAFKAP_BYTES_LEN(kbytes); + rd_kafka_buf_write_i32(rkbuf, (int32_t)len); + rd_kafka_buf_write(rkbuf, kbytes->data, len); - return 4 + len; -} + return 4 + len; + } -/** - * Push (i.e., no copy) Kafka bytes to buffer iovec - */ -static RD_INLINE void -rd_kafka_buf_push_kbytes(rd_kafka_buf_t *rkbuf, - const rd_kafkap_bytes_t *kbytes) { - rd_kafka_buf_push(rkbuf, RD_KAFKAP_BYTES_SER(kbytes), - RD_KAFKAP_BYTES_SIZE(kbytes), NULL); + /* COMPACT_BYTES lengths are: + * 0 = NULL, + * 1 = empty + * N..
= length + 1 + */ + if (!kbytes) + len = 0; + else + len = kbytes->len + 1; + + r = rd_kafka_buf_write_uvarint(rkbuf, (uint64_t)len); + if (len > 1) { + rd_kafka_buf_write(rkbuf, kbytes->data, len - 1); + r += len - 1; + } + return r; } /** @@ -1404,4 +1444,20 @@ void rd_kafka_buf_set_maker(rd_kafka_buf_t *rkbuf, void *make_opaque, void (*free_make_opaque_cb)(void *make_opaque)); + +#define rd_kafka_buf_read_uuid(rkbuf, uuid) \ + do { \ + rd_kafka_buf_read_i64(rkbuf, \ + &((uuid)->most_significant_bits)); \ + rd_kafka_buf_read_i64(rkbuf, \ + &((uuid)->least_significant_bits)); \ + (uuid)->base64str[0] = '\0'; \ + } while (0) + +static RD_UNUSED void rd_kafka_buf_write_uuid(rd_kafka_buf_t *rkbuf, + rd_kafka_Uuid_t *uuid) { + rd_kafka_buf_write_i64(rkbuf, uuid->most_significant_bits); + rd_kafka_buf_write_i64(rkbuf, uuid->least_significant_bits); +} + #endif /* _RDKAFKA_BUF_H_ */ diff --git a/lib/librdkafka-2.1.0/src/rdkafka_cert.c b/lib/librdkafka-2.3.0/src/rdkafka_cert.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_cert.c rename to lib/librdkafka-2.3.0/src/rdkafka_cert.c index 2a19e454931..a14814d0a18 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_cert.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_cert.c @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2019 Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_cert.h b/lib/librdkafka-2.3.0/src/rdkafka_cert.h similarity index 98% rename from lib/librdkafka-2.1.0/src/rdkafka_cert.h rename to lib/librdkafka-2.3.0/src/rdkafka_cert.h index b53f46c010d..819773ba308 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_cert.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_cert.h @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2019 Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_cgrp.c b/lib/librdkafka-2.3.0/src/rdkafka_cgrp.c similarity index 97% rename from lib/librdkafka-2.1.0/src/rdkafka_cgrp.c rename to lib/librdkafka-2.3.0/src/rdkafka_cgrp.c index 026e933210e..eb953bb56b2 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_cgrp.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_cgrp.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023 Confluent Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -369,7 +370,8 @@ void rd_kafka_cgrp_destroy_final(rd_kafka_cgrp_t *rkcg) { rd_list_destroy(&rkcg->rkcg_toppars); rd_list_destroy(rkcg->rkcg_subscribed_topics); rd_kafka_topic_partition_list_destroy(rkcg->rkcg_errored_topics); - if (rkcg->rkcg_assignor && rkcg->rkcg_assignor->rkas_destroy_state_cb) + if (rkcg->rkcg_assignor && rkcg->rkcg_assignor->rkas_destroy_state_cb && + rkcg->rkcg_assignor_state) rkcg->rkcg_assignor->rkas_destroy_state_cb( rkcg->rkcg_assignor_state); rd_free(rkcg); @@ -415,7 +417,7 @@ rd_kafka_cgrp_t *rd_kafka_cgrp_new(rd_kafka_t *rk, rkcg->rkcg_wait_coord_q = rd_kafka_q_new(rk); rkcg->rkcg_wait_coord_q->rkq_serve = rkcg->rkcg_ops->rkq_serve; rkcg->rkcg_wait_coord_q->rkq_opaque = rkcg->rkcg_ops->rkq_opaque; - rkcg->rkcg_q = rd_kafka_q_new(rk); + rkcg->rkcg_q = rd_kafka_consume_q_new(rk); rkcg->rkcg_group_instance_id = rd_kafkap_str_new(rk->rk_conf.group_instance_id, -1); @@ -753,8 +755,11 @@ void rd_kafka_cgrp_coord_query(rd_kafka_cgrp_t *rkcg, const char *reason) { rd_kafka_broker_destroy(rkb); - /* Back off the next intervalled query since we just sent one. */ - rd_interval_reset_to_now(&rkcg->rkcg_coord_query_intvl, 0); + /* Back off the next intervalled query with a jitter since we just sent + * one. */ + rd_interval_reset_to_now_with_jitter(&rkcg->rkcg_coord_query_intvl, 0, + 500, + RD_KAFKA_RETRY_JITTER_PERCENT); } /** @@ -1511,7 +1516,7 @@ static void rd_kafka_cgrp_handle_SyncGroup_memberstate( if (!(assignment = rd_kafka_buf_read_topic_partitions(rkbuf, 0, fields))) goto err_parse; - rd_kafka_buf_read_bytes(rkbuf, &UserData); + rd_kafka_buf_read_kbytes(rkbuf, &UserData); done: rd_kafka_cgrp_update_session_timeout(rkcg, rd_true /*reset timeout*/); @@ -1616,7 +1621,7 @@ static void rd_kafka_cgrp_handle_SyncGroup(rd_kafka_t *rk, rd_kafka_buf_read_throttle_time(rkbuf); rd_kafka_buf_read_i16(rkbuf, &ErrorCode); - rd_kafka_buf_read_bytes(rkbuf, &MemberState); + rd_kafka_buf_read_kbytes(rkbuf, &MemberState); err: actions = rd_kafka_err_action(rkb, ErrorCode, request, @@ -1662,7 +1667,7 @@ static void rd_kafka_cgrp_handle_SyncGroup(rd_kafka_t *rk, static void rd_kafka_cgrp_assignor_run(rd_kafka_cgrp_t *rkcg, rd_kafka_assignor_t *rkas, rd_kafka_resp_err_t err, - rd_kafka_metadata_t *metadata, + rd_kafka_metadata_internal_t *metadata, rd_kafka_group_member_t *members, int member_cnt) { char errstr[512]; @@ -1677,8 +1682,8 @@ static void rd_kafka_cgrp_assignor_run(rd_kafka_cgrp_t *rkcg, *errstr = '\0'; /* Run assignor */ - err = rd_kafka_assignor_run(rkcg, rkas, metadata, members, member_cnt, - errstr, sizeof(errstr)); + err = rd_kafka_assignor_run(rkcg, rkas, &metadata->metadata, members, + member_cnt, errstr, sizeof(errstr)); if (err) { if (!*errstr) @@ -1745,7 +1750,7 @@ rd_kafka_cgrp_assignor_handle_Metadata_op(rd_kafka_t *rk, } rd_kafka_cgrp_assignor_run(rkcg, rkcg->rkcg_assignor, rko->rko_err, - rko->rko_u.metadata.md, + rko->rko_u.metadata.mdi, rkcg->rkcg_group_leader.members, rkcg->rkcg_group_leader.member_cnt); @@ -1777,9 +1782,12 @@ static int rd_kafka_group_MemberMetadata_consumer_read( rkbuf = rd_kafka_buf_new_shadow( MemberMetadata->data, RD_KAFKAP_BYTES_LEN(MemberMetadata), NULL); - /* Protocol parser needs a broker handle to log errors on. */ - rkbuf->rkbuf_rkb = rkb; - rd_kafka_broker_keep(rkb); + /* Protocol parser needs a broker handle to log errors on. + * If none is provided, don't log errors (mainly for unit tests). 
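+ * The NULL-broker path is exercised by unittest_member_metadata_serdes()
+ * further below, which calls this parser directly with rkb == NULL.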
*/ + if (rkb) { + rkbuf->rkbuf_rkb = rkb; + rd_kafka_broker_keep(rkb); + } rd_kafka_buf_read_i16(rkbuf, &Version); rd_kafka_buf_read_i32(rkbuf, &subscription_cnt); @@ -1799,7 +1807,7 @@ static int rd_kafka_group_MemberMetadata_consumer_read( rkgm->rkgm_subscription, topic_name, RD_KAFKA_PARTITION_UA); } - rd_kafka_buf_read_bytes(rkbuf, &UserData); + rd_kafka_buf_read_kbytes(rkbuf, &UserData); rkgm->rkgm_userdata = rd_kafkap_bytes_copy(&UserData); const rd_kafka_topic_partition_field_t fields[] = { @@ -1810,6 +1818,16 @@ static int rd_kafka_group_MemberMetadata_consumer_read( rd_kafka_buf_read_topic_partitions(rkbuf, 0, fields))) goto err; + if (Version >= 2) { + rd_kafka_buf_read_i32(rkbuf, &rkgm->rkgm_generation); + } + + if (Version >= 3) { + rd_kafkap_str_t RackId = RD_KAFKAP_STR_INITIALIZER; + rd_kafka_buf_read_str(rkbuf, &RackId); + rkgm->rkgm_rack_id = rd_kafkap_str_copy(&RackId); + } + rd_kafka_buf_destroy(rkbuf); return 0; @@ -1818,10 +1836,11 @@ static int rd_kafka_group_MemberMetadata_consumer_read( err = rkbuf->rkbuf_err; err: - rd_rkb_dbg(rkb, CGRP, "MEMBERMETA", - "Failed to parse MemberMetadata for \"%.*s\": %s", - RD_KAFKAP_STR_PR(rkgm->rkgm_member_id), - rd_kafka_err2str(err)); + if (rkb) + rd_rkb_dbg(rkb, CGRP, "MEMBERMETA", + "Failed to parse MemberMetadata for \"%.*s\": %s", + RD_KAFKAP_STR_PR(rkgm->rkgm_member_id), + rd_kafka_err2str(err)); if (rkgm->rkgm_subscription) { rd_kafka_topic_partition_list_destroy(rkgm->rkgm_subscription); rkgm->rkgm_subscription = NULL; @@ -1899,7 +1918,9 @@ static void rd_kafka_cgrp_handle_JoinGroup(rd_kafka_t *rk, "Unsupported assignment strategy \"%s\"", protocol_name); if (rkcg->rkcg_assignor) { - if (rkcg->rkcg_assignor->rkas_destroy_state_cb) + if (rkcg->rkcg_assignor + ->rkas_destroy_state_cb && + rkcg->rkcg_assignor_state) rkcg->rkcg_assignor ->rkas_destroy_state_cb( rkcg->rkcg_assignor_state); @@ -1937,7 +1958,8 @@ static void rd_kafka_cgrp_handle_JoinGroup(rd_kafka_t *rk, } if (rkcg->rkcg_assignor && rkcg->rkcg_assignor != rkas) { - if (rkcg->rkcg_assignor->rkas_destroy_state_cb) + if (rkcg->rkcg_assignor->rkas_destroy_state_cb && + rkcg->rkcg_assignor_state) rkcg->rkcg_assignor->rkas_destroy_state_cb( rkcg->rkcg_assignor_state); rkcg->rkcg_assignor_state = NULL; @@ -1950,6 +1972,7 @@ static void rd_kafka_cgrp_handle_JoinGroup(rd_kafka_t *rk, int sub_cnt = 0; rd_list_t topics; rd_kafka_op_t *rko; + rd_bool_t any_member_rack = rd_false; rd_kafka_dbg(rkb->rkb_rk, CGRP, "JOINGROUP", "I am elected leader for group \"%s\" " "with %" PRId32 " member(s)", @@ -1974,7 +1997,7 @@ static void rd_kafka_cgrp_handle_JoinGroup(rd_kafka_t *rk, rd_kafka_buf_read_str(rkbuf, &MemberId); if (request->rkbuf_reqhdr.ApiVersion >= 5) rd_kafka_buf_read_str(rkbuf, &GroupInstanceId); - rd_kafka_buf_read_bytes(rkbuf, &MemberMetadata); + rd_kafka_buf_read_kbytes(rkbuf, &MemberMetadata); rkgm = &members[sub_cnt]; rkgm->rkgm_member_id = rd_kafkap_str_copy(&MemberId); @@ -1995,6 +2018,9 @@ static void rd_kafka_cgrp_handle_JoinGroup(rd_kafka_t *rk, rd_kafka_topic_partition_list_get_topic_names( rkgm->rkgm_subscription, &topics, 0 /*dont include regex*/); + if (!any_member_rack && rkgm->rkgm_rack_id && + RD_KAFKAP_STR_LEN(rkgm->rkgm_rack_id)) + any_member_rack = rd_true; } } @@ -2032,7 +2058,11 @@ static void rd_kafka_cgrp_handle_JoinGroup(rd_kafka_t *rk, * avoid triggering a rejoin or error propagation * on receiving the response since some topics * may be missing. 
 */ - rd_false, rko); + rd_false, + /* force_racks is true if any member has a client rack set, since we will require partition to rack mapping in that case for rack-aware assignors. */ + any_member_rack, rko); rd_list_destroy(&topics); } else { @@ -3107,7 +3137,8 @@ static void rd_kafka_cgrp_op_handle_OffsetCommit(rd_kafka_t *rk, !(err == RD_KAFKA_RESP_ERR__NO_OFFSET && rko_orig->rko_u.offset_commit.silent_empty)) { /* Propagate commit results (success or permanent error) - * unless we're shutting down or commit was empty. */ + * unless we're shutting down or commit was empty, or if + * there was a rebalance in progress. */ rd_kafka_cgrp_propagate_commit_result(rkcg, rko_orig, err, errcnt, offsets); } @@ -5952,6 +5983,75 @@ static int unittest_list_to_map(void) { RD_UT_PASS(); } +int unittest_member_metadata_serdes(void) { + rd_list_t *topics = rd_list_new(0, (void *)rd_kafka_topic_info_destroy); + rd_kafka_topic_partition_list_t *owned_partitions = + rd_kafka_topic_partition_list_new(0); + rd_kafkap_str_t *rack_id = rd_kafkap_str_new("myrack", -1); + const void *userdata = NULL; + const int32_t userdata_size = 0; + const int generation = 3; + const char topic_name[] = "mytopic"; + rd_kafka_group_member_t *rkgm; + int version; + + rd_list_add(topics, rd_kafka_topic_info_new(topic_name, 3)); + rd_kafka_topic_partition_list_add(owned_partitions, topic_name, 0); + rkgm = rd_calloc(1, sizeof(*rkgm)); + + /* Note that the version variable doesn't actually change the Version + * field in the serialized message. It only runs the tests with/without + * additional fields added in that particular version. */ + for (version = 0; version <= 3; version++) { + rd_kafkap_bytes_t *member_metadata; + + /* Serialize. */ + member_metadata = + rd_kafka_consumer_protocol_member_metadata_new( + topics, userdata, userdata_size, + version >= 1 ? owned_partitions : NULL, + version >= 2 ? generation : -1, + version >= 3 ? rack_id : NULL); + + /* Deserialize. */ + rd_kafka_group_MemberMetadata_consumer_read(NULL, rkgm, + member_metadata); + + /* Compare results. */ + RD_UT_ASSERT(rkgm->rkgm_subscription->cnt == + rd_list_cnt(topics), + "subscription size should be correct"); + RD_UT_ASSERT(!strcmp(topic_name, + rkgm->rkgm_subscription->elems[0].topic), + "subscriptions should be correct"); + RD_UT_ASSERT(rkgm->rkgm_userdata->len == userdata_size, + "userdata should have size 0"); + if (version >= 1) + RD_UT_ASSERT(!rd_kafka_topic_partition_list_cmp( + rkgm->rkgm_owned, owned_partitions, + rd_kafka_topic_partition_cmp), + "owned partitions should be the same"); + if (version >= 2) + RD_UT_ASSERT(generation == rkgm->rkgm_generation, + "generation should be the same"); + if (version >= 3) + RD_UT_ASSERT( + !rd_kafkap_str_cmp(rack_id, rkgm->rkgm_rack_id), + "rack id should be the same"); + + rd_kafka_group_member_clear(rkgm); + rd_kafkap_bytes_destroy(member_metadata); + } + + /* Clean up.
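+ * rkgm itself was rd_calloc()ed above and is freed here; its
+ * per-iteration fields were already released by
+ * rd_kafka_group_member_clear().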
 */ + rd_list_destroy(topics); + rd_kafka_topic_partition_list_destroy(owned_partitions); + rd_kafkap_str_destroy(rack_id); + rd_free(rkgm); + + RD_UT_PASS(); +} + /** * @brief Consumer group unit tests @@ -5964,6 +6064,7 @@ int unittest_cgrp(void) { fails += unittest_set_subtract(); fails += unittest_map_to_list(); fails += unittest_list_to_map(); + fails += unittest_member_metadata_serdes(); return fails; } diff --git a/lib/librdkafka-2.1.0/src/rdkafka_cgrp.h b/lib/librdkafka-2.3.0/src/rdkafka_cgrp.h similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_cgrp.h rename to lib/librdkafka-2.3.0/src/rdkafka_cgrp.h index 4fa51e54897..ff62e8d2852 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_cgrp.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_cgrp.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_conf.c b/lib/librdkafka-2.3.0/src/rdkafka_conf.c similarity index 97% rename from lib/librdkafka-2.1.0/src/rdkafka_conf.c rename to lib/librdkafka-2.3.0/src/rdkafka_conf.c index e481f4dd86d..154582d6fcc 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_conf.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_conf.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2022 Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023 Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -457,10 +458,12 @@ static const struct rd_kafka_property rd_kafka_properties[] = { {_RK_GLOBAL, "topic.metadata.refresh.fast.interval.ms", _RK_C_INT, _RK(metadata_refresh_fast_interval_ms), "When a topic loses its leader a new metadata request will be " - "enqueued with this initial interval, exponentially increasing " + "enqueued immediately and then with this initial interval, exponentially " + "increasing up to `retry.backoff.max.ms`, " "until the topic metadata has been refreshed. " + "If not set explicitly, it defaults to `retry.backoff.ms`. " "This is used to recover quickly from transitioning leader brokers.", - 1, 60 * 1000, 250}, + 1, 60 * 1000, 100}, {_RK_GLOBAL | _RK_DEPRECATED, "topic.metadata.refresh.fast.cnt", _RK_C_INT, _RK(metadata_refresh_fast_cnt), "No longer used.", 0, 1000, 10}, {_RK_GLOBAL, "topic.metadata.refresh.sparse", _RK_C_BOOL, @@ -897,11 +900,13 @@ static const struct rd_kafka_property rd_kafka_properties[] = { "Java TrustStores are not supported, use `ssl.ca.location` " "and a certificate file instead. 
" "See " - "https://github.com/edenhill/librdkafka/wiki/Using-SSL-with-librdkafka " + "https://github.com/confluentinc/librdkafka/" + "wiki/Using-SSL-with-librdkafka " "for more information."}, {_RK_GLOBAL, "sasl.jaas.config", _RK_C_INVALID, _RK(dummy), "Java JAAS configuration is not supported, see " - "https://github.com/edenhill/librdkafka/wiki/Using-SASL-with-librdkafka " + "https://github.com/confluentinc/librdkafka/" + "wiki/Using-SASL-with-librdkafka " "for more information."}, {_RK_GLOBAL | _RK_HIGH, "sasl.mechanisms", _RK_C_STR, _RK(sasl.mechanisms), @@ -1197,6 +1202,16 @@ static const struct rd_kafka_property rd_kafka_properties[] = { "Maximum time the broker may wait to fill the Fetch response " "with fetch.min.bytes of messages.", 0, 300 * 1000, 500}, + {_RK_GLOBAL | _RK_CONSUMER | _RK_MED, "fetch.queue.backoff.ms", _RK_C_INT, + _RK(fetch_queue_backoff_ms), + "How long to postpone the next fetch request for a " + "topic+partition in case the current fetch queue thresholds " + "(queued.min.messages or queued.max.messages.kbytes) have " + "been exceded. " + "This property may need to be decreased if the queue thresholds are " + "set low and the application is experiencing long (~1s) delays " + "between messages. Low values may increase CPU utilization.", + 0, 300 * 1000, 1000}, {_RK_GLOBAL | _RK_CONSUMER | _RK_MED, "fetch.message.max.bytes", _RK_C_INT, _RK(fetch_msg_max_bytes), "Initial maximum number of bytes per topic+partition to request when " @@ -1360,10 +1375,21 @@ static const struct rd_kafka_property rd_kafka_properties[] = { 0, INT32_MAX, INT32_MAX}, {_RK_GLOBAL | _RK_PRODUCER, "retries", _RK_C_ALIAS, .sdef = "message.send.max.retries"}, + {_RK_GLOBAL | _RK_PRODUCER | _RK_MED, "retry.backoff.ms", _RK_C_INT, _RK(retry_backoff_ms), - "The backoff time in milliseconds before retrying a protocol request.", 1, - 300 * 1000, 100}, + "The backoff time in milliseconds before retrying a protocol request, " + "this is the first backoff time, " + "and will be backed off exponentially until number of retries is " + "exhausted, and it's capped by retry.backoff.max.ms.", + 1, 300 * 1000, 100}, + + {_RK_GLOBAL | _RK_PRODUCER | _RK_MED, "retry.backoff.max.ms", _RK_C_INT, + _RK(retry_backoff_max_ms), + "The max backoff time in milliseconds before retrying a protocol request, " + "this is the atmost backoff allowed for exponentially backed off " + "requests.", + 1, 300 * 1000, 1000}, {_RK_GLOBAL | _RK_PRODUCER, "queue.buffering.backpressure.threshold", _RK_C_INT, _RK(queue_backpressure_thres), @@ -1427,6 +1453,19 @@ static const struct rd_kafka_property rd_kafka_properties[] = { "A higher value allows for more effective batching of these " "messages.", 0, 900000, 10}, + {_RK_GLOBAL, "client.dns.lookup", _RK_C_S2I, _RK(client_dns_lookup), + "Controls how the client uses DNS lookups. By default, when the lookup " + "returns multiple IP addresses for a hostname, they will all be attempted " + "for connection before the connection is considered failed. This applies " + "to both bootstrap and advertised servers. If the value is set to " + "`resolve_canonical_bootstrap_servers_only`, each entry will be resolved " + "and expanded into a list of canonical names. NOTE: Default here is " + "different from the Java client's default behavior, which connects only " + "to the first IP address returned for a hostname. 
", + .vdef = RD_KAFKA_USE_ALL_DNS_IPS, + .s2i = {{RD_KAFKA_USE_ALL_DNS_IPS, "use_all_dns_ips"}, + {RD_KAFKA_RESOLVE_CANONICAL_BOOTSTRAP_SERVERS_ONLY, + "resolve_canonical_bootstrap_servers_only"}}}, /* @@ -3903,6 +3942,10 @@ const char *rd_kafka_conf_finalize(rd_kafka_type_t cltype, conf->sparse_connect_intvl = RD_MAX(11, RD_MIN(conf->reconnect_backoff_ms / 2, 1000)); } + if (!rd_kafka_conf_is_modified( + conf, "topic.metadata.refresh.fast.interval.ms")) + conf->metadata_refresh_fast_interval_ms = + conf->retry_backoff_ms; if (!rd_kafka_conf_is_modified(conf, "connections.max.idle.ms") && conf->brokerlist && rd_strcasestr(conf->brokerlist, "azure")) { @@ -4091,6 +4134,31 @@ int rd_kafka_conf_warn(rd_kafka_t *rk) { "recommend not using set_default_topic_conf"); /* Additional warnings */ + if (rk->rk_conf.retry_backoff_ms > rk->rk_conf.retry_backoff_max_ms) { + rd_kafka_log( + rk, LOG_WARNING, "CONFWARN", + "Configuration `retry.backoff.ms` with value %d is greater " + "than configuration `retry.backoff.max.ms` with value %d. " + "A static backoff with value `retry.backoff.max.ms` will " + "be applied.", + rk->rk_conf.retry_backoff_ms, + rk->rk_conf.retry_backoff_max_ms); + } + + if (rd_kafka_conf_is_modified( + &rk->rk_conf, "topic.metadata.refresh.fast.interval.ms") && + rk->rk_conf.metadata_refresh_fast_interval_ms > + rk->rk_conf.retry_backoff_max_ms) { + rd_kafka_log( + rk, LOG_WARNING, "CONFWARN", + "Configuration `topic.metadata.refresh.fast.interval.ms` " + "with value %d is greater than configuration " + "`retry.backoff.max.ms` with value %d. " + "A static backoff with value `retry.backoff.max.ms` will " + "be applied.", + rk->rk_conf.metadata_refresh_fast_interval_ms, + rk->rk_conf.retry_backoff_max_ms); + } if (rk->rk_type == RD_KAFKA_CONSUMER) { if (rk->rk_conf.fetch_wait_max_ms + 1000 > rk->rk_conf.socket_timeout_ms) diff --git a/lib/librdkafka-2.1.0/src/rdkafka_conf.h b/lib/librdkafka-2.3.0/src/rdkafka_conf.h similarity index 98% rename from lib/librdkafka-2.1.0/src/rdkafka_conf.h rename to lib/librdkafka-2.3.0/src/rdkafka_conf.h index 161d6e469d5..bd17a261bf8 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_conf.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_conf.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2014-2018 Magnus Edenhill + * Copyright (c) 2014-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -158,6 +158,11 @@ typedef enum { RD_KAFKA_SSL_ENDPOINT_ID_HTTPS, /**< RFC2818 */ } rd_kafka_ssl_endpoint_id_t; +typedef enum { + RD_KAFKA_USE_ALL_DNS_IPS, + RD_KAFKA_RESOLVE_CANONICAL_BOOTSTRAP_SERVERS_ONLY, +} rd_kafka_client_dns_lookup_t; + /* Increase in steps of 64 as needed. 
* This must be larger than sizeof(rd_kafka_[topic_]conf_t) */ #define RD_KAFKA_CONF_PROPS_IDX_MAX (64 * 33) @@ -224,6 +229,7 @@ struct rd_kafka_conf_s { int api_version_fallback_ms; char *broker_version_fallback; rd_kafka_secproto_t security_protocol; + rd_kafka_client_dns_lookup_t client_dns_lookup; struct { #if WITH_SSL @@ -355,6 +361,7 @@ struct rd_kafka_conf_s { int fetch_msg_max_bytes; int fetch_max_bytes; int fetch_min_bytes; + int fetch_queue_backoff_ms; int fetch_error_backoff_ms; char *group_id_str; char *group_instance_id; @@ -416,6 +423,7 @@ struct rd_kafka_conf_s { int queue_backpressure_thres; int max_retries; int retry_backoff_ms; + int retry_backoff_max_ms; int batch_num_messages; int batch_size; rd_kafka_compression_t compression_codec; diff --git a/lib/librdkafka-2.1.0/src/rdkafka_confval.h b/lib/librdkafka-2.3.0/src/rdkafka_confval.h similarity index 98% rename from lib/librdkafka-2.1.0/src/rdkafka_confval.h rename to lib/librdkafka-2.3.0/src/rdkafka_confval.h index 3f2bad549eb..ca826169571 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_confval.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_confval.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2014-2018 Magnus Edenhill + * Copyright (c) 2014-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_coord.c b/lib/librdkafka-2.3.0/src/rdkafka_coord.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_coord.c rename to lib/librdkafka-2.3.0/src/rdkafka_coord.c index 9e41bab72ad..a880f23a465 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_coord.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_coord.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2019 Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_coord.h b/lib/librdkafka-2.3.0/src/rdkafka_coord.h similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_coord.h rename to lib/librdkafka-2.3.0/src/rdkafka_coord.h index 4e00a552bc2..a04ca222e25 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_coord.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_coord.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2019 Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_error.c b/lib/librdkafka-2.3.0/src/rdkafka_error.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_error.c rename to lib/librdkafka-2.3.0/src/rdkafka_error.c index 4a218daffee..680593630d9 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_error.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_error.c @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2020 Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_error.h b/lib/librdkafka-2.3.0/src/rdkafka_error.h similarity index 98% rename from lib/librdkafka-2.1.0/src/rdkafka_error.h rename to lib/librdkafka-2.3.0/src/rdkafka_error.h index 79984f5efb5..4b4d912f30e 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_error.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_error.h @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2020 Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_event.c b/lib/librdkafka-2.3.0/src/rdkafka_event.c similarity index 84% rename from lib/librdkafka-2.1.0/src/rdkafka_event.c rename to lib/librdkafka-2.3.0/src/rdkafka_event.c index ffd1a17805c..6ea366a5a89 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_event.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_event.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2016 Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -60,6 +61,8 @@ const char *rd_kafka_event_name(const rd_kafka_event_t *rkev) { return "CreatePartitionsResult"; case RD_KAFKA_EVENT_ALTERCONFIGS_RESULT: return "AlterConfigsResult"; + case RD_KAFKA_EVENT_INCREMENTALALTERCONFIGS_RESULT: + return "IncrementalAlterConfigsResult"; case RD_KAFKA_EVENT_DESCRIBECONFIGS_RESULT: return "DescribeConfigsResult"; case RD_KAFKA_EVENT_DELETERECORDS_RESULT: @@ -68,6 +71,10 @@ const char *rd_kafka_event_name(const rd_kafka_event_t *rkev) { return "ListConsumerGroupsResult"; case RD_KAFKA_EVENT_DESCRIBECONSUMERGROUPS_RESULT: return "DescribeConsumerGroupsResult"; + case RD_KAFKA_EVENT_DESCRIBETOPICS_RESULT: + return "DescribeTopicsResult"; + case RD_KAFKA_EVENT_DESCRIBECLUSTER_RESULT: + return "DescribeClusterResult"; case RD_KAFKA_EVENT_DELETEGROUPS_RESULT: return "DeleteGroupsResult"; case RD_KAFKA_EVENT_DELETECONSUMERGROUPOFFSETS_RESULT: @@ -84,6 +91,12 @@ const char *rd_kafka_event_name(const rd_kafka_event_t *rkev) { return "ListConsumerGroupOffsetsResult"; case RD_KAFKA_EVENT_OAUTHBEARER_TOKEN_REFRESH: return "SaslOAuthBearerTokenRefresh"; + case RD_KAFKA_EVENT_DESCRIBEUSERSCRAMCREDENTIALS_RESULT: + return "DescribeUserScramCredentials"; + case RD_KAFKA_EVENT_ALTERUSERSCRAMCREDENTIALS_RESULT: + return "AlterUserScramCredentials"; + case RD_KAFKA_EVENT_LISTOFFSETS_RESULT: + return "ListOffsetsResult"; default: return "?unknown?"; } @@ -329,6 +342,15 @@ rd_kafka_event_AlterConfigs_result(rd_kafka_event_t *rkev) { return (const rd_kafka_AlterConfigs_result_t *)rkev; } +const rd_kafka_IncrementalAlterConfigs_result_t * +rd_kafka_event_IncrementalAlterConfigs_result(rd_kafka_event_t *rkev) { + if (!rkev || + rkev->rko_evtype != RD_KAFKA_EVENT_INCREMENTALALTERCONFIGS_RESULT) + return NULL; + else + return (const rd_kafka_IncrementalAlterConfigs_result_t *)rkev; +} + const rd_kafka_DescribeConfigs_result_t * rd_kafka_event_DescribeConfigs_result(rd_kafka_event_t *rkev) { @@ -364,6 +386,22 @@ rd_kafka_event_DescribeConsumerGroups_result(rd_kafka_event_t *rkev) { return (const rd_kafka_DescribeConsumerGroups_result_t *)rkev; } +const rd_kafka_DescribeTopics_result_t * +rd_kafka_event_DescribeTopics_result(rd_kafka_event_t *rkev) { + if (!rkev || rkev->rko_evtype != 
RD_KAFKA_EVENT_DESCRIBETOPICS_RESULT) + return NULL; + else + return (const rd_kafka_DescribeTopics_result_t *)rkev; +} + +const rd_kafka_DescribeCluster_result_t * +rd_kafka_event_DescribeCluster_result(rd_kafka_event_t *rkev) { + if (!rkev || rkev->rko_evtype != RD_KAFKA_EVENT_DESCRIBECLUSTER_RESULT) + return NULL; + else + return (const rd_kafka_DescribeCluster_result_t *)rkev; +} + const rd_kafka_DeleteGroups_result_t * rd_kafka_event_DeleteGroups_result(rd_kafka_event_t *rkev) { if (!rkev || rkev->rko_evtype != RD_KAFKA_EVENT_DELETEGROUPS_RESULT) @@ -416,6 +454,34 @@ rd_kafka_event_AlterConsumerGroupOffsets_result(rd_kafka_event_t *rkev) { const rd_kafka_AlterConsumerGroupOffsets_result_t *)rkev; } +const rd_kafka_DescribeUserScramCredentials_result_t * +rd_kafka_event_DescribeUserScramCredentials_result(rd_kafka_event_t *rkev) { + if (!rkev || rkev->rko_evtype != + RD_KAFKA_EVENT_DESCRIBEUSERSCRAMCREDENTIALS_RESULT) + return NULL; + else + return ( + const rd_kafka_DescribeUserScramCredentials_result_t *)rkev; +} + +const rd_kafka_AlterUserScramCredentials_result_t * +rd_kafka_event_AlterUserScramCredentials_result(rd_kafka_event_t *rkev) { + if (!rkev || + rkev->rko_evtype != RD_KAFKA_EVENT_ALTERUSERSCRAMCREDENTIALS_RESULT) + return NULL; + else + return ( + const rd_kafka_AlterUserScramCredentials_result_t *)rkev; +} + +const rd_kafka_ListOffsets_result_t * +rd_kafka_event_ListOffsets_result(rd_kafka_event_t *rkev) { + if (!rkev || rkev->rko_evtype != RD_KAFKA_EVENT_LISTOFFSETS_RESULT) + return NULL; + else + return (const rd_kafka_ListOffsets_result_t *)rkev; +} + const rd_kafka_ListConsumerGroupOffsets_result_t * rd_kafka_event_ListConsumerGroupOffsets_result(rd_kafka_event_t *rkev) { if (!rkev || diff --git a/lib/librdkafka-2.1.0/src/rdkafka_event.h b/lib/librdkafka-2.3.0/src/rdkafka_event.h similarity index 92% rename from lib/librdkafka-2.1.0/src/rdkafka_event.h rename to lib/librdkafka-2.3.0/src/rdkafka_event.h index 3f9c22e34bb..5d22456b387 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_event.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_event.h @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2016 Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -98,10 +99,13 @@ static RD_UNUSED RD_INLINE int rd_kafka_event_setup(rd_kafka_t *rk, case RD_KAFKA_EVENT_DELETETOPICS_RESULT: case RD_KAFKA_EVENT_CREATEPARTITIONS_RESULT: case RD_KAFKA_EVENT_ALTERCONFIGS_RESULT: + case RD_KAFKA_EVENT_INCREMENTALALTERCONFIGS_RESULT: case RD_KAFKA_EVENT_DESCRIBECONFIGS_RESULT: case RD_KAFKA_EVENT_DELETERECORDS_RESULT: case RD_KAFKA_EVENT_LISTCONSUMERGROUPS_RESULT: case RD_KAFKA_EVENT_DESCRIBECONSUMERGROUPS_RESULT: + case RD_KAFKA_EVENT_DESCRIBETOPICS_RESULT: + case RD_KAFKA_EVENT_DESCRIBECLUSTER_RESULT: case RD_KAFKA_EVENT_DELETEGROUPS_RESULT: case RD_KAFKA_EVENT_DELETECONSUMERGROUPOFFSETS_RESULT: case RD_KAFKA_EVENT_CREATEACLS_RESULT: @@ -110,6 +114,9 @@ static RD_UNUSED RD_INLINE int rd_kafka_event_setup(rd_kafka_t *rk, case RD_KAFKA_EVENT_ALTERCONSUMERGROUPOFFSETS_RESULT: case RD_KAFKA_EVENT_LISTCONSUMERGROUPOFFSETS_RESULT: case RD_KAFKA_EVENT_OAUTHBEARER_TOKEN_REFRESH: + case RD_KAFKA_EVENT_DESCRIBEUSERSCRAMCREDENTIALS_RESULT: + case RD_KAFKA_EVENT_ALTERUSERSCRAMCREDENTIALS_RESULT: + case RD_KAFKA_EVENT_LISTOFFSETS_RESULT: return 1; default: diff --git a/lib/librdkafka-2.1.0/src/rdkafka_feature.c b/lib/librdkafka-2.3.0/src/rdkafka_feature.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_feature.c rename to lib/librdkafka-2.3.0/src/rdkafka_feature.c index a2fc085c5b0..b32cdf689dd 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_feature.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_feature.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2016, Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -204,7 +205,7 @@ static const struct rd_kafka_feature_map { .depends = { {RD_KAFKAP_SaslHandshake, 1, 1}, - {RD_KAFKAP_SaslAuthenticate, 0, 0}, + {RD_KAFKAP_SaslAuthenticate, 0, 1}, {-1}, }, }, diff --git a/lib/librdkafka-2.1.0/src/rdkafka_feature.h b/lib/librdkafka-2.3.0/src/rdkafka_feature.h similarity index 98% rename from lib/librdkafka-2.1.0/src/rdkafka_feature.h rename to lib/librdkafka-2.3.0/src/rdkafka_feature.h index a651a07df07..9597956ee8a 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_feature.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_feature.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2016, Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_fetcher.c b/lib/librdkafka-2.3.0/src/rdkafka_fetcher.c similarity index 95% rename from lib/librdkafka-2.1.0/src/rdkafka_fetcher.c rename to lib/librdkafka-2.3.0/src/rdkafka_fetcher.c index 8ee67a4205a..ed8702239b8 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_fetcher.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_fetcher.c @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2022 Magnus Edenhill + * Copyright (c) 2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -51,15 +51,29 @@ static void rd_kafka_broker_fetch_backoff(rd_kafka_broker_t *rkb, /** * @brief Backoff the next Fetch for specific partition + * + * @returns the absolute backoff time (the current time for no backoff). 
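+ *
+ * The interval is chosen per error below: RD_KAFKA_RESP_ERR__QUEUE_FULL
+ * backs off by fetch.queue.backoff.ms, any other error by
+ * fetch.error.backoff.ms, and RD_KAFKA_RESP_ERR__PARTITION_EOF (or a
+ * zero configured interval) does not back off at all.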
*/ -static void rd_kafka_toppar_fetch_backoff(rd_kafka_broker_t *rkb, - rd_kafka_toppar_t *rktp, - rd_kafka_resp_err_t err) { - int backoff_ms = rkb->rkb_rk->rk_conf.fetch_error_backoff_ms; +static rd_ts_t rd_kafka_toppar_fetch_backoff(rd_kafka_broker_t *rkb, + rd_kafka_toppar_t *rktp, + rd_kafka_resp_err_t err) { + int backoff_ms; /* Don't back off on reaching end of partition */ - if (err == RD_KAFKA_RESP_ERR__PARTITION_EOF) - return; + if (err == RD_KAFKA_RESP_ERR__PARTITION_EOF) { + rktp->rktp_ts_fetch_backoff = 0; + return rd_clock(); /* Immediate: No practical backoff */ + } + + if (err == RD_KAFKA_RESP_ERR__QUEUE_FULL) + backoff_ms = rkb->rkb_rk->rk_conf.fetch_queue_backoff_ms; + else + backoff_ms = rkb->rkb_rk->rk_conf.fetch_error_backoff_ms; + + if (unlikely(!backoff_ms)) { + rktp->rktp_ts_fetch_backoff = 0; + return rd_clock(); /* Immediate: No practical backoff */ + } /* Certain errors that may require manual intervention should have * a longer backoff time. */ @@ -73,8 +87,9 @@ static void rd_kafka_toppar_fetch_backoff(rd_kafka_broker_t *rkb, rktp->rktp_rkt->rkt_topic->str, rktp->rktp_partition, backoff_ms, err ? ": " : "", err ? rd_kafka_err2str(err) : ""); -} + return rktp->rktp_ts_fetch_backoff; +} /** * @brief Handle preferred replica in fetch response. @@ -879,8 +894,6 @@ int rd_kafka_broker_fetch_toppars(rd_kafka_broker_t *rkb, rd_ts_t now) { * This can happen if metadata is read initially * without an existing topic (see * rd_kafka_topic_metadata_update2). - * TODO: have a private metadata struct that - * stores leader epochs before topic creation. */ rd_kafka_buf_write_i32(rkbuf, 0); } else { @@ -975,7 +988,25 @@ int rd_kafka_broker_fetch_toppars(rd_kafka_broker_t *rkb, rd_ts_t now) { return cnt; } - +/** + * @brief Decide whether it should start fetching from next fetch start + * or continue with current fetch pos. + * + * @param rktp the toppar + * + * @returns rd_true if it should start fetching from next fetch start, + * rd_false otherwise. + * + * @locality any + * @locks toppar_lock() MUST be held + */ +rd_bool_t rd_kafka_toppar_fetch_decide_start_from_next_fetch_start( + rd_kafka_toppar_t *rktp) { + return rktp->rktp_op_version > rktp->rktp_fetch_version || + rd_kafka_fetch_pos_cmp(&rktp->rktp_next_fetch_start, + &rktp->rktp_last_next_fetch_start) || + rktp->rktp_offsets.fetch_pos.offset == RD_KAFKA_OFFSET_INVALID; +} /** * @brief Decide whether this toppar should be on the fetch list or not. @@ -1005,7 +1036,7 @@ rd_ts_t rd_kafka_toppar_fetch_decide(rd_kafka_toppar_t *rktp, rd_interval(&rktp->rktp_lease_intvl, 5 * 60 * 1000 * 1000 /*5 minutes*/, 0) > 0; if (lease_expired) { - /* delete_to_leader() requires no locks to be held */ + /* delegate_to_leader() requires no locks to be held */ rd_kafka_toppar_unlock(rktp); rd_kafka_toppar_delegate_to_leader(rktp); rd_kafka_toppar_lock(rktp); @@ -1037,10 +1068,7 @@ rd_ts_t rd_kafka_toppar_fetch_decide(rd_kafka_toppar_t *rktp, /* Update broker thread's fetch op version */ version = rktp->rktp_op_version; - if (version > rktp->rktp_fetch_version || - rd_kafka_fetch_pos_cmp(&rktp->rktp_next_fetch_start, - &rktp->rktp_last_next_fetch_start) || - rktp->rktp_offsets.fetch_pos.offset == RD_KAFKA_OFFSET_INVALID) { + if (rd_kafka_toppar_fetch_decide_start_from_next_fetch_start(rktp)) { /* New version barrier, something was modified from the * control plane. Reset and start over. 
* Alternatively only the next_offset changed but not the @@ -1084,22 +1112,24 @@ rd_ts_t rd_kafka_toppar_fetch_decide(rd_kafka_toppar_t *rktp, rktp->rktp_next_fetch_start.offset)) { should_fetch = 0; reason = "no concrete offset"; - + } else if (rktp->rktp_ts_fetch_backoff > rd_clock()) { + reason = "fetch backed off"; + ts_backoff = rktp->rktp_ts_fetch_backoff; + should_fetch = 0; } else if (rd_kafka_q_len(rktp->rktp_fetchq) >= rkb->rkb_rk->rk_conf.queued_min_msgs) { /* Skip toppars whose local message queue is already above * the lower threshold. */ - reason = "queued.min.messages exceeded"; + reason = "queued.min.messages exceeded"; + ts_backoff = rd_kafka_toppar_fetch_backoff( + rkb, rktp, RD_KAFKA_RESP_ERR__QUEUE_FULL); should_fetch = 0; } else if ((int64_t)rd_kafka_q_size(rktp->rktp_fetchq) >= rkb->rkb_rk->rk_conf.queued_max_msg_bytes) { - reason = "queued.max.messages.kbytes exceeded"; - should_fetch = 0; - - } else if (rktp->rktp_ts_fetch_backoff > rd_clock()) { - reason = "fetch backed off"; - ts_backoff = rktp->rktp_ts_fetch_backoff; + reason = "queued.max.messages.kbytes exceeded"; + ts_backoff = rd_kafka_toppar_fetch_backoff( + rkb, rktp, RD_KAFKA_RESP_ERR__QUEUE_FULL); should_fetch = 0; } diff --git a/lib/librdkafka-2.1.0/src/rdkafka_fetcher.h b/lib/librdkafka-2.3.0/src/rdkafka_fetcher.h similarity index 92% rename from lib/librdkafka-2.1.0/src/rdkafka_fetcher.h rename to lib/librdkafka-2.3.0/src/rdkafka_fetcher.h index 0e3af82bb24..8c64f3b0d9e 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_fetcher.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_fetcher.h @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2022 Magnus Edenhill + * Copyright (c) 2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -33,6 +33,9 @@ int rd_kafka_broker_fetch_toppars(rd_kafka_broker_t *rkb, rd_ts_t now); +rd_bool_t rd_kafka_toppar_fetch_decide_start_from_next_fetch_start( + rd_kafka_toppar_t *rktp); + rd_ts_t rd_kafka_toppar_fetch_decide(rd_kafka_toppar_t *rktp, rd_kafka_broker_t *rkb, int force_remove); diff --git a/lib/librdkafka-2.1.0/src/rdkafka_header.c b/lib/librdkafka-2.3.0/src/rdkafka_header.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_header.c rename to lib/librdkafka-2.3.0/src/rdkafka_header.c index 98359b424c9..eb3024c51ed 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_header.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_header.c @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2017 Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_header.h b/lib/librdkafka-2.3.0/src/rdkafka_header.h similarity index 98% rename from lib/librdkafka-2.1.0/src/rdkafka_header.h rename to lib/librdkafka-2.3.0/src/rdkafka_header.h index bd6b0e9593a..6d6747ea669 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_header.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_header.h @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2017 Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_idempotence.c b/lib/librdkafka-2.3.0/src/rdkafka_idempotence.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_idempotence.c rename to lib/librdkafka-2.3.0/src/rdkafka_idempotence.c index 3245e856ed1..1c189f5c872 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_idempotence.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_idempotence.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2018 Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_idempotence.h b/lib/librdkafka-2.3.0/src/rdkafka_idempotence.h similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_idempotence.h rename to lib/librdkafka-2.3.0/src/rdkafka_idempotence.h index 5be8d606d5c..87de3b97a01 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_idempotence.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_idempotence.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2018 Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_int.h b/lib/librdkafka-2.3.0/src/rdkafka_int.h similarity index 98% rename from lib/librdkafka-2.1.0/src/rdkafka_int.h rename to lib/librdkafka-2.3.0/src/rdkafka_int.h index 584ff3c9656..e586dd6e692 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_int.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_int.h @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -78,7 +79,8 @@ struct rd_kafka_topic_s; struct rd_kafka_msg_s; struct rd_kafka_broker_s; struct rd_kafka_toppar_s; - +typedef struct rd_kafka_metadata_internal_s rd_kafka_metadata_internal_t; +typedef struct rd_kafka_toppar_s rd_kafka_toppar_t; typedef struct rd_kafka_lwtopic_s rd_kafka_lwtopic_t; @@ -129,6 +131,7 @@ typedef struct rd_kafka_fetch_pos_s { #define RD_KAFKAP_TOPICS_MAX 1000000 #define RD_KAFKAP_PARTITIONS_MAX 100000 #define RD_KAFKAP_GROUPS_MAX 100000 +#define RD_KAFKAP_CONFIGS_MAX 10000 #define RD_KAFKA_OFFSET_IS_LOGICAL(OFF) ((OFF) < 0) @@ -350,8 +353,9 @@ struct rd_kafka_s { rd_ts_t rk_ts_metadata; /* Timestamp of most recent * metadata. */ - struct rd_kafka_metadata *rk_full_metadata; /* Last full metadata. */ - rd_ts_t rk_ts_full_metadata; /* Timesstamp of .. */ + rd_kafka_metadata_internal_t + *rk_full_metadata; /* Last full metadata. */ + rd_ts_t rk_ts_full_metadata; /* Timestamp of .. 
*/ struct rd_kafka_metadata_cache rk_metadata_cache; /* Metadata cache */ char *rk_clusterid; /* ClusterId from metadata */ @@ -859,6 +863,8 @@ const char *rd_kafka_purge_flags2str(int flags); #define RD_KAFKA_DBG_ALL 0xfffff #define RD_KAFKA_DBG_NONE 0x0 +/* Jitter Percent for exponential retry backoff */ +#define RD_KAFKA_RETRY_JITTER_PERCENT 20 void rd_kafka_log0(const rd_kafka_conf_t *conf, const rd_kafka_t *rk, diff --git a/lib/librdkafka-2.1.0/src/rdkafka_interceptor.c b/lib/librdkafka-2.3.0/src/rdkafka_interceptor.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_interceptor.c rename to lib/librdkafka-2.3.0/src/rdkafka_interceptor.c index c962d2d99e7..b5bacece3cb 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_interceptor.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_interceptor.c @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2017 Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_interceptor.h b/lib/librdkafka-2.3.0/src/rdkafka_interceptor.h similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_interceptor.h rename to lib/librdkafka-2.3.0/src/rdkafka_interceptor.h index 85f061ba914..d9aa4153262 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_interceptor.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_interceptor.h @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2017 Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_lz4.c b/lib/librdkafka-2.3.0/src/rdkafka_lz4.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_lz4.c rename to lib/librdkafka-2.3.0/src/rdkafka_lz4.c index b52108bb1f0..87024ff8ed4 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_lz4.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_lz4.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2017 Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_lz4.h b/lib/librdkafka-2.3.0/src/rdkafka_lz4.h similarity index 98% rename from lib/librdkafka-2.1.0/src/rdkafka_lz4.h rename to lib/librdkafka-2.3.0/src/rdkafka_lz4.h index eb0ef98836c..c724ea21243 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_lz4.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_lz4.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2017 Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_metadata.c b/lib/librdkafka-2.3.0/src/rdkafka_metadata.c similarity index 64% rename from lib/librdkafka-2.1.0/src/rdkafka_metadata.c rename to lib/librdkafka-2.3.0/src/rdkafka_metadata.c index 4e32e5d584e..de90b166e68 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_metadata.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_metadata.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. 
 * * Redistribution and use in source and binary forms, with or without @@ -38,6 +39,49 @@ #include <string.h> #include <stdarg.h> +/** + * @brief Id comparator for rd_kafka_metadata_broker_internal_t + */ +int rd_kafka_metadata_broker_internal_cmp(const void *_a, const void *_b) { + const rd_kafka_metadata_broker_internal_t *a = _a; + const rd_kafka_metadata_broker_internal_t *b = _b; + return RD_CMP(a->id, b->id); +} + + +/** + * @brief Id comparator for struct rd_kafka_metadata_broker* + */ +int rd_kafka_metadata_broker_cmp(const void *_a, const void *_b) { + const struct rd_kafka_metadata_broker *a = _a; + const struct rd_kafka_metadata_broker *b = _b; + return RD_CMP(a->id, b->id); +} + + +/** + * @brief Id comparator for rd_kafka_metadata_partition_internal_t + */ +static int rd_kafka_metadata_partition_internal_cmp(const void *_a, + const void *_b) { + const rd_kafka_metadata_partition_internal_t *a = _a; + const rd_kafka_metadata_partition_internal_t *b = _b; + return RD_CMP(a->id, b->id); +} + +/** + * @brief Helper function to clear a rd_kafka_metadata_partition. + * + * @note Does not deallocate the rd_kafka_metadata_partition itself. + * @note Should not be used if there is a metadata struct allocated with + * tmpabuf in which rd_kafka_metadata_partition is contained. + */ +void rd_kafka_metadata_partition_clear( + struct rd_kafka_metadata_partition *rkmp) { + RD_IF_FREE(rkmp->isrs, rd_free); + RD_IF_FREE(rkmp->replicas, rd_free); +} + rd_kafka_resp_err_t rd_kafka_metadata(rd_kafka_t *rk, @@ -82,15 +126,15 @@ rd_kafka_metadata(rd_kafka_t *rk, rd_kafka_op_set_replyq(rko, rkq, 0); rko->rko_u.metadata.force = 1; /* Force metadata request regardless * of outstanding metadata requests. */ - rd_kafka_MetadataRequest(rkb, &topics, "application requested", - allow_auto_create_topics, - /* cgrp_update: - * Only update consumer group state - * on response if this lists all - * topics in the cluster, since a - * partial request may make it seem - * like some subscribed topics are missing. */ - all_topics ? rd_true : rd_false, rko); + rd_kafka_MetadataRequest( + rkb, &topics, "application requested", allow_auto_create_topics, + /* cgrp_update: + * Only update consumer group state + * on response if this lists all + * topics in the cluster, since a + * partial request may make it seem + * like some subscribed topics are missing. */ + all_topics ?
rd_true : rd_false, rd_false /* force_racks */, rko); rd_list_destroy(&topics); rd_kafka_broker_destroy(rkb); @@ -113,8 +157,9 @@ rd_kafka_metadata(rd_kafka_t *rk, /* Reply: pass metadata pointer to application who now owns it*/ rd_kafka_assert(rk, rko->rko_u.metadata.md); - *metadatap = rko->rko_u.metadata.md; - rko->rko_u.metadata.md = NULL; + *metadatap = rko->rko_u.metadata.md; + rko->rko_u.metadata.md = NULL; + rko->rko_u.metadata.mdi = NULL; rd_kafka_op_destroy(rko); return RD_KAFKA_RESP_ERR_NO_ERROR; @@ -127,12 +172,13 @@ void rd_kafka_metadata_destroy(const struct rd_kafka_metadata *metadata) { } -/** - * @returns a newly allocated copy of metadata \p src of size \p size - */ -struct rd_kafka_metadata * -rd_kafka_metadata_copy(const struct rd_kafka_metadata *src, size_t size) { +static rd_kafka_metadata_internal_t *rd_kafka_metadata_copy_internal( + const rd_kafka_metadata_internal_t *src_internal, + size_t size, + rd_bool_t populate_racks) { struct rd_kafka_metadata *md; + rd_kafka_metadata_internal_t *mdi; + const struct rd_kafka_metadata *src = &src_internal->metadata; rd_tmpabuf_t tbuf; int i; @@ -142,24 +188,39 @@ rd_kafka_metadata_copy(const struct rd_kafka_metadata *src, size_t size) { * Because of this we copy all the structs verbatim but * any pointer fields needs to be copied explicitly to update * the pointer address. */ - rd_tmpabuf_new(&tbuf, size, 1 /*assert on fail*/); - md = rd_tmpabuf_write(&tbuf, src, sizeof(*md)); + rd_tmpabuf_new(&tbuf, size, rd_true /*assert on fail*/); + rd_tmpabuf_finalize(&tbuf); + mdi = rd_tmpabuf_write(&tbuf, src, sizeof(*mdi)); + md = &mdi->metadata; rd_tmpabuf_write_str(&tbuf, src->orig_broker_name); /* Copy Brokers */ md->brokers = rd_tmpabuf_write(&tbuf, src->brokers, - md->broker_cnt * sizeof(*md->brokers)); + src->broker_cnt * sizeof(*src->brokers)); + /* Copy internal Brokers */ + mdi->brokers = + rd_tmpabuf_write(&tbuf, src_internal->brokers, + src->broker_cnt * sizeof(*src_internal->brokers)); - for (i = 0; i < md->broker_cnt; i++) + for (i = 0; i < md->broker_cnt; i++) { md->brokers[i].host = rd_tmpabuf_write_str(&tbuf, src->brokers[i].host); + if (src_internal->brokers[i].rack_id) { + mdi->brokers[i].rack_id = rd_tmpabuf_write_str( + &tbuf, src_internal->brokers[i].rack_id); + } + } /* Copy TopicMetadata */ md->topics = rd_tmpabuf_write(&tbuf, src->topics, md->topic_cnt * sizeof(*md->topics)); + /* Copy internal TopicMetadata */ + mdi->topics = + rd_tmpabuf_write(&tbuf, src_internal->topics, + md->topic_cnt * sizeof(*src_internal->topics)); for (i = 0; i < md->topic_cnt; i++) { int j; @@ -173,8 +234,17 @@ rd_kafka_metadata_copy(const struct rd_kafka_metadata *src, size_t size) { rd_tmpabuf_write(&tbuf, src->topics[i].partitions, md->topics[i].partition_cnt * sizeof(*md->topics[i].partitions)); + /* Copy internal partitions */ + mdi->topics[i].partitions = rd_tmpabuf_write( + &tbuf, src_internal->topics[i].partitions, + md->topics[i].partition_cnt * + sizeof(*src_internal->topics[i].partitions)); for (j = 0; j < md->topics[i].partition_cnt; j++) { + int k; + char *rack; + rd_list_t *curr_list; + /* Copy replicas and ISRs */ md->topics[i].partitions[j].replicas = rd_tmpabuf_write( &tbuf, src->topics[i].partitions[j].replicas, @@ -185,6 +255,59 @@ rd_kafka_metadata_copy(const struct rd_kafka_metadata *src, size_t size) { &tbuf, src->topics[i].partitions[j].isrs, md->topics[i].partitions[j].isr_cnt * sizeof(*md->topics[i].partitions[j].isrs)); + + mdi->topics[i].partitions[j].racks_cnt = 0; + mdi->topics[i].partitions[j].racks = 
NULL; + + /* Iterate through replicas and populate racks, if + * needed. */ + if (!populate_racks) + continue; + + /* This is quite possibly a recomputation, because we've + * already done this for the src_internal. However, + * since the racks need to point inside the tmpbuf, we + * make this calculation again. Since this is done only + * in a case of a full metadata refresh, this will be + * fairly rare. */ + curr_list = rd_list_new(0, NULL); + for (k = 0; k < md->topics[i].partitions[j].replica_cnt; + k++) { + rd_kafka_metadata_broker_internal_t key = { + .id = md->topics[i] + .partitions[j] + .replicas[k]}; + rd_kafka_metadata_broker_internal_t *found = + bsearch( + &key, mdi->brokers, md->broker_cnt, + sizeof( + rd_kafka_metadata_broker_internal_t), + rd_kafka_metadata_broker_internal_cmp); + if (!found || !found->rack_id) + continue; + rd_list_add(curr_list, found->rack_id); + } + + if (!rd_list_cnt(curr_list)) { + rd_list_destroy(curr_list); + continue; + } + + rd_list_deduplicate(&curr_list, rd_strcmp2); + + mdi->topics[i].partitions[j].racks_cnt = + rd_list_cnt(curr_list); + mdi->topics[i].partitions[j].racks = rd_tmpabuf_alloc( + &tbuf, sizeof(char *) * rd_list_cnt(curr_list)); + RD_LIST_FOREACH(rack, curr_list, k) { + /* We don't copy here,`rack` points to memory + * inside `mdi` already, and it's allocated + * within a tmpabuf. So, the lifetime of + * mdi->topics[i].partitions[j].racks[k] is the + * same as the lifetime of the outer `mdi`. */ + mdi->topics[i].partitions[j].racks[k] = rack; + } + rd_list_destroy(curr_list); } } @@ -192,30 +315,38 @@ rd_kafka_metadata_copy(const struct rd_kafka_metadata *src, size_t size) { if (rd_tmpabuf_failed(&tbuf)) rd_kafka_assert(NULL, !*"metadata copy failed"); - /* Delibarely not destroying the tmpabuf since we return + /* Deliberately not destroying the tmpabuf since we return * its allocated memory. */ - return md; + return mdi; } - /** - * @brief Partition (id) comparator for partition_id_leader_epoch struct. + * @returns a newly allocated copy of metadata \p src of size \p size */ -static int rd_kafka_metadata_partition_leader_epoch_cmp(const void *_a, - const void *_b) { - const rd_kafka_partition_leader_epoch_t *a = _a, *b = _b; - return RD_CMP(a->partition_id, b->partition_id); +rd_kafka_metadata_internal_t * +rd_kafka_metadata_copy(const rd_kafka_metadata_internal_t *src_internal, + size_t size) { + return rd_kafka_metadata_copy_internal(src_internal, size, rd_false); } +/** + * @returns a newly allocated copy of metadata \p src of size \p size, with + * partition racks included. + */ +rd_kafka_metadata_internal_t *rd_kafka_metadata_copy_add_racks( + const rd_kafka_metadata_internal_t *src_internal, + size_t size) { + return rd_kafka_metadata_copy_internal(src_internal, size, rd_true); +} /** * @brief Update topic state and information based on topic metadata. * * @param mdt Topic metadata. - * @param leader_epochs Per-partition leader epoch array, or NULL if not known. + * @param mdit Topic internal metadata. 
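+ *
+ * Both parameters are read-only here; the actual topic and partition
+ * state update is delegated to rd_kafka_topic_metadata_update2().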
* * @locality rdkafka main thread * @locks_acquired rd_kafka_wrlock(rk) @@ -223,7 +354,7 @@ static int rd_kafka_metadata_partition_leader_epoch_cmp(const void *_a, static void rd_kafka_parse_Metadata_update_topic( rd_kafka_broker_t *rkb, const rd_kafka_metadata_topic_t *mdt, - const rd_kafka_partition_leader_epoch_t *leader_epochs) { + const rd_kafka_metadata_topic_internal_t *mdit) { rd_rkb_dbg(rkb, METADATA, "METADATA", /* The indent below is intentional */ @@ -244,7 +375,7 @@ static void rd_kafka_parse_Metadata_update_topic( } else { /* Update local topic & partition state based * on metadata */ - rd_kafka_topic_metadata_update2(rkb, mdt, leader_epochs); + rd_kafka_topic_metadata_update2(rkb, mdt, mdit); } } @@ -268,50 +399,111 @@ rd_bool_t rd_kafka_has_reliable_leader_epochs(rd_kafka_broker_t *rkb) { return ApiVersion >= 9; } +/* Populates the topic partition to rack mapping for the topic given by + * `topic_idx` in the `mdi`. It's assumed that the internal broker metadata is + * already populated. */ +static void +rd_kafka_populate_metadata_topic_racks(rd_tmpabuf_t *tbuf, + size_t topic_idx, + rd_kafka_metadata_internal_t *mdi) { + rd_kafka_metadata_broker_internal_t *brokers_internal; + size_t broker_cnt; + int i; + rd_kafka_metadata_topic_t *mdt; + rd_kafka_metadata_topic_internal_t *mdti; -/** - * @brief Handle a Metadata response message. - * - * @param topics are the requested topics (may be NULL) - * - * The metadata will be marshalled into 'struct rd_kafka_metadata*' structs. - * - * The marshalled metadata is returned in \p *mdp, (NULL on error). + rd_dassert(mdi->brokers); + rd_dassert(mdi->metadata.topic_cnt > (int)topic_idx); - * @returns an error code on parse failure, else NO_ERRRO. - * - * @locality rdkafka main thread - */ -rd_kafka_resp_err_t rd_kafka_parse_Metadata(rd_kafka_broker_t *rkb, - rd_kafka_buf_t *request, - rd_kafka_buf_t *rkbuf, - struct rd_kafka_metadata **mdp) { + brokers_internal = mdi->brokers; + broker_cnt = mdi->metadata.broker_cnt; + + mdt = &mdi->metadata.topics[topic_idx]; + mdti = &mdi->topics[topic_idx]; + + for (i = 0; i < mdt->partition_cnt; i++) { + int j; + rd_kafka_metadata_partition_t *mdp = &mdt->partitions[i]; + rd_kafka_metadata_partition_internal_t *mdpi = + &mdti->partitions[i]; + + rd_list_t *curr_list; + char *rack; + + if (mdp->replica_cnt == 0) + continue; + + curr_list = + rd_list_new(0, NULL); /* use a list for de-duplication */ + for (j = 0; j < mdp->replica_cnt; j++) { + rd_kafka_metadata_broker_internal_t key = { + .id = mdp->replicas[j]}; + rd_kafka_metadata_broker_internal_t *broker = + bsearch(&key, brokers_internal, broker_cnt, + sizeof(rd_kafka_metadata_broker_internal_t), + rd_kafka_metadata_broker_internal_cmp); + if (!broker || !broker->rack_id) + continue; + rd_list_add(curr_list, broker->rack_id); + } + rd_list_deduplicate(&curr_list, rd_strcmp2); + + mdpi->racks_cnt = rd_list_cnt(curr_list); + mdpi->racks = + rd_tmpabuf_alloc(tbuf, sizeof(char *) * mdpi->racks_cnt); + RD_LIST_FOREACH(rack, curr_list, j) { + mdpi->racks[j] = rack; /* Don't copy, rack points inside + tbuf already */ + } + rd_list_destroy(curr_list); + } +} + +/* Internal implementation for parsing Metadata.
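+ *
+ * Both rd_kafka_parse_Metadata() (client path, called with the
+ * originating request) and rd_kafka_parse_Metadata_admin() (admin path,
+ * called with a NULL request and an explicit topic list) are thin
+ * wrappers around this function.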
*/ +static rd_kafka_resp_err_t +rd_kafka_parse_Metadata0(rd_kafka_broker_t *rkb, + rd_kafka_buf_t *request, + rd_kafka_buf_t *rkbuf, + rd_kafka_metadata_internal_t **mdip, + rd_list_t *request_topics, + const char *reason) { rd_kafka_t *rk = rkb->rkb_rk; int i, j, k; rd_tmpabuf_t tbuf; - struct rd_kafka_metadata *md = NULL; + rd_kafka_metadata_internal_t *mdi = NULL; + rd_kafka_metadata_t *md = NULL; size_t rkb_namelen; const int log_decode_errors = LOG_ERR; rd_list_t *missing_topics = NULL; - const rd_list_t *requested_topics = request->rkbuf_u.Metadata.topics; - rd_bool_t all_topics = request->rkbuf_u.Metadata.all_topics; - rd_bool_t cgrp_update = - request->rkbuf_u.Metadata.cgrp_update && rk->rk_cgrp; - const char *reason = request->rkbuf_u.Metadata.reason - ? request->rkbuf_u.Metadata.reason - : "(no reason)"; - int ApiVersion = request->rkbuf_reqhdr.ApiVersion; + const rd_list_t *requested_topics = request_topics; + rd_bool_t all_topics = rd_false; + rd_bool_t cgrp_update = rd_false; + rd_bool_t has_reliable_leader_epochs = + rd_kafka_has_reliable_leader_epochs(rkb); + int ApiVersion = rkbuf->rkbuf_reqhdr.ApiVersion; rd_kafkap_str_t cluster_id = RD_ZERO_INIT; int32_t controller_id = -1; rd_kafka_resp_err_t err = RD_KAFKA_RESP_ERR_NO_ERROR; int broker_changes = 0; int cache_changes = 0; - /** This array is reused and resized as necessary to hold per-partition - * leader epochs (ApiVersion >= 7). */ - rd_kafka_partition_leader_epoch_t *leader_epochs = NULL; - /** Number of allocated elements in leader_epochs. */ - size_t leader_epochs_size = 0; - rd_ts_t ts_start = rd_clock(); + rd_ts_t ts_start = rd_clock(); + /* If client rack is present, the metadata cache (topic or full) needs + * to contain the partition to rack map. */ + rd_bool_t has_client_rack = rk->rk_conf.client_rack && + RD_KAFKAP_STR_LEN(rk->rk_conf.client_rack); + rd_bool_t compute_racks = has_client_rack; + + if (request) { + requested_topics = request->rkbuf_u.Metadata.topics; + all_topics = request->rkbuf_u.Metadata.all_topics; + cgrp_update = + request->rkbuf_u.Metadata.cgrp_update && rk->rk_cgrp; + compute_racks |= request->rkbuf_u.Metadata.force_racks; + } + + /* If reason is NULL, set it to a human-readable string. */ + if (!reason) + reason = "(no reason)"; /* Ignore metadata updates when terminating */ if (rd_kafka_terminating(rkb->rkb_rk)) { @@ -329,17 +521,24 @@ rd_kafka_resp_err_t rd_kafka_parse_Metadata(rd_kafka_broker_t *rkb, rd_kafka_broker_lock(rkb); rkb_namelen = strlen(rkb->rkb_name) + 1; /* We assume that the marshalled representation is - * no more than 4 times larger than the wire representation. */ - rd_tmpabuf_new(&tbuf, - sizeof(*md) + rkb_namelen + (rkbuf->rkbuf_totlen * 4), - 0 /*dont assert on fail*/); - - if (!(md = rd_tmpabuf_alloc(&tbuf, sizeof(*md)))) { + * no more than 4 times larger than the wire representation. + * This is increased to 5 times in case we want to compute the partition + * to rack mapping. */ + rd_tmpabuf_new(&tbuf, 0, rd_false /*don't assert on fail*/); + rd_tmpabuf_add_alloc(&tbuf, sizeof(*mdi)); + rd_tmpabuf_add_alloc(&tbuf, rkb_namelen); + rd_tmpabuf_add_alloc(&tbuf, rkbuf->rkbuf_totlen * + (4 + (compute_racks ?
1 : 0))); + + rd_tmpabuf_finalize(&tbuf); + + if (!(mdi = rd_tmpabuf_alloc(&tbuf, sizeof(*mdi)))) { rd_kafka_broker_unlock(rkb); err = RD_KAFKA_RESP_ERR__CRIT_SYS_RESOURCE; goto err; } + md = &mdi->metadata; md->orig_broker_id = rkb->rkb_nodeid; md->orig_broker_name = rd_tmpabuf_write(&tbuf, rkb->rkb_name, rkb_namelen); @@ -358,31 +557,58 @@ rd_kafka_resp_err_t rd_kafka_parse_Metadata(rd_kafka_broker_t *rkb, "%d brokers: tmpabuf memory shortage", md->broker_cnt); + if (!(mdi->brokers = rd_tmpabuf_alloc( + &tbuf, md->broker_cnt * sizeof(*mdi->brokers)))) + rd_kafka_buf_parse_fail( + rkbuf, "%d internal brokers: tmpabuf memory shortage", + md->broker_cnt); + + if (!(mdi->brokers_sorted = rd_tmpabuf_alloc( + &tbuf, md->broker_cnt * sizeof(*mdi->brokers_sorted)))) + rd_kafka_buf_parse_fail( + rkbuf, "%d sorted brokers: tmpabuf memory shortage", + md->broker_cnt); + for (i = 0; i < md->broker_cnt; i++) { rd_kafka_buf_read_i32a(rkbuf, md->brokers[i].id); rd_kafka_buf_read_str_tmpabuf(rkbuf, &tbuf, md->brokers[i].host); rd_kafka_buf_read_i32a(rkbuf, md->brokers[i].port); + mdi->brokers[i].id = md->brokers[i].id; if (ApiVersion >= 1) { - rd_kafkap_str_t rack; - rd_kafka_buf_read_str(rkbuf, &rack); + rd_kafka_buf_read_str_tmpabuf(rkbuf, &tbuf, + mdi->brokers[i].rack_id); + } else { + mdi->brokers[i].rack_id = NULL; } rd_kafka_buf_skip_tags(rkbuf); } - if (ApiVersion >= 2) + mdi->cluster_id = NULL; + if (ApiVersion >= 2) { rd_kafka_buf_read_str(rkbuf, &cluster_id); + if (cluster_id.str) + mdi->cluster_id = + rd_tmpabuf_write_str(&tbuf, cluster_id.str); + } + mdi->controller_id = -1; if (ApiVersion >= 1) { rd_kafka_buf_read_i32(rkbuf, &controller_id); + mdi->controller_id = controller_id; rd_rkb_dbg(rkb, METADATA, "METADATA", "ClusterId: %.*s, ControllerId: %" PRId32, RD_KAFKAP_STR_PR(&cluster_id), controller_id); } - + qsort(mdi->brokers, md->broker_cnt, sizeof(mdi->brokers[i]), + rd_kafka_metadata_broker_internal_cmp); + memcpy(mdi->brokers_sorted, md->brokers, + sizeof(*mdi->brokers_sorted) * md->broker_cnt); + qsort(mdi->brokers_sorted, md->broker_cnt, sizeof(*mdi->brokers_sorted), + rd_kafka_metadata_broker_cmp); /* Read TopicMetadata */ rd_kafka_buf_read_arraycnt(rkbuf, &md->topic_cnt, RD_KAFKAP_TOPICS_MAX); @@ -394,15 +620,25 @@ rd_kafka_resp_err_t rd_kafka_parse_Metadata(rd_kafka_broker_t *rkb, rd_kafka_buf_parse_fail( rkbuf, "%d topics: tmpabuf memory shortage", md->topic_cnt); + if (!(mdi->topics = rd_tmpabuf_alloc(&tbuf, md->topic_cnt * + sizeof(*mdi->topics)))) + rd_kafka_buf_parse_fail( + rkbuf, "%d internal topics: tmpabuf memory shortage", + md->topic_cnt); + for (i = 0; i < md->topic_cnt; i++) { rd_kafka_buf_read_i16a(rkbuf, md->topics[i].err); rd_kafka_buf_read_str_tmpabuf(rkbuf, &tbuf, md->topics[i].topic); - if (ApiVersion >= 1) { - int8_t is_internal; - rd_kafka_buf_read_i8(rkbuf, &is_internal); + + if (ApiVersion >= 10) { + rd_kafka_buf_read_uuid(rkbuf, &mdi->topics[i].topic_id); } + if (ApiVersion >= 1) + rd_kafka_buf_read_bool(rkbuf, + &mdi->topics[i].is_internal); + /* PartitionMetadata */ rd_kafka_buf_read_arraycnt(rkbuf, &md->topics[i].partition_cnt, RD_KAFKAP_PARTITIONS_MAX); @@ -416,16 +652,15 @@ rd_kafka_resp_err_t rd_kafka_parse_Metadata(rd_kafka_broker_t *rkb, md->topics[i].topic, md->topics[i].partition_cnt); - /* Resize reused leader_epochs array to fit this partition's - * leader epochs. 
*/ - if (ApiVersion >= 7 && md->topics[i].partition_cnt > 0 && - (size_t)md->topics[i].partition_cnt > leader_epochs_size) { - leader_epochs_size = - RD_MAX(32, md->topics[i].partition_cnt); - leader_epochs = - rd_realloc(leader_epochs, sizeof(*leader_epochs) * - leader_epochs_size); - } + if (!(mdi->topics[i].partitions = rd_tmpabuf_alloc( + &tbuf, md->topics[i].partition_cnt * + sizeof(*mdi->topics[i].partitions)))) + rd_kafka_buf_parse_fail(rkbuf, + "%s: %d internal partitions: " + "tmpabuf memory shortage", + md->topics[i].topic, + md->topics[i].partition_cnt); + for (j = 0; j < md->topics[i].partition_cnt; j++) { rd_kafka_buf_read_i16a(rkbuf, @@ -434,12 +669,22 @@ rd_kafka_resp_err_t rd_kafka_parse_Metadata(rd_kafka_broker_t *rkb, md->topics[i].partitions[j].id); rd_kafka_buf_read_i32a( rkbuf, md->topics[i].partitions[j].leader); + + mdi->topics[i].partitions[j].id = + md->topics[i].partitions[j].id; if (ApiVersion >= 7) { - leader_epochs[j].partition_id = - md->topics[i].partitions[j].id; rd_kafka_buf_read_i32( - rkbuf, &leader_epochs[j].leader_epoch); + rkbuf, + &mdi->topics[i].partitions[j].leader_epoch); + if (!has_reliable_leader_epochs) + mdi->topics[i] + .partitions[j] + .leader_epoch = -1; + } else { + mdi->topics[i].partitions[j].leader_epoch = -1; } + mdi->topics[i].partitions[j].racks_cnt = 0; + mdi->topics[i].partitions[j].racks = NULL; /* Replicas */ rd_kafka_buf_read_arraycnt( @@ -511,14 +756,51 @@ rd_kafka_resp_err_t rd_kafka_parse_Metadata(rd_kafka_broker_t *rkb, rd_kafka_buf_skip_tags(rkbuf); } + mdi->topics[i].topic_authorized_operations = -1; if (ApiVersion >= 8) { int32_t TopicAuthorizedOperations; /* TopicAuthorizedOperations */ rd_kafka_buf_read_i32(rkbuf, &TopicAuthorizedOperations); + mdi->topics[i].topic_authorized_operations = + TopicAuthorizedOperations; } rd_kafka_buf_skip_tags(rkbuf); + } + + mdi->cluster_authorized_operations = -1; + if (ApiVersion >= 8 && ApiVersion <= 10) { + int32_t ClusterAuthorizedOperations; + /* ClusterAuthorizedOperations */ + rd_kafka_buf_read_i32(rkbuf, &ClusterAuthorizedOperations); + mdi->cluster_authorized_operations = + ClusterAuthorizedOperations; + } + + rd_kafka_buf_skip_tags(rkbuf); + + /* Entire Metadata response now parsed without errors: + * update our internal state according to the response. */ + + if (md->broker_cnt == 0 && md->topic_cnt == 0) { + rd_rkb_dbg(rkb, METADATA, "METADATA", + "No brokers or topics in metadata: should retry"); + err = RD_KAFKA_RESP_ERR__PARTIAL; + goto err; + } + + /* Update our list of brokers. */ + for (i = 0; i < md->broker_cnt; i++) { + rd_rkb_dbg(rkb, METADATA, "METADATA", + " Broker #%i/%i: %s:%i NodeId %" PRId32, i, + md->broker_cnt, md->brokers[i].host, + md->brokers[i].port, md->brokers[i].id); + rd_kafka_broker_update(rkb->rkb_rk, rkb->rkb_proto, + &md->brokers[i], NULL); + } + + for (i = 0; i < md->topic_cnt; i++) { /* Ignore topics in blacklist */ if (rkb->rkb_rk->rk_conf.topic_blacklist && @@ -532,38 +814,20 @@ rd_kafka_resp_err_t rd_kafka_parse_Metadata(rd_kafka_broker_t *rkb, continue; } - if (leader_epochs_size > 0 && - !rd_kafka_has_reliable_leader_epochs(rkb)) { - /* Prior to Kafka version 2.4 (which coincides with - * Metadata version 9), the broker does not propagate - * leader epoch information accurately while a - * reassignment is in progress. Relying on a stale - * epoch can lead to FENCED_LEADER_EPOCH errors which - * can prevent consumption throughout the course of - * a reassignment. 
It is safer in this case to revert - * to the behavior in previous protocol versions - * which checks leader status only. */ - leader_epochs_size = 0; - rd_free(leader_epochs); - leader_epochs = NULL; - } - - /* Sort partitions by partition id */ qsort(md->topics[i].partitions, md->topics[i].partition_cnt, sizeof(*md->topics[i].partitions), rd_kafka_metadata_partition_id_cmp); - if (leader_epochs_size > 0) { - /* And sort leader_epochs by partition id */ - qsort(leader_epochs, md->topics[i].partition_cnt, - sizeof(*leader_epochs), - rd_kafka_metadata_partition_leader_epoch_cmp); - } + qsort(mdi->topics[i].partitions, md->topics[i].partition_cnt, + sizeof(*mdi->topics[i].partitions), + rd_kafka_metadata_partition_internal_cmp); + + if (compute_racks) + rd_kafka_populate_metadata_topic_racks(&tbuf, i, mdi); /* Update topic state based on the topic metadata */ rd_kafka_parse_Metadata_update_topic(rkb, &md->topics[i], - leader_epochs); - + &mdi->topics[i]); if (requested_topics) { rd_list_free_cb(missing_topics, @@ -576,42 +840,21 @@ rd_kafka_wrlock(rk); rd_kafka_metadata_cache_topic_update( - rk, &md->topics[i], - rd_false /*propagate later*/); + rk, &md->topics[i], &mdi->topics[i], + rd_false /*propagate later*/, + /* use has_client_rack rather than + compute_racks. We need cached rack ids + only in case we need to rejoin the group + if they change and client.rack is set + (KIP-881). */ + has_client_rack, mdi->brokers, + md->broker_cnt); cache_changes++; rd_kafka_wrunlock(rk); } } } - if (ApiVersion >= 8 && ApiVersion <= 10) { - int32_t ClusterAuthorizedOperations; - /* ClusterAuthorizedOperations */ - rd_kafka_buf_read_i32(rkbuf, &ClusterAuthorizedOperations); - } - - rd_kafka_buf_skip_tags(rkbuf); - - /* Entire Metadata response now parsed without errors: - * update our internal state according to the response. */ - - if (md->broker_cnt == 0 && md->topic_cnt == 0) { - rd_rkb_dbg(rkb, METADATA, "METADATA", - "No brokers or topics in metadata: should retry"); - err = RD_KAFKA_RESP_ERR__PARTIAL; - goto err; - } - - /* Update our list of brokers. */ - for (i = 0; i < md->broker_cnt; i++) { - rd_rkb_dbg(rkb, METADATA, "METADATA", - " Broker #%i/%i: %s:%i NodeId %" PRId32, i, - md->broker_cnt, md->brokers[i].host, - md->brokers[i].port, md->brokers[i].id); - rd_kafka_broker_update(rkb->rkb_rk, rkb->rkb_proto, - &md->brokers[i], NULL); - } - /* Requested topics not seen in metadata? Propagate to topic code. */ if (missing_topics) { char *topic; @@ -690,9 +933,18 @@ rd_kafka_resp_err_t rd_kafka_parse_Metadata(rd_kafka_broker_t *rkb, if (rkb->rkb_rk->rk_full_metadata) rd_kafka_metadata_destroy( - rkb->rkb_rk->rk_full_metadata); - rkb->rkb_rk->rk_full_metadata = - rd_kafka_metadata_copy(md, tbuf.of); + &rkb->rkb_rk->rk_full_metadata->metadata); + + /* use has_client_rack rather than compute_racks. We need cached + * rack ids only in case we need to rejoin the group if they + * change and client.rack is set (KIP-881).
*/ + if (has_client_rack) + rkb->rkb_rk->rk_full_metadata = + rd_kafka_metadata_copy_add_racks(mdi, tbuf.of); + else + rkb->rkb_rk->rk_full_metadata = + rd_kafka_metadata_copy(mdi, tbuf.of); + rkb->rkb_rk->rk_ts_full_metadata = rkb->rkb_rk->rk_ts_metadata; rd_rkb_dbg(rkb, METADATA, "METADATA", "Caching full metadata with " @@ -738,16 +990,13 @@ rd_kafka_resp_err_t rd_kafka_parse_Metadata(rd_kafka_broker_t *rkb, if (missing_topics) rd_list_destroy(missing_topics); - if (leader_epochs) - rd_free(leader_epochs); - /* This metadata request was triggered by someone wanting * the metadata information back as a reply, so send that reply now. * In this case we must not rd_free the metadata memory here, * the requestee will do. * The tbuf is explicitly not destroyed as we return its memory * to the caller. */ - *mdp = md; + *mdip = mdi; return RD_KAFKA_RESP_ERR_NO_ERROR; @@ -764,16 +1013,62 @@ rd_kafka_resp_err_t rd_kafka_parse_Metadata(rd_kafka_broker_t *rkb, if (missing_topics) rd_list_destroy(missing_topics); - - if (leader_epochs) - rd_free(leader_epochs); - rd_tmpabuf_destroy(&tbuf); return err; } +/** + * @brief Handle a Metadata response message. + * + * @param request Initial Metadata request, containing the topic information. + * Must not be NULL. + * We require the topic information while parsing to make sure + * that there are no missing topics. + * @param mdip A pointer to (rd_kafka_metadata_internal_t *) into which the + * metadata will be marshalled (set to NULL on error.) + * + * @returns an error code on parse failure, else NO_ERROR. + * + * @locality rdkafka main thread + */ +rd_kafka_resp_err_t +rd_kafka_parse_Metadata(rd_kafka_broker_t *rkb, + rd_kafka_buf_t *request, + rd_kafka_buf_t *rkbuf, + rd_kafka_metadata_internal_t **mdip) { + const char *reason = request->rkbuf_u.Metadata.reason; + return rd_kafka_parse_Metadata0(rkb, request, rkbuf, mdip, NULL, + reason); +} + +/** + * @brief Handle a Metadata response message for admin requests. + * + * @param request_topics List containing topics in Metadata request. Must not + * be NULL. It is more convenient in the Admin flow to + * preserve the topic names rather than the initial + * Metadata request. + * We require the topic information while parsing to make + * sure that there are no missing topics. + * @param mdip A pointer to (rd_kafka_metadata_internal_t *) into which the + * metadata will be marshalled (set to NULL on error.) + * + * @returns an error code on parse failure, else NO_ERROR. 
+ * + * @locality rdkafka main thread + */ +rd_kafka_resp_err_t +rd_kafka_parse_Metadata_admin(rd_kafka_broker_t *rkb, + rd_kafka_buf_t *rkbuf, + rd_list_t *request_topics, + rd_kafka_metadata_internal_t **mdip) { + return rd_kafka_parse_Metadata0(rkb, NULL, rkbuf, mdip, request_topics, + "(admin request)"); +} + + /** * @brief Add all topics in current cached full metadata * that matches the topics in \p match @@ -795,12 +1090,15 @@ rd_kafka_metadata_topic_match(rd_kafka_t *rk, rd_kafka_topic_partition_list_t *errored) { int ti, i; size_t cnt = 0; - const struct rd_kafka_metadata *metadata; + rd_kafka_metadata_internal_t *mdi; + struct rd_kafka_metadata *metadata; rd_kafka_topic_partition_list_t *unmatched; rd_kafka_rdlock(rk); - metadata = rk->rk_full_metadata; - if (!metadata) { + mdi = rk->rk_full_metadata; + metadata = &mdi->metadata; + + if (!mdi) { rd_kafka_rdunlock(rk); return 0; } @@ -841,10 +1139,11 @@ rd_kafka_metadata_topic_match(rd_kafka_t *rk, continue; /* Skip errored topics */ } - rd_list_add( - tinfos, - rd_kafka_topic_info_new( - topic, metadata->topics[ti].partition_cnt)); + rd_list_add(tinfos, + rd_kafka_topic_info_new_with_rack( + topic, + metadata->topics[ti].partition_cnt, + mdi->topics[ti].partitions)); cnt++; } @@ -889,16 +1188,18 @@ rd_kafka_metadata_topic_filter(rd_kafka_t *rk, rd_kafka_rdlock(rk); /* For each topic in match, look up the topic in the cache. */ for (i = 0; i < match->cnt; i++) { - const char *topic = match->elems[i].topic; - const rd_kafka_metadata_topic_t *mtopic; + const char *topic = match->elems[i].topic; + const rd_kafka_metadata_topic_t *mtopic = NULL; /* Ignore topics in blacklist */ if (rk->rk_conf.topic_blacklist && rd_kafka_pattern_match(rk->rk_conf.topic_blacklist, topic)) continue; - mtopic = - rd_kafka_metadata_cache_topic_get(rk, topic, 1 /*valid*/); + struct rd_kafka_metadata_cache_entry *rkmce = + rd_kafka_metadata_cache_find(rk, topic, 1 /* valid */); + if (rkmce) + mtopic = &rkmce->rkmce_mtopic; if (!mtopic) rd_kafka_topic_partition_list_add(errored, topic, @@ -909,8 +1210,11 @@ rd_kafka_metadata_topic_filter(rd_kafka_t *rk, RD_KAFKA_PARTITION_UA) ->err = mtopic->err; else { - rd_list_add(tinfos, rd_kafka_topic_info_new( - topic, mtopic->partition_cnt)); + rd_list_add(tinfos, + rd_kafka_topic_info_new_with_rack( + topic, mtopic->partition_cnt, + rkmce->rkmce_metadata_internal_topic + .partitions)); cnt++; } @@ -1041,7 +1345,7 @@ rd_kafka_metadata_refresh_topics(rd_kafka_t *rk, rd_list_cnt(&q_topics), rd_list_cnt(topics), reason); rd_kafka_MetadataRequest(rkb, &q_topics, reason, allow_auto_create, - cgrp_update, NULL); + cgrp_update, rd_false /* force_racks */, NULL); rd_list_destroy(&q_topics); @@ -1216,9 +1520,9 @@ rd_kafka_resp_err_t rd_kafka_metadata_refresh_all(rd_kafka_t *rk, } rd_list_init(&topics, 0, NULL); /* empty list = all topics */ - rd_kafka_MetadataRequest(rkb, &topics, reason, - rd_false /*no auto create*/, - rd_true /*cgrp update*/, NULL); + rd_kafka_MetadataRequest( + rkb, &topics, reason, rd_false /*no auto create*/, + rd_true /*cgrp update*/, rd_false /* force_rack */, NULL); rd_list_destroy(&topics); if (destroy_rkb) @@ -1256,7 +1560,7 @@ rd_kafka_metadata_request(rd_kafka_t *rk, } rd_kafka_MetadataRequest(rkb, topics, reason, allow_auto_create_topics, - cgrp_update, rko); + cgrp_update, rd_false /* force racks */, rko); if (destroy_rkb) rd_kafka_broker_destroy(rkb); @@ -1321,15 +1625,13 @@ static void rd_kafka_metadata_leader_query_tmr_cb(rd_kafka_timers_t *rkts, rk, NULL, &topics, rd_true /*force*/, 
rk->rk_conf.allow_auto_create_topics, rd_false /*!cgrp_update*/, "partition leader query"); - /* Back off next query exponentially until we reach - * the standard query interval - then stop the timer - * since the intervalled querier will do the job for us. */ - if (rk->rk_conf.metadata_refresh_interval_ms > 0 && - rtmr->rtmr_interval * 2 / 1000 >= - rk->rk_conf.metadata_refresh_interval_ms) - rd_kafka_timer_stop(rkts, rtmr, 1 /*lock*/); - else - rd_kafka_timer_exp_backoff(rkts, rtmr); + + /* Back off next query exponentially till we reach + * the retry backoff max ms */ + rd_kafka_timer_exp_backoff( + rkts, rtmr, rk->rk_conf.retry_backoff_ms * 1000, + rk->rk_conf.retry_backoff_max_ms * 1000, + RD_KAFKA_RETRY_JITTER_PERCENT); } rd_list_destroy(&topics); @@ -1359,7 +1661,7 @@ void rd_kafka_metadata_fast_leader_query(rd_kafka_t *rk) { "Starting fast leader query"); rd_kafka_timer_start( &rk->rk_timers, &rk->rk_metadata_cache.rkmc_query_tmr, - rk->rk_conf.metadata_refresh_fast_interval_ms * 1000, + 0 /* First request should be tried immediately */, rd_kafka_metadata_leader_query_tmr_cb, NULL); } } @@ -1371,44 +1673,71 @@ void rd_kafka_metadata_fast_leader_query(rd_kafka_t *rk) { * * @param topics elements are checked for .topic and .partition_cnt * @param topic_cnt is the number of topic elements in \p topics. + * @param replication_factor is the number of replicas of each partition (set to + * -1 to ignore). + * @param num_brokers is the number of brokers in the cluster. * * @returns a newly allocated metadata object that must be freed with * rd_kafka_metadata_destroy(). * + * @note \p replication_factor and \p num_brokers must be used together for + * setting replicas of each partition. + * * @sa rd_kafka_metadata_copy() */ rd_kafka_metadata_t * rd_kafka_metadata_new_topic_mock(const rd_kafka_metadata_topic_t *topics, - size_t topic_cnt) { + size_t topic_cnt, + int replication_factor, + int num_brokers) { + rd_kafka_metadata_internal_t *mdi; rd_kafka_metadata_t *md; rd_tmpabuf_t tbuf; - size_t topic_names_size = 0; - int total_partition_cnt = 0; size_t i; + int curr_broker = 0; + + /* If the replication factor is given, num_brokers must also be given */ + rd_assert(replication_factor <= 0 || num_brokers > 0); + + /* Allocate contiguous buffer which will back all the memory + * needed by the final metadata_t object */ + rd_tmpabuf_new(&tbuf, sizeof(*mdi), rd_true /*assert on fail*/); + + rd_tmpabuf_add_alloc(&tbuf, topic_cnt * sizeof(*md->topics)); + rd_tmpabuf_add_alloc(&tbuf, topic_cnt * sizeof(*mdi->topics)); + rd_tmpabuf_add_alloc(&tbuf, num_brokers * sizeof(*md->brokers)); /* Calculate total partition count and topic names size before * allocating memory. 
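 *
 * (With the two-phase tmpabuf API, this loop now registers each required
 * allocation via rd_tmpabuf_add_alloc(); rd_tmpabuf_finalize() below then
 * sizes a single contiguous buffer.)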
*/ for (i = 0; i < topic_cnt; i++) { - topic_names_size += 1 + strlen(topics[i].topic); - total_partition_cnt += topics[i].partition_cnt; + rd_tmpabuf_add_alloc(&tbuf, 1 + strlen(topics[i].topic)); + rd_tmpabuf_add_alloc(&tbuf, + topics[i].partition_cnt * + sizeof(*md->topics[i].partitions)); + rd_tmpabuf_add_alloc(&tbuf, + topics[i].partition_cnt * + sizeof(*mdi->topics[i].partitions)); + if (replication_factor > 0) + rd_tmpabuf_add_alloc_times( + &tbuf, replication_factor * sizeof(int), + topics[i].partition_cnt); } + rd_tmpabuf_finalize(&tbuf); - /* Allocate contiguous buffer which will back all the memory - * needed by the final metadata_t object */ - rd_tmpabuf_new( - &tbuf, - sizeof(*md) + (sizeof(*md->topics) * topic_cnt) + topic_names_size + - (64 /*topic name size..*/ * topic_cnt) + - (sizeof(*md->topics[0].partitions) * total_partition_cnt), - 1 /*assert on fail*/); - - md = rd_tmpabuf_alloc(&tbuf, sizeof(*md)); - memset(md, 0, sizeof(*md)); + mdi = rd_tmpabuf_alloc(&tbuf, sizeof(*mdi)); + memset(mdi, 0, sizeof(*mdi)); + md = &mdi->metadata; md->topic_cnt = (int)topic_cnt; md->topics = rd_tmpabuf_alloc(&tbuf, md->topic_cnt * sizeof(*md->topics)); + mdi->topics = + rd_tmpabuf_alloc(&tbuf, md->topic_cnt * sizeof(*mdi->topics)); + + md->broker_cnt = num_brokers; + mdi->brokers = + rd_tmpabuf_alloc(&tbuf, md->broker_cnt * sizeof(*mdi->brokers)); for (i = 0; i < (size_t)md->topic_cnt; i++) { int j; @@ -1421,12 +1750,42 @@ rd_kafka_metadata_new_topic_mock(const rd_kafka_metadata_topic_t *topics, md->topics[i].partitions = rd_tmpabuf_alloc( &tbuf, md->topics[i].partition_cnt * sizeof(*md->topics[i].partitions)); + mdi->topics[i].partitions = rd_tmpabuf_alloc( + &tbuf, md->topics[i].partition_cnt * + sizeof(*mdi->topics[i].partitions)); for (j = 0; j < md->topics[i].partition_cnt; j++) { + int k; memset(&md->topics[i].partitions[j], 0, sizeof(md->topics[i].partitions[j])); - md->topics[i].partitions[j].id = j; + memset(&mdi->topics[i].partitions[j], 0, + sizeof(mdi->topics[i].partitions[j])); + md->topics[i].partitions[j].id = j; + mdi->topics[i].partitions[j].id = j; + mdi->topics[i].partitions[j].leader_epoch = -1; + mdi->topics[i].partitions[j].racks_cnt = 0; + mdi->topics[i].partitions[j].racks = NULL; + md->topics[i].partitions[j].id = j; + + /* In case replication_factor is not given, don't set + * replicas. 
*/ + if (replication_factor <= 0) + continue; + + md->topics[i].partitions[j].replicas = rd_tmpabuf_alloc( + &tbuf, replication_factor * sizeof(int)); + md->topics[i].partitions[j].leader = curr_broker; + md->topics[i].partitions[j].replica_cnt = + replication_factor; + for (k = 0; k < replication_factor; k++) { + md->topics[i].partitions[j].replicas[k] = + (j + k + curr_broker) % num_brokers; + } } + if (num_brokers > 0) + curr_broker = + (curr_broker + md->topics[i].partition_cnt) % + num_brokers; } /* Check for tmpabuf errors */ @@ -1438,6 +1797,24 @@ rd_kafka_metadata_new_topic_mock(const rd_kafka_metadata_topic_t *topics, return md; } +/* Implementation for rd_kafka_metadata_new_topic*mockv() */ +static rd_kafka_metadata_t * +rd_kafka_metadata_new_topic_mockv_internal(size_t topic_cnt, + int replication_factor, + int num_brokers, + va_list args) { + rd_kafka_metadata_topic_t *topics; + size_t i; + + topics = rd_alloca(sizeof(*topics) * topic_cnt); + for (i = 0; i < topic_cnt; i++) { + topics[i].topic = va_arg(args, char *); + topics[i].partition_cnt = va_arg(args, int); + } + + return rd_kafka_metadata_new_topic_mock( + topics, topic_cnt, replication_factor, num_brokers); +} /** * @brief Create mock Metadata (for testing) based on the @@ -1451,18 +1828,75 @@ rd_kafka_metadata_new_topic_mock(const rd_kafka_metadata_topic_t *topics, * @sa rd_kafka_metadata_new_topic_mock() */ rd_kafka_metadata_t *rd_kafka_metadata_new_topic_mockv(size_t topic_cnt, ...) { - rd_kafka_metadata_topic_t *topics; + rd_kafka_metadata_t *metadata; va_list ap; - size_t i; - topics = rd_alloca(sizeof(*topics) * topic_cnt); + va_start(ap, topic_cnt); + metadata = + rd_kafka_metadata_new_topic_mockv_internal(topic_cnt, -1, 0, ap); + va_end(ap); + + return metadata; +} + +/** + * @brief Create mock Metadata (for testing) based on the + * var-arg tuples of (const char *topic, int partition_cnt). + * + * @param replication_factor is the number of replicas of each partition. + * @param num_brokers is the number of brokers in the cluster. + * @param topic_cnt is the number of topic,partition_cnt tuples. + * + * @returns a newly allocated metadata object that must be freed with + * rd_kafka_metadata_destroy(). + * + * @sa rd_kafka_metadata_new_topic_mock() + */ +rd_kafka_metadata_t *rd_kafka_metadata_new_topic_with_partition_replicas_mockv( + int replication_factor, + int num_brokers, + size_t topic_cnt, + ...) { + rd_kafka_metadata_t *metadata; + va_list ap; va_start(ap, topic_cnt); + metadata = rd_kafka_metadata_new_topic_mockv_internal( + topic_cnt, replication_factor, num_brokers, ap); + va_end(ap); + + return metadata; +} + +/** + * @brief Create mock Metadata (for testing) based on arrays topic_names and + * partition_cnts. + * + * @param replication_factor is the number of replicas of each partition. + * @param num_brokers is the number of brokers in the cluster. + * @param topic_names names of topics. + * @param partition_cnts number of partitions in each topic. + * @param topic_cnt number of topics. 
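+ *
+ * A minimal usage sketch (illustrative only; topic names and counts are
+ * hypothetical):
+ * @code
+ *   char *names[]   = {"topicA", "topicB"};
+ *   int part_cnts[] = {3, 1};
+ *   rd_kafka_metadata_t *md =
+ *       rd_kafka_metadata_new_topic_with_partition_replicas_mock(
+ *           2, 3, names, part_cnts, 2);
+ *   // ... use md in tests ...
+ *   rd_kafka_metadata_destroy(md);
+ * @endcode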
+ * + * @return rd_kafka_metadata_t* + * + * @sa rd_kafka_metadata_new_topic_mock() + */ +rd_kafka_metadata_t * +rd_kafka_metadata_new_topic_with_partition_replicas_mock(int replication_factor, + int num_brokers, + char *topic_names[], + int *partition_cnts, + size_t topic_cnt) { + rd_kafka_metadata_topic_t *topics; + size_t i; + + topics = rd_alloca(sizeof(*topics) * topic_cnt); for (i = 0; i < topic_cnt; i++) { - topics[i].topic = va_arg(ap, char *); - topics[i].partition_cnt = va_arg(ap, int); + topics[i].topic = topic_names[i]; + topics[i].partition_cnt = partition_cnts[i]; } - va_end(ap); - return rd_kafka_metadata_new_topic_mock(topics, topic_cnt); + return rd_kafka_metadata_new_topic_mock( + topics, topic_cnt, replication_factor, num_brokers); } diff --git a/lib/librdkafka-2.1.0/src/rdkafka_metadata.h b/lib/librdkafka-2.3.0/src/rdkafka_metadata.h similarity index 59% rename from lib/librdkafka-2.1.0/src/rdkafka_metadata.h rename to lib/librdkafka-2.3.0/src/rdkafka_metadata.h index 53a959b8ecf..213bf2b8968 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_metadata.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_metadata.h @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,15 +32,96 @@ #include "rdavl.h" +/** + * @brief Metadata partition internal container + */ +typedef struct rd_kafka_metadata_partition_internal_s { + /** Partition Id */ + int32_t id; + /** Partition leader epoch */ + int32_t leader_epoch; + /* Racks for this partition. Sorted and de-duplicated. */ + char **racks; + /* Count of the racks */ + size_t racks_cnt; +} rd_kafka_metadata_partition_internal_t; + +/** + * @brief Metadata topic internal container + */ +typedef struct rd_kafka_metadata_topic_internal_s { + /** Internal metadata partition structs. + * same count as metadata.topics[i].partition_cnt. + * Sorted by Partition Id. */ + rd_kafka_metadata_partition_internal_t *partitions; + rd_kafka_Uuid_t topic_id; + int32_t topic_authorized_operations; /**< ACL operations allowed + * for topic, -1 if not + * supported by broker */ + rd_bool_t is_internal; /**< Is topic internal to Kafka? */ +} rd_kafka_metadata_topic_internal_t; + + +/** + * @brief Metadata broker internal container + */ +typedef struct rd_kafka_metadata_broker_internal_s { + /** Broker Id. */ + int32_t id; + /** Rack Id (optional). */ + char *rack_id; +} rd_kafka_metadata_broker_internal_t; + +/** + * @brief Metadata internal container + */ +typedef struct rd_kafka_metadata_internal_s { + rd_kafka_metadata_t + metadata; /**< Public metadata struct. Must + be kept the first field so the pointer + can be cast to *rd_kafka_metadata_internal_t + when needed */ + /* Identical to metadata->brokers, but sorted by broker id. */ + struct rd_kafka_metadata_broker *brokers_sorted; + /* Internal metadata brokers. Same count as metadata.broker_cnt. + * Sorted by broker id. */ + rd_kafka_metadata_broker_internal_t *brokers; + /* Internal metadata topics. Same count as metadata.topic_cnt. */ + rd_kafka_metadata_topic_internal_t *topics; + char *cluster_id; /**< Cluster id (optionally populated)*/ + int controller_id; /**< current controller id for cluster, -1 if not + * supported by broker. 
*/ + int32_t cluster_authorized_operations; /**< ACL operations allowed + * for cluster, -1 if not + * supported by broker */ +} rd_kafka_metadata_internal_t; + +/** + * @brief The internal metadata type corresponding to the + * public one. + */ +#define rd_kafka_metadata_get_internal(md) ((rd_kafka_metadata_internal_t *)md) + rd_bool_t rd_kafka_has_reliable_leader_epochs(rd_kafka_broker_t *rkb); -rd_kafka_resp_err_t rd_kafka_parse_Metadata(rd_kafka_broker_t *rkb, - rd_kafka_buf_t *request, - rd_kafka_buf_t *rkbuf, - struct rd_kafka_metadata **mdp); +rd_kafka_resp_err_t +rd_kafka_parse_Metadata(rd_kafka_broker_t *rkb, + rd_kafka_buf_t *request, + rd_kafka_buf_t *rkbuf, + rd_kafka_metadata_internal_t **mdip); -struct rd_kafka_metadata * -rd_kafka_metadata_copy(const struct rd_kafka_metadata *md, size_t size); +rd_kafka_resp_err_t +rd_kafka_parse_Metadata_admin(rd_kafka_broker_t *rkb, + rd_kafka_buf_t *rkbuf, + rd_list_t *request_topics, + rd_kafka_metadata_internal_t **mdip); + +rd_kafka_metadata_internal_t * +rd_kafka_metadata_copy(const rd_kafka_metadata_internal_t *mdi, size_t size); + +rd_kafka_metadata_internal_t * +rd_kafka_metadata_copy_add_racks(const rd_kafka_metadata_internal_t *mdi, + size_t size); size_t rd_kafka_metadata_topic_match(rd_kafka_t *rk, @@ -95,11 +177,40 @@ rd_kafka_metadata_request(rd_kafka_t *rk, int rd_kafka_metadata_partition_id_cmp(const void *_a, const void *_b); +int rd_kafka_metadata_broker_internal_cmp(const void *_a, const void *_b); + +int rd_kafka_metadata_broker_cmp(const void *_a, const void *_b); + +void rd_kafka_metadata_partition_clear( + struct rd_kafka_metadata_partition *rkmp); + +#define rd_kafka_metadata_broker_internal_find(mdi, broker_id, broker) \ + do { \ + rd_kafka_metadata_broker_internal_t __key = {.id = broker_id}; \ + broker = \ + bsearch(&__key, mdi->brokers, mdi->metadata.broker_cnt, \ + sizeof(rd_kafka_metadata_broker_internal_t), \ + rd_kafka_metadata_broker_internal_cmp); \ + } while (0) + + rd_kafka_metadata_t * rd_kafka_metadata_new_topic_mock(const rd_kafka_metadata_topic_t *topics, - size_t topic_cnt); + size_t topic_cnt, + int replication_factor, + int num_brokers); rd_kafka_metadata_t *rd_kafka_metadata_new_topic_mockv(size_t topic_cnt, ...); - +rd_kafka_metadata_t *rd_kafka_metadata_new_topic_with_partition_replicas_mockv( + int replication_factor, + int num_brokers, + size_t topic_cnt, + ...); +rd_kafka_metadata_t * +rd_kafka_metadata_new_topic_with_partition_replicas_mock(int replication_factor, + int num_brokers, + char *topic_names[], + int *partition_cnts, + size_t topic_cnt); /** * @{ @@ -115,6 +226,8 @@ struct rd_kafka_metadata_cache_entry { /** Last known leader epochs array (same size as the partition count), * or NULL if not known. */ rd_kafka_metadata_topic_t rkmce_mtopic; /* Cached topic metadata */ + /* Cached internal topic metadata */ + rd_kafka_metadata_topic_internal_t rkmce_metadata_internal_topic; /* rkmce_topics.partitions memory points here. 
*/ }; @@ -155,14 +268,17 @@ struct rd_kafka_metadata_cache { +int rd_kafka_metadata_cache_delete_by_name(rd_kafka_t *rk, const char *topic); void rd_kafka_metadata_cache_expiry_start(rd_kafka_t *rk); int rd_kafka_metadata_cache_evict_by_age(rd_kafka_t *rk, rd_ts_t ts); -void rd_kafka_metadata_cache_topic_update(rd_kafka_t *rk, - const rd_kafka_metadata_topic_t *mdt, - rd_bool_t propagate); -void rd_kafka_metadata_cache_update(rd_kafka_t *rk, - const rd_kafka_metadata_t *md, - int abs_update); +void rd_kafka_metadata_cache_topic_update( + rd_kafka_t *rk, + const rd_kafka_metadata_topic_t *mdt, + const rd_kafka_metadata_topic_internal_t *mdit, + rd_bool_t propagate, + rd_bool_t include_metadata, + rd_kafka_metadata_broker_internal_t *brokers, + size_t broker_cnt); void rd_kafka_metadata_cache_propagate_changes(rd_kafka_t *rk); struct rd_kafka_metadata_cache_entry * rd_kafka_metadata_cache_find(rd_kafka_t *rk, const char *topic, int valid); diff --git a/lib/librdkafka-2.1.0/src/rdkafka_metadata_cache.c b/lib/librdkafka-2.3.0/src/rdkafka_metadata_cache.c similarity index 86% rename from lib/librdkafka-2.1.0/src/rdkafka_metadata_cache.c rename to lib/librdkafka-2.3.0/src/rdkafka_metadata_cache.c index 514d391a83d..b3bad4de8d3 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_metadata_cache.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_metadata_cache.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -93,8 +94,7 @@ rd_kafka_metadata_cache_delete(rd_kafka_t *rk, * @locks rd_kafka_wrlock() * @returns 1 if entry was found and removed, else 0. */ -static int rd_kafka_metadata_cache_delete_by_name(rd_kafka_t *rk, - const char *topic) { +int rd_kafka_metadata_cache_delete_by_name(rd_kafka_t *rk, const char *topic) { struct rd_kafka_metadata_cache_entry *rkmce; rkmce = rd_kafka_metadata_cache_find(rk, topic, 1); @@ -238,13 +238,16 @@ int rd_kafka_metadata_partition_id_cmp(const void *_a, const void *_b) { * * @locks_required rd_kafka_wrlock() */ -static struct rd_kafka_metadata_cache_entry * -rd_kafka_metadata_cache_insert(rd_kafka_t *rk, - const rd_kafka_metadata_topic_t *mtopic, - rd_ts_t now, - rd_ts_t ts_expires) { +static struct rd_kafka_metadata_cache_entry *rd_kafka_metadata_cache_insert( + rd_kafka_t *rk, + const rd_kafka_metadata_topic_t *mtopic, + const rd_kafka_metadata_topic_internal_t *metadata_internal_topic, + rd_ts_t now, + rd_ts_t ts_expires, + rd_bool_t include_racks, + rd_kafka_metadata_broker_internal_t *brokers_internal, + size_t broker_cnt) { struct rd_kafka_metadata_cache_entry *rkmce, *old; - size_t topic_len; rd_tmpabuf_t tbuf; int i; @@ -253,19 +256,41 @@ rd_kafka_metadata_cache_insert(rd_kafka_t *rk, * rd_tmpabuf_t provides the infrastructure to do this. * Because of this we copy all the structs verbatim but * any pointer fields needs to be copied explicitly to update - * the pointer address. */ - topic_len = strlen(mtopic->topic) + 1; - rd_tmpabuf_new(&tbuf, - RD_ROUNDUP(sizeof(*rkmce), 8) + - RD_ROUNDUP(topic_len, 8) + - (mtopic->partition_cnt * - RD_ROUNDUP(sizeof(*mtopic->partitions), 8)), - 1 /*assert on fail*/); + * the pointer address. + * See also rd_kafka_metadata_cache_delete which frees this. 
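+ * The tmpabuf is used in two phases: each required size is first
+ * registered with rd_tmpabuf_add_alloc(), rd_tmpabuf_finalize() then
+ * allocates one contiguous buffer, and the subsequent rd_tmpabuf_alloc()
+ * and rd_tmpabuf_write() calls carve it up in the same order.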
*/ + rd_tmpabuf_new(&tbuf, 0, rd_true /*assert on fail*/); + + rd_tmpabuf_add_alloc(&tbuf, sizeof(*rkmce)); + rd_tmpabuf_add_alloc(&tbuf, strlen(mtopic->topic) + 1); + rd_tmpabuf_add_alloc(&tbuf, mtopic->partition_cnt * + sizeof(*mtopic->partitions)); + rd_tmpabuf_add_alloc(&tbuf, + mtopic->partition_cnt * + sizeof(*metadata_internal_topic->partitions)); + + for (i = 0; include_racks && i < mtopic->partition_cnt; i++) { + size_t j; + rd_tmpabuf_add_alloc( + &tbuf, metadata_internal_topic->partitions[i].racks_cnt * + sizeof(char *)); + for (j = 0; + j < metadata_internal_topic->partitions[i].racks_cnt; + j++) { + rd_tmpabuf_add_alloc( + &tbuf, strlen(metadata_internal_topic->partitions[i] + .racks[j]) + + 1); + } + } + + rd_tmpabuf_finalize(&tbuf); rkmce = rd_tmpabuf_alloc(&tbuf, sizeof(*rkmce)); rkmce->rkmce_mtopic = *mtopic; + rkmce->rkmce_metadata_internal_topic = *metadata_internal_topic; + /* Copy topic name and update pointer */ rkmce->rkmce_mtopic.topic = rd_tmpabuf_write_str(&tbuf, mtopic->topic); @@ -274,6 +299,41 @@ rd_kafka_metadata_cache_insert(rd_kafka_t *rk, &tbuf, mtopic->partitions, mtopic->partition_cnt * sizeof(*mtopic->partitions)); + /* Copy partition array (internal) and update pointer */ + rkmce->rkmce_metadata_internal_topic.partitions = + rd_tmpabuf_write(&tbuf, metadata_internal_topic->partitions, + mtopic->partition_cnt * + sizeof(*metadata_internal_topic->partitions)); + + + /* Sort partitions for future bsearch() lookups. */ + qsort(rkmce->rkmce_mtopic.partitions, rkmce->rkmce_mtopic.partition_cnt, + sizeof(*rkmce->rkmce_mtopic.partitions), + rd_kafka_metadata_partition_id_cmp); + + /* partitions (internal) are already sorted. */ + + if (include_racks) { + for (i = 0; i < rkmce->rkmce_mtopic.partition_cnt; i++) { + size_t j; + rd_kafka_metadata_partition_t *mdp = + &rkmce->rkmce_mtopic.partitions[i]; + rd_kafka_metadata_partition_internal_t *mdpi = + &rkmce->rkmce_metadata_internal_topic.partitions[i]; + rd_kafka_metadata_partition_internal_t *mdpi_orig = + &metadata_internal_topic->partitions[i]; + + if (mdp->replica_cnt == 0 || mdpi->racks_cnt == 0) + continue; + + mdpi->racks = rd_tmpabuf_alloc( + &tbuf, sizeof(char *) * mdpi->racks_cnt); + for (j = 0; j < mdpi_orig->racks_cnt; j++) + mdpi->racks[j] = rd_tmpabuf_write_str( + &tbuf, mdpi_orig->racks[j]); + } + } + /* Clear uncached fields. */ for (i = 0; i < mtopic->partition_cnt; i++) { rkmce->rkmce_mtopic.partitions[i].replicas = NULL; @@ -281,12 +341,6 @@ rd_kafka_metadata_cache_insert(rd_kafka_t *rk, rkmce->rkmce_mtopic.partitions[i].isrs = NULL; rkmce->rkmce_mtopic.partitions[i].isr_cnt = 0; } - - /* Sort partitions for future bsearch() lookups. 
*/ - qsort(rkmce->rkmce_mtopic.partitions, rkmce->rkmce_mtopic.partition_cnt, - sizeof(*rkmce->rkmce_mtopic.partitions), - rd_kafka_metadata_partition_id_cmp); - TAILQ_INSERT_TAIL(&rk->rk_metadata_cache.rkmc_expiry, rkmce, rkmce_link); rk->rk_metadata_cache.rkmc_cnt++; @@ -365,9 +419,14 @@ void rd_kafka_metadata_cache_expiry_start(rd_kafka_t *rk) { * * @locks rd_kafka_wrlock() */ -void rd_kafka_metadata_cache_topic_update(rd_kafka_t *rk, - const rd_kafka_metadata_topic_t *mdt, - rd_bool_t propagate) { +void rd_kafka_metadata_cache_topic_update( + rd_kafka_t *rk, + const rd_kafka_metadata_topic_t *mdt, + const rd_kafka_metadata_topic_internal_t *mdit, + rd_bool_t propagate, + rd_bool_t include_racks, + rd_kafka_metadata_broker_internal_t *brokers, + size_t broker_cnt) { rd_ts_t now = rd_clock(); rd_ts_t ts_expires = now + (rk->rk_conf.metadata_max_age_ms * 1000); int changed = 1; @@ -380,7 +439,9 @@ void rd_kafka_metadata_cache_topic_update(rd_kafka_t *rk, if (!mdt->err || mdt->err == RD_KAFKA_RESP_ERR_TOPIC_AUTHORIZATION_FAILED || mdt->err == RD_KAFKA_RESP_ERR_UNKNOWN_TOPIC_OR_PART) - rd_kafka_metadata_cache_insert(rk, mdt, now, ts_expires); + rd_kafka_metadata_cache_insert(rk, mdt, mdit, now, ts_expires, + include_racks, brokers, + broker_cnt); else changed = rd_kafka_metadata_cache_delete_by_name(rk, mdt->topic); @@ -390,45 +451,6 @@ void rd_kafka_metadata_cache_topic_update(rd_kafka_t *rk, } -/** - * @brief Update the metadata cache with the provided metadata. - * - * @param abs_update int: absolute update: purge cache before updating. - * - * @locks rd_kafka_wrlock() - */ -void rd_kafka_metadata_cache_update(rd_kafka_t *rk, - const rd_kafka_metadata_t *md, - int abs_update) { - struct rd_kafka_metadata_cache_entry *rkmce; - rd_ts_t now = rd_clock(); - rd_ts_t ts_expires = now + (rk->rk_conf.metadata_max_age_ms * 1000); - int i; - - rd_kafka_dbg(rk, METADATA, "METADATA", - "%s of metadata cache with %d topic(s)", - abs_update ? 
"Absolute update" : "Update", md->topic_cnt); - - if (abs_update) - rd_kafka_metadata_cache_purge(rk, rd_false /*not observers*/); - - - for (i = 0; i < md->topic_cnt; i++) - rd_kafka_metadata_cache_insert(rk, &md->topics[i], now, - ts_expires); - - /* Update expiry timer */ - if ((rkmce = TAILQ_FIRST(&rk->rk_metadata_cache.rkmc_expiry))) - rd_kafka_timer_start(&rk->rk_timers, - &rk->rk_metadata_cache.rkmc_expiry_tmr, - rkmce->rkmce_ts_expires - now, - rd_kafka_metadata_cache_evict_tmr_cb, rk); - - if (md->topic_cnt > 0 || abs_update) - rd_kafka_metadata_cache_propagate_changes(rk); -} - - /** * @brief Remove cache hints for topics in \p topics * This is done when the Metadata response has been parsed and @@ -499,6 +521,8 @@ int rd_kafka_metadata_cache_hint(rd_kafka_t *rk, RD_LIST_FOREACH(topic, topics, i) { rd_kafka_metadata_topic_t mtopic = {.topic = (char *)topic, .err = err}; + rd_kafka_metadata_topic_internal_t metadata_internal_topic = + RD_ZERO_INIT; /*const*/ struct rd_kafka_metadata_cache_entry *rkmce; /* !replace: Dont overwrite valid entries */ @@ -512,7 +536,9 @@ int rd_kafka_metadata_cache_hint(rd_kafka_t *rk, /* FALLTHRU */ } - rd_kafka_metadata_cache_insert(rk, &mtopic, now, ts_expires); + rd_kafka_metadata_cache_insert(rk, &mtopic, + &metadata_internal_topic, now, + ts_expires, rd_false, NULL, 0); cnt++; if (dst) diff --git a/lib/librdkafka-2.1.0/src/rdkafka_mock.c b/lib/librdkafka-2.3.0/src/rdkafka_mock.c similarity index 95% rename from lib/librdkafka-2.1.0/src/rdkafka_mock.c rename to lib/librdkafka-2.3.0/src/rdkafka_mock.c index ae7940533c1..6c8df688c78 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_mock.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_mock.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2019 Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,10 +38,14 @@ #include "rdkafka_interceptor.h" #include "rdkafka_mock_int.h" #include "rdkafka_transport_int.h" - +#include "rdkafka_mock.h" #include +typedef struct rd_kafka_mock_request_s rd_kafka_mock_request_t; + static void rd_kafka_mock_cluster_destroy0(rd_kafka_mock_cluster_t *mcluster); +static rd_kafka_mock_request_t * +rd_kafka_mock_request_new(int32_t id, int16_t api_key, int64_t timestamp_us); static rd_kafka_mock_broker_t * @@ -424,12 +429,15 @@ int64_t rd_kafka_mock_partition_offset_for_leader_epoch( * @brief Automatically assign replicas for partition */ static void -rd_kafka_mock_partition_assign_replicas(rd_kafka_mock_partition_t *mpart) { +rd_kafka_mock_partition_assign_replicas(rd_kafka_mock_partition_t *mpart, + int replication_factor) { rd_kafka_mock_cluster_t *mcluster = mpart->topic->cluster; - int replica_cnt = - RD_MIN(mcluster->defaults.replication_factor, mcluster->broker_cnt); + int replica_cnt = RD_MIN(replication_factor, mcluster->broker_cnt); rd_kafka_mock_broker_t *mrkb; int i = 0; + int first_replica = + (mpart->id * replication_factor) % mcluster->broker_cnt; + int skipped = 0; if (mpart->replicas) rd_free(mpart->replicas); @@ -437,7 +445,19 @@ rd_kafka_mock_partition_assign_replicas(rd_kafka_mock_partition_t *mpart) { mpart->replicas = rd_calloc(replica_cnt, sizeof(*mpart->replicas)); mpart->replica_cnt = replica_cnt; - /* FIXME: randomize this using perhaps reservoir sampling */ + + /* Use a predictable, determininistic order on a per-topic basis. + * + * Two loops are needed for wraparound. 
*/ + TAILQ_FOREACH(mrkb, &mcluster->brokers, link) { + if (skipped < first_replica) { + skipped++; + continue; + } + if (i == mpart->replica_cnt) + break; + mpart->replicas[i++] = mrkb; + } TAILQ_FOREACH(mrkb, &mcluster->brokers, link) { if (i == mpart->replica_cnt) break; @@ -562,7 +582,7 @@ static void rd_kafka_mock_partition_init(rd_kafka_mock_topic_t *mtopic, rd_list_init(&mpart->pidstates, 0, rd_free); - rd_kafka_mock_partition_assign_replicas(mpart); + rd_kafka_mock_partition_assign_replicas(mpart, replication_factor); } rd_kafka_mock_partition_t * @@ -1111,6 +1131,15 @@ rd_kafka_mock_connection_parse_request(rd_kafka_mock_connection_t *mconn, return -1; } + mtx_lock(&mcluster->lock); + if (mcluster->track_requests) { + rd_list_add(&mcluster->request_list, + rd_kafka_mock_request_new( + mconn->broker->id, rkbuf->rkbuf_reqhdr.ApiKey, + rd_clock())); + } + mtx_unlock(&mcluster->lock); + rd_kafka_dbg(rk, MOCK, "MOCK", "Broker %" PRId32 ": Received %sRequestV%hd from %s", mconn->broker->id, @@ -1470,6 +1499,9 @@ static void rd_kafka_mock_broker_destroy(rd_kafka_mock_broker_t *mrkb) { rd_kafka_mock_error_stack_destroy(errstack); } + if (mrkb->rack) + rd_free(mrkb->rack); + TAILQ_REMOVE(&mrkb->cluster->brokers, mrkb, link); mrkb->cluster->broker_cnt--; @@ -2506,6 +2538,7 @@ rd_kafka_mock_cluster_t *rd_kafka_mock_cluster_new(rd_kafka_t *rk, TAILQ_INIT(&mcluster->topics); mcluster->defaults.partition_cnt = 4; mcluster->defaults.replication_factor = RD_MIN(3, broker_cnt); + mcluster->track_requests = rd_false; TAILQ_INIT(&mcluster->cgrps); @@ -2583,3 +2616,95 @@ const char * rd_kafka_mock_cluster_bootstraps(const rd_kafka_mock_cluster_t *mcluster) { return mcluster->bootstraps; } + +/** + * @struct Represents a request to the mock cluster along with a timestamp. 
+ */ +struct rd_kafka_mock_request_s { + int32_t id; /**< Broker id */ + int16_t api_key; /**< API Key of request */ + rd_ts_t timestamp /**< Timestamp at which request was received */; +}; + +/** + * @brief Allocate and initialize a rd_kafka_mock_request_t * + */ +static rd_kafka_mock_request_t * +rd_kafka_mock_request_new(int32_t id, int16_t api_key, int64_t timestamp_us) { + rd_kafka_mock_request_t *request; + request = rd_malloc(sizeof(*request)); + request->id = id; + request->api_key = api_key; + request->timestamp = timestamp_us; + return request; +} + +static rd_kafka_mock_request_t * +rd_kafka_mock_request_copy(rd_kafka_mock_request_t *mrequest) { + rd_kafka_mock_request_t *request; + request = rd_malloc(sizeof(*request)); + request->id = mrequest->id; + request->api_key = mrequest->api_key; + request->timestamp = mrequest->timestamp; + return request; +} + +void rd_kafka_mock_request_destroy(rd_kafka_mock_request_t *element) { + rd_free(element); +} + +static void rd_kafka_mock_request_free(void *element) { + rd_kafka_mock_request_destroy(element); +} + +void rd_kafka_mock_start_request_tracking(rd_kafka_mock_cluster_t *mcluster) { + mtx_lock(&mcluster->lock); + mcluster->track_requests = rd_true; + rd_list_init(&mcluster->request_list, 32, rd_kafka_mock_request_free); + mtx_unlock(&mcluster->lock); +} + +void rd_kafka_mock_stop_request_tracking(rd_kafka_mock_cluster_t *mcluster) { + mtx_lock(&mcluster->lock); + mcluster->track_requests = rd_false; + rd_list_clear(&mcluster->request_list); + mtx_unlock(&mcluster->lock); +} + +rd_kafka_mock_request_t ** +rd_kafka_mock_get_requests(rd_kafka_mock_cluster_t *mcluster, size_t *cntp) { + size_t i; + rd_kafka_mock_request_t **ret = NULL; + + mtx_lock(&mcluster->lock); + *cntp = rd_list_cnt(&mcluster->request_list); + if (*cntp > 0) { + ret = rd_calloc(*cntp, sizeof(rd_kafka_mock_request_t *)); + for (i = 0; i < *cntp; i++) { + rd_kafka_mock_request_t *mreq = + rd_list_elem(&mcluster->request_list, i); + ret[i] = rd_kafka_mock_request_copy(mreq); + } + } + + mtx_unlock(&mcluster->lock); + return ret; +} + +void rd_kafka_mock_clear_requests(rd_kafka_mock_cluster_t *mcluster) { + mtx_lock(&mcluster->lock); + rd_list_clear(&mcluster->request_list); + mtx_unlock(&mcluster->lock); +} + +int32_t rd_kafka_mock_request_id(rd_kafka_mock_request_t *mreq) { + return mreq->id; +} + +int16_t rd_kafka_mock_request_api_key(rd_kafka_mock_request_t *mreq) { + return mreq->api_key; +} + +rd_ts_t rd_kafka_mock_request_timestamp(rd_kafka_mock_request_t *mreq) { + return mreq->timestamp; +} diff --git a/lib/librdkafka-2.1.0/src/rdkafka_mock.h b/lib/librdkafka-2.3.0/src/rdkafka_mock.h similarity index 87% rename from lib/librdkafka-2.1.0/src/rdkafka_mock.h rename to lib/librdkafka-2.3.0/src/rdkafka_mock.h index f06efe8fd5e..822680c501a 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_mock.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_mock.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2019-2022 Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -364,6 +364,62 @@ rd_kafka_mock_set_apiversion(rd_kafka_mock_cluster_t *mcluster, int16_t MinVersion, int16_t MaxVersion); +/** + * @brief Start tracking RPC requests for this mock cluster. + * @sa rd_kafka_mock_get_requests to get the requests. 
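+ *
+ * A minimal usage sketch (illustrative; assumes a live \p mcluster and
+ * that the returned array itself is freed with rd_free()):
+ * @code
+ *   size_t cnt, i;
+ *   rd_kafka_mock_request_t **reqs;
+ *   rd_kafka_mock_start_request_tracking(mcluster);
+ *   // ... drive some client traffic against the mock cluster ...
+ *   reqs = rd_kafka_mock_get_requests(mcluster, &cnt);
+ *   for (i = 0; i < cnt; i++) {
+ *           printf("broker %d got ApiKey %d\n",
+ *                  (int)rd_kafka_mock_request_id(reqs[i]),
+ *                  (int)rd_kafka_mock_request_api_key(reqs[i]));
+ *           rd_kafka_mock_request_destroy(reqs[i]);
+ *   }
+ *   rd_free(reqs);
+ *   rd_kafka_mock_stop_request_tracking(mcluster);
+ * @endcode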
+ */ +RD_EXPORT +void rd_kafka_mock_start_request_tracking(rd_kafka_mock_cluster_t *mcluster); + +/** + * @brief Stop tracking RPC requests for this mock cluster. + * Does not clear already tracked requests. + */ +RD_EXPORT +void rd_kafka_mock_stop_request_tracking(rd_kafka_mock_cluster_t *mcluster); + +/** + * @name Represents a request to the mock cluster along with a timestamp. + */ +typedef struct rd_kafka_mock_request_s rd_kafka_mock_request_t; + +/** + * @brief Destroy a rd_kafka_mock_request_t * and deallocate memory. + */ +RD_EXPORT void rd_kafka_mock_request_destroy(rd_kafka_mock_request_t *mreq); + +/** + * @brief Get the broker id to which \p mreq was sent. + */ +RD_EXPORT int32_t rd_kafka_mock_request_id(rd_kafka_mock_request_t *mreq); + +/** + * @brief Get the ApiKey with which \p mreq was sent. + */ +RD_EXPORT int16_t rd_kafka_mock_request_api_key(rd_kafka_mock_request_t *mreq); + +/** + * @brief Get the timestamp in micros at which \p mreq was received. + */ +RD_EXPORT int64_t +rd_kafka_mock_request_timestamp(rd_kafka_mock_request_t *mreq); + +/** + * @brief Get the list of requests sent to this mock cluster. + * + * @param cntp is set to the count of requests. + * @return List of rd_kafka_mock_request_t *. + * @remark Each element of the returned array must be freed with + * rd_kafka_mock_request_destroy, and the array itself must be freed too. + */ +RD_EXPORT rd_kafka_mock_request_t ** +rd_kafka_mock_get_requests(rd_kafka_mock_cluster_t *mcluster, size_t *cntp); + +/** + * @brief Clear the list of requests sent to this mock cluster, in case request + * tracking is or was turned on. + */ +RD_EXPORT void rd_kafka_mock_clear_requests(rd_kafka_mock_cluster_t *mcluster); /**@}*/ diff --git a/lib/librdkafka-2.1.0/src/rdkafka_mock_cgrp.c b/lib/librdkafka-2.3.0/src/rdkafka_mock_cgrp.c similarity index 93% rename from lib/librdkafka-2.1.0/src/rdkafka_mock_cgrp.c rename to lib/librdkafka-2.3.0/src/rdkafka_mock_cgrp.c index 8f71fb48c9d..60b3aa1567e 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_mock_cgrp.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_mock_cgrp.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020 Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -294,15 +295,30 @@ static void rd_kafka_mock_cgrp_elect_leader(rd_kafka_mock_cgrp_t *mcgrp) { mcgrp->generation_id++; - /* Elect a leader. - * FIXME: For now we'll use the first member */ - mcgrp->leader = TAILQ_FIRST(&mcgrp->members); + /* Elect a leader deterministically if the group.instance.id is + * available, using the lexicographic order of group.instance.ids. + * This is not how it's done on a real broker, which uses the first + * member joined. But we use a deterministic method for better testing + * (in case we want to enforce some consumer to be the group leader). + * If group.instance.id is not specified for any consumer, we use the + * first one joined, similar to the real broker. 
*/ + mcgrp->leader = NULL; + TAILQ_FOREACH(member, &mcgrp->members, link) { + if (!mcgrp->leader) + mcgrp->leader = member; + else if (mcgrp->leader->group_instance_id && + member->group_instance_id && + (rd_strcmp(mcgrp->leader->group_instance_id, + member->group_instance_id) > 0)) + mcgrp->leader = member; + } - rd_kafka_dbg(mcgrp->cluster->rk, MOCK, "MOCK", - "Consumer group %s with %d member(s) is rebalancing: " - "elected leader is %s, generation id %d", - mcgrp->id, mcgrp->member_cnt, mcgrp->leader->id, - mcgrp->generation_id); + rd_kafka_dbg( + mcgrp->cluster->rk, MOCK, "MOCK", + "Consumer group %s with %d member(s) is rebalancing: " + "elected leader is %s (group.instance.id = %s), generation id %d", + mcgrp->id, mcgrp->member_cnt, mcgrp->leader->id, + mcgrp->leader->group_instance_id, mcgrp->generation_id); /* Find the most commonly supported protocol name among the members. * FIXME: For now we'll blindly use the first protocol of the leader. */ @@ -525,6 +541,7 @@ rd_kafka_mock_cgrp_member_add(rd_kafka_mock_cgrp_t *mcgrp, rd_kafka_buf_t *resp, const rd_kafkap_str_t *MemberId, const rd_kafkap_str_t *ProtocolType, + const rd_kafkap_str_t *GroupInstanceId, rd_kafka_mock_cgrp_proto_t *protos, int proto_cnt, int session_timeout_ms) { @@ -549,6 +566,10 @@ rd_kafka_mock_cgrp_member_add(rd_kafka_mock_cgrp_t *mcgrp, } else member->id = RD_KAFKAP_STR_DUP(MemberId); + if (RD_KAFKAP_STR_LEN(GroupInstanceId)) + member->group_instance_id = + RD_KAFKAP_STR_DUP(GroupInstanceId); + TAILQ_INSERT_TAIL(&mcgrp->members, member, link); mcgrp->member_cnt++; } diff --git a/lib/librdkafka-2.1.0/src/rdkafka_mock_handlers.c b/lib/librdkafka-2.3.0/src/rdkafka_mock_handlers.c similarity index 98% rename from lib/librdkafka-2.1.0/src/rdkafka_mock_handlers.c rename to lib/librdkafka-2.3.0/src/rdkafka_mock_handlers.c index 3a004d41dba..047f890f5ef 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_mock_handlers.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_mock_handlers.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2019 Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill, + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -96,7 +97,7 @@ static int rd_kafka_mock_handle_Produce(rd_kafka_mock_connection_t *mconn, mpart = rd_kafka_mock_partition_find(mtopic, Partition); - rd_kafka_buf_read_bytes(rkbuf, &records); + rd_kafka_buf_read_kbytes(rkbuf, &records); /* Response: Partition */ rd_kafka_buf_write_i32(resp, Partition); @@ -352,9 +353,10 @@ static int rd_kafka_mock_handle_Fetch(rd_kafka_mock_connection_t *mconn, if (mset && partsize < (size_t)PartMaxBytes && totsize < (size_t)MaxBytes) { /* Response: Records */ - rd_kafka_buf_write_kbytes(resp, &mset->bytes); - partsize += RD_KAFKAP_BYTES_SIZE(&mset->bytes); - totsize += RD_KAFKAP_BYTES_SIZE(&mset->bytes); + size_t written = rd_kafka_buf_write_kbytes( + resp, &mset->bytes); + partsize += written; + totsize += written; /* FIXME: Multiple messageSets ? 
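The election loop above boils down to: the first-joined member starts as leader, and a candidate replaces it only when both compared members carry a group.instance.id and the candidate's id sorts lexicographically lower. A standalone sketch of the rule (plain C strings stand in for rd_kafkap_str_t; NULL models a member without a group.instance.id):

#include <stddef.h>
#include <string.h>

/* Returns the index of the elected leader among n members' instance ids. */
static size_t elect_leader(const char **ids, size_t n) {
        size_t leader = 0, i;
        for (i = 1; i < n; i++)
                if (ids[leader] && ids[i] && strcmp(ids[leader], ids[i]) > 0)
                        leader = i;
        return leader; /* {"b", "a", "c"} elects index 1 ("a") */
}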
*/ } else { @@ -431,10 +433,10 @@ static int rd_kafka_mock_handle_ListOffsets(rd_kafka_mock_connection_t *mconn, /* Inject error, if any */ all_err = rd_kafka_mock_next_request_error(mconn, resp); - rd_kafka_buf_read_i32(rkbuf, &TopicsCnt); + rd_kafka_buf_read_arraycnt(rkbuf, &TopicsCnt, RD_KAFKAP_TOPICS_MAX); /* Response: #Topics */ - rd_kafka_buf_write_i32(resp, TopicsCnt); + rd_kafka_buf_write_arraycnt(resp, TopicsCnt); while (TopicsCnt-- > 0) { rd_kafkap_str_t Topic; @@ -442,14 +444,15 @@ static int rd_kafka_mock_handle_ListOffsets(rd_kafka_mock_connection_t *mconn, rd_kafka_mock_topic_t *mtopic; rd_kafka_buf_read_str(rkbuf, &Topic); - rd_kafka_buf_read_i32(rkbuf, &PartitionCnt); + rd_kafka_buf_read_arraycnt(rkbuf, &PartitionCnt, + RD_KAFKAP_PARTITIONS_MAX); mtopic = rd_kafka_mock_topic_find_by_kstr(mcluster, &Topic); /* Response: Topic */ rd_kafka_buf_write_kstr(resp, &Topic); /* Response: #Partitions */ - rd_kafka_buf_write_i32(resp, PartitionCnt); + rd_kafka_buf_write_arraycnt(resp, PartitionCnt); while (PartitionCnt-- > 0) { int32_t Partition, CurrentLeaderEpoch = -1; @@ -469,6 +472,9 @@ static int rd_kafka_mock_handle_ListOffsets(rd_kafka_mock_connection_t *mconn, if (rkbuf->rkbuf_reqhdr.ApiVersion == 0) rd_kafka_buf_read_i32(rkbuf, &MaxNumOffsets); + /* Partition tags */ + rd_kafka_buf_skip_tags(rkbuf); + if (mtopic) mpart = rd_kafka_mock_partition_find(mtopic, Partition); @@ -522,6 +528,9 @@ static int rd_kafka_mock_handle_ListOffsets(rd_kafka_mock_connection_t *mconn, resp, mpart ? mpart->leader_epoch : -1); } + /* Response: Partition tags */ + rd_kafka_buf_write_tags(resp); + rd_kafka_dbg(mcluster->rk, MOCK, "MOCK", "Topic %.*s [%" PRId32 "] returning " @@ -532,6 +541,11 @@ static int rd_kafka_mock_handle_ListOffsets(rd_kafka_mock_connection_t *mconn, rd_kafka_offset2str(Timestamp), rd_kafka_err2str(err)); } + + /* Topic tags */ + rd_kafka_buf_skip_tags(rkbuf); + /* Response: Topic tags */ + rd_kafka_buf_write_tags(resp); } @@ -1165,7 +1179,7 @@ static int rd_kafka_mock_handle_JoinGroup(rd_kafka_mock_connection_t *mconn, rd_kafkap_str_t ProtocolName; rd_kafkap_bytes_t Metadata; rd_kafka_buf_read_str(rkbuf, &ProtocolName); - rd_kafka_buf_read_bytes(rkbuf, &Metadata); + rd_kafka_buf_read_kbytes(rkbuf, &Metadata); protos[i].name = rd_kafkap_str_copy(&ProtocolName); protos[i].metadata = rd_kafkap_bytes_copy(&Metadata); } @@ -1199,8 +1213,8 @@ static int rd_kafka_mock_handle_JoinGroup(rd_kafka_mock_connection_t *mconn, /* This triggers an async rebalance, the response will be * sent later. 
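Stepping back to the ListOffsets handler changes above: switching from read_i32/write_i32 to the arraycnt helpers, plus the added skip_tags/write_tags calls, is exactly what flexible versions (v6 and later here) require. Compact arrays encode their length as an unsigned varint holding N + 1 (0 means a null array), and every structure may be followed by tagged fields. A sketch of the compact count decoding per the Kafka flexible-versions encoding:

#include <stdint.h>

/* Decodes a compact array count: unsigned varint storing N + 1,
 * so 0 encodes a null array and 1 an empty one. */
static int32_t read_compact_arraycnt(const uint8_t **p) {
        uint64_t v = 0;
        int shift  = 0;
        uint8_t b;
        do {
                b = *(*p)++;
                v |= (uint64_t)(b & 0x7f) << shift;
                shift += 7;
        } while (b & 0x80);
        return (int32_t)v - 1; /* -1 == null array */
}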
*/ err = rd_kafka_mock_cgrp_member_add( - mcgrp, mconn, resp, &MemberId, &ProtocolType, protos, - ProtocolCnt, SessionTimeoutMs); + mcgrp, mconn, resp, &MemberId, &ProtocolType, + &GroupInstanceId, protos, ProtocolCnt, SessionTimeoutMs); if (!err) { /* .._add() assumes ownership of resp and protos */ protos = NULL; @@ -1453,7 +1467,7 @@ static int rd_kafka_mock_handle_SyncGroup(rd_kafka_mock_connection_t *mconn, rd_kafka_mock_cgrp_member_t *member2; rd_kafka_buf_read_str(rkbuf, &MemberId2); - rd_kafka_buf_read_bytes(rkbuf, &Metadata); + rd_kafka_buf_read_kbytes(rkbuf, &Metadata); if (err) continue; @@ -2112,7 +2126,7 @@ const struct rd_kafka_mock_api_handler /* [request-type] = { MinVersion, MaxVersion, FlexVersion, callback } */ [RD_KAFKAP_Produce] = {0, 7, -1, rd_kafka_mock_handle_Produce}, [RD_KAFKAP_Fetch] = {0, 11, -1, rd_kafka_mock_handle_Fetch}, - [RD_KAFKAP_ListOffsets] = {0, 5, -1, rd_kafka_mock_handle_ListOffsets}, + [RD_KAFKAP_ListOffsets] = {0, 7, 6, rd_kafka_mock_handle_ListOffsets}, [RD_KAFKAP_OffsetFetch] = {0, 6, 6, rd_kafka_mock_handle_OffsetFetch}, [RD_KAFKAP_OffsetCommit] = {0, 8, 8, rd_kafka_mock_handle_OffsetCommit}, [RD_KAFKAP_ApiVersion] = {0, 2, 3, rd_kafka_mock_handle_ApiVersion}, diff --git a/lib/librdkafka-2.1.0/src/rdkafka_mock_int.h b/lib/librdkafka-2.3.0/src/rdkafka_mock_int.h similarity index 97% rename from lib/librdkafka-2.1.0/src/rdkafka_mock_int.h rename to lib/librdkafka-2.3.0/src/rdkafka_mock_int.h index ea3b6cab4db..87da2d4e312 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_mock_int.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_mock_int.h @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2019 Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -392,9 +393,19 @@ struct rd_kafka_mock_cluster_s { /**< Request handlers */ struct rd_kafka_mock_api_handler api_handlers[RD_KAFKAP__NUM]; + /**< Appends received requests to the request list if set to true. + * Defaults to false to reduce memory usage. */ + rd_bool_t track_requests; + /**< List of API requests for this cluster. Type: + * rd_kafka_mock_request_t* + */ + rd_list_t request_list; + /**< Mutex for: * .errstacks * .apiversions + * .track_requests + * .request_list */ mtx_t lock; @@ -505,6 +516,7 @@ rd_kafka_mock_cgrp_member_add(rd_kafka_mock_cgrp_t *mcgrp, rd_kafka_mock_connection_t *mconn, rd_kafka_buf_t *resp, const rd_kafkap_str_t *MemberId, const rd_kafkap_str_t *ProtocolType, + const rd_kafkap_str_t *GroupInstanceId, rd_kafka_mock_cgrp_proto_t *protos, int proto_cnt, diff --git a/lib/librdkafka-2.1.0/src/rdkafka_msg.c b/lib/librdkafka-2.3.0/src/rdkafka_msg.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_msg.c rename to lib/librdkafka-2.3.0/src/rdkafka_msg.c index 17b67999bb1..5e71209dbfc 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_msg.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_msg.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012,2013 Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill, + * 2023, Confluent Inc. * All rights reserved. 
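In the api_handlers table above, each entry is {MinVersion, MaxVersion, FlexVersion}, so the ListOffsets bump to {0, 7, 6} both advertises v7 and turns on the compact encodings from v6 onwards, while -1 keeps an API permanently non-flexible. The gating presumably reduces to a check like this sketch (a simplification of the mock broker's dispatch, not its literal code):

/* Sketch: does a request of `version` use the flexible encodings? */
static int is_flexible_version(int16_t version, int16_t flex_version) {
        return flex_version != -1 && version >= flex_version;
}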
* * Redistribution and use in source and binary forms, with or without @@ -1562,8 +1563,8 @@ rd_kafka_message_status(const rd_kafka_message_t *rkmessage) { int32_t rd_kafka_message_leader_epoch(const rd_kafka_message_t *rkmessage) { rd_kafka_msg_t *rkm; - - if (unlikely(!rkmessage->rkt || + if (unlikely(!rkmessage->rkt || rd_kafka_rkt_is_lw(rkmessage->rkt) || + !rkmessage->rkt->rkt_rk || rkmessage->rkt->rkt_rk->rk_type != RD_KAFKA_CONSUMER)) return -1; @@ -2032,9 +2033,11 @@ static int unittest_msgq_order(const char *what, } /* Retry the messages, which moves them back to sendq - * maintaining the original order */ + * maintaining the original order with exponential backoff + * set to false */ rd_kafka_retry_msgq(&rkmq, &sendq, 1, 1, 0, - RD_KAFKA_MSG_STATUS_NOT_PERSISTED, cmp); + RD_KAFKA_MSG_STATUS_NOT_PERSISTED, cmp, rd_false, 0, + 0); RD_UT_ASSERT(rd_kafka_msgq_len(&sendq) == 0, "sendq FIFO should be empty, not contain %d messages", @@ -2072,9 +2075,11 @@ static int unittest_msgq_order(const char *what, } /* Retry the messages, which should now keep the 3 first messages - * on sendq (no more retries) and just number 4 moved back. */ + * on sendq (no more retries) and just number 4 moved back. + * No exponential backoff applied. */ rd_kafka_retry_msgq(&rkmq, &sendq, 1, 1, 0, - RD_KAFKA_MSG_STATUS_NOT_PERSISTED, cmp); + RD_KAFKA_MSG_STATUS_NOT_PERSISTED, cmp, rd_false, 0, + 0); if (fifo) { if (ut_verify_msgq_order("readded #2", &rkmq, 4, 6, rd_true)) @@ -2093,9 +2098,10 @@ static int unittest_msgq_order(const char *what, return 1; } - /* Move all messages back on rkmq */ + /* Move all messages back on rkmq without any exponential backoff. */ rd_kafka_retry_msgq(&rkmq, &sendq, 0, 1000, 0, - RD_KAFKA_MSG_STATUS_NOT_PERSISTED, cmp); + RD_KAFKA_MSG_STATUS_NOT_PERSISTED, cmp, rd_false, 0, + 0); /* Move first half of messages to sendq (1,2,3). @@ -2115,11 +2121,14 @@ static int unittest_msgq_order(const char *what, rkm = ut_rd_kafka_msg_new(msgsize); rkm->rkm_u.producer.msgid = i; rd_kafka_msgq_enq_sorted0(&rkmq, rkm, cmp); - + /* No exponential backoff applied. */ rd_kafka_retry_msgq(&rkmq, &sendq, 0, 1000, 0, - RD_KAFKA_MSG_STATUS_NOT_PERSISTED, cmp); + RD_KAFKA_MSG_STATUS_NOT_PERSISTED, cmp, rd_false, 0, + 0); + /* No exponential backoff applied. */ rd_kafka_retry_msgq(&rkmq, &sendq2, 0, 1000, 0, - RD_KAFKA_MSG_STATUS_NOT_PERSISTED, cmp); + RD_KAFKA_MSG_STATUS_NOT_PERSISTED, cmp, rd_false, 0, + 0); RD_UT_ASSERT(rd_kafka_msgq_len(&sendq) == 0, "sendq FIFO should be empty, not contain %d messages", diff --git a/lib/librdkafka-2.1.0/src/rdkafka_msg.h b/lib/librdkafka-2.3.0/src/rdkafka_msg.h similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_msg.h rename to lib/librdkafka-2.3.0/src/rdkafka_msg.h index 877fac15cd3..db09892d57b 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_msg.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_msg.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012,2013 Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_msgbatch.h b/lib/librdkafka-2.3.0/src/rdkafka_msgbatch.h similarity index 98% rename from lib/librdkafka-2.1.0/src/rdkafka_msgbatch.h rename to lib/librdkafka-2.3.0/src/rdkafka_msgbatch.h index 09c7977067b..b65a0f9c0a3 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_msgbatch.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_msgbatch.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2019 Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_msgset.h b/lib/librdkafka-2.3.0/src/rdkafka_msgset.h similarity index 98% rename from lib/librdkafka-2.1.0/src/rdkafka_msgset.h rename to lib/librdkafka-2.3.0/src/rdkafka_msgset.h index b79f1c946c9..9336e0c6b3c 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_msgset.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_msgset.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2017 Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_msgset_reader.c b/lib/librdkafka-2.3.0/src/rdkafka_msgset_reader.c similarity index 96% rename from lib/librdkafka-2.1.0/src/rdkafka_msgset_reader.c rename to lib/librdkafka-2.3.0/src/rdkafka_msgset_reader.c index 58779f3be6f..451dd35442b 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_msgset_reader.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_msgset_reader.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2017 Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -631,10 +632,10 @@ rd_kafka_msgset_reader_msg_v0_1(rd_kafka_msgset_reader_t *msetr) { /* Extract key */ - rd_kafka_buf_read_bytes(rkbuf, &Key); + rd_kafka_buf_read_kbytes(rkbuf, &Key); /* Extract Value */ - rd_kafka_buf_read_bytes(rkbuf, &Value); + rd_kafka_buf_read_kbytes(rkbuf, &Value); Value_len = RD_KAFKAP_BYTES_LEN(&Value); /* MessageSets may contain offsets earlier than we @@ -668,14 +669,14 @@ rd_kafka_msgset_reader_msg_v0_1(rd_kafka_msgset_reader_t *msetr) { /* Create op/message container for message. */ rko = rd_kafka_op_new_fetch_msg( - &rkm, rktp, msetr->msetr_tver->version, rkbuf, hdr.Offset, + &rkm, rktp, msetr->msetr_tver->version, rkbuf, + RD_KAFKA_FETCH_POS(hdr.Offset, msetr->msetr_leader_epoch), (size_t)RD_KAFKAP_BYTES_LEN(&Key), RD_KAFKAP_BYTES_IS_NULL(&Key) ? NULL : Key.data, (size_t)RD_KAFKAP_BYTES_LEN(&Value), RD_KAFKAP_BYTES_IS_NULL(&Value) ? NULL : Value.data); - rkm->rkm_u.consumer.leader_epoch = msetr->msetr_leader_epoch; - rkm->rkm_broker_id = msetr->msetr_broker_id; + rkm->rkm_broker_id = msetr->msetr_broker_id; /* Assign message timestamp. * If message was in a compressed MessageSet and the outer/wrapper @@ -733,6 +734,7 @@ rd_kafka_msgset_reader_msg_v2(rd_kafka_msgset_reader_t *msetr) { ? 
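The read_bytes to read_kbytes renames in this reader (and in the handlers earlier) make explicit that the target is a Kafka-protocol BYTES field parsed into an rd_kafkap_bytes_t view over the buffer. In the classic, non-compact encoding that is a big-endian i32 length, with -1 denoting NULL, followed by the payload; a minimal sketch of that layout, independent of librdkafka's buffer machinery:

#include <stddef.h>
#include <stdint.h>

struct kbytes {
        int32_t len;      /* -1 == NULL value */
        const void *data; /* points into the buffer, not copied */
};

/* Parses a classic BYTES field; returns bytes consumed or -1 on underflow. */
static ptrdiff_t parse_kbytes(const uint8_t *buf, size_t size,
                              struct kbytes *kb) {
        int32_t len;
        if (size < 4)
                return -1;
        len = (int32_t)(((uint32_t)buf[0] << 24) | ((uint32_t)buf[1] << 16) |
                        ((uint32_t)buf[2] << 8) | (uint32_t)buf[3]);
        if (len > 0 && (size_t)len > size - 4)
                return -1;
        kb->len  = len;
        kb->data = len > 0 ? buf + 4 : NULL;
        return 4 + (len > 0 ? len : 0);
}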
LOG_DEBUG : 0; size_t message_end; + rd_kafka_fetch_pos_t msetr_pos; rd_kafka_buf_read_varint(rkbuf, &hdr.Length); message_end = @@ -742,15 +744,23 @@ rd_kafka_msgset_reader_msg_v2(rd_kafka_msgset_reader_t *msetr) { rd_kafka_buf_read_varint(rkbuf, &hdr.TimestampDelta); rd_kafka_buf_read_varint(rkbuf, &hdr.OffsetDelta); hdr.Offset = msetr->msetr_v2_hdr->BaseOffset + hdr.OffsetDelta; - - /* Skip message if outdated */ + msetr_pos = RD_KAFKA_FETCH_POS(hdr.Offset, msetr->msetr_leader_epoch); + + /* Skip message if outdated. + * Don't check offset leader epoch, just log it, as if current leader + * epoch is different the fetch will fail (KIP-320) and if offset leader + * epoch is different it'll return an empty fetch (KIP-595). If we + * checked it, it's possible to have a loop when moving from a broker + * that supports leader epoch to one that doesn't. */ if (hdr.Offset < rktp->rktp_offsets.fetch_pos.offset) { - rd_rkb_dbg(msetr->msetr_rkb, MSG, "MSG", - "%s [%" PRId32 - "]: " - "Skip offset %" PRId64 " < fetch_offset %" PRId64, - rktp->rktp_rkt->rkt_topic->str, rktp->rktp_partition, - hdr.Offset, rktp->rktp_offsets.fetch_pos.offset); + rd_rkb_dbg( + msetr->msetr_rkb, MSG, "MSG", + "%s [%" PRId32 + "]: " + "Skip %s < fetch %s", + rktp->rktp_rkt->rkt_topic->str, rktp->rktp_partition, + rd_kafka_fetch_pos2str(msetr_pos), + rd_kafka_fetch_pos2str(rktp->rktp_offsets.fetch_pos)); rd_kafka_buf_skip_to(rkbuf, message_end); return RD_KAFKA_RESP_ERR_NO_ERROR; /* Continue with next msg */ } @@ -771,10 +781,11 @@ rd_kafka_msgset_reader_msg_v2(rd_kafka_msgset_reader_t *msetr) { rkbuf, "%s [%" PRId32 "]: " - "Ctrl message at offset %" PRId64 + "Ctrl message at %s" " has invalid key size %" PRId64, rktp->rktp_rkt->rkt_topic->str, - rktp->rktp_partition, hdr.Offset, + rktp->rktp_partition, + rd_kafka_fetch_pos2str(msetr_pos), ctrl_data.KeySize); rd_kafka_buf_read_i16(rkbuf, &ctrl_data.Version); @@ -784,11 +795,10 @@ rd_kafka_msgset_reader_msg_v2(rd_kafka_msgset_reader_t *msetr) { "%s [%" PRId32 "]: " "Skipping ctrl msg with " - "unsupported version %" PRId16 - " at offset %" PRId64, + "unsupported version %" PRId16 " at %s", rktp->rktp_rkt->rkt_topic->str, rktp->rktp_partition, ctrl_data.Version, - hdr.Offset); + rd_kafka_fetch_pos2str(msetr_pos)); rd_kafka_buf_skip_to(rkbuf, message_end); return RD_KAFKA_RESP_ERR_NO_ERROR; /* Continue with next msg */ @@ -799,10 +809,11 @@ rd_kafka_msgset_reader_msg_v2(rd_kafka_msgset_reader_t *msetr) { rkbuf, "%s [%" PRId32 "]: " - "Ctrl message at offset %" PRId64 + "Ctrl message at %s" " has invalid key size %" PRId64, rktp->rktp_rkt->rkt_topic->str, - rktp->rktp_partition, hdr.Offset, + rktp->rktp_partition, + rd_kafka_fetch_pos2str(msetr_pos), ctrl_data.KeySize); rd_kafka_buf_read_i16(rkbuf, &ctrl_data.Type); @@ -827,14 +838,15 @@ rd_kafka_msgset_reader_msg_v2(rd_kafka_msgset_reader_t *msetr) { MSG | RD_KAFKA_DBG_EOS, "TXN", "%s [%" PRId32 "] received abort txn " - "ctrl msg at offset %" PRId64 + "ctrl msg at %s" " for " "PID %" PRId64 ", but there are no " "known aborted transactions: " "ignoring", rktp->rktp_rkt->rkt_topic->str, - rktp->rktp_partition, hdr.Offset, + rktp->rktp_partition, + rd_kafka_fetch_pos2str(msetr_pos), msetr->msetr_v2_hdr->PID); break; } @@ -844,14 +856,14 @@ rd_kafka_msgset_reader_msg_v2(rd_kafka_msgset_reader_t *msetr) { aborted_txn_start_offset = rd_kafka_aborted_txns_pop_offset( msetr->msetr_aborted_txns, - msetr->msetr_v2_hdr->PID, hdr.Offset); + msetr->msetr_v2_hdr->PID, msetr_pos.offset); if (unlikely(aborted_txn_start_offset == -1)) { 
rd_rkb_dbg(msetr->msetr_rkb, MSG | RD_KAFKA_DBG_EOS, "TXN", "%s [%" PRId32 "] received abort txn " - "ctrl msg at offset %" PRId64 + "ctrl msg at %s" " for " "PID %" PRId64 ", but this offset is " @@ -859,7 +871,8 @@ rd_kafka_msgset_reader_msg_v2(rd_kafka_msgset_reader_t *msetr) { "transaction: aborted transaction " "was possibly empty: ignoring", rktp->rktp_rkt->rkt_topic->str, - rktp->rktp_partition, hdr.Offset, + rktp->rktp_partition, + rd_kafka_fetch_pos2str(msetr_pos), msetr->msetr_v2_hdr->PID); break; } @@ -873,16 +886,16 @@ rd_kafka_msgset_reader_msg_v2(rd_kafka_msgset_reader_t *msetr) { "]: " "Unsupported ctrl message " "type %" PRId16 - " at offset" - " %" PRId64 ": ignoring", + " at " + " %s: ignoring", rktp->rktp_rkt->rkt_topic->str, rktp->rktp_partition, ctrl_data.Type, - hdr.Offset); + rd_kafka_fetch_pos2str(msetr_pos)); break; } rko = rd_kafka_op_new_ctrl_msg(rktp, msetr->msetr_tver->version, - rkbuf, hdr.Offset); + rkbuf, msetr_pos); rd_kafka_q_enq(&msetr->msetr_rkq, rko); msetr->msetr_msgcnt++; @@ -894,8 +907,8 @@ rd_kafka_msgset_reader_msg_v2(rd_kafka_msgset_reader_t *msetr) { /* Note: messages in aborted transactions are skipped at the MessageSet * level */ - rd_kafka_buf_read_bytes_varint(rkbuf, &hdr.Key); - rd_kafka_buf_read_bytes_varint(rkbuf, &hdr.Value); + rd_kafka_buf_read_kbytes_varint(rkbuf, &hdr.Key); + rd_kafka_buf_read_kbytes_varint(rkbuf, &hdr.Value); /* We parse the Headers later, just store the size (possibly truncated) * and pointer to the headers. */ @@ -905,14 +918,13 @@ rd_kafka_msgset_reader_msg_v2(rd_kafka_msgset_reader_t *msetr) { /* Create op/message container for message. */ rko = rd_kafka_op_new_fetch_msg( - &rkm, rktp, msetr->msetr_tver->version, rkbuf, hdr.Offset, + &rkm, rktp, msetr->msetr_tver->version, rkbuf, msetr_pos, (size_t)RD_KAFKAP_BYTES_LEN(&hdr.Key), RD_KAFKAP_BYTES_IS_NULL(&hdr.Key) ? NULL : hdr.Key.data, (size_t)RD_KAFKAP_BYTES_LEN(&hdr.Value), RD_KAFKAP_BYTES_IS_NULL(&hdr.Value) ? NULL : hdr.Value.data); - rkm->rkm_u.consumer.leader_epoch = msetr->msetr_leader_epoch; - rkm->rkm_broker_id = msetr->msetr_broker_id; + rkm->rkm_broker_id = msetr->msetr_broker_id; /* Store pointer to unparsed message headers, they will * be parsed on the first access. diff --git a/lib/librdkafka-2.1.0/src/rdkafka_msgset_writer.c b/lib/librdkafka-2.3.0/src/rdkafka_msgset_writer.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_msgset_writer.c rename to lib/librdkafka-2.3.0/src/rdkafka_msgset_writer.c index beb36bfac01..21f16b5a81a 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_msgset_writer.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_msgset_writer.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2017 Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_offset.c b/lib/librdkafka-2.3.0/src/rdkafka_offset.c similarity index 94% rename from lib/librdkafka-2.1.0/src/rdkafka_offset.c rename to lib/librdkafka-2.3.0/src/rdkafka_offset.c index ffa6a9d5247..701a41613d3 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_offset.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_offset.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012,2013 Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. 
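A pattern running through this reader: offset and leader epoch now travel together as a single rd_kafka_fetch_pos_t built with RD_KAFKA_FETCH_POS(), instead of patching rkm_u.consumer.leader_epoch after the message op is created. Note that the outdated-message skip earlier in this file still compares offsets only, per the KIP-320/KIP-595 comment; a small sketch of that predicate:

/* Sketch: the outdated-message check compares offsets only; the
 * leader epoch rides along for logging and later validation, never
 * for this comparison, to avoid fetch loops when moving between
 * brokers with and without leader-epoch support. */
static int is_outdated(rd_kafka_fetch_pos_t msg_pos,
                       rd_kafka_fetch_pos_t fetch_pos) {
        return msg_pos.offset < fetch_pos.offset;
}

/* Constructing positions, as done throughout this patch:
 *   RD_KAFKA_FETCH_POS(1234, 7)   - offset 1234 read under epoch 7
 *   RD_KAFKA_FETCH_POS(1234, -1)  - epoch unknown (legacy sources) */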
* * Redistribution and use in source and binary forms, with or without @@ -557,8 +558,10 @@ rd_kafka_offset_broker_commit(rd_kafka_toppar_t *rktp, const char *reason) { offsets = rd_kafka_topic_partition_list_new(1); rktpar = rd_kafka_topic_partition_list_add( offsets, rktp->rktp_rkt->rkt_topic->str, rktp->rktp_partition); + rd_kafka_topic_partition_set_from_fetch_pos(rktpar, rktp->rktp_committing_pos); + rd_kafka_topic_partition_set_metadata_from_rktp_stored(rktpar, rktp); rd_kafka_dbg(rktp->rktp_rkt->rkt_rk, TOPIC, "OFFSETCMT", "%.*s [%" PRId32 "]: committing %s: %s", @@ -643,7 +646,8 @@ rd_kafka_resp_err_t rd_kafka_offset_store(rd_kafka_topic_t *app_rkt, rd_kafka_topic_t *rkt = rd_kafka_topic_proper(app_rkt); rd_kafka_toppar_t *rktp; rd_kafka_resp_err_t err; - rd_kafka_fetch_pos_t pos = {offset + 1, -1 /*no leader epoch known*/}; + rd_kafka_fetch_pos_t pos = + RD_KAFKA_FETCH_POS(offset + 1, -1 /*no leader epoch known*/); /* Find toppar */ rd_kafka_topic_rdlock(rkt); @@ -653,8 +657,8 @@ rd_kafka_resp_err_t rd_kafka_offset_store(rd_kafka_topic_t *app_rkt, } rd_kafka_topic_rdunlock(rkt); - err = rd_kafka_offset_store0(rktp, pos, rd_false /* Don't force */, - RD_DO_LOCK); + err = rd_kafka_offset_store0(rktp, pos, NULL, 0, + rd_false /* Don't force */, RD_DO_LOCK); rd_kafka_toppar_destroy(rktp); @@ -675,7 +679,8 @@ rd_kafka_offsets_store(rd_kafka_t *rk, for (i = 0; i < offsets->cnt; i++) { rd_kafka_topic_partition_t *rktpar = &offsets->elems[i]; rd_kafka_toppar_t *rktp; - rd_kafka_fetch_pos_t pos = {rktpar->offset, -1}; + rd_kafka_fetch_pos_t pos = + RD_KAFKA_FETCH_POS(rktpar->offset, -1); rktp = rd_kafka_topic_partition_get_toppar(rk, rktpar, rd_false); @@ -689,7 +694,8 @@ rd_kafka_offsets_store(rd_kafka_t *rk, rd_kafka_topic_partition_get_leader_epoch(rktpar); rktpar->err = rd_kafka_offset_store0( - rktp, pos, rd_false /* don't force */, RD_DO_LOCK); + rktp, pos, rktpar->metadata, rktpar->metadata_size, + rd_false /* don't force */, RD_DO_LOCK); rd_kafka_toppar_destroy(rktp); if (rktpar->err) @@ -721,10 +727,10 @@ rd_kafka_error_t *rd_kafka_offset_store_message(rd_kafka_message_t *rkmessage) { "Invalid message object, " "not a consumed message"); - pos.offset = rkmessage->offset + 1; - pos.leader_epoch = rkm->rkm_u.consumer.leader_epoch; - err = rd_kafka_offset_store0(rktp, pos, rd_false /* Don't force */, - RD_DO_LOCK); + pos = RD_KAFKA_FETCH_POS(rkmessage->offset + 1, + rkm->rkm_u.consumer.leader_epoch); + err = rd_kafka_offset_store0(rktp, pos, NULL, 0, + rd_false /* Don't force */, RD_DO_LOCK); if (err == RD_KAFKA_RESP_ERR__STATE) return rd_kafka_error_new(err, "Partition is not assigned"); @@ -956,9 +962,6 @@ static void rd_kafka_toppar_handle_OffsetForLeaderEpoch(rd_kafka_t *rk, "supported by broker: validation skipped", RD_KAFKAP_STR_PR(rktp->rktp_rkt->rkt_topic), rktp->rktp_partition); - /* Reset the epoch to -1 since it can't be used with - * older brokers. */ - rktp->rktp_next_fetch_start.leader_epoch = -1; rd_kafka_toppar_set_fetch_state( rktp, RD_KAFKA_TOPPAR_FETCH_ACTIVE); goto done; @@ -988,27 +991,15 @@ static void rd_kafka_toppar_handle_OffsetForLeaderEpoch(rd_kafka_t *rk, rd_kafka_topic_leader_query0(rk, rktp->rktp_rkt, 1, rd_true /* force */); - if (actions & RD_KAFKA_ERR_ACTION_RETRY) { - /* No need for refcnt on rktp for timer opaque - * since the timer resides on the rktp and will be - * stopped on toppar remove. 
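Because rd_kafka_offset_store0() now accepts metadata, the rd_kafka_offsets_store() path above finally propagates per-partition metadata into the stored position, to be committed alongside the offset. A hedged application-side sketch (topic name and metadata value are invented; the list destructor frees the metadata, assuming the default malloc-backed allocator):

#include <string.h>

static void store_with_metadata(rd_kafka_t *rk) {
        rd_kafka_topic_partition_list_t *offsets =
            rd_kafka_topic_partition_list_new(1);
        rd_kafka_topic_partition_t *p =
            rd_kafka_topic_partition_list_add(offsets, "mytopic", 0);

        p->offset        = 1235;                    /* next offset to consume */
        p->metadata      = strdup("checkpoint-42"); /* application metadata */
        p->metadata_size = strlen("checkpoint-42");

        rd_kafka_offsets_store(rk, offsets);
        rd_kafka_topic_partition_list_destroy(offsets);
}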
*/ - rd_kafka_timer_start_oneshot( - &rk->rk_timers, &rktp->rktp_validate_tmr, rd_false, - 500 * 1000 /* 500ms */, - rd_kafka_offset_validate_tmr_cb, rktp); - goto done; - } - - if (!(actions & RD_KAFKA_ERR_ACTION_REFRESH)) { - /* Permanent error */ - rd_kafka_offset_reset( - rktp, rd_kafka_broker_id(rkb), - RD_KAFKA_FETCH_POS(RD_KAFKA_OFFSET_INVALID, - rktp->rktp_leader_epoch), - RD_KAFKA_RESP_ERR__LOG_TRUNCATION, - "Unable to validate offset and epoch: %s", - rd_kafka_err2str(err)); - } + /* No need for refcnt on rktp for timer opaque + * since the timer resides on the rktp and will be + * stopped on toppar remove. + * Retries the validation with a new call even in + * case of permanent error. */ + rd_kafka_timer_start_oneshot( + &rk->rk_timers, &rktp->rktp_validate_tmr, rd_false, + 500 * 1000 /* 500ms */, rd_kafka_offset_validate_tmr_cb, + rktp); goto done; } @@ -1020,17 +1011,18 @@ static void rd_kafka_toppar_handle_OffsetForLeaderEpoch(rd_kafka_t *rk, if (end_offset < 0 || end_offset_leader_epoch < 0) { rd_kafka_offset_reset( - rktp, rd_kafka_broker_id(rkb), rktp->rktp_next_fetch_start, + rktp, rd_kafka_broker_id(rkb), + rktp->rktp_offset_validation_pos, RD_KAFKA_RESP_ERR__LOG_TRUNCATION, "No epoch found less or equal to " "%s: broker end offset is %" PRId64 " (offset leader epoch %" PRId32 ")." " Reset using configured policy.", - rd_kafka_fetch_pos2str(rktp->rktp_next_fetch_start), + rd_kafka_fetch_pos2str(rktp->rktp_offset_validation_pos), end_offset, end_offset_leader_epoch); - } else if (end_offset < rktp->rktp_next_fetch_start.offset) { + } else if (end_offset < rktp->rktp_offset_validation_pos.offset) { if (rktp->rktp_rkt->rkt_conf.auto_offset_reset == RD_KAFKA_OFFSET_INVALID /* auto.offset.reset=error */) { @@ -1044,7 +1036,8 @@ static void rd_kafka_toppar_handle_OffsetForLeaderEpoch(rd_kafka_t *rk, " (offset leader epoch %" PRId32 "). " "Reset to INVALID.", - rd_kafka_fetch_pos2str(rktp->rktp_next_fetch_start), + rd_kafka_fetch_pos2str( + rktp->rktp_offset_validation_pos), end_offset, end_offset_leader_epoch); } else { @@ -1074,8 +1067,6 @@ static void rd_kafka_toppar_handle_OffsetForLeaderEpoch(rd_kafka_t *rk, rktp->rktp_partition, end_offset, end_offset_leader_epoch); - rktp->rktp_next_fetch_start.leader_epoch = - end_offset_leader_epoch; rd_kafka_toppar_set_fetch_state(rktp, RD_KAFKA_TOPPAR_FETCH_ACTIVE); } @@ -1166,7 +1157,7 @@ void rd_kafka_offset_validate(rd_kafka_toppar_t *rktp, const char *fmt, ...) { * there is no point in doing validation. * This is the case for epoch-less seek()s or epoch-less * committed offsets. */ - if (rktp->rktp_next_fetch_start.leader_epoch == -1) { + if (rktp->rktp_offset_validation_pos.leader_epoch == -1) { rd_kafka_dbg( rktp->rktp_rkt->rkt_rk, FETCH, "VALIDATE", "%.*s [%" PRId32 @@ -1174,7 +1165,7 @@ void rd_kafka_offset_validate(rd_kafka_toppar_t *rktp, const char *fmt, ...) { "validation for %s: no leader epoch set", RD_KAFKAP_STR_PR(rktp->rktp_rkt->rkt_topic), rktp->rktp_partition, - rd_kafka_fetch_pos2str(rktp->rktp_next_fetch_start)); + rd_kafka_fetch_pos2str(rktp->rktp_offset_validation_pos)); rd_kafka_toppar_set_fetch_state(rktp, RD_KAFKA_TOPPAR_FETCH_ACTIVE); return; @@ -1188,18 +1179,18 @@ void rd_kafka_offset_validate(rd_kafka_toppar_t *rktp, const char *fmt, ...) 
{ rktpar = rd_kafka_topic_partition_list_add( parts, rktp->rktp_rkt->rkt_topic->str, rktp->rktp_partition); rd_kafka_topic_partition_set_leader_epoch( - rktpar, rktp->rktp_next_fetch_start.leader_epoch); + rktpar, rktp->rktp_offset_validation_pos.leader_epoch); rd_kafka_topic_partition_set_current_leader_epoch( rktpar, rktp->rktp_leader_epoch); rd_kafka_toppar_keep(rktp); /* for request opaque */ - rd_rkb_dbg(rktp->rktp_leader, FETCH, "VALIDATE", - "%.*s [%" PRId32 - "]: querying broker for epoch " - "validation of %s: %s", - RD_KAFKAP_STR_PR(rktp->rktp_rkt->rkt_topic), - rktp->rktp_partition, - rd_kafka_fetch_pos2str(rktp->rktp_next_fetch_start), reason); + rd_rkb_dbg( + rktp->rktp_leader, FETCH, "VALIDATE", + "%.*s [%" PRId32 + "]: querying broker for epoch " + "validation of %s: %s", + RD_KAFKAP_STR_PR(rktp->rktp_rkt->rkt_topic), rktp->rktp_partition, + rd_kafka_fetch_pos2str(rktp->rktp_offset_validation_pos), reason); rd_kafka_OffsetForLeaderEpochRequest( rktp->rktp_leader, parts, RD_KAFKA_REPLYQ(rktp->rktp_ops, 0), @@ -1441,7 +1432,7 @@ rd_kafka_resp_err_t rd_kafka_offset_store_stop(rd_kafka_toppar_t *rktp) { rktp, RD_KAFKA_FETCH_POS(rktp->rktp_offsets_fin.eof_offset, rktp->rktp_leader_epoch), - rd_true /* force */, RD_DONT_LOCK); + NULL, 0, rd_true /* force */, RD_DONT_LOCK); /* Commit offset to backing store. * This might be an async operation. */ @@ -1539,7 +1530,7 @@ void rd_kafka_update_app_pos(rd_kafka_t *rk, rktp->rktp_app_pos = pos; if (rk->rk_conf.enable_auto_offset_store) - rd_kafka_offset_store0(rktp, pos, + rd_kafka_offset_store0(rktp, pos, NULL, 0, /* force: ignore assignment state */ rd_true, RD_DONT_LOCK); diff --git a/lib/librdkafka-2.1.0/src/rdkafka_offset.h b/lib/librdkafka-2.3.0/src/rdkafka_offset.h similarity index 87% rename from lib/librdkafka-2.1.0/src/rdkafka_offset.h rename to lib/librdkafka-2.3.0/src/rdkafka_offset.h index 7b01c84877d..de9b5dec985 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_offset.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_offset.h @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012,2013 Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -73,6 +74,8 @@ const char *rd_kafka_offset2str(int64_t offset); * * @param pos Offset and leader epoch to set, may be an absolute offset * or .._INVALID. + * @param metadata Metadata to be set (optional). + * @param metadata_size Size of the metadata to be set. * @param force Forcibly set \p offset regardless of assignment state. * @param do_lock Whether to lock the \p rktp or not (already locked by caller). 
* @@ -84,6 +87,8 @@ const char *rd_kafka_offset2str(int64_t offset); static RD_INLINE RD_UNUSED rd_kafka_resp_err_t rd_kafka_offset_store0(rd_kafka_toppar_t *rktp, const rd_kafka_fetch_pos_t pos, + void *metadata, + size_t metadata_size, rd_bool_t force, rd_dolock_t do_lock) { rd_kafka_resp_err_t err = RD_KAFKA_RESP_ERR_NO_ERROR; @@ -96,7 +101,17 @@ rd_kafka_offset_store0(rd_kafka_toppar_t *rktp, !rd_kafka_is_simple_consumer(rktp->rktp_rkt->rkt_rk))) { err = RD_KAFKA_RESP_ERR__STATE; } else { - rktp->rktp_stored_pos = pos; + if (rktp->rktp_stored_metadata) { + rd_free(rktp->rktp_stored_metadata); + rktp->rktp_stored_metadata = NULL; + } + rktp->rktp_stored_pos = pos; + rktp->rktp_stored_metadata_size = metadata_size; + if (metadata) { + rktp->rktp_stored_metadata = rd_malloc(metadata_size); + memcpy(rktp->rktp_stored_metadata, metadata, + rktp->rktp_stored_metadata_size); + } } if (do_lock) diff --git a/lib/librdkafka-2.1.0/src/rdkafka_op.c b/lib/librdkafka-2.3.0/src/rdkafka_op.c similarity index 81% rename from lib/librdkafka-2.1.0/src/rdkafka_op.c rename to lib/librdkafka-2.3.0/src/rdkafka_op.c index 128b8bb404f..34e9e3fd344 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_op.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_op.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -43,48 +44,52 @@ rd_atomic32_t rd_kafka_op_cnt; const char *rd_kafka_op2str(rd_kafka_op_type_t type) { int skiplen = 6; static const char *names[RD_KAFKA_OP__END] = { - [RD_KAFKA_OP_NONE] = "REPLY:NONE", - [RD_KAFKA_OP_FETCH] = "REPLY:FETCH", - [RD_KAFKA_OP_ERR] = "REPLY:ERR", - [RD_KAFKA_OP_CONSUMER_ERR] = "REPLY:CONSUMER_ERR", - [RD_KAFKA_OP_DR] = "REPLY:DR", - [RD_KAFKA_OP_STATS] = "REPLY:STATS", - [RD_KAFKA_OP_OFFSET_COMMIT] = "REPLY:OFFSET_COMMIT", - [RD_KAFKA_OP_NODE_UPDATE] = "REPLY:NODE_UPDATE", - [RD_KAFKA_OP_XMIT_BUF] = "REPLY:XMIT_BUF", - [RD_KAFKA_OP_RECV_BUF] = "REPLY:RECV_BUF", - [RD_KAFKA_OP_XMIT_RETRY] = "REPLY:XMIT_RETRY", - [RD_KAFKA_OP_FETCH_START] = "REPLY:FETCH_START", - [RD_KAFKA_OP_FETCH_STOP] = "REPLY:FETCH_STOP", - [RD_KAFKA_OP_SEEK] = "REPLY:SEEK", - [RD_KAFKA_OP_PAUSE] = "REPLY:PAUSE", - [RD_KAFKA_OP_OFFSET_FETCH] = "REPLY:OFFSET_FETCH", - [RD_KAFKA_OP_PARTITION_JOIN] = "REPLY:PARTITION_JOIN", - [RD_KAFKA_OP_PARTITION_LEAVE] = "REPLY:PARTITION_LEAVE", - [RD_KAFKA_OP_REBALANCE] = "REPLY:REBALANCE", - [RD_KAFKA_OP_TERMINATE] = "REPLY:TERMINATE", - [RD_KAFKA_OP_COORD_QUERY] = "REPLY:COORD_QUERY", - [RD_KAFKA_OP_SUBSCRIBE] = "REPLY:SUBSCRIBE", - [RD_KAFKA_OP_ASSIGN] = "REPLY:ASSIGN", - [RD_KAFKA_OP_GET_SUBSCRIPTION] = "REPLY:GET_SUBSCRIPTION", - [RD_KAFKA_OP_GET_ASSIGNMENT] = "REPLY:GET_ASSIGNMENT", - [RD_KAFKA_OP_THROTTLE] = "REPLY:THROTTLE", - [RD_KAFKA_OP_NAME] = "REPLY:NAME", - [RD_KAFKA_OP_CG_METADATA] = "REPLY:CG_METADATA", - [RD_KAFKA_OP_OFFSET_RESET] = "REPLY:OFFSET_RESET", - [RD_KAFKA_OP_METADATA] = "REPLY:METADATA", - [RD_KAFKA_OP_LOG] = "REPLY:LOG", - [RD_KAFKA_OP_WAKEUP] = "REPLY:WAKEUP", - [RD_KAFKA_OP_CREATETOPICS] = "REPLY:CREATETOPICS", - [RD_KAFKA_OP_DELETETOPICS] = "REPLY:DELETETOPICS", - [RD_KAFKA_OP_CREATEPARTITIONS] = "REPLY:CREATEPARTITIONS", - [RD_KAFKA_OP_ALTERCONFIGS] = "REPLY:ALTERCONFIGS", + [RD_KAFKA_OP_NONE] = "REPLY:NONE", + [RD_KAFKA_OP_FETCH] = "REPLY:FETCH", + [RD_KAFKA_OP_ERR] = "REPLY:ERR", + [RD_KAFKA_OP_CONSUMER_ERR] = "REPLY:CONSUMER_ERR", + 
[RD_KAFKA_OP_DR] = "REPLY:DR", + [RD_KAFKA_OP_STATS] = "REPLY:STATS", + [RD_KAFKA_OP_OFFSET_COMMIT] = "REPLY:OFFSET_COMMIT", + [RD_KAFKA_OP_NODE_UPDATE] = "REPLY:NODE_UPDATE", + [RD_KAFKA_OP_XMIT_BUF] = "REPLY:XMIT_BUF", + [RD_KAFKA_OP_RECV_BUF] = "REPLY:RECV_BUF", + [RD_KAFKA_OP_XMIT_RETRY] = "REPLY:XMIT_RETRY", + [RD_KAFKA_OP_FETCH_START] = "REPLY:FETCH_START", + [RD_KAFKA_OP_FETCH_STOP] = "REPLY:FETCH_STOP", + [RD_KAFKA_OP_SEEK] = "REPLY:SEEK", + [RD_KAFKA_OP_PAUSE] = "REPLY:PAUSE", + [RD_KAFKA_OP_OFFSET_FETCH] = "REPLY:OFFSET_FETCH", + [RD_KAFKA_OP_PARTITION_JOIN] = "REPLY:PARTITION_JOIN", + [RD_KAFKA_OP_PARTITION_LEAVE] = "REPLY:PARTITION_LEAVE", + [RD_KAFKA_OP_REBALANCE] = "REPLY:REBALANCE", + [RD_KAFKA_OP_TERMINATE] = "REPLY:TERMINATE", + [RD_KAFKA_OP_COORD_QUERY] = "REPLY:COORD_QUERY", + [RD_KAFKA_OP_SUBSCRIBE] = "REPLY:SUBSCRIBE", + [RD_KAFKA_OP_ASSIGN] = "REPLY:ASSIGN", + [RD_KAFKA_OP_GET_SUBSCRIPTION] = "REPLY:GET_SUBSCRIPTION", + [RD_KAFKA_OP_GET_ASSIGNMENT] = "REPLY:GET_ASSIGNMENT", + [RD_KAFKA_OP_THROTTLE] = "REPLY:THROTTLE", + [RD_KAFKA_OP_NAME] = "REPLY:NAME", + [RD_KAFKA_OP_CG_METADATA] = "REPLY:CG_METADATA", + [RD_KAFKA_OP_OFFSET_RESET] = "REPLY:OFFSET_RESET", + [RD_KAFKA_OP_METADATA] = "REPLY:METADATA", + [RD_KAFKA_OP_LOG] = "REPLY:LOG", + [RD_KAFKA_OP_WAKEUP] = "REPLY:WAKEUP", + [RD_KAFKA_OP_CREATETOPICS] = "REPLY:CREATETOPICS", + [RD_KAFKA_OP_DELETETOPICS] = "REPLY:DELETETOPICS", + [RD_KAFKA_OP_CREATEPARTITIONS] = "REPLY:CREATEPARTITIONS", + [RD_KAFKA_OP_ALTERCONFIGS] = "REPLY:ALTERCONFIGS", + [RD_KAFKA_OP_INCREMENTALALTERCONFIGS] = + "REPLY:INCREMENTALALTERCONFIGS", [RD_KAFKA_OP_DESCRIBECONFIGS] = "REPLY:DESCRIBECONFIGS", [RD_KAFKA_OP_DELETERECORDS] = "REPLY:DELETERECORDS", [RD_KAFKA_OP_LISTCONSUMERGROUPS] = "REPLY:LISTCONSUMERGROUPS", [RD_KAFKA_OP_DESCRIBECONSUMERGROUPS] = "REPLY:DESCRIBECONSUMERGROUPS", - [RD_KAFKA_OP_DELETEGROUPS] = "REPLY:DELETEGROUPS", + [RD_KAFKA_OP_DESCRIBETOPICS] = "REPLY:DESCRIBETOPICS", + [RD_KAFKA_OP_DESCRIBECLUSTER] = "REPLY:DESCRIBECLUSTER", + [RD_KAFKA_OP_DELETEGROUPS] = "REPLY:DELETEGROUPS", [RD_KAFKA_OP_DELETECONSUMERGROUPOFFSETS] = "REPLY:DELETECONSUMERGROUPOFFSETS", [RD_KAFKA_OP_CREATEACLS] = "REPLY:CREATEACLS", @@ -104,8 +109,14 @@ const char *rd_kafka_op2str(rd_kafka_op_type_t type) { [RD_KAFKA_OP_TXN] = "REPLY:TXN", [RD_KAFKA_OP_GET_REBALANCE_PROTOCOL] = "REPLY:GET_REBALANCE_PROTOCOL", - [RD_KAFKA_OP_LEADERS] = "REPLY:LEADERS", - [RD_KAFKA_OP_BARRIER] = "REPLY:BARRIER", + [RD_KAFKA_OP_LEADERS] = "REPLY:LEADERS", + [RD_KAFKA_OP_BARRIER] = "REPLY:BARRIER", + [RD_KAFKA_OP_SASL_REAUTH] = "REPLY:SASL_REAUTH", + [RD_KAFKA_OP_ALTERUSERSCRAMCREDENTIALS] = + "REPLY:ALTERUSERSCRAMCREDENTIALS", + [RD_KAFKA_OP_DESCRIBEUSERSCRAMCREDENTIALS] = + "REPLY:DESCRIBEUSERSCRAMCREDENTIALS", + [RD_KAFKA_OP_LISTOFFSETS] = "REPLY:LISTOFFSETS", }; if (type & RD_KAFKA_OP_REPLY) @@ -193,47 +204,51 @@ rd_kafka_op_t *rd_kafka_op_new0(const char *source, rd_kafka_op_type_t type) { * if we forgot to add an op type to \ * this list. 
*/ static const size_t op2size[RD_KAFKA_OP__END] = { - [RD_KAFKA_OP_FETCH] = sizeof(rko->rko_u.fetch), - [RD_KAFKA_OP_ERR] = sizeof(rko->rko_u.err), - [RD_KAFKA_OP_CONSUMER_ERR] = sizeof(rko->rko_u.err), - [RD_KAFKA_OP_DR] = sizeof(rko->rko_u.dr), - [RD_KAFKA_OP_STATS] = sizeof(rko->rko_u.stats), - [RD_KAFKA_OP_OFFSET_COMMIT] = sizeof(rko->rko_u.offset_commit), - [RD_KAFKA_OP_NODE_UPDATE] = sizeof(rko->rko_u.node), - [RD_KAFKA_OP_XMIT_BUF] = sizeof(rko->rko_u.xbuf), - [RD_KAFKA_OP_RECV_BUF] = sizeof(rko->rko_u.xbuf), - [RD_KAFKA_OP_XMIT_RETRY] = sizeof(rko->rko_u.xbuf), - [RD_KAFKA_OP_FETCH_START] = sizeof(rko->rko_u.fetch_start), - [RD_KAFKA_OP_FETCH_STOP] = _RD_KAFKA_OP_EMPTY, - [RD_KAFKA_OP_SEEK] = sizeof(rko->rko_u.fetch_start), - [RD_KAFKA_OP_PAUSE] = sizeof(rko->rko_u.pause), - [RD_KAFKA_OP_OFFSET_FETCH] = sizeof(rko->rko_u.offset_fetch), - [RD_KAFKA_OP_PARTITION_JOIN] = _RD_KAFKA_OP_EMPTY, - [RD_KAFKA_OP_PARTITION_LEAVE] = _RD_KAFKA_OP_EMPTY, - [RD_KAFKA_OP_REBALANCE] = sizeof(rko->rko_u.rebalance), - [RD_KAFKA_OP_TERMINATE] = _RD_KAFKA_OP_EMPTY, - [RD_KAFKA_OP_COORD_QUERY] = _RD_KAFKA_OP_EMPTY, - [RD_KAFKA_OP_SUBSCRIBE] = sizeof(rko->rko_u.subscribe), - [RD_KAFKA_OP_ASSIGN] = sizeof(rko->rko_u.assign), - [RD_KAFKA_OP_GET_SUBSCRIPTION] = sizeof(rko->rko_u.subscribe), - [RD_KAFKA_OP_GET_ASSIGNMENT] = sizeof(rko->rko_u.assign), - [RD_KAFKA_OP_THROTTLE] = sizeof(rko->rko_u.throttle), - [RD_KAFKA_OP_NAME] = sizeof(rko->rko_u.name), - [RD_KAFKA_OP_CG_METADATA] = sizeof(rko->rko_u.cg_metadata), - [RD_KAFKA_OP_OFFSET_RESET] = sizeof(rko->rko_u.offset_reset), - [RD_KAFKA_OP_METADATA] = sizeof(rko->rko_u.metadata), - [RD_KAFKA_OP_LOG] = sizeof(rko->rko_u.log), - [RD_KAFKA_OP_WAKEUP] = _RD_KAFKA_OP_EMPTY, - [RD_KAFKA_OP_CREATETOPICS] = sizeof(rko->rko_u.admin_request), - [RD_KAFKA_OP_DELETETOPICS] = sizeof(rko->rko_u.admin_request), - [RD_KAFKA_OP_CREATEPARTITIONS] = sizeof(rko->rko_u.admin_request), - [RD_KAFKA_OP_ALTERCONFIGS] = sizeof(rko->rko_u.admin_request), + [RD_KAFKA_OP_FETCH] = sizeof(rko->rko_u.fetch), + [RD_KAFKA_OP_ERR] = sizeof(rko->rko_u.err), + [RD_KAFKA_OP_CONSUMER_ERR] = sizeof(rko->rko_u.err), + [RD_KAFKA_OP_DR] = sizeof(rko->rko_u.dr), + [RD_KAFKA_OP_STATS] = sizeof(rko->rko_u.stats), + [RD_KAFKA_OP_OFFSET_COMMIT] = sizeof(rko->rko_u.offset_commit), + [RD_KAFKA_OP_NODE_UPDATE] = sizeof(rko->rko_u.node), + [RD_KAFKA_OP_XMIT_BUF] = sizeof(rko->rko_u.xbuf), + [RD_KAFKA_OP_RECV_BUF] = sizeof(rko->rko_u.xbuf), + [RD_KAFKA_OP_XMIT_RETRY] = sizeof(rko->rko_u.xbuf), + [RD_KAFKA_OP_FETCH_START] = sizeof(rko->rko_u.fetch_start), + [RD_KAFKA_OP_FETCH_STOP] = _RD_KAFKA_OP_EMPTY, + [RD_KAFKA_OP_SEEK] = sizeof(rko->rko_u.fetch_start), + [RD_KAFKA_OP_PAUSE] = sizeof(rko->rko_u.pause), + [RD_KAFKA_OP_OFFSET_FETCH] = sizeof(rko->rko_u.offset_fetch), + [RD_KAFKA_OP_PARTITION_JOIN] = _RD_KAFKA_OP_EMPTY, + [RD_KAFKA_OP_PARTITION_LEAVE] = _RD_KAFKA_OP_EMPTY, + [RD_KAFKA_OP_REBALANCE] = sizeof(rko->rko_u.rebalance), + [RD_KAFKA_OP_TERMINATE] = _RD_KAFKA_OP_EMPTY, + [RD_KAFKA_OP_COORD_QUERY] = _RD_KAFKA_OP_EMPTY, + [RD_KAFKA_OP_SUBSCRIBE] = sizeof(rko->rko_u.subscribe), + [RD_KAFKA_OP_ASSIGN] = sizeof(rko->rko_u.assign), + [RD_KAFKA_OP_GET_SUBSCRIPTION] = sizeof(rko->rko_u.subscribe), + [RD_KAFKA_OP_GET_ASSIGNMENT] = sizeof(rko->rko_u.assign), + [RD_KAFKA_OP_THROTTLE] = sizeof(rko->rko_u.throttle), + [RD_KAFKA_OP_NAME] = sizeof(rko->rko_u.name), + [RD_KAFKA_OP_CG_METADATA] = sizeof(rko->rko_u.cg_metadata), + [RD_KAFKA_OP_OFFSET_RESET] = sizeof(rko->rko_u.offset_reset), + [RD_KAFKA_OP_METADATA] = 
sizeof(rko->rko_u.metadata), + [RD_KAFKA_OP_LOG] = sizeof(rko->rko_u.log), + [RD_KAFKA_OP_WAKEUP] = _RD_KAFKA_OP_EMPTY, + [RD_KAFKA_OP_CREATETOPICS] = sizeof(rko->rko_u.admin_request), + [RD_KAFKA_OP_DELETETOPICS] = sizeof(rko->rko_u.admin_request), + [RD_KAFKA_OP_CREATEPARTITIONS] = sizeof(rko->rko_u.admin_request), + [RD_KAFKA_OP_ALTERCONFIGS] = sizeof(rko->rko_u.admin_request), + [RD_KAFKA_OP_INCREMENTALALTERCONFIGS] = + sizeof(rko->rko_u.admin_request), [RD_KAFKA_OP_DESCRIBECONFIGS] = sizeof(rko->rko_u.admin_request), [RD_KAFKA_OP_DELETERECORDS] = sizeof(rko->rko_u.admin_request), [RD_KAFKA_OP_LISTCONSUMERGROUPS] = sizeof(rko->rko_u.admin_request), [RD_KAFKA_OP_DESCRIBECONSUMERGROUPS] = sizeof(rko->rko_u.admin_request), - [RD_KAFKA_OP_DELETEGROUPS] = sizeof(rko->rko_u.admin_request), + [RD_KAFKA_OP_DESCRIBETOPICS] = sizeof(rko->rko_u.admin_request), + [RD_KAFKA_OP_DESCRIBECLUSTER] = sizeof(rko->rko_u.admin_request), + [RD_KAFKA_OP_DELETEGROUPS] = sizeof(rko->rko_u.admin_request), [RD_KAFKA_OP_DELETECONSUMERGROUPOFFSETS] = sizeof(rko->rko_u.admin_request), [RD_KAFKA_OP_CREATEACLS] = sizeof(rko->rko_u.admin_request), @@ -253,8 +268,14 @@ rd_kafka_op_t *rd_kafka_op_new0(const char *source, rd_kafka_op_type_t type) { [RD_KAFKA_OP_TXN] = sizeof(rko->rko_u.txn), [RD_KAFKA_OP_GET_REBALANCE_PROTOCOL] = sizeof(rko->rko_u.rebalance_protocol), - [RD_KAFKA_OP_LEADERS] = sizeof(rko->rko_u.leaders), - [RD_KAFKA_OP_BARRIER] = _RD_KAFKA_OP_EMPTY, + [RD_KAFKA_OP_LEADERS] = sizeof(rko->rko_u.leaders), + [RD_KAFKA_OP_BARRIER] = _RD_KAFKA_OP_EMPTY, + [RD_KAFKA_OP_SASL_REAUTH] = _RD_KAFKA_OP_EMPTY, + [RD_KAFKA_OP_ALTERUSERSCRAMCREDENTIALS] = + sizeof(rko->rko_u.admin_request), + [RD_KAFKA_OP_DESCRIBEUSERSCRAMCREDENTIALS] = + sizeof(rko->rko_u.admin_request), + [RD_KAFKA_OP_LISTOFFSETS] = sizeof(rko->rko_u.admin_request), }; size_t tsize = op2size[type & ~RD_KAFKA_OP_FLAGMASK]; @@ -374,6 +395,8 @@ void rd_kafka_op_destroy(rd_kafka_op_t *rko) { case RD_KAFKA_OP_METADATA: RD_IF_FREE(rko->rko_u.metadata.md, rd_kafka_metadata_destroy); + /* It's not needed to free metadata.mdi because they + are the in the same memory allocation. 
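The reindented op2size table above relies on a C idiom worth calling out: designated initializers zero-fill any op type that was forgotten, and the allocation path asserts the looked-up size is non-zero, turning a missing entry into an immediate failure rather than a short allocation. A condensed sketch of the idiom:

#include <assert.h>
#include <stddef.h>

enum op { OP_A, OP_B, OP__END };
union payload { int a; double b[4]; };

static const size_t op2size[OP__END] = {
        [OP_A] = sizeof(((union payload *)0)->a),
        [OP_B] = sizeof(((union payload *)0)->b),
        /* a new enum value without an entry here stays 0 ... */
};

static size_t op_alloc_size(enum op t) {
        assert(op2size[t] != 0); /* ... and trips this assert */
        return op2size[t];
}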
*/ break; case RD_KAFKA_OP_LOG: @@ -387,6 +410,7 @@ void rd_kafka_op_destroy(rd_kafka_op_t *rko) { case RD_KAFKA_OP_DELETETOPICS: case RD_KAFKA_OP_CREATEPARTITIONS: case RD_KAFKA_OP_ALTERCONFIGS: + case RD_KAFKA_OP_INCREMENTALALTERCONFIGS: case RD_KAFKA_OP_DESCRIBECONFIGS: case RD_KAFKA_OP_DELETERECORDS: case RD_KAFKA_OP_LISTCONSUMERGROUPS: @@ -397,7 +421,12 @@ void rd_kafka_op_destroy(rd_kafka_op_t *rko) { case RD_KAFKA_OP_DESCRIBEACLS: case RD_KAFKA_OP_DELETEACLS: case RD_KAFKA_OP_ALTERCONSUMERGROUPOFFSETS: + case RD_KAFKA_OP_DESCRIBETOPICS: + case RD_KAFKA_OP_DESCRIBECLUSTER: case RD_KAFKA_OP_LISTCONSUMERGROUPOFFSETS: + case RD_KAFKA_OP_ALTERUSERSCRAMCREDENTIALS: + case RD_KAFKA_OP_DESCRIBEUSERSCRAMCREDENTIALS: + case RD_KAFKA_OP_LISTOFFSETS: rd_kafka_replyq_destroy(&rko->rko_u.admin_request.replyq); rd_list_destroy(&rko->rko_u.admin_request.args); if (rko->rko_u.admin_request.options.match_consumer_group_states @@ -727,11 +756,11 @@ rd_kafka_op_call(rd_kafka_t *rk, rd_kafka_q_t *rkq, rd_kafka_op_t *rko) { rd_kafka_op_t *rd_kafka_op_new_ctrl_msg(rd_kafka_toppar_t *rktp, int32_t version, rd_kafka_buf_t *rkbuf, - int64_t offset) { + rd_kafka_fetch_pos_t pos) { rd_kafka_msg_t *rkm; rd_kafka_op_t *rko; - rko = rd_kafka_op_new_fetch_msg(&rkm, rktp, version, rkbuf, offset, 0, + rko = rd_kafka_op_new_fetch_msg(&rkm, rktp, version, rkbuf, pos, 0, NULL, 0, NULL); rkm->rkm_flags |= RD_KAFKA_MSG_F_CONTROL; @@ -750,7 +779,7 @@ rd_kafka_op_t *rd_kafka_op_new_fetch_msg(rd_kafka_msg_t **rkmp, rd_kafka_toppar_t *rktp, int32_t version, rd_kafka_buf_t *rkbuf, - int64_t offset, + rd_kafka_fetch_pos_t pos, size_t key_len, const void *key, size_t val_len, @@ -772,7 +801,8 @@ rd_kafka_op_t *rd_kafka_op_new_fetch_msg(rd_kafka_msg_t **rkmp, rko->rko_u.fetch.rkbuf = rkbuf; rd_kafka_buf_keep(rkbuf); - rkm->rkm_offset = offset; + rkm->rkm_offset = pos.offset; + rkm->rkm_u.consumer.leader_epoch = pos.leader_epoch; rkm->rkm_key = (void *)key; rkm->rkm_key_len = key_len; diff --git a/lib/librdkafka-2.1.0/src/rdkafka_op.h b/lib/librdkafka-2.3.0/src/rdkafka_op.h similarity index 93% rename from lib/librdkafka-2.1.0/src/rdkafka_op.h rename to lib/librdkafka-2.3.0/src/rdkafka_op.h index 57c07491a2c..3a1384362ad 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_op.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_op.h @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -126,18 +127,28 @@ typedef enum { RD_KAFKA_OP_DELETETOPICS, /**< Admin: DeleteTopics: u.admin_request*/ RD_KAFKA_OP_CREATEPARTITIONS, /**< Admin: CreatePartitions: * u.admin_request*/ - RD_KAFKA_OP_ALTERCONFIGS, /**< Admin: AlterConfigs: u.admin_request*/ - RD_KAFKA_OP_DESCRIBECONFIGS, /**< Admin: DescribeConfigs: - * u.admin_request*/ - RD_KAFKA_OP_DELETERECORDS, /**< Admin: DeleteRecords: - * u.admin_request*/ - RD_KAFKA_OP_LISTCONSUMERGROUPS, /**< Admin: - * ListConsumerGroups - * u.admin_request */ - RD_KAFKA_OP_DESCRIBECONSUMERGROUPS, /**< Admin: - * DescribeConsumerGroups - * u.admin_request */ - RD_KAFKA_OP_DELETEGROUPS, /**< Admin: DeleteGroups: u.admin_request*/ + RD_KAFKA_OP_ALTERCONFIGS, /**< Admin: AlterConfigs: u.admin_request*/ + RD_KAFKA_OP_INCREMENTALALTERCONFIGS, /**< Admin: + * IncrementalAlterConfigs: + * u.admin_request */ + RD_KAFKA_OP_DESCRIBECONFIGS, /**< Admin: DescribeConfigs: + * u.admin_request*/ + RD_KAFKA_OP_DELETERECORDS, /**< Admin: DeleteRecords: + * u.admin_request*/ + RD_KAFKA_OP_LISTCONSUMERGROUPS, /**< Admin: + * ListConsumerGroups + * u.admin_request */ + RD_KAFKA_OP_DESCRIBECONSUMERGROUPS, /**< Admin: + * DescribeConsumerGroups + * u.admin_request */ + RD_KAFKA_OP_DESCRIBECLUSTER, /**< Admin: + * DescribeCluster + * u.admin_request */ + + RD_KAFKA_OP_DESCRIBETOPICS, /**< Admin: + * DescribeTopics + * u.admin_request */ + RD_KAFKA_OP_DELETEGROUPS, /**< Admin: DeleteGroups: u.admin_request*/ RD_KAFKA_OP_DELETECONSUMERGROUPOFFSETS, /**< Admin: * DeleteConsumerGroupOffsets * u.admin_request */ @@ -161,6 +172,14 @@ typedef enum { RD_KAFKA_OP_GET_REBALANCE_PROTOCOL, /**< Get rebalance protocol */ RD_KAFKA_OP_LEADERS, /**< Partition leader query */ RD_KAFKA_OP_BARRIER, /**< Version barrier bump */ + RD_KAFKA_OP_SASL_REAUTH, /**< Sasl reauthentication for broker */ + RD_KAFKA_OP_DESCRIBEUSERSCRAMCREDENTIALS, /* < Admin: + DescribeUserScramCredentials + u.admin_request >*/ + RD_KAFKA_OP_ALTERUSERSCRAMCREDENTIALS, /* < Admin: + AlterUserScramCredentials + u.admin_request >*/ + RD_KAFKA_OP_LISTOFFSETS, /**< Admin: ListOffsets u.admin_request >*/ RD_KAFKA_OP__END } rd_kafka_op_type_t; @@ -370,6 +389,7 @@ struct rd_kafka_op_s { /* RD_KAFKA_OP_METADATA */ struct { rd_kafka_metadata_t *md; + rd_kafka_metadata_internal_t *mdi; int force; /* force request regardless of outstanding * metadata requests. */ } metadata; @@ -510,6 +530,9 @@ struct rd_kafka_op_s { char *errstr; /**< Error string, if rko_err * is set, else NULL. 
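Of the new op types, RD_KAFKA_OP_LISTOFFSETS is the plumbing behind the ListOffsets admin call introduced in librdkafka 2.3.0. A sketch of the application-side flow (function and event names as published for that release; treat the exact signature as an assumption to verify against rdkafka.h):

static void request_latest_offset(rd_kafka_t *rk, rd_kafka_queue_t *queue) {
        rd_kafka_topic_partition_list_t *parts =
            rd_kafka_topic_partition_list_new(1);

        /* The offset field carries an OffsetSpec for this API. */
        rd_kafka_topic_partition_list_add(parts, "mytopic", 0)->offset =
            RD_KAFKA_OFFSET_SPEC_LATEST;

        rd_kafka_ListOffsets(rk, parts, NULL /* default AdminOptions */,
                             queue);
        /* The result arrives on `queue` as an
         * RD_KAFKA_EVENT_LISTOFFSETS_RESULT event. */
        rd_kafka_topic_partition_list_destroy(parts);
}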
*/ + /** Result cb for this op */ + void (*result_cb)(rd_kafka_op_t *); + rd_list_t results; /**< Type depends on request type: * * (rd_kafka_topic_result_t *): @@ -518,6 +541,7 @@ struct rd_kafka_op_s { * * (rd_kafka_ConfigResource_t *): * AlterConfigs, DescribeConfigs + * IncrementalAlterConfigs */ void *opaque; /**< Application's opaque as set by @@ -712,7 +736,7 @@ rd_kafka_op_t *rd_kafka_op_new_fetch_msg(rd_kafka_msg_t **rkmp, rd_kafka_toppar_t *rktp, int32_t version, rd_kafka_buf_t *rkbuf, - int64_t offset, + rd_kafka_fetch_pos_t pos, size_t key_len, const void *key, size_t val_len, @@ -721,7 +745,7 @@ rd_kafka_op_t *rd_kafka_op_new_fetch_msg(rd_kafka_msg_t **rkmp, rd_kafka_op_t *rd_kafka_op_new_ctrl_msg(rd_kafka_toppar_t *rktp, int32_t version, rd_kafka_buf_t *rkbuf, - int64_t offset); + rd_kafka_fetch_pos_t pos); void rd_kafka_op_throttle_time(struct rd_kafka_broker_s *rkb, rd_kafka_q_t *rkq, diff --git a/lib/librdkafka-2.1.0/src/rdkafka_partition.c b/lib/librdkafka-2.3.0/src/rdkafka_partition.c similarity index 97% rename from lib/librdkafka-2.1.0/src/rdkafka_partition.c rename to lib/librdkafka-2.3.0/src/rdkafka_partition.c index 46d2fb3ed8f..b175ffbc798 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_partition.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_partition.c @@ -1,7 +1,8 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2015 Magnus Edenhill + * Copyright (c) 2015-2022, Magnus Edenhill, + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -162,9 +163,11 @@ static void rd_kafka_toppar_consumer_lag_req(rd_kafka_toppar_t *rktp) { /* Ask for oldest offset. The newest offset is automatically * propagated in FetchResponse.HighwaterMark. */ - rd_kafka_ListOffsetsRequest( - rktp->rktp_broker, partitions, RD_KAFKA_REPLYQ(rktp->rktp_ops, 0), - rd_kafka_toppar_lag_handle_Offset, rd_kafka_toppar_keep(rktp)); + rd_kafka_ListOffsetsRequest(rktp->rktp_broker, partitions, + RD_KAFKA_REPLYQ(rktp->rktp_ops, 0), + rd_kafka_toppar_lag_handle_Offset, + -1, /* don't set an absolute timeout */ + rd_kafka_toppar_keep(rktp)); rd_kafka_toppar_unlock(rktp); @@ -243,6 +246,7 @@ rd_kafka_toppar_t *rd_kafka_toppar_new0(rd_kafka_topic_t *rkt, rd_kafka_fetch_pos_init(&rktp->rktp_query_pos); rd_kafka_fetch_pos_init(&rktp->rktp_next_fetch_start); rd_kafka_fetch_pos_init(&rktp->rktp_last_next_fetch_start); + rd_kafka_fetch_pos_init(&rktp->rktp_offset_validation_pos); rd_kafka_fetch_pos_init(&rktp->rktp_app_pos); rd_kafka_fetch_pos_init(&rktp->rktp_stored_pos); rd_kafka_fetch_pos_init(&rktp->rktp_committing_pos); @@ -252,7 +256,7 @@ rd_kafka_toppar_t *rd_kafka_toppar_new0(rd_kafka_topic_t *rkt, mtx_init(&rktp->rktp_lock, mtx_plain); rd_refcnt_init(&rktp->rktp_refcnt, 0); - rktp->rktp_fetchq = rd_kafka_q_new(rkt->rkt_rk); + rktp->rktp_fetchq = rd_kafka_consume_q_new(rkt->rkt_rk); rktp->rktp_ops = rd_kafka_q_new(rkt->rkt_rk); rktp->rktp_ops->rkq_serve = rd_kafka_toppar_op_serve; rktp->rktp_ops->rkq_opaque = rktp; @@ -348,6 +352,7 @@ void rd_kafka_toppar_destroy_final(rd_kafka_toppar_t *rktp) { rd_refcnt_destroy(&rktp->rktp_refcnt); + rd_free(rktp->rktp_stored_metadata); rd_free(rktp); } @@ -359,9 +364,6 @@ void rd_kafka_toppar_destroy_final(rd_kafka_toppar_t *rktp) { * @locks_required rd_kafka_toppar_lock() MUST be held. 
*/ void rd_kafka_toppar_set_fetch_state(rd_kafka_toppar_t *rktp, int fetch_state) { - rd_kafka_assert(NULL, - thrd_is_current(rktp->rktp_rkt->rkt_rk->rk_thread)); - if ((int)rktp->rktp_fetch_state == fetch_state) return; @@ -873,6 +875,11 @@ void rd_kafka_msgq_insert_msgq(rd_kafka_msgq_t *destq, * @param incr_retry Increment retry count for messages. * @param max_retries Maximum retries allowed per message. * @param backoff Absolute retry backoff for retried messages. + * @param exponential_backoff If true, the backoff is exponential, + * 2**(retry_count - 1) * retry_ms with jitter; + * \p backoff is then ignored. + * @param retry_ms The base retry backoff in milliseconds for the exponential + * backoff calculation. + * @param retry_max_ms The maximum backoff in milliseconds for the exponential + * backoff calculation. * * @returns 0 if all messages were retried, or 1 if some messages * could not be retried. @@ -883,10 +890,14 @@ int rd_kafka_retry_msgq(rd_kafka_msgq_t *destq, int max_retries, rd_ts_t backoff, rd_kafka_msg_status_t status, - int (*cmp)(const void *a, const void *b)) { + int (*cmp)(const void *a, const void *b), + rd_bool_t exponential_backoff, + int retry_ms, + int retry_max_ms) { rd_kafka_msgq_t retryable = RD_KAFKA_MSGQ_INITIALIZER(retryable); rd_kafka_msg_t *rkm, *tmp; - + int64_t jitter = rd_jitter(100 - RD_KAFKA_RETRY_JITTER_PERCENT, + 100 + RD_KAFKA_RETRY_JITTER_PERCENT); /* Scan through messages to see which ones are eligible for retry, * move the retryable ones to temporary queue and * set backoff time for first message and optionally @@ -900,8 +911,25 @@ int rd_kafka_retry_msgq(rd_kafka_msgq_t *destq, rd_kafka_msgq_deq(srcq, rkm, 1); rd_kafka_msgq_enq(&retryable, rkm); - rkm->rkm_u.producer.ts_backoff = backoff; rkm->rkm_u.producer.retries += incr_retry; + if (exponential_backoff) { + /* Some paths, like failed Produce requests, do not + * increment the retry count; see + * rd_kafka_handle_Produce_error. */ + if (rkm->rkm_u.producer.retries > 0) + backoff = + (1 << (rkm->rkm_u.producer.retries - 1)) * + retry_ms; + else + backoff = retry_ms; + /* jitter is a percentage and backoff is in + * milliseconds, so multiplying by 10 yields + * microseconds, the unit rd_clock() uses. */ + backoff = jitter * backoff * 10; + if (backoff > retry_max_ms * 1000) + backoff = retry_max_ms * 1000; + backoff = rd_clock() + backoff; + } + rkm->rkm_u.producer.ts_backoff = backoff; /* Don't downgrade a message from any form of PERSISTED * to NOT_PERSISTED, since the original cause of indicating @@ -940,17 +968,21 @@ int rd_kafka_toppar_retry_msgq(rd_kafka_toppar_t *rktp, rd_kafka_msgq_t *rkmq, int incr_retry, rd_kafka_msg_status_t status) { - rd_kafka_t *rk = rktp->rktp_rkt->rkt_rk; - rd_ts_t backoff = rd_clock() + (rk->rk_conf.retry_backoff_ms * 1000); + rd_kafka_t *rk = rktp->rktp_rkt->rkt_rk; + int retry_ms = rk->rk_conf.retry_backoff_ms; + int retry_max_ms = rk->rk_conf.retry_backoff_max_ms; int r; if (rd_kafka_terminating(rk)) return 1; rd_kafka_toppar_lock(rktp); + /* Exponential backoff applied. 
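With the units straightened out, the exponential backoff above reduces to the sketch below: a base of 2**(retries - 1) * retry_ms, scaled by a jitter percentage drawn once per call from the range 100 - RD_KAFKA_RETRY_JITTER_PERCENT to 100 + RD_KAFKA_RETRY_JITTER_PERCENT, converted to microseconds (rd_clock()'s unit), and capped at retry_max_ms:

#include <stdint.h>

/* Sketch: backoff in microseconds, before rd_clock() is added. */
static int64_t backoff_us(int retries, int64_t jitter_pct,
                          int retry_ms, int retry_max_ms) {
        int64_t b = retries > 0
                        ? ((int64_t)1 << (retries - 1)) * retry_ms
                        : retry_ms;
        b = jitter_pct * b * 10; /* % * ms * 10 == microseconds */
        if (b > (int64_t)retry_max_ms * 1000)
                b = (int64_t)retry_max_ms * 1000;
        return b;
}

/* e.g. retries = 3, jitter_pct = 100, retry_ms = 100, retry_max_ms = 1000:
 * (1 << 2) * 100 = 400 ms -> 400000 us; by the fifth retry the 1000 ms
 * cap takes over. */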
 */
        r = rd_kafka_retry_msgq(&rktp->rktp_msgq, rkmq, incr_retry,
-                                rk->rk_conf.max_retries, backoff, status,
-                                rktp->rktp_rkt->rkt_conf.msg_order_cmp);
+                                rk->rk_conf.max_retries,
+                                0 /* backoff will be calculated */, status,
+                                rktp->rktp_rkt->rkt_conf.msg_order_cmp, rd_true,
+                                retry_ms, retry_max_ms);
        rd_kafka_toppar_unlock(rktp);

        return r;
@@ -1570,7 +1602,9 @@ void rd_kafka_toppar_offset_request(rd_kafka_toppar_t *rktp,

                rd_kafka_ListOffsetsRequest(
                    rkb, offsets,
                    RD_KAFKA_REPLYQ(rktp->rktp_ops, rktp->rktp_op_version),
-                    rd_kafka_toppar_handle_Offset, rktp);
+                    rd_kafka_toppar_handle_Offset,
+                    -1, /* don't set an absolute timeout */
+                    rktp);

        rd_kafka_topic_partition_list_destroy(offsets);
}
@@ -1798,6 +1832,7 @@ void rd_kafka_toppar_seek(rd_kafka_toppar_t *rktp,
                rd_kafka_toppar_set_fetch_state(
                    rktp, RD_KAFKA_TOPPAR_FETCH_VALIDATE_EPOCH_WAIT);
                rd_kafka_toppar_set_next_fetch_position(rktp, pos);
+                rd_kafka_toppar_set_offset_validation_position(rktp, pos);
                rd_kafka_offset_validate(rktp, "seek");
        }
@@ -2486,7 +2521,6 @@ void rd_kafka_topic_partition_get(const rd_kafka_topic_partition_t *rktpar,
}

-
/**
 *
 * rd_kafka_topic_partition_t lists
@@ -2698,6 +2732,21 @@ void rd_kafka_topic_partition_set_from_fetch_pos(
            fetchpos.leader_epoch);
}

+/**
+ * @brief Set partition metadata from rktp stored one.
+ */
+void rd_kafka_topic_partition_set_metadata_from_rktp_stored(
+    rd_kafka_topic_partition_t *rktpar,
+    const rd_kafka_toppar_t *rktp) {
+        rktpar->metadata_size = rktp->rktp_stored_metadata_size;
+        if (rktp->rktp_stored_metadata) {
+                rktpar->metadata = rd_malloc(rktp->rktp_stored_metadata_size);
+                memcpy(rktpar->metadata, rktp->rktp_stored_metadata,
+                       rktpar->metadata_size);
+        }
+}
+
+
/**
 * @brief Destroy all partitions in list.
 *
@@ -2750,7 +2799,6 @@ void rd_kafka_topic_partition_list_destroy_free(void *ptr) {
            (rd_kafka_topic_partition_list_t *)ptr);
}

-
/**
 * @brief Add a partition to an rktpar list.
 *        The list must have enough room to fit it.
@@ -3213,6 +3261,8 @@ int rd_kafka_topic_partition_list_set_offsets(
                                verb = "setting stored";
                                rd_kafka_topic_partition_set_from_fetch_pos(
                                    rktpar, rktp->rktp_stored_pos);
+                                rd_kafka_topic_partition_set_metadata_from_rktp_stored(
+                                    rktpar, rktp);
                        } else {
                                rktpar->offset = RD_KAFKA_OFFSET_INVALID;
                        }
diff --git a/lib/librdkafka-2.1.0/src/rdkafka_partition.h b/lib/librdkafka-2.3.0/src/rdkafka_partition.h
similarity index 96%
rename from lib/librdkafka-2.1.0/src/rdkafka_partition.h
rename to lib/librdkafka-2.3.0/src/rdkafka_partition.h
index a1f1f47cd94..638c86eb352 100644
--- a/lib/librdkafka-2.1.0/src/rdkafka_partition.h
+++ b/lib/librdkafka-2.3.0/src/rdkafka_partition.h
@@ -1,7 +1,8 @@
/*
 * librdkafka - The Apache Kafka C/C++ library
 *
- * Copyright (c) 2015 Magnus Edenhill
+ * Copyright (c) 2015-2022, Magnus Edenhill,
+ *               2023, Confluent Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -67,24 +68,30 @@ struct rd_kafka_toppar_err {
                                   *   last msg sequence */
};

-
+/**
+ * @brief Fetchpos comparator, only offset is compared.
+ */
+static RD_UNUSED RD_INLINE int
+rd_kafka_fetch_pos_cmp_offset(const rd_kafka_fetch_pos_t *a,
+                              const rd_kafka_fetch_pos_t *b) {
+        return (RD_CMP(a->offset, b->offset));
+}

/**
- * @brief Fetchpos comparator, leader epoch has precedence.
+ * @brief Fetchpos comparator, leader epoch has precedence
+ *        unless either leader epoch is unset (-1).
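+ *        (E.g. comparing {offset=5, leader_epoch=-1} with
+ *        {offset=3, leader_epoch=2} falls back to the offset-only
+ *        comparison and returns a positive value; the epochs are never
+ *        compared.)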
*/ static RD_UNUSED RD_INLINE int rd_kafka_fetch_pos_cmp(const rd_kafka_fetch_pos_t *a, const rd_kafka_fetch_pos_t *b) { + if (a->leader_epoch == -1 || b->leader_epoch == -1) + return rd_kafka_fetch_pos_cmp_offset(a, b); if (a->leader_epoch < b->leader_epoch) return -1; else if (a->leader_epoch > b->leader_epoch) return 1; - else if (a->offset < b->offset) - return -1; - else if (a->offset > b->offset) - return 1; else - return 0; + return rd_kafka_fetch_pos_cmp_offset(a, b); } @@ -325,6 +332,10 @@ struct rd_kafka_toppar_s { /* rd_kafka_toppar_t */ * @locality toppar thread */ rd_kafka_fetch_pos_t rktp_last_next_fetch_start; + /** The offset to verify. + * @locality toppar thread */ + rd_kafka_fetch_pos_t rktp_offset_validation_pos; + /** Application's position. * This is the latest offset delivered to application + 1. * It is reset to INVALID_OFFSET when partition is @@ -334,6 +345,11 @@ struct rd_kafka_toppar_s { /* rd_kafka_toppar_t */ /** Last stored offset, but maybe not yet committed. */ rd_kafka_fetch_pos_t rktp_stored_pos; + /* Last stored metadata, but + * maybe not committed yet. */ + void *rktp_stored_metadata; + size_t rktp_stored_metadata_size; + /** Offset currently being committed */ rd_kafka_fetch_pos_t rktp_committing_pos; @@ -550,7 +566,10 @@ int rd_kafka_retry_msgq(rd_kafka_msgq_t *destq, int max_retries, rd_ts_t backoff, rd_kafka_msg_status_t status, - int (*cmp)(const void *a, const void *b)); + int (*cmp)(const void *a, const void *b), + rd_bool_t exponential_backoff, + int retry_ms, + int retry_max_ms); void rd_kafka_msgq_insert_msgq(rd_kafka_msgq_t *destq, rd_kafka_msgq_t *srcq, int (*cmp)(const void *a, const void *b)); @@ -888,6 +907,10 @@ void rd_kafka_topic_partition_set_from_fetch_pos( rd_kafka_topic_partition_t *rktpar, const rd_kafka_fetch_pos_t fetchpos); +void rd_kafka_topic_partition_set_metadata_from_rktp_stored( + rd_kafka_topic_partition_t *rktpar, + const rd_kafka_toppar_t *rktp); + static RD_UNUSED rd_kafka_fetch_pos_t rd_kafka_topic_partition_get_fetch_pos( const rd_kafka_topic_partition_t *rktpar) { rd_kafka_fetch_pos_t fetchpos = { @@ -1047,7 +1070,7 @@ static RD_UNUSED int rd_kafka_toppar_topic_cmp(const void *_a, const void *_b) { * @brief Set's the partitions next fetch position, i.e., the next offset * to start fetching from. * - * @locks_required rd_kafka_toppar_lock(rktp) MUST be held. + * @locks rd_kafka_toppar_lock(rktp) MUST be held. */ static RD_UNUSED RD_INLINE void rd_kafka_toppar_set_next_fetch_position(rd_kafka_toppar_t *rktp, @@ -1055,4 +1078,15 @@ rd_kafka_toppar_set_next_fetch_position(rd_kafka_toppar_t *rktp, rktp->rktp_next_fetch_start = next_pos; } +/** + * @brief Sets the offset validation position. + * + * @locks rd_kafka_toppar_lock(rktp) MUST be held. 
+ */ +static RD_UNUSED RD_INLINE void rd_kafka_toppar_set_offset_validation_position( + rd_kafka_toppar_t *rktp, + rd_kafka_fetch_pos_t offset_validation_pos) { + rktp->rktp_offset_validation_pos = offset_validation_pos; +} + #endif /* _RDKAFKA_PARTITION_H_ */ diff --git a/lib/librdkafka-2.1.0/src/rdkafka_pattern.c b/lib/librdkafka-2.3.0/src/rdkafka_pattern.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_pattern.c rename to lib/librdkafka-2.3.0/src/rdkafka_pattern.c index dfe3ef03e60..425f8201a52 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_pattern.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_pattern.c @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2015 Magnus Edenhill + * Copyright (c) 2015-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_pattern.h b/lib/librdkafka-2.3.0/src/rdkafka_pattern.h similarity index 98% rename from lib/librdkafka-2.1.0/src/rdkafka_pattern.h rename to lib/librdkafka-2.3.0/src/rdkafka_pattern.h index 88d183cd32c..5ef6a3464c1 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_pattern.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_pattern.h @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2015 Magnus Edenhill + * Copyright (c) 2015-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_plugin.c b/lib/librdkafka-2.3.0/src/rdkafka_plugin.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_plugin.c rename to lib/librdkafka-2.3.0/src/rdkafka_plugin.c index f58bc5060c6..f084eff7a76 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_plugin.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_plugin.c @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2017 Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_plugin.h b/lib/librdkafka-2.3.0/src/rdkafka_plugin.h similarity index 97% rename from lib/librdkafka-2.1.0/src/rdkafka_plugin.h rename to lib/librdkafka-2.3.0/src/rdkafka_plugin.h index 1783d5f53cd..cb50a8647ad 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_plugin.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_plugin.h @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2017 Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_proto.h b/lib/librdkafka-2.3.0/src/rdkafka_proto.h similarity index 94% rename from lib/librdkafka-2.1.0/src/rdkafka_proto.h rename to lib/librdkafka-2.3.0/src/rdkafka_proto.h index 396765857ca..e6caf509e30 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_proto.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_proto.h @@ -1,7 +1,9 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012,2013 Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. + * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -30,8 +32,10 @@ #define _RDKAFKA_PROTO_H_ +#include "rdstring.h" #include "rdendian.h" #include "rdvarint.h" +#include "rdbase64.h" /* Protocol defines */ #include "rdkafka_protocol.h" @@ -378,7 +382,7 @@ typedef struct rd_kafkap_bytes_s { int32_t len; /* Kafka bytes length (-1=NULL, 0=empty, >0=data) */ const void *data; /* points just past the struct, or other memory, * not NULL-terminated */ - const char _data[1]; /* Bytes following struct when new()ed */ + const unsigned char _data[1]; /* Bytes following struct when new()ed */ } rd_kafkap_bytes_t; @@ -423,7 +427,7 @@ static RD_UNUSED void rd_kafkap_bytes_destroy(rd_kafkap_bytes_t *kbytes) { * - No-copy, just alloc (bytes==NULL,len>0) */ static RD_INLINE RD_UNUSED rd_kafkap_bytes_t * -rd_kafkap_bytes_new(const char *bytes, int32_t len) { +rd_kafkap_bytes_new(const unsigned char *bytes, int32_t len) { rd_kafkap_bytes_t *kbytes; int32_t klen; @@ -440,7 +444,7 @@ rd_kafkap_bytes_new(const char *bytes, int32_t len) { if (len == RD_KAFKAP_BYTES_LEN_NULL) kbytes->data = NULL; else { - kbytes->data = ((const char *)(kbytes + 1)) + 4; + kbytes->data = ((const unsigned char *)(kbytes + 1)) + 4; if (bytes) memcpy((void *)kbytes->data, bytes, len); } @@ -455,7 +459,7 @@ rd_kafkap_bytes_new(const char *bytes, int32_t len) { */ static RD_INLINE RD_UNUSED rd_kafkap_bytes_t * rd_kafkap_bytes_copy(const rd_kafkap_bytes_t *src) { - return rd_kafkap_bytes_new((const char *)src->data, src->len); + return rd_kafkap_bytes_new((const unsigned char *)src->data, src->len); } @@ -565,6 +569,26 @@ typedef struct rd_kafka_buf_s rd_kafka_buf_t; (8 + 4 + 4 + 1 + 4 + 2 + 4 + 8 + 8 + 8 + 2 + 4) +/** + * @struct Struct representing UUID protocol primitive type. + */ +typedef struct rd_kafka_Uuid_s { + int64_t + most_significant_bits; /**< Most significant 64 bits for the UUID */ + int64_t least_significant_bits; /**< Least significant 64 bits for the + UUID */ + char base64str[23]; /**< base64 encoding for the uuid. By default, it is + lazy loaded. Use function + `rd_kafka_Uuid_base64str()` as a getter for this + field. */ +} rd_kafka_Uuid_t; + +#define RD_KAFKA_UUID_ZERO \ + { 0, 0, "" } + +#define RD_KAFKA_UUID_METADATA_TOPIC_ID \ + { 0, 1, "" } + /** * @name Producer ID and Epoch for the Idempotent Producer diff --git a/lib/librdkafka-2.1.0/src/rdkafka_protocol.h b/lib/librdkafka-2.3.0/src/rdkafka_protocol.h similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_protocol.h rename to lib/librdkafka-2.3.0/src/rdkafka_protocol.h index 60c09998617..99c6aa16a20 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_protocol.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_protocol.h @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2020 Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_queue.c b/lib/librdkafka-2.3.0/src/rdkafka_queue.c similarity index 91% rename from lib/librdkafka-2.1.0/src/rdkafka_queue.c rename to lib/librdkafka-2.3.0/src/rdkafka_queue.c index 57fce36b8db..3e303795582 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_queue.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_queue.c @@ -1,7 +1,8 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2016 Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill, + * 2023, Confluent Inc. * All rights reserved. 
 *
 * Redistribution and use in source and binary forms, with or without
@@ -83,12 +84,15 @@ void rd_kafka_q_destroy_final(rd_kafka_q_t *rkq) {
 */
void rd_kafka_q_init0(rd_kafka_q_t *rkq,
                      rd_kafka_t *rk,
+                      rd_bool_t for_consume,
                      const char *func,
                      int line) {
        rd_kafka_q_reset(rkq);
        rkq->rkq_fwdq   = NULL;
        rkq->rkq_refcnt = 1;
        rkq->rkq_flags  = RD_KAFKA_Q_F_READY;
+        if (for_consume)
+                rkq->rkq_flags |= RD_KAFKA_Q_F_CONSUMER;
        rkq->rkq_rk     = rk;
        rkq->rkq_qio    = NULL;
        rkq->rkq_serve  = NULL;
@@ -106,9 +110,15 @@ void rd_kafka_q_init0(rd_kafka_q_t *rkq,
/**
 * Allocate a new queue and initialize it.
 */
-rd_kafka_q_t *rd_kafka_q_new0(rd_kafka_t *rk, const char *func, int line) {
+rd_kafka_q_t *rd_kafka_q_new0(rd_kafka_t *rk,
+                              rd_bool_t for_consume,
+                              const char *func,
+                              int line) {
        rd_kafka_q_t *rkq = rd_malloc(sizeof(*rkq));
-        rd_kafka_q_init(rkq, rk);
+        if (!for_consume)
+                rd_kafka_q_init(rkq, rk);
+        else
+                rd_kafka_consume_q_init(rkq, rk);
        rkq->rkq_flags |= RD_KAFKA_Q_F_ALLOCATED;
#if ENABLE_DEVEL
        rd_snprintf(rkq->rkq_name, sizeof(rkq->rkq_name), "%s:%d", func, line);
@@ -118,6 +128,33 @@ rd_kafka_q_t *rd_kafka_q_new0(rd_kafka_t *rk, const char *func, int line) {
        return rkq;
}

+/*
+ * Sets the flag RD_KAFKA_Q_F_CONSUMER for rkq and, recursively, for any
+ * queues it is being forwarded to.
+ * Setting this flag indicates that polling this queue is equivalent to calling
+ * consumer poll, and will reset the max.poll.interval.ms timer. Only used
+ * internally when forwarding queues.
+ * @locks rd_kafka_q_lock(rkq)
+ */
+static void rd_kafka_q_consumer_propagate(rd_kafka_q_t *rkq) {
+        mtx_lock(&rkq->rkq_lock);
+        rkq->rkq_flags |= RD_KAFKA_Q_F_CONSUMER;
+
+        if (!rkq->rkq_fwdq) {
+                mtx_unlock(&rkq->rkq_lock);
+                return;
+        }
+
+        /* Recursively propagate the flag to any queues rkq is already
+         * forwarding to. There will be a deadlock here if the queues are being
+         * forwarded circularly, but that is a user error. We can't resolve this
+         * deadlock by unlocking before the recursive call, because that leads
+         * to incorrectness if the rkq_fwdq is forwarded elsewhere and the old
+         * one destroyed between recursive calls. */
+        rd_kafka_q_consumer_propagate(rkq->rkq_fwdq);
+        mtx_unlock(&rkq->rkq_lock);
+}
+
/**
 * Set/clear forward queue.
 * Queue forwarding enables message routing inside rdkafka.
@@ -152,6 +189,9 @@ void rd_kafka_q_fwd_set0(rd_kafka_q_t *srcq,
                }

                srcq->rkq_fwdq = destq;
+
+                if (srcq->rkq_flags & RD_KAFKA_Q_F_CONSUMER)
+                        rd_kafka_q_consumer_propagate(destq);
        }
        if (do_lock)
                mtx_unlock(&srcq->rkq_lock);
@@ -359,10 +399,16 @@ rd_kafka_op_t *rd_kafka_q_pop_serve(rd_kafka_q_t *rkq,
        rd_kafka_yield_thread = 0;

        if (!(fwdq = rd_kafka_q_fwd_get(rkq, 0))) {
+                const rd_bool_t can_q_contain_fetched_msgs =
+                    rd_kafka_q_can_contain_fetched_msgs(rkq, RD_DONT_LOCK);
+
                struct timespec timeout_tspec;

                rd_timeout_init_timespec_us(&timeout_tspec, timeout_us);

+                if (timeout_us && can_q_contain_fetched_msgs)
+                        rd_kafka_app_poll_blocking(rkq->rkq_rk);
+
                while (1) {
                        rd_kafka_op_res_t res;
                        /* Keep track of current lock status to avoid
@@ -400,15 +446,24 @@ rd_kafka_op_t *rd_kafka_q_pop_serve(rd_kafka_q_t *rkq,
                                goto retry; /* Next op */
                        } else if (unlikely(res == RD_KAFKA_OP_RES_YIELD)) {
+                                if (can_q_contain_fetched_msgs)
+                                        rd_kafka_app_polled(
+                                            rkq->rkq_rk);
                                /* Callback yielded, unroll */
                                return NULL;
-                        } else
+                        } else {
+                                if (can_q_contain_fetched_msgs)
+                                        rd_kafka_app_polled(
+                                            rkq->rkq_rk);
                                break; /* Proper op, handle below.
*/ + } } if (unlikely(rd_kafka_q_check_yield(rkq))) { if (is_locked) mtx_unlock(&rkq->rkq_lock); + if (can_q_contain_fetched_msgs) + rd_kafka_app_polled(rkq->rkq_rk); return NULL; } @@ -418,6 +473,8 @@ rd_kafka_op_t *rd_kafka_q_pop_serve(rd_kafka_q_t *rkq, if (cnd_timedwait_abs(&rkq->rkq_cond, &rkq->rkq_lock, &timeout_tspec) != thrd_success) { mtx_unlock(&rkq->rkq_lock); + if (can_q_contain_fetched_msgs) + rd_kafka_app_polled(rkq->rkq_rk); return NULL; } } @@ -463,6 +520,8 @@ int rd_kafka_q_serve(rd_kafka_q_t *rkq, rd_kafka_q_t *fwdq; int cnt = 0; struct timespec timeout_tspec; + const rd_bool_t can_q_contain_fetched_msgs = + rd_kafka_q_can_contain_fetched_msgs(rkq, RD_DONT_LOCK); rd_dassert(cb_type); @@ -480,8 +539,12 @@ int rd_kafka_q_serve(rd_kafka_q_t *rkq, return ret; } + rd_timeout_init_timespec(&timeout_tspec, timeout_ms); + if (timeout_ms && can_q_contain_fetched_msgs) + rd_kafka_app_poll_blocking(rk); + /* Wait for op */ while (!(rko = TAILQ_FIRST(&rkq->rkq_q)) && !rd_kafka_q_check_yield(rkq) && @@ -493,6 +556,8 @@ int rd_kafka_q_serve(rd_kafka_q_t *rkq, if (!rko) { mtx_unlock(&rkq->rkq_lock); + if (can_q_contain_fetched_msgs) + rd_kafka_app_polled(rk); return 0; } @@ -527,6 +592,9 @@ int rd_kafka_q_serve(rd_kafka_q_t *rkq, } } + if (can_q_contain_fetched_msgs) + rd_kafka_app_polled(rk); + rd_kafka_q_destroy_owner(&localq); return cnt; @@ -610,6 +678,7 @@ int rd_kafka_q_serve_rkmessages(rd_kafka_q_t *rkq, rd_kafka_q_destroy(fwdq); return cnt; } + mtx_unlock(&rkq->rkq_lock); if (timeout_ms) @@ -684,7 +753,7 @@ int rd_kafka_q_serve_rkmessages(rd_kafka_q_t *rkq, rko = (rd_kafka_op_t *)rkmessages[i]->_private; rd_kafka_toppar_t *rktp = rko->rko_rktp; int64_t offset = rkmessages[i]->offset + 1; - if (unlikely(rktp->rktp_app_pos.offset < offset)) + if (unlikely(rktp && (rktp->rktp_app_pos.offset < offset))) rd_kafka_update_app_pos( rk, rktp, RD_KAFKA_FETCH_POS( @@ -708,7 +777,7 @@ int rd_kafka_q_serve_rkmessages(rd_kafka_q_t *rkq, next = TAILQ_NEXT(next, rko_link); rd_kafka_toppar_t *rktp = rko->rko_rktp; int64_t offset = rko->rko_u.fetch.rkm.rkm_rkmessage.offset + 1; - if (rktp->rktp_app_pos.offset < offset) + if (rktp && (rktp->rktp_app_pos.offset < offset)) rd_kafka_update_app_pos( rk, rktp, RD_KAFKA_FETCH_POS( diff --git a/lib/librdkafka-2.1.0/src/rdkafka_queue.h b/lib/librdkafka-2.3.0/src/rdkafka_queue.h similarity index 96% rename from lib/librdkafka-2.1.0/src/rdkafka_queue.h rename to lib/librdkafka-2.3.0/src/rdkafka_queue.h index 0d50f58703f..eb329d1c1d6 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_queue.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_queue.h @@ -1,7 +1,8 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2016 Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill, + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -75,6 +76,11 @@ struct rd_kafka_q_s { * by triggering the cond-var \ * but without having to enqueue \ * an op. */ +#define RD_KAFKA_Q_F_CONSUMER \ + 0x10 /* If this flag is set, this queue might contain fetched messages \ + from partitions. Polling this queue will reset the \ + max.poll.interval.ms timer. Once set, this flag is never \ + reset. 
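+         E.g. rktp_fetchq is created with rd_kafka_consume_q_new() and so     \
+         carries this flag from the start, and rd_kafka_q_fwd_set()           \
+         propagates it to the forwarded-to queue                              \
+         (see rd_kafka_q_consumer_propagate()).                               \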
*/ rd_kafka_t *rkq_rk; struct rd_kafka_q_io *rkq_qio; /* FD-based application signalling */ @@ -123,12 +129,20 @@ static RD_INLINE RD_UNUSED int rd_kafka_q_ready(rd_kafka_q_t *rkq) { void rd_kafka_q_init0(rd_kafka_q_t *rkq, rd_kafka_t *rk, + rd_bool_t for_consume, const char *func, int line); #define rd_kafka_q_init(rkq, rk) \ - rd_kafka_q_init0(rkq, rk, __FUNCTION__, __LINE__) -rd_kafka_q_t *rd_kafka_q_new0(rd_kafka_t *rk, const char *func, int line); -#define rd_kafka_q_new(rk) rd_kafka_q_new0(rk, __FUNCTION__, __LINE__) + rd_kafka_q_init0(rkq, rk, rd_false, __FUNCTION__, __LINE__) +#define rd_kafka_consume_q_init(rkq, rk) \ + rd_kafka_q_init0(rkq, rk, rd_true, __FUNCTION__, __LINE__) +rd_kafka_q_t *rd_kafka_q_new0(rd_kafka_t *rk, + rd_bool_t for_consume, + const char *func, + int line); +#define rd_kafka_q_new(rk) rd_kafka_q_new0(rk, rd_false, __FUNCTION__, __LINE__) +#define rd_kafka_consume_q_new(rk) \ + rd_kafka_q_new0(rk, rd_true, __FUNCTION__, __LINE__) void rd_kafka_q_destroy_final(rd_kafka_q_t *rkq); #define rd_kafka_q_lock(rkqu) mtx_lock(&(rkqu)->rkq_lock) @@ -1164,6 +1178,22 @@ rd_kafka_enq_once_disable(rd_kafka_enq_once_t *eonce) { return rko; } +/** + * @brief Returns true if the queue can contain fetched messages. + * + * @locks rd_kafka_q_lock(rkq) if do_lock is set. + */ +static RD_INLINE RD_UNUSED rd_bool_t +rd_kafka_q_can_contain_fetched_msgs(rd_kafka_q_t *rkq, rd_bool_t do_lock) { + rd_bool_t val; + if (do_lock) + mtx_lock(&rkq->rkq_lock); + val = rkq->rkq_flags & RD_KAFKA_Q_F_CONSUMER; + if (do_lock) + mtx_unlock(&rkq->rkq_lock); + return val; +} + /**@}*/ diff --git a/lib/librdkafka-2.3.0/src/rdkafka_range_assignor.c b/lib/librdkafka-2.3.0/src/rdkafka_range_assignor.c new file mode 100644 index 00000000000..a869c139bd3 --- /dev/null +++ b/lib/librdkafka-2.3.0/src/rdkafka_range_assignor.c @@ -0,0 +1,1748 @@ +/* + * librdkafka - The Apache Kafka C/C++ library + * + * Copyright (c) 2015-2022, Magnus Edenhill + * 2023, Confluent Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */
+#include "rdkafka_int.h"
+#include "rdkafka_assignor.h"
+#include "rdunittest.h"
+
+
+/**
+ * Source:
+ * https://github.com/apache/kafka/blob/trunk/clients/src/main/java/org/apache/kafka/clients/consumer/RangeAssignor.java
+ *
+ * The range assignor works on a per-topic basis. For each topic, we lay out the
+ * available partitions in numeric order and the consumers in lexicographic
+ * order. We then divide the number of partitions by the total number of
+ * consumers to determine the number of partitions to assign to each consumer.
+ * If it does not evenly divide, then the first few consumers will have one
+ * extra partition.
+ *
+ * For example, suppose there are two consumers C0 and C1, two topics t0 and t1,
+ * and each topic has 3 partitions, resulting in partitions t0p0, t0p1, t0p2,
+ * t1p0, t1p1, and t1p2.
+ *
+ * The assignment will be:
+ * C0: [t0p0, t0p1, t1p0, t1p1]
+ * C1: [t0p2, t1p2]
+ */
+
+typedef struct {
+        rd_kafkap_str_t *member_id;
+        rd_list_t *assigned_partitions; /* Contained Type: int* */
+} rd_kafka_member_assigned_partitions_pair_t;
+
+/**
+ * @brief Initializes a rd_kafka_member_assigned_partitions_pair_t* with
+ *        assigned_partitions = [].
+ *
+ * @param member_id
+ *
+ * The member_id isn't copied, so the returned value can be used only for the
+ * lifetime of this function's arguments.
+ * @return rd_kafka_member_assigned_partitions_pair_t*
+ */
+static rd_kafka_member_assigned_partitions_pair_t *
+rd_kafka_member_assigned_partitions_pair_new(rd_kafkap_str_t *member_id) {
+        rd_kafka_member_assigned_partitions_pair_t *pair =
+            rd_calloc(1, sizeof(rd_kafka_member_assigned_partitions_pair_t));
+
+        pair->member_id           = member_id;
+        pair->assigned_partitions = rd_list_new(0, NULL);
+        return pair;
+}
+
+static void rd_kafka_member_assigned_partitions_pair_destroy(void *_pair) {
+        rd_kafka_member_assigned_partitions_pair_t *pair =
+            (rd_kafka_member_assigned_partitions_pair_t *)_pair;
+
+        /* Do not destroy the member_id, we don't take ownership. */
+        RD_IF_FREE(pair->assigned_partitions, rd_list_destroy);
+        RD_IF_FREE(pair, rd_free);
+}
+
+static int rd_kafka_member_assigned_partitions_pair_cmp(const void *_a,
+                                                        const void *_b) {
+        rd_kafka_member_assigned_partitions_pair_t *a =
+            (rd_kafka_member_assigned_partitions_pair_t *)_a;
+        rd_kafka_member_assigned_partitions_pair_t *b =
+            (rd_kafka_member_assigned_partitions_pair_t *)_b;
+        return rd_kafkap_str_cmp(a->member_id, b->member_id);
+}
+
+static rd_kafka_member_assigned_partitions_pair_t *
+rd_kafka_find_member_assigned_partitions_pair_by_member_id(
+    rd_kafkap_str_t *member_id,
+    rd_list_t *rd_kafka_member_assigned_partitions_pair_list) {
+        rd_kafka_member_assigned_partitions_pair_t search_pair = {member_id,
+                                                                  NULL};
+        return rd_list_find(rd_kafka_member_assigned_partitions_pair_list,
+                            &search_pair,
+                            rd_kafka_member_assigned_partitions_pair_cmp);
+}
+
+typedef struct {
+        /* Contains topic and list of members - sorted by group instance id and
+         * member id. Also contains partitions, along with partition replicas,
+         * which will help us with the racks. The members also contain their
+         * rack id and the partitions they have already been assigned.
+         */
+        rd_kafka_assignor_topic_t *topic;
+        /* unassigned_partitions[i] is true if the ith partition of this topic
+         * is not assigned. We prefer using an array rather than using an
+         * rd_list and removing elements, because that involves a memmove on
+         * each remove.
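+         * Marking partition i as assigned is then just a flag flip plus a
+         * counter decrement (see rd_kafka_assign_partition() below), an O(1)
+         * operation.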
 */
+        rd_bool_t *unassigned_partitions;
+        /* Number of partitions still to be assigned. */
+        size_t unassigned_partitions_left;
+        /* An array of char** arrays. The ith element of this array is a sorted
+         * char** array, denoting the racks for the ith partition of this topic.
+         * The size of this array is equal to the partition_cnt. */
+        char ***partition_racks;
+        /* The ith element of this array is the size of partition_racks[i]. */
+        size_t *racks_cnt;
+        /* Contains a pair denoting the partitions assigned to every subscribed
+         * consumer (member, [rd_list_t* of int*]). Sorted by member_id.
+         * Contained Type: rd_kafka_member_assigned_partitions_pair_t* */
+        rd_list_t *member_to_assigned_partitions;
+        /* Contains the number of partitions that should be ideally assigned to
+         * every subscribing consumer. */
+        int num_partitions_per_consumer;
+        /* Contains the number of consumers with extra partitions in case number
+         * of partitions isn't perfectly divisible by number of consumers. */
+        int remaining_consumers_with_extra_partition;
+        /* True if we need to perform rack aware assignment. */
+        rd_bool_t needs_rack_aware_assignment;
+} rd_kafka_topic_assignment_state_t;
+
+
+/**
+ * @brief Initialize an rd_kafka_topic_assignment_state_t.
+ *
+ * @param topic
+ * @param mdi
+ *
+ * The struct rd_kafka_topic_assignment_state_t is mostly for convenience and
+ * easy grouping, so we avoid copying values as much as possible. Hence, the
+ * returned rd_kafka_topic_assignment_state_t does not own all its values, and
+ * should not be used beyond the lifetime of this function's arguments. This
+ * function also computes the value of needs_rack_aware_assignment given the
+ * other information.
+ *
+ * @return rd_kafka_topic_assignment_state_t*
+ */
+
+static rd_kafka_topic_assignment_state_t *
+rd_kafka_topic_assignment_state_new(rd_kafka_assignor_topic_t *topic,
+                                    const rd_kafka_metadata_internal_t *mdi) {
+        int i;
+        rd_kafka_group_member_t *member;
+        rd_kafka_topic_assignment_state_t *rktas;
+        const int partition_cnt = topic->metadata->partition_cnt;
+
+        rktas = rd_calloc(1, sizeof(rd_kafka_topic_assignment_state_t));
+        rktas->topic = topic; /* don't copy.
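+                               * The state only borrows \p topic and must not
+                               * outlive the assignor call (see the lifetime
+                               * note in the docstring above).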
 */
+
+        rktas->unassigned_partitions =
+            rd_malloc(sizeof(rd_bool_t) * partition_cnt);
+        rktas->unassigned_partitions_left = partition_cnt;
+        for (i = 0; i < partition_cnt; i++) {
+                rktas->unassigned_partitions[i] = rd_true;
+        }
+
+        rktas->num_partitions_per_consumer              = 0;
+        rktas->remaining_consumers_with_extra_partition = 0;
+        if (rd_list_cnt(&topic->members)) {
+                rktas->num_partitions_per_consumer =
+                    partition_cnt / rd_list_cnt(&topic->members);
+                rktas->remaining_consumers_with_extra_partition =
+                    partition_cnt % rd_list_cnt(&topic->members);
+        }
+
+        rktas->member_to_assigned_partitions =
+            rd_list_new(0, rd_kafka_member_assigned_partitions_pair_destroy);
+
+        RD_LIST_FOREACH(member, &topic->members, i) {
+                rd_list_add(rktas->member_to_assigned_partitions,
+                            rd_kafka_member_assigned_partitions_pair_new(
+                                member->rkgm_member_id));
+        }
+
+        rd_list_sort(rktas->member_to_assigned_partitions,
+                     rd_kafka_member_assigned_partitions_pair_cmp);
+
+        rktas->partition_racks = rd_calloc(partition_cnt, sizeof(char **));
+        rktas->racks_cnt       = rd_calloc(partition_cnt, sizeof(size_t));
+        for (i = 0; topic->metadata_internal->partitions && i < partition_cnt;
+             i++) {
+                rktas->racks_cnt[i] =
+                    topic->metadata_internal->partitions[i].racks_cnt;
+                rktas->partition_racks[i] =
+                    topic->metadata_internal->partitions[i].racks;
+        }
+
+        rktas->needs_rack_aware_assignment =
+            rd_kafka_use_rack_aware_assignment(&topic, 1, mdi);
+
+        return rktas;
+}
+
+/* Destroy a rd_kafka_topic_assignment_state_t. */
+static void rd_kafka_topic_assignment_state_destroy(void *_rktas) {
+        rd_kafka_topic_assignment_state_t *rktas =
+            (rd_kafka_topic_assignment_state_t *)_rktas;
+
+        rd_free(rktas->unassigned_partitions);
+        rd_list_destroy(rktas->member_to_assigned_partitions);
+        rd_free(rktas->partition_racks);
+        rd_free(rktas->racks_cnt);
+        rd_free(rktas);
+}
+
+/**
+ * Compare two topic_assignment_states, first on the sorted list of consumers
+ * (each consumer from the list of consumers is matched until the first point of
+ * difference), and if that's equal, compare on the number of partitions.
+ *
+ * A list sorted with this comparator will group the topic_assignment_states
+ * having the same consumers and the same number of partitions together - this
+ * is the criterion for co-partitioned topics.
+ */
+static int rd_kafka_topic_assignment_state_cmp(const void *_a, const void *_b) {
+        int i;
+        rd_kafka_topic_assignment_state_t *a =
+            (rd_kafka_topic_assignment_state_t *)_a;
+        rd_kafka_topic_assignment_state_t *b =
+            (rd_kafka_topic_assignment_state_t *)_b;
+
+        /* This guarantee comes from rd_kafka_range_assignor_assign_cb. */
+        rd_assert(a->topic->members.rl_flags & RD_LIST_F_SORTED);
+        rd_assert(b->topic->members.rl_flags & RD_LIST_F_SORTED);
+
+        /* Based on consumers */
+        for (i = 0; i < rd_list_cnt(&a->topic->members) &&
+                    i < rd_list_cnt(&b->topic->members);
+             i++) {
+                rd_kafka_group_member_t *am =
+                    rd_list_elem(&a->topic->members, i);
+                rd_kafka_group_member_t *bm =
+                    rd_list_elem(&b->topic->members, i);
+                int cmp_res =
+                    rd_kafkap_str_cmp(am->rkgm_member_id, bm->rkgm_member_id);
+                if (cmp_res != 0)
+                        return cmp_res;
+        }
+
+        if (rd_list_cnt(&a->topic->members) !=
+            rd_list_cnt(&b->topic->members)) {
+                return RD_CMP(rd_list_cnt(&a->topic->members),
+                              rd_list_cnt(&b->topic->members));
+        }
+
+        /* Based on number of partitions */
+        return RD_CMP(a->topic->metadata->partition_cnt,
+                      b->topic->metadata->partition_cnt);
+}
+
+
+/* Helper function to wrap a bsearch on the partition's racks.
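+ * Only valid because partition_racks[partition] is kept sorted (see the
+ * rd_kafka_topic_assignment_state_t field docs above), which is what lets
+ * rd_strcmp3 serve as the bsearch comparator.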
 */
+static char *rd_kafka_topic_assignment_state_rack_search(
+    rd_kafka_topic_assignment_state_t *rktas,
+    int partition,
+    const char *rack) {
+        char **partition_racks = rktas->partition_racks[partition];
+        size_t cnt             = rktas->racks_cnt[partition];
+        void *res              = NULL;
+
+        if (!partition_racks)
+                return NULL;
+
+        res = bsearch(&rack, partition_racks, cnt, sizeof(char *), rd_strcmp3);
+        if (!res)
+                return NULL;
+
+        return *(char **)res;
+}
+
+/*
+ * Assigns a partition to a member, and updates fields in rktas for accounting.
+ * It's assumed that the partitions assigned to this member don't exceed the
+ * allowed number.
+ */
+static void rd_kafka_assign_partition(rd_kafka_group_member_t *member,
+                                      rd_kafka_topic_assignment_state_t *rktas,
+                                      int32_t partition) {
+        rd_kafka_member_assigned_partitions_pair_t *member_assignment =
+            rd_kafka_find_member_assigned_partitions_pair_by_member_id(
+                member->rkgm_member_id, rktas->member_to_assigned_partitions);
+        rd_assert(member_assignment);
+
+        /* We can't use &partition, since that's a copy on the stack. */
+        rd_list_add(member_assignment->assigned_partitions,
+                    (void *)&rktas->topic->metadata->partitions[partition].id);
+        rd_kafka_topic_partition_list_add_range(member->rkgm_assignment,
+                                                rktas->topic->metadata->topic,
+                                                partition, partition);
+
+        rd_assert(rktas->unassigned_partitions[partition]);
+        rktas->unassigned_partitions[partition] = rd_false;
+        rktas->unassigned_partitions_left--;
+
+        if (rd_list_cnt(member_assignment->assigned_partitions) >
+            rktas->num_partitions_per_consumer) {
+                rktas->remaining_consumers_with_extra_partition -= 1;
+        }
+}
+
+
+/* Implementation of may_assign for rd_kafka_assign_ranges. True if the consumer
+ * rack is empty, or if it exists within the partition racks. */
+static rd_bool_t rd_kafka_racks_match(rd_kafka_group_member_t *member,
+                                      rd_kafka_topic_assignment_state_t *rktas,
+                                      int32_t partition) {
+        rd_kafkap_str_t *consumer_rack = member->rkgm_rack_id;
+
+        if (!consumer_rack || RD_KAFKAP_STR_LEN(consumer_rack) == 0) {
+                return rd_true;
+        }
+
+        return rd_kafka_topic_assignment_state_rack_search(
+                   rktas, partition, consumer_rack->str) != NULL;
+}
+
+
+/* Implementation of may_assign for rd_kafka_assign_ranges. Always true, used to
+ * assign remaining partitions after rack-aware assignment is complete. */
+static rd_bool_t rd_kafka_always(rd_kafka_group_member_t *member,
+                                 rd_kafka_topic_assignment_state_t *rktas,
+                                 int32_t partition) {
+        return rd_true;
+}
+
+/* Assigns as many partitions as possible for a topic to subscribing members,
+ * such that no subscribing member exceeds their limit of allowed partitions,
+ * and may_assign(member, rktas, partition) is true for each member and
+ * partition.
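+ *
+ * E.g. with 3 partitions and two members, num_partitions_per_consumer is 1
+ * with one extra, so the first member (in sorted order) receives partitions
+ * {0, 1} and the second receives {2}.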
+ */ +static void rd_kafka_assign_ranges( + rd_kafka_topic_assignment_state_t *rktas, + rd_bool_t (*may_assign)(rd_kafka_group_member_t *member, + rd_kafka_topic_assignment_state_t *rktas, + int32_t partition)) { + int i; + rd_kafka_group_member_t *member; + int32_t *partitions_to_assign = + rd_alloca(rktas->unassigned_partitions_left * sizeof(int32_t)); + + RD_LIST_FOREACH(member, &rktas->topic->members, i) { + int j; + rd_kafka_member_assigned_partitions_pair_t *member_assignment; + int maximum_assignable_to_consumer; + int partitions_to_assign_cnt; + + if (rktas->unassigned_partitions_left == 0) + break; + + member_assignment = + rd_kafka_find_member_assigned_partitions_pair_by_member_id( + member->rkgm_member_id, + rktas->member_to_assigned_partitions); + + maximum_assignable_to_consumer = + rktas->num_partitions_per_consumer + + (rktas->remaining_consumers_with_extra_partition > 0) - + rd_list_cnt(member_assignment->assigned_partitions); + + if (maximum_assignable_to_consumer <= 0) + continue; + + partitions_to_assign_cnt = 0; + for (j = 0; j < rktas->topic->metadata->partition_cnt; j++) { + if (!rktas->unassigned_partitions[j]) { + continue; + } + + if (maximum_assignable_to_consumer <= 0) + break; + + if (!may_assign(member, rktas, j)) + continue; + + partitions_to_assign[partitions_to_assign_cnt] = j; + partitions_to_assign_cnt++; + maximum_assignable_to_consumer--; + } + + for (j = 0; j < partitions_to_assign_cnt; j++) + rd_kafka_assign_partition(member, rktas, + partitions_to_assign[j]); + } +} + +/* + * Assigns partitions for co-partitioned topics in a rack-aware manner on a best + * effort basis. All partitions may not be assigned to consumers in case a rack + * aware assignment does not exist. + */ +static void rd_kafka_assign_co_partitioned( + rd_list_t * + rktas_bucket /* Contained Type: rd_kafka_topic_assignment_state_t* */) { + rd_kafka_topic_assignment_state_t *first_rktas = + rd_list_elem(rktas_bucket, 0); + rd_kafka_topic_assignment_state_t *rktas; + rd_kafka_group_member_t *member; + int i; + + /* Since a "bucket" is a group of topic_assignment_states with the same + * consumers and number of partitions, we can just fetch them from the + * first member of the bucket. */ + const int partition_cnt = first_rktas->topic->metadata->partition_cnt; + const rd_list_t *consumers = &first_rktas->topic->members; + + for (i = 0; i < partition_cnt; i++) { + /* + * To assign the ith partition of all the co partitioned topics, + * we need to find a consumerX that fulfils the criteria: + * for all topic_assignment_states in the bucket: + * 1. rack(consumerX) is contained inside racks(partition i) + * 2. partitions assigned to consumerX does not exceed limits. + */ + int j; + RD_LIST_FOREACH(member, consumers, j) { + int m; + RD_LIST_FOREACH(rktas, rktas_bucket, m) { + int maximum_assignable; + rd_kafka_member_assigned_partitions_pair_t + *member_assignment; + + /* Check (1.) */ + if (!member->rkgm_rack_id || + RD_KAFKAP_STR_LEN(member->rkgm_rack_id) == + 0 || + rd_kafka_topic_assignment_state_rack_search( + rktas, i, member->rkgm_rack_id->str) == + NULL) { + break; + } + + /* Check (2.) 
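+                                 * i.e. the member still has spare capacity:
+                                 * num_partitions_per_consumer (+1 while extra
+                                 * partitions remain) minus what it already
+                                 * holds must be positive.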
*/ + member_assignment = + rd_kafka_find_member_assigned_partitions_pair_by_member_id( + member->rkgm_member_id, + rktas->member_to_assigned_partitions); + maximum_assignable = + rktas->num_partitions_per_consumer + + (rktas + ->remaining_consumers_with_extra_partition > + 0) - + rd_list_cnt( + member_assignment->assigned_partitions); + + if (maximum_assignable <= 0) { + break; + } + } + if (m == rd_list_cnt(rktas_bucket)) { + /* Break early - this consumer can be assigned + * this partition. */ + break; + } + } + if (j == rd_list_cnt(&first_rktas->topic->members)) { + continue; /* We didn't find a suitable consumer. */ + } + + rd_assert(member); + + RD_LIST_FOREACH(rktas, rktas_bucket, j) { + rd_kafka_assign_partition(member, rktas, i); + } + + /* FIXME: A possible optimization: early break here if no + * consumer remains with maximum_assignable_to_consumer > 0 + * across all topics. */ + } +} + + +rd_kafka_resp_err_t +rd_kafka_range_assignor_assign_cb(rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + const char *member_id, + const rd_kafka_metadata_t *metadata, + rd_kafka_group_member_t *members, + size_t member_cnt, + rd_kafka_assignor_topic_t **eligible_topics, + size_t eligible_topic_cnt, + char *errstr, + size_t errstr_size, + void *opaque) { + unsigned int ti; + int i; + rd_list_t *rktas_list = rd_list_new( + eligible_topic_cnt, rd_kafka_topic_assignment_state_destroy); + rd_list_t *rktas_buckets = rd_list_new(0, rd_list_destroy_free); + rd_list_t + *rktas_current_bucket; /* Contained Type: + rd_kafka_topic_assignment_state_t* */ + rd_kafka_topic_assignment_state_t *rktas; + rd_kafka_topic_assignment_state_t *prev_rktas; + const rd_kafka_metadata_internal_t *mdi = + rd_kafka_metadata_get_internal(metadata); + + /* The range assignor works on a per-topic basis. */ + for (ti = 0; ti < eligible_topic_cnt; ti++) { + rd_kafka_assignor_topic_t *eligible_topic = eligible_topics[ti]; + + /* For each topic, we sort the consumers in lexicographic order, + * and create a topic_assignment_state. */ + rd_list_sort(&eligible_topic->members, + rd_kafka_group_member_cmp); + rd_list_add(rktas_list, rd_kafka_topic_assignment_state_new( + eligible_topic, mdi)); + } + + /* Sort the topic_assignment_states to group the topics which need to be + * co-partitioned. */ + rd_list_sort(rktas_list, rd_kafka_topic_assignment_state_cmp); + + /* Use the sorted list of topic_assignment_states and separate them into + * "buckets". Each bucket contains topics which can be co-partitioned, + * ie with the same consumers and number of partitions. */ + prev_rktas = NULL; + rktas_current_bucket = NULL; + RD_LIST_FOREACH(rktas, rktas_list, i) { + if (prev_rktas && rd_kafka_topic_assignment_state_cmp( + rktas, prev_rktas) == 0) { + rd_list_add(rktas_current_bucket, rktas); + continue; + } + + /* The free function is set to NULL, as we don't copy any of the + * topic_assignment_states. */ + rktas_current_bucket = rd_list_new(0, NULL); + rd_list_add(rktas_buckets, rktas_current_bucket); + prev_rktas = rktas; + rd_list_add(rktas_current_bucket, rktas); + } + + /* Iterate through each bucket. In case there's more than one element in + * the bucket, we prefer co-partitioning over rack awareness. Otherwise, + * assign with rack-awareness. 
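+         * E.g. two topics with the same three subscribers and six partitions
+         * each land in one bucket, and partition i of both topics is assigned
+         * to the same member where the rack criteria allow it (see
+         * rd_kafka_assign_co_partitioned()).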
 */
+        rktas                = NULL;
+        rktas_current_bucket = NULL;
+        RD_LIST_FOREACH(rktas_current_bucket, rktas_buckets, i) {
+                rd_assert(rd_list_cnt(rktas_current_bucket) > 0);
+
+                if (rd_list_cnt(rktas_current_bucket) == 1) {
+                        rktas = rd_list_elem(rktas_current_bucket, 0);
+                        if (!rktas->needs_rack_aware_assignment)
+                                continue;
+
+
+                        rd_kafka_dbg(rk, CGRP, "ASSIGN",
+                                     "range: Topic %s with %d partition(s) and "
+                                     "%d subscribing member(s), single-topic "
+                                     "rack-aware assignment",
+                                     rktas->topic->metadata->topic,
+                                     rktas->topic->metadata->partition_cnt,
+                                     rd_list_cnt(&rktas->topic->members));
+
+                        rd_kafka_assign_ranges(rktas, rd_kafka_racks_match);
+                } else {
+                        rktas = rd_list_elem(rktas_current_bucket, 0);
+                        rd_kafka_dbg(
+                            rk, CGRP, "ASSIGN",
+                            "range: %d topics with %d partition(s) and "
+                            "%d subscribing member(s), co-partitioned "
+                            "rack-aware assignment",
+                            rd_list_cnt(rktas_current_bucket),
+                            rktas->topic->metadata->partition_cnt,
+                            rd_list_cnt(&rktas->topic->members));
+
+                        rd_kafka_assign_co_partitioned(rktas_current_bucket);
+                }
+        }
+
+        /* Iterate through each rktas, doing normal assignment for any
+         * partitions that might not have gotten a rack-aware assignment.*/
+        RD_LIST_FOREACH(rktas, rktas_list, i) {
+                rd_kafka_dbg(rk, CGRP, "ASSIGN",
+                             "range: Topic %s with %d partition(s) and "
+                             "%d subscribing member(s), single-topic "
+                             "non-rack-aware assignment for %" PRIusz
+                             " leftover partitions",
+                             rktas->topic->metadata->topic,
+                             rktas->topic->metadata->partition_cnt,
+                             rd_list_cnt(&rktas->topic->members),
+                             rktas->unassigned_partitions_left);
+                rd_kafka_assign_ranges(rktas, rd_kafka_always);
+        }
+
+        rd_list_destroy(rktas_list);
+        rd_list_destroy(rktas_buckets);
+
+        return 0;
+}
+
+
+/**
+ * @name Range assignor unit tests
+ *
+ *
+ * These are based on RangeAssignorTest.java
+ *
+ *
+ *
+ */
+
+
+/* All possible racks used in tests, as well as several common rack configs used
+ * by consumers */
+static rd_kafkap_str_t
+    *ALL_RACKS[7]; /* initialized before starting the unit tests.
*/ +static int RACKS_INITIAL[] = {0, 1, 2}; +static int RACKS_NULL[] = {6, 6, 6}; +static int RACKS_FINAL[] = {4, 5, 6}; +static int RACKS_ONE_NULL[] = {6, 4, 5}; + +static int +ut_testOneConsumerNoTopic(rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + rd_kafka_resp_err_t err; + char errstr[512]; + rd_kafka_metadata_t *metadata; + rd_kafka_group_member_t members[1]; + + + if (parametrization == RD_KAFKA_RANGE_ASSIGNOR_UT_NO_BROKER_RACK) { + RD_UT_PASS(); + } + + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 0); + + ut_initMemberConditionalRack(&members[0], "consumer1", ALL_RACKS[0], + parametrization, "t1", NULL); + + err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, + RD_ARRAYSIZE(members), errstr, + sizeof(errstr)); + RD_UT_ASSERT(!err, "assignor run failed: %s", errstr); + + verifyAssignment(&members[0], NULL); + + rd_kafka_group_member_clear(&members[0]); + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + +static int ut_testOneConsumerNonexistentTopic( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + rd_kafka_resp_err_t err; + char errstr[512]; + rd_kafka_metadata_t *metadata; + rd_kafka_group_member_t members[1]; + + + if (parametrization == RD_KAFKA_RANGE_ASSIGNOR_UT_NO_BROKER_RACK) { + RD_UT_PASS(); + } + + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 1, "t1", 0); + + ut_initMemberConditionalRack(&members[0], "consumer1", ALL_RACKS[0], + parametrization, "t1", NULL); + + err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, + RD_ARRAYSIZE(members), errstr, + sizeof(errstr)); + RD_UT_ASSERT(!err, "assignor run failed: %s", errstr); + + verifyAssignment(&members[0], NULL); + + rd_kafka_group_member_clear(&members[0]); + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + + +static int +ut_testOneConsumerOneTopic(rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + rd_kafka_resp_err_t err; + char errstr[512]; + rd_kafka_metadata_t *metadata; + rd_kafka_group_member_t members[1]; + + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 1, "t1", 3); + + ut_initMemberConditionalRack(&members[0], "consumer1", ALL_RACKS[0], + parametrization, "t1", NULL); + + err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, + RD_ARRAYSIZE(members), errstr, + sizeof(errstr)); + RD_UT_ASSERT(!err, "assignor run failed: %s", errstr); + RD_UT_ASSERT(members[0].rkgm_assignment->cnt == 3, + "expected assignment of 3 partitions, got %d partition(s)", + members[0].rkgm_assignment->cnt); + + verifyAssignment(&members[0], "t1", 0, "t1", 1, "t1", 2, NULL); + + rd_kafka_group_member_clear(&members[0]); + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + + +static int ut_testOnlyAssignsPartitionsFromSubscribedTopics( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + rd_kafka_resp_err_t err; + char errstr[512]; + rd_kafka_metadata_t *metadata; + rd_kafka_group_member_t members[1]; + + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 2, "t1", 3, "t2", 3); + + ut_initMemberConditionalRack(&members[0], "consumer1", ALL_RACKS[0], + parametrization, "t1", NULL); + + err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, 
metadata, members, + RD_ARRAYSIZE(members), errstr, + sizeof(errstr)); + RD_UT_ASSERT(!err, "assignor run failed: %s", errstr); + + verifyAssignment(&members[0], "t1", 0, "t1", 1, "t1", 2, NULL); + + rd_kafka_group_member_clear(&members[0]); + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + +static int ut_testOneConsumerMultipleTopics( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + rd_kafka_resp_err_t err; + char errstr[512]; + rd_kafka_metadata_t *metadata; + rd_kafka_group_member_t members[1]; + + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 2, "t1", 1, "t2", 2); + + ut_initMemberConditionalRack(&members[0], "consumer1", ALL_RACKS[0], + parametrization, "t1", "t2", NULL); + + err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, + RD_ARRAYSIZE(members), errstr, + sizeof(errstr)); + RD_UT_ASSERT(!err, "assignor run failed: %s", errstr); + + verifyAssignment(&members[0], "t1", 0, "t2", 0, "t2", 1, NULL); + + rd_kafka_group_member_clear(&members[0]); + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + +static int ut_testTwoConsumersOneTopicOnePartition( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + rd_kafka_resp_err_t err; + char errstr[512]; + rd_kafka_metadata_t *metadata; + rd_kafka_group_member_t members[2]; + + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 1, "t1", 1); + + ut_initMemberConditionalRack(&members[0], "consumer1", ALL_RACKS[0], + parametrization, "t1", NULL); + ut_initMemberConditionalRack(&members[1], "consumer2", ALL_RACKS[1], + parametrization, "t1", NULL); + + err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, + RD_ARRAYSIZE(members), errstr, + sizeof(errstr)); + RD_UT_ASSERT(!err, "assignor run failed: %s", errstr); + + verifyAssignment(&members[0], "t1", 0, NULL); + verifyAssignment(&members[1], NULL); + + rd_kafka_group_member_clear(&members[0]); + rd_kafka_group_member_clear(&members[1]); + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + +static int ut_testTwoConsumersOneTopicTwoPartitions( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + rd_kafka_resp_err_t err; + char errstr[512]; + rd_kafka_metadata_t *metadata; + rd_kafka_group_member_t members[2]; + + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 1, "t1", 2); + + ut_initMemberConditionalRack(&members[0], "consumer1", ALL_RACKS[0], + parametrization, "t1", NULL); + ut_initMemberConditionalRack(&members[1], "consumer2", ALL_RACKS[1], + parametrization, "t1", NULL); + + err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, + RD_ARRAYSIZE(members), errstr, + sizeof(errstr)); + RD_UT_ASSERT(!err, "assignor run failed: %s", errstr); + + verifyAssignment(&members[0], "t1", 0, NULL); + verifyAssignment(&members[1], "t1", 1, NULL); + + rd_kafka_group_member_clear(&members[0]); + rd_kafka_group_member_clear(&members[1]); + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + +static int ut_testMultipleConsumersMixedTopicSubscriptions( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + rd_kafka_resp_err_t err; + char errstr[512]; + rd_kafka_metadata_t *metadata; + rd_kafka_group_member_t members[3]; + + ut_initMetadataConditionalRack(&metadata, 
3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 2, "t1", 3, "t2", 2); + + ut_initMemberConditionalRack(&members[0], "consumer1", ALL_RACKS[0], + parametrization, "t1", NULL); + ut_initMemberConditionalRack(&members[1], "consumer2", ALL_RACKS[1], + parametrization, "t1", "t2", NULL); + ut_initMemberConditionalRack(&members[2], "consumer3", ALL_RACKS[2], + parametrization, "t1", NULL); + + err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, + RD_ARRAYSIZE(members), errstr, + sizeof(errstr)); + RD_UT_ASSERT(!err, "assignor run failed: %s", errstr); + + verifyAssignment(&members[0], "t1", 0, NULL); + verifyAssignment(&members[1], "t1", 1, "t2", 0, "t2", 1, NULL); + verifyAssignment(&members[2], "t1", 2, NULL); + + rd_kafka_group_member_clear(&members[0]); + rd_kafka_group_member_clear(&members[1]); + rd_kafka_group_member_clear(&members[2]); + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + +static int ut_testTwoConsumersTwoTopicsSixPartitions( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + rd_kafka_resp_err_t err; + char errstr[512]; + rd_kafka_metadata_t *metadata; + rd_kafka_group_member_t members[2]; + + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 2, "t1", 3, "t2", 3); + + ut_initMemberConditionalRack(&members[0], "consumer1", ALL_RACKS[0], + parametrization, "t1", "t2", NULL); + ut_initMemberConditionalRack(&members[1], "consumer2", ALL_RACKS[1], + parametrization, "t1", "t2", NULL); + + err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, + RD_ARRAYSIZE(members), errstr, + sizeof(errstr)); + RD_UT_ASSERT(!err, "assignor run failed: %s", errstr); + + verifyAssignment(&members[0], "t1", 0, "t1", 1, "t2", 0, "t2", 1, NULL); + verifyAssignment(&members[1], "t1", 2, "t2", 2, NULL); + + rd_kafka_group_member_clear(&members[0]); + rd_kafka_group_member_clear(&members[1]); + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + + +/* Helper for setting up metadata and members, and running the assignor. Does + * not check the results of the assignment. */ +static int setupRackAwareAssignment0(rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_group_member_t *members, + size_t member_cnt, + int replication_factor, + int num_broker_racks, + size_t topic_cnt, + char *topics[], + int *partitions, + int *subscriptions_count, + char **subscriptions[], + int *consumer_racks, + rd_kafka_metadata_t **metadata) { + rd_kafka_resp_err_t err; + char errstr[512]; + rd_kafka_metadata_t *metadata_local = NULL; + if (!metadata) + metadata = &metadata_local; + + size_t i = 0; + const int num_brokers = num_broker_racks > 0 + ? replication_factor * num_broker_racks + : replication_factor; + + /* The member naming for tests is consumerN where N is a single + * character. 
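+         * E.g. member_cnt = 3 yields member ids consumer1, consumer2 and
+         * consumer3 (see the snprintf below), which is why member_cnt <= 9
+         * is asserted.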
 */
+        rd_assert(member_cnt <= 9);
+
+        *metadata = rd_kafka_metadata_new_topic_with_partition_replicas_mock(
+            replication_factor, num_brokers, topics, partitions, topic_cnt);
+        ut_populate_internal_broker_metadata(
+            rd_kafka_metadata_get_internal(*metadata), num_broker_racks,
+            ALL_RACKS, RD_ARRAYSIZE(ALL_RACKS));
+        ut_populate_internal_topic_metadata(
+            rd_kafka_metadata_get_internal(*metadata));
+
+        for (i = 0; i < member_cnt; i++) {
+                char member_id[10];
+                snprintf(member_id, 10, "consumer%d", (int)(i + 1));
+                ut_init_member_with_rack(
+                    &members[i], member_id, ALL_RACKS[consumer_racks[i]],
+                    subscriptions[i], subscriptions_count[i]);
+        }
+
+        err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, *metadata, members,
+                                    member_cnt, errstr, sizeof(errstr));
+        RD_UT_ASSERT(!err, "assignor run failed: %s", errstr);
+
+        if (metadata_local)
+                ut_destroy_metadata(metadata_local);
+        return 0;
+}
+
+static int setupRackAwareAssignment(rd_kafka_t *rk,
+                                    const rd_kafka_assignor_t *rkas,
+                                    rd_kafka_group_member_t *members,
+                                    size_t member_cnt,
+                                    int replication_factor,
+                                    int num_broker_racks,
+                                    size_t topic_cnt,
+                                    char *topics[],
+                                    int *partitions,
+                                    int *subscriptions_count,
+                                    char **subscriptions[],
+                                    int *consumer_racks) {
+        return setupRackAwareAssignment0(
+            rk, rkas, members, member_cnt, replication_factor, num_broker_racks,
+            topic_cnt, topics, partitions, subscriptions_count, subscriptions,
+            consumer_racks, NULL);
+}
+
+/* Helper for testing cases where rack-aware assignment should not be triggered,
+ * and assignment should be the same as the pre-rack-aware assignor. */
+#define verifyNonRackAwareAssignment(rk, rkas, members, member_cnt, topic_cnt, \
+                                     topics, partitions, subscriptions_count,  \
+                                     subscriptions, ...)                       \
+        do {                                                                   \
+                size_t idx                    = 0;                             \
+                rd_kafka_metadata_t *metadata = NULL;                          \
+                                                                               \
+                /* num_broker_racks = 0 implies that brokers have no           \
+                 * configured racks. */                                        \
+                setupRackAwareAssignment(rk, rkas, members, member_cnt, 3, 0,  \
+                                         topic_cnt, topics, partitions,       \
+                                         subscriptions_count, subscriptions,  \
+                                         RACKS_INITIAL);                      \
+                verifyMultipleAssignment(members, member_cnt, __VA_ARGS__);    \
+                for (idx = 0; idx < member_cnt; idx++)                         \
+                        rd_kafka_group_member_clear(&members[idx]);            \
+                /* consumer_racks = RACKS_NULL implies that consumers have no  \
+                 * racks. */                                                   \
+                setupRackAwareAssignment(rk, rkas, members, member_cnt, 3, 3,  \
+                                         topic_cnt, topics, partitions,       \
+                                         subscriptions_count, subscriptions,  \
+                                         RACKS_NULL);                         \
+                verifyMultipleAssignment(members, member_cnt, __VA_ARGS__);    \
+                for (idx = 0; idx < member_cnt; idx++)                         \
+                        rd_kafka_group_member_clear(&members[idx]);            \
+                /* replication_factor = 3 and num_broker_racks = 3 mean that   \
+                 * all partitions are replicated on all racks. */              \
+                setupRackAwareAssignment0(rk, rkas, members, member_cnt, 3, 3, \
+                                          topic_cnt, topics, partitions,      \
+                                          subscriptions_count, subscriptions, \
+                                          RACKS_INITIAL, &metadata);          \
+                verifyMultipleAssignment(members, member_cnt, __VA_ARGS__);    \
+                verifyNumPartitionsWithRackMismatch(metadata, members,         \
+                                                    RD_ARRAYSIZE(members), 0); \
+                                                                               \
+                for (idx = 0; idx < member_cnt; idx++)                         \
+                        rd_kafka_group_member_clear(&members[idx]);            \
+                ut_destroy_metadata(metadata);                                 \
+                /* replication_factor = 4 and num_broker_racks = 4 mean that   \
+                 * all partitions are replicated on all racks.
 */ \
+                setupRackAwareAssignment0(rk, rkas, members, member_cnt, 4, 4, \
+                                          topic_cnt, topics, partitions,      \
+                                          subscriptions_count, subscriptions, \
+                                          RACKS_INITIAL, &metadata);          \
+                verifyMultipleAssignment(members, member_cnt, __VA_ARGS__);    \
+                verifyNumPartitionsWithRackMismatch(metadata, members,         \
+                                                    RD_ARRAYSIZE(members), 0); \
+                                                                               \
+                for (idx = 0; idx < member_cnt; idx++)                         \
+                        rd_kafka_group_member_clear(&members[idx]);            \
+                ut_destroy_metadata(metadata);                                 \
+                /* There's no overlap between broker racks and consumer racks, \
+                 * since num_broker_racks = 3, they'll be picked from a,b,c    \
+                 * and consumer racks are d,e,f. */                            \
+                setupRackAwareAssignment(rk, rkas, members, member_cnt, 3, 3,  \
+                                         topic_cnt, topics, partitions,       \
+                                         subscriptions_count, subscriptions,  \
+                                         RACKS_FINAL);                        \
+                verifyMultipleAssignment(members, member_cnt, __VA_ARGS__);    \
+                for (idx = 0; idx < member_cnt; idx++)                         \
+                        rd_kafka_group_member_clear(&members[idx]);            \
+                /* There's no overlap between broker racks and consumer racks, \
+                 * since num_broker_racks = 3, they'll be picked from a,b,c    \
+                 * and consumer racks are d,e,NULL. */                         \
+                setupRackAwareAssignment(rk, rkas, members, member_cnt, 3, 3,  \
+                                         topic_cnt, topics, partitions,       \
+                                         subscriptions_count, subscriptions,  \
+                                         RACKS_ONE_NULL);                     \
+                verifyMultipleAssignment(members, member_cnt, __VA_ARGS__);    \
+                for (idx = 0; idx < member_cnt; idx++)                         \
+                        rd_kafka_group_member_clear(&members[idx]);            \
+        } while (0)
+
+static int ut_testRackAwareAssignmentWithUniformSubscription(
+    rd_kafka_t *rk,
+    const rd_kafka_assignor_t *rkas,
+    rd_kafka_assignor_ut_rack_config_t parametrization) {
+        char *topics[]   = {"t1", "t2", "t3"};
+        int partitions[] = {6, 7, 2};
+        rd_kafka_metadata_t *metadata;
+        rd_kafka_group_member_t members[3];
+        size_t i                  = 0;
+        int subscriptions_count[] = {3, 3, 3};
+        char **subscriptions[]    = {topics, topics, topics};
+
+        if (parametrization !=
+            RD_KAFKA_RANGE_ASSIGNOR_UT_BROKER_AND_CONSUMER_RACK) {
+                RD_UT_PASS();
+        }
+
+        verifyNonRackAwareAssignment(
+            rk, rkas, members, RD_ARRAYSIZE(members), RD_ARRAYSIZE(topics),
+            topics, partitions, subscriptions_count, subscriptions,
+            /* consumer1*/
+            "t1", 0, "t1", 1, "t2", 0, "t2", 1, "t2", 2, "t3", 0, NULL,
+            /* consumer2 */
+            "t1", 2, "t1", 3, "t2", 3, "t2", 4, "t3", 1, NULL,
+            /* consumer3 */
+            "t1", 4, "t1", 5, "t2", 5, "t2", 6, NULL);
+
+        /* Verify best-effort rack-aware assignment for lower replication factor
+         * where racks have a subset of partitions. */
+        setupRackAwareAssignment0(rk, rkas, members, RD_ARRAYSIZE(members), 1,
+                                  3, RD_ARRAYSIZE(topics), topics, partitions,
+                                  subscriptions_count, subscriptions,
+                                  RACKS_INITIAL, &metadata);
+        verifyMultipleAssignment(
+            members, RD_ARRAYSIZE(members),
+            /* consumer1 */
+            "t1", 0, "t1", 3, "t2", 0, "t2", 3, "t2", 6, NULL,
+            /* consumer2 */
+            "t1", 1, "t1", 4, "t2", 1, "t2", 4, "t3", 0, NULL,
+            /* consumer3 */
+            "t1", 2, "t1", 5, "t2", 2, "t2", 5, "t3", 1, NULL);
+        verifyNumPartitionsWithRackMismatch(metadata, members,
+                                            RD_ARRAYSIZE(members), 0);
+
+        for (i = 0; i < RD_ARRAYSIZE(members); i++)
+                rd_kafka_group_member_clear(&members[i]);
+        ut_destroy_metadata(metadata);
+
+        setupRackAwareAssignment0(rk, rkas, members, RD_ARRAYSIZE(members), 2,
+                                  3, RD_ARRAYSIZE(topics), topics, partitions,
+                                  subscriptions_count, subscriptions,
+                                  RACKS_INITIAL, &metadata);
+        verifyMultipleAssignment(
+            members, RD_ARRAYSIZE(members),
+            /*consumer1*/
+            "t1", 0, "t1", 2, "t2", 0, "t2", 2, "t2", 3, "t3", 1, NULL,
+            /* consumer2 */
+            "t1", 1, "t1", 3, "t2", 1, "t2", 4, "t3", 0, NULL,
+            /*
consumer 3*/ + "t1", 4, "t1", 5, "t2", 5, "t2", 6, NULL); + verifyNumPartitionsWithRackMismatch(metadata, members, + RD_ARRAYSIZE(members), 1); + + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + ut_destroy_metadata(metadata); + + + /* One consumer on a rack with no partitions. */ + setupRackAwareAssignment0(rk, rkas, members, RD_ARRAYSIZE(members), 3, + 2, RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, + RACKS_INITIAL, &metadata); + verifyMultipleAssignment(members, RD_ARRAYSIZE(members), + /* consumer1 */ "t1", 0, "t1", 1, "t2", 0, + "t2", 1, "t2", 2, "t3", 0, NULL, + /* consumer2 */ + "t1", 2, "t1", 3, "t2", 3, "t2", 4, "t3", 1, + NULL, + /* consumer3 */ + "t1", 4, "t1", 5, "t2", 5, "t2", 6, NULL); + verifyNumPartitionsWithRackMismatch(metadata, members, + RD_ARRAYSIZE(members), 4); + + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + +static int ut_testRackAwareAssignmentWithNonEqualSubscription( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + rd_kafka_metadata_t *metadata; + char *topics[] = {"t1", "t2", "t3"}; + int partitions[] = {6, 7, 2}; + rd_kafka_group_member_t members[3]; + size_t i = 0; + int subscriptions_count[] = {3, 3, 2}; + char *subscription13[] = {"t1", "t3"}; + char **subscriptions[] = {topics, topics, subscription13}; + + if (parametrization != + RD_KAFKA_RANGE_ASSIGNOR_UT_BROKER_AND_CONSUMER_RACK) { + RD_UT_PASS(); + } + + verifyNonRackAwareAssignment( + rk, rkas, members, RD_ARRAYSIZE(members), RD_ARRAYSIZE(topics), + topics, partitions, subscriptions_count, subscriptions, + /* consumer1*/ + "t1", 0, "t1", 1, "t2", 0, "t2", 1, "t2", 2, "t2", 3, "t3", 0, NULL, + /* consumer2 */ + "t1", 2, "t1", 3, "t2", 4, "t2", 5, "t2", 6, "t3", 1, NULL, + /* consumer3 */ + "t1", 4, "t1", 5, NULL); + + /* Verify best-effort rack-aware assignment for lower replication factor + * where racks have a subset of partitions. 
*/ + setupRackAwareAssignment0(rk, rkas, members, RD_ARRAYSIZE(members), 1, + 3, RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, + RACKS_INITIAL, &metadata); + verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1 */ + "t1", 0, "t1", 3, "t2", 0, "t2", 2, "t2", 3, "t2", 6, NULL, + /* consumer2 */ + "t1", 1, "t1", 4, "t2", 1, "t2", 4, "t2", 5, "t3", 0, NULL, + /* consumer3 */ + "t1", 2, "t1", 5, "t3", 1, NULL); + verifyNumPartitionsWithRackMismatch(metadata, members, + RD_ARRAYSIZE(members), 2); + + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + ut_destroy_metadata(metadata); + + setupRackAwareAssignment0(rk, rkas, members, RD_ARRAYSIZE(members), 2, + 3, RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, + RACKS_INITIAL, &metadata); + verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1 */ + "t1", 0, "t1", 2, "t2", 0, "t2", 2, "t2", 3, "t2", 5, "t3", 1, NULL, + /* consumer2 */ + "t1", 1, "t1", 3, "t2", 1, "t2", 4, "t2", 6, "t3", 0, NULL, + /* consumer3 */ + "t1", 4, "t1", 5, NULL); + verifyNumPartitionsWithRackMismatch(metadata, members, + RD_ARRAYSIZE(members), 0); + + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + ut_destroy_metadata(metadata); + + /* One consumer on a rack with no partitions */ + setupRackAwareAssignment0(rk, rkas, members, RD_ARRAYSIZE(members), 3, + 2, RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, + RACKS_INITIAL, &metadata); + verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1 */ + "t1", 0, "t1", 1, "t2", 0, "t2", 1, "t2", 2, "t2", 3, "t3", 0, NULL, + /* consumer2 */ + "t1", 2, "t1", 3, "t2", 4, "t2", 5, "t2", 6, "t3", 1, NULL, + /* consumer3 */ + "t1", 4, "t1", 5, NULL); + verifyNumPartitionsWithRackMismatch(metadata, members, + RD_ARRAYSIZE(members), 2); + + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + +static int ut_testRackAwareAssignmentWithUniformPartitions( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + char *topics[] = {"t1", "t2", "t3"}; + int partitions[] = {5, 5, 5}; + int partitions_mismatch[] = {10, 5, 3}; + rd_kafka_group_member_t members[3]; + size_t i = 0; + int replication_factor = 0; + int subscriptions_count[] = {3, 3, 3}; + char **subscriptions[] = {topics, topics, topics}; + + if (parametrization != + RD_KAFKA_RANGE_ASSIGNOR_UT_BROKER_AND_CONSUMER_RACK) { + RD_UT_PASS(); + } + + /* Verify combinations where rack-aware logic is not used. */ + verifyNonRackAwareAssignment( + rk, rkas, members, RD_ARRAYSIZE(members), RD_ARRAYSIZE(topics), + topics, partitions, subscriptions_count, subscriptions, + /* consumer1*/ + "t1", 0, "t1", 1, "t2", 0, "t2", 1, "t3", 0, "t3", 1, NULL, + /* consumer2 */ + "t1", 2, "t1", 3, "t2", 2, "t2", 3, "t3", 2, "t3", 3, NULL, + /* consumer3 */ + "t1", 4, "t2", 4, "t3", 4, NULL); + + /* Verify that co-partitioning is prioritized over rack-alignment for + * topics with equal subscriptions */ + for (replication_factor = 1; replication_factor <= 3; + replication_factor++) { + rd_kafka_metadata_t *metadata = NULL; + setupRackAwareAssignment0( + rk, rkas, members, RD_ARRAYSIZE(members), + replication_factor, replication_factor < 3 ? 
3 : 2, + RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, RACKS_INITIAL, + &metadata); + verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1*/ + "t1", 0, "t1", 1, "t2", 0, "t2", 1, "t3", 0, "t3", 1, NULL, + /* consumer2 */ + "t1", 2, "t1", 3, "t2", 2, "t2", 3, "t3", 2, "t3", 3, NULL, + /* consumer3 */ + "t1", 4, "t2", 4, "t3", 4, NULL); + verifyNumPartitionsWithRackMismatch( + metadata, members, RD_ARRAYSIZE(members), + partitions_mismatch[replication_factor - 1]); + + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + ut_destroy_metadata(metadata); + } + + RD_UT_PASS(); +} + +static int ut_testRackAwareAssignmentWithUniformPartitionsNonEqualSubscription( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + rd_kafka_metadata_t *metadata = NULL; + char *topics[] = {"t1", "t2", "t3"}; + int partitions[] = {5, 5, 5}; + rd_kafka_group_member_t members[3]; + size_t i = 0; + int subscriptions_count[] = {3, 3, 2}; + char *subscription13[] = {"t1", "t3"}; + char **subscriptions[] = {topics, topics, subscription13}; + + if (parametrization != + RD_KAFKA_RANGE_ASSIGNOR_UT_BROKER_AND_CONSUMER_RACK) { + RD_UT_PASS(); + } + + /* Verify combinations where rack-aware logic is not used. */ + verifyNonRackAwareAssignment( + rk, rkas, members, RD_ARRAYSIZE(members), RD_ARRAYSIZE(topics), + topics, partitions, subscriptions_count, subscriptions, + /* consumer1*/ + "t1", 0, "t1", 1, "t2", 0, "t2", 1, "t2", 2, "t3", 0, "t3", 1, NULL, + /* consumer2 */ + "t1", 2, "t1", 3, "t2", 3, "t2", 4, "t3", 2, "t3", 3, NULL, + /* consumer3 */ + "t1", 4, "t3", 4, NULL); + + /* Verify that co-partitioning is prioritized over rack-alignment for + * topics with equal subscriptions */ + setupRackAwareAssignment0(rk, rkas, members, RD_ARRAYSIZE(members), 1, + 3, RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, + RACKS_INITIAL, &metadata); + verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1 */ + "t1", 0, "t1", 1, "t2", 0, "t2", 1, "t2", 4, "t3", 0, "t3", 1, NULL, + /* consumer2 */ + "t1", 2, "t1", 3, "t2", 2, "t2", 3, "t3", 2, "t3", 3, NULL, + /* consumer3 */ + "t1", 4, "t3", 4, NULL); + verifyNumPartitionsWithRackMismatch(metadata, members, + RD_ARRAYSIZE(members), 9); + + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + ut_destroy_metadata(metadata); + + + setupRackAwareAssignment0(rk, rkas, members, RD_ARRAYSIZE(members), 2, + 3, RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, + RACKS_INITIAL, &metadata); + verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1 */ + "t1", 2, "t2", 0, "t2", 1, "t2", 3, "t3", 2, NULL, + /* consumer2 */ + "t1", 0, "t1", 3, "t2", 2, "t2", 4, "t3", 0, "t3", 3, NULL, + /* consumer3 */ + "t1", 1, "t1", 4, "t3", 1, "t3", 4, NULL); + verifyNumPartitionsWithRackMismatch(metadata, members, + RD_ARRAYSIZE(members), 0); + + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + ut_destroy_metadata(metadata); + + /* One consumer on a rack with no partitions */ + setupRackAwareAssignment0(rk, rkas, members, RD_ARRAYSIZE(members), 3, + 2, RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, + RACKS_INITIAL, &metadata); + verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1 */ + "t1", 0, "t1", 1, "t2", 0, "t2", 1, "t2", 2, "t3", 0, 
"t3", 1, NULL, + /* consumer2 */ + "t1", 2, "t1", 3, "t2", 3, "t2", 4, "t3", 2, "t3", 3, NULL, + /* consumer3 */ + "t1", 4, "t3", 4, NULL); + verifyNumPartitionsWithRackMismatch(metadata, members, + RD_ARRAYSIZE(members), 2); + + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + +static int ut_testRackAwareAssignmentWithCoPartitioning0( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + rd_kafka_metadata_t *metadata = NULL; + char *topics[] = {"t1", "t2", "t3", "t4"}; + int partitions[] = {6, 6, 2, 2}; + rd_kafka_group_member_t members[4]; + size_t i = 0; + int subscriptions_count[] = {2, 2, 2, 2}; + char *subscription12[] = {"t1", "t2"}; + char *subscription34[] = {"t3", "t4"}; + char **subscriptions[] = {subscription12, subscription12, + subscription34, subscription34}; + int racks[] = {0, 1, 1, 0}; + + if (parametrization != + RD_KAFKA_RANGE_ASSIGNOR_UT_BROKER_AND_CONSUMER_RACK) { + RD_UT_PASS(); + } + + setupRackAwareAssignment(rk, rkas, members, RD_ARRAYSIZE(members), 3, 2, + RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, racks); + verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1 */ + "t1", 0, "t1", 1, "t1", 2, "t2", 0, "t2", 1, "t2", 2, NULL, + /* consumer2 */ + "t1", 3, "t1", 4, "t1", 5, "t2", 3, "t2", 4, "t2", 5, NULL, + /* consumer3 */ + "t3", 0, "t4", 0, NULL, + /* consumer4 */ + "t3", 1, "t4", 1, NULL); + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + + setupRackAwareAssignment0(rk, rkas, members, RD_ARRAYSIZE(members), 2, + 2, RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, racks, + &metadata); + verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1 */ + "t1", 0, "t1", 1, "t1", 2, "t2", 0, "t2", 1, "t2", 2, NULL, + /* consumer2 */ + "t1", 3, "t1", 4, "t1", 5, "t2", 3, "t2", 4, "t2", 5, NULL, + /* consumer3 */ + "t3", 0, "t4", 0, NULL, + /* consumer4 */ + "t3", 1, "t4", 1, NULL); + verifyNumPartitionsWithRackMismatch(metadata, members, + RD_ARRAYSIZE(members), 0); + + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + ut_destroy_metadata(metadata); + + setupRackAwareAssignment0(rk, rkas, members, RD_ARRAYSIZE(members), 1, + 2, RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, racks, + &metadata); + verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1 */ + "t1", 0, "t1", 2, "t1", 4, "t2", 0, "t2", 2, "t2", 4, NULL, + /* consumer2 */ + "t1", 1, "t1", 3, "t1", 5, "t2", 1, "t2", 3, "t2", 5, NULL, + /* consumer3 */ + "t3", 1, "t4", 1, NULL, + /* consumer4 */ + "t3", 0, "t4", 0, NULL); + verifyNumPartitionsWithRackMismatch(metadata, members, + RD_ARRAYSIZE(members), 0); + + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + +static int ut_testRackAwareAssignmentWithCoPartitioning1( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + rd_kafka_metadata_t *metadata = NULL; + char *topics[] = {"t1", "t2", "t3", "t4"}; + int partitions[] = {6, 6, 2, 2}; + rd_kafka_group_member_t members[4]; + size_t i = 0; + int subscriptions_count[] = {4, 4, 4, 4}; + char **subscriptions[] = {topics, topics, topics, topics}; + int racks[] = {0, 1, 1, 0}; + + if 
(parametrization != + RD_KAFKA_RANGE_ASSIGNOR_UT_BROKER_AND_CONSUMER_RACK) { + RD_UT_PASS(); + } + + setupRackAwareAssignment(rk, rkas, members, RD_ARRAYSIZE(members), 3, 2, + RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, racks); + verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1 */ + "t1", 0, "t1", 1, "t2", 0, "t2", 1, "t3", 0, "t4", 0, NULL, + /* consumer2 */ + "t1", 2, "t1", 3, "t2", 2, "t2", 3, "t3", 1, "t4", 1, NULL, + /* consumer3 */ + "t1", 4, "t2", 4, NULL, + /* consumer4 */ + "t1", 5, "t2", 5, NULL); + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + + setupRackAwareAssignment0(rk, rkas, members, RD_ARRAYSIZE(members), 2, + 2, RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, racks, + &metadata); + verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1 */ + "t1", 0, "t1", 1, "t2", 0, "t2", 1, "t3", 0, "t4", 0, NULL, + /* consumer2 */ + "t1", 2, "t1", 3, "t2", 2, "t2", 3, "t3", 1, "t4", 1, NULL, + /* consumer3 */ + "t1", 4, "t2", 4, NULL, + /* consumer4 */ + "t1", 5, "t2", 5, NULL); + verifyNumPartitionsWithRackMismatch(metadata, members, + RD_ARRAYSIZE(members), 0); + + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + ut_destroy_metadata(metadata); + + + setupRackAwareAssignment0(rk, rkas, members, RD_ARRAYSIZE(members), 1, + 2, RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, racks, + &metadata); + verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1 */ + "t1", 0, "t1", 2, "t2", 0, "t2", 2, "t3", 0, "t4", 0, NULL, + /* consumer2 */ + "t1", 1, "t1", 3, "t2", 1, "t2", 3, "t3", 1, "t4", 1, NULL, + /* consumer3 */ + "t1", 5, "t2", 5, NULL, + /* consumer4 */ + "t1", 4, "t2", 4, NULL); + verifyNumPartitionsWithRackMismatch(metadata, members, + RD_ARRAYSIZE(members), 0); + + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + ut_destroy_metadata(metadata); + + + setupRackAwareAssignment0(rk, rkas, members, RD_ARRAYSIZE(members), 1, + 3, RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, racks, + &metadata); + verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1 */ + "t1", 0, "t1", 3, "t2", 0, "t2", 3, "t3", 0, "t4", 0, NULL, + /* consumer2 */ + "t1", 1, "t1", 4, "t2", 1, "t2", 4, "t3", 1, "t4", 1, NULL, + /* consumer3 */ + "t1", 2, "t2", 2, NULL, + /* consumer4 */ + "t1", 5, "t2", 5, NULL); + verifyNumPartitionsWithRackMismatch(metadata, members, + RD_ARRAYSIZE(members), 6); + + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + +static int ut_testCoPartitionedAssignmentWithSameSubscription( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + rd_kafka_metadata_t *metadata = NULL; + char *topics[] = {"t1", "t2", "t3", "t4", "t5", "t6"}; + int partitions[] = {6, 6, 2, 2, 4, 4}; + rd_kafka_group_member_t members[3]; + size_t i = 0; + int subscriptions_count[] = {6, 6, 6}; + char **subscriptions[] = {topics, topics, topics}; + + if (parametrization != + RD_KAFKA_RANGE_ASSIGNOR_UT_BROKER_AND_CONSUMER_RACK) { + RD_UT_PASS(); + } + + setupRackAwareAssignment(rk, rkas, members, RD_ARRAYSIZE(members), 3, 0, + RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, + RACKS_INITIAL); + 
verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1 */ + "t1", 0, "t1", 1, "t2", 0, "t2", 1, "t3", 0, "t4", 0, "t5", 0, "t5", + 1, "t6", 0, "t6", 1, NULL, + /* consumer2 */ + "t1", 2, "t1", 3, "t2", 2, "t2", 3, "t3", 1, "t4", 1, "t5", 2, "t6", + 2, NULL, + /* consumer3 */ + "t1", 4, "t1", 5, "t2", 4, "t2", 5, "t5", 3, "t6", 3, NULL); + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + + setupRackAwareAssignment0(rk, rkas, members, RD_ARRAYSIZE(members), 3, + 3, RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, + RACKS_INITIAL, &metadata); + verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1 */ + "t1", 0, "t1", 1, "t2", 0, "t2", 1, "t3", 0, "t4", 0, "t5", 0, "t5", + 1, "t6", 0, "t6", 1, NULL, + /* consumer2 */ + "t1", 2, "t1", 3, "t2", 2, "t2", 3, "t3", 1, "t4", 1, "t5", 2, "t6", + 2, NULL, + /* consumer3 */ + "t1", 4, "t1", 5, "t2", 4, "t2", 5, "t5", 3, "t6", 3, NULL); + verifyNumPartitionsWithRackMismatch(metadata, members, + RD_ARRAYSIZE(members), 0); + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + + +static int rd_kafka_range_assignor_unittest(void) { + rd_kafka_conf_t *conf; + rd_kafka_t *rk; + int fails = 0; + char errstr[256]; + rd_kafka_assignor_t *rkas; + size_t i; + + conf = rd_kafka_conf_new(); + if (rd_kafka_conf_set(conf, "group.id", "test", errstr, + sizeof(errstr)) || + rd_kafka_conf_set(conf, "partition.assignment.strategy", "range", + errstr, sizeof(errstr))) + RD_UT_FAIL("range assignor conf failed: %s", errstr); + + rd_kafka_conf_set(conf, "debug", rd_getenv("TEST_DEBUG", NULL), NULL, + 0); + + rk = rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr, sizeof(errstr)); + RD_UT_ASSERT(rk, "range assignor client instantiation failed: %s", + errstr); + rkas = rd_kafka_assignor_find(rk, "range"); + RD_UT_ASSERT(rkas, "range assignor not found"); + + for (i = 0; i < RD_ARRAY_SIZE(ALL_RACKS) - 1; i++) { + char c = 'a' + i; + ALL_RACKS[i] = rd_kafkap_str_new(&c, 1); + } + ALL_RACKS[i] = NULL; + + static int (*tests[])( + rd_kafka_t *, const rd_kafka_assignor_t *, + rd_kafka_assignor_ut_rack_config_t parametrization) = { + ut_testOneConsumerNoTopic, + ut_testOneConsumerNonexistentTopic, + ut_testOneConsumerOneTopic, + ut_testOnlyAssignsPartitionsFromSubscribedTopics, + ut_testOneConsumerMultipleTopics, + ut_testTwoConsumersOneTopicOnePartition, + ut_testTwoConsumersOneTopicTwoPartitions, + ut_testMultipleConsumersMixedTopicSubscriptions, + ut_testTwoConsumersTwoTopicsSixPartitions, + ut_testRackAwareAssignmentWithUniformSubscription, + ut_testRackAwareAssignmentWithNonEqualSubscription, + ut_testRackAwareAssignmentWithUniformPartitions, + ut_testRackAwareAssignmentWithUniformPartitionsNonEqualSubscription, + ut_testRackAwareAssignmentWithCoPartitioning0, + ut_testRackAwareAssignmentWithCoPartitioning1, + ut_testCoPartitionedAssignmentWithSameSubscription, + NULL, + }; + + for (i = 0; tests[i]; i++) { + rd_ts_t ts = rd_clock(); + int r = 0; + rd_kafka_assignor_ut_rack_config_t j; + + for (j = RD_KAFKA_RANGE_ASSIGNOR_UT_NO_BROKER_RACK; + j != RD_KAFKA_RANGE_ASSIGNOR_UT_CONFIG_CNT; j++) { + RD_UT_SAY("[ Test #%" PRIusz ", RackConfig = %d ]", i, + j); + r += tests[i](rk, rkas, j); + } + RD_UT_SAY("[ Test #%" PRIusz " ran for %.3fms ]", i, + (double)(rd_clock() - ts) / 1000.0); + + RD_UT_ASSERT(!r, "^ failed"); + + fails += r; + } + + for (i = 0; i < RD_ARRAY_SIZE(ALL_RACKS) - 1; 
i++) { + rd_kafkap_str_destroy(ALL_RACKS[i]); + } + + rd_kafka_destroy(rk); + + return fails; +} + + + +/** + * @brief Initialize and add range assignor. + */ +rd_kafka_resp_err_t rd_kafka_range_assignor_init(rd_kafka_t *rk) { + return rd_kafka_assignor_add( + rk, "consumer", "range", RD_KAFKA_REBALANCE_PROTOCOL_EAGER, + rd_kafka_range_assignor_assign_cb, + rd_kafka_assignor_get_metadata_with_empty_userdata, + NULL /* on_assignment_cb */, NULL /* destroy_state_cb */, + rd_kafka_range_assignor_unittest, NULL); +} diff --git a/lib/librdkafka-2.1.0/src/rdkafka_request.c b/lib/librdkafka-2.3.0/src/rdkafka_request.c similarity index 89% rename from lib/librdkafka-2.1.0/src/rdkafka_request.c rename to lib/librdkafka-2.3.0/src/rdkafka_request.c index 12d9eb30e08..b9e250a9e50 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_request.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_request.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -134,6 +135,7 @@ int rd_kafka_err_action(rd_kafka_broker_t *rkb, break; case RD_KAFKA_RESP_ERR__TRANSPORT: + case RD_KAFKA_RESP_ERR__SSL: case RD_KAFKA_RESP_ERR__TIMED_OUT: case RD_KAFKA_RESP_ERR_REQUEST_TIMED_OUT: case RD_KAFKA_RESP_ERR_NOT_ENOUGH_REPLICAS_AFTER_APPEND: @@ -473,25 +475,95 @@ rd_kafka_FindCoordinatorRequest(rd_kafka_broker_t *rkb, return RD_KAFKA_RESP_ERR_NO_ERROR; } +/** + * @struct rd_kafka_ListOffsetRequest_parameters_s + * @brief Parameters for the rd_kafka_make_ListOffsetsRequest function. + */ +typedef struct rd_kafka_ListOffsetRequest_parameters_s { + /** Partitions to request offsets for. */ + rd_kafka_topic_partition_list_t *rktpars; + /** Isolation level. */ + rd_kafka_IsolationLevel_t isolation_level; + /** Error string (optional). */ + char *errstr; + /** Error string size (optional).
*/ + size_t errstr_size; +} rd_kafka_ListOffsetRequest_parameters_t; + + +static rd_kafka_ListOffsetRequest_parameters_t +rd_kafka_ListOffsetRequest_parameters_make( + rd_kafka_topic_partition_list_t *rktpars, + rd_kafka_IsolationLevel_t isolation_level, + char *errstr, + size_t errstr_size) { + rd_kafka_ListOffsetRequest_parameters_t params = RD_ZERO_INIT; + params.rktpars = rktpars; + params.isolation_level = isolation_level; + params.errstr = errstr; + params.errstr_size = errstr_size; + return params; +} + +static rd_kafka_ListOffsetRequest_parameters_t * +rd_kafka_ListOffsetRequest_parameters_new( + rd_kafka_topic_partition_list_t *rktpars, + rd_kafka_IsolationLevel_t isolation_level, + char *errstr, + size_t errstr_size) { + rd_kafka_ListOffsetRequest_parameters_t *params = + rd_calloc(1, sizeof(*params)); + *params = rd_kafka_ListOffsetRequest_parameters_make( + rktpars, isolation_level, errstr, errstr_size); + return params; +} +static void rd_kafka_ListOffsetRequest_parameters_destroy_free(void *opaque) { + rd_kafka_ListOffsetRequest_parameters_t *parameters = opaque; + RD_IF_FREE(parameters->rktpars, rd_kafka_topic_partition_list_destroy); + RD_IF_FREE(parameters->errstr, rd_free); + rd_free(parameters); +} + +static rd_kafka_buf_t * +rd_kafka_ListOffsetRequest_buf_new(rd_kafka_broker_t *rkb, + rd_kafka_topic_partition_list_t *rktpars) { + return rd_kafka_buf_new_flexver_request( + rkb, RD_KAFKAP_ListOffsets, 1, + /* ReplicaId+IsolationLevel+TopicArrayCnt+Topic */ + 4 + 1 + 4 + 100 + + /* PartArrayCnt */ + 4 + + /* partition_cnt * Partition+Time+MaxNumOffs */ + (rktpars->cnt * (4 + 8 + 4)), + rd_false); +} /** * @brief Parses a ListOffsets reply. * * Returns the parsed offsets (and errors) in \p offsets which must have been - * initialized by caller. + * initialized by caller. If \p result_infos is passed instead, + * it's populated with rd_kafka_ListOffsetsResultInfo_t instances. + * + * Either \p offsets or \p result_infos must be passed, + * and the one that is passed is populated. * * @returns 0 on success, else an error (\p offsets may be completely or * partially updated, depending on the nature of the error, and per * partition error codes should be checked by the caller).
*/ -static rd_kafka_resp_err_t +rd_kafka_resp_err_t rd_kafka_parse_ListOffsets(rd_kafka_buf_t *rkbuf, - rd_kafka_topic_partition_list_t *offsets) { + rd_kafka_topic_partition_list_t *offsets, + rd_list_t *result_infos) { const int log_decode_errors = LOG_ERR; int32_t TopicArrayCnt; int16_t api_version; rd_kafka_resp_err_t all_err = RD_KAFKA_RESP_ERR_NO_ERROR; + rd_bool_t return_result_infos; + rd_assert((offsets != NULL) ^ (result_infos != NULL)); + return_result_infos = result_infos != NULL; api_version = rkbuf->rkbuf_reqhdr.ApiVersion; @@ -502,35 +574,37 @@ rd_kafka_parse_ListOffsets(rd_kafka_buf_t *rkbuf, * Broker may return offsets in a different constellation than * in the original request .*/ - rd_kafka_buf_read_i32(rkbuf, &TopicArrayCnt); + rd_kafka_buf_read_arraycnt(rkbuf, &TopicArrayCnt, RD_KAFKAP_TOPICS_MAX); while (TopicArrayCnt-- > 0) { - rd_kafkap_str_t ktopic; + rd_kafkap_str_t Topic; int32_t PartArrayCnt; char *topic_name; - rd_kafka_buf_read_str(rkbuf, &ktopic); - rd_kafka_buf_read_i32(rkbuf, &PartArrayCnt); + rd_kafka_buf_read_str(rkbuf, &Topic); + rd_kafka_buf_read_arraycnt(rkbuf, &PartArrayCnt, + RD_KAFKAP_PARTITIONS_MAX); - RD_KAFKAP_STR_DUPA(&topic_name, &ktopic); + RD_KAFKAP_STR_DUPA(&topic_name, &Topic); while (PartArrayCnt-- > 0) { - int32_t kpartition; + int32_t Partition; int16_t ErrorCode; int32_t OffsetArrayCnt; int64_t Offset = -1; int32_t LeaderEpoch = -1; + int64_t Timestamp = -1; rd_kafka_topic_partition_t *rktpar; - rd_kafka_buf_read_i32(rkbuf, &kpartition); + rd_kafka_buf_read_i32(rkbuf, &Partition); rd_kafka_buf_read_i16(rkbuf, &ErrorCode); if (api_version >= 1) { - int64_t Timestamp; rd_kafka_buf_read_i64(rkbuf, &Timestamp); rd_kafka_buf_read_i64(rkbuf, &Offset); if (api_version >= 4) rd_kafka_buf_read_i32(rkbuf, &LeaderEpoch); + rd_kafka_buf_skip_tags(rkbuf); } else if (api_version == 0) { rd_kafka_buf_read_i32(rkbuf, &OffsetArrayCnt); /* We only request one offset so just grab @@ -541,16 +615,32 @@ rd_kafka_parse_ListOffsets(rd_kafka_buf_t *rkbuf, RD_NOTREACHED(); } - rktpar = rd_kafka_topic_partition_list_add( - offsets, topic_name, kpartition); - rktpar->err = ErrorCode; - rktpar->offset = Offset; - rd_kafka_topic_partition_set_leader_epoch(rktpar, - LeaderEpoch); + if (likely(!return_result_infos)) { + rktpar = rd_kafka_topic_partition_list_add( + offsets, topic_name, Partition); + rktpar->err = ErrorCode; + rktpar->offset = Offset; + rd_kafka_topic_partition_set_leader_epoch( + rktpar, LeaderEpoch); + } else { + rktpar = rd_kafka_topic_partition_new( + topic_name, Partition); + rktpar->err = ErrorCode; + rktpar->offset = Offset; + rd_kafka_topic_partition_set_leader_epoch( + rktpar, LeaderEpoch); + rd_kafka_ListOffsetsResultInfo_t *result_info = + rd_kafka_ListOffsetsResultInfo_new( + rktpar, Timestamp); + rd_list_add(result_infos, result_info); + rd_kafka_topic_partition_destroy(rktpar); + } if (ErrorCode && !all_err) all_err = ErrorCode; } + + rd_kafka_buf_skip_tags(rkbuf); } return all_err; @@ -559,91 +649,6 @@ rd_kafka_parse_ListOffsets(rd_kafka_buf_t *rkbuf, return rkbuf->rkbuf_err; } - - -/** - * @brief Parses and handles ListOffsets replies. - * - * Returns the parsed offsets (and errors) in \p offsets. - * \p offsets must be initialized by the caller. - * - * @returns 0 on success, else an error. \p offsets may be populated on error, - * depending on the nature of the error. - * On error \p actionsp (unless NULL) is updated with the recommended - * error actions. 
- */ -rd_kafka_resp_err_t -rd_kafka_handle_ListOffsets(rd_kafka_t *rk, - rd_kafka_broker_t *rkb, - rd_kafka_resp_err_t err, - rd_kafka_buf_t *rkbuf, - rd_kafka_buf_t *request, - rd_kafka_topic_partition_list_t *offsets, - int *actionsp) { - - int actions; - - if (!err) - err = rd_kafka_parse_ListOffsets(rkbuf, offsets); - if (!err) - return RD_KAFKA_RESP_ERR_NO_ERROR; - - actions = rd_kafka_err_action( - rkb, err, request, RD_KAFKA_ERR_ACTION_PERMANENT, - RD_KAFKA_RESP_ERR_UNKNOWN_TOPIC_OR_PART, - - RD_KAFKA_ERR_ACTION_REFRESH, - RD_KAFKA_RESP_ERR_NOT_LEADER_FOR_PARTITION, - - RD_KAFKA_ERR_ACTION_REFRESH, - RD_KAFKA_RESP_ERR_REPLICA_NOT_AVAILABLE, - - RD_KAFKA_ERR_ACTION_REFRESH, RD_KAFKA_RESP_ERR_KAFKA_STORAGE_ERROR, - - RD_KAFKA_ERR_ACTION_REFRESH, RD_KAFKA_RESP_ERR_OFFSET_NOT_AVAILABLE, - - RD_KAFKA_ERR_ACTION_REFRESH | RD_KAFKA_ERR_ACTION_RETRY, - RD_KAFKA_RESP_ERR_LEADER_NOT_AVAILABLE, - - RD_KAFKA_ERR_ACTION_REFRESH | RD_KAFKA_ERR_ACTION_RETRY, - RD_KAFKA_RESP_ERR_FENCED_LEADER_EPOCH, - - RD_KAFKA_ERR_ACTION_REFRESH | RD_KAFKA_ERR_ACTION_RETRY, - RD_KAFKA_RESP_ERR_UNKNOWN_LEADER_EPOCH, - - RD_KAFKA_ERR_ACTION_RETRY, RD_KAFKA_RESP_ERR__TRANSPORT, - - RD_KAFKA_ERR_ACTION_RETRY, RD_KAFKA_RESP_ERR_REQUEST_TIMED_OUT, - - - RD_KAFKA_ERR_ACTION_END); - - if (actionsp) - *actionsp = actions; - - if (rkb) - rd_rkb_dbg( - rkb, TOPIC, "OFFSET", "OffsetRequest failed: %s (%s)", - rd_kafka_err2str(err), rd_kafka_actions2str(actions)); - - if (actions & RD_KAFKA_ERR_ACTION_REFRESH) { - char tmp[256]; - /* Re-query for leader */ - rd_snprintf(tmp, sizeof(tmp), "ListOffsetsRequest failed: %s", - rd_kafka_err2str(err)); - rd_kafka_metadata_refresh_known_topics(rk, NULL, - rd_true /*force*/, tmp); - } - - if ((actions & RD_KAFKA_ERR_ACTION_RETRY) && - rd_kafka_buf_retry(rkb, request)) - return RD_KAFKA_RESP_ERR__IN_PROGRESS; - - return err; -} - - - /** * @brief Async maker for ListOffsetsRequest. 
*/ @@ -651,8 +656,11 @@ static rd_kafka_resp_err_t rd_kafka_make_ListOffsetsRequest(rd_kafka_broker_t *rkb, rd_kafka_buf_t *rkbuf, void *make_opaque) { - const rd_kafka_topic_partition_list_t *partitions = - (const rd_kafka_topic_partition_list_t *)make_opaque; + rd_kafka_ListOffsetRequest_parameters_t *parameters = make_opaque; + const rd_kafka_topic_partition_list_t *partitions = parameters->rktpars; + int isolation_level = parameters->isolation_level; + char *errstr = parameters->errstr; + size_t errstr_size = parameters->errstr_size; int i; size_t of_TopicArrayCnt = 0, of_PartArrayCnt = 0; const char *last_topic = ""; @@ -660,20 +668,31 @@ rd_kafka_make_ListOffsetsRequest(rd_kafka_broker_t *rkb, int16_t ApiVersion; ApiVersion = rd_kafka_broker_ApiVersion_supported( - rkb, RD_KAFKAP_ListOffsets, 0, 5, NULL); - if (ApiVersion == -1) + rkb, RD_KAFKAP_ListOffsets, 0, 7, NULL); + if (ApiVersion == -1) { + if (errstr) { + rd_snprintf( + errstr, errstr_size, + "ListOffsets (KIP-396) not supported " + "by broker, requires broker version >= 2.5.0"); + } return RD_KAFKA_RESP_ERR__UNSUPPORTED_FEATURE; + } + + if (ApiVersion >= 6) { + rd_kafka_buf_upgrade_flexver_request(rkbuf); + } /* ReplicaId */ rd_kafka_buf_write_i32(rkbuf, -1); /* IsolationLevel */ if (ApiVersion >= 2) - rd_kafka_buf_write_i8(rkbuf, - rkb->rkb_rk->rk_conf.isolation_level); + rd_kafka_buf_write_i8(rkbuf, isolation_level); /* TopicArrayCnt */ - of_TopicArrayCnt = rd_kafka_buf_write_i32(rkbuf, 0); /* updated later */ + of_TopicArrayCnt = + rd_kafka_buf_write_arraycnt_pos(rkbuf); /* updated later */ for (i = 0; i < partitions->cnt; i++) { const rd_kafka_topic_partition_t *rktpar = @@ -681,9 +700,12 @@ rd_kafka_make_ListOffsetsRequest(rd_kafka_broker_t *rkb, if (strcmp(rktpar->topic, last_topic)) { /* Finish last topic, if any. */ - if (of_PartArrayCnt > 0) - rd_kafka_buf_update_i32(rkbuf, of_PartArrayCnt, - part_cnt); + if (of_PartArrayCnt > 0) { + rd_kafka_buf_finalize_arraycnt( + rkbuf, of_PartArrayCnt, part_cnt); + /* Topics tags */ + rd_kafka_buf_write_tags(rkbuf); + } /* Topic */ rd_kafka_buf_write_str(rkbuf, rktpar->topic, -1); @@ -693,7 +715,8 @@ rd_kafka_make_ListOffsetsRequest(rd_kafka_broker_t *rkb, part_cnt = 0; /* PartitionArrayCnt: updated later */ - of_PartArrayCnt = rd_kafka_buf_write_i32(rkbuf, 0); + of_PartArrayCnt = + rd_kafka_buf_write_arraycnt_pos(rkbuf); } /* Partition */ @@ -714,12 +737,18 @@ rd_kafka_make_ListOffsetsRequest(rd_kafka_broker_t *rkb, /* MaxNumberOfOffsets */ rd_kafka_buf_write_i32(rkbuf, 1); } + + /* Partitions tags */ + rd_kafka_buf_write_tags(rkbuf); } if (of_PartArrayCnt > 0) { - rd_kafka_buf_update_i32(rkbuf, of_PartArrayCnt, part_cnt); - rd_kafka_buf_update_i32(rkbuf, of_TopicArrayCnt, topic_cnt); + rd_kafka_buf_finalize_arraycnt(rkbuf, of_PartArrayCnt, + part_cnt); + /* Topics tags */ + rd_kafka_buf_write_tags(rkbuf); } + rd_kafka_buf_finalize_arraycnt(rkbuf, of_TopicArrayCnt, topic_cnt); rd_kafka_buf_ApiVersion_set(rkbuf, ApiVersion, 0); @@ -732,39 +761,166 @@ rd_kafka_make_ListOffsetsRequest(rd_kafka_broker_t *rkb, return RD_KAFKA_RESP_ERR_NO_ERROR; } - /** * @brief Send ListOffsetsRequest for partitions in \p partitions. + * Set absolute timeout \p timeout_ms if >= 0. 
*/ void rd_kafka_ListOffsetsRequest(rd_kafka_broker_t *rkb, rd_kafka_topic_partition_list_t *partitions, rd_kafka_replyq_t replyq, rd_kafka_resp_cb_t *resp_cb, + int timeout_ms, void *opaque) { rd_kafka_buf_t *rkbuf; - rd_kafka_topic_partition_list_t *make_parts; + rd_kafka_topic_partition_list_t *rktpars; + rd_kafka_ListOffsetRequest_parameters_t *params; - make_parts = rd_kafka_topic_partition_list_copy(partitions); - rd_kafka_topic_partition_list_sort_by_topic(make_parts); + rktpars = rd_kafka_topic_partition_list_copy(partitions); + rd_kafka_topic_partition_list_sort_by_topic(rktpars); - rkbuf = rd_kafka_buf_new_request( - rkb, RD_KAFKAP_ListOffsets, 1, - /* ReplicaId+IsolationLevel+TopicArrayCnt+Topic */ - 4 + 1 + 4 + 100 + - /* PartArrayCnt */ - 4 + - /* partition_cnt * Partition+Time+MaxNumOffs */ - (make_parts->cnt * (4 + 8 + 4))); + params = rd_kafka_ListOffsetRequest_parameters_new( + rktpars, + (rd_kafka_IsolationLevel_t)rkb->rkb_rk->rk_conf.isolation_level, + NULL, 0); + + rkbuf = rd_kafka_ListOffsetRequest_buf_new(rkb, partitions); + + if (timeout_ms >= 0) + rd_kafka_buf_set_abs_timeout(rkbuf, timeout_ms, 0); /* Postpone creating the request contents until time to send, * at which time the ApiVersion is known. */ - rd_kafka_buf_set_maker(rkbuf, rd_kafka_make_ListOffsetsRequest, - make_parts, - rd_kafka_topic_partition_list_destroy_free); + rd_kafka_buf_set_maker( + rkbuf, rd_kafka_make_ListOffsetsRequest, params, + rd_kafka_ListOffsetRequest_parameters_destroy_free); rd_kafka_broker_buf_enq_replyq(rkb, rkbuf, replyq, resp_cb, opaque); } +/** + * @brief Send ListOffsetsRequest for offsets contained in the first + * element of \p offsets, that is a rd_kafka_topic_partition_list_t. + * AdminClient compatible request callback. + */ +rd_kafka_resp_err_t rd_kafka_ListOffsetsRequest_admin( + rd_kafka_broker_t *rkb, + const rd_list_t *offsets /* rd_kafka_topic_partition_list_t*/, + rd_kafka_AdminOptions_t *options, + char *errstr, + size_t errstr_size, + rd_kafka_replyq_t replyq, + rd_kafka_resp_cb_t *resp_cb, + void *opaque) { + rd_kafka_ListOffsetRequest_parameters_t params; + rd_kafka_IsolationLevel_t isolation_level; + rd_kafka_topic_partition_list_t *topic_partitions; + rd_kafka_buf_t *rkbuf; + rd_kafka_resp_err_t err; + topic_partitions = rd_list_elem(offsets, 0); + + isolation_level = RD_KAFKA_ISOLATION_LEVEL_READ_UNCOMMITTED; + if (options && options->isolation_level.u.INT.v) + isolation_level = options->isolation_level.u.INT.v; + + params = rd_kafka_ListOffsetRequest_parameters_make( + topic_partitions, isolation_level, errstr, errstr_size); + + rkbuf = rd_kafka_ListOffsetRequest_buf_new(rkb, topic_partitions); + + err = rd_kafka_make_ListOffsetsRequest(rkb, rkbuf, ¶ms); + + if (err) { + rd_kafka_buf_destroy(rkbuf); + rd_kafka_replyq_destroy(&replyq); + return err; + } + + rd_kafka_broker_buf_enq_replyq(rkb, rkbuf, replyq, resp_cb, opaque); + + return RD_KAFKA_RESP_ERR_NO_ERROR; +} + +/** + * @brief Parses and handles ListOffsets replies. + * + * Returns the parsed offsets (and errors) in \p offsets. + * \p offsets must be initialized by the caller. + * + * @returns 0 on success, else an error. \p offsets may be populated on error, + * depending on the nature of the error. + * On error \p actionsp (unless NULL) is updated with the recommended + * error actions. 
+ */ +rd_kafka_resp_err_t +rd_kafka_handle_ListOffsets(rd_kafka_t *rk, + rd_kafka_broker_t *rkb, + rd_kafka_resp_err_t err, + rd_kafka_buf_t *rkbuf, + rd_kafka_buf_t *request, + rd_kafka_topic_partition_list_t *offsets, + int *actionsp) { + + int actions; + + if (!err) { + err = rd_kafka_parse_ListOffsets(rkbuf, offsets, NULL); + } + if (!err) + return RD_KAFKA_RESP_ERR_NO_ERROR; + + actions = rd_kafka_err_action( + rkb, err, request, RD_KAFKA_ERR_ACTION_PERMANENT, + RD_KAFKA_RESP_ERR_UNKNOWN_TOPIC_OR_PART, + + RD_KAFKA_ERR_ACTION_REFRESH, + RD_KAFKA_RESP_ERR_NOT_LEADER_FOR_PARTITION, + + RD_KAFKA_ERR_ACTION_REFRESH, + RD_KAFKA_RESP_ERR_REPLICA_NOT_AVAILABLE, + + RD_KAFKA_ERR_ACTION_REFRESH, RD_KAFKA_RESP_ERR_KAFKA_STORAGE_ERROR, + + RD_KAFKA_ERR_ACTION_REFRESH, RD_KAFKA_RESP_ERR_OFFSET_NOT_AVAILABLE, + + RD_KAFKA_ERR_ACTION_REFRESH | RD_KAFKA_ERR_ACTION_RETRY, + RD_KAFKA_RESP_ERR_LEADER_NOT_AVAILABLE, + + RD_KAFKA_ERR_ACTION_REFRESH | RD_KAFKA_ERR_ACTION_RETRY, + RD_KAFKA_RESP_ERR_FENCED_LEADER_EPOCH, + + RD_KAFKA_ERR_ACTION_REFRESH | RD_KAFKA_ERR_ACTION_RETRY, + RD_KAFKA_RESP_ERR_UNKNOWN_LEADER_EPOCH, + + RD_KAFKA_ERR_ACTION_RETRY, RD_KAFKA_RESP_ERR__TRANSPORT, + + RD_KAFKA_ERR_ACTION_RETRY, RD_KAFKA_RESP_ERR_REQUEST_TIMED_OUT, + + RD_KAFKA_ERR_ACTION_END); + + if (actionsp) + *actionsp = actions; + + if (rkb) + rd_rkb_dbg( + rkb, TOPIC, "OFFSET", "OffsetRequest failed: %s (%s)", + rd_kafka_err2str(err), rd_kafka_actions2str(actions)); + + if (actions & RD_KAFKA_ERR_ACTION_REFRESH) { + char tmp[256]; + /* Re-query for leader */ + rd_snprintf(tmp, sizeof(tmp), "ListOffsetsRequest failed: %s", + rd_kafka_err2str(err)); + rd_kafka_metadata_refresh_known_topics(rk, NULL, + rd_true /*force*/, tmp); + } + + if ((actions & RD_KAFKA_ERR_ACTION_RETRY) && + rd_kafka_buf_retry(rkb, request)) + return RD_KAFKA_RESP_ERR__IN_PROGRESS; + + return err; +} + /** * @brief OffsetForLeaderEpochResponse handler. @@ -1178,7 +1334,7 @@ void rd_kafka_OffsetFetchRequest(rd_kafka_broker_t *rkb, rkbuf, parts, rd_false /*include invalid offsets*/, rd_false /*skip valid offsets */, fields); } else { - rd_kafka_buf_write_arraycnt_pos(rkbuf); + rd_kafka_buf_write_arraycnt(rkbuf, PartCnt); } if (ApiVersion >= 7) { @@ -1765,7 +1921,8 @@ void rd_kafka_JoinGroupRequest(rd_kafka_broker_t *rkb, rd_kafka_buf_write_kstr(rkbuf, rkas->rkas_protocol_name); member_metadata = rkas->rkas_get_metadata_cb( rkas, rk->rk_cgrp->rkcg_assignor_state, topics, - rk->rk_cgrp->rkcg_group_assignment); + rk->rk_cgrp->rkcg_group_assignment, + rk->rk_conf.client_rack); rd_kafka_buf_write_kbytes(rkbuf, member_metadata); rd_kafkap_bytes_destroy(member_metadata); } @@ -2014,6 +2171,8 @@ rd_kafka_error_t *rd_kafka_ListGroupsRequest(rd_kafka_broker_t *rkb, * with the groups (const char *) in \p groups. * Uses \p max_ApiVersion as maximum API version, * pass -1 to use the maximum available version. + * Uses \p include_authorized_operations to get + * group ACL authorized operations. * * The response (unparsed) will be enqueued on \p replyq * for handling by \p resp_cb (with \p opaque passed). @@ -2021,13 +2180,15 @@ rd_kafka_error_t *rd_kafka_ListGroupsRequest(rd_kafka_broker_t *rkb, * @return NULL on success, a new error instance that must be * released with rd_kafka_error_destroy() in case of error. 
*/ -rd_kafka_error_t *rd_kafka_DescribeGroupsRequest(rd_kafka_broker_t *rkb, - int16_t max_ApiVersion, - char **groups, - size_t group_cnt, - rd_kafka_replyq_t replyq, - rd_kafka_resp_cb_t *resp_cb, - void *opaque) { +rd_kafka_error_t * +rd_kafka_DescribeGroupsRequest(rd_kafka_broker_t *rkb, + int16_t max_ApiVersion, + char **groups, + size_t group_cnt, + rd_bool_t include_authorized_operations, + rd_kafka_replyq_t replyq, + rd_kafka_resp_cb_t *resp_cb, + void *opaque) { rd_kafka_buf_t *rkbuf; int16_t ApiVersion = 0; size_t of_GroupsArrayCnt; @@ -2064,8 +2225,7 @@ rd_kafka_error_t *rd_kafka_DescribeGroupsRequest(rd_kafka_broker_t *rkb, /* write IncludeAuthorizedOperations */ if (ApiVersion >= 3) { - /* TODO: implement KIP-430 */ - rd_kafka_buf_write_bool(rkbuf, rd_false); + rd_kafka_buf_write_bool(rkbuf, include_authorized_operations); } rd_kafka_buf_ApiVersion_set(rkbuf, ApiVersion, 0); @@ -2084,9 +2244,9 @@ static void rd_kafka_handle_Metadata(rd_kafka_t *rk, rd_kafka_buf_t *rkbuf, rd_kafka_buf_t *request, void *opaque) { - rd_kafka_op_t *rko = opaque; /* Possibly NULL */ - struct rd_kafka_metadata *md = NULL; - const rd_list_t *topics = request->rkbuf_u.Metadata.topics; + rd_kafka_op_t *rko = opaque; /* Possibly NULL */ + rd_kafka_metadata_internal_t *mdi = NULL; + const rd_list_t *topics = request->rkbuf_u.Metadata.topics; int actions; rd_kafka_assert(NULL, err == RD_KAFKA_RESP_ERR__DESTROY || @@ -2113,21 +2273,21 @@ static void rd_kafka_handle_Metadata(rd_kafka_t *rk, rd_list_cnt(topics), request->rkbuf_u.Metadata.reason); - err = rd_kafka_parse_Metadata(rkb, request, rkbuf, &md); + err = rd_kafka_parse_Metadata(rkb, request, rkbuf, &mdi); if (err) goto err; if (rko && rko->rko_replyq.q) { /* Reply to metadata requester, passing on the metadata. * Reuse requesting rko for the reply. */ - rko->rko_err = err; - rko->rko_u.metadata.md = md; - + rko->rko_err = err; + rko->rko_u.metadata.md = &mdi->metadata; + rko->rko_u.metadata.mdi = mdi; rd_kafka_replyq_enq(&rko->rko_replyq, rko, 0); rko = NULL; } else { - if (md) - rd_free(md); + if (mdi) + rd_free(mdi); } goto done; @@ -2153,8 +2313,9 @@ static void rd_kafka_handle_Metadata(rd_kafka_t *rk, rd_kafka_actions2str(actions)); /* Respond back to caller on non-retriable errors */ if (rko && rko->rko_replyq.q) { - rko->rko_err = err; - rko->rko_u.metadata.md = NULL; + rko->rko_err = err; + rko->rko_u.metadata.md = NULL; + rko->rko_u.metadata.mdi = NULL; rd_kafka_replyq_enq(&rko->rko_replyq, rko, 0); rko = NULL; } @@ -2171,55 +2332,71 @@ static void rd_kafka_handle_Metadata(rd_kafka_t *rk, /** - * @brief Construct MetadataRequest (does not send) + * @brief Internal implementation of MetadataRequest (does not send). * - * \p topics is a list of topic names (char *) to request. + * @param force - rd_true: force a full request (including all topics and + * brokers) even if there is such a request already + * in flight. + * - rd_false: check if there are multiple outstanding full + * requests, and don't send one if there is already + * one present. (See note below.) * - * !topics - only request brokers (if supported by broker, else - * all topics) - * topics.cnt==0 - all topics in cluster are requested - * topics.cnt >0 - only specified topics are requested - * - * @param reason - metadata request reason - * @param allow_auto_create_topics - allow broker-side auto topic creation. - * This is best-effort, depending on broker - * config and version. - * @param cgrp_update - Update cgrp in parse_Metadata (see comment there). 
- * @param rko - (optional) rko with replyq for handling response. - * Specifying an rko forces a metadata request even if - * there is already a matching one in-transit. + * If full metadata for all topics is requested (or + * all brokers, which results in all-topics on older brokers) and there is + * already a full request in transit then this function will return + * RD_KAFKA_RESP_ERR__PREV_IN_PROGRESS otherwise RD_KAFKA_RESP_ERR_NO_ERROR. + * If \p rko is non-NULL or if \p force is true, the request is sent regardless. * - * If full metadata for all topics is requested (or all brokers, which - * results in all-topics on older brokers) and there is already a full request - * in transit then this function will return RD_KAFKA_RESP_ERR__PREV_IN_PROGRESS - * otherwise RD_KAFKA_RESP_ERR_NO_ERROR. If \p rko is non-NULL the request - * is sent regardless. + * \p include_cluster_authorized_operations should not be set unless this + * MetadataRequest is for an admin operation. \sa + * rd_kafka_MetadataRequest_admin(). */ -rd_kafka_resp_err_t rd_kafka_MetadataRequest(rd_kafka_broker_t *rkb, - const rd_list_t *topics, - const char *reason, - rd_bool_t allow_auto_create_topics, - rd_bool_t cgrp_update, - rd_kafka_op_t *rko) { +static rd_kafka_resp_err_t +rd_kafka_MetadataRequest0(rd_kafka_broker_t *rkb, + const rd_list_t *topics, + const char *reason, + rd_bool_t allow_auto_create_topics, + rd_bool_t include_cluster_authorized_operations, + rd_bool_t include_topic_authorized_operations, + rd_bool_t cgrp_update, + rd_bool_t force_racks, + rd_kafka_op_t *rko, + rd_kafka_resp_cb_t *resp_cb, + rd_kafka_replyq_t replyq, + rd_bool_t force, + void *opaque) { rd_kafka_buf_t *rkbuf; int16_t ApiVersion = 0; size_t of_TopicArrayCnt; int features; - int topic_cnt = topics ? rd_list_cnt(topics) : 0; - int *full_incr = NULL; + int topic_cnt = topics ? rd_list_cnt(topics) : 0; + int *full_incr = NULL; + void *handler_arg = NULL; + rd_kafka_resp_cb_t *handler_cb = rd_kafka_handle_Metadata; + int16_t metadata_max_version = 12; + rd_kafka_replyq_t use_replyq = replyq; + + /* In case we want cluster authorized operations in the Metadata + * request, we must send a request with version not exceeding 10 because + * KIP-700 deprecates those fields from the Metadata RPC. */ + if (include_cluster_authorized_operations) + metadata_max_version = RD_MIN(metadata_max_version, 10); ApiVersion = rd_kafka_broker_ApiVersion_supported( - rkb, RD_KAFKAP_Metadata, 0, 9, &features); + rkb, RD_KAFKAP_Metadata, 0, metadata_max_version, &features); - rkbuf = rd_kafka_buf_new_flexver_request(rkb, RD_KAFKAP_Metadata, 1, - 4 + (50 * topic_cnt) + 1, - ApiVersion >= 9); + rkbuf = rd_kafka_buf_new_flexver_request( + rkb, RD_KAFKAP_Metadata, 1, + 4 + (66 /* 50 for topic name and 16 for topic id */ * topic_cnt) + + 1, + ApiVersion >= 9); if (!reason) reason = ""; rkbuf->rkbuf_u.Metadata.reason = rd_strdup(reason); rkbuf->rkbuf_u.Metadata.cgrp_update = cgrp_update; + rkbuf->rkbuf_u.Metadata.force_racks = force_racks; /* TopicArrayCnt */ of_TopicArrayCnt = rd_kafka_buf_write_arraycnt_pos(rkbuf); @@ -2271,11 +2448,12 @@ rd_kafka_resp_err_t rd_kafka_MetadataRequest(rd_kafka_broker_t *rkb, if (full_incr) { /* Avoid multiple outstanding full requests * (since they are redundant and side-effect-less). - * Forced requests (app using metadata() API) are passed - * through regardless. */ + * Forced requests (app using metadata() API or Admin API) are + * passed through regardless. 
*/ mtx_lock(&rkb->rkb_rk->rk_metadata_cache.rkmc_full_lock); - if (*full_incr > 0 && (!rko || !rko->rko_u.metadata.force)) { + if (!force && + (*full_incr > 0 && (!rko || !rko->rko_u.metadata.force))) { mtx_unlock( &rkb->rkb_rk->rk_metadata_cache.rkmc_full_lock); rd_rkb_dbg(rkb, METADATA, "METADATA", @@ -2297,6 +2475,7 @@ rd_kafka_resp_err_t rd_kafka_MetadataRequest(rd_kafka_broker_t *rkb, if (topic_cnt > 0) { char *topic; int i; + rd_kafka_Uuid_t zero_uuid = RD_KAFKA_UUID_ZERO; /* Maintain a copy of the topics list so we can purge * hints from the metadata cache on error. */ @@ -2304,6 +2483,12 @@ rd_kafka_resp_err_t rd_kafka_MetadataRequest(rd_kafka_broker_t *rkb, rd_list_copy(topics, rd_list_string_copy, NULL); RD_LIST_FOREACH(topic, topics, i) { + if (ApiVersion >= 10) { + /* FIXME: Not supporting topic id in the request + * right now. Update this to correct topic + * id once KIP-516 is fully implemented. */ + rd_kafka_buf_write_uuid(rkbuf, &zero_uuid); + } rd_kafka_buf_write_str(rkbuf, topic, -1); /* Tags for previous topic */ rd_kafka_buf_write_tags(rkbuf); @@ -2329,16 +2514,16 @@ rd_kafka_resp_err_t rd_kafka_MetadataRequest(rd_kafka_broker_t *rkb, "on broker auto.create.topics.enable configuration"); } - if (ApiVersion >= 8 && ApiVersion < 10) { - /* TODO: implement KIP-430 */ + if (ApiVersion >= 8 && ApiVersion <= 10) { /* IncludeClusterAuthorizedOperations */ - rd_kafka_buf_write_bool(rkbuf, rd_false); + rd_kafka_buf_write_bool(rkbuf, + include_cluster_authorized_operations); } if (ApiVersion >= 8) { - /* TODO: implement KIP-430 */ /* IncludeTopicAuthorizedOperations */ - rd_kafka_buf_write_bool(rkbuf, rd_false); + rd_kafka_buf_write_bool(rkbuf, + include_topic_authorized_operations); } rd_kafka_buf_ApiVersion_set(rkbuf, ApiVersion, 0); @@ -2347,17 +2532,157 @@ rd_kafka_resp_err_t rd_kafka_MetadataRequest(rd_kafka_broker_t *rkb, * and should go before most other requests (Produce, Fetch, etc). */ rkbuf->rkbuf_prio = RD_KAFKA_PRIO_HIGH; - rd_kafka_broker_buf_enq_replyq(rkb, rkbuf, - /* Handle response thru rk_ops, - * but forward parsed result to - * rko's replyq when done. */ - RD_KAFKA_REPLYQ(rkb->rkb_rk->rk_ops, 0), - rd_kafka_handle_Metadata, rko); + /* The default handler is rd_kafka_handle_Metadata, but it can be + * overridden to use a custom handler. */ + if (resp_cb) + handler_cb = resp_cb; + + /* If a custom handler is provided, we also allow the caller to set a + * custom argument which is passed as the opaque argument to the + * handler. However, if we're using the default handler, it expects + * either rko or NULL as its opaque argument (it forwards the response + * to rko's replyq if it's non-NULL). */ + if (resp_cb && opaque) + handler_arg = opaque; + else + handler_arg = rko; + + /* If a custom replyq is provided (and is valid), the response is + * handled on that replyq. By default, the response is handled on + * rk_ops, and the default handler (rd_kafka_handle_Metadata) forwards + * the parsed result to rko's replyq when done. */ + if (!use_replyq.q) + use_replyq = RD_KAFKA_REPLYQ(rkb->rkb_rk->rk_ops, 0); + + rd_kafka_broker_buf_enq_replyq( + rkb, rkbuf, use_replyq, + /* The default response handler is rd_kafka_handle_Metadata, but we allow alternate handlers to be configured. */ + handler_cb, handler_arg); return RD_KAFKA_RESP_ERR_NO_ERROR; } +/** + * @brief Construct a MetadataRequest which uses an optional rko, and the + * default handler callback. + * @sa rd_kafka_MetadataRequest.
+ */ +static rd_kafka_resp_err_t +rd_kafka_MetadataRequest_op(rd_kafka_broker_t *rkb, + const rd_list_t *topics, + const char *reason, + rd_bool_t allow_auto_create_topics, + rd_bool_t include_cluster_authorized_operations, + rd_bool_t include_topic_authorized_operations, + rd_bool_t cgrp_update, + rd_bool_t force_racks, + rd_kafka_op_t *rko) { + return rd_kafka_MetadataRequest0( + rkb, topics, reason, allow_auto_create_topics, + include_cluster_authorized_operations, + include_topic_authorized_operations, cgrp_update, force_racks, rko, + /* We use the default rd_kafka_handle_Metadata rather than a custom + resp_cb */ + NULL, + /* Use default replyq which works with the default handler + rd_kafka_handle_Metadata. */ + RD_KAFKA_NO_REPLYQ, + /* If the request needs to be forced, rko_u.metadata.force will be + set. We don't provide an explicit parameter force. */ + rd_false, NULL); +} + +/** + * @brief Construct MetadataRequest (does not send) + * + * \p topics is a list of topic names (char *) to request. + * + * !topics - only request brokers (if supported by broker, else + * all topics) + * topics.cnt==0 - all topics in cluster are requested + * topics.cnt >0 - only specified topics are requested + * + * @param reason - metadata request reason + * @param allow_auto_create_topics - allow broker-side auto topic creation. + * This is best-effort, depending on broker + * config and version. + * @param cgrp_update - Update cgrp in parse_Metadata (see comment there). + * @param force_racks - Force partition to rack mapping computation in + * parse_Metadata (see comment there). + * @param rko - (optional) rko with replyq for handling response. + * Specifying an rko forces a metadata request even if + * there is already a matching one in-transit. + * + * If full metadata for all topics is requested (or + * all brokers, which results in all-topics on older brokers) and there is + * already a full request in transit then this function will return + * RD_KAFKA_RESP_ERR__PREV_IN_PROGRESS otherwise RD_KAFKA_RESP_ERR_NO_ERROR. + * If \p rko is non-NULL, the request is sent regardless. + */ +rd_kafka_resp_err_t rd_kafka_MetadataRequest(rd_kafka_broker_t *rkb, + const rd_list_t *topics, + const char *reason, + rd_bool_t allow_auto_create_topics, + rd_bool_t cgrp_update, + rd_bool_t force_racks, + rd_kafka_op_t *rko) { + return rd_kafka_MetadataRequest_op( + rkb, topics, reason, allow_auto_create_topics, + /* Cluster and Topic authorized operations are used by admin + * operations only. For non-admin operation cases, NEVER set them to + * true, since it changes the metadata max version to be 10, until + * KIP-700 can be implemented. */ + rd_false, rd_false, cgrp_update, force_racks, rko); +} + + +/** + * @brief Construct MetadataRequest for use with AdminAPI (does not send). + * + * \p topics is a list of topic names (char *) to request. + * + * !topics - only request brokers (if supported by broker, else + * all topics) + * topics.cnt==0 - all topics in cluster are requested + * topics.cnt >0 - only specified topics are requested + * + * @param reason - metadata request reason + * @param include_cluster_authorized_operations - request for cluster + * authorized operations. + * @param include_topic_authorized_operations - request for topic authorized + * operations. + * @param cgrp_update - Update cgrp in parse_Metadata (see comment there). + * @param force_racks - Force partition to rack mapping computation in + * parse_Metadata (see comment there). 
+ * @param resp_cb - callback to be used for handling response. + * @param replyq - replyq on which response is handled. + * @param opaque - (optional) parameter to be passed to resp_cb. + */ +rd_kafka_resp_err_t rd_kafka_MetadataRequest_resp_cb( + rd_kafka_broker_t *rkb, + const rd_list_t *topics, + const char *reason, + rd_bool_t allow_auto_create_topics, + rd_bool_t include_cluster_authorized_operations, + rd_bool_t include_topic_authorized_operations, + rd_bool_t cgrp_update, + rd_bool_t force_racks, + rd_kafka_resp_cb_t *resp_cb, + rd_kafka_replyq_t replyq, + rd_bool_t force, + void *opaque) { + return rd_kafka_MetadataRequest0( + rkb, topics, reason, allow_auto_create_topics, + include_cluster_authorized_operations, + include_topic_authorized_operations, cgrp_update, force_racks, + NULL /* No op - using custom resp_cb. */, resp_cb, replyq, + rd_true /* Admin operation metadata requests are always forced. */, + opaque); +} + + /** * @brief Parses and handles ApiVersion reply. @@ -2602,7 +2927,19 @@ void rd_kafka_handle_SaslAuthenticate(rd_kafka_t *rk, goto err; } - rd_kafka_buf_read_bytes(rkbuf, &auth_data); + rd_kafka_buf_read_kbytes(rkbuf, &auth_data); + + if (request->rkbuf_reqhdr.ApiVersion >= 1) { + int64_t session_lifetime_ms; + rd_kafka_buf_read_i64(rkbuf, &session_lifetime_ms); + + if (session_lifetime_ms) + rd_kafka_dbg( + rk, SECURITY, "REAUTH", + "Received session lifetime %ld ms from broker", + session_lifetime_ms); + rd_kafka_broker_start_reauth_timer(rkb, session_lifetime_ms); + } /* Pass SASL auth frame to SASL handler */ if (rd_kafka_sasl_recv(rkb->rkb_transport, auth_data.data, @@ -2637,6 +2974,8 @@ void rd_kafka_SaslAuthenticateRequest(rd_kafka_broker_t *rkb, rd_kafka_resp_cb_t *resp_cb, void *opaque) { rd_kafka_buf_t *rkbuf; + int16_t ApiVersion; + int features; rkbuf = rd_kafka_buf_new_request(rkb, RD_KAFKAP_SaslAuthenticate, 0, 0); @@ -2651,6 +2990,10 @@ void rd_kafka_SaslAuthenticateRequest(rd_kafka_broker_t *rkb, * close down the connection and reconnect on failure. */ rkbuf->rkbuf_max_retries = RD_KAFKA_REQUEST_NO_RETRIES; + ApiVersion = rd_kafka_broker_ApiVersion_supported( + rkb, RD_KAFKAP_SaslAuthenticate, 0, 1, &features); + rd_kafka_buf_ApiVersion_set(rkbuf, ApiVersion, 0); + if (replyq.q) rd_kafka_broker_buf_enq_replyq(rkb, rkbuf, replyq, resp_cb, opaque); @@ -3310,17 +3653,12 @@ static int rd_kafka_handle_Produce_error(rd_kafka_broker_t *rkb, * which should not be treated as a fatal error * since this request and sub-sequent requests * will be retried and thus return to order. - * Unless the error was a timeout, or similar, - * in which case the request might have made it - * and the messages are considered possibly persisted: - * in this case we allow the next in-flight response - * to be successful, in which case we mark - * this request's messages as succesfully delivered. */ - if (perr->status & - RD_KAFKA_MSG_STATUS_POSSIBLY_PERSISTED) - perr->update_next_ack = rd_true; - else - perr->update_next_ack = rd_false; + * In case the message is possibly persisted + * we still treat it as not persisted, + * expecting DUPLICATE_SEQUENCE_NUMBER + * in case it was persisted or NO_ERROR in case + * it wasn't. 
*/ + perr->update_next_ack = rd_false; perr->update_next_err = rd_true; /* Drain outstanding requests so that retries @@ -3601,7 +3939,7 @@ static void rd_kafka_msgbatch_handle_Produce_result( .err = err, .incr_retry = 1, .status = status, - .update_next_ack = rd_true, + .update_next_ack = rd_false, .update_next_err = rd_true, .last_seq = (batch->first_seq + rd_kafka_msgq_len(&batch->msgq) - 1)}; @@ -4188,7 +4526,7 @@ rd_kafka_AlterConfigsRequest(rd_kafka_broker_t *rkb, } ApiVersion = rd_kafka_broker_ApiVersion_supported( - rkb, RD_KAFKAP_AlterConfigs, 0, 1, NULL); + rkb, RD_KAFKAP_AlterConfigs, 0, 2, NULL); if (ApiVersion == -1) { rd_snprintf(errstr, errstr_size, "AlterConfigs (KIP-133) not supported " @@ -4197,52 +4535,121 @@ rd_kafka_AlterConfigsRequest(rd_kafka_broker_t *rkb, return RD_KAFKA_RESP_ERR__UNSUPPORTED_FEATURE; } - /* Incremental requires IncrementalAlterConfigs */ - if (rd_kafka_confval_get_int(&options->incremental)) { + rkbuf = rd_kafka_buf_new_flexver_request(rkb, RD_KAFKAP_AlterConfigs, 1, + rd_list_cnt(configs) * 200, + ApiVersion >= 2); + + /* #Resources */ + rd_kafka_buf_write_arraycnt(rkbuf, rd_list_cnt(configs)); + + RD_LIST_FOREACH(config, configs, i) { + const rd_kafka_ConfigEntry_t *entry; + int ei; + + /* ResourceType */ + rd_kafka_buf_write_i8(rkbuf, config->restype); + + /* ResourceName */ + rd_kafka_buf_write_str(rkbuf, config->name, -1); + + /* #Configs */ + rd_kafka_buf_write_arraycnt(rkbuf, + rd_list_cnt(&config->config)); + + RD_LIST_FOREACH(entry, &config->config, ei) { + /* Name */ + rd_kafka_buf_write_str(rkbuf, entry->kv->name, -1); + /* Value (nullable) */ + rd_kafka_buf_write_str(rkbuf, entry->kv->value, -1); + + rd_kafka_buf_write_tags(rkbuf); + } + + rd_kafka_buf_write_tags(rkbuf); + } + + /* timeout */ + op_timeout = rd_kafka_confval_get_int(&options->operation_timeout); + if (op_timeout > rkb->rkb_rk->rk_conf.socket_timeout_ms) + rd_kafka_buf_set_abs_timeout(rkbuf, op_timeout + 1000, 0); + + /* validate_only */ + rd_kafka_buf_write_i8( + rkbuf, rd_kafka_confval_get_int(&options->validate_only)); + + rd_kafka_buf_ApiVersion_set(rkbuf, ApiVersion, 0); + + rd_kafka_broker_buf_enq_replyq(rkb, rkbuf, replyq, resp_cb, opaque); + + return RD_KAFKA_RESP_ERR_NO_ERROR; +} + + +rd_kafka_resp_err_t rd_kafka_IncrementalAlterConfigsRequest( + rd_kafka_broker_t *rkb, + const rd_list_t *configs /*(ConfigResource_t*)*/, + rd_kafka_AdminOptions_t *options, + char *errstr, + size_t errstr_size, + rd_kafka_replyq_t replyq, + rd_kafka_resp_cb_t *resp_cb, + void *opaque) { + rd_kafka_buf_t *rkbuf; + int16_t ApiVersion = 0; + int i; + const rd_kafka_ConfigResource_t *config; + int op_timeout; + + if (rd_list_cnt(configs) == 0) { + rd_snprintf(errstr, errstr_size, + "No config resources specified"); + rd_kafka_replyq_destroy(&replyq); + return RD_KAFKA_RESP_ERR__INVALID_ARG; + } + + ApiVersion = rd_kafka_broker_ApiVersion_supported( + rkb, RD_KAFKAP_IncrementalAlterConfigs, 0, 1, NULL); + if (ApiVersion == -1) { rd_snprintf(errstr, errstr_size, - "AlterConfigs.incremental=true (KIP-248) " - "not supported by broker, " - "replaced by IncrementalAlterConfigs"); + "IncrementalAlterConfigs (KIP-339) not supported " + "by broker, requires broker version >= 2.3.0"); rd_kafka_replyq_destroy(&replyq); return RD_KAFKA_RESP_ERR__UNSUPPORTED_FEATURE; } - rkbuf = rd_kafka_buf_new_request(rkb, RD_KAFKAP_AlterConfigs, 1, - rd_list_cnt(configs) * 200); + rkbuf = rd_kafka_buf_new_flexver_request( + rkb, RD_KAFKAP_IncrementalAlterConfigs, 1, + rd_list_cnt(configs) * 200, ApiVersion >= 
1); - /* #resources */ - rd_kafka_buf_write_i32(rkbuf, rd_list_cnt(configs)); + /* #Resources */ + rd_kafka_buf_write_arraycnt(rkbuf, rd_list_cnt(configs)); RD_LIST_FOREACH(config, configs, i) { const rd_kafka_ConfigEntry_t *entry; int ei; - /* resource_type */ + /* ResourceType */ rd_kafka_buf_write_i8(rkbuf, config->restype); - /* resource_name */ + /* ResourceName */ rd_kafka_buf_write_str(rkbuf, config->name, -1); - /* #config */ - rd_kafka_buf_write_i32(rkbuf, rd_list_cnt(&config->config)); + /* #Configs */ + rd_kafka_buf_write_arraycnt(rkbuf, + rd_list_cnt(&config->config)); RD_LIST_FOREACH(entry, &config->config, ei) { - /* config_name */ + /* Name */ rd_kafka_buf_write_str(rkbuf, entry->kv->name, -1); - /* config_value (nullable) */ + /* ConfigOperation */ + rd_kafka_buf_write_i8(rkbuf, entry->a.op_type); + /* Value (nullable) */ rd_kafka_buf_write_str(rkbuf, entry->kv->value, -1); - if (entry->a.operation != RD_KAFKA_ALTER_OP_SET) { - rd_snprintf(errstr, errstr_size, - "IncrementalAlterConfigs required " - "for add/delete config " - "entries: only set supported " - "by this operation"); - rd_kafka_buf_destroy(rkbuf); - rd_kafka_replyq_destroy(&replyq); - return RD_KAFKA_RESP_ERR__UNSUPPORTED_FEATURE; - } + rd_kafka_buf_write_tags(rkbuf); } + + rd_kafka_buf_write_tags(rkbuf); } /* timeout */ @@ -4250,7 +4657,7 @@ rd_kafka_AlterConfigsRequest(rd_kafka_broker_t *rkb, if (op_timeout > rkb->rkb_rk->rk_conf.socket_timeout_ms) rd_kafka_buf_set_abs_timeout(rkbuf, op_timeout + 1000, 0); - /* validate_only */ + /* ValidateOnly */ rd_kafka_buf_write_i8( rkbuf, rd_kafka_confval_get_int(&options->validate_only)); @@ -4261,7 +4668,6 @@ rd_kafka_AlterConfigsRequest(rd_kafka_broker_t *rkb, return RD_KAFKA_RESP_ERR_NO_ERROR; } - /** * @brief Construct and send DescribeConfigsRequest to \p rkb * with the configs (ConfigResource_t*) in \p configs, using diff --git a/lib/librdkafka-2.1.0/src/rdkafka_request.h b/lib/librdkafka-2.3.0/src/rdkafka_request.h similarity index 91% rename from lib/librdkafka-2.1.0/src/rdkafka_request.h rename to lib/librdkafka-2.3.0/src/rdkafka_request.h index 3eda6be61c4..ec94b0a5a06 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_request.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_request.h @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. 
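
Note on the two config-request hunks above: moving from rd_kafka_buf_new_request()/rd_kafka_buf_write_i32() to rd_kafka_buf_new_flexver_request()/rd_kafka_buf_write_arraycnt(), plus the new rd_kafka_buf_write_tags() calls, switches AlterConfigs v2 and the new IncrementalAlterConfigs v1 (KIP-339) onto Kafka's KIP-482 "flexible versions" encoding: array lengths become compact counts serialized as cnt + 1 in an unsigned varint (0 meaning a null array), and every structure ends with a tagged-field section (a single 0 byte when no tags are set). A minimal sketch of the compact count encoding, assuming only the KIP-482 wire spec (the helper name is illustrative, not librdkafka's):

    #include <stddef.h>
    #include <stdint.h>

    /* Serialize a KIP-482 compact array count: (cnt + 1) as an
     * unsigned varint, 7 bits per byte, low bits first, high bit
     * set on continuation bytes. cnt == -1 encodes a null array. */
    static size_t write_compact_count(uint8_t *buf, int32_t cnt) {
        uint32_t v  = (uint32_t)(cnt + 1);
        size_t   of = 0;
        while (v >= 0x80) {
            buf[of++] = (uint8_t)(v & 0x7f) | 0x80;
            v >>= 7;
        }
        buf[of++] = (uint8_t)v;
        return of; /* bytes written, at most 5 for a 32-bit count */
    }
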
* * Redistribution and use in source and binary forms, with or without @@ -100,6 +101,7 @@ rd_kafka_FindCoordinatorRequest(rd_kafka_broker_t *rkb, rd_kafka_resp_cb_t *resp_cb, void *opaque); + rd_kafka_resp_err_t rd_kafka_handle_ListOffsets(rd_kafka_t *rk, rd_kafka_broker_t *rkb, @@ -113,8 +115,24 @@ void rd_kafka_ListOffsetsRequest(rd_kafka_broker_t *rkb, rd_kafka_topic_partition_list_t *offsets, rd_kafka_replyq_t replyq, rd_kafka_resp_cb_t *resp_cb, + int timeout_ms, void *opaque); +rd_kafka_resp_err_t +rd_kafka_ListOffsetsRequest_admin(rd_kafka_broker_t *rkb, + const rd_list_t *offsets, + rd_kafka_AdminOptions_t *options, + char *errstr, + size_t errstr_size, + rd_kafka_replyq_t replyq, + rd_kafka_resp_cb_t *resp_cb, + void *opaque); + +rd_kafka_resp_err_t +rd_kafka_parse_ListOffsets(rd_kafka_buf_t *rkbuf, + rd_kafka_topic_partition_list_t *offsets, + rd_list_t *result_infos); + rd_kafka_resp_err_t rd_kafka_handle_OffsetForLeaderEpoch(rd_kafka_t *rk, rd_kafka_broker_t *rkb, @@ -235,13 +253,15 @@ rd_kafka_error_t *rd_kafka_ListGroupsRequest(rd_kafka_broker_t *rkb, rd_kafka_resp_cb_t *resp_cb, void *opaque); -rd_kafka_error_t *rd_kafka_DescribeGroupsRequest(rd_kafka_broker_t *rkb, - int16_t max_ApiVersion, - char **groups, - size_t group_cnt, - rd_kafka_replyq_t replyq, - rd_kafka_resp_cb_t *resp_cb, - void *opaque); +rd_kafka_error_t * +rd_kafka_DescribeGroupsRequest(rd_kafka_broker_t *rkb, + int16_t max_ApiVersion, + char **groups, + size_t group_cnt, + rd_bool_t include_authorized_operations, + rd_kafka_replyq_t replyq, + rd_kafka_resp_cb_t *resp_cb, + void *opaque); void rd_kafka_HeartbeatRequest(rd_kafka_broker_t *rkb, @@ -258,8 +278,23 @@ rd_kafka_resp_err_t rd_kafka_MetadataRequest(rd_kafka_broker_t *rkb, const char *reason, rd_bool_t allow_auto_create_topics, rd_bool_t cgrp_update, + rd_bool_t force_racks, rd_kafka_op_t *rko); +rd_kafka_resp_err_t rd_kafka_MetadataRequest_resp_cb( + rd_kafka_broker_t *rkb, + const rd_list_t *topics, + const char *reason, + rd_bool_t allow_auto_create_topics, + rd_bool_t include_cluster_authorized_operations, + rd_bool_t include_topic_authorized_operations, + rd_bool_t cgrp_update, + rd_bool_t force_racks, + rd_kafka_resp_cb_t *resp_cb, + rd_kafka_replyq_t replyq, + rd_bool_t force, + void *opaque); + rd_kafka_resp_err_t rd_kafka_handle_ApiVersion(rd_kafka_t *rk, rd_kafka_broker_t *rkb, @@ -286,6 +321,7 @@ void rd_kafka_handle_SaslAuthenticate(rd_kafka_t *rk, rd_kafka_buf_t *rkbuf, rd_kafka_buf_t *request, void *opaque); + void rd_kafka_SaslAuthenticateRequest(rd_kafka_broker_t *rkb, const void *buf, size_t size, @@ -338,6 +374,16 @@ rd_kafka_AlterConfigsRequest(rd_kafka_broker_t *rkb, rd_kafka_resp_cb_t *resp_cb, void *opaque); +rd_kafka_resp_err_t rd_kafka_IncrementalAlterConfigsRequest( + rd_kafka_broker_t *rkb, + const rd_list_t *configs /*(ConfigResource_t*)*/, + rd_kafka_AdminOptions_t *options, + char *errstr, + size_t errstr_size, + rd_kafka_replyq_t replyq, + rd_kafka_resp_cb_t *resp_cb, + void *opaque); + rd_kafka_resp_err_t rd_kafka_DescribeConfigsRequest( rd_kafka_broker_t *rkb, const rd_list_t *configs /*(ConfigResource_t*)*/, @@ -417,7 +463,6 @@ rd_kafka_resp_err_t rd_kafka_EndTxnRequest(rd_kafka_broker_t *rkb, int unittest_request(void); - rd_kafka_resp_err_t rd_kafka_DeleteRecordsRequest(rd_kafka_broker_t *rkb, /*(rd_topic_partition_list_t*)*/ diff --git a/lib/librdkafka-2.1.0/src/rdkafka_roundrobin_assignor.c b/lib/librdkafka-2.3.0/src/rdkafka_roundrobin_assignor.c similarity index 99% rename from 
lib/librdkafka-2.1.0/src/rdkafka_roundrobin_assignor.c rename to lib/librdkafka-2.3.0/src/rdkafka_roundrobin_assignor.c index 6cb91936452..28d437f4f79 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_roundrobin_assignor.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_roundrobin_assignor.c @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2015 Magnus Edenhill + * Copyright (c) 2015-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_sasl.c b/lib/librdkafka-2.3.0/src/rdkafka_sasl.c similarity index 98% rename from lib/librdkafka-2.1.0/src/rdkafka_sasl.c rename to lib/librdkafka-2.3.0/src/rdkafka_sasl.c index cab67f241f7..32ebe3b198e 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_sasl.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_sasl.c @@ -1,7 +1,8 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2015 Magnus Edenhill + * Copyright (c) 2015-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -206,6 +207,11 @@ int rd_kafka_sasl_io_event(rd_kafka_transport_t *rktrans, * @remark May be called on non-SASL transports (no-op) */ void rd_kafka_sasl_close(rd_kafka_transport_t *rktrans) { + /* The broker might not be up, and the transport might not exist in that + * case.*/ + if (!rktrans) + return; + const struct rd_kafka_sasl_provider *provider = rktrans->rktrans_rkb->rkb_rk->rk_conf.sasl.provider; diff --git a/lib/librdkafka-2.1.0/src/rdkafka_sasl.h b/lib/librdkafka-2.3.0/src/rdkafka_sasl.h similarity index 98% rename from lib/librdkafka-2.1.0/src/rdkafka_sasl.h rename to lib/librdkafka-2.3.0/src/rdkafka_sasl.h index d0dd01b8b21..0ac12c5d210 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_sasl.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_sasl.h @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2015 Magnus Edenhill + * Copyright (c) 2015-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_sasl_cyrus.c b/lib/librdkafka-2.3.0/src/rdkafka_sasl_cyrus.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_sasl_cyrus.c rename to lib/librdkafka-2.3.0/src/rdkafka_sasl_cyrus.c index 41452a33647..89ff15c4272 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_sasl_cyrus.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_sasl_cyrus.c @@ -1,7 +1,8 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2015 Magnus Edenhill + * Copyright (c) 2015-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. 
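
The NULL-transport guard added to rd_kafka_sasl_close() above pairs with the rktrans_sasl.state = NULL assignments the following hunks add to each provider's close routine (cyrus, oauthbearer, scram, win32): together they make SASL teardown idempotent, so closing a transport that never finished its handshake, or closing it twice, no longer touches freed or NULL state. Reduced to the bare pattern (types and names here are illustrative only):

    #include <stdlib.h>

    struct transport { void *sasl_state; };

    /* Idempotent close: tolerate a missing object, and clear the
     * pointer after freeing so a repeated close is a no-op. */
    static void provider_close(struct transport *t) {
        if (!t || !t->sasl_state)
            return;
        free(t->sasl_state);
        t->sasl_state = NULL;
    }
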
* * Redistribution and use in source and binary forms, with or without @@ -489,6 +490,7 @@ static void rd_kafka_sasl_cyrus_close(struct rd_kafka_transport_s *rktrans) { mtx_unlock(&rktrans->rktrans_rkb->rkb_rk->rk_conf.sasl.lock); } rd_free(state); + rktrans->rktrans_sasl.state = NULL; } diff --git a/lib/librdkafka-2.1.0/src/rdkafka_sasl_int.h b/lib/librdkafka-2.3.0/src/rdkafka_sasl_int.h similarity index 98% rename from lib/librdkafka-2.1.0/src/rdkafka_sasl_int.h rename to lib/librdkafka-2.3.0/src/rdkafka_sasl_int.h index 33e3bdd05f6..8a49a6a2964 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_sasl_int.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_sasl_int.h @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2015 Magnus Edenhill + * Copyright (c) 2015-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_sasl_oauthbearer.c b/lib/librdkafka-2.3.0/src/rdkafka_sasl_oauthbearer.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_sasl_oauthbearer.c rename to lib/librdkafka-2.3.0/src/rdkafka_sasl_oauthbearer.c index 39b165a7dc7..2065751ccbb 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_sasl_oauthbearer.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_sasl_oauthbearer.c @@ -1,7 +1,8 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2019 Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -1041,6 +1042,7 @@ static void rd_kafka_sasl_oauthbearer_close(rd_kafka_transport_t *rktrans) { rd_free(state->md_principal_name); rd_list_destroy(&state->extensions); rd_free(state); + rktrans->rktrans_sasl.state = NULL; } @@ -1300,6 +1302,16 @@ static int rd_kafka_sasl_oauthbearer_init(rd_kafka_t *rk, rd_list_init(&handle->extensions, 0, (void (*)(void *))rd_strtup_destroy); + + if (rk->rk_conf.sasl.enable_callback_queue) { + /* SASL specific callback queue enabled */ + rk->rk_sasl.callback_q = rd_kafka_q_new(rk); + handle->callback_q = rd_kafka_q_keep(rk->rk_sasl.callback_q); + } else { + /* Use main queue */ + handle->callback_q = rd_kafka_q_keep(rk->rk_rep); + } + rd_kafka_timer_start( &rk->rk_timers, &handle->token_refresh_tmr, 1 * 1000 * 1000, rd_kafka_sasl_oauthbearer_token_refresh_tmr_cb, rk); @@ -1316,14 +1328,6 @@ static int rd_kafka_sasl_oauthbearer_init(rd_kafka_t *rk, return 0; } - if (rk->rk_conf.sasl.enable_callback_queue) { - /* SASL specific callback queue enabled */ - rk->rk_sasl.callback_q = rd_kafka_q_new(rk); - handle->callback_q = rd_kafka_q_keep(rk->rk_sasl.callback_q); - } else { - /* Use main queue */ - handle->callback_q = rd_kafka_q_keep(rk->rk_rep); - } #if WITH_OAUTHBEARER_OIDC if (rk->rk_conf.sasl.oauthbearer.method == diff --git a/lib/librdkafka-2.1.0/src/rdkafka_sasl_oauthbearer.h b/lib/librdkafka-2.3.0/src/rdkafka_sasl_oauthbearer.h similarity index 98% rename from lib/librdkafka-2.1.0/src/rdkafka_sasl_oauthbearer.h rename to lib/librdkafka-2.3.0/src/rdkafka_sasl_oauthbearer.h index 75ab51d02fb..cdcea0608cd 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_sasl_oauthbearer.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_sasl_oauthbearer.h @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2019 Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill * All rights reserved. 
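
Also worth flagging in the rd_kafka_sasl_oauthbearer_init() hunk above: the callback-queue selection block was moved ahead of the branch that can return 0 early, so handle->callback_q is now initialized on every init path instead of only when the function reached its old tail. From the application side that queue is opted into through the public API; a short sketch, with error handling elided:

    #include <librdkafka/rdkafka.h>

    /* Route SASL OAUTHBEARER events (e.g. token refresh) to a
     * dedicated queue instead of the main event queue. */
    static rd_kafka_queue_t *setup_sasl_queue(rd_kafka_conf_t *conf) {
        char errstr[512];
        rd_kafka_t *rk;

        rd_kafka_conf_enable_sasl_queue(conf, 1); /* before rd_kafka_new() */
        rk = rd_kafka_new(RD_KAFKA_PRODUCER, conf, errstr, sizeof(errstr));
        return rk ? rd_kafka_queue_get_sasl(rk) : NULL;
    }
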
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_sasl_oauthbearer_oidc.c b/lib/librdkafka-2.3.0/src/rdkafka_sasl_oauthbearer_oidc.c similarity index 97% rename from lib/librdkafka-2.1.0/src/rdkafka_sasl_oauthbearer_oidc.c rename to lib/librdkafka-2.3.0/src/rdkafka_sasl_oauthbearer_oidc.c index 6c2773b027b..d56efbf3554 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_sasl_oauthbearer_oidc.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_sasl_oauthbearer_oidc.c @@ -1,7 +1,9 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2021 Magnus Edenhill + * Copyright (c) 2021-2022, Magnus Edenhill + * 2023, Confluent Inc. + * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,25 +39,7 @@ #include #include "rdhttp.h" #include "rdkafka_sasl_oauthbearer_oidc.h" - - -/** - * @brief Base64 encode binary input \p in, and write base64-encoded string - * and it's size to \p out - */ -static void rd_base64_encode(const rd_chariov_t *in, rd_chariov_t *out) { - size_t max_len; - - max_len = (((in->size + 2) / 3) * 4) + 1; - out->ptr = rd_malloc(max_len); - rd_assert(out->ptr); - - out->size = EVP_EncodeBlock((uint8_t *)out->ptr, (uint8_t *)in->ptr, - (int)in->size); - - rd_assert(out->size <= max_len); - out->ptr[out->size] = 0; -} +#include "rdbase64.h" /** @@ -84,6 +68,7 @@ static char *rd_kafka_oidc_build_auth_header(const char *client_id, client_authorization_in.size--; rd_base64_encode(&client_authorization_in, &client_authorization_out); + rd_assert(client_authorization_out.ptr); authorization_base64_header_size = strlen("Authorization: Basic ") + client_authorization_out.size + 1; diff --git a/lib/librdkafka-2.1.0/src/rdkafka_sasl_oauthbearer_oidc.h b/lib/librdkafka-2.3.0/src/rdkafka_sasl_oauthbearer_oidc.h similarity index 97% rename from lib/librdkafka-2.1.0/src/rdkafka_sasl_oauthbearer_oidc.h rename to lib/librdkafka-2.3.0/src/rdkafka_sasl_oauthbearer_oidc.h index a944f2efa10..f46bf1beb7a 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_sasl_oauthbearer_oidc.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_sasl_oauthbearer_oidc.h @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2021 Magnus Edenhill + * Copyright (c) 2021-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_sasl_plain.c b/lib/librdkafka-2.3.0/src/rdkafka_sasl_plain.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_sasl_plain.c rename to lib/librdkafka-2.3.0/src/rdkafka_sasl_plain.c index 1e715cfba22..cca9957c83c 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_sasl_plain.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_sasl_plain.c @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2017 Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_sasl_scram.c b/lib/librdkafka-2.3.0/src/rdkafka_sasl_scram.c similarity index 88% rename from lib/librdkafka-2.1.0/src/rdkafka_sasl_scram.c rename to lib/librdkafka-2.3.0/src/rdkafka_sasl_scram.c index 7d5db564964..01a6cd75e45 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_sasl_scram.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_sasl_scram.c @@ -1,7 +1,8 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2017 Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,6 +38,7 @@ #include "rdkafka_sasl_int.h" #include "rdrand.h" #include "rdunittest.h" +#include "rdbase64.h" #if WITH_SSL @@ -76,6 +78,7 @@ static void rd_kafka_sasl_scram_close(rd_kafka_transport_t *rktrans) { RD_IF_FREE(state->first_msg_bare.ptr, rd_free); RD_IF_FREE(state->ServerSignatureB64, rd_free); rd_free(state); + rktrans->rktrans_sasl.state = NULL; } @@ -140,77 +143,6 @@ static char *rd_kafka_sasl_scram_get_attr(const rd_chariov_t *inbuf, } -/** - * @brief Base64 encode binary input \p in - * @returns a newly allocated, base64-encoded string or NULL on error. - */ -static char *rd_base64_encode(const rd_chariov_t *in) { - char *ret; - size_t ret_len, max_len; - - /* OpenSSL takes an |int| argument so the input cannot exceed that. */ - if (in->size > INT_MAX) { - return NULL; - } - - /* This does not overflow given the |INT_MAX| bound, above. */ - max_len = (((in->size + 2) / 3) * 4) + 1; - ret = rd_malloc(max_len); - if (ret == NULL) { - return NULL; - } - - ret_len = - EVP_EncodeBlock((uint8_t *)ret, (uint8_t *)in->ptr, (int)in->size); - assert(ret_len < max_len); - ret[ret_len] = 0; - - return ret; -} - - -/** - * @brief Base64 decode input string \p in. Ignores leading and trailing - * whitespace. - * @returns -1 on invalid Base64, or 0 on successes in which case a - * newly allocated binary string is set in out (and size). - */ -static int rd_base64_decode(const rd_chariov_t *in, rd_chariov_t *out) { - size_t ret_len; - - /* OpenSSL takes an |int| argument, so |in->size| must not exceed - * that. */ - if (in->size % 4 != 0 || in->size > INT_MAX) { - return -1; - } - - ret_len = ((in->size / 4) * 3); - out->ptr = rd_malloc(ret_len + 1); - - if (EVP_DecodeBlock((uint8_t *)out->ptr, (uint8_t *)in->ptr, - (int)in->size) == -1) { - rd_free(out->ptr); - out->ptr = NULL; - return -1; - } - - /* EVP_DecodeBlock will pad the output with trailing NULs and count - * them in the return value. */ - if (in->size > 1 && in->ptr[in->size - 1] == '=') { - if (in->size > 2 && in->ptr[in->size - 2] == '=') { - ret_len -= 2; - } else { - ret_len -= 1; - } - } - - out->ptr[ret_len] = 0; - out->size = ret_len; - - return 0; -} - - /** * @brief Perform H(str) hash function and stores the result in \p out * which must be at least EVP_MAX_MD_SIZE. 
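
The OIDC and SCRAM hunks above delete two private copies of the base64 helpers (the OIDC one asserted on failure, the SCRAM one returned NULL) in favour of the shared rdbase64.h; judging by the new call sites, rd_base64_encode_str keeps the SCRAM-style NULL-on-error contract while rd_base64_encode retains the asserting one. The buffer sizing both copies relied on is the standard EVP_EncodeBlock bound:

    #include <stddef.h>

    /* Base64 output size for n input bytes: ceil(n / 3) groups of 4
     * characters, plus 1 for the NUL that EVP_EncodeBlock appends. */
    static size_t b64_encoded_size(size_t n) {
        return ((n + 2) / 3) * 4 + 1;
    }
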
@@ -254,8 +186,6 @@ static int rd_kafka_sasl_scram_HMAC(rd_kafka_transport_t *rktrans, return 0; } - - /** * @brief Perform \p itcnt iterations of HMAC() on the given buffer \p in * using \p salt, writing the output into \p out which must be @@ -267,57 +197,14 @@ static int rd_kafka_sasl_scram_Hi(rd_kafka_transport_t *rktrans, const rd_chariov_t *salt, int itcnt, rd_chariov_t *out) { + rd_kafka_broker_t *rkb = rktrans->rktrans_rkb; const EVP_MD *evp = rktrans->rktrans_rkb->rkb_rk->rk_conf.sasl.scram_evp; - unsigned int ressize = 0; - unsigned char tempres[EVP_MAX_MD_SIZE]; - unsigned char *saltplus; - int i; - - /* U1 := HMAC(str, salt + INT(1)) */ - saltplus = rd_alloca(salt->size + 4); - memcpy(saltplus, salt->ptr, salt->size); - saltplus[salt->size] = 0; - saltplus[salt->size + 1] = 0; - saltplus[salt->size + 2] = 0; - saltplus[salt->size + 3] = 1; - - /* U1 := HMAC(str, salt + INT(1)) */ - if (!HMAC(evp, (const unsigned char *)in->ptr, (int)in->size, saltplus, - salt->size + 4, tempres, &ressize)) { - rd_rkb_dbg(rktrans->rktrans_rkb, SECURITY, "SCRAM", - "HMAC priming failed"); - return -1; - } - - memcpy(out->ptr, tempres, ressize); - - /* Ui-1 := HMAC(str, Ui-2) .. */ - for (i = 1; i < itcnt; i++) { - unsigned char tempdest[EVP_MAX_MD_SIZE]; - int j; - - if (unlikely(!HMAC(evp, (const unsigned char *)in->ptr, - (int)in->size, tempres, ressize, tempdest, - NULL))) { - rd_rkb_dbg(rktrans->rktrans_rkb, SECURITY, "SCRAM", - "Hi() HMAC #%d/%d failed", i, itcnt); - return -1; - } - - /* U1 XOR U2 .. */ - for (j = 0; j < (int)ressize; j++) { - out->ptr[j] ^= tempdest[j]; - tempres[j] = tempdest[j]; - } - } - - out->size = ressize; - - return 0; + return rd_kafka_ssl_hmac(rkb, evp, in, salt, itcnt, out); } + /** * @returns a SASL value-safe-char encoded string, replacing "," and "=" * with their escaped counterparts in a newly allocated string. @@ -486,7 +373,7 @@ static int rd_kafka_sasl_scram_build_client_final_message( } /* Store the Base64 encoded ServerSignature for quick comparison */ - state->ServerSignatureB64 = rd_base64_encode(&ServerSignature); + state->ServerSignatureB64 = rd_base64_encode_str(&ServerSignature); if (state->ServerSignatureB64 == NULL) { rd_free(client_final_msg_wo_proof.ptr); return -1; @@ -511,7 +398,7 @@ static int rd_kafka_sasl_scram_build_client_final_message( /* Base64 encoded ClientProof */ - ClientProofB64 = rd_base64_encode(&ClientProof); + ClientProofB64 = rd_base64_encode_str(&ClientProof); if (ClientProofB64 == NULL) { rd_free(client_final_msg_wo_proof.ptr); return -1; diff --git a/lib/librdkafka-2.1.0/src/rdkafka_sasl_win32.c b/lib/librdkafka-2.3.0/src/rdkafka_sasl_win32.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_sasl_win32.c rename to lib/librdkafka-2.3.0/src/rdkafka_sasl_win32.c index b07e1808d0b..b968bcece36 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_sasl_win32.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_sasl_win32.c @@ -1,7 +1,8 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2016 Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -490,6 +491,7 @@ static void rd_kafka_sasl_win32_close(rd_kafka_transport_t *rktrans) { rd_free(state->cred); } rd_free(state); + rktrans->rktrans_sasl.state = NULL; } diff --git a/lib/librdkafka-2.1.0/src/rdkafka_ssl.c b/lib/librdkafka-2.3.0/src/rdkafka_ssl.c similarity index 96% rename from lib/librdkafka-2.1.0/src/rdkafka_ssl.c rename to lib/librdkafka-2.3.0/src/rdkafka_ssl.c index 9961a240f71..85f745cb9ca 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_ssl.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_ssl.c @@ -1,7 +1,8 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2019 Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -1722,6 +1723,14 @@ int rd_kafka_ssl_ctx_init(rd_kafka_t *rk, char *errstr, size_t errstr_size) { goto fail; +#ifdef SSL_OP_IGNORE_UNEXPECTED_EOF + /* Ignore unexpected EOF error in OpenSSL 3.x, treating + * it like a normal connection close even if + * close_notify wasn't received. + * see issue #4293 */ + SSL_CTX_set_options(ctx, SSL_OP_IGNORE_UNEXPECTED_EOF); +#endif + SSL_CTX_set_mode(ctx, SSL_MODE_ENABLE_PARTIAL_WRITE); rk->rk_conf.ssl.ctx = ctx; @@ -1839,3 +1848,56 @@ void rd_kafka_ssl_init(void) { OpenSSL_add_all_algorithms(); #endif } + +int rd_kafka_ssl_hmac(rd_kafka_broker_t *rkb, + const EVP_MD *evp, + const rd_chariov_t *in, + const rd_chariov_t *salt, + int itcnt, + rd_chariov_t *out) { + unsigned int ressize = 0; + unsigned char tempres[EVP_MAX_MD_SIZE]; + unsigned char *saltplus; + int i; + + /* U1 := HMAC(str, salt + INT(1)) */ + saltplus = rd_alloca(salt->size + 4); + memcpy(saltplus, salt->ptr, salt->size); + saltplus[salt->size] = 0; + saltplus[salt->size + 1] = 0; + saltplus[salt->size + 2] = 0; + saltplus[salt->size + 3] = 1; + + /* U1 := HMAC(str, salt + INT(1)) */ + if (!HMAC(evp, (const unsigned char *)in->ptr, (int)in->size, saltplus, + salt->size + 4, tempres, &ressize)) { + rd_rkb_dbg(rkb, SECURITY, "SSLHMAC", "HMAC priming failed"); + return -1; + } + + memcpy(out->ptr, tempres, ressize); + + /* Ui-1 := HMAC(str, Ui-2) .. */ + for (i = 1; i < itcnt; i++) { + unsigned char tempdest[EVP_MAX_MD_SIZE]; + int j; + + if (unlikely(!HMAC(evp, (const unsigned char *)in->ptr, + (int)in->size, tempres, ressize, tempdest, + NULL))) { + rd_rkb_dbg(rkb, SECURITY, "SSLHMAC", + "Hi() HMAC #%d/%d failed", i, itcnt); + return -1; + } + + /* U1 XOR U2 .. */ + for (j = 0; j < (int)ressize; j++) { + out->ptr[j] ^= tempdest[j]; + tempres[j] = tempdest[j]; + } + } + + out->size = ressize; + + return 0; +} diff --git a/lib/librdkafka-2.1.0/src/rdkafka_ssl.h b/lib/librdkafka-2.3.0/src/rdkafka_ssl.h similarity index 89% rename from lib/librdkafka-2.1.0/src/rdkafka_ssl.h rename to lib/librdkafka-2.3.0/src/rdkafka_ssl.h index 325abbe1d47..4dce0b1f872 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_ssl.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_ssl.h @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2019 Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill * All rights reserved. 
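
With the hunk above, the SCRAM code's hand-rolled iterated HMAC moves into rdkafka_ssl.c as rd_kafka_ssl_hmac(), and rd_kafka_sasl_scram_Hi() becomes a one-line wrapper over it. What it computes is RFC 5802's Hi(str, salt, i), i.e. PBKDF2 with HMAC as the PRF and a single output block, so under that reading OpenSSL's stock KDF makes a convenient reference for cross-checking the loop:

    #include <openssl/evp.h>

    /* PBKDF2-HMAC with dkLen == digest size equals RFC 5802 Hi().
     * out must hold EVP_MD_size(md) bytes; returns 1 on success. */
    static int scram_hi_ref(const EVP_MD *md, const char *str, int str_len,
                            const unsigned char *salt, int salt_len,
                            int itcnt, unsigned char *out) {
        return PKCS5_PBKDF2_HMAC(str, str_len, salt, salt_len, itcnt,
                                 md, EVP_MD_size(md), out);
    }

(The same rdkafka_ssl.c diff also sets SSL_OP_IGNORE_UNEXPECTED_EOF when that OpenSSL 3.x flag exists, so a peer that drops the connection without close_notify is treated as a normal close; see issue #4293 cited in the hunk.)
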
* * Redistribution and use in source and binary forms, with or without @@ -54,4 +54,11 @@ void rd_kafka_ssl_init(void); const char *rd_kafka_ssl_last_error_str(void); +int rd_kafka_ssl_hmac(rd_kafka_broker_t *rkb, + const EVP_MD *evp, + const rd_chariov_t *in, + const rd_chariov_t *salt, + int itcnt, + rd_chariov_t *out); + #endif /* _RDKAFKA_SSL_H_ */ diff --git a/lib/librdkafka-2.1.0/src/rdkafka_sticky_assignor.c b/lib/librdkafka-2.3.0/src/rdkafka_sticky_assignor.c similarity index 57% rename from lib/librdkafka-2.1.0/src/rdkafka_sticky_assignor.c rename to lib/librdkafka-2.3.0/src/rdkafka_sticky_assignor.c index 8e76ddb14e3..462da614781 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_sticky_assignor.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_sticky_assignor.c @@ -1,7 +1,8 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2020 Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -160,6 +161,9 @@ typedef RD_MAP_TYPE(const rd_kafka_topic_partition_t *, typedef RD_MAP_TYPE(const rd_kafka_topic_partition_t *, rd_list_t *) map_toppar_list_t; +typedef RD_MAP_TYPE(const rd_kafka_topic_partition_t *, + rd_kafka_metadata_partition_internal_t *) map_toppar_mdpi_t; + typedef RD_MAP_TYPE(const rd_kafka_topic_partition_t *, ConsumerGenerationPair_t *) map_toppar_cgpair_t; @@ -173,6 +177,7 @@ typedef RD_MAP_TYPE(const ConsumerPair_t *, typedef RD_MAP_TYPE(const char *, map_cpair_toppar_list_t *) map_str_map_cpair_toppar_list_t; +typedef RD_MAP_TYPE(const char *, const char *) map_str_str_t; /** Glue type helpers */ @@ -193,6 +198,121 @@ static void map_cpair_toppar_list_t_free(void *ptr) { } +/** @struct Convenience struct for storing consumer/rack and toppar/rack + * mappings. */ +typedef struct { + /** A map of member_id -> rack_id pairs. */ + map_str_str_t member_id_to_rack_id; + /* A map of topic partition to rd_kafka_metadata_partition_internal_t */ + map_toppar_mdpi_t toppar_to_mdpi; +} rd_kafka_rack_info_t; + +/** + * @brief Initialize a rd_kafka_rack_info_t. + * + * @param topics + * @param topic_cnt + * @param mdi + * + * This struct is for convenience/easy grouping, and as a consequence, we avoid + * copying values. Thus, it is intended to be used within the lifetime of this + * function's arguments. + * + * @return rd_kafka_rack_info_t* + */ +static rd_kafka_rack_info_t * +rd_kafka_rack_info_new(rd_kafka_assignor_topic_t **topics, + size_t topic_cnt, + const rd_kafka_metadata_internal_t *mdi) { + int i; + size_t t; + rd_kafka_group_member_t *rkgm; + rd_kafka_rack_info_t *rkri = rd_calloc(1, sizeof(rd_kafka_rack_info_t)); + + if (!rd_kafka_use_rack_aware_assignment(topics, topic_cnt, mdi)) { + /* Free everything immediately, we aren't using rack aware + assignment, this struct is not applicable. 
*/ + rd_free(rkri); + return NULL; + } + + rkri->member_id_to_rack_id = (map_str_str_t)RD_MAP_INITIALIZER( + 0, rd_map_str_cmp, rd_map_str_hash, + NULL /* refs members.rkgm_member_id */, + NULL /* refs members.rkgm_rack_id */); + rkri->toppar_to_mdpi = (map_toppar_mdpi_t)RD_MAP_INITIALIZER( + 0, rd_kafka_topic_partition_cmp, rd_kafka_topic_partition_hash, + rd_kafka_topic_partition_destroy_free, NULL); + + for (t = 0; t < topic_cnt; t++) { + RD_LIST_FOREACH(rkgm, &topics[t]->members, i) { + RD_MAP_SET(&rkri->member_id_to_rack_id, + rkgm->rkgm_member_id->str, + rkgm->rkgm_rack_id->str); + } + + for (i = 0; i < topics[t]->metadata->partition_cnt; i++) { + rd_kafka_topic_partition_t *rkpart = + rd_kafka_topic_partition_new( + topics[t]->metadata->topic, i); + RD_MAP_SET( + &rkri->toppar_to_mdpi, rkpart, + &topics[t]->metadata_internal->partitions[i]); + } + } + + return rkri; +} + +/* Destroy a rd_kafka_rack_info_t. */ +static void rd_kafka_rack_info_destroy(rd_kafka_rack_info_t *rkri) { + if (!rkri) + return; + + RD_MAP_DESTROY(&rkri->member_id_to_rack_id); + RD_MAP_DESTROY(&rkri->toppar_to_mdpi); + + rd_free(rkri); +} + + +/* Convenience function to bsearch inside the racks of a + * rd_kafka_metadata_partition_internal_t. */ +static char *rd_kafka_partition_internal_find_rack( + rd_kafka_metadata_partition_internal_t *mdpi, + const char *rack) { + char **partition_racks = mdpi->racks; + size_t cnt = mdpi->racks_cnt; + + void *res = + bsearch(&rack, partition_racks, cnt, sizeof(char *), rd_strcmp3); + + if (res) + return *(char **)res; + return NULL; +} + + +/* Computes whether there is a rack mismatch between the rack of the consumer + * and the topic partition/any of its replicas. */ +static rd_bool_t +rd_kafka_racks_mismatch(rd_kafka_rack_info_t *rkri, + const char *consumer, + const rd_kafka_topic_partition_t *topic_partition) { + const char *consumer_rack; + rd_kafka_metadata_partition_internal_t *mdpi; + + if (rkri == NULL) /* Not using rack aware assignment */ + return rd_false; + + consumer_rack = RD_MAP_GET(&rkri->member_id_to_rack_id, consumer); + + mdpi = RD_MAP_GET(&rkri->toppar_to_mdpi, topic_partition); + + return consumer_rack != NULL && + (mdpi == NULL || + !rd_kafka_partition_internal_find_rack(mdpi, consumer_rack)); +} /** * @struct Provides current state of partition movements between consumers @@ -399,13 +519,15 @@ static int sort_by_map_elem_val_toppar_list_cnt(const void *_a, * * The assignment should improve the overall balance of the partition * assignments to consumers. + * @returns true if partition was assigned, false otherwise. 
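
Two preconditions are worth spelling out for the rack helpers above. rd_kafka_partition_internal_find_rack() uses bsearch(), so it is only correct if mdpi->racks is kept sorted with an ordering consistent with rd_strcmp3, presumably guaranteed wherever the internal partition metadata is populated. And rd_kafka_racks_mismatch() only reports a mismatch when the consumer actually has a rack and no replica of the partition lives on it; rack-less consumers never mismatch. The sorted-search contract in isolation:

    #include <stdlib.h>
    #include <string.h>

    /* bsearch/qsort hand us pointers to the array elements, which
     * are themselves char pointers, hence the char** dereference. */
    static int strptr_cmp(const void *a, const void *b) {
        return strcmp(*(const char *const *)a, *(const char *const *)b);
    }

    static const char *find_rack(const char **racks, size_t cnt,
                                 const char *rack) {
        const char **res;
        /* Precondition, established elsewhere:
         *   qsort(racks, cnt, sizeof(*racks), strptr_cmp); */
        res = bsearch(&rack, racks, cnt, sizeof(*racks), strptr_cmp);
        return res ? *res : NULL;
    }
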
*/ -static void -assignPartition(const rd_kafka_topic_partition_t *partition, - rd_list_t *sortedCurrentSubscriptions /*rd_map_elem_t*/, - map_str_toppar_list_t *currentAssignment, - map_str_toppar_list_t *consumer2AllPotentialPartitions, - map_toppar_str_t *currentPartitionConsumer) { +static rd_bool_t +maybeAssignPartition(const rd_kafka_topic_partition_t *partition, + rd_list_t *sortedCurrentSubscriptions /*rd_map_elem_t*/, + map_str_toppar_list_t *currentAssignment, + map_str_toppar_list_t *consumer2AllPotentialPartitions, + map_toppar_str_t *currentPartitionConsumer, + rd_kafka_rack_info_t *rkri) { const rd_map_elem_t *elem; int i; @@ -418,6 +540,9 @@ assignPartition(const rd_kafka_topic_partition_t *partition, if (!rd_kafka_topic_partition_list_find( partitions, partition->topic, partition->partition)) continue; + if (rkri != NULL && + rd_kafka_racks_mismatch(rkri, consumer, partition)) + continue; rd_kafka_topic_partition_list_add( RD_MAP_GET(currentAssignment, consumer), partition->topic, @@ -431,8 +556,9 @@ assignPartition(const rd_kafka_topic_partition_t *partition, * This is an O(N) operation since it is a single shuffle. */ rd_list_sort(sortedCurrentSubscriptions, sort_by_map_elem_val_toppar_list_cnt); - return; + return rd_true; } + return rd_false; } /** @@ -639,14 +765,6 @@ isBalanced(rd_kafka_t *rk, ->value) ->cnt; - /* Mapping from partitions to the consumer assigned to them */ - // FIXME: don't create prior to min/max check below */ - map_toppar_str_t allPartitions = RD_MAP_INITIALIZER( - RD_MAP_CNT(partition2AllPotentialConsumers), - rd_kafka_topic_partition_cmp, rd_kafka_topic_partition_hash, - NULL /* references currentAssignment */, - NULL /* references currentAssignment */); - /* Iterators */ const rd_kafka_topic_partition_list_t *partitions; const char *consumer; @@ -661,10 +779,16 @@ isBalanced(rd_kafka_t *rk, "minimum %d and maximum %d partitions assigned " "to each consumer", minimum, maximum); - RD_MAP_DESTROY(&allPartitions); return rd_true; } + /* Mapping from partitions to the consumer assigned to them */ + map_toppar_str_t allPartitions = RD_MAP_INITIALIZER( + RD_MAP_CNT(partition2AllPotentialConsumers), + rd_kafka_topic_partition_cmp, rd_kafka_topic_partition_hash, + NULL /* references currentAssignment */, + NULL /* references currentAssignment */); + /* Create a mapping from partitions to the consumer assigned to them */ RD_MAP_FOREACH(consumer, partitions, currentAssignment) { @@ -764,7 +888,8 @@ performReassignments(rd_kafka_t *rk, rd_list_t *sortedCurrentSubscriptions /*rd_map_elem_t*/, map_str_toppar_list_t *consumer2AllPotentialPartitions, map_toppar_list_t *partition2AllPotentialConsumers, - map_toppar_str_t *currentPartitionConsumer) { + map_toppar_str_t *currentPartitionConsumer, + rd_kafka_rack_info_t *rkri) { rd_bool_t reassignmentPerformed = rd_false; rd_bool_t modified, saveIsBalanced = rd_false; int iterations = 0; @@ -796,6 +921,9 @@ performReassignments(rd_kafka_t *rk, const ConsumerGenerationPair_t *prevcgp; const rd_kafka_topic_partition_list_t *currAssignment; int j; + rd_bool_t found_rack; + const char *consumer_rack = NULL; + rd_kafka_metadata_partition_internal_t *mdpi = NULL; /* FIXME: Is this a local error/bug? If so, assert */ if (rd_list_cnt(consumers) <= 1) @@ -832,7 +960,59 @@ performReassignments(rd_kafka_t *rk, } /* Check if a better-suited consumer exists for the - * partition; if so, reassign it. */ + * partition; if so, reassign it. Use consumer within + * rack if possible. 
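
A small performance fix rides along in the isBalanced() hunk above: the allPartitions map used to be constructed before the cheap min/max comparison, so the early "fully balanced" return built and destroyed a map it never consulted. Initializing it only after that check (the FIXME the old code carried) makes the common balanced case allocation-free.
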
*/ + if (rkri) { + consumer_rack = RD_MAP_GET( + &rkri->member_id_to_rack_id, consumer); + mdpi = RD_MAP_GET(&rkri->toppar_to_mdpi, + partition); + } + found_rack = rd_false; + + if (consumer_rack != NULL && mdpi != NULL && + mdpi->racks_cnt > 0 && + rd_kafka_partition_internal_find_rack( + mdpi, consumer_rack)) { + RD_LIST_FOREACH(otherConsumer, consumers, j) { + /* No need for rkri == NULL check, that + * is guaranteed if we're inside this if + * block. */ + const char *other_consumer_rack = + RD_MAP_GET( + &rkri->member_id_to_rack_id, + otherConsumer); + + if (other_consumer_rack == NULL || + !rd_kafka_partition_internal_find_rack( + mdpi, other_consumer_rack)) + continue; + + if (currAssignment->cnt <= + RD_MAP_GET(currentAssignment, + otherConsumer) + ->cnt + + 1) + continue; + + reassignPartition( + rk, partitionMovements, partition, + currentAssignment, + sortedCurrentSubscriptions, + currentPartitionConsumer, + consumer2AllPotentialPartitions); + + reassignmentPerformed = rd_true; + modified = rd_true; + found_rack = rd_true; + break; + } + } + + if (found_rack) { + continue; + } + RD_LIST_FOREACH(otherConsumer, consumers, j) { if (consumer == otherConsumer) continue; @@ -911,7 +1091,43 @@ static int getBalanceScore(map_str_toppar_list_t *assignment) { return score; } +static void maybeAssign(rd_kafka_topic_partition_list_t *unassignedPartitions, + map_toppar_list_t *partition2AllPotentialConsumers, + rd_list_t *sortedCurrentSubscriptions /*rd_map_elem_t*/, + map_str_toppar_list_t *currentAssignment, + map_str_toppar_list_t *consumer2AllPotentialPartitions, + map_toppar_str_t *currentPartitionConsumer, + rd_bool_t removeAssigned, + rd_kafka_rack_info_t *rkri) { + int i; + const rd_kafka_topic_partition_t *partition; + + for (i = 0; i < unassignedPartitions->cnt; i++) { + partition = &unassignedPartitions->elems[i]; + rd_bool_t assigned; + + /* Skip if there is no potential consumer for the partition. + * FIXME: How could this be? */ + if (rd_list_empty(RD_MAP_GET(partition2AllPotentialConsumers, + partition))) { + rd_dassert(!*"sticky assignor bug"); + continue; + } + assigned = maybeAssignPartition( + partition, sortedCurrentSubscriptions, currentAssignment, + consumer2AllPotentialPartitions, currentPartitionConsumer, + rkri); + if (assigned && removeAssigned) { + rd_kafka_topic_partition_list_del_by_idx( + unassignedPartitions, i); + i--; /* Since the current element was + * removed we need the next for + * loop iteration to stay at the + * same index. */ + } + } +} /** * @brief Balance the current assignment using the data structures @@ -926,7 +1142,8 @@ static void balance(rd_kafka_t *rk, map_str_toppar_list_t *consumer2AllPotentialPartitions, map_toppar_list_t *partition2AllPotentialConsumers, map_toppar_str_t *currentPartitionConsumer, - rd_bool_t revocationRequired) { + rd_bool_t revocationRequired, + rd_kafka_rack_info_t *rkri) { /* If the consumer with most assignments (thus the last element * in the ascendingly ordered sortedCurrentSubscriptions list) has @@ -964,23 +1181,34 @@ static void balance(rd_kafka_t *rk, const void *ignore; const rd_map_elem_t *elem; int i; - - /* Assign all unassigned partitions */ - for (i = 0; i < unassignedPartitions->cnt; i++) { - partition = &unassignedPartitions->elems[i]; - - /* Skip if there is no potential consumer for the partition. - * FIXME: How could this be? 
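
In performReassignments() above, when the partition's current consumer is rack-matched (its rack hosts a replica), the new block first searches for another rack-matched consumer to move the partition to, and only falls through to the original rack-oblivious candidate scan when found_rack stays false. Note the guard on relative load: a move happens only if it strictly improves balance, which the inverted continue-condition encodes as

    /* Move a partition from `from` to `to` only when `from` carries
     * at least two more partitions, i.e. the move narrows the gap. */
    static int improves_balance(int from_cnt, int to_cnt) {
        return from_cnt > to_cnt + 1;
    }

(a hypothetical restatement of "currAssignment->cnt <= other->cnt + 1 => continue").
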
*/ - if (rd_list_empty(RD_MAP_GET(partition2AllPotentialConsumers, - partition))) { - rd_dassert(!*"sticky assignor bug"); - continue; - } - - assignPartition( - partition, sortedCurrentSubscriptions, currentAssignment, - consumer2AllPotentialPartitions, currentPartitionConsumer); + rd_kafka_topic_partition_list_t *leftoverUnassignedPartitions; + rd_bool_t leftoverUnassignedPartitions_allocated = rd_false; + + leftoverUnassignedPartitions = + unassignedPartitions; /* copy on write. */ + + if (rkri != NULL && RD_MAP_CNT(&rkri->member_id_to_rack_id) != 0) { + leftoverUnassignedPartitions_allocated = rd_true; + /* Since maybeAssign is called twice, we keep track of those + * partitions which the first call has taken care of already, + * but we don't want to modify the original + * unassignedPartitions. */ + leftoverUnassignedPartitions = + rd_kafka_topic_partition_list_copy(unassignedPartitions); + maybeAssign(leftoverUnassignedPartitions, + partition2AllPotentialConsumers, + sortedCurrentSubscriptions, currentAssignment, + consumer2AllPotentialPartitions, + currentPartitionConsumer, rd_true, rkri); } + maybeAssign(leftoverUnassignedPartitions, + partition2AllPotentialConsumers, sortedCurrentSubscriptions, + currentAssignment, consumer2AllPotentialPartitions, + currentPartitionConsumer, rd_false, NULL); + + if (leftoverUnassignedPartitions_allocated) + rd_kafka_topic_partition_list_destroy( + leftoverUnassignedPartitions); /* Narrow down the reassignment scope to only those partitions that can @@ -1050,17 +1278,18 @@ static void balance(rd_kafka_t *rk, * changes, first try to balance by only moving newly added partitions. */ if (!revocationRequired && unassignedPartitions->cnt > 0) - performReassignments( - rk, partitionMovements, unassignedPartitions, - currentAssignment, prevAssignment, - sortedCurrentSubscriptions, consumer2AllPotentialPartitions, - partition2AllPotentialConsumers, currentPartitionConsumer); + performReassignments(rk, partitionMovements, + unassignedPartitions, currentAssignment, + prevAssignment, sortedCurrentSubscriptions, + consumer2AllPotentialPartitions, + partition2AllPotentialConsumers, + currentPartitionConsumer, rkri); reassignmentPerformed = performReassignments( rk, partitionMovements, sortedPartitions, currentAssignment, prevAssignment, sortedCurrentSubscriptions, consumer2AllPotentialPartitions, partition2AllPotentialConsumers, - currentPartitionConsumer); + currentPartitionConsumer, rkri); /* If we are not preserving existing assignments and we have made * changes to the current assignment make sure we are getting a more @@ -1180,24 +1409,6 @@ static void prepopulateCurrentAssignments( &sortedPartitionConsumersByGeneration, partition, rd_list_new(10, ConsumerGenerationPair_destroy)); - if (consumer->rkgm_generation != -1 && - rd_list_find( - consumers, &consumer->rkgm_generation, - ConsumerGenerationPair_cmp_generation)) { - rd_kafka_log( - rk, LOG_WARNING, "STICKY", - "Sticky assignor: " - "%s [%" PRId32 - "] is assigned to " - "multiple consumers with same " - "generation %d: " - "skipping member %.*s", - partition->topic, partition->partition, - consumer->rkgm_generation, - RD_KAFKAP_STR_PR(consumer->rkgm_member_id)); - continue; - } - rd_list_add(consumers, ConsumerGenerationPair_new( consumer->rkgm_member_id->str, @@ -1215,24 +1426,55 @@ static void prepopulateCurrentAssignments( RD_MAP_FOREACH(partition, consumers, &sortedPartitionConsumersByGeneration) { /* current and previous are the last two consumers - * of each partition. 
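
The balance() rewrite above replaces the single assign-everything loop with two maybeAssign() passes. When rack information is present, the first pass runs over a copy of unassignedPartitions (the copy-on-write noted in the comment) with removeAssigned set, so every partition it manages to place on a rack-matched consumer disappears from the working list; the second pass then runs rack-unconstrained (rkri passed as NULL) over whatever is left. The original unassignedPartitions list is deliberately left untouched because the later "only move newly added partitions" call to performReassignments() still reads it.
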
*/ - ConsumerGenerationPair_t *current, *previous; + * of each partition, and found is used to check for duplicate + * consumers of same generation. */ + ConsumerGenerationPair_t *current, *previous, *found; rd_kafka_topic_partition_list_t *partitions; /* Sort the per-partition consumers list by generation */ rd_list_sort(consumers, ConsumerGenerationPair_cmp_generation); + /* In case a partition is claimed by multiple consumers with the + * same generation, invalidate it for all such consumers, and + * log an error for this situation. */ + if ((found = rd_list_find_duplicate( + consumers, ConsumerGenerationPair_cmp_generation))) { + const char *consumer1, *consumer2; + int idx = rd_list_index( + consumers, found, + ConsumerGenerationPair_cmp_generation); + consumer1 = ((ConsumerGenerationPair_t *)rd_list_elem( + consumers, idx)) + ->consumer; + consumer2 = ((ConsumerGenerationPair_t *)rd_list_elem( + consumers, idx + 1)) + ->consumer; + + RD_MAP_DELETE(currentPartitionConsumer, partition); + + rd_kafka_log( + rk, LOG_ERR, "STICKY", + "Sticky assignor: Found multiple consumers %s and " + "%s claiming the same topic partition %s:%d in the " + "same generation %d, this will be invalidated and " + "removed from their previous assignment.", + consumer1, consumer2, partition->topic, + partition->partition, found->generation); + continue; + } + /* Add current (highest generation) consumer * to currentAssignment. */ - current = rd_list_elem(consumers, 0); + current = rd_list_last(consumers); partitions = RD_MAP_GET(currentAssignment, current->consumer); rd_kafka_topic_partition_list_add(partitions, partition->topic, partition->partition); /* Add previous (next highest generation) consumer, if any, * to prevAssignment. */ - previous = rd_list_elem(consumers, 1); - if (previous) + if (rd_list_cnt(consumers) >= 2 && + (previous = + rd_list_elem(consumers, rd_list_cnt(consumers) - 2))) RD_MAP_SET( prevAssignment, rd_kafka_topic_partition_copy(partition), @@ -1590,6 +1832,11 @@ rd_kafka_sticky_assignor_assign_cb(rd_kafka_t *rk, void *opaque) { /* FIXME: Let the cgrp pass the actual eligible partition count */ size_t partition_cnt = member_cnt * 10; /* FIXME */ + const rd_kafka_metadata_internal_t *mdi = + rd_kafka_metadata_get_internal(metadata); + + rd_kafka_rack_info_t *rkri = + rd_kafka_rack_info_new(eligible_topics, eligible_topic_cnt, mdi); /* Map of subscriptions. This is \p member turned into a map. */ map_str_toppar_list_t subscriptions = @@ -1680,6 +1927,10 @@ rd_kafka_sticky_assignor_assign_cb(rd_kafka_t *rk, unassignedPartitions = rd_kafka_topic_partition_list_copy(sortedPartitions); + if (rkri) + rd_kafka_dbg(rk, CGRP, "STICKY", + "Sticky assignor: using rack aware assignment."); + RD_MAP_FOREACH(consumer, partitions, ¤tAssignment) { if (!RD_MAP_GET(&subscriptions, consumer)) { /* If a consumer that existed before @@ -1726,13 +1977,16 @@ rd_kafka_sticky_assignor_assign_cb(rd_kafka_t *rk, RD_MAP_GET(&subscriptions, consumer), partition->topic, - RD_KAFKA_PARTITION_UA)) { + RD_KAFKA_PARTITION_UA) || + rd_kafka_racks_mismatch( + rkri, consumer, partition)) { /* If this partition cannot remain * assigned to its current consumer * because the consumer is no longer - * subscribed to its topic, remove it - * from the currentAssignment of the - * consumer. */ + * subscribed to its topic, or racks + * don't match for rack-aware + * assignment, remove it from the + * currentAssignment of the consumer. 
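
prepopulateCurrentAssignments() above also changes how generation conflicts are handled: previously a member whose generation collided with one already recorded was merely skipped with a warning, which silently favoured whichever consumer happened to be processed first; now the per-partition consumer list is sorted and scanned for duplicates, and a collision invalidates the partition for every claimant at LOG_ERR. Since the sort is evidently ascending by generation, the current owner becomes rd_list_last() and the previous owner the second-to-last element, replacing the old elem(0)/elem(1) indexing. The duplicate scan itself only needs one linear pass once the list is sorted; presumably rd_list_find_duplicate amounts to:

    #include <stddef.h>

    /* In a sorted array, equal keys are adjacent, so comparing each
     * neighbour pair finds any duplicate in a single pass. */
    static void *find_adjacent_dup(void **elems, size_t cnt,
                                   int (*cmp)(const void *, const void *)) {
        size_t i;
        for (i = 0; i + 1 < cnt; i++)
            if (cmp(elems[i], elems[i + 1]) == 0)
                return elems[i];
        return NULL;
    }
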
*/ remove_part = rd_true; revocationRequired = rd_true; } else { @@ -1785,7 +2039,7 @@ rd_kafka_sticky_assignor_assign_cb(rd_kafka_t *rk, sortedPartitions, unassignedPartitions, &sortedCurrentSubscriptions, &consumer2AllPotentialPartitions, &partition2AllPotentialConsumers, ¤tPartitionConsumer, - revocationRequired); + revocationRequired, rkri); /* Transfer currentAssignment (now updated) to each member's * assignment. */ @@ -1798,6 +2052,7 @@ rd_kafka_sticky_assignor_assign_cb(rd_kafka_t *rk, rd_kafka_topic_partition_list_destroy(unassignedPartitions); rd_kafka_topic_partition_list_destroy(sortedPartitions); + rd_kafka_rack_info_destroy(rkri); RD_MAP_DESTROY(¤tPartitionConsumer); RD_MAP_DESTROY(&consumer2AllPotentialPartitions); @@ -1837,7 +2092,8 @@ static rd_kafkap_bytes_t *rd_kafka_sticky_assignor_get_metadata( const rd_kafka_assignor_t *rkas, void *assignor_state, const rd_list_t *topics, - const rd_kafka_topic_partition_list_t *owned_partitions) { + const rd_kafka_topic_partition_list_t *owned_partitions, + const rd_kafkap_str_t *rack_id) { rd_kafka_sticky_assignor_state_t *state; rd_kafka_buf_t *rkbuf; rd_kafkap_bytes_t *metadata; @@ -1855,9 +2111,11 @@ static rd_kafkap_bytes_t *rd_kafka_sticky_assignor_get_metadata( * If there is no previous assignment, UserData is NULL. */ + if (!assignor_state) { return rd_kafka_consumer_protocol_member_metadata_new( - topics, NULL, 0, owned_partitions); + topics, NULL, 0, owned_partitions, -1 /* generation */, + rack_id); } state = (rd_kafka_sticky_assignor_state_t *)assignor_state; @@ -1880,7 +2138,8 @@ static rd_kafkap_bytes_t *rd_kafka_sticky_assignor_get_metadata( rd_kafka_buf_destroy(rkbuf); metadata = rd_kafka_consumer_protocol_member_metadata_new( - topics, kbytes->data, kbytes->len, owned_partitions); + topics, kbytes->data, kbytes->len, owned_partitions, + state->generation_id, rack_id); rd_kafkap_bytes_destroy(kbytes); @@ -1913,296 +2172,97 @@ static void rd_kafka_sticky_assignor_state_destroy(void *assignor_state) { * */ - - -/** - * @brief Set a member's owned partitions based on its assignment. - * - * For use between assignor_run(). This is mimicing a consumer receiving - * its new assignment and including it in the next rebalance as its - * owned-partitions. - */ -static void ut_set_owned(rd_kafka_group_member_t *rkgm) { - if (rkgm->rkgm_owned) - rd_kafka_topic_partition_list_destroy(rkgm->rkgm_owned); - - rkgm->rkgm_owned = - rd_kafka_topic_partition_list_copy(rkgm->rkgm_assignment); -} - - -/** - * @brief Verify assignment validity and balance. - * - * @remark Also updates the members owned partitions to the assignment. 
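
The assignor glue above closes the loop on rack-awareness: rd_kafka_sticky_assignor_assign_cb() builds an rd_kafka_rack_info_t from the internal metadata and treats a rack mismatch like an expired subscription (revoking the partition), while rd_kafka_sticky_assignor_get_metadata() now serializes the member's generation and rack_id into the consumer protocol metadata so the group leader can populate those rack maps in the first place (per KIP-881). On the application side the rack is plain configuration; a sketch with an illustrative rack name:

    #include <librdkafka/rdkafka.h>

    /* Advertise this client's rack so rack-aware assignment and
     * follower fetching (KIP-392/KIP-881) can take it into account. */
    static void set_rack(rd_kafka_conf_t *conf) {
        char errstr[256];
        rd_kafka_conf_set(conf, "client.rack", "us-east-1a",
                          errstr, sizeof(errstr));
    }
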
- */ - -static int verifyValidityAndBalance0(const char *func, - int line, - rd_kafka_group_member_t *members, - size_t member_cnt, - const rd_kafka_metadata_t *metadata) { - int fails = 0; - int i; - rd_bool_t verbose = rd_false; /* Enable for troubleshooting */ - - RD_UT_SAY("%s:%d: verifying assignment for %d member(s):", func, line, - (int)member_cnt); - - for (i = 0; i < (int)member_cnt; i++) { - const char *consumer = members[i].rkgm_member_id->str; - const rd_kafka_topic_partition_list_t *partitions = - members[i].rkgm_assignment; - int p, j; - - if (verbose) - RD_UT_SAY( - "%s:%d: " - "consumer \"%s\", %d subscribed topic(s), " - "%d assigned partition(s):", - func, line, consumer, - members[i].rkgm_subscription->cnt, partitions->cnt); - - for (p = 0; p < partitions->cnt; p++) { - const rd_kafka_topic_partition_t *partition = - &partitions->elems[p]; - - if (verbose) - RD_UT_SAY("%s:%d: %s [%" PRId32 "]", func, - line, partition->topic, - partition->partition); - - if (!rd_kafka_topic_partition_list_find( - members[i].rkgm_subscription, partition->topic, - RD_KAFKA_PARTITION_UA)) { - RD_UT_WARN("%s [%" PRId32 - "] is assigned to " - "%s but it is not subscribed to " - "that topic", - partition->topic, - partition->partition, consumer); - fails++; - } - } - - /* Update the member's owned partitions to match - * the assignment. */ - ut_set_owned(&members[i]); - - if (i == (int)member_cnt - 1) - continue; - - for (j = i + 1; j < (int)member_cnt; j++) { - const char *otherConsumer = - members[j].rkgm_member_id->str; - const rd_kafka_topic_partition_list_t *otherPartitions = - members[j].rkgm_assignment; - rd_bool_t balanced = - abs(partitions->cnt - otherPartitions->cnt) <= 1; - - for (p = 0; p < partitions->cnt; p++) { - const rd_kafka_topic_partition_t *partition = - &partitions->elems[p]; - - if (rd_kafka_topic_partition_list_find( - otherPartitions, partition->topic, - partition->partition)) { - RD_UT_WARN( - "Consumer %s and %s are both " - "assigned %s [%" PRId32 "]", - consumer, otherConsumer, - partition->topic, - partition->partition); - fails++; - } - - - /* If assignment is imbalanced and this topic - * is also subscribed by the other consumer - * it means the assignment strategy failed to - * properly balance the partitions. */ - if (!balanced && - rd_kafka_topic_partition_list_find_topic( - otherPartitions, partition->topic)) { - RD_UT_WARN( - "Some %s partition(s) can be " - "moved from " - "%s (%d partition(s)) to " - "%s (%d partition(s)) to " - "achieve a better balance", - partition->topic, consumer, - partitions->cnt, otherConsumer, - otherPartitions->cnt); - fails++; - } - } - } - } - - RD_UT_ASSERT(!fails, "%s:%d: See %d previous errors", func, line, - fails); - - return 0; -} - - -#define verifyValidityAndBalance(members, member_cnt, metadata) \ - do { \ - if (verifyValidityAndBalance0(__FUNCTION__, __LINE__, members, \ - member_cnt, metadata)) \ - return 1; \ - } while (0) - - -/** - * @brief Checks that all assigned partitions are fully balanced. - * - * Only works for symmetrical subscriptions. 
- */ -static int isFullyBalanced0(const char *function, - int line, - const rd_kafka_group_member_t *members, - size_t member_cnt) { - int min_assignment = INT_MAX; - int max_assignment = -1; - size_t i; - - for (i = 0; i < member_cnt; i++) { - int size = members[i].rkgm_assignment->cnt; - if (size < min_assignment) - min_assignment = size; - if (size > max_assignment) - max_assignment = size; - } - - RD_UT_ASSERT(max_assignment - min_assignment <= 1, - "%s:%d: Assignment not balanced: min %d, max %d", function, - line, min_assignment, max_assignment); - - return 0; +/* All possible racks used in tests, as well as several common rack configs used + * by consumers */ +static rd_kafkap_str_t + *ALL_RACKS[7]; /* initialized before starting the unit tests. */ +static int RACKS_INITIAL[] = {0, 1, 2}; +static int RACKS_NULL[] = {6, 6, 6}; +static int RACKS_FINAL[] = {4, 5, 6}; +static int RACKS_ONE_NULL[] = {6, 4, 5}; + +/* Helper to get consumer rack based on the index of the consumer. */ +static rd_kafkap_str_t * +ut_get_consumer_rack(int idx, + rd_kafka_assignor_ut_rack_config_t parametrization) { + const int cycle_size = + (parametrization == RD_KAFKA_RANGE_ASSIGNOR_UT_NO_BROKER_RACK + ? RD_ARRAYSIZE(ALL_RACKS) + : 3); + return (ALL_RACKS[idx % cycle_size]); } -#define isFullyBalanced(members, member_cnt) \ - do { \ - if (isFullyBalanced0(__FUNCTION__, __LINE__, members, \ - member_cnt)) \ - return 1; \ - } while (0) - - +/* Helper to populate a member's owned partitions (accepted as variadic), and + * generation. */ static void -ut_print_toppar_list(const rd_kafka_topic_partition_list_t *partitions) { - int i; - - for (i = 0; i < partitions->cnt; i++) - RD_UT_SAY(" %s [%" PRId32 "]", partitions->elems[i].topic, - partitions->elems[i].partition); -} - - - -/** - * @brief Verify that member's assignment matches the expected partitions. - * - * The va-list is a NULL-terminated list of (const char *topic, int partition) - * tuples. - * - * @returns 0 on success, else raises a unittest error and returns 1. - */ -static int verifyAssignment0(const char *function, - int line, - rd_kafka_group_member_t *rkgm, - ...) { +ut_populate_member_owned_partitions_generation(rd_kafka_group_member_t *rkgm, + int generation, + size_t partition_cnt, + ...) 
{ va_list ap; - int cnt = 0; - const char *topic; - int fails = 0; + size_t i; - va_start(ap, rkgm); - while ((topic = va_arg(ap, const char *))) { - int partition = va_arg(ap, int); - cnt++; + if (rkgm->rkgm_owned) + rd_kafka_topic_partition_list_destroy(rkgm->rkgm_owned); + rkgm->rkgm_owned = rd_kafka_topic_partition_list_new(partition_cnt); - if (!rd_kafka_topic_partition_list_find(rkgm->rkgm_assignment, - topic, partition)) { - RD_UT_WARN( - "%s:%d: Expected %s [%d] not found in %s's " - "assignment (%d partition(s))", - function, line, topic, partition, - rkgm->rkgm_member_id->str, - rkgm->rkgm_assignment->cnt); - fails++; - } + va_start(ap, partition_cnt); + for (i = 0; i < partition_cnt; i++) { + char *topic = va_arg(ap, char *); + int partition = va_arg(ap, int); + rd_kafka_topic_partition_list_add(rkgm->rkgm_owned, topic, + partition); } va_end(ap); - if (cnt != rkgm->rkgm_assignment->cnt) { - RD_UT_WARN( - "%s:%d: " - "Expected %d assigned partition(s) for %s, not %d", - function, line, cnt, rkgm->rkgm_member_id->str, - rkgm->rkgm_assignment->cnt); - fails++; - } - - if (fails) - ut_print_toppar_list(rkgm->rkgm_assignment); - - RD_UT_ASSERT(!fails, "%s:%d: See previous errors", function, line); - - return 0; + rkgm->rkgm_generation = generation; } -#define verifyAssignment(rkgm, ...) \ - do { \ - if (verifyAssignment0(__FUNCTION__, __LINE__, rkgm, \ - __VA_ARGS__)) \ - return 1; \ - } while (0) - - - -/** - * @brief Initialize group member struct for testing. - * - * va-args is a NULL-terminated list of (const char *) topics. - * - * Use rd_kafka_group_member_clear() to free fields. - */ -static void -ut_init_member(rd_kafka_group_member_t *rkgm, const char *member_id, ...) { +/* Helper to create topic partition list from a variadic list of topic, + * partition pairs. */ +static rd_kafka_topic_partition_list_t ** +ut_create_topic_partition_lists(size_t list_cnt, ...) 
{ va_list ap; - const char *topic; - - memset(rkgm, 0, sizeof(*rkgm)); - - rkgm->rkgm_member_id = rd_kafkap_str_new(member_id, -1); - rkgm->rkgm_group_instance_id = rd_kafkap_str_new(member_id, -1); - rd_list_init(&rkgm->rkgm_eligible, 0, NULL); - - rkgm->rkgm_subscription = rd_kafka_topic_partition_list_new(4); - - va_start(ap, member_id); - while ((topic = va_arg(ap, const char *))) - rd_kafka_topic_partition_list_add(rkgm->rkgm_subscription, - topic, RD_KAFKA_PARTITION_UA); + size_t i; + rd_kafka_topic_partition_list_t **lists = + rd_calloc(list_cnt, sizeof(rd_kafka_topic_partition_list_t *)); + + va_start(ap, list_cnt); + for (i = 0; i < list_cnt; i++) { + const char *topic; + lists[i] = rd_kafka_topic_partition_list_new(0); + while ((topic = va_arg(ap, const char *))) { + int partition = va_arg(ap, int); + rd_kafka_topic_partition_list_add(lists[i], topic, + partition); + } + } va_end(ap); - rkgm->rkgm_assignment = - rd_kafka_topic_partition_list_new(rkgm->rkgm_subscription->size); + return lists; } - - -static int ut_testOneConsumerNoTopic(rd_kafka_t *rk, - const rd_kafka_assignor_t *rkas) { +static int +ut_testOneConsumerNoTopic(rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { rd_kafka_resp_err_t err; char errstr[512]; rd_kafka_metadata_t *metadata; rd_kafka_group_member_t members[1]; - metadata = rd_kafka_metadata_new_topic_mock(NULL, 0); - ut_init_member(&members[0], "consumer1", "topic1", NULL); + if (parametrization == RD_KAFKA_RANGE_ASSIGNOR_UT_NO_BROKER_RACK) { + RD_UT_PASS(); + } + + + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 0); + + ut_initMemberConditionalRack(&members[0], "consumer1", + ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", NULL); err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, RD_ARRAYSIZE(members), errstr, @@ -2214,21 +2274,32 @@ static int ut_testOneConsumerNoTopic(rd_kafka_t *rk, isFullyBalanced(members, RD_ARRAYSIZE(members)); rd_kafka_group_member_clear(&members[0]); - rd_kafka_metadata_destroy(metadata); + ut_destroy_metadata(metadata); RD_UT_PASS(); } -static int ut_testOneConsumerNonexistentTopic(rd_kafka_t *rk, - const rd_kafka_assignor_t *rkas) { +static int ut_testOneConsumerNonexistentTopic( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { rd_kafka_resp_err_t err; char errstr[512]; rd_kafka_metadata_t *metadata; rd_kafka_group_member_t members[1]; - metadata = rd_kafka_metadata_new_topic_mockv(1, "topic1", 0); - ut_init_member(&members[0], "consumer1", "topic1", NULL); + if (parametrization == RD_KAFKA_RANGE_ASSIGNOR_UT_NO_BROKER_RACK) { + RD_UT_PASS(); + } + + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 1, "topic1", 0); + + ut_initMemberConditionalRack(&members[0], "consumer1", + ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", NULL); err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, RD_ARRAYSIZE(members), errstr, @@ -2240,22 +2311,29 @@ static int ut_testOneConsumerNonexistentTopic(rd_kafka_t *rk, isFullyBalanced(members, RD_ARRAYSIZE(members)); rd_kafka_group_member_clear(&members[0]); - rd_kafka_metadata_destroy(metadata); + ut_destroy_metadata(metadata); RD_UT_PASS(); } -static int ut_testOneConsumerOneTopic(rd_kafka_t *rk, - const rd_kafka_assignor_t *rkas) { +static int +ut_testOneConsumerOneTopic(rd_kafka_t *rk, + const 
rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { rd_kafka_resp_err_t err; char errstr[512]; rd_kafka_metadata_t *metadata; rd_kafka_group_member_t members[1]; - metadata = rd_kafka_metadata_new_topic_mockv(1, "topic1", 3); - ut_init_member(&members[0], "consumer1", "topic1", NULL); + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 1, "topic1", 3); + + ut_initMemberConditionalRack(&members[0], "consumer1", + ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", NULL); err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, RD_ARRAYSIZE(members), errstr, @@ -2272,7 +2350,7 @@ static int ut_testOneConsumerOneTopic(rd_kafka_t *rk, isFullyBalanced(members, RD_ARRAYSIZE(members)); rd_kafka_group_member_clear(&members[0]); - rd_kafka_metadata_destroy(metadata); + ut_destroy_metadata(metadata); RD_UT_PASS(); } @@ -2280,16 +2358,20 @@ static int ut_testOneConsumerOneTopic(rd_kafka_t *rk, static int ut_testOnlyAssignsPartitionsFromSubscribedTopics( rd_kafka_t *rk, - const rd_kafka_assignor_t *rkas) { - + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { rd_kafka_resp_err_t err; char errstr[512]; rd_kafka_metadata_t *metadata; rd_kafka_group_member_t members[1]; - metadata = - rd_kafka_metadata_new_topic_mockv(2, "topic1", 3, "topic2", 3); - ut_init_member(&members[0], "consumer1", "topic1", NULL); + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 2, "topic1", 3, "topic2", 3); + + ut_initMemberConditionalRack(&members[0], "consumer1", + ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", NULL); err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, RD_ARRAYSIZE(members), errstr, @@ -2303,22 +2385,28 @@ static int ut_testOnlyAssignsPartitionsFromSubscribedTopics( isFullyBalanced(members, RD_ARRAYSIZE(members)); rd_kafka_group_member_clear(&members[0]); - rd_kafka_metadata_destroy(metadata); + ut_destroy_metadata(metadata); RD_UT_PASS(); } -static int ut_testOneConsumerMultipleTopics(rd_kafka_t *rk, - const rd_kafka_assignor_t *rkas) { +static int ut_testOneConsumerMultipleTopics( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { rd_kafka_resp_err_t err; char errstr[512]; rd_kafka_metadata_t *metadata; rd_kafka_group_member_t members[1]; - metadata = - rd_kafka_metadata_new_topic_mockv(2, "topic1", 1, "topic2", 2); - ut_init_member(&members[0], "consumer1", "topic1", "topic2", NULL); + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 2, "topic1", 1, "topic2", 2); + + ut_initMemberConditionalRack(&members[0], "consumer1", + ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", "topic2", NULL); err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, RD_ARRAYSIZE(members), errstr, @@ -2332,22 +2420,30 @@ static int ut_testOneConsumerMultipleTopics(rd_kafka_t *rk, isFullyBalanced(members, RD_ARRAYSIZE(members)); rd_kafka_group_member_clear(&members[0]); - rd_kafka_metadata_destroy(metadata); + ut_destroy_metadata(metadata); RD_UT_PASS(); } -static int -ut_testTwoConsumersOneTopicOnePartition(rd_kafka_t *rk, - const rd_kafka_assignor_t *rkas) { +static int ut_testTwoConsumersOneTopicOnePartition( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { rd_kafka_resp_err_t err; 
char errstr[512]; rd_kafka_metadata_t *metadata; rd_kafka_group_member_t members[2]; - metadata = rd_kafka_metadata_new_topic_mockv(1, "topic1", 1); - ut_init_member(&members[0], "consumer1", "topic1", NULL); - ut_init_member(&members[1], "consumer2", "topic1", NULL); + + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 1, "topic1", 1); + ut_initMemberConditionalRack(&members[0], "consumer1", + ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", NULL); + ut_initMemberConditionalRack(&members[1], "consumer2", + ut_get_consumer_rack(1, parametrization), + parametrization, "topic1", NULL); err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, RD_ARRAYSIZE(members), errstr, @@ -2362,23 +2458,31 @@ ut_testTwoConsumersOneTopicOnePartition(rd_kafka_t *rk, rd_kafka_group_member_clear(&members[0]); rd_kafka_group_member_clear(&members[1]); - rd_kafka_metadata_destroy(metadata); + ut_destroy_metadata(metadata); RD_UT_PASS(); } -static int -ut_testTwoConsumersOneTopicTwoPartitions(rd_kafka_t *rk, - const rd_kafka_assignor_t *rkas) { +static int ut_testTwoConsumersOneTopicTwoPartitions( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { rd_kafka_resp_err_t err; char errstr[512]; rd_kafka_metadata_t *metadata; rd_kafka_group_member_t members[2]; - metadata = rd_kafka_metadata_new_topic_mockv(1, "topic1", 2); - ut_init_member(&members[0], "consumer1", "topic1", NULL); - ut_init_member(&members[1], "consumer2", "topic1", NULL); + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 1, "topic1", 2); + ut_initMemberConditionalRack(&members[0], "consumer1", + ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", NULL); + ut_initMemberConditionalRack(&members[1], "consumer2", + ut_get_consumer_rack(1, parametrization), + parametrization, "topic1", NULL); + err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, RD_ARRAYSIZE(members), errstr, @@ -2393,7 +2497,7 @@ ut_testTwoConsumersOneTopicTwoPartitions(rd_kafka_t *rk, rd_kafka_group_member_clear(&members[0]); rd_kafka_group_member_clear(&members[1]); - rd_kafka_metadata_destroy(metadata); + ut_destroy_metadata(metadata); RD_UT_PASS(); } @@ -2401,18 +2505,27 @@ ut_testTwoConsumersOneTopicTwoPartitions(rd_kafka_t *rk, static int ut_testMultipleConsumersMixedTopicSubscriptions( rd_kafka_t *rk, - const rd_kafka_assignor_t *rkas) { + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { rd_kafka_resp_err_t err; char errstr[512]; rd_kafka_metadata_t *metadata; rd_kafka_group_member_t members[3]; - metadata = - rd_kafka_metadata_new_topic_mockv(2, "topic1", 3, "topic2", 2); - ut_init_member(&members[0], "consumer1", "topic1", NULL); - ut_init_member(&members[1], "consumer2", "topic1", "topic2", NULL); - ut_init_member(&members[2], "consumer3", "topic1", NULL); + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 2, "topic1", 3, "topic2", 2); + + ut_initMemberConditionalRack(&members[0], "consumer1", + ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", NULL); + ut_initMemberConditionalRack(&members[1], "consumer2", + ut_get_consumer_rack(1, parametrization), + parametrization, "topic1", "topic2", NULL); + ut_initMemberConditionalRack(&members[2], "consumer3", + ut_get_consumer_rack(2, parametrization), + parametrization, "topic1", NULL); 
err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, RD_ARRAYSIZE(members), errstr, @@ -2429,24 +2542,31 @@ static int ut_testMultipleConsumersMixedTopicSubscriptions( rd_kafka_group_member_clear(&members[0]); rd_kafka_group_member_clear(&members[1]); rd_kafka_group_member_clear(&members[2]); - rd_kafka_metadata_destroy(metadata); + ut_destroy_metadata(metadata); RD_UT_PASS(); } -static int -ut_testTwoConsumersTwoTopicsSixPartitions(rd_kafka_t *rk, - const rd_kafka_assignor_t *rkas) { +static int ut_testTwoConsumersTwoTopicsSixPartitions( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { rd_kafka_resp_err_t err; char errstr[512]; rd_kafka_metadata_t *metadata; rd_kafka_group_member_t members[2]; - metadata = - rd_kafka_metadata_new_topic_mockv(2, "topic1", 3, "topic2", 3); - ut_init_member(&members[0], "consumer1", "topic1", "topic2", NULL); - ut_init_member(&members[1], "consumer2", "topic1", "topic2", NULL); + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 2, "topic1", 3, "topic2", 3); + + ut_initMemberConditionalRack(&members[0], "consumer1", + ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", "topic2", NULL); + ut_initMemberConditionalRack(&members[1], "consumer2", + ut_get_consumer_rack(1, parametrization), + parametrization, "topic1", "topic2", NULL); err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, RD_ARRAYSIZE(members), errstr, @@ -2463,21 +2583,29 @@ ut_testTwoConsumersTwoTopicsSixPartitions(rd_kafka_t *rk, rd_kafka_group_member_clear(&members[0]); rd_kafka_group_member_clear(&members[1]); - rd_kafka_metadata_destroy(metadata); + ut_destroy_metadata(metadata); RD_UT_PASS(); } -static int ut_testAddRemoveConsumerOneTopic(rd_kafka_t *rk, - const rd_kafka_assignor_t *rkas) { +static int ut_testAddRemoveConsumerOneTopic( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { rd_kafka_resp_err_t err; char errstr[512]; rd_kafka_metadata_t *metadata; rd_kafka_group_member_t members[2]; - metadata = rd_kafka_metadata_new_topic_mockv(1, "topic1", 3); - ut_init_member(&members[0], "consumer1", "topic1", NULL); + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 1, "topic1", 3); + + ut_initMemberConditionalRack(&members[0], "consumer1", + ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", NULL); + err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, 1, errstr, sizeof(errstr)); @@ -2490,7 +2618,9 @@ static int ut_testAddRemoveConsumerOneTopic(rd_kafka_t *rk, isFullyBalanced(members, 1); /* Add consumer2 */ - ut_init_member(&members[1], "consumer2", "topic1", NULL); + ut_initMemberConditionalRack(&members[1], "consumer2", + ut_get_consumer_rack(1, parametrization), + parametrization, "topic1", NULL); err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, RD_ARRAYSIZE(members), errstr, @@ -2519,7 +2649,7 @@ static int ut_testAddRemoveConsumerOneTopic(rd_kafka_t *rk, rd_kafka_group_member_clear(&members[0]); rd_kafka_group_member_clear(&members[1]); - rd_kafka_metadata_destroy(metadata); + ut_destroy_metadata(metadata); RD_UT_PASS(); } @@ -2545,25 +2675,35 @@ static int ut_testAddRemoveConsumerOneTopic(rd_kafka_t *rk, * - consumer3: topic1-1, topic5-0 * - consumer4: topic4-0, topic5-1 */ -static int -ut_testPoorRoundRobinAssignmentScenario(rd_kafka_t *rk, - const 
rd_kafka_assignor_t *rkas) { +static int ut_testPoorRoundRobinAssignmentScenario( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { rd_kafka_resp_err_t err; char errstr[512]; rd_kafka_metadata_t *metadata; rd_kafka_group_member_t members[4]; - metadata = rd_kafka_metadata_new_topic_mockv( - 5, "topic1", 2, "topic2", 1, "topic3", 2, "topic4", 1, "topic5", 2); - - ut_init_member(&members[0], "consumer1", "topic1", "topic2", "topic3", - "topic4", "topic5", NULL); - ut_init_member(&members[1], "consumer2", "topic1", "topic3", "topic5", - NULL); - ut_init_member(&members[2], "consumer3", "topic1", "topic3", "topic5", - NULL); - ut_init_member(&members[3], "consumer4", "topic1", "topic2", "topic3", - "topic4", "topic5", NULL); + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 5, "topic1", 2, "topic2", 1, "topic3", 2, + "topic4", 1, "topic5", 2); + + + ut_initMemberConditionalRack(&members[0], "consumer1", + ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", "topic2", + "topic3", "topic4", "topic5", NULL); + ut_initMemberConditionalRack( + &members[1], "consumer2", ut_get_consumer_rack(1, parametrization), + parametrization, "topic1", "topic3", "topic5", NULL); + ut_initMemberConditionalRack( + &members[2], "consumer3", ut_get_consumer_rack(2, parametrization), + parametrization, "topic1", "topic3", "topic5", NULL); + ut_initMemberConditionalRack(&members[3], "consumer4", + ut_get_consumer_rack(3, parametrization), + parametrization, "topic1", "topic2", + "topic3", "topic4", "topic5", NULL); err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, RD_ARRAYSIZE(members), errstr, @@ -2582,23 +2722,32 @@ ut_testPoorRoundRobinAssignmentScenario(rd_kafka_t *rk, rd_kafka_group_member_clear(&members[1]); rd_kafka_group_member_clear(&members[2]); rd_kafka_group_member_clear(&members[3]); - rd_kafka_metadata_destroy(metadata); + ut_destroy_metadata(metadata); RD_UT_PASS(); } -static int ut_testAddRemoveTopicTwoConsumers(rd_kafka_t *rk, - const rd_kafka_assignor_t *rkas) { +static int ut_testAddRemoveTopicTwoConsumers( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { rd_kafka_resp_err_t err; char errstr[512]; rd_kafka_metadata_t *metadata; rd_kafka_group_member_t members[2]; - metadata = rd_kafka_metadata_new_topic_mockv(1, "topic1", 3); - ut_init_member(&members[0], "consumer1", "topic1", "topic2", NULL); - ut_init_member(&members[1], "consumer2", "topic1", "topic2", NULL); + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 1, "topic1", 3); + + ut_initMemberConditionalRack(&members[0], "consumer1", + ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", "topic2", NULL); + ut_initMemberConditionalRack(&members[1], "consumer2", + ut_get_consumer_rack(1, parametrization), + parametrization, "topic1", "topic2", NULL); err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, RD_ARRAYSIZE(members), errstr, @@ -2615,9 +2764,11 @@ static int ut_testAddRemoveTopicTwoConsumers(rd_kafka_t *rk, * Add topic2 */ RD_UT_SAY("Adding topic2"); - rd_kafka_metadata_destroy(metadata); - metadata = - rd_kafka_metadata_new_topic_mockv(2, "topic1", 3, "topic2", 3); + ut_destroy_metadata(metadata); + + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 2, "topic1", 3, "topic2", 3); err = 
rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, RD_ARRAYSIZE(members), errstr, @@ -2638,8 +2789,11 @@ static int ut_testAddRemoveTopicTwoConsumers(rd_kafka_t *rk, * Remove topic1 */ RD_UT_SAY("Removing topic1"); - rd_kafka_metadata_destroy(metadata); - metadata = rd_kafka_metadata_new_topic_mockv(1, "topic2", 3); + ut_destroy_metadata(metadata); + + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 1, "topic2", 3); err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, RD_ARRAYSIZE(members), errstr, @@ -2655,15 +2809,16 @@ static int ut_testAddRemoveTopicTwoConsumers(rd_kafka_t *rk, rd_kafka_group_member_clear(&members[0]); rd_kafka_group_member_clear(&members[1]); - rd_kafka_metadata_destroy(metadata); + ut_destroy_metadata(metadata); RD_UT_PASS(); } -static int -ut_testReassignmentAfterOneConsumerLeaves(rd_kafka_t *rk, - const rd_kafka_assignor_t *rkas) { +static int ut_testReassignmentAfterOneConsumerLeaves( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { rd_kafka_resp_err_t err; char errstr[512]; rd_kafka_metadata_t *metadata; @@ -2680,8 +2835,9 @@ ut_testReassignmentAfterOneConsumerLeaves(rd_kafka_t *rk, mt[i].partition_cnt = i + 1; } - metadata = rd_kafka_metadata_new_topic_mock(mt, topic_cnt); - + ut_initMetadataConditionalRack0(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), + parametrization, mt, topic_cnt); for (i = 1; i <= member_cnt; i++) { char name[20]; @@ -2695,7 +2851,12 @@ ut_testReassignmentAfterOneConsumerLeaves(rd_kafka_t *rk, subscription, topic, RD_KAFKA_PARTITION_UA); } rd_snprintf(name, sizeof(name), "consumer%d", i); - ut_init_member(&members[i - 1], name, NULL); + + ut_initMemberConditionalRack( + &members[i - 1], name, + ut_get_consumer_rack(i, parametrization), parametrization, + NULL); + rd_kafka_topic_partition_list_destroy( members[i - 1].rkgm_subscription); members[i - 1].rkgm_subscription = subscription; @@ -2725,15 +2886,16 @@ ut_testReassignmentAfterOneConsumerLeaves(rd_kafka_t *rk, for (i = 0; i < member_cnt; i++) rd_kafka_group_member_clear(&members[i]); - rd_kafka_metadata_destroy(metadata); + ut_destroy_metadata(metadata); RD_UT_PASS(); } -static int -ut_testReassignmentAfterOneConsumerAdded(rd_kafka_t *rk, - const rd_kafka_assignor_t *rkas) { +static int ut_testReassignmentAfterOneConsumerAdded( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { rd_kafka_resp_err_t err; char errstr[512]; rd_kafka_metadata_t *metadata; @@ -2741,7 +2903,9 @@ ut_testReassignmentAfterOneConsumerAdded(rd_kafka_t *rk, int member_cnt = RD_ARRAYSIZE(members); int i; - metadata = rd_kafka_metadata_new_topic_mockv(1, "topic1", 20); + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 1, "topic1", 20); for (i = 1; i <= member_cnt; i++) { char name[20]; @@ -2750,7 +2914,10 @@ ut_testReassignmentAfterOneConsumerAdded(rd_kafka_t *rk, rd_kafka_topic_partition_list_add(subscription, "topic1", RD_KAFKA_PARTITION_UA); rd_snprintf(name, sizeof(name), "consumer%d", i); - ut_init_member(&members[i - 1], name, NULL); + ut_initMemberConditionalRack( + &members[i - 1], name, + ut_get_consumer_rack(i, parametrization), parametrization, + NULL); rd_kafka_topic_partition_list_destroy( members[i - 1].rkgm_subscription); members[i - 1].rkgm_subscription = subscription; @@ -2778,14 +2945,16 @@ 
ut_testReassignmentAfterOneConsumerAdded(rd_kafka_t *rk, for (i = 0; i < member_cnt; i++) rd_kafka_group_member_clear(&members[i]); - rd_kafka_metadata_destroy(metadata); + ut_destroy_metadata(metadata); RD_UT_PASS(); } -static int ut_testSameSubscriptions(rd_kafka_t *rk, - const rd_kafka_assignor_t *rkas) { +static int +ut_testSameSubscriptions(rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { rd_kafka_resp_err_t err; char errstr[512]; rd_kafka_metadata_t *metadata; @@ -2806,12 +2975,17 @@ static int ut_testSameSubscriptions(rd_kafka_t *rk, RD_KAFKA_PARTITION_UA); } - metadata = rd_kafka_metadata_new_topic_mock(mt, topic_cnt); + ut_initMetadataConditionalRack0(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), + parametrization, mt, topic_cnt); for (i = 1; i <= member_cnt; i++) { char name[16]; rd_snprintf(name, sizeof(name), "consumer%d", i); - ut_init_member(&members[i - 1], name, NULL); + ut_initMemberConditionalRack( + &members[i - 1], name, + ut_get_consumer_rack(i, parametrization), parametrization, + NULL); rd_kafka_topic_partition_list_destroy( members[i - 1].rkgm_subscription); members[i - 1].rkgm_subscription = @@ -2840,7 +3014,7 @@ static int ut_testSameSubscriptions(rd_kafka_t *rk, for (i = 0; i < member_cnt; i++) rd_kafka_group_member_clear(&members[i]); - rd_kafka_metadata_destroy(metadata); + ut_destroy_metadata(metadata); rd_kafka_topic_partition_list_destroy(subscription); RD_UT_PASS(); @@ -2849,8 +3023,8 @@ static int ut_testSameSubscriptions(rd_kafka_t *rk, static int ut_testLargeAssignmentWithMultipleConsumersLeaving( rd_kafka_t *rk, - const rd_kafka_assignor_t *rkas) { - + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { rd_kafka_resp_err_t err; char errstr[512]; rd_kafka_metadata_t *metadata; @@ -2867,7 +3041,9 @@ static int ut_testLargeAssignmentWithMultipleConsumersLeaving( mt[i].partition_cnt = i + 1; } - metadata = rd_kafka_metadata_new_topic_mock(mt, topic_cnt); + ut_initMetadataConditionalRack0(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), + parametrization, mt, topic_cnt); for (i = 0; i < member_cnt; i++) { /* Java tests use a random set, this is more deterministic. 
*/ @@ -2884,7 +3060,10 @@ static int ut_testLargeAssignmentWithMultipleConsumersLeaving( RD_KAFKA_PARTITION_UA); rd_snprintf(name, sizeof(name), "consumer%d", i + 1); - ut_init_member(&members[i], name, NULL); + ut_initMemberConditionalRack( + &members[i], name, ut_get_consumer_rack(i, parametrization), + parametrization, NULL); + rd_kafka_topic_partition_list_destroy( members[i].rkgm_subscription); members[i].rkgm_subscription = subscription; @@ -2915,14 +3094,16 @@ static int ut_testLargeAssignmentWithMultipleConsumersLeaving( for (i = 0; i < member_cnt; i++) rd_kafka_group_member_clear(&members[i]); - rd_kafka_metadata_destroy(metadata); + ut_destroy_metadata(metadata); RD_UT_PASS(); } -static int ut_testNewSubscription(rd_kafka_t *rk, - const rd_kafka_assignor_t *rkas) { +static int +ut_testNewSubscription(rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { rd_kafka_resp_err_t err; char errstr[512]; rd_kafka_metadata_t *metadata; @@ -2930,15 +3111,19 @@ static int ut_testNewSubscription(rd_kafka_t *rk, int member_cnt = RD_ARRAYSIZE(members); int i; - metadata = rd_kafka_metadata_new_topic_mockv( - 5, "topic1", 1, "topic2", 2, "topic3", 3, "topic4", 4, "topic5", 5); + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 5, "topic1", 1, "topic2", 2, "topic3", 3, + "topic4", 4, "topic5", 5); for (i = 0; i < member_cnt; i++) { char name[16]; int j; rd_snprintf(name, sizeof(name), "consumer%d", i); - ut_init_member(&members[i], name, NULL); + ut_initMemberConditionalRack( + &members[i], name, ut_get_consumer_rack(i, parametrization), + parametrization, NULL); rd_kafka_topic_partition_list_destroy( members[i].rkgm_subscription); @@ -2977,14 +3162,16 @@ static int ut_testNewSubscription(rd_kafka_t *rk, for (i = 0; i < member_cnt; i++) rd_kafka_group_member_clear(&members[i]); - rd_kafka_metadata_destroy(metadata); + ut_destroy_metadata(metadata); RD_UT_PASS(); } -static int ut_testMoveExistingAssignments(rd_kafka_t *rk, - const rd_kafka_assignor_t *rkas) { +static int ut_testMoveExistingAssignments( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { rd_kafka_resp_err_t err; char errstr[512]; rd_kafka_metadata_t *metadata; @@ -2994,12 +3181,22 @@ static int ut_testMoveExistingAssignments(rd_kafka_t *rk, int i; int fails = 0; - metadata = rd_kafka_metadata_new_topic_mockv(1, "topic1", 3); - - ut_init_member(&members[0], "consumer1", "topic1", NULL); - ut_init_member(&members[1], "consumer2", "topic1", NULL); - ut_init_member(&members[2], "consumer3", "topic1", NULL); - ut_init_member(&members[3], "consumer4", "topic1", NULL); + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 1, "topic1", 3); + + ut_initMemberConditionalRack(&members[0], "consumer1", + ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", NULL); + ut_initMemberConditionalRack(&members[1], "consumer2", + ut_get_consumer_rack(1, parametrization), + parametrization, "topic1", NULL); + ut_initMemberConditionalRack(&members[2], "consumer3", + ut_get_consumer_rack(2, parametrization), + parametrization, "topic1", NULL); + ut_initMemberConditionalRack(&members[3], "consumer4", + ut_get_consumer_rack(3, parametrization), + parametrization, "topic1", NULL); err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, member_cnt, errstr, sizeof(errstr)); @@ -3060,14 +3257,75 @@ static int 
ut_testMoveExistingAssignments(rd_kafka_t *rk, if (assignments[i]) rd_kafka_topic_partition_list_destroy(assignments[i]); } - rd_kafka_metadata_destroy(metadata); + ut_destroy_metadata(metadata); RD_UT_PASS(); } +/* The original version of this test diverged from the Java implementation in + * what it was testing. It's not certain whether it was by mistake or by + * design, but the new version matches the Java implementation, and the old one + * is retained as well, since it provides extra coverage. + */ +static int ut_testMoveExistingAssignments_j( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + rd_kafka_resp_err_t err; + char errstr[512]; + rd_kafka_metadata_t *metadata; + rd_kafka_group_member_t members[3]; + int member_cnt = RD_ARRAYSIZE(members); + rd_kafka_topic_partition_list_t *assignments[4] = RD_ZERO_INIT; + int i; + + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 6, "topic1", 1, "topic2", 1, "topic3", 1, + "topic4", 1, "topic5", 1, "topic6", 1); + + ut_initMemberConditionalRack(&members[0], "consumer1", + ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", "topic2", NULL); + ut_populate_member_owned_partitions_generation( + &members[0], 1 /* generation */, 1, "topic1", 0); + + ut_initMemberConditionalRack( + &members[1], "consumer2", ut_get_consumer_rack(1, parametrization), + parametrization, "topic1", "topic2", "topic3", "topic4", NULL); + ut_populate_member_owned_partitions_generation( + &members[1], 1 /* generation */, 2, "topic2", 0, "topic3", 0); + + ut_initMemberConditionalRack(&members[2], "consumer3", + ut_get_consumer_rack(2, parametrization), + parametrization, "topic2", "topic3", + "topic4", "topic5", "topic6", NULL); + ut_populate_member_owned_partitions_generation( + &members[2], 1 /* generation */, 3, "topic4", 0, "topic5", 0, + "topic6", 0); -static int ut_testStickiness(rd_kafka_t *rk, const rd_kafka_assignor_t *rkas) { + err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, + member_cnt, errstr, sizeof(errstr)); + RD_UT_ASSERT(!err, "assignor run failed: %s", errstr); + + verifyValidityAndBalance(members, member_cnt, metadata); + + for (i = 0; i < member_cnt; i++) { + rd_kafka_group_member_clear(&members[i]); + if (assignments[i]) + rd_kafka_topic_partition_list_destroy(assignments[i]); + } + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + + +static int +ut_testStickiness(rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { rd_kafka_resp_err_t err; char errstr[512]; rd_kafka_metadata_t *metadata; @@ -3075,18 +3333,22 @@ static int ut_testStickiness(rd_kafka_t *rk, const rd_kafka_assignor_t *rkas) { int member_cnt = RD_ARRAYSIZE(members); int i; - metadata = rd_kafka_metadata_new_topic_mockv( - 6, "topic1", 1, "topic2", 1, "topic3", 1, "topic4", 1, "topic5", 1, - "topic6", 1); + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 6, "topic1", 1, "topic2", 1, "topic3", 1, + "topic4", 1, "topic5", 1, "topic6", 1); - ut_init_member(&members[0], "consumer1", "topic1", "topic2", NULL); + ut_initMemberConditionalRack(&members[0], "consumer1", + ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", "topic2", NULL); rd_kafka_topic_partition_list_destroy(members[0].rkgm_assignment); members[0].rkgm_assignment = rd_kafka_topic_partition_list_new(1);
rd_kafka_topic_partition_list_add(members[0].rkgm_assignment, "topic1", 0); - ut_init_member(&members[1], "consumer2", "topic1", "topic2", "topic3", - "topic4", NULL); + ut_initMemberConditionalRack( + &members[1], "consumer2", ut_get_consumer_rack(1, parametrization), + parametrization, "topic1", "topic2", "topic3", "topic4", NULL); rd_kafka_topic_partition_list_destroy(members[1].rkgm_assignment); members[1].rkgm_assignment = rd_kafka_topic_partition_list_new(2); rd_kafka_topic_partition_list_add(members[1].rkgm_assignment, "topic2", @@ -3094,8 +3356,9 @@ static int ut_testStickiness(rd_kafka_t *rk, const rd_kafka_assignor_t *rkas) { rd_kafka_topic_partition_list_add(members[1].rkgm_assignment, "topic3", 0); - ut_init_member(&members[2], "consumer3", "topic4", "topic5", "topic6", - NULL); + ut_initMemberConditionalRack( + &members[2], "consumer3", ut_get_consumer_rack(1, parametrization), + parametrization, "topic4", "topic5", "topic6", NULL); rd_kafka_topic_partition_list_destroy(members[2].rkgm_assignment); members[2].rkgm_assignment = rd_kafka_topic_partition_list_new(3); rd_kafka_topic_partition_list_add(members[2].rkgm_assignment, "topic4", @@ -3112,10 +3375,113 @@ static int ut_testStickiness(rd_kafka_t *rk, const rd_kafka_assignor_t *rkas) { verifyValidityAndBalance(members, RD_ARRAYSIZE(members), metadata); - for (i = 0; i < member_cnt; i++) rd_kafka_group_member_clear(&members[i]); - rd_kafka_metadata_destroy(metadata); + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + + +/* The original version of this test diverged from the Java implementation in + * what it was testing. It's not certain whether it was by mistake or by + * design, but the new version matches the Java implementation, and the old one + * is retained as well, for extra coverage. + */ +static int +ut_testStickiness_j(rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + rd_kafka_resp_err_t err; + char errstr[512]; + rd_kafka_metadata_t *metadata; + rd_kafka_group_member_t members[4]; + int member_cnt = RD_ARRAYSIZE(members); + int i; + rd_kafka_topic_partition_list_t *assignments[4] = RD_ZERO_INIT; + int fails = 0; + + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 1, "topic1", 3); + + ut_initMemberConditionalRack(&members[0], "consumer1", + ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", NULL); + ut_initMemberConditionalRack(&members[1], "consumer2", + ut_get_consumer_rack(1, parametrization), + parametrization, "topic1", NULL); + ut_initMemberConditionalRack(&members[2], "consumer3", + ut_get_consumer_rack(2, parametrization), + parametrization, "topic1", NULL); + ut_initMemberConditionalRack(&members[3], "consumer4", + ut_get_consumer_rack(3, parametrization), + parametrization, "topic1", NULL); + + err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, + member_cnt, errstr, sizeof(errstr)); + RD_UT_ASSERT(!err, "assignor run failed: %s", errstr); + + verifyValidityAndBalance(members, member_cnt, metadata); + + for (i = 0; i < member_cnt; i++) { + if (members[i].rkgm_assignment->cnt > 1) { + RD_UT_WARN("%s assigned %d partitions, expected <= 1", + members[i].rkgm_member_id->str, + members[i].rkgm_assignment->cnt); + fails++; + } else if (members[i].rkgm_assignment->cnt == 1) { + assignments[i] = rd_kafka_topic_partition_list_copy( + members[i].rkgm_assignment); + } + } + + /* + * Remove potential group leader consumer1 by starting members at + * index 1.
+ * Owned partitions of the members are already set to the assignment by + * verifyValidityAndBalance above to simulate the fact that the assignor + * has already run once. + */ + err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, &members[1], + member_cnt - 1, errstr, sizeof(errstr)); + RD_UT_ASSERT(!err, "assignor run failed: %s", errstr); + + verifyValidityAndBalance(&members[1], member_cnt - 1, metadata); + // FIXME: isSticky() + + for (i = 1; i < member_cnt; i++) { + if (members[i].rkgm_assignment->cnt != 1) { + RD_UT_WARN("%s assigned %d partitions, expected 1", + members[i].rkgm_member_id->str, + members[i].rkgm_assignment->cnt); + fails++; + } else if (assignments[i] && + !rd_kafka_topic_partition_list_find( + assignments[i], + members[i].rkgm_assignment->elems[0].topic, + members[i] + .rkgm_assignment->elems[0] + .partition)) { + RD_UT_WARN( + "Stickiness was not honored for %s, " + "%s [%" PRId32 "] not in previous assignment", + members[i].rkgm_member_id->str, + members[i].rkgm_assignment->elems[0].topic, + members[i].rkgm_assignment->elems[0].partition); + fails++; + } + } + + RD_UT_ASSERT(!fails, "See previous errors"); + + + for (i = 0; i < member_cnt; i++) { + rd_kafka_group_member_clear(&members[i]); + if (assignments[i]) + rd_kafka_topic_partition_list_destroy(assignments[i]); + } + ut_destroy_metadata(metadata); RD_UT_PASS(); } @@ -3124,7 +3490,10 @@ static int ut_testStickiness(rd_kafka_t *rk, const rd_kafka_assignor_t *rkas) { /** * @brief Verify stickiness across three rebalances. */ -static int ut_testStickiness2(rd_kafka_t *rk, const rd_kafka_assignor_t *rkas) { +static int +ut_testStickiness2(rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { rd_kafka_resp_err_t err; char errstr[512]; rd_kafka_metadata_t *metadata; @@ -3132,11 +3501,19 @@ static int ut_testStickiness2(rd_kafka_t *rk, const rd_kafka_assignor_t *rkas) { int member_cnt = RD_ARRAYSIZE(members); int i; - metadata = rd_kafka_metadata_new_topic_mockv(1, "topic1", 6); + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 1, "topic1", 6); - ut_init_member(&members[0], "consumer1", "topic1", NULL); - ut_init_member(&members[1], "consumer2", "topic1", NULL); - ut_init_member(&members[2], "consumer3", "topic1", NULL); + ut_initMemberConditionalRack(&members[0], "consumer1", + ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", NULL); + ut_initMemberConditionalRack(&members[1], "consumer2", + ut_get_consumer_rack(1, parametrization), + parametrization, "topic1", NULL); + ut_initMemberConditionalRack(&members[2], "consumer3", + ut_get_consumer_rack(2, parametrization), + parametrization, "topic1", NULL); /* Just consumer1 */ err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, 1, @@ -3198,24 +3575,28 @@ static int ut_testStickiness2(rd_kafka_t *rk, const rd_kafka_assignor_t *rkas) { for (i = 0; i < member_cnt; i++) rd_kafka_group_member_clear(&members[i]); - rd_kafka_metadata_destroy(metadata); + ut_destroy_metadata(metadata); RD_UT_PASS(); } -static int -ut_testAssignmentUpdatedForDeletedTopic(rd_kafka_t *rk, - const rd_kafka_assignor_t *rkas) { +static int ut_testAssignmentUpdatedForDeletedTopic( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { rd_kafka_resp_err_t err; char errstr[512]; rd_kafka_metadata_t *metadata; rd_kafka_group_member_t members[1]; - metadata = - rd_kafka_metadata_new_topic_mockv(2, 
"topic1", 1, "topic3", 100); - ut_init_member(&members[0], "consumer1", "topic1", "topic2", "topic3", - NULL); + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 2, "topic1", 1, "topic3", 100); + + ut_initMemberConditionalRack( + &members[0], "consumer1", ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", "topic2", "topic3", NULL); err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, RD_ARRAYSIZE(members), errstr, @@ -3230,7 +3611,7 @@ ut_testAssignmentUpdatedForDeletedTopic(rd_kafka_t *rk, members[0].rkgm_assignment->cnt); rd_kafka_group_member_clear(&members[0]); - rd_kafka_metadata_destroy(metadata); + ut_destroy_metadata(metadata); RD_UT_PASS(); } @@ -3238,16 +3619,21 @@ ut_testAssignmentUpdatedForDeletedTopic(rd_kafka_t *rk, static int ut_testNoExceptionThrownWhenOnlySubscribedTopicDeleted( rd_kafka_t *rk, - const rd_kafka_assignor_t *rkas) { - + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { rd_kafka_resp_err_t err; char errstr[512]; rd_kafka_metadata_t *metadata; rd_kafka_group_member_t members[1]; - metadata = rd_kafka_metadata_new_topic_mockv(1, "topic1", 3); + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 1, "topic1", 3); + + ut_initMemberConditionalRack(&members[0], "consumer1", + ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", NULL); - ut_init_member(&members[0], "consumer1", "topic", NULL); err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, RD_ARRAYSIZE(members), errstr, @@ -3260,8 +3646,8 @@ static int ut_testNoExceptionThrownWhenOnlySubscribedTopicDeleted( /* * Remove topic */ - rd_kafka_metadata_destroy(metadata); - metadata = rd_kafka_metadata_new_topic_mock(NULL, 0); + ut_destroy_metadata(metadata); + metadata = rd_kafka_metadata_new_topic_mock(NULL, 0, -1, 0); err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, RD_ARRAYSIZE(members), errstr, @@ -3272,15 +3658,16 @@ static int ut_testNoExceptionThrownWhenOnlySubscribedTopicDeleted( isFullyBalanced(members, RD_ARRAYSIZE(members)); rd_kafka_group_member_clear(&members[0]); - rd_kafka_metadata_destroy(metadata); + ut_destroy_metadata(metadata); RD_UT_PASS(); } -static int -ut_testConflictingPreviousAssignments(rd_kafka_t *rk, - const rd_kafka_assignor_t *rkas) { +static int ut_testConflictingPreviousAssignments( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { rd_kafka_resp_err_t err; char errstr[512]; rd_kafka_metadata_t *metadata; @@ -3289,6 +3676,8 @@ ut_testConflictingPreviousAssignments(rd_kafka_t *rk, int i; // FIXME: removed from Java test suite, and fails for us, why, why? + // NOTE: rack-awareness changes aren't made to this test because of + // the FIXME above. RD_UT_PASS(); metadata = rd_kafka_metadata_new_topic_mockv(1, "topic1", 2); @@ -3333,7 +3722,7 @@ ut_testConflictingPreviousAssignments(rd_kafka_t *rk, for (i = 0; i < member_cnt; i++) rd_kafka_group_member_clear(&members[i]); - rd_kafka_metadata_destroy(metadata); + ut_destroy_metadata(metadata); RD_UT_PASS(); } @@ -3342,13 +3731,947 @@ ut_testConflictingPreviousAssignments(rd_kafka_t *rk, * from Java since random tests don't provide meaningful test coverage. 
*/ +static int ut_testAllConsumersReachExpectedQuotaAndAreConsideredFilled( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + rd_kafka_resp_err_t err; + char errstr[512]; + rd_kafka_metadata_t *metadata; + rd_kafka_group_member_t members[3]; + int member_cnt = RD_ARRAYSIZE(members); + int i; + + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 1, "topic1", 4); + + ut_initMemberConditionalRack(&members[0], "consumer1", + ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", NULL); + ut_populate_member_owned_partitions_generation( + &members[0], 1 /* generation */, 2, "topic1", 0, "topic1", 1); + + ut_initMemberConditionalRack(&members[1], "consumer2", + ut_get_consumer_rack(1, parametrization), + parametrization, "topic1", NULL); + ut_populate_member_owned_partitions_generation( + &members[1], 1 /* generation */, 1, "topic1", 2); + + ut_initMemberConditionalRack(&members[2], "consumer3", + ut_get_consumer_rack(2, parametrization), + parametrization, "topic1", NULL); + + err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, + member_cnt, errstr, sizeof(errstr)); + RD_UT_ASSERT(!err, "assignor run failed: %s", errstr); + + verifyValidityAndBalance(members, RD_ARRAYSIZE(members), metadata); + verifyAssignment(&members[0], "topic1", 0, "topic1", 1, NULL); + verifyAssignment(&members[1], "topic1", 2, NULL); + verifyAssignment(&members[2], "topic1", 3, NULL); + + for (i = 0; i < member_cnt; i++) + rd_kafka_group_member_clear(&members[i]); + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + + +static int ut_testOwnedPartitionsAreInvalidatedForConsumerWithStaleGeneration( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + rd_kafka_resp_err_t err; + char errstr[512]; + rd_kafka_metadata_t *metadata; + rd_kafka_group_member_t members[2]; + int member_cnt = RD_ARRAYSIZE(members); + int i; + int current_generation = 10; + + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 2, "topic1", 3, "topic2", 3); + + ut_initMemberConditionalRack(&members[0], "consumer1", + ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", "topic2", NULL); + ut_populate_member_owned_partitions_generation( + &members[0], current_generation, 3, "topic1", 0, "topic1", 2, + "topic2", 1); + + ut_initMemberConditionalRack(&members[1], "consumer2", + ut_get_consumer_rack(1, parametrization), + parametrization, "topic1", "topic2", NULL); + ut_populate_member_owned_partitions_generation( + &members[1], current_generation - 1, 3, "topic1", 0, "topic1", 2, + "topic2", 1); + + err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, + member_cnt, errstr, sizeof(errstr)); + RD_UT_ASSERT(!err, "assignor run failed: %s", errstr); + + verifyValidityAndBalance(members, RD_ARRAYSIZE(members), metadata); + verifyAssignment(&members[0], "topic1", 0, "topic1", 2, "topic2", 1, + NULL); + verifyAssignment(&members[1], "topic1", 1, "topic2", 0, "topic2", 2, + NULL); + + + for (i = 0; i < member_cnt; i++) + rd_kafka_group_member_clear(&members[i]); + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + +static int ut_testOwnedPartitionsAreInvalidatedForConsumerWithNoGeneration( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + rd_kafka_resp_err_t err; + char errstr[512]; + rd_kafka_metadata_t *metadata; + 
rd_kafka_group_member_t members[2]; + int member_cnt = RD_ARRAYSIZE(members); + int i; + int current_generation = 10; + + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 2, "topic1", 3, "topic2", 3); + + ut_initMemberConditionalRack(&members[0], "consumer1", + ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", "topic2", NULL); + ut_populate_member_owned_partitions_generation( + &members[0], current_generation, 3, "topic1", 0, "topic1", 2, + "topic2", 1); + + ut_initMemberConditionalRack(&members[1], "consumer2", + ut_get_consumer_rack(1, parametrization), + parametrization, "topic1", "topic2", NULL); + ut_populate_member_owned_partitions_generation( + &members[1], -1 /* default generation */, 3, "topic1", 0, "topic1", + 2, "topic2", 1); + + err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, + member_cnt, errstr, sizeof(errstr)); + RD_UT_ASSERT(!err, "assignor run failed: %s", errstr); + + verifyValidityAndBalance(members, RD_ARRAYSIZE(members), metadata); + verifyAssignment(&members[0], "topic1", 0, "topic1", 2, "topic2", 1, + NULL); + verifyAssignment(&members[1], "topic1", 1, "topic2", 0, "topic2", 2, + NULL); + + + for (i = 0; i < member_cnt; i++) + rd_kafka_group_member_clear(&members[i]); + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + +static int +ut_testPartitionsTransferringOwnershipIncludeThePartitionClaimedByMultipleConsumersInSameGeneration( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + rd_kafka_resp_err_t err; + char errstr[512]; + rd_kafka_metadata_t *metadata; + rd_kafka_group_member_t members[3]; + int member_cnt = RD_ARRAYSIZE(members); + int i; + + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 1, "topic1", 3); + + // partition topic-0 is owned by multiple consumers + ut_initMemberConditionalRack(&members[0], "consumer1", + ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", NULL); + ut_populate_member_owned_partitions_generation( + &members[0], 1 /* generation */, 2, "topic1", 0, "topic1", 1); + + ut_initMemberConditionalRack(&members[1], "consumer2", + ut_get_consumer_rack(1, parametrization), + parametrization, "topic1", NULL); + ut_populate_member_owned_partitions_generation( + &members[1], 1 /* generation */, 2, "topic1", 0, "topic1", 2); + + ut_initMemberConditionalRack(&members[2], "consumer3", + ut_get_consumer_rack(2, parametrization), + parametrization, "topic1", NULL); + + + err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, + member_cnt, errstr, sizeof(errstr)); + RD_UT_ASSERT(!err, "assignor run failed: %s", errstr); + + verifyValidityAndBalance(members, RD_ARRAYSIZE(members), metadata); + verifyAssignment(&members[0], "topic1", 1, NULL); + verifyAssignment(&members[1], "topic1", 2, NULL); + verifyAssignment(&members[2], "topic1", 0, NULL); + + for (i = 0; i < member_cnt; i++) + rd_kafka_group_member_clear(&members[i]); + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + + +/* In Java, there is a way to check what partition transferred ownership. + * We don't have anything like that for our UTs, so in lieu of that, this + * test is added along with the previous test to make sure that we move the + * right partition. Our solution in case of two consumers owning the same + * partitions with the same generation id differed from the Java + * implementation earlier. (Check #4252.)
*/ +static int +ut_testPartitionsTransferringOwnershipIncludeThePartitionClaimedByMultipleConsumersInSameGeneration2( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + rd_kafka_resp_err_t err; + char errstr[512]; + rd_kafka_metadata_t *metadata; + rd_kafka_group_member_t members[3]; + int member_cnt = RD_ARRAYSIZE(members); + int i; + + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 1, "topic1", 3); + + // partition topic-0 is owned by multiple consumers + ut_initMemberConditionalRack(&members[0], "consumer1", + ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", NULL); + ut_populate_member_owned_partitions_generation( + &members[0], 1 /* generation */, 2, "topic1", 0, "topic1", 1); + + ut_initMemberConditionalRack(&members[1], "consumer2", + ut_get_consumer_rack(1, parametrization), + parametrization, "topic1", NULL); + ut_populate_member_owned_partitions_generation( + &members[1], 1 /* generation */, 2, "topic1", 1, "topic1", 2); + + ut_initMemberConditionalRack(&members[2], "consumer3", + ut_get_consumer_rack(2, parametrization), + parametrization, "topic1", NULL); + + + err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, + member_cnt, errstr, sizeof(errstr)); + RD_UT_ASSERT(!err, "assignor run failed: %s", errstr); + + verifyValidityAndBalance(members, RD_ARRAYSIZE(members), metadata); + verifyAssignment(&members[0], "topic1", 0, NULL); + verifyAssignment(&members[1], "topic1", 2, NULL); + verifyAssignment(&members[2], "topic1", 1, NULL); + + for (i = 0; i < member_cnt; i++) + rd_kafka_group_member_clear(&members[i]); + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + + +static int ut_testEnsurePartitionsAssignedToHighestGeneration( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + rd_kafka_resp_err_t err; + char errstr[512]; + rd_kafka_metadata_t *metadata; + rd_kafka_group_member_t members[3]; + int member_cnt = RD_ARRAYSIZE(members); + int i; + int currentGeneration = 10; + + ut_initMetadataConditionalRack( + &metadata, 3, 3, ALL_RACKS, RD_ARRAYSIZE(ALL_RACKS), + parametrization, 3, "topic1", 3, "topic2", 3, "topic3", 3); + + ut_initMemberConditionalRack( + &members[0], "consumer1", ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", "topic2", "topic3", NULL); + ut_populate_member_owned_partitions_generation( + &members[0], currentGeneration, 3, "topic1", 0, "topic2", 0, + "topic3", 0); + + + ut_initMemberConditionalRack( + &members[1], "consumer2", ut_get_consumer_rack(1, parametrization), + parametrization, "topic1", "topic2", "topic3", NULL); + ut_populate_member_owned_partitions_generation( + &members[1], currentGeneration - 1, 3, "topic1", 1, "topic2", 1, + "topic3", 1); + + + ut_initMemberConditionalRack( + &members[2], "consumer3", ut_get_consumer_rack(2, parametrization), + parametrization, "topic1", "topic2", "topic3", NULL); + ut_populate_member_owned_partitions_generation( + &members[2], currentGeneration - 2, 3, "topic2", 1, "topic3", 0, + "topic3", 2); + + err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, + member_cnt, errstr, sizeof(errstr)); + RD_UT_ASSERT(!err, "assignor run failed: %s", errstr); + verifyAssignment(&members[0], "topic1", 0, "topic2", 0, "topic3", 0, + NULL); + verifyAssignment(&members[1], "topic1", 1, "topic2", 1, "topic3", 1, + NULL); + verifyAssignment(&members[2], "topic1", 2, "topic2", 2, 
"topic3", 2, + NULL); + + verifyValidityAndBalance(members, RD_ARRAYSIZE(members), metadata); + + for (i = 0; i < member_cnt; i++) + rd_kafka_group_member_clear(&members[i]); + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + + +static int ut_testNoReassignmentOnCurrentMembers( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + rd_kafka_resp_err_t err; + char errstr[512]; + rd_kafka_metadata_t *metadata; + rd_kafka_group_member_t members[4]; + int member_cnt = RD_ARRAYSIZE(members); + int i; + int currentGeneration = 10; + + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 4, "topic0", 3, "topic1", 3, "topic2", 3, + "topic3", 3); + + ut_initMemberConditionalRack( + &members[0], "consumer1", ut_get_consumer_rack(0, parametrization), + parametrization, "topic0", "topic1", "topic2", "topic3", NULL); + ut_populate_member_owned_partitions_generation( + &members[0], -1 /* default generation */, 0); + + ut_initMemberConditionalRack( + &members[1], "consumer2", ut_get_consumer_rack(1, parametrization), + parametrization, "topic0", "topic1", "topic2", "topic3", NULL); + ut_populate_member_owned_partitions_generation( + &members[1], currentGeneration - 1, 3, "topic0", 0, "topic2", 0, + "topic1", 0); + + ut_initMemberConditionalRack( + &members[2], "consumer3", ut_get_consumer_rack(2, parametrization), + parametrization, "topic0", "topic1", "topic2", "topic3", NULL); + ut_populate_member_owned_partitions_generation( + &members[2], currentGeneration - 2, 3, "topic3", 2, "topic2", 2, + "topic1", 1); + + ut_initMemberConditionalRack( + &members[3], "consumer4", ut_get_consumer_rack(3, parametrization), + parametrization, "topic0", "topic1", "topic2", "topic3", NULL); + ut_populate_member_owned_partitions_generation( + &members[3], currentGeneration - 3, 3, "topic3", 1, "topic0", 1, + "topic0", 2); + + err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, + member_cnt, errstr, sizeof(errstr)); + RD_UT_ASSERT(!err, "assignor run failed: %s", errstr); + + verifyValidityAndBalance(members, member_cnt, metadata); + verifyAssignment(&members[0], "topic1", 2, "topic2", 1, "topic3", 0, + NULL); + + for (i = 0; i < member_cnt; i++) + rd_kafka_group_member_clear(&members[i]); + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + + +static int +ut_testOwnedPartitionsAreInvalidatedForConsumerWithMultipleGeneration( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + rd_kafka_resp_err_t err; + char errstr[512]; + rd_kafka_metadata_t *metadata; + rd_kafka_group_member_t members[2]; + int member_cnt = RD_ARRAYSIZE(members); + int i; + int currentGeneration = 10; + + ut_initMetadataConditionalRack(&metadata, 3, 3, ALL_RACKS, + RD_ARRAYSIZE(ALL_RACKS), parametrization, + 2, "topic1", 3, "topic2", 3); + + ut_initMemberConditionalRack(&members[0], "consumer1", + ut_get_consumer_rack(0, parametrization), + parametrization, "topic1", "topic2", NULL); + ut_populate_member_owned_partitions_generation( + &members[0], currentGeneration, 3, "topic1", 0, "topic2", 1, + "topic1", 1); + + ut_initMemberConditionalRack(&members[1], "consumer2", + ut_get_consumer_rack(1, parametrization), + parametrization, "topic1", "topic2", NULL); + ut_populate_member_owned_partitions_generation( + &members[1], currentGeneration - 2, 3, "topic1", 0, "topic2", 1, + "topic2", 2); + + err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, metadata, members, + 
member_cnt, errstr, sizeof(errstr)); + RD_UT_ASSERT(!err, "assignor run failed: %s", errstr); + + verifyValidityAndBalance(members, member_cnt, metadata); + verifyAssignment(&members[0], "topic1", 0, "topic2", 1, "topic1", 1, + NULL); + verifyAssignment(&members[1], "topic1", 2, "topic2", 2, "topic2", 0, + NULL); + + for (i = 0; i < member_cnt; i++) + rd_kafka_group_member_clear(&members[i]); + ut_destroy_metadata(metadata); + + RD_UT_PASS(); +} + +/* Helper for setting up metadata and members, running the assignor, and + * verifying validity and balance of the assignment. Does not check the results + * of the assignment on a per-member basis. + */ +static int +setupRackAwareAssignment0(rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_group_member_t *members, + size_t member_cnt, + int replication_factor, + int num_broker_racks, + size_t topic_cnt, + char *topics[], + int *partitions, + int *subscriptions_count, + char **subscriptions[], + int *consumer_racks, + rd_kafka_topic_partition_list_t **owned_tp_list, + rd_bool_t initialize_members, + rd_kafka_metadata_t **metadata) { + rd_kafka_resp_err_t err; + char errstr[512]; + rd_kafka_metadata_t *metadata_local = NULL; + + size_t i = 0; + const int num_brokers = num_broker_racks > 0 + ? replication_factor * num_broker_racks + : replication_factor; + if (!metadata) + metadata = &metadata_local; + + /* The member naming for tests is consumerN where N is a single + * character. */ + rd_assert(member_cnt <= 9); + + *metadata = rd_kafka_metadata_new_topic_with_partition_replicas_mock( + replication_factor, num_brokers, topics, partitions, topic_cnt); + ut_populate_internal_broker_metadata( + rd_kafka_metadata_get_internal(*metadata), num_broker_racks, + ALL_RACKS, RD_ARRAYSIZE(ALL_RACKS)); + ut_populate_internal_topic_metadata( + rd_kafka_metadata_get_internal(*metadata)); + + for (i = 0; initialize_members && i < member_cnt; i++) { + char member_id[10]; + snprintf(member_id, 10, "consumer%d", (int)(i + 1)); + ut_init_member_with_rack( + &members[i], member_id, ALL_RACKS[consumer_racks[i]], + subscriptions[i], subscriptions_count[i]); + + if (!owned_tp_list || !owned_tp_list[i]) + continue; + + if (members[i].rkgm_owned) + rd_kafka_topic_partition_list_destroy( + members[i].rkgm_owned); + + members[i].rkgm_owned = + rd_kafka_topic_partition_list_copy(owned_tp_list[i]); + } + + err = rd_kafka_assignor_run(rk->rk_cgrp, rkas, *metadata, members, + member_cnt, errstr, sizeof(errstr)); + RD_UT_ASSERT(!err, "assignor run failed: %s", errstr); + + /* Note that verifyValidityAndBalance also sets rkgm_owned for each + * member to rkgm_assignment, so if the members are reused in another + * assignor_run without being cleared, the result should be stable.
*/ + verifyValidityAndBalance(members, member_cnt, *metadata); + + if (metadata_local) + ut_destroy_metadata(metadata_local); + return 0; +} + +static int +setupRackAwareAssignment(rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_group_member_t *members, + size_t member_cnt, + int replication_factor, + int num_broker_racks, + size_t topic_cnt, + char *topics[], + int *partitions, + int *subscriptions_count, + char **subscriptions[], + int *consumer_racks, + rd_kafka_topic_partition_list_t **owned_tp_list, + rd_bool_t initialize_members) { + return setupRackAwareAssignment0( + rk, rkas, members, member_cnt, replication_factor, num_broker_racks, + topic_cnt, topics, partitions, subscriptions_count, subscriptions, + consumer_racks, owned_tp_list, initialize_members, NULL); +} + +/* Helper for testing cases where rack-aware assignment should not be triggered, + * and assignment should be the same as the pre-rack-aware assignor. Each case + * is run twice: once with owned partitions set to empty, and once with owned + * partitions set to the result of the previous run, to check that the + * assignment is stable. */ +#define verifyNonRackAwareAssignment(rk, rkas, members, member_cnt, topic_cnt, \ + topics, partitions, subscriptions_count, \ + subscriptions, ...) \ + do { \ + size_t idx = 0; \ + int init_members = 1; \ + rd_kafka_metadata_t *metadata; \ + \ + /* num_broker_racks = 0 implies that brokers have no \ + * configured racks. */ \ + for (init_members = 1; init_members >= 0; init_members--) { \ + setupRackAwareAssignment( \ + rk, rkas, members, member_cnt, 3, 0, topic_cnt, \ + topics, partitions, subscriptions_count, \ + subscriptions, RACKS_INITIAL, NULL, init_members); \ + verifyMultipleAssignment(members, member_cnt, \ + __VA_ARGS__); \ + } \ + for (idx = 0; idx < member_cnt; idx++) \ + rd_kafka_group_member_clear(&members[idx]); \ + /* consumer_racks = RACKS_NULL implies that consumers have no \ + * racks. */ \ + for (init_members = 1; init_members >= 0; init_members--) { \ + setupRackAwareAssignment( \ + rk, rkas, members, member_cnt, 3, 3, topic_cnt, \ + topics, partitions, subscriptions_count, \ + subscriptions, RACKS_NULL, NULL, init_members); \ + verifyMultipleAssignment(members, member_cnt, \ + __VA_ARGS__); \ + } \ + for (idx = 0; idx < member_cnt; idx++) \ + rd_kafka_group_member_clear(&members[idx]); \ + /* replication_factor = 3 and num_broker_racks = 3 means that \ + * all partitions are replicated on all racks. */ \ + for (init_members = 1; init_members >= 0; init_members--) { \ + setupRackAwareAssignment0( \ + rk, rkas, members, member_cnt, 3, 3, topic_cnt, \ + topics, partitions, subscriptions_count, \ + subscriptions, RACKS_INITIAL, NULL, init_members, \ + &metadata); \ + verifyMultipleAssignment(members, member_cnt, \ + __VA_ARGS__); \ + verifyNumPartitionsWithRackMismatch( \ + metadata, members, RD_ARRAYSIZE(members), 0); \ + ut_destroy_metadata(metadata); \ + } \ + for (idx = 0; idx < member_cnt; idx++) \ + rd_kafka_group_member_clear(&members[idx]); \ + /* replication_factor = 4 and num_broker_racks = 4 means that \ + * all partitions are replicated on all racks.
*/ \ + for (init_members = 1; init_members >= 0; init_members--) { \ + setupRackAwareAssignment0( \ + rk, rkas, members, member_cnt, 4, 4, topic_cnt, \ + topics, partitions, subscriptions_count, \ + subscriptions, RACKS_INITIAL, NULL, init_members, \ + &metadata); \ + verifyMultipleAssignment(members, member_cnt, \ + __VA_ARGS__); \ + verifyNumPartitionsWithRackMismatch( \ + metadata, members, RD_ARRAYSIZE(members), 0); \ + ut_destroy_metadata(metadata); \ + } \ + for (idx = 0; idx < member_cnt; idx++) \ + rd_kafka_group_member_clear(&members[idx]); \ + /* There's no overlap between broker racks and consumer racks: \ + * since num_broker_racks = 3, broker racks will be picked from \ + * a,b,c while consumer racks are d,e,f. */ \ + for (init_members = 1; init_members >= 0; init_members--) { \ + setupRackAwareAssignment( \ + rk, rkas, members, member_cnt, 3, 3, topic_cnt, \ + topics, partitions, subscriptions_count, \ + subscriptions, RACKS_FINAL, NULL, init_members); \ + verifyMultipleAssignment(members, member_cnt, \ + __VA_ARGS__); \ + } \ + for (idx = 0; idx < member_cnt; idx++) \ + rd_kafka_group_member_clear(&members[idx]); \ + /* There's no overlap between broker racks and consumer racks: \ + * since num_broker_racks = 3, broker racks will be picked from \ + * a,b,c while consumer racks are d,e,NULL. */ \ + for (init_members = 1; init_members >= 0; init_members--) { \ + setupRackAwareAssignment( \ + rk, rkas, members, member_cnt, 3, 3, topic_cnt, \ + topics, partitions, subscriptions_count, \ + subscriptions, RACKS_ONE_NULL, NULL, \ + init_members); \ + verifyMultipleAssignment(members, member_cnt, \ + __VA_ARGS__); \ + } \ + for (idx = 0; idx < member_cnt; idx++) \ + rd_kafka_group_member_clear(&members[idx]); \ + } while (0) + + +static int ut_testRackAwareAssignmentWithUniformSubscription( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + char *topics[] = {"t1", "t2", "t3"}; + int partitions[] = {6, 7, 2}; + rd_kafka_group_member_t members[3]; + size_t member_cnt = RD_ARRAYSIZE(members); + size_t i = 0; + int subscriptions_count[] = {3, 3, 3}; + char **subscriptions[] = {topics, topics, topics}; + int init_members = 0; + rd_kafka_topic_partition_list_t **owned; + rd_kafka_metadata_t *metadata; + + if (parametrization != + RD_KAFKA_RANGE_ASSIGNOR_UT_BROKER_AND_CONSUMER_RACK) { + RD_UT_PASS(); + } + + verifyNonRackAwareAssignment( + rk, rkas, members, RD_ARRAYSIZE(members), RD_ARRAYSIZE(topics), + topics, partitions, subscriptions_count, subscriptions, + /* consumer1 */ + "t1", 0, "t1", 3, "t2", 0, "t2", 3, "t2", 6, NULL, + /* consumer2 */ + "t1", 1, "t1", 4, "t2", 1, "t2", 4, "t3", 0, NULL, + /* consumer3 */ + "t1", 2, "t1", 5, "t2", 2, "t2", 5, "t3", 1, NULL); + + /* Verify that the assignment is rack-aligned for a lower replication + * factor, where brokers have a subset of partitions. */ + for (init_members = 1; init_members >= 0; init_members--) { + setupRackAwareAssignment0( + rk, rkas, members, RD_ARRAYSIZE(members), 1, 3, + RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, RACKS_INITIAL, NULL, + init_members, &metadata); + verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1 */ + "t1", 0, "t1", 3, "t2", 0, "t2", 3, "t2", 6, NULL, + /* consumer2 */ + "t1", 1, "t1", 4, "t2", 1, "t2", 4, "t3", 0, NULL, + /* consumer3 */ + "t1", 2, "t1", 5, "t2", 2, "t2", 5, "t3", 1, NULL); + verifyNumPartitionsWithRackMismatch(metadata, members, + RD_ARRAYSIZE(members), 0); + ut_destroy_metadata(metadata); + } + for (i = 
0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + + + for (init_members = 1; init_members >= 0; init_members--) { + setupRackAwareAssignment0( + rk, rkas, members, RD_ARRAYSIZE(members), 2, 3, + RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, RACKS_INITIAL, NULL, + init_members, &metadata); + verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1 */ + "t1", 0, "t1", 3, "t2", 0, "t2", 3, "t2", 6, NULL, + /* consumer2 */ + "t1", 1, "t1", 4, "t2", 1, "t2", 4, "t3", 0, NULL, + /* consumer3 */ + "t1", 2, "t1", 5, "t2", 2, "t2", 5, "t3", 1, NULL); + verifyNumPartitionsWithRackMismatch(metadata, members, + RD_ARRAYSIZE(members), 0); + ut_destroy_metadata(metadata); + } + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + + /* One consumer on a rack with no partitions. We allocate with + * misaligned rack to this consumer to maintain balance. */ + for (init_members = 1; init_members >= 0; init_members--) { + setupRackAwareAssignment0( + rk, rkas, members, RD_ARRAYSIZE(members), 3, 2, + RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, RACKS_INITIAL, NULL, + init_members, &metadata); + verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1 */ + "t1", 0, "t1", 3, "t2", 0, "t2", 3, "t2", 6, NULL, + /* consumer2 */ + "t1", 1, "t1", 4, "t2", 1, "t2", 4, "t3", 0, NULL, + /* consumer3 */ + "t1", 2, "t1", 5, "t2", 2, "t2", 5, "t3", 1, NULL); + verifyNumPartitionsWithRackMismatch(metadata, members, + RD_ARRAYSIZE(members), 5); + ut_destroy_metadata(metadata); + } + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + + /* Verify that rack-awareness is improved if already owned partitions + * are misaligned */ + owned = ut_create_topic_partition_lists( + 3, + /* consumer1 */ + "t1", 0, "t1", 1, "t1", 2, "t1", 3, "t1", 4, NULL, + /* consumer2 */ + "t1", 5, "t2", 0, "t2", 1, "t2", 2, "t2", 3, NULL, + /* consumer3 */ + "t2", 4, "t2", 5, "t2", 6, "t3", 0, "t3", 1, NULL); + + setupRackAwareAssignment0(rk, rkas, members, RD_ARRAYSIZE(members), 1, + 3, RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, + RACKS_INITIAL, owned, rd_true, &metadata); + verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1 */ + "t1", 0, "t1", 3, "t2", 0, "t2", 3, "t2", 6, NULL, + /* consumer2 */ + "t1", 1, "t1", 4, "t2", 1, "t2", 4, "t3", 0, NULL, + /* consumer3 */ + "t1", 2, "t1", 5, "t2", 2, "t2", 5, "t3", 1, NULL); + verifyNumPartitionsWithRackMismatch(metadata, members, + RD_ARRAYSIZE(members), 0); + ut_destroy_metadata(metadata); + + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + for (i = 0; i < member_cnt; i++) + rd_kafka_topic_partition_list_destroy(owned[i]); + rd_free(owned); + + + /* Verify that stickiness is retained when racks match */ + owned = ut_create_topic_partition_lists( + 3, + /* consumer1 */ + "t1", 0, "t1", 3, "t2", 0, "t2", 3, "t2", 6, NULL, + /* consumer2 */ + "t1", 1, "t1", 4, "t2", 1, "t2", 4, "t3", 0, NULL, + /* consumer3 */ + "t1", 2, "t1", 5, "t2", 2, "t2", 5, "t3", 1, NULL); + + /* This test deviates slightly from Java, in that we test with two + * additional replication factors, 1 and 2, which are not tested in + * Java. This is because in Java, there is a way to turn rack aware + * logic on or off for tests. 
We don't have that, so to exercise the rack-aware + * logic we need to change something else, in this case the + * replication factor. */ + for (i = 1; i <= 3; i++) { + size_t m; + setupRackAwareAssignment0( + rk, rkas, members, RD_ARRAYSIZE(members), + i /* replication factor */, 3, RD_ARRAYSIZE(topics), topics, + partitions, subscriptions_count, subscriptions, + RACKS_INITIAL, owned, rd_true, &metadata); + verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1 */ + "t1", 0, "t1", 3, "t2", 0, "t2", 3, "t2", 6, NULL, + /* consumer2 */ + "t1", 1, "t1", 4, "t2", 1, "t2", 4, "t3", 0, NULL, + /* consumer3 */ + "t1", 2, "t1", 5, "t2", 2, "t2", 5, "t3", 1, NULL); + verifyNumPartitionsWithRackMismatch(metadata, members, + RD_ARRAYSIZE(members), 0); + + /* Use a separate index here: reusing i would clobber the + * enclosing replication-factor loop. */ + for (m = 0; m < RD_ARRAYSIZE(members); m++) + rd_kafka_group_member_clear(&members[m]); + ut_destroy_metadata(metadata); + } + + for (i = 0; i < member_cnt; i++) + rd_kafka_topic_partition_list_destroy(owned[i]); + rd_free(owned); + + RD_UT_PASS(); +} + + +static int ut_testRackAwareAssignmentWithNonEqualSubscription( + rd_kafka_t *rk, + const rd_kafka_assignor_t *rkas, + rd_kafka_assignor_ut_rack_config_t parametrization) { + char *topics[] = {"t1", "t2", "t3"}; + char *topics0[] = {"t1", "t3"}; + int partitions[] = {6, 7, 2}; + rd_kafka_group_member_t members[3]; + size_t member_cnt = RD_ARRAYSIZE(members); + size_t i = 0; + int subscriptions_count[] = {3, 3, 2}; + char **subscriptions[] = {topics, topics, topics0}; + int with_owned = 0; + rd_kafka_topic_partition_list_t **owned; + rd_kafka_metadata_t *metadata; + + if (parametrization != + RD_KAFKA_RANGE_ASSIGNOR_UT_BROKER_AND_CONSUMER_RACK) { + RD_UT_PASS(); + } + + verifyNonRackAwareAssignment( + rk, rkas, members, RD_ARRAYSIZE(members), RD_ARRAYSIZE(topics), + topics, partitions, subscriptions_count, subscriptions, + /* consumer1 */ + "t1", 5, "t2", 0, "t2", 2, "t2", 4, "t2", 6, NULL, + /* consumer2 */ + "t1", 3, "t2", 1, "t2", 3, "t2", 5, "t3", 0, NULL, + /* consumer3 */ + "t1", 0, "t1", 1, "t1", 2, "t1", 4, "t3", 1, NULL); + + /* Verify that the assignment is rack-aligned for a lower replication + * factor, where brokers have a subset of partitions. */ + for (with_owned = 0; with_owned <= 1; with_owned++) { + setupRackAwareAssignment0( + rk, rkas, members, RD_ARRAYSIZE(members), 1, 3, + RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, RACKS_INITIAL, NULL, + !with_owned, &metadata); + verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1 */ + "t1", 3, "t2", 0, "t2", 2, "t2", 3, "t2", 6, NULL, + /* consumer2 */ + "t1", 4, "t2", 1, "t2", 4, "t2", 5, "t3", 0, NULL, + /* consumer3 */ + "t1", 0, "t1", 1, "t1", 2, "t1", 5, "t3", 1, NULL); + verifyNumPartitionsWithRackMismatch(metadata, members, + RD_ARRAYSIZE(members), 4); + ut_destroy_metadata(metadata); + } + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + + + + for (with_owned = 0; with_owned <= 1; with_owned++) { + setupRackAwareAssignment0( + rk, rkas, members, RD_ARRAYSIZE(members), 2, 3, + RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, RACKS_INITIAL, NULL, + !with_owned, &metadata); + verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1 */ + "t1", 3, "t2", 0, "t2", 2, "t2", 5, "t2", 6, NULL, + /* consumer2 */ + "t1", 0, "t2", 1, "t2", 3, "t2", 4, "t3", 0, NULL, + /* consumer3 */ + "t1", 1, "t1", 2, "t1", 4, "t1", 5, "t3", 1, NULL); + verifyNumPartitionsWithRackMismatch(metadata, members, + RD_ARRAYSIZE(members), 0); + 
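/* Illustrative note (not part of the change): with replication factor 2 + * across 3 racks, a balanced and fully rack-aligned assignment exists, + * hence the expected rack-mismatch count of 0 above. */ + 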
ut_destroy_metadata(metadata); + } + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + + /* One consumer on a rack with no partitions. We allocate with + * misaligned rack to this consumer to maintain balance. */ + for (with_owned = 0; with_owned <= 1; with_owned++) { + setupRackAwareAssignment0( + rk, rkas, members, RD_ARRAYSIZE(members), 3, 2, + RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, RACKS_INITIAL, NULL, + !with_owned, &metadata); + verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1 */ + "t1", 5, "t2", 0, "t2", 2, "t2", 4, "t2", 6, NULL, + /* consumer2 */ + "t1", 3, "t2", 1, "t2", 3, "t2", 5, "t3", 0, NULL, + /* consumer3 */ + "t1", 0, "t1", 1, "t1", 2, "t1", 4, "t3", 1, NULL); + verifyNumPartitionsWithRackMismatch(metadata, members, + RD_ARRAYSIZE(members), 5); + ut_destroy_metadata(metadata); + } + + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + + /* Verify that rack-awareness is improved if already owned partitions + * are misaligned. */ + owned = ut_create_topic_partition_lists( + 3, + /* consumer1 */ + "t1", 0, "t1", 1, "t1", 2, "t1", 3, "t1", 4, NULL, + /* consumer2 */ + "t1", 5, "t2", 0, "t2", 1, "t2", 2, "t2", 3, NULL, + /* consumer3 */ + "t2", 4, "t2", 5, "t2", 6, "t3", 0, "t3", 1, NULL); + + setupRackAwareAssignment0(rk, rkas, members, RD_ARRAYSIZE(members), 1, + 3, RD_ARRAYSIZE(topics), topics, partitions, + subscriptions_count, subscriptions, + RACKS_INITIAL, owned, rd_true, &metadata); + verifyMultipleAssignment( + members, RD_ARRAYSIZE(members), + /* consumer1 */ + "t1", 3, "t2", 0, "t2", 2, "t2", 3, "t2", 6, NULL, + /* consumer2 */ + "t1", 4, "t2", 1, "t2", 4, "t2", 5, "t3", 0, NULL, + /* consumer3 */ + "t1", 0, "t1", 1, "t1", 2, "t1", 5, "t3", 1, NULL); + verifyNumPartitionsWithRackMismatch(metadata, members, + RD_ARRAYSIZE(members), 4); + ut_destroy_metadata(metadata); + + for (i = 0; i < RD_ARRAYSIZE(members); i++) + rd_kafka_group_member_clear(&members[i]); + for (i = 0; i < member_cnt; i++) + rd_kafka_topic_partition_list_destroy(owned[i]); + rd_free(owned); + + /* One of the Java tests is skipped here, which tests if the rack-aware + * logic assigns the same partitions as non-rack aware logic. This is + * because we don't have a way to force rack-aware logic like the Java + * assignor. 
*/ + RD_UT_PASS(); +} + static int rd_kafka_sticky_assignor_unittest(void) { rd_kafka_conf_t *conf; rd_kafka_t *rk; int fails = 0; char errstr[256]; rd_kafka_assignor_t *rkas; - static int (*tests[])(rd_kafka_t *, const rd_kafka_assignor_t *) = { + static int (*tests[])( + rd_kafka_t *, const rd_kafka_assignor_t *, + rd_kafka_assignor_ut_rack_config_t parametrization) = { ut_testOneConsumerNoTopic, ut_testOneConsumerNonexistentTopic, ut_testOneConsumerOneTopic, @@ -3367,14 +4690,26 @@ static int rd_kafka_sticky_assignor_unittest(void) { ut_testLargeAssignmentWithMultipleConsumersLeaving, ut_testNewSubscription, ut_testMoveExistingAssignments, + ut_testMoveExistingAssignments_j, ut_testStickiness, + ut_testStickiness_j, ut_testStickiness2, ut_testAssignmentUpdatedForDeletedTopic, ut_testNoExceptionThrownWhenOnlySubscribedTopicDeleted, ut_testConflictingPreviousAssignments, + ut_testAllConsumersReachExpectedQuotaAndAreConsideredFilled, + ut_testOwnedPartitionsAreInvalidatedForConsumerWithStaleGeneration, + ut_testOwnedPartitionsAreInvalidatedForConsumerWithNoGeneration, + ut_testPartitionsTransferringOwnershipIncludeThePartitionClaimedByMultipleConsumersInSameGeneration, + ut_testPartitionsTransferringOwnershipIncludeThePartitionClaimedByMultipleConsumersInSameGeneration2, + ut_testEnsurePartitionsAssignedToHighestGeneration, + ut_testNoReassignmentOnCurrentMembers, + ut_testOwnedPartitionsAreInvalidatedForConsumerWithMultipleGeneration, + ut_testRackAwareAssignmentWithUniformSubscription, + ut_testRackAwareAssignmentWithNonEqualSubscription, NULL, }; - int i; + size_t i; conf = rd_kafka_conf_new(); @@ -3394,13 +4729,25 @@ static int rd_kafka_sticky_assignor_unittest(void) { rkas = rd_kafka_assignor_find(rk, "cooperative-sticky"); RD_UT_ASSERT(rkas, "sticky assignor not found"); + for (i = 0; i < RD_ARRAY_SIZE(ALL_RACKS) - 1; i++) { + char c = 'a' + i; + ALL_RACKS[i] = rd_kafkap_str_new(&c, 1); + } + ALL_RACKS[i] = NULL; + for (i = 0; tests[i]; i++) { rd_ts_t ts = rd_clock(); - int r; - - RD_UT_SAY("[ Test #%d ]", i); - r = tests[i](rk, rkas); - RD_UT_SAY("[ Test #%d ran for %.3fms ]", i, + int r = 0; + rd_kafka_assignor_ut_rack_config_t j; + + RD_UT_SAY("[ Test #%" PRIusz " ]", i); + for (j = RD_KAFKA_RANGE_ASSIGNOR_UT_NO_BROKER_RACK; + j != RD_KAFKA_RANGE_ASSIGNOR_UT_CONFIG_CNT; j++) { + RD_UT_SAY("[ Test #%" PRIusz ", RackConfig = %d ]", i, + j); + r += tests[i](rk, rkas, j); + } + RD_UT_SAY("[ Test #%" PRIusz " ran for %.3fms ]", i, (double)(rd_clock() - ts) / 1000.0); RD_UT_ASSERT(!r, "^ failed"); @@ -3408,6 +4755,10 @@ static int rd_kafka_sticky_assignor_unittest(void) { fails += r; } + for (i = 0; i < RD_ARRAY_SIZE(ALL_RACKS) - 1; i++) { + rd_kafkap_str_destroy(ALL_RACKS[i]); + } + rd_kafka_destroy(rk); return fails; diff --git a/lib/librdkafka-2.1.0/src/rdkafka_subscription.c b/lib/librdkafka-2.3.0/src/rdkafka_subscription.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_subscription.c rename to lib/librdkafka-2.3.0/src/rdkafka_subscription.c index 08058935876..46ab544ee20 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_subscription.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_subscription.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_timer.c b/lib/librdkafka-2.3.0/src/rdkafka_timer.c similarity index 92% rename from lib/librdkafka-2.1.0/src/rdkafka_timer.c rename to lib/librdkafka-2.3.0/src/rdkafka_timer.c index 5240af78578..b62343269dd 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_timer.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_timer.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,6 +29,7 @@ #include "rdkafka_int.h" #include "rd.h" #include "rdtime.h" +#include "rdrand.h" #include "rdsysqueue.h" #include "rdkafka_queue.h" @@ -198,15 +199,32 @@ void rd_kafka_timer_start0(rd_kafka_timers_t *rkts, /** * Delay the next timer invocation by '2 * rtmr->rtmr_interval' + * @param minimum_backoff the minimum backoff to be applied + * @param maximum_backoff the maximum backoff to be applied + * @param max_jitter the jitter percentage to be applied to the backoff */ void rd_kafka_timer_exp_backoff(rd_kafka_timers_t *rkts, - rd_kafka_timer_t *rtmr) { + rd_kafka_timer_t *rtmr, + rd_ts_t minimum_backoff, + rd_ts_t maximum_backoff, + int max_jitter) { + int64_t jitter; rd_kafka_timers_lock(rkts); if (rd_kafka_timer_scheduled(rtmr)) { - rtmr->rtmr_interval *= 2; rd_kafka_timer_unschedule(rkts, rtmr); } - rd_kafka_timer_schedule(rkts, rtmr, 0); + rtmr->rtmr_interval *= 2; + jitter = + (rd_jitter(-max_jitter, max_jitter) * rtmr->rtmr_interval) / 100; + if (rtmr->rtmr_interval + jitter < minimum_backoff) { + rtmr->rtmr_interval = minimum_backoff; + jitter = 0; + } else if ((maximum_backoff != -1) && + (rtmr->rtmr_interval + jitter) > maximum_backoff) { + rtmr->rtmr_interval = maximum_backoff; + jitter = 0; + } + rd_kafka_timer_schedule(rkts, rtmr, jitter); rd_kafka_timers_unlock(rkts); } diff --git a/lib/librdkafka-2.1.0/src/rdkafka_timer.h b/lib/librdkafka-2.3.0/src/rdkafka_timer.h similarity index 94% rename from lib/librdkafka-2.1.0/src/rdkafka_timer.h rename to lib/librdkafka-2.3.0/src/rdkafka_timer.h index e3cadd7b9fa..9a273adcfa6 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_timer.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_timer.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -85,7 +85,10 @@ void rd_kafka_timer_start0(rd_kafka_timers_t *rkts, callback, arg) void rd_kafka_timer_exp_backoff(rd_kafka_timers_t *rkts, - rd_kafka_timer_t *rtmr); + rd_kafka_timer_t *rtmr, + rd_ts_t minimum, + rd_ts_t maximum, + int maxjitter); rd_ts_t rd_kafka_timer_next(rd_kafka_timers_t *rkts, rd_kafka_timer_t *rtmr, int do_lock); diff --git a/lib/librdkafka-2.1.0/src/rdkafka_topic.c b/lib/librdkafka-2.3.0/src/rdkafka_topic.c similarity index 90% rename from lib/librdkafka-2.1.0/src/rdkafka_topic.c rename to lib/librdkafka-2.3.0/src/rdkafka_topic.c index 89bfa092dfe..5a161db9ac1 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_topic.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_topic.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012,2013 Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -39,6 +40,7 @@ #include "rdsysqueue.h" #include "rdtime.h" #include "rdregex.h" +#include "rdkafka_fetcher.h" #if WITH_ZSTD #include <zstd.h> @@ -49,11 +51,11 @@ const char *rd_kafka_topic_state_names[] = {"unknown", "exists", "notexists", "error"}; -static int rd_kafka_topic_metadata_update( - rd_kafka_topic_t *rkt, - const struct rd_kafka_metadata_topic *mdt, - const rd_kafka_partition_leader_epoch_t *leader_epochs, - rd_ts_t ts_age); +static int +rd_kafka_topic_metadata_update(rd_kafka_topic_t *rkt, + const struct rd_kafka_metadata_topic *mdt, + const rd_kafka_metadata_topic_internal_t *mdit, + rd_ts_t ts_age); /** @@ -478,8 +480,10 @@ rd_kafka_topic_t *rd_kafka_topic_new0(rd_kafka_t *rk, if (existing) *existing = 1; - rd_kafka_topic_metadata_update(rkt, &rkmce->rkmce_mtopic, NULL, - rkmce->rkmce_ts_insert); + rd_kafka_topic_metadata_update( + rkt, &rkmce->rkmce_mtopic, + &rkmce->rkmce_metadata_internal_topic, + rkmce->rkmce_ts_insert); } if (do_lock) @@ -673,13 +677,12 @@ static int rd_kafka_toppar_leader_update(rd_kafka_topic_t *rkt, rktp->rktp_leader_epoch); if (rktp->rktp_fetch_state == RD_KAFKA_TOPPAR_FETCH_ACTIVE) { rd_kafka_toppar_unlock(rktp); + rd_kafka_toppar_destroy(rktp); /* from get() */ return 0; } } - if (rktp->rktp_fetch_state == RD_KAFKA_TOPPAR_FETCH_VALIDATE_EPOCH_WAIT) - need_epoch_validation = rd_true; - else if (leader_epoch > rktp->rktp_leader_epoch) { + if (leader_epoch > rktp->rktp_leader_epoch) { rd_kafka_dbg(rktp->rktp_rkt->rkt_rk, TOPIC, "BROKER", "%s [%" PRId32 "]: leader %" PRId32 " epoch %" PRId32 " -> leader %" PRId32 @@ -689,7 +692,9 @@ static int rd_kafka_toppar_leader_update(rd_kafka_topic_t *rkt, rktp->rktp_leader_epoch, leader_id, leader_epoch); rktp->rktp_leader_epoch = leader_epoch; need_epoch_validation = rd_true; - } + } else if (rktp->rktp_fetch_state == + RD_KAFKA_TOPPAR_FETCH_VALIDATE_EPOCH_WAIT) + need_epoch_validation = rd_true; fetching_from_follower = leader != NULL && rktp->rktp_broker != NULL && @@ -725,11 +730,16 @@ static int rd_kafka_toppar_leader_update(rd_kafka_topic_t *rkt, } if (need_epoch_validation) { - /* Update next fetch position, that could be stale since last - * fetch start. Only if the app pos is real. */ - if (rktp->rktp_app_pos.offset > 0) { - rd_kafka_toppar_set_next_fetch_position( - rktp, rktp->rktp_app_pos); + /* Set the offset validation position, depending on whether it + * should continue from the current position or from the next + * fetch start position. */ + if (rd_kafka_toppar_fetch_decide_start_from_next_fetch_start( + rktp)) { + rd_kafka_toppar_set_offset_validation_position( + rktp, rktp->rktp_next_fetch_start); + } else { + rd_kafka_toppar_set_offset_validation_position( + rktp, rktp->rktp_offsets.fetch_pos); } rd_kafka_offset_validate(rktp, "epoch updated from metadata"); } @@ -1232,9 +1242,7 @@ rd_bool_t rd_kafka_topic_set_error(rd_kafka_topic_t *rkt, * @brief Update a topic from metadata. * * @param mdt Topic metadata. - * @param leader_epochs Array of per-partition leader epochs, or NULL. - * The array size is identical to the partition count in - * \p mdt. + * @param mdit Topic internal metadata. * @param ts_age absolute age (timestamp) of metadata. * @returns 1 if the number of partitions changed, 0 if not, and -1 if the * topic is unknown. @@ -1242,17 +1250,19 @@ rd_bool_t rd_kafka_topic_set_error(rd_kafka_topic_t *rkt, * * @locks_required rd_kafka_*lock() MUST be held. 
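 * * As an illustrative note (not part of the change): \p mdit->partitions is * expected to parallel \p mdt->partitions index for index, so partition j's * leader epoch, previously taken from \p leader_epochs, is now read as * * int32_t leader_epoch = mdit->partitions[j].leader_epoch; * * where -1 means the broker did not report a leader epoch (pre-KIP-320). 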
*/ -static int rd_kafka_topic_metadata_update( - rd_kafka_topic_t *rkt, - const struct rd_kafka_metadata_topic *mdt, - const rd_kafka_partition_leader_epoch_t *leader_epochs, - rd_ts_t ts_age) { +static int +rd_kafka_topic_metadata_update(rd_kafka_topic_t *rkt, + const struct rd_kafka_metadata_topic *mdt, + const rd_kafka_metadata_topic_internal_t *mdit, + rd_ts_t ts_age) { rd_kafka_t *rk = rkt->rkt_rk; int upd = 0; int j; rd_kafka_broker_t **partbrokers; int leader_cnt = 0; int old_state; + rd_bool_t partition_exists_with_no_leader_epoch = rd_false; + rd_bool_t partition_exists_with_updated_leader_epoch = rd_false; if (mdt->err != RD_KAFKA_RESP_ERR_NO_ERROR) rd_kafka_dbg(rk, TOPIC | RD_KAFKA_DBG_METADATA, "METADATA", @@ -1317,8 +1327,9 @@ static int rd_kafka_topic_metadata_update( for (j = 0; j < mdt->partition_cnt; j++) { int r; rd_kafka_broker_t *leader; - int32_t leader_epoch = - leader_epochs ? leader_epochs[j].leader_epoch : -1; + int32_t leader_epoch = mdit->partitions[j].leader_epoch; + rd_kafka_toppar_t *rktp = + rd_kafka_toppar_get(rkt, mdt->partitions[j].id, 0); rd_kafka_dbg(rk, TOPIC | RD_KAFKA_DBG_METADATA, "METADATA", " Topic %s partition %i Leader %" PRId32 @@ -1329,6 +1340,14 @@ static int rd_kafka_topic_metadata_update( leader = partbrokers[j]; partbrokers[j] = NULL; + /* If the broker does not support leaderEpoch (KIP-320), it is + * set to -1 and we assume that the metadata is not stale. */ + if (leader_epoch == -1) + partition_exists_with_no_leader_epoch = rd_true; + else if (rktp->rktp_leader_epoch < leader_epoch) + partition_exists_with_updated_leader_epoch = rd_true; + + /* Update leader for partition */ r = rd_kafka_toppar_leader_update(rkt, mdt->partitions[j].id, mdt->partitions[j].leader, @@ -1342,10 +1361,14 @@ static int rd_kafka_topic_metadata_update( /* Drop reference to broker (from find()) */ rd_kafka_broker_destroy(leader); } + RD_IF_FREE(rktp, rd_kafka_toppar_destroy); } - /* If all partitions have leaders we can turn off fast leader query. */ - if (mdt->partition_cnt > 0 && leader_cnt == mdt->partition_cnt) + /* If all partitions have leaders, and this metadata update was not + * stale, we can turn off fast leader query. */ + if (mdt->partition_cnt > 0 && leader_cnt == mdt->partition_cnt && + (partition_exists_with_no_leader_epoch || + partition_exists_with_updated_leader_epoch)) rkt->rkt_flags &= ~RD_KAFKA_TOPIC_F_LEADER_UNAVAIL; if (mdt->err != RD_KAFKA_RESP_ERR_NO_ERROR && rkt->rkt_partition_cnt) { @@ -1391,7 +1414,7 @@ static int rd_kafka_topic_metadata_update( int rd_kafka_topic_metadata_update2( rd_kafka_broker_t *rkb, const struct rd_kafka_metadata_topic *mdt, - const rd_kafka_partition_leader_epoch_t *leader_epochs) { + const rd_kafka_metadata_topic_internal_t *mdit) { rd_kafka_topic_t *rkt; int r; @@ -1402,7 +1425,7 @@ int rd_kafka_topic_metadata_update2( return -1; /* Ignore topics that we don't have locally. 
*/ } - r = rd_kafka_topic_metadata_update(rkt, mdt, leader_epochs, rd_clock()); + r = rd_kafka_topic_metadata_update(rkt, mdt, mdit, rd_clock()); rd_kafka_wrunlock(rkb->rkb_rk); @@ -1749,12 +1772,36 @@ void *rd_kafka_topic_opaque(const rd_kafka_topic_t *app_rkt) { int rd_kafka_topic_info_cmp(const void *_a, const void *_b) { const rd_kafka_topic_info_t *a = _a, *b = _b; - int r; + int r, i; if ((r = strcmp(a->topic, b->topic))) return r; - return RD_CMP(a->partition_cnt, b->partition_cnt); + if ((r = RD_CMP(a->partition_cnt, b->partition_cnt))) + return r; + + if (a->partitions_internal == NULL && b->partitions_internal == NULL) + return 0; + + if (a->partitions_internal == NULL || b->partitions_internal == NULL) + return (a->partitions_internal == NULL) ? 1 : -1; + + /* We're certain partitions_internal exist for a/b and have the same + * count. */ + for (i = 0; i < a->partition_cnt; i++) { + size_t k; + if ((r = RD_CMP(a->partitions_internal[i].racks_cnt, + b->partitions_internal[i].racks_cnt))) + return r; + + for (k = 0; k < a->partitions_internal[i].racks_cnt; k++) { + if ((r = rd_strcmp(a->partitions_internal[i].racks[k], + b->partitions_internal[i].racks[k]))) + return r; + } + } + + return 0; } @@ -1784,7 +1831,83 @@ rd_kafka_topic_info_t *rd_kafka_topic_info_new(const char *topic, ti = rd_malloc(sizeof(*ti) + tlen); ti->topic = (char *)(ti + 1); memcpy((char *)ti->topic, topic, tlen); - ti->partition_cnt = partition_cnt; + ti->partition_cnt = partition_cnt; + ti->partitions_internal = NULL; + + return ti; +} + +/** + * Allocate new topic_info, including rack information. + * \p topic is copied. + */ +rd_kafka_topic_info_t *rd_kafka_topic_info_new_with_rack( + const char *topic, + int partition_cnt, + const rd_kafka_metadata_partition_internal_t *mdpi) { + rd_kafka_topic_info_t *ti; + rd_tmpabuf_t tbuf; + int i; + rd_bool_t has_racks = rd_false; + + rd_tmpabuf_new(&tbuf, 0, rd_true /* assert on fail */); + + rd_tmpabuf_add_alloc(&tbuf, sizeof(*ti)); + rd_tmpabuf_add_alloc(&tbuf, strlen(topic) + 1); + for (i = 0; i < partition_cnt; i++) { + size_t j; + if (!mdpi[i].racks) + continue; + + if (unlikely(!has_racks)) + has_racks = rd_true; + + for (j = 0; j < mdpi[i].racks_cnt; j++) { + rd_tmpabuf_add_alloc(&tbuf, + strlen(mdpi[i].racks[j]) + 1); + } + rd_tmpabuf_add_alloc(&tbuf, sizeof(char *) * mdpi[i].racks_cnt); + } + + /* Only bother allocating this if at least one + * rack is there. 
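+ * + * Illustrative note (not part of the change): this function uses + * rd_tmpabuf's two-phase pattern, where sizes are first reserved, + * + * rd_tmpabuf_add_alloc(&tbuf, sizeof(*ti)); + * rd_tmpabuf_finalize(&tbuf); + * + * and then the same allocations are carved out of one contiguous + * buffer in the same order, + * + * ti = rd_tmpabuf_alloc(&tbuf, sizeof(*ti)); + * ti->topic = rd_tmpabuf_write_str(&tbuf, topic); + * + * so the whole rd_kafka_topic_info_t is released as one allocation. 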
*/ + if (has_racks) { + rd_tmpabuf_add_alloc( + &tbuf, sizeof(rd_kafka_metadata_partition_internal_t) * + partition_cnt); + } + + rd_tmpabuf_finalize(&tbuf); + + ti = rd_tmpabuf_alloc(&tbuf, sizeof(*ti)); + ti->topic = rd_tmpabuf_write_str(&tbuf, topic); + ti->partition_cnt = partition_cnt; + ti->partitions_internal = NULL; + + if (has_racks) { + ti->partitions_internal = rd_tmpabuf_alloc( + &tbuf, sizeof(*ti->partitions_internal) * partition_cnt); + + for (i = 0; i < partition_cnt; i++) { + size_t j; + ti->partitions_internal[i].id = mdpi[i].id; + ti->partitions_internal[i].racks = NULL; + + if (!mdpi[i].racks) + continue; + + ti->partitions_internal[i].racks_cnt = + mdpi[i].racks_cnt; + ti->partitions_internal[i].racks = rd_tmpabuf_alloc( + &tbuf, sizeof(char *) * mdpi[i].racks_cnt); + + for (j = 0; j < mdpi[i].racks_cnt; j++) { + ti->partitions_internal[i].racks[j] = + rd_tmpabuf_write_str(&tbuf, + mdpi[i].racks[j]); + } + } + } return ti; } @@ -1880,9 +2003,12 @@ void rd_kafka_local_topics_to_list(rd_kafka_t *rk, void rd_ut_kafka_topic_set_topic_exists(rd_kafka_topic_t *rkt, int partition_cnt, int32_t leader_id) { - struct rd_kafka_metadata_topic mdt = {.topic = + rd_kafka_metadata_partition_internal_t *partitions = + rd_calloc(partition_cnt, sizeof(*partitions)); + struct rd_kafka_metadata_topic mdt = {.topic = (char *)rkt->rkt_topic->str, .partition_cnt = partition_cnt}; + rd_kafka_metadata_topic_internal_t mdit = {.partitions = partitions}; int i; mdt.partitions = rd_alloca(sizeof(*mdt.partitions) * partition_cnt); @@ -1894,7 +2020,9 @@ void rd_ut_kafka_topic_set_topic_exists(rd_kafka_topic_t *rkt, } rd_kafka_wrlock(rkt->rkt_rk); - rd_kafka_metadata_cache_topic_update(rkt->rkt_rk, &mdt, rd_true); - rd_kafka_topic_metadata_update(rkt, &mdt, NULL, rd_clock()); + rd_kafka_metadata_cache_topic_update(rkt->rkt_rk, &mdt, &mdit, rd_true, + rd_false, NULL, 0); + rd_kafka_topic_metadata_update(rkt, &mdt, &mdit, rd_clock()); rd_kafka_wrunlock(rkt->rkt_rk); + rd_free(partitions); } diff --git a/lib/librdkafka-2.1.0/src/rdkafka_topic.h b/lib/librdkafka-2.3.0/src/rdkafka_topic.h similarity index 97% rename from lib/librdkafka-2.1.0/src/rdkafka_topic.h rename to lib/librdkafka-2.3.0/src/rdkafka_topic.h index cbed9308a7a..b8c0b66c99e 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_topic.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_topic.h @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012,2013 Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -258,7 +259,7 @@ rd_kafka_topic_get_error(rd_kafka_topic_t *rkt) { int rd_kafka_topic_metadata_update2( rd_kafka_broker_t *rkb, const struct rd_kafka_metadata_topic *mdt, - const rd_kafka_partition_leader_epoch_t *leader_epochs); + const rd_kafka_metadata_topic_internal_t *mdit); void rd_kafka_topic_scan_all(rd_kafka_t *rk, rd_ts_t now); @@ -266,12 +267,17 @@ void rd_kafka_topic_scan_all(rd_kafka_t *rk, rd_ts_t now); typedef struct rd_kafka_topic_info_s { const char *topic; /**< Allocated along with struct */ int partition_cnt; + rd_kafka_metadata_partition_internal_t *partitions_internal; } rd_kafka_topic_info_t; int rd_kafka_topic_info_topic_cmp(const void *_a, const void *_b); int rd_kafka_topic_info_cmp(const void *_a, const void *_b); rd_kafka_topic_info_t *rd_kafka_topic_info_new(const char *topic, int partition_cnt); +rd_kafka_topic_info_t *rd_kafka_topic_info_new_with_rack( + const char *topic, + int partition_cnt, + const rd_kafka_metadata_partition_internal_t *mdpi); void rd_kafka_topic_info_destroy(rd_kafka_topic_info_t *ti); int rd_kafka_topic_match(rd_kafka_t *rk, diff --git a/lib/librdkafka-2.1.0/src/rdkafka_transport.c b/lib/librdkafka-2.3.0/src/rdkafka_transport.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_transport.c rename to lib/librdkafka-2.3.0/src/rdkafka_transport.c index ae5895b29ae..f133d8fdde9 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_transport.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_transport.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2015, Magnus Edenhill + * Copyright (c) 2015-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -807,6 +808,7 @@ static void rd_kafka_transport_io_event(rd_kafka_transport_t *rktrans, case RD_KAFKA_BROKER_STATE_INIT: case RD_KAFKA_BROKER_STATE_DOWN: case RD_KAFKA_BROKER_STATE_TRY_CONNECT: + case RD_KAFKA_BROKER_STATE_REAUTH: rd_kafka_assert(rkb->rkb_rk, !*"bad state"); } } diff --git a/lib/librdkafka-2.1.0/src/rdkafka_transport.h b/lib/librdkafka-2.3.0/src/rdkafka_transport.h similarity index 98% rename from lib/librdkafka-2.1.0/src/rdkafka_transport.h rename to lib/librdkafka-2.3.0/src/rdkafka_transport.h index 83af5ae9016..c5f73163f94 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_transport.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_transport.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2015, Magnus Edenhill + * Copyright (c) 2015-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_transport_int.h b/lib/librdkafka-2.3.0/src/rdkafka_transport_int.h similarity index 98% rename from lib/librdkafka-2.1.0/src/rdkafka_transport_int.h rename to lib/librdkafka-2.3.0/src/rdkafka_transport_int.h index 4b053b98fa1..9e00f238c30 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_transport_int.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_transport_int.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2015, Magnus Edenhill + * Copyright (c) 2015-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_txnmgr.c b/lib/librdkafka-2.3.0/src/rdkafka_txnmgr.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_txnmgr.c rename to lib/librdkafka-2.3.0/src/rdkafka_txnmgr.c index afbc28b71cc..cd8a60f30ad 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_txnmgr.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_txnmgr.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2019 Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_txnmgr.h b/lib/librdkafka-2.3.0/src/rdkafka_txnmgr.h similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_txnmgr.h rename to lib/librdkafka-2.3.0/src/rdkafka_txnmgr.h index 3c088d09a65..d67b57bce26 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_txnmgr.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_txnmgr.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2019 Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_zstd.c b/lib/librdkafka-2.3.0/src/rdkafka_zstd.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdkafka_zstd.c rename to lib/librdkafka-2.3.0/src/rdkafka_zstd.c index 68b01a4e1ce..dac2c4dfcc7 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_zstd.c +++ b/lib/librdkafka-2.3.0/src/rdkafka_zstd.c @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2018 Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdkafka_zstd.h b/lib/librdkafka-2.3.0/src/rdkafka_zstd.h similarity index 98% rename from lib/librdkafka-2.1.0/src/rdkafka_zstd.h rename to lib/librdkafka-2.3.0/src/rdkafka_zstd.h index f87c4c6fbc3..7f5a7490413 100644 --- a/lib/librdkafka-2.1.0/src/rdkafka_zstd.h +++ b/lib/librdkafka-2.3.0/src/rdkafka_zstd.h @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2018 Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdlist.c b/lib/librdkafka-2.3.0/src/rdlist.c similarity index 93% rename from lib/librdkafka-2.1.0/src/rdlist.c rename to lib/librdkafka-2.3.0/src/rdlist.c index c71e3004ad7..65e3eb97e04 100644 --- a/lib/librdkafka-2.1.0/src/rdlist.c +++ b/lib/librdkafka-2.3.0/src/rdlist.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -148,6 +149,7 @@ void *rd_list_add(rd_list_t *rl, void *elem) { return rl->rl_elems[rl->rl_cnt++]; } + void rd_list_set(rd_list_t *rl, int idx, void *ptr) { if (idx >= rl->rl_size) rd_list_grow(rl, idx + 1); @@ -376,6 +378,34 @@ void *rd_list_find_duplicate(const rd_list_t *rl, return NULL; } +void rd_list_deduplicate(rd_list_t **rl, + int (*cmp)(const void *, const void *)) { + rd_list_t *deduped = rd_list_new(0, (*rl)->rl_free_cb); + void *elem; + void *prev_elem = NULL; + int i; + + if (!((*rl)->rl_flags & RD_LIST_F_SORTED)) + rd_list_sort(*rl, cmp); + + RD_LIST_FOREACH(elem, *rl, i) { + if (prev_elem && cmp(elem, prev_elem) == 0) { + /* Skip this element, and destroy it */ + rd_list_free_cb(*rl, elem); + continue; + } + rd_list_add(deduped, elem); + prev_elem = elem; + } + /* The elements we want destroyed are already destroyed. */ + (*rl)->rl_free_cb = NULL; + rd_list_destroy(*rl); + + /* The parent list was sorted, we can set this without re-sorting. */ + deduped->rl_flags |= RD_LIST_F_SORTED; + *rl = deduped; +} + int rd_list_cmp(const rd_list_t *a, const rd_list_t *b, int (*cmp)(const void *, const void *)) { diff --git a/lib/librdkafka-2.1.0/src/rdlist.h b/lib/librdkafka-2.3.0/src/rdlist.h similarity index 96% rename from lib/librdkafka-2.1.0/src/rdlist.h rename to lib/librdkafka-2.3.0/src/rdlist.h index db5295f6cfd..3a1316c3899 100644 --- a/lib/librdkafka-2.1.0/src/rdlist.h +++ b/lib/librdkafka-2.3.0/src/rdlist.h @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill, + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -301,6 +302,18 @@ void *rd_list_find_duplicate(const rd_list_t *rl, int (*cmp)(const void *, const void *)); +/** + * @brief Deduplicates a list. + * + * @param rl is a ptrptr since a new list is created and assigned to *rl, for + * efficiency. + * @returns a deduplicated and sorted version of \p *rl. + * @warning the original \p *rl is destroyed. + */ +void rd_list_deduplicate(rd_list_t **rl, + int (*cmp)(const void *, const void *)); + + /** * @brief Compare list \p a to \p b. * diff --git a/lib/librdkafka-2.1.0/src/rdlog.c b/lib/librdkafka-2.3.0/src/rdlog.c similarity index 98% rename from lib/librdkafka-2.1.0/src/rdlog.c rename to lib/librdkafka-2.3.0/src/rdlog.c index 19fbbb1614b..3ddc82d06ea 100644 --- a/lib/librdkafka-2.1.0/src/rdlog.c +++ b/lib/librdkafka-2.3.0/src/rdlog.c @@ -1,7 +1,7 @@ /* * librd - Rapid Development C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdlog.h b/lib/librdkafka-2.3.0/src/rdlog.h similarity index 97% rename from lib/librdkafka-2.1.0/src/rdlog.h rename to lib/librdkafka-2.3.0/src/rdlog.h index f360a0b66e0..a83701f6a3e 100644 --- a/lib/librdkafka-2.1.0/src/rdlog.h +++ b/lib/librdkafka-2.3.0/src/rdlog.h @@ -1,7 +1,7 @@ /* * librd - Rapid Development C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdmap.c b/lib/librdkafka-2.3.0/src/rdmap.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdmap.c rename to lib/librdkafka-2.3.0/src/rdmap.c index 4b85470336b..8e1a0546cc5 100644 --- a/lib/librdkafka-2.1.0/src/rdmap.c +++ b/lib/librdkafka-2.3.0/src/rdmap.c @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2020 Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdmap.h b/lib/librdkafka-2.3.0/src/rdmap.h similarity index 99% rename from lib/librdkafka-2.1.0/src/rdmap.h rename to lib/librdkafka-2.3.0/src/rdmap.h index a79dcda06a8..bea8a1aca6d 100644 --- a/lib/librdkafka-2.1.0/src/rdmap.h +++ b/lib/librdkafka-2.3.0/src/rdmap.h @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2020 Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdmurmur2.c b/lib/librdkafka-2.3.0/src/rdmurmur2.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdmurmur2.c rename to lib/librdkafka-2.3.0/src/rdmurmur2.c index c3e4095d4c0..c54fa2f51c3 100644 --- a/lib/librdkafka-2.1.0/src/rdmurmur2.c +++ b/lib/librdkafka-2.3.0/src/rdmurmur2.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdmurmur2.h b/lib/librdkafka-2.3.0/src/rdmurmur2.h similarity index 97% rename from lib/librdkafka-2.1.0/src/rdmurmur2.h rename to lib/librdkafka-2.3.0/src/rdmurmur2.h index 5991caa50ca..fc23dfec947 100644 --- a/lib/librdkafka-2.1.0/src/rdmurmur2.h +++ b/lib/librdkafka-2.3.0/src/rdmurmur2.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2018 Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdports.c b/lib/librdkafka-2.3.0/src/rdports.c similarity index 98% rename from lib/librdkafka-2.1.0/src/rdports.c rename to lib/librdkafka-2.3.0/src/rdports.c index 15c57e9289a..9af8ede531f 100644 --- a/lib/librdkafka-2.1.0/src/rdports.c +++ b/lib/librdkafka-2.3.0/src/rdports.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2016 Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdports.h b/lib/librdkafka-2.3.0/src/rdports.h similarity index 97% rename from lib/librdkafka-2.1.0/src/rdports.h rename to lib/librdkafka-2.3.0/src/rdports.h index 0cdbcd85fc9..41314ebfbe3 100644 --- a/lib/librdkafka-2.1.0/src/rdports.h +++ b/lib/librdkafka-2.3.0/src/rdports.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2016 Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdposix.h b/lib/librdkafka-2.3.0/src/rdposix.h similarity index 99% rename from lib/librdkafka-2.1.0/src/rdposix.h rename to lib/librdkafka-2.3.0/src/rdposix.h index 7b2376823f6..0af5948168e 100644 --- a/lib/librdkafka-2.1.0/src/rdposix.h +++ b/lib/librdkafka-2.3.0/src/rdposix.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015 Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdrand.c b/lib/librdkafka-2.3.0/src/rdrand.c similarity index 98% rename from lib/librdkafka-2.1.0/src/rdrand.c rename to lib/librdkafka-2.3.0/src/rdrand.c index e36d79380bc..bdab002968e 100644 --- a/lib/librdkafka-2.1.0/src/rdrand.c +++ b/lib/librdkafka-2.3.0/src/rdrand.c @@ -1,7 +1,7 @@ /* * librd - Rapid Development C library * - * Copyright (c) 2012, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdrand.h b/lib/librdkafka-2.3.0/src/rdrand.h similarity index 97% rename from lib/librdkafka-2.1.0/src/rdrand.h rename to lib/librdkafka-2.3.0/src/rdrand.h index 0e3a927c2cb..f86fb83e791 100644 --- a/lib/librdkafka-2.1.0/src/rdrand.h +++ b/lib/librdkafka-2.3.0/src/rdrand.h @@ -1,7 +1,7 @@ /* * librd - Rapid Development C library * - * Copyright (c) 2012, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdregex.c b/lib/librdkafka-2.3.0/src/rdregex.c similarity index 98% rename from lib/librdkafka-2.1.0/src/rdregex.c rename to lib/librdkafka-2.3.0/src/rdregex.c index 0c70cb334be..4a09286b81d 100644 --- a/lib/librdkafka-2.1.0/src/rdregex.c +++ b/lib/librdkafka-2.3.0/src/rdregex.c @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2016 Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdregex.h b/lib/librdkafka-2.3.0/src/rdregex.h similarity index 97% rename from lib/librdkafka-2.1.0/src/rdregex.h rename to lib/librdkafka-2.3.0/src/rdregex.h index 135229d6268..94edcf661ca 100644 --- a/lib/librdkafka-2.1.0/src/rdregex.h +++ b/lib/librdkafka-2.3.0/src/rdregex.h @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2016 Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdsignal.h b/lib/librdkafka-2.3.0/src/rdsignal.h similarity index 97% rename from lib/librdkafka-2.1.0/src/rdsignal.h rename to lib/librdkafka-2.3.0/src/rdsignal.h index a2c0de1b0cd..6f3462130ab 100644 --- a/lib/librdkafka-2.1.0/src/rdsignal.h +++ b/lib/librdkafka-2.3.0/src/rdsignal.h @@ -1,7 +1,7 @@ /* * librd - Rapid Development C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdstring.c b/lib/librdkafka-2.3.0/src/rdstring.c similarity index 97% rename from lib/librdkafka-2.1.0/src/rdstring.c rename to lib/librdkafka-2.3.0/src/rdstring.c index 6a18210c937..c981f7705a3 100644 --- a/lib/librdkafka-2.1.0/src/rdstring.c +++ b/lib/librdkafka-2.3.0/src/rdstring.c @@ -1,7 +1,8 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2016 Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -289,6 +290,21 @@ int rd_strcmp(const char *a, const char *b) { } +/** + * @brief Same as rd_strcmp() but works with rd_list comparator. + */ +int rd_strcmp2(const void *a, const void *b) { + return rd_strcmp((const char *)a, (const char *)b); +} + +/** + * @brief Same as rd_strcmp() but works with bsearch, which requires one more + * indirection. + */ +int rd_strcmp3(const void *a, const void *b) { + return rd_strcmp(*((const char **)a), *((const char **)b)); +} + /** * @brief Case-insensitive strstr() for platforms where strcasestr() diff --git a/lib/librdkafka-2.1.0/src/rdstring.h b/lib/librdkafka-2.3.0/src/rdstring.h similarity index 94% rename from lib/librdkafka-2.1.0/src/rdstring.h rename to lib/librdkafka-2.3.0/src/rdstring.h index 67ea19401bd..dc0627a138a 100644 --- a/lib/librdkafka-2.1.0/src/rdstring.h +++ b/lib/librdkafka-2.3.0/src/rdstring.h @@ -1,7 +1,8 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2017 Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -80,6 +81,10 @@ unsigned int rd_string_hash(const char *str, ssize_t len); int rd_strcmp(const char *a, const char *b); +int rd_strcmp2(const void *a, const void *b); + +int rd_strcmp3(const void *a, const void *b); + char *_rd_strcasestr(const char *haystack, const char *needle); char **rd_string_split(const char *input, diff --git a/lib/librdkafka-2.1.0/src/rdsysqueue.h b/lib/librdkafka-2.3.0/src/rdsysqueue.h similarity index 99% rename from lib/librdkafka-2.1.0/src/rdsysqueue.h rename to lib/librdkafka-2.3.0/src/rdsysqueue.h index ecba4154eb5..738cdad792f 100644 --- a/lib/librdkafka-2.1.0/src/rdsysqueue.h +++ b/lib/librdkafka-2.3.0/src/rdsysqueue.h @@ -1,8 +1,8 @@ /* * librd - Rapid Development C library * - * Copyright (c) 2012-2013, Magnus Edenhill - * Copyright (c) 2012-2013, Andreas Öman + * Copyright (c) 2012-2022, Magnus Edenhill + * Copyright (c) 2012-2022, Andreas Öman * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdtime.h b/lib/librdkafka-2.3.0/src/rdtime.h similarity index 99% rename from lib/librdkafka-2.1.0/src/rdtime.h rename to lib/librdkafka-2.3.0/src/rdtime.h index 4a3e5d85591..4a7e76d752a 100644 --- a/lib/librdkafka-2.1.0/src/rdtime.h +++ b/lib/librdkafka-2.3.0/src/rdtime.h @@ -1,7 +1,7 @@ /* * librd - Rapid Development C library * - * Copyright (c) 2012, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdtypes.h b/lib/librdkafka-2.3.0/src/rdtypes.h similarity index 98% rename from lib/librdkafka-2.1.0/src/rdtypes.h rename to lib/librdkafka-2.3.0/src/rdtypes.h index 8f3625512dc..a22bb906496 100644 --- a/lib/librdkafka-2.1.0/src/rdtypes.h +++ b/lib/librdkafka-2.3.0/src/rdtypes.h @@ -1,7 +1,7 @@ /* * librd - Rapid Development C library * - * Copyright (c) 2012, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdunittest.c b/lib/librdkafka-2.3.0/src/rdunittest.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdunittest.c rename to lib/librdkafka-2.3.0/src/rdunittest.c index aa14b6aa841..18236ca9ec6 100644 --- a/lib/librdkafka-2.1.0/src/rdunittest.c +++ b/lib/librdkafka-2.3.0/src/rdunittest.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2017 Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdunittest.h b/lib/librdkafka-2.3.0/src/rdunittest.h similarity index 99% rename from lib/librdkafka-2.1.0/src/rdunittest.h rename to lib/librdkafka-2.3.0/src/rdunittest.h index a1548856806..a9e709fa73a 100644 --- a/lib/librdkafka-2.1.0/src/rdunittest.h +++ b/lib/librdkafka-2.3.0/src/rdunittest.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2017 Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdvarint.c b/lib/librdkafka-2.3.0/src/rdvarint.c similarity index 99% rename from lib/librdkafka-2.1.0/src/rdvarint.c rename to lib/librdkafka-2.3.0/src/rdvarint.c index fb0cbd04660..cb8b8a9837c 100644 --- a/lib/librdkafka-2.1.0/src/rdvarint.c +++ b/lib/librdkafka-2.3.0/src/rdvarint.c @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2016 Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdvarint.h b/lib/librdkafka-2.3.0/src/rdvarint.h similarity index 99% rename from lib/librdkafka-2.1.0/src/rdvarint.h rename to lib/librdkafka-2.3.0/src/rdvarint.h index 6fe112ba95d..c628822fc82 100644 --- a/lib/librdkafka-2.1.0/src/rdvarint.h +++ b/lib/librdkafka-2.3.0/src/rdvarint.h @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2016 Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdwin32.h b/lib/librdkafka-2.3.0/src/rdwin32.h similarity index 99% rename from lib/librdkafka-2.1.0/src/rdwin32.h rename to lib/librdkafka-2.3.0/src/rdwin32.h index 73edd41d6a7..37c25843acb 100644 --- a/lib/librdkafka-2.1.0/src/rdwin32.h +++ b/lib/librdkafka-2.3.0/src/rdwin32.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015 Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/rdxxhash.c b/lib/librdkafka-2.3.0/src/rdxxhash.c similarity index 100% rename from lib/librdkafka-2.1.0/src/rdxxhash.c rename to lib/librdkafka-2.3.0/src/rdxxhash.c diff --git a/lib/librdkafka-2.1.0/src/rdxxhash.h b/lib/librdkafka-2.3.0/src/rdxxhash.h similarity index 100% rename from lib/librdkafka-2.1.0/src/rdxxhash.h rename to lib/librdkafka-2.3.0/src/rdxxhash.h diff --git a/lib/librdkafka-2.1.0/src/regexp.c b/lib/librdkafka-2.3.0/src/regexp.c similarity index 100% rename from lib/librdkafka-2.1.0/src/regexp.c rename to lib/librdkafka-2.3.0/src/regexp.c diff --git a/lib/librdkafka-2.1.0/src/regexp.h b/lib/librdkafka-2.3.0/src/regexp.h similarity index 100% rename from lib/librdkafka-2.1.0/src/regexp.h rename to lib/librdkafka-2.3.0/src/regexp.h diff --git a/lib/librdkafka-2.1.0/src/snappy.c b/lib/librdkafka-2.3.0/src/snappy.c similarity index 100% rename from lib/librdkafka-2.1.0/src/snappy.c rename to lib/librdkafka-2.3.0/src/snappy.c diff --git a/lib/librdkafka-2.1.0/src/snappy.h b/lib/librdkafka-2.3.0/src/snappy.h similarity index 98% rename from lib/librdkafka-2.1.0/src/snappy.h rename to lib/librdkafka-2.3.0/src/snappy.h index b3742f1ac5c..c366fb5aa6f 100644 --- a/lib/librdkafka-2.1.0/src/snappy.h +++ b/lib/librdkafka-2.3.0/src/snappy.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2018 Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/snappy_compat.h b/lib/librdkafka-2.3.0/src/snappy_compat.h similarity index 100% rename from lib/librdkafka-2.1.0/src/snappy_compat.h rename to lib/librdkafka-2.3.0/src/snappy_compat.h diff --git a/lib/librdkafka-2.1.0/src/statistics_schema.json b/lib/librdkafka-2.3.0/src/statistics_schema.json similarity index 99% rename from lib/librdkafka-2.1.0/src/statistics_schema.json rename to lib/librdkafka-2.3.0/src/statistics_schema.json index d0dbedda7df..185bc2637e4 100644 --- a/lib/librdkafka-2.1.0/src/statistics_schema.json +++ b/lib/librdkafka-2.3.0/src/statistics_schema.json @@ -1,5 +1,5 @@ { "$schema": "http://json-schema.org/schema#", - "id": "https://github.com/edenhill/librdkafka/src/statistics_schema.json", + "id": "https://github.com/confluentinc/librdkafka/src/statistics_schema.json", "title": "librdkafka statistics schema - INCOMPLETE - WORK IN PROGRESS", "definitions": { "window": { diff --git a/lib/librdkafka-2.1.0/src/tinycthread.c b/lib/librdkafka-2.3.0/src/tinycthread.c similarity index 100% rename from lib/librdkafka-2.1.0/src/tinycthread.c rename to lib/librdkafka-2.3.0/src/tinycthread.c diff --git a/lib/librdkafka-2.1.0/src/tinycthread.h b/lib/librdkafka-2.3.0/src/tinycthread.h similarity index 100% rename from lib/librdkafka-2.1.0/src/tinycthread.h rename to lib/librdkafka-2.3.0/src/tinycthread.h diff --git a/lib/librdkafka-2.1.0/src/tinycthread_extra.c b/lib/librdkafka-2.3.0/src/tinycthread_extra.c similarity index 99% rename from lib/librdkafka-2.1.0/src/tinycthread_extra.c rename to lib/librdkafka-2.3.0/src/tinycthread_extra.c index 58049448cef..11dc0f212fa 100644 --- a/lib/librdkafka-2.1.0/src/tinycthread_extra.c +++ b/lib/librdkafka-2.3.0/src/tinycthread_extra.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2018 Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/tinycthread_extra.h b/lib/librdkafka-2.3.0/src/tinycthread_extra.h similarity index 99% rename from lib/librdkafka-2.1.0/src/tinycthread_extra.h rename to lib/librdkafka-2.3.0/src/tinycthread_extra.h index e5f6731739d..22070225920 100644 --- a/lib/librdkafka-2.1.0/src/tinycthread_extra.h +++ b/lib/librdkafka-2.3.0/src/tinycthread_extra.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2018 Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/src/win32_config.h b/lib/librdkafka-2.3.0/src/win32_config.h similarity index 98% rename from lib/librdkafka-2.1.0/src/win32_config.h rename to lib/librdkafka-2.3.0/src/win32_config.h index dd61b2c92f1..e1b416ba3cf 100644 --- a/lib/librdkafka-2.1.0/src/win32_config.h +++ b/lib/librdkafka-2.3.0/src/win32_config.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015 Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/.gitignore b/lib/librdkafka-2.3.0/tests/.gitignore similarity index 100% rename from lib/librdkafka-2.1.0/tests/.gitignore rename to lib/librdkafka-2.3.0/tests/.gitignore diff --git a/lib/librdkafka-2.1.0/tests/0000-unittests.c b/lib/librdkafka-2.3.0/tests/0000-unittests.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/0000-unittests.c rename to lib/librdkafka-2.3.0/tests/0000-unittests.c index e0a02fb625b..dd3655e655f 100644 --- a/lib/librdkafka-2.1.0/tests/0000-unittests.c +++ b/lib/librdkafka-2.3.0/tests/0000-unittests.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2017, Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0001-multiobj.c b/lib/librdkafka-2.3.0/tests/0001-multiobj.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/0001-multiobj.c rename to lib/librdkafka-2.3.0/tests/0001-multiobj.c index c2a4eb57af6..423bd15ae39 100644 --- a/lib/librdkafka-2.1.0/tests/0001-multiobj.c +++ b/lib/librdkafka-2.3.0/tests/0001-multiobj.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0002-unkpart.c b/lib/librdkafka-2.3.0/tests/0002-unkpart.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0002-unkpart.c rename to lib/librdkafka-2.3.0/tests/0002-unkpart.c index 087e37ae628..f70250e6ea4 100644 --- a/lib/librdkafka-2.1.0/tests/0002-unkpart.c +++ b/lib/librdkafka-2.3.0/tests/0002-unkpart.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0003-msgmaxsize.c b/lib/librdkafka-2.3.0/tests/0003-msgmaxsize.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0003-msgmaxsize.c rename to lib/librdkafka-2.3.0/tests/0003-msgmaxsize.c index 97b5111258c..64d105df0a9 100644 --- a/lib/librdkafka-2.1.0/tests/0003-msgmaxsize.c +++ b/lib/librdkafka-2.3.0/tests/0003-msgmaxsize.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0004-conf.c b/lib/librdkafka-2.3.0/tests/0004-conf.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0004-conf.c rename to lib/librdkafka-2.3.0/tests/0004-conf.c index 51401e17d3f..5dbd9f0b1d8 100644 --- a/lib/librdkafka-2.1.0/tests/0004-conf.c +++ b/lib/librdkafka-2.3.0/tests/0004-conf.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -529,6 +529,8 @@ int main_0004_conf(int argc, char **argv) { "ssl.ca.certificate.stores", "Intermediate ,, Root ,", #endif + "client.dns.lookup", + "resolve_canonical_bootstrap_servers_only", NULL }; static const char *tconfs[] = {"request.required.acks", diff --git a/lib/librdkafka-2.1.0/tests/0005-order.c b/lib/librdkafka-2.3.0/tests/0005-order.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0005-order.c rename to lib/librdkafka-2.3.0/tests/0005-order.c index 722cef3b069..f4e2f75ccf9 100644 --- a/lib/librdkafka-2.1.0/tests/0005-order.c +++ b/lib/librdkafka-2.3.0/tests/0005-order.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0006-symbols.c b/lib/librdkafka-2.3.0/tests/0006-symbols.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0006-symbols.c rename to lib/librdkafka-2.3.0/tests/0006-symbols.c index 8a25f6a1d20..1e5378c39e5 100644 --- a/lib/librdkafka-2.1.0/tests/0006-symbols.c +++ b/lib/librdkafka-2.3.0/tests/0006-symbols.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0007-autotopic.c b/lib/librdkafka-2.3.0/tests/0007-autotopic.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0007-autotopic.c rename to lib/librdkafka-2.3.0/tests/0007-autotopic.c index cf196d60c20..afcb8dd0dfb 100644 --- a/lib/librdkafka-2.1.0/tests/0007-autotopic.c +++ b/lib/librdkafka-2.3.0/tests/0007-autotopic.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. 
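The 0004-conf.c hunk above adds the new client.dns.lookup property, with its resolve_canonical_bootstrap_servers_only mode, to the test's list of known configuration properties. As a minimal sketch of how an application would enable that mode (illustrative code, not part of this patch; only the property name and value come from the hunk):

#include <librdkafka/rdkafka.h>
#include <stdio.h>

int main(void) {
        rd_kafka_conf_t *conf = rd_kafka_conf_new();
        char errstr[512];

        /* Resolve the bootstrap hostnames to their canonical names before
         * connecting; illustrative use of the property that 0004-conf.c
         * now covers. */
        if (rd_kafka_conf_set(conf, "client.dns.lookup",
                              "resolve_canonical_bootstrap_servers_only",
                              errstr, sizeof(errstr)) != RD_KAFKA_CONF_OK) {
                fprintf(stderr, "config error: %s\n", errstr);
                rd_kafka_conf_destroy(conf);
                return 1;
        }

        rd_kafka_conf_destroy(conf);
        return 0;
}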
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0008-reqacks.c b/lib/librdkafka-2.3.0/tests/0008-reqacks.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0008-reqacks.c rename to lib/librdkafka-2.3.0/tests/0008-reqacks.c index d52081b7583..b03878b9cbd 100644 --- a/lib/librdkafka-2.1.0/tests/0008-reqacks.c +++ b/lib/librdkafka-2.3.0/tests/0008-reqacks.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0009-mock_cluster.c b/lib/librdkafka-2.3.0/tests/0009-mock_cluster.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/0009-mock_cluster.c rename to lib/librdkafka-2.3.0/tests/0009-mock_cluster.c index 32590820e74..23a953fbe3a 100644 --- a/lib/librdkafka-2.1.0/tests/0009-mock_cluster.c +++ b/lib/librdkafka-2.3.0/tests/0009-mock_cluster.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2019, Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0011-produce_batch.c b/lib/librdkafka-2.3.0/tests/0011-produce_batch.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0011-produce_batch.c rename to lib/librdkafka-2.3.0/tests/0011-produce_batch.c index 584d37bc63f..fd8d2e2d474 100644 --- a/lib/librdkafka-2.1.0/tests/0011-produce_batch.c +++ b/lib/librdkafka-2.3.0/tests/0011-produce_batch.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0012-produce_consume.c b/lib/librdkafka-2.3.0/tests/0012-produce_consume.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0012-produce_consume.c rename to lib/librdkafka-2.3.0/tests/0012-produce_consume.c index 30ff392c423..97f592b3c3f 100644 --- a/lib/librdkafka-2.1.0/tests/0012-produce_consume.c +++ b/lib/librdkafka-2.3.0/tests/0012-produce_consume.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0013-null-msgs.c b/lib/librdkafka-2.3.0/tests/0013-null-msgs.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0013-null-msgs.c rename to lib/librdkafka-2.3.0/tests/0013-null-msgs.c index 26a7ac070d3..8cb2af255f3 100644 --- a/lib/librdkafka-2.1.0/tests/0013-null-msgs.c +++ b/lib/librdkafka-2.3.0/tests/0013-null-msgs.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0014-reconsume-191.c b/lib/librdkafka-2.3.0/tests/0014-reconsume-191.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0014-reconsume-191.c rename to lib/librdkafka-2.3.0/tests/0014-reconsume-191.c index edae85f5cd3..2965b8d6c10 100644 --- a/lib/librdkafka-2.1.0/tests/0014-reconsume-191.c +++ b/lib/librdkafka-2.3.0/tests/0014-reconsume-191.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0015-offset_seeks.c b/lib/librdkafka-2.3.0/tests/0015-offset_seeks.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0015-offset_seeks.c rename to lib/librdkafka-2.3.0/tests/0015-offset_seeks.c index a551a0b531f..1bbd9be1326 100644 --- a/lib/librdkafka-2.1.0/tests/0015-offset_seeks.c +++ b/lib/librdkafka-2.3.0/tests/0015-offset_seeks.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0016-client_swname.c b/lib/librdkafka-2.3.0/tests/0016-client_swname.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0016-client_swname.c rename to lib/librdkafka-2.3.0/tests/0016-client_swname.c index 2d0605b8874..f8b2cf60741 100644 --- a/lib/librdkafka-2.1.0/tests/0016-client_swname.c +++ b/lib/librdkafka-2.3.0/tests/0016-client_swname.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020, Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0017-compression.c b/lib/librdkafka-2.3.0/tests/0017-compression.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0017-compression.c rename to lib/librdkafka-2.3.0/tests/0017-compression.c index f28f63f244f..d13bb1bf6c8 100644 --- a/lib/librdkafka-2.1.0/tests/0017-compression.c +++ b/lib/librdkafka-2.3.0/tests/0017-compression.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0018-cgrp_term.c b/lib/librdkafka-2.3.0/tests/0018-cgrp_term.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0018-cgrp_term.c rename to lib/librdkafka-2.3.0/tests/0018-cgrp_term.c index 6b22339d7d8..99a98df4fd5 100644 --- a/lib/librdkafka-2.1.0/tests/0018-cgrp_term.c +++ b/lib/librdkafka-2.3.0/tests/0018-cgrp_term.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0019-list_groups.c b/lib/librdkafka-2.3.0/tests/0019-list_groups.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0019-list_groups.c rename to lib/librdkafka-2.3.0/tests/0019-list_groups.c index 02729c33964..3337e347073 100644 --- a/lib/librdkafka-2.1.0/tests/0019-list_groups.c +++ b/lib/librdkafka-2.3.0/tests/0019-list_groups.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0020-destroy_hang.c b/lib/librdkafka-2.3.0/tests/0020-destroy_hang.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0020-destroy_hang.c rename to lib/librdkafka-2.3.0/tests/0020-destroy_hang.c index a8a6552fa80..ca2a2362be2 100644 --- a/lib/librdkafka-2.1.0/tests/0020-destroy_hang.c +++ b/lib/librdkafka-2.3.0/tests/0020-destroy_hang.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0021-rkt_destroy.c b/lib/librdkafka-2.3.0/tests/0021-rkt_destroy.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/0021-rkt_destroy.c rename to lib/librdkafka-2.3.0/tests/0021-rkt_destroy.c index 76b4dd16b34..f1517b8476e 100644 --- a/lib/librdkafka-2.1.0/tests/0021-rkt_destroy.c +++ b/lib/librdkafka-2.3.0/tests/0021-rkt_destroy.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0022-consume_batch.c b/lib/librdkafka-2.3.0/tests/0022-consume_batch.c similarity index 76% rename from lib/librdkafka-2.1.0/tests/0022-consume_batch.c rename to lib/librdkafka-2.3.0/tests/0022-consume_batch.c index 64e826d0350..5deccc378fd 100644 --- a/lib/librdkafka-2.1.0/tests/0022-consume_batch.c +++ b/lib/librdkafka-2.3.0/tests/0022-consume_batch.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -196,8 +196,70 @@ static void do_test_consume_batch_oauthbearer_cb(void) { #endif +/** + * @brief Subscribe to a non-existent topic with rd_kafka_consume_batch_queue. + * Verify that a rkmessage with error code ERR_UNKNOWN_TOPIC_OR_PART + * is received. 
+ */ +static void do_test_consume_batch_non_existent_topic(void) { + + char *topic = "non-existent"; + rd_kafka_t *rk; + rd_kafka_topic_partition_list_t *rktpars; + rd_kafka_queue_t *rkq; + rd_kafka_message_t *rkms[1]; + rd_kafka_conf_t *conf; + ssize_t consumed = 0; + + SUB_TEST_QUICK(); + + test_conf_init(&conf, NULL, 30); + test_conf_set(conf, "allow.auto.create.topics", "false"); + test_conf_set(conf, "group.id", "test1"); + + /* Create simple consumer */ + rk = test_create_consumer(NULL, NULL, conf, NULL); + + /* Subscribe to the input topic */ + rktpars = rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(rktpars, topic, + /* The partition is ignored in + * rd_kafka_subscribe() */ + RD_KAFKA_PARTITION_UA); + + rd_kafka_subscribe(rk, rktpars); + rd_kafka_topic_partition_list_destroy(rktpars); + + /* Create generic consume queue */ + rkq = rd_kafka_queue_get_consumer(rk); + + TEST_SAY("Consuming from non-existent topic\n"); + while ((consumed = rd_kafka_consume_batch_queue(rkq, 1000, rkms, 1)) != + 1) { + TEST_SAY("Consuming from non-existent topic\n"); + } + + TEST_ASSERT(rkms[0]->err == RD_KAFKA_RESP_ERR_UNKNOWN_TOPIC_OR_PART, + "Expected ERR_UNKNOWN_TOPIC_OR_PART, not %s: %s", + rd_kafka_err2str(rkms[0]->err), + rd_kafka_message_errstr(rkms[0])); + TEST_SAY("Received ERR_UNKNOWN_TOPIC_OR_PART\n"); + + TEST_SAY("Stopping consumer\n"); + + rd_kafka_message_destroy(rkms[0]); + + rd_kafka_queue_destroy(rkq); + + rd_kafka_destroy(rk); + + SUB_TEST_PASS(); +} + + int main_0022_consume_batch(int argc, char **argv) { do_test_consume_batch(); + do_test_consume_batch_non_existent_topic(); return 0; } diff --git a/lib/librdkafka-2.1.0/tests/0025-timers.c b/lib/librdkafka-2.3.0/tests/0025-timers.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0025-timers.c rename to lib/librdkafka-2.3.0/tests/0025-timers.c index 318fc0a1b4d..79d765160a7 100644 --- a/lib/librdkafka-2.1.0/tests/0025-timers.c +++ b/lib/librdkafka-2.3.0/tests/0025-timers.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0026-consume_pause.c b/lib/librdkafka-2.3.0/tests/0026-consume_pause.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0026-consume_pause.c rename to lib/librdkafka-2.3.0/tests/0026-consume_pause.c index c8adc3885cc..dfac4572d99 100644 --- a/lib/librdkafka-2.1.0/tests/0026-consume_pause.c +++ b/lib/librdkafka-2.3.0/tests/0026-consume_pause.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0028-long_topicnames.c b/lib/librdkafka-2.3.0/tests/0028-long_topicnames.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/0028-long_topicnames.c rename to lib/librdkafka-2.3.0/tests/0028-long_topicnames.c index 999d8f135f7..a20f4308b5d 100644 --- a/lib/librdkafka-2.1.0/tests/0028-long_topicnames.c +++ b/lib/librdkafka-2.3.0/tests/0028-long_topicnames.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. 
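The new 0022-consume_batch.c test above asserts that rd_kafka_consume_batch_queue() surfaces a missing topic as a per-message RD_KAFKA_RESP_ERR_UNKNOWN_TOPIC_OR_PART error rather than hanging. A minimal sketch of the same check as plain application code (the drain_batch helper and batch size are hypothetical; it assumes rkq was obtained with rd_kafka_queue_get_consumer()):

#include <librdkafka/rdkafka.h>
#include <stdio.h>

/* Hypothetical helper: drain up to 8 messages from a consumer queue and
 * report unknown-topic errors, mirroring the check in the test above. */
static void drain_batch(rd_kafka_queue_t *rkq) {
        rd_kafka_message_t *msgs[8];
        ssize_t i, n = rd_kafka_consume_batch_queue(rkq, 1000, msgs, 8);

        for (i = 0; i < n; i++) {
                /* With allow.auto.create.topics=false the broker reports
                 * the missing topic as a per-message error. */
                if (msgs[i]->err == RD_KAFKA_RESP_ERR_UNKNOWN_TOPIC_OR_PART)
                        fprintf(stderr, "unknown topic: %s\n",
                                rd_kafka_message_errstr(msgs[i]));
                rd_kafka_message_destroy(msgs[i]);
        }
}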
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0029-assign_offset.c b/lib/librdkafka-2.3.0/tests/0029-assign_offset.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0029-assign_offset.c rename to lib/librdkafka-2.3.0/tests/0029-assign_offset.c index 5b3595baf01..29ec6d9ea8c 100644 --- a/lib/librdkafka-2.1.0/tests/0029-assign_offset.c +++ b/lib/librdkafka-2.3.0/tests/0029-assign_offset.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0030-offset_commit.c b/lib/librdkafka-2.3.0/tests/0030-offset_commit.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0030-offset_commit.c rename to lib/librdkafka-2.3.0/tests/0030-offset_commit.c index 9b05cb420bd..e53b0aefe4b 100644 --- a/lib/librdkafka-2.1.0/tests/0030-offset_commit.c +++ b/lib/librdkafka-2.3.0/tests/0030-offset_commit.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0031-get_offsets.c b/lib/librdkafka-2.3.0/tests/0031-get_offsets.c similarity index 50% rename from lib/librdkafka-2.1.0/tests/0031-get_offsets.c rename to lib/librdkafka-2.3.0/tests/0031-get_offsets.c index 327be43df49..573e36b10fe 100644 --- a/lib/librdkafka-2.1.0/tests/0031-get_offsets.c +++ b/lib/librdkafka-2.3.0/tests/0031-get_offsets.c @@ -2,7 +2,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,13 +33,126 @@ /* Typical include path would be <librdkafka/rdkafka.h>, but this program * is built from within the librdkafka source tree and thus differs. */ #include "rdkafka.h" /* for Kafka driver */ +#include "../src/rdkafka_proto.h" /** - * Verify that rd_kafka_(query|get)_watermark_offsets() works. + * @brief Verify that rd_kafka_query_watermark_offsets times out in case we're + * unable to fetch offsets within the timeout (Issue #2588). 
+ */ +void test_query_watermark_offsets_timeout(void) { + int64_t qry_low, qry_high; + rd_kafka_resp_err_t err; + const char *topic = test_mk_topic_name(__FUNCTION__, 1); + rd_kafka_mock_cluster_t *mcluster; + rd_kafka_t *rk; + rd_kafka_conf_t *conf; + const char *bootstraps; + const int timeout_ms = 1000; + + if (test_needs_auth()) { + TEST_SKIP("Mock cluster does not support SSL/SASL\n"); + return; + } + + SUB_TEST_QUICK(); + + mcluster = test_mock_cluster_new(1, &bootstraps); + rd_kafka_mock_topic_create(mcluster, topic, 1, 1); + rd_kafka_mock_broker_push_request_error_rtts( + mcluster, 1, RD_KAFKAP_ListOffsets, 1, RD_KAFKA_RESP_ERR_NO_ERROR, + (int)(timeout_ms * 1.2)); + + test_conf_init(&conf, NULL, 30); + test_conf_set(conf, "bootstrap.servers", bootstraps); + rk = test_create_handle(RD_KAFKA_PRODUCER, conf); + + + err = rd_kafka_query_watermark_offsets(rk, topic, 0, &qry_low, + &qry_high, timeout_ms); + + TEST_ASSERT(err == RD_KAFKA_RESP_ERR__TIMED_OUT, + "Querying watermark offsets should fail with %s when RTT > " + "timeout, instead got %s", + rd_kafka_err2name(RD_KAFKA_RESP_ERR__TIMED_OUT), + rd_kafka_err2name(err)); + + rd_kafka_destroy(rk); + test_mock_cluster_destroy(mcluster); + + SUB_TEST_PASS(); +} + +/** + * @brief Query watermark offsets should be able to query the correct + * leader immediately after a leader change. */ +void test_query_watermark_offsets_leader_change(void) { + int64_t qry_low, qry_high; + rd_kafka_resp_err_t err; + const char *topic = test_mk_topic_name(__FUNCTION__, 1); + rd_kafka_mock_cluster_t *mcluster; + rd_kafka_t *rk; + rd_kafka_conf_t *conf; + const char *bootstraps; + const int timeout_ms = 1000; + + if (test_needs_auth()) { + TEST_SKIP("Mock cluster does not support SSL/SASL\n"); + return; + } + SUB_TEST_QUICK(); + mcluster = test_mock_cluster_new(2, &bootstraps); + rd_kafka_mock_topic_create(mcluster, topic, 1, 2); + + /* Leader is broker 1 */ + rd_kafka_mock_partition_set_leader(mcluster, topic, 0, 1); + + test_conf_init(&conf, NULL, 30); + test_conf_set(conf, "bootstrap.servers", bootstraps); + rk = test_create_handle(RD_KAFKA_PRODUCER, conf); + + err = rd_kafka_query_watermark_offsets(rk, topic, 0, &qry_low, + &qry_high, timeout_ms); + + TEST_ASSERT(err == RD_KAFKA_RESP_ERR_NO_ERROR, + "Querying watermark offsets should succeed on the first broker " + "and cache the leader, got %s", + rd_kafka_err2name(err)); + + /* Leader is broker 2 */ + rd_kafka_mock_partition_set_leader(mcluster, topic, 0, 2); + + /* First call returns NOT_LEADER_FOR_PARTITION, second one should go to + * the second broker and return NO_ERROR instead of + * NOT_LEADER_FOR_PARTITION. */ + err = rd_kafka_query_watermark_offsets(rk, topic, 0, &qry_low, + &qry_high, timeout_ms); + + TEST_ASSERT(err == RD_KAFKA_RESP_ERR_NOT_LEADER_FOR_PARTITION, + "Querying watermark offsets should fail with " + "NOT_LEADER_FOR_PARTITION, got %s", + rd_kafka_err2name(err)); + + err = rd_kafka_query_watermark_offsets(rk, topic, 0, &qry_low, + &qry_high, timeout_ms); + + TEST_ASSERT(err == RD_KAFKA_RESP_ERR_NO_ERROR, + "Querying watermark offsets should succeed by " + "querying the second broker, got %s", + rd_kafka_err2name(err)); + + rd_kafka_destroy(rk); + test_mock_cluster_destroy(mcluster); + + SUB_TEST_PASS(); +} + +/** + * Verify that rd_kafka_(query|get)_watermark_offsets() works. + */ int main_0031_get_offsets(int argc, char **argv) { const char *topic = test_mk_topic_name(__FUNCTION__, 1); const int msgcnt = test_quick ? 
10 : 100; @@ -114,6 +228,14 @@ int main_0031_get_offsets(int argc, char **argv) { rd_kafka_topic_destroy(rkt); rd_kafka_destroy(rk); + return 0; +} + +int main_0031_get_offsets_mock(int argc, char **argv) { + + test_query_watermark_offsets_timeout(); + + test_query_watermark_offsets_leader_change(); return 0; } diff --git a/lib/librdkafka-2.1.0/tests/0033-regex_subscribe.c b/lib/librdkafka-2.3.0/tests/0033-regex_subscribe.c similarity index 97% rename from lib/librdkafka-2.1.0/tests/0033-regex_subscribe.c rename to lib/librdkafka-2.3.0/tests/0033-regex_subscribe.c index f31d33ebcbd..be974d0628d 100644 --- a/lib/librdkafka-2.1.0/tests/0033-regex_subscribe.c +++ b/lib/librdkafka-2.3.0/tests/0033-regex_subscribe.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2016, Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -174,6 +175,13 @@ static void consumer_poll_once(rd_kafka_t *rk) { rkmessage->partition, rkmessage->offset); } else if (rkmessage->err == RD_KAFKA_RESP_ERR_UNKNOWN_TOPIC_OR_PART) { + /* Test segfault associated with this call is solved */ + int32_t leader_epoch = rd_kafka_message_leader_epoch(rkmessage); + TEST_ASSERT(leader_epoch == -1, + "rd_kafka_message_leader_epoch should be -1" + ", got %" PRId32, + leader_epoch); + if (strstr(rd_kafka_topic_name(rkmessage->rkt), "NONEXIST")) TEST_SAY("%s: %s: error is expected for this topic\n", rd_kafka_topic_name(rkmessage->rkt), diff --git a/lib/librdkafka-2.1.0/tests/0034-offset_reset.c b/lib/librdkafka-2.3.0/tests/0034-offset_reset.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0034-offset_reset.c rename to lib/librdkafka-2.3.0/tests/0034-offset_reset.c index 9276764c8e7..4a6a58f4dc1 100644 --- a/lib/librdkafka-2.1.0/tests/0034-offset_reset.c +++ b/lib/librdkafka-2.3.0/tests/0034-offset_reset.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2016, Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0035-api_version.c b/lib/librdkafka-2.3.0/tests/0035-api_version.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/0035-api_version.c rename to lib/librdkafka-2.3.0/tests/0035-api_version.c index d005b1e9ec4..36eff1243c4 100644 --- a/lib/librdkafka-2.1.0/tests/0035-api_version.c +++ b/lib/librdkafka-2.3.0/tests/0035-api_version.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2016, Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0036-partial_fetch.c b/lib/librdkafka-2.3.0/tests/0036-partial_fetch.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/0036-partial_fetch.c rename to lib/librdkafka-2.3.0/tests/0036-partial_fetch.c index 69ee9864c84..50c64c35c6b 100644 --- a/lib/librdkafka-2.1.0/tests/0036-partial_fetch.c +++ b/lib/librdkafka-2.3.0/tests/0036-partial_fetch.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2016, Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill * All rights reserved. 
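The two mock-cluster tests added to 0031-get_offsets.c above pin down the behavior of rd_kafka_query_watermark_offsets(): it should return RD_KAFKA_RESP_ERR__TIMED_OUT when the broker round trip exceeds the supplied timeout (Issue #2588), and it should recover across a leader change. A minimal caller-side sketch (the helper, topic name and timeout are illustrative):

#include <librdkafka/rdkafka.h>
#include <stdio.h>

/* Hypothetical helper: query the low/high watermarks of partition 0. */
static void print_watermarks(rd_kafka_t *rk) {
        int64_t lo = 0, hi = 0;
        rd_kafka_resp_err_t err = rd_kafka_query_watermark_offsets(
            rk, "my_topic", 0 /* partition */, &lo, &hi, 1000 /* ms */);

        if (err == RD_KAFKA_RESP_ERR__TIMED_OUT)
                fprintf(stderr, "watermark query timed out\n"); /* retryable */
        else if (err)
                fprintf(stderr, "watermark query failed: %s\n",
                        rd_kafka_err2str(err));
        else
                printf("low=%lld high=%lld\n", (long long)lo, (long long)hi);
}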
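The 0033-regex_subscribe.c hunk above also probes rd_kafka_message_leader_epoch() on an error message, guarding against a past segfault; on messages that carry no epoch the accessor returns -1. A short sketch of the same defensive read (the helper name is hypothetical):

#include <inttypes.h>
#include <librdkafka/rdkafka.h>
#include <stdio.h>

static void log_leader_epoch(const rd_kafka_message_t *rkmessage) {
        /* Returns -1 when no leader epoch is known, e.g. for error
         * messages such as ERR_UNKNOWN_TOPIC_OR_PART. */
        int32_t epoch = rd_kafka_message_leader_epoch(rkmessage);

        if (epoch == -1)
                printf("no leader epoch known for this message\n");
        else
                printf("leader epoch: %" PRId32 "\n", epoch);
}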
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0037-destroy_hang_local.c b/lib/librdkafka-2.3.0/tests/0037-destroy_hang_local.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/0037-destroy_hang_local.c rename to lib/librdkafka-2.3.0/tests/0037-destroy_hang_local.c index 3b543fb6f4b..abb94e11776 100644 --- a/lib/librdkafka-2.1.0/tests/0037-destroy_hang_local.c +++ b/lib/librdkafka-2.3.0/tests/0037-destroy_hang_local.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0038-performance.c b/lib/librdkafka-2.3.0/tests/0038-performance.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0038-performance.c rename to lib/librdkafka-2.3.0/tests/0038-performance.c index 674964dc9c2..c7953546375 100644 --- a/lib/librdkafka-2.1.0/tests/0038-performance.c +++ b/lib/librdkafka-2.3.0/tests/0038-performance.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2016, Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0039-event.c b/lib/librdkafka-2.3.0/tests/0039-event.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0039-event.c rename to lib/librdkafka-2.3.0/tests/0039-event.c index 8d6b9f0ee1b..faee0d4c461 100644 --- a/lib/librdkafka-2.1.0/tests/0039-event.c +++ b/lib/librdkafka-2.3.0/tests/0039-event.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0040-io_event.c b/lib/librdkafka-2.3.0/tests/0040-io_event.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0040-io_event.c rename to lib/librdkafka-2.3.0/tests/0040-io_event.c index d47da52060f..fba8f9d3b9a 100644 --- a/lib/librdkafka-2.1.0/tests/0040-io_event.c +++ b/lib/librdkafka-2.3.0/tests/0040-io_event.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0041-fetch_max_bytes.c b/lib/librdkafka-2.3.0/tests/0041-fetch_max_bytes.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/0041-fetch_max_bytes.c rename to lib/librdkafka-2.3.0/tests/0041-fetch_max_bytes.c index e243dc8ac89..75ea4f80ccc 100644 --- a/lib/librdkafka-2.1.0/tests/0041-fetch_max_bytes.c +++ b/lib/librdkafka-2.3.0/tests/0041-fetch_max_bytes.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2016, Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0042-many_topics.c b/lib/librdkafka-2.3.0/tests/0042-many_topics.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0042-many_topics.c rename to lib/librdkafka-2.3.0/tests/0042-many_topics.c index 6ea5aa66951..c580b4a7567 100644 --- a/lib/librdkafka-2.1.0/tests/0042-many_topics.c +++ b/lib/librdkafka-2.3.0/tests/0042-many_topics.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0043-no_connection.c b/lib/librdkafka-2.3.0/tests/0043-no_connection.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/0043-no_connection.c rename to lib/librdkafka-2.3.0/tests/0043-no_connection.c index 3470c4ae139..594b4868a8c 100644 --- a/lib/librdkafka-2.1.0/tests/0043-no_connection.c +++ b/lib/librdkafka-2.3.0/tests/0043-no_connection.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0044-partition_cnt.c b/lib/librdkafka-2.3.0/tests/0044-partition_cnt.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/0044-partition_cnt.c rename to lib/librdkafka-2.3.0/tests/0044-partition_cnt.c index 51ef318c352..b4b66bd4823 100644 --- a/lib/librdkafka-2.1.0/tests/0044-partition_cnt.c +++ b/lib/librdkafka-2.3.0/tests/0044-partition_cnt.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0045-subscribe_update.c b/lib/librdkafka-2.3.0/tests/0045-subscribe_update.c similarity index 56% rename from lib/librdkafka-2.1.0/tests/0045-subscribe_update.c rename to lib/librdkafka-2.3.0/tests/0045-subscribe_update.c index f804613d726..cf013c5bdaf 100644 --- a/lib/librdkafka-2.1.0/tests/0045-subscribe_update.c +++ b/lib/librdkafka-2.3.0/tests/0045-subscribe_update.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -36,6 +37,7 @@ * - topic additions * - topic deletions * - partition count changes + * - replica rack changes (using mock broker) */ @@ -142,6 +144,61 @@ static void await_no_rebalance(const char *pfx, rd_kafka_event_destroy(rkev); } + +/** + * Wait for REBALANCE event and perform assignment/unassignment. + * For the first time and after each event, wait for up to \p timeout before + * stopping. Terminates earlier if \p min_events were seen. + * Asserts that \p min_events were processed. + * \p min_events set to 0 means it tries to drain all rebalance events and + * asserts only the fact that at least 1 event was processed. 
+ */ +static void await_rebalance(const char *pfx, + rd_kafka_t *rk, + rd_kafka_queue_t *queue, + int timeout_ms, + int min_events) { + rd_kafka_event_t *rkev; + int processed = 0; + + while (1) { + TEST_SAY("%s: waiting for %d ms for rebalance event\n", pfx, + timeout_ms); + + rkev = test_wait_event(queue, RD_KAFKA_EVENT_REBALANCE, + timeout_ms); + if (!rkev) + break; + TEST_ASSERT(rd_kafka_event_type(rkev) == + RD_KAFKA_EVENT_REBALANCE, + "either expected a timeout or a " + "RD_KAFKA_EVENT_REBALANCE, got %s : %s", + rd_kafka_event_name(rkev), + rd_kafka_err2str(rd_kafka_event_error(rkev))); + + TEST_SAY("Calling test_rebalance_cb, assignment type is %s\n", + rd_kafka_rebalance_protocol(rk)); + test_rebalance_cb(rk, rd_kafka_event_error(rkev), + rd_kafka_event_topic_partition_list(rkev), + NULL); + + processed++; + + rd_kafka_event_destroy(rkev); + + if (min_events && processed >= min_events) + break; + } + + if (!min_events) + min_events = 1; + + TEST_ASSERT( + processed >= min_events, + "Expected to process at least %d rebalance event, processed %d", + min_events, processed); +} + static void do_test_non_exist_and_partchange(void) { char *topic_a = rd_strdup(test_mk_topic_name("topic_a", 1)); rd_kafka_t *rk; @@ -421,6 +478,204 @@ static void do_test_regex_many_mock(const char *assignment_strategy, } + +/** + * @brief Changing the broker racks should trigger a rejoin, if the client rack + * is set, and the set of partition racks changes due to the broker rack change. + * + * This is using the mock cluster. + * + */ +static void do_test_replica_rack_change_mock(const char *assignment_strategy, + rd_bool_t use_regex, + rd_bool_t use_client_rack, + rd_bool_t use_replica_rack) { + const char *subscription = use_regex ? "^top" : "topic"; + const char *topic = "topic"; + const char *test_name = tsprintf( + "Replica rack changes (%s, subscription = \"%s\", %s client.rack, " + "%s replica.rack)", + assignment_strategy, subscription, + use_client_rack ? "with" : "without", + use_replica_rack ? "with" : "without"); + rd_kafka_t *rk; + rd_kafka_conf_t *conf; + rd_kafka_mock_cluster_t *mcluster; + const char *bootstraps; + rd_kafka_queue_t *queue; + + SUB_TEST("Testing %s", test_name); + + mcluster = test_mock_cluster_new(3, &bootstraps); + test_conf_init(&conf, NULL, 60 * 4); + + if (use_replica_rack) { + rd_kafka_mock_broker_set_rack(mcluster, 1, "rack0"); + rd_kafka_mock_broker_set_rack(mcluster, 2, "rack1"); + rd_kafka_mock_broker_set_rack(mcluster, 3, "rack2"); + } + + TEST_SAY("Creating topic %s\n", topic); + TEST_CALL_ERR__(rd_kafka_mock_topic_create(mcluster, topic, + 2 /* partition_cnt */, + 1 /* replication_factor */)); + + test_conf_set(conf, "bootstrap.servers", bootstraps); + test_conf_set(conf, "partition.assignment.strategy", + assignment_strategy); + /* Decrease metadata interval to speed up topic change discovery. */ + test_conf_set(conf, "topic.metadata.refresh.interval.ms", "3000"); + + if (use_client_rack) + test_conf_set(conf, "client.rack", "client_rack"); + + rd_kafka_conf_set_events(conf, RD_KAFKA_EVENT_REBALANCE); + rk = test_create_consumer(test_str_id_generate_tmp(), NULL, conf, NULL); + queue = rd_kafka_queue_get_consumer(rk); + + TEST_SAY("%s: Subscribing via %s\n", test_name, subscription); + test_consumer_subscribe(rk, subscription); + + await_rebalance(tsprintf("%s: initial assignment", test_name), rk, + queue, 10000, 1); + + /* Avoid issues if the replica assignment algorithm for mock broker + * changes, and change all the racks. 
*/ + if (use_replica_rack) { + TEST_SAY("%s: changing rack for all brokers\n", test_name); + rd_kafka_mock_broker_set_rack(mcluster, 1, "rack2"); + rd_kafka_mock_broker_set_rack(mcluster, 2, "rack0"); + rd_kafka_mock_broker_set_rack(mcluster, 3, "rack1"); + } + + if (use_client_rack && use_replica_rack) + await_rebalance(tsprintf("%s: rebalance", test_name), rk, queue, + 10000, 1); + else + await_no_rebalance( + tsprintf("%s: no rebalance without racks", test_name), rk, + queue, 10000); + + test_consumer_close(rk); + rd_kafka_queue_destroy(queue); + rd_kafka_destroy(rk); + test_mock_cluster_destroy(mcluster); + + SUB_TEST_PASS(); +} + + +/* Even if the leader has no rack, it should do rack-aware assignment in case + * one of the group members has a rack configured. */ +static void do_test_replica_rack_change_leader_no_rack_mock( + const char *assignment_strategy) { + const char *topic = "topic"; + const char *test_name = "Replica rack changes with leader rack absent."; + rd_kafka_t *c1, *c2; + rd_kafka_conf_t *conf1, *conf2; + rd_kafka_mock_cluster_t *mcluster; + const char *bootstraps; + rd_kafka_queue_t *queue; + rd_kafka_topic_partition_list_t *asg1, *asg2; + + SUB_TEST("Testing %s", test_name); + + mcluster = test_mock_cluster_new(2, &bootstraps); + test_conf_init(&conf1, NULL, 60 * 4); + + rd_kafka_mock_broker_set_rack(mcluster, 1, "rack0"); + rd_kafka_mock_broker_set_rack(mcluster, 2, "rack1"); + + TEST_SAY("Creating topic %s\n", topic); + TEST_CALL_ERR__(rd_kafka_mock_topic_create(mcluster, topic, + 2 /* partition_cnt */, + 1 /* replication_factor */)); + + test_conf_set(conf1, "bootstrap.servers", bootstraps); + test_conf_set(conf1, "partition.assignment.strategy", + assignment_strategy); + /* Decrease metadata interval to speed up topic change discovery. */ + test_conf_set(conf1, "topic.metadata.refresh.interval.ms", "3000"); + + conf2 = rd_kafka_conf_dup(conf1); + + /* Setting the group.instance.id ensures that the leader is always c1. + */ + test_conf_set(conf1, "client.id", "client1Leader"); + test_conf_set(conf1, "group.instance.id", "client1Leader"); + + test_conf_set(conf2, "client.id", "client2Follower"); + test_conf_set(conf2, "group.instance.id", "client2Follower"); + test_conf_set(conf2, "client.rack", "rack0"); + + rd_kafka_conf_set_events(conf1, RD_KAFKA_EVENT_REBALANCE); + c1 = test_create_consumer("mygroup", NULL, conf1, NULL); + queue = rd_kafka_queue_get_consumer(c1); + + c2 = test_create_consumer("mygroup", NULL, conf2, NULL); + + TEST_SAY("%s: Subscribing via %s\n", test_name, topic); + test_consumer_subscribe(c1, topic); + test_consumer_subscribe(c2, topic); + + /* Poll to cause joining. */ + rd_kafka_poll(c1, 1); + rd_kafka_poll(c2, 1); + + /* Drain all events, as we want to process the assignment. */ + await_rebalance(tsprintf("%s: initial assignment", test_name), c1, + queue, 10000, 0); + + rd_kafka_assignment(c1, &asg1); + rd_kafka_assignment(c2, &asg2); + + /* Because of the deterministic nature of replica assignment in the mock + * broker, we can always be certain that topic:0 has its only replica on + * broker 1, and topic:1 has its only replica on broker 2. 
*/ + TEST_ASSERT(asg1->cnt == 1 && asg1->elems[0].partition == 1, + "Expected c1 to be assigned topic1:1"); + TEST_ASSERT(asg2->cnt == 1 && asg2->elems[0].partition == 0, + "Expected c2 to be assigned topic1:0"); + + rd_kafka_topic_partition_list_destroy(asg1); + rd_kafka_topic_partition_list_destroy(asg2); + + /* Avoid issues if the replica assignment algorithm for mock broker + * changes, and change all the racks. */ + TEST_SAY("%s: changing rack for all brokers\n", test_name); + rd_kafka_mock_broker_set_rack(mcluster, 2, "rack0"); + rd_kafka_mock_broker_set_rack(mcluster, 1, "rack1"); + + /* Poll to cause rejoining. */ + rd_kafka_poll(c1, 1); + rd_kafka_poll(c2, 1); + + /* Drain all events, as we want to process the assignment. */ + await_rebalance(tsprintf("%s: rebalance", test_name), c1, queue, 10000, + 0); + + rd_kafka_assignment(c1, &asg1); + rd_kafka_assignment(c2, &asg2); + + /* Because of the deterministic nature of replica assignment in the mock + * broker, we can always be certain that topic:0 has its only replica on + * broker 1, and topic:1 has its only replica on broker 2. */ + TEST_ASSERT(asg1->cnt == 1 && asg1->elems[0].partition == 0, + "Expected c1 to be assigned topic1:0"); + TEST_ASSERT(asg2->cnt == 1 && asg2->elems[0].partition == 1, + "Expected c2 to be assigned topic1:1"); + + rd_kafka_topic_partition_list_destroy(asg1); + rd_kafka_topic_partition_list_destroy(asg2); + + test_consumer_close(c1); + test_consumer_close(c2); + rd_kafka_queue_destroy(queue); + rd_kafka_destroy(c1); + rd_kafka_destroy(c2); + test_mock_cluster_destroy(mcluster); + + SUB_TEST_PASS(); +} int main_0045_subscribe_update(int argc, char **argv) { @@ -457,3 +712,38 @@ int main_0045_subscribe_update_mock(int argc, char **argv) { return 0; } + + +int main_0045_subscribe_update_racks_mock(int argc, char **argv) { + int use_replica_rack = 0; + int use_client_rack = 0; + + if (test_needs_auth()) { + TEST_SKIP("Mock cluster does not support SSL/SASL\n"); + return 0; + } + + for (use_replica_rack = 0; use_replica_rack < 2; use_replica_rack++) { + for (use_client_rack = 0; use_client_rack < 2; + use_client_rack++) { + do_test_replica_rack_change_mock( + "range", rd_true /* use_regex */, use_client_rack, + use_replica_rack); + do_test_replica_rack_change_mock( + "range", rd_false /* use_regex */, use_client_rack, + use_replica_rack); + do_test_replica_rack_change_mock( + "cooperative-sticky", rd_true /* use_regex */, + use_client_rack, use_replica_rack); + do_test_replica_rack_change_mock( + "cooperative-sticky", rd_false /* use_regex */, + use_client_rack, use_replica_rack); + } + } + + /* Do not test with range assignor (yet) since it does not do rack aware + * assignment properly with the NULL rack, even for the Java client. */ + do_test_replica_rack_change_leader_no_rack_mock("cooperative-sticky"); + + return 0; +} diff --git a/lib/librdkafka-2.1.0/tests/0046-rkt_cache.c b/lib/librdkafka-2.3.0/tests/0046-rkt_cache.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/0046-rkt_cache.c rename to lib/librdkafka-2.3.0/tests/0046-rkt_cache.c index 541c0303769..93f7fc78ff2 100644 --- a/lib/librdkafka-2.1.0/tests/0046-rkt_cache.c +++ b/lib/librdkafka-2.3.0/tests/0046-rkt_cache.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. 
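The rack-change tests above all hinge on the consumer advertising its location through client.rack so the assignor can match it against the broker racks. A minimal consumer-configuration sketch (the group id, rack id and assignor value mirror the tests; the code itself is illustrative, not part of this patch):

#include <librdkafka/rdkafka.h>
#include <stdio.h>

int main(void) {
        char errstr[512];
        rd_kafka_conf_t *conf = rd_kafka_conf_new();

        /* Advertise this consumer's rack and use the cooperative-sticky
         * assignor, as in do_test_replica_rack_change_mock() above. */
        if (rd_kafka_conf_set(conf, "group.id", "mygroup", errstr,
                              sizeof(errstr)) != RD_KAFKA_CONF_OK ||
            rd_kafka_conf_set(conf, "client.rack", "rack0", errstr,
                              sizeof(errstr)) != RD_KAFKA_CONF_OK ||
            rd_kafka_conf_set(conf, "partition.assignment.strategy",
                              "cooperative-sticky", errstr,
                              sizeof(errstr)) != RD_KAFKA_CONF_OK) {
                fprintf(stderr, "config error: %s\n", errstr);
                rd_kafka_conf_destroy(conf);
                return 1;
        }

        rd_kafka_conf_destroy(conf);
        return 0;
}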
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0047-partial_buf_tmout.c b/lib/librdkafka-2.3.0/tests/0047-partial_buf_tmout.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/0047-partial_buf_tmout.c rename to lib/librdkafka-2.3.0/tests/0047-partial_buf_tmout.c index d90004a3aa1..e999afa367a 100644 --- a/lib/librdkafka-2.1.0/tests/0047-partial_buf_tmout.c +++ b/lib/librdkafka-2.3.0/tests/0047-partial_buf_tmout.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0048-partitioner.c b/lib/librdkafka-2.3.0/tests/0048-partitioner.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0048-partitioner.c rename to lib/librdkafka-2.3.0/tests/0048-partitioner.c index 84efee7dbd3..63761506c5a 100644 --- a/lib/librdkafka-2.1.0/tests/0048-partitioner.c +++ b/lib/librdkafka-2.3.0/tests/0048-partitioner.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0049-consume_conn_close.c b/lib/librdkafka-2.3.0/tests/0049-consume_conn_close.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0049-consume_conn_close.c rename to lib/librdkafka-2.3.0/tests/0049-consume_conn_close.c index 6083a1a764d..61f6d7a9dd7 100644 --- a/lib/librdkafka-2.1.0/tests/0049-consume_conn_close.c +++ b/lib/librdkafka-2.3.0/tests/0049-consume_conn_close.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0050-subscribe_adds.c b/lib/librdkafka-2.3.0/tests/0050-subscribe_adds.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0050-subscribe_adds.c rename to lib/librdkafka-2.3.0/tests/0050-subscribe_adds.c index d55e6e09a24..299c6b95d85 100644 --- a/lib/librdkafka-2.1.0/tests/0050-subscribe_adds.c +++ b/lib/librdkafka-2.3.0/tests/0050-subscribe_adds.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0051-assign_adds.c b/lib/librdkafka-2.3.0/tests/0051-assign_adds.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0051-assign_adds.c rename to lib/librdkafka-2.3.0/tests/0051-assign_adds.c index 6f97b2ee493..31866627ddd 100644 --- a/lib/librdkafka-2.1.0/tests/0051-assign_adds.c +++ b/lib/librdkafka-2.3.0/tests/0051-assign_adds.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0052-msg_timestamps.c b/lib/librdkafka-2.3.0/tests/0052-msg_timestamps.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0052-msg_timestamps.c rename to lib/librdkafka-2.3.0/tests/0052-msg_timestamps.c index ef9b89878f2..7921cd4594a 100644 --- a/lib/librdkafka-2.1.0/tests/0052-msg_timestamps.c +++ b/lib/librdkafka-2.3.0/tests/0052-msg_timestamps.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0053-stats_cb.cpp b/lib/librdkafka-2.3.0/tests/0053-stats_cb.cpp similarity index 99% rename from lib/librdkafka-2.1.0/tests/0053-stats_cb.cpp rename to lib/librdkafka-2.3.0/tests/0053-stats_cb.cpp index a61755c30b8..d7254a6ca30 100644 --- a/lib/librdkafka-2.1.0/tests/0053-stats_cb.cpp +++ b/lib/librdkafka-2.3.0/tests/0053-stats_cb.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2018, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0054-offset_time.cpp b/lib/librdkafka-2.3.0/tests/0054-offset_time.cpp similarity index 99% rename from lib/librdkafka-2.1.0/tests/0054-offset_time.cpp rename to lib/librdkafka-2.3.0/tests/0054-offset_time.cpp index 58c88b4a138..082357f663f 100644 --- a/lib/librdkafka-2.1.0/tests/0054-offset_time.cpp +++ b/lib/librdkafka-2.3.0/tests/0054-offset_time.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0055-producer_latency.c b/lib/librdkafka-2.3.0/tests/0055-producer_latency.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0055-producer_latency.c rename to lib/librdkafka-2.3.0/tests/0055-producer_latency.c index e0244cec952..a8cbb4efe86 100644 --- a/lib/librdkafka-2.1.0/tests/0055-producer_latency.c +++ b/lib/librdkafka-2.3.0/tests/0055-producer_latency.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0056-balanced_group_mt.c b/lib/librdkafka-2.3.0/tests/0056-balanced_group_mt.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0056-balanced_group_mt.c rename to lib/librdkafka-2.3.0/tests/0056-balanced_group_mt.c index e6205ddb636..59dc8691bc5 100644 --- a/lib/librdkafka-2.1.0/tests/0056-balanced_group_mt.c +++ b/lib/librdkafka-2.3.0/tests/0056-balanced_group_mt.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0057-invalid_topic.cpp b/lib/librdkafka-2.3.0/tests/0057-invalid_topic.cpp similarity index 98% rename from lib/librdkafka-2.1.0/tests/0057-invalid_topic.cpp rename to lib/librdkafka-2.3.0/tests/0057-invalid_topic.cpp index 0b50b40ad71..c2da2c9879d 100644 --- a/lib/librdkafka-2.1.0/tests/0057-invalid_topic.cpp +++ b/lib/librdkafka-2.3.0/tests/0057-invalid_topic.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0058-log.cpp b/lib/librdkafka-2.3.0/tests/0058-log.cpp similarity index 98% rename from lib/librdkafka-2.1.0/tests/0058-log.cpp rename to lib/librdkafka-2.3.0/tests/0058-log.cpp index 4da46e7f76b..bf1c97a74e5 100644 --- a/lib/librdkafka-2.1.0/tests/0058-log.cpp +++ b/lib/librdkafka-2.3.0/tests/0058-log.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0059-bsearch.cpp b/lib/librdkafka-2.3.0/tests/0059-bsearch.cpp similarity index 99% rename from lib/librdkafka-2.1.0/tests/0059-bsearch.cpp rename to lib/librdkafka-2.3.0/tests/0059-bsearch.cpp index 67508ff8243..18ea216bda7 100644 --- a/lib/librdkafka-2.1.0/tests/0059-bsearch.cpp +++ b/lib/librdkafka-2.3.0/tests/0059-bsearch.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2016, Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0060-op_prio.cpp b/lib/librdkafka-2.3.0/tests/0060-op_prio.cpp similarity index 99% rename from lib/librdkafka-2.1.0/tests/0060-op_prio.cpp rename to lib/librdkafka-2.3.0/tests/0060-op_prio.cpp index 156b8a57a9d..43371fd6b29 100644 --- a/lib/librdkafka-2.1.0/tests/0060-op_prio.cpp +++ b/lib/librdkafka-2.3.0/tests/0060-op_prio.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2016, Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0061-consumer_lag.cpp b/lib/librdkafka-2.3.0/tests/0061-consumer_lag.cpp similarity index 99% rename from lib/librdkafka-2.1.0/tests/0061-consumer_lag.cpp rename to lib/librdkafka-2.3.0/tests/0061-consumer_lag.cpp index 75954158342..10a18afb339 100644 --- a/lib/librdkafka-2.1.0/tests/0061-consumer_lag.cpp +++ b/lib/librdkafka-2.3.0/tests/0061-consumer_lag.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2016, Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0062-stats_event.c b/lib/librdkafka-2.3.0/tests/0062-stats_event.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0062-stats_event.c rename to lib/librdkafka-2.3.0/tests/0062-stats_event.c index bdddda5e08f..3e57e9a1dce 100644 --- a/lib/librdkafka-2.1.0/tests/0062-stats_event.c +++ b/lib/librdkafka-2.3.0/tests/0062-stats_event.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2017, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0063-clusterid.cpp b/lib/librdkafka-2.3.0/tests/0063-clusterid.cpp similarity index 99% rename from lib/librdkafka-2.1.0/tests/0063-clusterid.cpp rename to lib/librdkafka-2.3.0/tests/0063-clusterid.cpp index dda8d6ddb21..8ff565db7f9 100644 --- a/lib/librdkafka-2.1.0/tests/0063-clusterid.cpp +++ b/lib/librdkafka-2.3.0/tests/0063-clusterid.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2016, Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0064-interceptors.c b/lib/librdkafka-2.3.0/tests/0064-interceptors.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0064-interceptors.c rename to lib/librdkafka-2.3.0/tests/0064-interceptors.c index e5c5b047a73..ddfb9e6bb4e 100644 --- a/lib/librdkafka-2.1.0/tests/0064-interceptors.c +++ b/lib/librdkafka-2.3.0/tests/0064-interceptors.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2017, Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0065-yield.cpp b/lib/librdkafka-2.3.0/tests/0065-yield.cpp similarity index 99% rename from lib/librdkafka-2.1.0/tests/0065-yield.cpp rename to lib/librdkafka-2.3.0/tests/0065-yield.cpp index 6f2dbb0acb1..26b1e4bbc64 100644 --- a/lib/librdkafka-2.1.0/tests/0065-yield.cpp +++ b/lib/librdkafka-2.3.0/tests/0065-yield.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2016, Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0066-plugins.cpp b/lib/librdkafka-2.3.0/tests/0066-plugins.cpp similarity index 98% rename from lib/librdkafka-2.1.0/tests/0066-plugins.cpp rename to lib/librdkafka-2.3.0/tests/0066-plugins.cpp index 9f9f3124008..7b5e7b00fb9 100644 --- a/lib/librdkafka-2.1.0/tests/0066-plugins.cpp +++ b/lib/librdkafka-2.3.0/tests/0066-plugins.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2016, Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0067-empty_topic.cpp b/lib/librdkafka-2.3.0/tests/0067-empty_topic.cpp similarity index 96% rename from lib/librdkafka-2.1.0/tests/0067-empty_topic.cpp rename to lib/librdkafka-2.3.0/tests/0067-empty_topic.cpp index f71489fa163..2db9ee87350 100644 --- a/lib/librdkafka-2.1.0/tests/0067-empty_topic.cpp +++ b/lib/librdkafka-2.3.0/tests/0067-empty_topic.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2016, Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0068-produce_timeout.c b/lib/librdkafka-2.3.0/tests/0068-produce_timeout.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0068-produce_timeout.c rename to lib/librdkafka-2.3.0/tests/0068-produce_timeout.c index a7ad37e164d..7f195068887 100644 --- a/lib/librdkafka-2.1.0/tests/0068-produce_timeout.c +++ b/lib/librdkafka-2.3.0/tests/0068-produce_timeout.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0069-consumer_add_parts.c b/lib/librdkafka-2.3.0/tests/0069-consumer_add_parts.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0069-consumer_add_parts.c rename to lib/librdkafka-2.3.0/tests/0069-consumer_add_parts.c index 933e53775b2..b43c4c3a697 100644 --- a/lib/librdkafka-2.1.0/tests/0069-consumer_add_parts.c +++ b/lib/librdkafka-2.3.0/tests/0069-consumer_add_parts.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0070-null_empty.cpp b/lib/librdkafka-2.3.0/tests/0070-null_empty.cpp similarity index 99% rename from lib/librdkafka-2.1.0/tests/0070-null_empty.cpp rename to lib/librdkafka-2.3.0/tests/0070-null_empty.cpp index fac48185c34..154f0b079b7 100644 --- a/lib/librdkafka-2.1.0/tests/0070-null_empty.cpp +++ b/lib/librdkafka-2.3.0/tests/0070-null_empty.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2016, Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0072-headers_ut.c b/lib/librdkafka-2.3.0/tests/0072-headers_ut.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0072-headers_ut.c rename to lib/librdkafka-2.3.0/tests/0072-headers_ut.c index 0576d611ae5..d4b453ec04b 100644 --- a/lib/librdkafka-2.1.0/tests/0072-headers_ut.c +++ b/lib/librdkafka-2.3.0/tests/0072-headers_ut.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0073-headers.c b/lib/librdkafka-2.3.0/tests/0073-headers.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0073-headers.c rename to lib/librdkafka-2.3.0/tests/0073-headers.c index e7e5c4074d3..15e8ab40fd2 100644 --- a/lib/librdkafka-2.1.0/tests/0073-headers.c +++ b/lib/librdkafka-2.3.0/tests/0073-headers.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0074-producev.c b/lib/librdkafka-2.3.0/tests/0074-producev.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/0074-producev.c rename to lib/librdkafka-2.3.0/tests/0074-producev.c index 544a8473486..8cd67fe8b33 100644 --- a/lib/librdkafka-2.1.0/tests/0074-producev.c +++ b/lib/librdkafka-2.3.0/tests/0074-producev.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2020, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0075-retry.c b/lib/librdkafka-2.3.0/tests/0075-retry.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/0075-retry.c rename to lib/librdkafka-2.3.0/tests/0075-retry.c index 7e1e4f0f582..c3ce353abfe 100644 --- a/lib/librdkafka-2.1.0/tests/0075-retry.c +++ b/lib/librdkafka-2.3.0/tests/0075-retry.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -177,6 +177,7 @@ static void do_test_low_socket_timeout(const char *topic) { test_conf_set(conf, "socket.timeout.ms", "1000"); test_conf_set(conf, "socket.max.fails", "12345"); test_conf_set(conf, "retry.backoff.ms", "5000"); + test_conf_set(conf, "retry.backoff.max.ms", "5000"); /* Avoid api version requests (with their own timeout) to get in * the way of our test */ test_conf_set(conf, "api.version.request", "false"); diff --git a/lib/librdkafka-2.1.0/tests/0076-produce_retry.c b/lib/librdkafka-2.3.0/tests/0076-produce_retry.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0076-produce_retry.c rename to lib/librdkafka-2.3.0/tests/0076-produce_retry.c index 16d6f602c6d..86cc0bfb355 100644 --- a/lib/librdkafka-2.1.0/tests/0076-produce_retry.c +++ b/lib/librdkafka-2.3.0/tests/0076-produce_retry.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0077-compaction.c b/lib/librdkafka-2.3.0/tests/0077-compaction.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0077-compaction.c rename to lib/librdkafka-2.3.0/tests/0077-compaction.c index 01667114c78..623461b7f8b 100644 --- a/lib/librdkafka-2.1.0/tests/0077-compaction.c +++ b/lib/librdkafka-2.3.0/tests/0077-compaction.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. 
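The 0075-retry.c hunk above now also pins retry.backoff.max.ms, the upper bound for the exponential retry backoff introduced in librdkafka 2.3.0, so backoff growth cannot skew the test's timing assertions. A minimal sketch of setting the same pair of properties in application code, assuming standard rd_kafka_conf_set() usage (values illustrative, helper name hypothetical):

#include <stdio.h>
#include <librdkafka/rdkafka.h>

/* Bound the exponential retry backoff: start at 100ms, cap at 5s.
 * retry.backoff.max.ms is new in librdkafka 2.3.0. */
static rd_kafka_conf_t *conf_with_bounded_backoff(void) {
        char errstr[512];
        rd_kafka_conf_t *conf = rd_kafka_conf_new();

        if (rd_kafka_conf_set(conf, "retry.backoff.ms", "100", errstr,
                              sizeof(errstr)) != RD_KAFKA_CONF_OK ||
            rd_kafka_conf_set(conf, "retry.backoff.max.ms", "5000", errstr,
                              sizeof(errstr)) != RD_KAFKA_CONF_OK) {
                fprintf(stderr, "%s\n", errstr);
                rd_kafka_conf_destroy(conf);
                return NULL;
        }
        return conf;
}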
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0078-c_from_cpp.cpp b/lib/librdkafka-2.3.0/tests/0078-c_from_cpp.cpp similarity index 98% rename from lib/librdkafka-2.1.0/tests/0078-c_from_cpp.cpp rename to lib/librdkafka-2.3.0/tests/0078-c_from_cpp.cpp index 41d6886cb92..b405be0b30f 100644 --- a/lib/librdkafka-2.1.0/tests/0078-c_from_cpp.cpp +++ b/lib/librdkafka-2.3.0/tests/0078-c_from_cpp.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2016, Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0079-fork.c b/lib/librdkafka-2.3.0/tests/0079-fork.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/0079-fork.c rename to lib/librdkafka-2.3.0/tests/0079-fork.c index 506dd62a31a..0f217fc90b2 100644 --- a/lib/librdkafka-2.1.0/tests/0079-fork.c +++ b/lib/librdkafka-2.3.0/tests/0079-fork.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0080-admin_ut.c b/lib/librdkafka-2.3.0/tests/0080-admin_ut.c similarity index 87% rename from lib/librdkafka-2.1.0/tests/0080-admin_ut.c rename to lib/librdkafka-2.3.0/tests/0080-admin_ut.c index 9d049e5b14d..3a3b980f0aa 100644 --- a/lib/librdkafka-2.1.0/tests/0080-admin_ut.c +++ b/lib/librdkafka-2.3.0/tests/0080-admin_ut.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -633,9 +634,10 @@ static void do_test_DescribeConsumerGroups(const char *what, char errstr[512]; const char *errstr2; rd_kafka_resp_err_t err; + rd_kafka_error_t *error; test_timing_t timing; rd_kafka_event_t *rkev; - const rd_kafka_DeleteGroups_result_t *res; + const rd_kafka_DescribeConsumerGroups_result_t *res; const rd_kafka_ConsumerGroupDescription_t **resgroups; size_t resgroup_cnt; void *my_opaque = NULL, *opaque; @@ -657,6 +659,17 @@ static void do_test_DescribeConsumerGroups(const char *what, err = rd_kafka_AdminOptions_set_request_timeout( options, exp_timeout, errstr, sizeof(errstr)); TEST_ASSERT(!err, "%s", rd_kafka_err2str(err)); + if ((error = + rd_kafka_AdminOptions_set_include_authorized_operations( + options, 0))) { + fprintf(stderr, + "%% Failed to set require authorized " + "operations: %s\n", + rd_kafka_error_string(error)); + rd_kafka_error_destroy(error); + TEST_FAIL( + "Failed to set include authorized operations\n"); + } if (useq) { my_opaque = (void *)456; @@ -710,6 +723,7 @@ static void do_test_DescribeConsumerGroups(const char *what, /* The returned groups should be in the original order, and * should all have timed out. 
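The unit test above exercises the new rd_kafka_AdminOptions_set_include_authorized_operations() option in its disabled state and asserts that no authorized operations come back. For contrast, a hedged sketch of the opt-in path, assuming an existing client rk and result queue q (group id hypothetical):

const char *groups[] = {"my-group"};
rd_kafka_error_t *error;
rd_kafka_AdminOptions_t *options = rd_kafka_AdminOptions_new(
    rk, RD_KAFKA_ADMIN_OP_DESCRIBECONSUMERGROUPS);

/* Ask the broker to also return the ACL operations the caller is
 * authorized to perform on each described group. */
if ((error = rd_kafka_AdminOptions_set_include_authorized_operations(
         options, 1))) {
        fprintf(stderr, "%s\n", rd_kafka_error_string(error));
        rd_kafka_error_destroy(error);
}

rd_kafka_DescribeConsumerGroups(rk, groups, 1, options, q);
rd_kafka_AdminOptions_destroy(options);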
*/ for (i = 0; i < TEST_DESCRIBE_CONSUMER_GROUPS_CNT; i++) { + size_t authorized_operation_cnt; TEST_ASSERT( !strcmp(group_names[i], rd_kafka_ConsumerGroupDescription_group_id( @@ -724,6 +738,12 @@ static void do_test_DescribeConsumerGroups(const char *what, group_names[i], rd_kafka_error_string( rd_kafka_ConsumerGroupDescription_error(resgroups[i]))); + + rd_kafka_ConsumerGroupDescription_authorized_operations( + resgroups[i], &authorized_operation_cnt); + TEST_ASSERT(authorized_operation_cnt == 0, + "Got authorized operations" + "when not requested"); } rd_kafka_event_destroy(rkev); @@ -743,6 +763,219 @@ static void do_test_DescribeConsumerGroups(const char *what, SUB_TEST_PASS(); } +/** + * @brief DescribeTopics tests + * + * + * + */ +static void do_test_DescribeTopics(const char *what, + rd_kafka_t *rk, + rd_kafka_queue_t *useq, + int with_options) { + rd_kafka_queue_t *q; +#define TEST_DESCRIBE_TOPICS_CNT 4 + const char *topic_names[TEST_DESCRIBE_TOPICS_CNT]; + rd_kafka_TopicCollection_t *topics; + rd_kafka_AdminOptions_t *options = NULL; + int exp_timeout = MY_SOCKET_TIMEOUT_MS; + int i; + char errstr[512]; + const char *errstr2; + rd_kafka_resp_err_t err; + rd_kafka_error_t *error; + test_timing_t timing; + rd_kafka_event_t *rkev; + const rd_kafka_DescribeTopics_result_t *res; + const rd_kafka_TopicDescription_t **restopics; + size_t restopic_cnt; + void *my_opaque = NULL, *opaque; + + SUB_TEST_QUICK("%s DescribeTopics with %s, timeout %dms", + rd_kafka_name(rk), what, exp_timeout); + + q = useq ? useq : rd_kafka_queue_new(rk); + + for (i = 0; i < TEST_DESCRIBE_TOPICS_CNT; i++) { + topic_names[i] = rd_strdup(test_mk_topic_name(__FUNCTION__, 1)); + } + + topics = rd_kafka_TopicCollection_of_topic_names( + topic_names, TEST_DESCRIBE_TOPICS_CNT); + + if (with_options) { + options = rd_kafka_AdminOptions_new( + rk, RD_KAFKA_ADMIN_OP_DESCRIBETOPICS); + + exp_timeout = MY_SOCKET_TIMEOUT_MS * 2; + err = rd_kafka_AdminOptions_set_request_timeout( + options, exp_timeout, errstr, sizeof(errstr)); + TEST_ASSERT(!err, "%s", rd_kafka_err2str(err)); + if ((error = + rd_kafka_AdminOptions_set_include_authorized_operations( + options, 0))) { + fprintf(stderr, + "%% Failed to set topic authorized operations: " + "%s\n", + rd_kafka_error_string(error)); + rd_kafka_error_destroy(error); + TEST_FAIL( + "Failed to set topic authorized operations\n"); + } + + if (useq) { + my_opaque = (void *)456; + rd_kafka_AdminOptions_set_opaque(options, my_opaque); + } + } + + TIMING_START(&timing, "DescribeTopics"); + TEST_SAY("Call DescribeTopics, timeout is %dms\n", exp_timeout); + rd_kafka_DescribeTopics(rk, topics, options, q); + TIMING_ASSERT_LATER(&timing, 0, 50); + + /* Poll result queue */ + TIMING_START(&timing, "DescribeTopics.queue_poll"); + rkev = rd_kafka_queue_poll(q, exp_timeout + 1000); + TIMING_ASSERT_LATER(&timing, exp_timeout - 100, exp_timeout + 100); + TEST_ASSERT(rkev != NULL, "expected result in %dms", exp_timeout); + TEST_SAY("DescribeTopics: got %s in %.3fs\n", rd_kafka_event_name(rkev), + TIMING_DURATION(&timing) / 1000.0f); + + /* Convert event to proper result */ + res = rd_kafka_event_DescribeTopics_result(rkev); + TEST_ASSERT(res, "expected DescribeTopics_result, not %s", + rd_kafka_event_name(rkev)); + + opaque = rd_kafka_event_opaque(rkev); + TEST_ASSERT(opaque == my_opaque, "expected opaque to be %p, not %p", + my_opaque, opaque); + + /* Expecting error (Fail while waiting for controller)*/ + err = rd_kafka_event_error(rkev); + errstr2 = rd_kafka_event_error_string(rkev); + 
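This new unit test is among the first callers of the rd_kafka_TopicCollection_t handle that DescribeTopics takes in 2.3.0. A minimal sketch of the call pattern, assuming an existing rk and q (topic names hypothetical):

const char *names[] = {"topic-a", "topic-b"};
rd_kafka_TopicCollection_t *topics =
    rd_kafka_TopicCollection_of_topic_names(names, 2);

/* NULL options selects the defaults; the result arrives on q as an
 * RD_KAFKA_EVENT_DESCRIBETOPICS_RESULT event. The admin API copies its
 * arguments, so the collection can be destroyed once enqueued. */
rd_kafka_DescribeTopics(rk, topics, NULL, q);
rd_kafka_TopicCollection_destroy(topics);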
TEST_ASSERT(err == RD_KAFKA_RESP_ERR__TIMED_OUT, + "expected DescribeTopics to return error %s, not %s (%s)", + rd_kafka_err2str(RD_KAFKA_RESP_ERR__TIMED_OUT), + rd_kafka_err2str(err), err ? errstr2 : "n/a"); + + /* Extract topics, should return 0 topics. */ + restopics = rd_kafka_DescribeTopics_result_topics(res, &restopic_cnt); + TEST_ASSERT(!restopics && restopic_cnt == 0, + "expected no result topics, got %p cnt %" PRIusz, restopics, + restopic_cnt); + + rd_kafka_event_destroy(rkev); + + for (i = 0; i < TEST_DESCRIBE_TOPICS_CNT; i++) { + rd_free((char *)topic_names[i]); + } + rd_kafka_TopicCollection_destroy(topics); + + if (options) + rd_kafka_AdminOptions_destroy(options); + + if (!useq) + rd_kafka_queue_destroy(q); +#undef TEST_DESCRIBE_TOPICS_CNT + + SUB_TEST_PASS(); +} + +/** + * @brief DescribeCluster tests + * + * + * + */ +static void do_test_DescribeCluster(const char *what, + rd_kafka_t *rk, + rd_kafka_queue_t *useq, + int with_options) { + rd_kafka_queue_t *q; + rd_kafka_AdminOptions_t *options = NULL; + int exp_timeout = MY_SOCKET_TIMEOUT_MS; + char errstr[512]; + const char *errstr2; + rd_kafka_resp_err_t err; + rd_kafka_error_t *error; + test_timing_t timing; + rd_kafka_event_t *rkev; + const rd_kafka_DescribeCluster_result_t *res; + void *my_opaque = NULL, *opaque; + + SUB_TEST_QUICK("%s DescribeCluster with %s, timeout %dms", + rd_kafka_name(rk), what, exp_timeout); + + q = useq ? useq : rd_kafka_queue_new(rk); + + if (with_options) { + options = rd_kafka_AdminOptions_new( + rk, RD_KAFKA_ADMIN_OP_DESCRIBECLUSTER); + + exp_timeout = MY_SOCKET_TIMEOUT_MS * 2; + err = rd_kafka_AdminOptions_set_request_timeout( + options, exp_timeout, errstr, sizeof(errstr)); + TEST_ASSERT(!err, "%s", rd_kafka_err2str(err)); + if ((error = + rd_kafka_AdminOptions_set_include_authorized_operations( + options, 0))) { + fprintf(stderr, + "%% Failed to set cluster authorized " + "operations: %s\n", + rd_kafka_error_string(error)); + rd_kafka_error_destroy(error); + TEST_FAIL( + "Failed to set cluster authorized operations\n"); + } + + if (useq) { + my_opaque = (void *)456; + rd_kafka_AdminOptions_set_opaque(options, my_opaque); + } + } + + TIMING_START(&timing, "DescribeCluster"); + TEST_SAY("Call DescribeCluster, timeout is %dms\n", exp_timeout); + rd_kafka_DescribeCluster(rk, options, q); + TIMING_ASSERT_LATER(&timing, 0, 50); + + /* Poll result queue */ + TIMING_START(&timing, "DescribeCluster.queue_poll"); + rkev = rd_kafka_queue_poll(q, exp_timeout + 1000); + TIMING_ASSERT_LATER(&timing, exp_timeout - 100, exp_timeout + 100); + TEST_ASSERT(rkev != NULL, "expected result in %dms", exp_timeout); + TEST_SAY("DescribeCluster: got %s in %.3fs\n", + rd_kafka_event_name(rkev), TIMING_DURATION(&timing) / 1000.0f); + + /* Convert event to proper result */ + res = rd_kafka_event_DescribeCluster_result(rkev); + TEST_ASSERT(res, "expected DescribeCluster_result, not %s", + rd_kafka_event_name(rkev)); + + opaque = rd_kafka_event_opaque(rkev); + TEST_ASSERT(opaque == my_opaque, "expected opaque to be %p, not %p", + my_opaque, opaque); + + /* Expecting error (Fail while waiting for controller)*/ + err = rd_kafka_event_error(rkev); + errstr2 = rd_kafka_event_error_string(rkev); + TEST_ASSERT(err == RD_KAFKA_RESP_ERR__TIMED_OUT, + "expected DescribeCluster to return error %s, not %s (%s)", + rd_kafka_err2str(RD_KAFKA_RESP_ERR__TIMED_OUT), + rd_kafka_err2str(err), err ? 
errstr2 : "n/a"); + + rd_kafka_event_destroy(rkev); + + if (options) + rd_kafka_AdminOptions_destroy(options); + + if (!useq) + rd_kafka_queue_destroy(q); + + SUB_TEST_PASS(); +} + static void do_test_DeleteRecords(const char *what, rd_kafka_t *rk, rd_kafka_queue_t *useq, @@ -1987,6 +2220,140 @@ static void do_test_ListConsumerGroupOffsets(const char *what, SUB_TEST_PASS(); } +static void do_test_DescribeUserScramCredentials(const char *what, + rd_kafka_t *rk, + rd_kafka_queue_t *useq) { + char errstr[512]; + rd_kafka_AdminOptions_t *options; + rd_kafka_event_t *rkev; + rd_kafka_queue_t *rkqu; + + SUB_TEST_QUICK("%s", what); + + rkqu = useq ? useq : rd_kafka_queue_new(rk); + + const char *users[2]; + users[0] = "Sam"; + users[1] = "Sam"; + + /* Whenever a duplicate user is passed, + * the request should fail with error code + * RD_KAFKA_RESP_ERR__INVALID_ARG */ + options = rd_kafka_AdminOptions_new( + rk, RD_KAFKA_ADMIN_OP_DESCRIBEUSERSCRAMCREDENTIALS); + TEST_CALL_ERR__(rd_kafka_AdminOptions_set_request_timeout( + options, 30 * 1000 /* 30s */, errstr, sizeof(errstr))); + + rd_kafka_DescribeUserScramCredentials(rk, users, RD_ARRAY_SIZE(users), + options, rkqu); + rd_kafka_AdminOptions_destroy(options); + + rkev = test_wait_admin_result( + rkqu, RD_KAFKA_EVENT_DESCRIBEUSERSCRAMCREDENTIALS_RESULT, 2000); + + TEST_ASSERT( + rd_kafka_event_error(rkev) == RD_KAFKA_RESP_ERR__INVALID_ARG, + "Expected \"Local: Invalid argument or configuration\", not %s", + rd_kafka_err2str(rd_kafka_event_error(rkev))); + + rd_kafka_event_destroy(rkev); + + if (!useq) + rd_kafka_queue_destroy(rkqu); + + SUB_TEST_PASS(); +} + +static void do_test_AlterUserScramCredentials(const char *what, + rd_kafka_t *rk, + rd_kafka_queue_t *useq) { + char errstr[512]; + rd_kafka_AdminOptions_t *options; + rd_kafka_event_t *rkev; + rd_kafka_queue_t *rkqu; + + SUB_TEST_QUICK("%s", what); + + rkqu = useq ? 
useq : rd_kafka_queue_new(rk); + +#if !WITH_SSL + /* Whenever librdkafka wasn't built with OpenSSL, + * the request should fail with error code + * RD_KAFKA_RESP_ERR__INVALID_ARG */ + rd_kafka_UserScramCredentialAlteration_t *alterations_ssl[1]; + alterations_ssl[0] = rd_kafka_UserScramCredentialUpsertion_new( + "user", RD_KAFKA_SCRAM_MECHANISM_SHA_256, 10000, + (unsigned char *)"password", 8, (unsigned char *)"salt", 4); + options = rd_kafka_AdminOptions_new( + rk, RD_KAFKA_ADMIN_OP_ALTERUSERSCRAMCREDENTIALS); + TEST_CALL_ERR__(rd_kafka_AdminOptions_set_request_timeout( + options, 30 * 1000 /* 30s */, errstr, sizeof(errstr))); + + rd_kafka_AlterUserScramCredentials(rk, alterations_ssl, 1, options, + rkqu); + rd_kafka_UserScramCredentialAlteration_destroy_array( + alterations_ssl, RD_ARRAY_SIZE(alterations_ssl)); + rd_kafka_AdminOptions_destroy(options); + + rkev = test_wait_admin_result( + rkqu, RD_KAFKA_EVENT_ALTERUSERSCRAMCREDENTIALS_RESULT, 2000); + + TEST_ASSERT( + rd_kafka_event_error(rkev) == RD_KAFKA_RESP_ERR__INVALID_ARG, + "Expected \"Local: Invalid argument or configuration\", not %s", + rd_kafka_err2str(rd_kafka_event_error(rkev))); + + rd_kafka_event_destroy(rkev); +#endif + + rd_kafka_UserScramCredentialAlteration_t *alterations[1]; + alterations[0] = rd_kafka_UserScramCredentialDeletion_new( + "", RD_KAFKA_SCRAM_MECHANISM_SHA_256); + options = rd_kafka_AdminOptions_new( + rk, RD_KAFKA_ADMIN_OP_ALTERUSERSCRAMCREDENTIALS); + TEST_CALL_ERR__(rd_kafka_AdminOptions_set_request_timeout( + options, 30 * 1000 /* 30s */, errstr, sizeof(errstr))); + + /* Whenever an empty array is passed, + * the request should fail with error code + * RD_KAFKA_RESP_ERR__INVALID_ARG */ + rd_kafka_AlterUserScramCredentials(rk, alterations, 0, options, rkqu); + + rkev = test_wait_admin_result( + rkqu, RD_KAFKA_EVENT_ALTERUSERSCRAMCREDENTIALS_RESULT, 2000); + + TEST_ASSERT( + rd_kafka_event_error(rkev) == RD_KAFKA_RESP_ERR__INVALID_ARG, + "Expected \"Local: Invalid argument or configuration\", not %s", + rd_kafka_err2str(rd_kafka_event_error(rkev))); + + rd_kafka_event_destroy(rkev); + + /* Whenever an empty user is passed, + * the request should fail with error code + * RD_KAFKA_RESP_ERR__INVALID_ARG */ + rd_kafka_AlterUserScramCredentials( + rk, alterations, RD_ARRAY_SIZE(alterations), options, rkqu); + rkev = test_wait_admin_result( + rkqu, RD_KAFKA_EVENT_ALTERUSERSCRAMCREDENTIALS_RESULT, 2000); + + TEST_ASSERT( + rd_kafka_event_error(rkev) == RD_KAFKA_RESP_ERR__INVALID_ARG, + "Expected \"Local: Invalid argument or configuration\", not %s", + rd_kafka_err2str(rd_kafka_event_error(rkev))); + + rd_kafka_event_destroy(rkev); + + + rd_kafka_UserScramCredentialAlteration_destroy_array( + alterations, RD_ARRAY_SIZE(alterations)); + rd_kafka_AdminOptions_destroy(options); + + if (!useq) + rd_kafka_queue_destroy(rkqu); + + SUB_TEST_PASS(); +} /** * @brief Test a mix of APIs using the same replyq. 
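do_test_AlterUserScramCredentials() above only drives the error paths (no SSL build, empty alteration array, empty user name). For contrast, a sketch of a well-formed request mixing an upsertion and a deletion, assuming rk and q exist (user names, salt, and iteration count are illustrative):

rd_kafka_UserScramCredentialAlteration_t *alts[2];

alts[0] = rd_kafka_UserScramCredentialUpsertion_new(
    "alice", RD_KAFKA_SCRAM_MECHANISM_SHA_256, 8192,
    (unsigned char *)"password", 8, (unsigned char *)"salt", 4);
alts[1] = rd_kafka_UserScramCredentialDeletion_new(
    "bob", RD_KAFKA_SCRAM_MECHANISM_SHA_512);

rd_kafka_AlterUserScramCredentials(rk, alts, 2, NULL /* options */, q);
rd_kafka_UserScramCredentialAlteration_destroy_array(alts, 2);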
@@ -2444,6 +2811,14 @@ static void do_test_apis(rd_kafka_type_t cltype) { do_test_DescribeConsumerGroups("main queue, options", rk, mainq, 1, rd_false); + do_test_DescribeTopics("temp queue, no options", rk, NULL, 0); + do_test_DescribeTopics("temp queue, options", rk, NULL, 1); + do_test_DescribeTopics("main queue, options", rk, mainq, 1); + + do_test_DescribeCluster("temp queue, no options", rk, NULL, 0); + do_test_DescribeCluster("temp queue, options", rk, NULL, 1); + do_test_DescribeCluster("main queue, options", rk, mainq, 1); + do_test_DeleteGroups("temp queue, no options", rk, NULL, 0, rd_false); do_test_DeleteGroups("temp queue, options", rk, NULL, 1, rd_false); do_test_DeleteGroups("main queue, options", rk, mainq, 1, rd_false); @@ -2495,6 +2870,12 @@ static void do_test_apis(rd_kafka_type_t cltype) { do_test_ListConsumerGroupOffsets("main queue, options", rk, mainq, 1, rd_true); + do_test_DescribeUserScramCredentials("main queue", rk, mainq); + do_test_DescribeUserScramCredentials("temp queue", rk, NULL); + + do_test_AlterUserScramCredentials("main queue", rk, mainq); + do_test_AlterUserScramCredentials("temp queue", rk, NULL); + do_test_mix(rk, mainq); do_test_configs(rk, mainq); diff --git a/lib/librdkafka-2.1.0/tests/0081-admin.c b/lib/librdkafka-2.3.0/tests/0081-admin.c similarity index 70% rename from lib/librdkafka-2.1.0/tests/0081-admin.c rename to lib/librdkafka-2.3.0/tests/0081-admin.c index 7da2dff1569..0690217a3c8 100644 --- a/lib/librdkafka-2.1.0/tests/0081-admin.c +++ b/lib/librdkafka-2.3.0/tests/0081-admin.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -896,6 +897,252 @@ static void do_test_AlterConfigs(rd_kafka_t *rk, rd_kafka_queue_t *rkqu) { SUB_TEST_PASS(); } +/** + * @brief Test IncrementalAlterConfigs + */ +static void do_test_IncrementalAlterConfigs(rd_kafka_t *rk, + rd_kafka_queue_t *rkqu) { +#define MY_CONFRES_CNT 3 + char *topics[MY_CONFRES_CNT]; + rd_kafka_ConfigResource_t *configs[MY_CONFRES_CNT]; + rd_kafka_AdminOptions_t *options; + rd_kafka_resp_err_t exp_err[MY_CONFRES_CNT]; + rd_kafka_event_t *rkev; + rd_kafka_resp_err_t err; + rd_kafka_error_t *error; + const rd_kafka_IncrementalAlterConfigs_result_t *res; + const rd_kafka_ConfigResource_t **rconfigs; + size_t rconfig_cnt; + char errstr[128]; + const char *errstr2; + int ci = 0; + int i; + int fails = 0; + + SUB_TEST_QUICK(); + + /* + * Only create one topic, the others will be non-existent. + */ + for (i = 0; i < MY_CONFRES_CNT; i++) + rd_strdupa(&topics[i], test_mk_topic_name(__FUNCTION__, 1)); + + test_CreateTopics_simple(rk, NULL, topics, 1, 1, NULL); + + test_wait_topic_exists(rk, topics[0], 10000); + + + /** Test the test helper, for use in other tests. 
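The do_test_IncrementalAlterConfigs() coverage added here targets KIP-339-style incremental config updates, which change only the listed entries instead of replacing the whole config set like the legacy AlterConfigs. A minimal sketch, assuming rk and q exist (topic name and value hypothetical):

rd_kafka_error_t *error;
rd_kafka_ConfigResource_t *config =
    rd_kafka_ConfigResource_new(RD_KAFKA_RESOURCE_TOPIC, "topic-a");

/* SET replaces a single entry; all other topic configs are untouched. */
error = rd_kafka_ConfigResource_add_incremental_config(
    config, "compression.type", RD_KAFKA_ALTER_CONFIG_OP_TYPE_SET, "gzip");
if (error) {
        fprintf(stderr, "%s\n", rd_kafka_error_string(error));
        rd_kafka_error_destroy(error);
}

rd_kafka_IncrementalAlterConfigs(rk, &config, 1, NULL /* options */, q);
rd_kafka_ConfigResource_destroy(config);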
*/ + do { + const char *broker_id = tsprintf("%d", avail_brokers[0]); + const char *confs_set_append[] = { + "compression.type", "SET", "lz4", + "cleanup.policy", "APPEND", "compact"}; + const char *confs_delete_subtract[] = { + "compression.type", "DELETE", "lz4", + "cleanup.policy", "SUBTRACT", "compact"}; + const char *confs_set_append_broker[] = { + "background.threads", "SET", "9", + "log.cleanup.policy", "APPEND", "compact"}; + const char *confs_delete_subtract_broker[] = { + "background.threads", "DELETE", "", + "log.cleanup.policy", "SUBTRACT", "compact"}; + + TEST_SAY("Testing test helper with SET and APPEND\n"); + test_IncrementalAlterConfigs_simple(rk, RD_KAFKA_RESOURCE_TOPIC, + topics[0], confs_set_append, + 2); + TEST_SAY("Testing test helper with SUBTRACT and DELETE\n"); + test_IncrementalAlterConfigs_simple(rk, RD_KAFKA_RESOURCE_TOPIC, + topics[0], + confs_delete_subtract, 2); + + TEST_SAY( + "Testing test helper with SET and APPEND with BROKER " + "resource type\n"); + test_IncrementalAlterConfigs_simple( + rk, RD_KAFKA_RESOURCE_BROKER, broker_id, + confs_set_append_broker, 2); + TEST_SAY( + "Testing test helper with SUBTRACT and DELETE with BROKER " + "resource type\n"); + test_IncrementalAlterConfigs_simple( + rk, RD_KAFKA_RESOURCE_BROKER, broker_id, + confs_delete_subtract_broker, 2); + TEST_SAY("End testing test helper\n"); + } while (0); + + /* + * ConfigResource #0: valid topic config + */ + configs[ci] = + rd_kafka_ConfigResource_new(RD_KAFKA_RESOURCE_TOPIC, topics[ci]); + + error = rd_kafka_ConfigResource_add_incremental_config( + configs[ci], "compression.type", RD_KAFKA_ALTER_CONFIG_OP_TYPE_SET, + "gzip"); + TEST_ASSERT(!error, "%s", rd_kafka_error_string(error)); + + error = rd_kafka_ConfigResource_add_incremental_config( + configs[ci], "flush.ms", RD_KAFKA_ALTER_CONFIG_OP_TYPE_SET, + "12345678"); + TEST_ASSERT(!error, "%s", rd_kafka_error_string(error)); + + exp_err[ci] = RD_KAFKA_RESP_ERR_NO_ERROR; + ci++; + + + if (test_broker_version >= TEST_BRKVER(1, 1, 0, 0)) { + /* + * ConfigResource #1: valid broker config + */ + configs[ci] = rd_kafka_ConfigResource_new( + RD_KAFKA_RESOURCE_BROKER, + tsprintf("%" PRId32, avail_brokers[0])); + + error = rd_kafka_ConfigResource_add_incremental_config( + configs[ci], "sasl.kerberos.min.time.before.relogin", + RD_KAFKA_ALTER_CONFIG_OP_TYPE_SET, "58000"); + TEST_ASSERT(!error, "%s", rd_kafka_error_string(error)); + + exp_err[ci] = RD_KAFKA_RESP_ERR_NO_ERROR; + ci++; + } else { + TEST_WARN( + "Skipping RESOURCE_BROKER test on unsupported " + "broker version\n"); + } + + /* + * ConfigResource #2: valid topic config, non-existent topic + */ + configs[ci] = + rd_kafka_ConfigResource_new(RD_KAFKA_RESOURCE_TOPIC, topics[ci]); + + error = rd_kafka_ConfigResource_add_incremental_config( + configs[ci], "compression.type", RD_KAFKA_ALTER_CONFIG_OP_TYPE_SET, + "lz4"); + TEST_ASSERT(!error, "%s", rd_kafka_error_string(error)); + + error = rd_kafka_ConfigResource_add_incremental_config( + configs[ci], "offset.metadata.max.bytes", + RD_KAFKA_ALTER_CONFIG_OP_TYPE_SET, "12345"); + TEST_ASSERT(!error, "%s", rd_kafka_error_string(error)); + + if (test_broker_version >= TEST_BRKVER(2, 7, 0, 0)) + exp_err[ci] = RD_KAFKA_RESP_ERR_UNKNOWN_TOPIC_OR_PART; + else + exp_err[ci] = RD_KAFKA_RESP_ERR_UNKNOWN; + ci++; + + /* + * Timeout options + */ + options = rd_kafka_AdminOptions_new( + rk, RD_KAFKA_ADMIN_OP_INCREMENTALALTERCONFIGS); + err = rd_kafka_AdminOptions_set_request_timeout(options, 10000, errstr, + sizeof(errstr)); + TEST_ASSERT(!err, "%s", 
errstr); + + + /* + * Fire off request + */ + rd_kafka_IncrementalAlterConfigs(rk, configs, ci, options, rkqu); + + rd_kafka_AdminOptions_destroy(options); + + /* + * Wait for result + */ + rkev = test_wait_admin_result( + rkqu, RD_KAFKA_EVENT_INCREMENTALALTERCONFIGS_RESULT, 10000 + 1000); + + /* + * Extract result + */ + res = rd_kafka_event_IncrementalAlterConfigs_result(rkev); + TEST_ASSERT(res, "Expected AlterConfigs result, not %s", + rd_kafka_event_name(rkev)); + + err = rd_kafka_event_error(rkev); + errstr2 = rd_kafka_event_error_string(rkev); + TEST_ASSERT(!err, "Expected success, not %s: %s", + rd_kafka_err2name(err), errstr2); + + rconfigs = rd_kafka_IncrementalAlterConfigs_result_resources( + res, &rconfig_cnt); + TEST_ASSERT((int)rconfig_cnt == ci, + "Expected %d result resources, got %" PRIusz "\n", ci, + rconfig_cnt); + + /* + * Verify status per resource + */ + for (i = 0; i < (int)rconfig_cnt; i++) { + const rd_kafka_ConfigEntry_t **entries; + size_t entry_cnt; + + err = rd_kafka_ConfigResource_error(rconfigs[i]); + errstr2 = rd_kafka_ConfigResource_error_string(rconfigs[i]); + + entries = + rd_kafka_ConfigResource_configs(rconfigs[i], &entry_cnt); + + TEST_SAY( + "ConfigResource #%d: type %s (%d), \"%s\": " + "%" PRIusz " ConfigEntries, error %s (%s)\n", + i, + rd_kafka_ResourceType_name( + rd_kafka_ConfigResource_type(rconfigs[i])), + rd_kafka_ConfigResource_type(rconfigs[i]), + rd_kafka_ConfigResource_name(rconfigs[i]), entry_cnt, + rd_kafka_err2name(err), errstr2 ? errstr2 : ""); + + test_print_ConfigEntry_array(entries, entry_cnt, 1); + + if (rd_kafka_ConfigResource_type(rconfigs[i]) != + rd_kafka_ConfigResource_type(configs[i]) || + strcmp(rd_kafka_ConfigResource_name(rconfigs[i]), + rd_kafka_ConfigResource_name(configs[i]))) { + TEST_FAIL_LATER( + "ConfigResource #%d: " + "expected type %s name %s, " + "got type %s name %s", + i, + rd_kafka_ResourceType_name( + rd_kafka_ConfigResource_type(configs[i])), + rd_kafka_ConfigResource_name(configs[i]), + rd_kafka_ResourceType_name( + rd_kafka_ConfigResource_type(rconfigs[i])), + rd_kafka_ConfigResource_name(rconfigs[i])); + fails++; + continue; + } + + + if (err != exp_err[i]) { + TEST_FAIL_LATER( + "ConfigResource #%d: " + "expected %s (%d), got %s (%s)", + i, rd_kafka_err2name(exp_err[i]), exp_err[i], + rd_kafka_err2name(err), errstr2 ? 
errstr2 : ""); + fails++; + } + } + + TEST_ASSERT(!fails, "See %d previous failure(s)", fails); + + rd_kafka_event_destroy(rkev); + + rd_kafka_ConfigResource_destroy_array(configs, ci); + + TEST_LATER_CHECK(); +#undef MY_CONFRES_CNT + + SUB_TEST_PASS(); +} + /** @@ -2619,6 +2866,7 @@ static void do_test_DescribeConsumerGroups(const char *what, char client_ids[TEST_DESCRIBE_CONSUMER_GROUPS_CNT][512]; rd_kafka_t *rks[TEST_DESCRIBE_CONSUMER_GROUPS_CNT]; const rd_kafka_DescribeConsumerGroups_result_t *res; + size_t authorized_operation_cnt; rd_bool_t has_group_instance_id = test_broker_version >= TEST_BRKVER(2, 4, 0, 0); @@ -2739,6 +2987,16 @@ static void do_test_DescribeConsumerGroups(const char *what, rd_kafka_ConsumerGroupDescription_error(act)); rd_kafka_consumer_group_state_t state = rd_kafka_ConsumerGroupDescription_state(act); + const rd_kafka_AclOperation_t *authorized_operations = + rd_kafka_ConsumerGroupDescription_authorized_operations( + act, &authorized_operation_cnt); + TEST_ASSERT( + authorized_operation_cnt == 0, + "Authorized operation count should be 0, is %" PRIusz, + authorized_operation_cnt); + TEST_ASSERT( + authorized_operations == NULL, + "Authorized operations should be NULL when not requested"); TEST_ASSERT( strcmp(exp->group_id, rd_kafka_ConsumerGroupDescription_group_id(act)) == @@ -2845,6 +3103,8 @@ static void do_test_DescribeConsumerGroups(const char *what, rd_free(expected[i].group_id); } + test_DeleteTopics_simple(rk, NULL, &topic, 1, NULL); + rd_free(topic); if (options) @@ -2859,78 +3119,818 @@ static void do_test_DescribeConsumerGroups(const char *what, SUB_TEST_PASS(); } +/** @brief Helper function to check whether \p expected and \p actual contain + * the same values. */ +static void +test_match_authorized_operations(const rd_kafka_AclOperation_t *expected, + size_t expected_cnt, + const rd_kafka_AclOperation_t *actual, + size_t actual_cnt) { + size_t i, j; + TEST_ASSERT(expected_cnt == actual_cnt, + "Expected %" PRIusz " authorized operations, got %" PRIusz, + expected_cnt, actual_cnt); + + for (i = 0; i < expected_cnt; i++) { + for (j = 0; j < actual_cnt; j++) + if (expected[i] == actual[j]) + break; + + if (j == actual_cnt) + TEST_FAIL( + "Did not find expected authorized operation in " + "result %s\n", + rd_kafka_AclOperation_name(expected[i])); + } +} + /** - * @brief Test deletion of committed offsets. - * + * @brief Test DescribeTopics: create a topic, describe it, and then + * delete it. * + * @param include_authorized_operations if true, check authorized + * operations included in topic descriptions, and if they're changed if + * ACLs are defined. 
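The integration tests around here read back the per-group authorized operations added in 2.3.0 and compare them with the test_match_authorized_operations() helper. A sketch of the consuming side of that API, assuming desc is a group description taken from a DescribeConsumerGroups result:

size_t op_cnt, i;
const rd_kafka_AclOperation_t *ops =
    rd_kafka_ConsumerGroupDescription_authorized_operations(desc, &op_cnt);

/* NULL with op_cnt == 0 means include_authorized_operations was not
 * requested on the AdminOptions. */
for (i = 0; i < op_cnt; i++)
        printf("authorized: %s\n", rd_kafka_AclOperation_name(ops[i]));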
*/ -static void do_test_DeleteConsumerGroupOffsets(const char *what, - rd_kafka_t *rk, - rd_kafka_queue_t *useq, - int req_timeout_ms, - rd_bool_t sub_consumer) { +static void do_test_DescribeTopics(const char *what, + rd_kafka_t *rk, + rd_kafka_queue_t *rkqu, + int request_timeout, + rd_bool_t include_authorized_operations) { rd_kafka_queue_t *q; - rd_kafka_AdminOptions_t *options = NULL; - rd_kafka_topic_partition_list_t *orig_offsets, *offsets, *to_delete, - *committed, *deleted, *subscription = NULL; - rd_kafka_event_t *rkev = NULL; +#define TEST_DESCRIBE_TOPICS_CNT 3 + char *topic_names[TEST_DESCRIBE_TOPICS_CNT]; + rd_kafka_TopicCollection_t *topics, *empty_topics; + rd_kafka_AdminOptions_t *options; + rd_kafka_event_t *rkev; + const rd_kafka_error_t *error; rd_kafka_resp_err_t err; - char errstr[512]; + test_timing_t timing; + const rd_kafka_DescribeTopics_result_t *res; + const rd_kafka_TopicDescription_t **result_topics; + const rd_kafka_TopicPartitionInfo_t **partitions; + const rd_kafka_Uuid_t *topic_id; + size_t partitions_cnt; + size_t result_topics_cnt; + char errstr[128]; const char *errstr2; -#define MY_TOPIC_CNT 3 + const char *sasl_username; + const char *sasl_mechanism; + const char *principal; + rd_kafka_AclBinding_t *acl_bindings[1]; int i; - const int partitions_cnt = 3; - char *topics[MY_TOPIC_CNT]; - rd_kafka_metadata_topic_t exp_mdtopics[MY_TOPIC_CNT] = {{0}}; - int exp_mdtopic_cnt = 0; - test_timing_t timing; - rd_kafka_resp_err_t exp_err = RD_KAFKA_RESP_ERR_NO_ERROR; - rd_kafka_DeleteConsumerGroupOffsets_t *cgoffsets; - const rd_kafka_DeleteConsumerGroupOffsets_result_t *res; - const rd_kafka_group_result_t **gres; - size_t gres_cnt; - rd_kafka_t *consumer; - char *groupid; + const rd_kafka_AclOperation_t *authorized_operations; + size_t authorized_operations_cnt; SUB_TEST_QUICK( - "%s DeleteConsumerGroupOffsets with %s, req_timeout_ms %d%s", - rd_kafka_name(rk), what, req_timeout_ms, - sub_consumer ? ", with subscribing consumer" : ""); + "%s DescribeTopics with %s, request_timeout %d, " + "%s authorized operations", + rd_kafka_name(rk), what, request_timeout, + include_authorized_operations ? "with" : "without"); - if (sub_consumer) - exp_err = RD_KAFKA_RESP_ERR_GROUP_SUBSCRIBED_TO_TOPIC; + q = rkqu ? rkqu : rd_kafka_queue_new(rk); - q = useq ? useq : rd_kafka_queue_new(rk); + /* Only create one topic, the others will be non-existent. */ + for (i = 0; i < TEST_DESCRIBE_TOPICS_CNT; i++) { + rd_strdupa(&topic_names[i], + test_mk_topic_name(__FUNCTION__, 1)); + } + topics = rd_kafka_TopicCollection_of_topic_names( + (const char **)topic_names, TEST_DESCRIBE_TOPICS_CNT); + empty_topics = rd_kafka_TopicCollection_of_topic_names(NULL, 0); + + test_CreateTopics_simple(rk, NULL, topic_names, 1, 1, NULL); + test_wait_topic_exists(rk, topic_names[0], 10000); + + options = + rd_kafka_AdminOptions_new(rk, RD_KAFKA_ADMIN_OP_DESCRIBETOPICS); + TEST_CALL_ERR__(rd_kafka_AdminOptions_set_request_timeout( + options, request_timeout, errstr, sizeof(errstr))); + TEST_CALL_ERROR__( + rd_kafka_AdminOptions_set_include_authorized_operations( + options, include_authorized_operations)); + + /* Call DescribeTopics with empty topics. */ + TIMING_START(&timing, "DescribeTopics empty"); + rd_kafka_DescribeTopics(rk, empty_topics, options, q); + TIMING_ASSERT_LATER(&timing, 0, 50); - if (req_timeout_ms != -1) { - options = rd_kafka_AdminOptions_new( - rk, RD_KAFKA_ADMIN_OP_DELETECONSUMERGROUPOFFSETS); + /* Check DescribeTopics results. 
*/ + rkev = test_wait_admin_result(q, RD_KAFKA_EVENT_DESCRIBETOPICS_RESULT, + tmout_multip(20 * 1000)); + TEST_ASSERT(rkev, "Expected DescribeTopicsResult on queue"); - err = rd_kafka_AdminOptions_set_request_timeout( - options, req_timeout_ms, errstr, sizeof(errstr)); - TEST_ASSERT(!err, "%s", rd_kafka_err2str(err)); - } + /* Extract result. */ + res = rd_kafka_event_DescribeTopics_result(rkev); + TEST_ASSERT(res, "Expected DescribeTopics result, not %s", + rd_kafka_event_name(rkev)); + err = rd_kafka_event_error(rkev); + errstr2 = rd_kafka_event_error_string(rkev); + TEST_ASSERT(!err, "Expected success, not %s: %s", + rd_kafka_err2name(err), errstr2); - subscription = rd_kafka_topic_partition_list_new(MY_TOPIC_CNT); + result_topics = + rd_kafka_DescribeTopics_result_topics(res, &result_topics_cnt); - for (i = 0; i < MY_TOPIC_CNT; i++) { - char pfx[64]; - char *topic; + /* Check no result is received. */ + TEST_ASSERT((int)result_topics_cnt == 0, + "Expected 0 topics in result, got %d", + (int)result_topics_cnt); - rd_snprintf(pfx, sizeof(pfx), "DCGO-topic%d", i); - topic = rd_strdup(test_mk_topic_name(pfx, 1)); + rd_kafka_event_destroy(rkev); - topics[i] = topic; - exp_mdtopics[exp_mdtopic_cnt++].topic = topic; + /* Call DescribeTopics with all of them. */ + TIMING_START(&timing, "DescribeTopics all"); + rd_kafka_DescribeTopics(rk, topics, options, q); + TIMING_ASSERT_LATER(&timing, 0, 50); - rd_kafka_topic_partition_list_add(subscription, topic, - RD_KAFKA_PARTITION_UA); - } + /* Check DescribeTopics results. */ + rkev = test_wait_admin_result(q, RD_KAFKA_EVENT_DESCRIBETOPICS_RESULT, + tmout_multip(20 * 1000)); + TEST_ASSERT(rkev, "Expected DescribeTopicsResult on queue"); - groupid = topics[0]; + /* Extract result. */ + res = rd_kafka_event_DescribeTopics_result(rkev); + TEST_ASSERT(res, "Expected DescribeTopics result, not %s", + rd_kafka_event_name(rkev)); - /* Create the topics first. */ + err = rd_kafka_event_error(rkev); + errstr2 = rd_kafka_event_error_string(rkev); + TEST_ASSERT(!err, "Expected success, not %s: %s", + rd_kafka_err2name(err), errstr2); + + result_topics = + rd_kafka_DescribeTopics_result_topics(res, &result_topics_cnt); + + /* Check if results have been received for all topics. */ + TEST_ASSERT((int)result_topics_cnt == TEST_DESCRIBE_TOPICS_CNT, + "Expected %d topics in result, got %d", + TEST_DESCRIBE_TOPICS_CNT, (int)result_topics_cnt); + + /* Check if topics[0] succeeded. */ + error = rd_kafka_TopicDescription_error(result_topics[0]); + TEST_ASSERT(rd_kafka_error_code(error) == RD_KAFKA_RESP_ERR_NO_ERROR, + "Expected no error, not %s\n", + rd_kafka_error_string(error)); + + /* + * Check whether the topics which are non-existent have + * RD_KAFKA_RESP_ERR_UNKNOWN_TOPIC_OR_PART error. + */ + for (i = 1; i < TEST_DESCRIBE_TOPICS_CNT; i++) { + error = rd_kafka_TopicDescription_error(result_topics[i]); + TEST_ASSERT(rd_kafka_error_code(error) == + RD_KAFKA_RESP_ERR_UNKNOWN_TOPIC_OR_PART, + "Expected unknown Topic or partition, not %s\n", + rd_kafka_error_string(error)); + } + + /* Check fields inside the first (existent) topic. 
*/ + TEST_ASSERT(strcmp(rd_kafka_TopicDescription_name(result_topics[0]), + topic_names[0]) == 0, + "Expected topic name %s, got %s", topic_names[0], + rd_kafka_TopicDescription_name(result_topics[0])); + + topic_id = rd_kafka_TopicDescription_topic_id(result_topics[0]); + + TEST_ASSERT(topic_id, "Expected Topic Id to present."); + + partitions = rd_kafka_TopicDescription_partitions(result_topics[0], + &partitions_cnt); + + TEST_ASSERT(partitions_cnt == 1, "Expected %d partitions, got %" PRIusz, + 1, partitions_cnt); + + TEST_ASSERT(rd_kafka_TopicPartitionInfo_partition(partitions[0]) == 0, + "Expected partion id to be %d, got %d", 0, + rd_kafka_TopicPartitionInfo_partition(partitions[0])); + + authorized_operations = rd_kafka_TopicDescription_authorized_operations( + result_topics[0], &authorized_operations_cnt); + if (include_authorized_operations) { + const rd_kafka_AclOperation_t expected[] = { + RD_KAFKA_ACL_OPERATION_ALTER, + RD_KAFKA_ACL_OPERATION_ALTER_CONFIGS, + RD_KAFKA_ACL_OPERATION_CREATE, + RD_KAFKA_ACL_OPERATION_DELETE, + RD_KAFKA_ACL_OPERATION_DESCRIBE, + RD_KAFKA_ACL_OPERATION_DESCRIBE_CONFIGS, + RD_KAFKA_ACL_OPERATION_READ, + RD_KAFKA_ACL_OPERATION_WRITE}; + + test_match_authorized_operations(expected, 8, + authorized_operations, + authorized_operations_cnt); + } else { + TEST_ASSERT( + authorized_operations_cnt == 0, + "Authorized operation count should be 0, is %" PRIusz, + authorized_operations_cnt); + TEST_ASSERT( + authorized_operations == NULL, + "Authorized operations should be NULL when not requested"); + } + + rd_kafka_AdminOptions_destroy(options); + rd_kafka_event_destroy(rkev); + + /* If we don't have authentication/authorization set up in our + * broker, the following test doesn't make sense, since we're + * testing ACLs and authorized operations for our principal. The + * same goes for `include_authorized_operations`, if it's not + * true, it doesn't make sense to change the ACLs and check. We + * limit ourselves to SASL_PLAIN and SASL_SCRAM.*/ + if (!test_needs_auth() || !include_authorized_operations) + goto done; + + sasl_mechanism = test_conf_get(NULL, "sasl.mechanism"); + if (strcmp(sasl_mechanism, "PLAIN") != 0 && + strncmp(sasl_mechanism, "SCRAM", 5) != 0) + goto done; + + sasl_username = test_conf_get(NULL, "sasl.username"); + principal = tsprintf("User:%s", sasl_username); + + /* Change authorized operations for the principal which we're + * using to connect to the broker. */ + acl_bindings[0] = rd_kafka_AclBinding_new( + RD_KAFKA_RESOURCE_TOPIC, topic_names[0], + RD_KAFKA_RESOURCE_PATTERN_LITERAL, principal, "*", + RD_KAFKA_ACL_OPERATION_READ, RD_KAFKA_ACL_PERMISSION_TYPE_ALLOW, + NULL, 0); + TEST_CALL_ERR__( + test_CreateAcls_simple(rk, NULL, acl_bindings, 1, NULL)); + rd_kafka_AclBinding_destroy(acl_bindings[0]); + + /* Call DescribeTopics. */ + options = + rd_kafka_AdminOptions_new(rk, RD_KAFKA_ADMIN_OP_DESCRIBETOPICS); + TEST_CALL_ERR__(rd_kafka_AdminOptions_set_request_timeout( + options, request_timeout, errstr, sizeof(errstr))); + TEST_CALL_ERROR__( + rd_kafka_AdminOptions_set_include_authorized_operations(options, + 1)); + + TIMING_START(&timing, "DescribeTopics"); + rd_kafka_DescribeTopics(rk, topics, options, q); + TIMING_ASSERT_LATER(&timing, 0, 50); + rd_kafka_AdminOptions_destroy(options); + + /* Check DescribeTopics results. */ + rkev = test_wait_admin_result(q, RD_KAFKA_EVENT_DESCRIBETOPICS_RESULT, + tmout_multip(20 * 1000)); + TEST_ASSERT(rkev, "Expected DescribeTopicsResult on queue"); + + /* Extract result. 
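The ACL round-trip above narrows the test principal to READ on the topic and then re-describes it, expecting only READ plus the implicitly derived DESCRIBE. A sketch of the grant itself, assuming rk and q exist (topic and principal hypothetical):

rd_kafka_AclBinding_t *binding = rd_kafka_AclBinding_new(
    RD_KAFKA_RESOURCE_TOPIC, "topic-a",
    RD_KAFKA_RESOURCE_PATTERN_LITERAL, "User:alice", "*" /* any host */,
    RD_KAFKA_ACL_OPERATION_READ, RD_KAFKA_ACL_PERMISSION_TYPE_ALLOW,
    NULL, 0 /* no errstr buffer */);

rd_kafka_CreateAcls(rk, &binding, 1, NULL /* options */, q);
rd_kafka_AclBinding_destroy(binding);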
*/ + res = rd_kafka_event_DescribeTopics_result(rkev); + TEST_ASSERT(res, "Expected DescribeTopics result, not %s", + rd_kafka_event_name(rkev)); + + err = rd_kafka_event_error(rkev); + errstr2 = rd_kafka_event_error_string(rkev); + TEST_ASSERT(!err, "Expected success, not %s: %s", + rd_kafka_err2name(err), errstr2); + + result_topics = + rd_kafka_DescribeTopics_result_topics(res, &result_topics_cnt); + + /* Check if results have been received for all topics. */ + TEST_ASSERT((int)result_topics_cnt == TEST_DESCRIBE_TOPICS_CNT, + "Expected %d topics in result, got %d", + TEST_DESCRIBE_TOPICS_CNT, (int)result_topics_cnt); + + /* Check if topics[0] succeeded. */ + error = rd_kafka_TopicDescription_error(result_topics[0]); + TEST_ASSERT(rd_kafka_error_code(error) == RD_KAFKA_RESP_ERR_NO_ERROR, + "Expected no error, not %s\n", + rd_kafka_error_string(error)); + + /* Check if ACLs changed. */ + { + const rd_kafka_AclOperation_t expected[] = { + RD_KAFKA_ACL_OPERATION_READ, + RD_KAFKA_ACL_OPERATION_DESCRIBE}; + authorized_operations = + rd_kafka_TopicDescription_authorized_operations( + result_topics[0], &authorized_operations_cnt); + + test_match_authorized_operations(expected, 2, + authorized_operations, + authorized_operations_cnt); + } + rd_kafka_event_destroy(rkev); + + /* + * Allow RD_KAFKA_ACL_OPERATION_DELETE to allow deletion + * of the created topic as currently our principal only has read + * and describe. + */ + acl_bindings[0] = rd_kafka_AclBinding_new( + RD_KAFKA_RESOURCE_TOPIC, topic_names[0], + RD_KAFKA_RESOURCE_PATTERN_LITERAL, principal, "*", + RD_KAFKA_ACL_OPERATION_DELETE, RD_KAFKA_ACL_PERMISSION_TYPE_ALLOW, + NULL, 0); + TEST_CALL_ERR__( + test_CreateAcls_simple(rk, NULL, acl_bindings, 1, NULL)); + rd_kafka_AclBinding_destroy(acl_bindings[0]); + +done: + test_DeleteTopics_simple(rk, NULL, topic_names, 1, NULL); + if (!rkqu) + rd_kafka_queue_destroy(q); + + rd_kafka_TopicCollection_destroy(topics); + rd_kafka_TopicCollection_destroy(empty_topics); + + + TEST_LATER_CHECK(); +#undef TEST_DESCRIBE_TOPICS_CNT + + SUB_TEST_PASS(); +} + +/** + * @brief Test DescribeCluster for the test cluster. + * + * @param include_authorized_operations if true, check authorized operations + * included in cluster description, and if they're changed if ACLs are defined. + */ +static void do_test_DescribeCluster(const char *what, + rd_kafka_t *rk, + rd_kafka_queue_t *rkqu, + int request_timeout, + rd_bool_t include_authorized_operations) { + rd_kafka_queue_t *q; + rd_kafka_AdminOptions_t *options; + rd_kafka_event_t *rkev; + rd_kafka_resp_err_t err; + test_timing_t timing; + const rd_kafka_DescribeCluster_result_t *res; + const rd_kafka_Node_t **nodes; + size_t node_cnt; + char errstr[128]; + const char *errstr2; + rd_kafka_AclBinding_t *acl_bindings[1]; + rd_kafka_AclBindingFilter_t *acl_bindings_delete; + const rd_kafka_AclOperation_t *authorized_operations; + size_t authorized_operations_cnt; + const char *sasl_username; + const char *sasl_mechanism; + const char *principal; + + SUB_TEST_QUICK( + "%s DescribeCluster with %s, request_timeout %d, %s authorized " + "operations", + rd_kafka_name(rk), what, request_timeout, + include_authorized_operations ? "with" : "without"); + + q = rkqu ? rkqu : rd_kafka_queue_new(rk); + + /* Call DescribeCluster. 
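The DescribeCluster integration test asserts only loose invariants (non-empty cluster id, at least one node with a host and port), since the concrete values depend on the test environment. A sketch of walking the same result, assuming res was extracted from a RD_KAFKA_EVENT_DESCRIBECLUSTER_RESULT event:

size_t node_cnt, i;
const rd_kafka_Node_t **nodes =
    rd_kafka_DescribeCluster_result_nodes(res, &node_cnt);

printf("cluster %s\n", rd_kafka_DescribeCluster_result_cluster_id(res));
for (i = 0; i < node_cnt; i++)
        printf("  broker %d at %s:%d\n", rd_kafka_Node_id(nodes[i]),
               rd_kafka_Node_host(nodes[i]),
               (int)rd_kafka_Node_port(nodes[i]));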
*/ + options = + rd_kafka_AdminOptions_new(rk, RD_KAFKA_ADMIN_OP_DESCRIBECLUSTER); + TEST_CALL_ERR__(rd_kafka_AdminOptions_set_request_timeout( + options, request_timeout, errstr, sizeof(errstr))); + TEST_CALL_ERROR__( + rd_kafka_AdminOptions_set_include_authorized_operations( + options, include_authorized_operations)); + + TIMING_START(&timing, "DescribeCluster"); + rd_kafka_DescribeCluster(rk, options, q); + TIMING_ASSERT_LATER(&timing, 0, 50); + rd_kafka_AdminOptions_destroy(options); + + /* Wait for DescribeCluster result.*/ + rkev = test_wait_admin_result(q, RD_KAFKA_EVENT_DESCRIBECLUSTER_RESULT, + tmout_multip(20 * 1000)); + TEST_ASSERT(rkev, "Should receive describe cluster event."); + + /* Extract result. */ + res = rd_kafka_event_DescribeCluster_result(rkev); + TEST_ASSERT(res, "Expected DescribeCluster result, not %s", + rd_kafka_event_name(rkev)); + + err = rd_kafka_event_error(rkev); + errstr2 = rd_kafka_event_error_string(rkev); + TEST_ASSERT(!err, "Expected success, not %s: %s", + rd_kafka_err2name(err), errstr2); + + /* Sanity checks on fields inside the result. There's not much we can + * say here deterministically, since it depends on the test environment. + */ + TEST_ASSERT(strlen(rd_kafka_DescribeCluster_result_cluster_id(res)), + "Length of cluster id should be non-null."); + + nodes = rd_kafka_DescribeCluster_result_nodes(res, &node_cnt); + TEST_ASSERT(node_cnt, "Expected non-zero node count for cluster."); + + TEST_ASSERT(rd_kafka_Node_host(nodes[0]), + "Expected first node of cluster to have a hostname"); + TEST_ASSERT(rd_kafka_Node_port(nodes[0]), + "Expected first node of cluster to have a port"); + + authorized_operations = + rd_kafka_DescribeCluster_result_authorized_operations( + res, &authorized_operations_cnt); + if (include_authorized_operations) { + const rd_kafka_AclOperation_t expected[] = { + RD_KAFKA_ACL_OPERATION_ALTER, + RD_KAFKA_ACL_OPERATION_ALTER_CONFIGS, + RD_KAFKA_ACL_OPERATION_CLUSTER_ACTION, + RD_KAFKA_ACL_OPERATION_CREATE, + RD_KAFKA_ACL_OPERATION_DESCRIBE, + RD_KAFKA_ACL_OPERATION_DESCRIBE_CONFIGS, + RD_KAFKA_ACL_OPERATION_IDEMPOTENT_WRITE}; + + test_match_authorized_operations(expected, 7, + authorized_operations, + authorized_operations_cnt); + } else { + TEST_ASSERT( + authorized_operations_cnt == 0, + "Authorized operation count should be 0, is %" PRIusz, + authorized_operations_cnt); + TEST_ASSERT( + authorized_operations == NULL, + "Authorized operations should be NULL when not requested"); + } + + rd_kafka_event_destroy(rkev); + + /* If we don't have authentication/authorization set up in our broker, + * the following test doesn't make sense, since we're testing ACLs and + * authorized operations for our principal. The same goes for + * `include_authorized_operations`, if it's not true, it doesn't make + * sense to change the ACLs and check. We limit ourselves to SASL_PLAIN + * and SASL_SCRAM.*/ + if (!test_needs_auth() || !include_authorized_operations) + goto done; + + sasl_mechanism = test_conf_get(NULL, "sasl.mechanism"); + if (strcmp(sasl_mechanism, "PLAIN") != 0 && + strncmp(sasl_mechanism, "SCRAM", 5) != 0) + goto done; + + sasl_username = test_conf_get(NULL, "sasl.username"); + principal = tsprintf("User:%s", sasl_username); + + /* Change authorized operations for the principal which we're using to + * connect to the broker. 
*/ + acl_bindings[0] = rd_kafka_AclBinding_new( + RD_KAFKA_RESOURCE_BROKER, "kafka-cluster", + RD_KAFKA_RESOURCE_PATTERN_LITERAL, principal, "*", + RD_KAFKA_ACL_OPERATION_ALTER, RD_KAFKA_ACL_PERMISSION_TYPE_ALLOW, + NULL, 0); + test_CreateAcls_simple(rk, NULL, acl_bindings, 1, NULL); + rd_kafka_AclBinding_destroy(acl_bindings[0]); + + /* Call DescribeCluster. */ + options = + rd_kafka_AdminOptions_new(rk, RD_KAFKA_ADMIN_OP_DESCRIBECLUSTER); + + TEST_CALL_ERR__(rd_kafka_AdminOptions_set_request_timeout( + options, request_timeout, errstr, sizeof(errstr))); + TEST_CALL_ERROR__( + rd_kafka_AdminOptions_set_include_authorized_operations(options, + 1)); + + TIMING_START(&timing, "DescribeCluster"); + rd_kafka_DescribeCluster(rk, options, q); + TIMING_ASSERT_LATER(&timing, 0, 50); + rd_kafka_AdminOptions_destroy(options); + + rkev = test_wait_admin_result(q, RD_KAFKA_EVENT_DESCRIBECLUSTER_RESULT, + tmout_multip(20 * 1000)); + TEST_ASSERT(rkev, "Should receive describe cluster event."); + + /* Extract result. */ + res = rd_kafka_event_DescribeCluster_result(rkev); + TEST_ASSERT(res, "Expected DescribeCluster result, not %s", + rd_kafka_event_name(rkev)); + + err = rd_kafka_event_error(rkev); + errstr2 = rd_kafka_event_error_string(rkev); + TEST_ASSERT(!err, "Expected success, not %s: %s", + rd_kafka_err2name(err), errstr2); + + /* + * After CreateAcls call with + * only RD_KAFKA_ACL_OPERATION_ALTER allowed, the allowed operations + * should be 2 (DESCRIBE is implicitly derived from ALTER). + */ + { + const rd_kafka_AclOperation_t expected[] = { + RD_KAFKA_ACL_OPERATION_ALTER, + RD_KAFKA_ACL_OPERATION_DESCRIBE}; + authorized_operations = + rd_kafka_DescribeCluster_result_authorized_operations( + res, &authorized_operations_cnt); + + test_match_authorized_operations(expected, 2, + authorized_operations, + authorized_operations_cnt); + } + + rd_kafka_event_destroy(rkev); + + /* + * Remove the previously created ACL so that it doesn't affect other + * tests. + */ + acl_bindings_delete = rd_kafka_AclBindingFilter_new( + RD_KAFKA_RESOURCE_BROKER, "kafka-cluster", + RD_KAFKA_RESOURCE_PATTERN_MATCH, principal, "*", + RD_KAFKA_ACL_OPERATION_ALTER, RD_KAFKA_ACL_PERMISSION_TYPE_ALLOW, + NULL, 0); + test_DeleteAcls_simple(rk, NULL, &acl_bindings_delete, 1, NULL); + rd_kafka_AclBinding_destroy(acl_bindings_delete); + +done: + TEST_LATER_CHECK(); + + if (!rkqu) + rd_kafka_queue_destroy(q); + + SUB_TEST_PASS(); +} + +/** + * @brief Test DescribeConsumerGroups's authorized_operations, creating a + * consumer for a group, describing it, changing ACLs, and describing it again. 
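The cleanup step above removes the broker-scoped ACL with a pattern-MATCH filter so the grant cannot leak into later tests. A sketch of the same delete-by-filter pattern, assuming rk and q exist (resource and principal hypothetical):

/* A filter can match many bindings at once; a NULL resource name and
 * the ANY enums act as wildcards. */
rd_kafka_AclBindingFilter_t *filter = rd_kafka_AclBindingFilter_new(
    RD_KAFKA_RESOURCE_TOPIC, NULL /* any topic */,
    RD_KAFKA_RESOURCE_PATTERN_MATCH, "User:alice", "*",
    RD_KAFKA_ACL_OPERATION_ANY, RD_KAFKA_ACL_PERMISSION_TYPE_ANY,
    NULL, 0);

rd_kafka_DeleteAcls(rk, &filter, 1, NULL /* options */, q);
rd_kafka_AclBinding_destroy(filter);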
+ */ +static void +do_test_DescribeConsumerGroups_with_authorized_ops(const char *what, + rd_kafka_t *rk, + rd_kafka_queue_t *useq, + int request_timeout) { + rd_kafka_queue_t *q; + rd_kafka_AdminOptions_t *options = NULL; + rd_kafka_event_t *rkev = NULL; + rd_kafka_resp_err_t err; + const rd_kafka_error_t *error; + char errstr[512]; + const char *errstr2; +#define TEST_DESCRIBE_CONSUMER_GROUPS_CNT 4 + const int partitions_cnt = 1; + const int msgs_cnt = 100; + char *topic, *group_id; + rd_kafka_AclBinding_t *acl_bindings[TEST_DESCRIBE_CONSUMER_GROUPS_CNT]; + int64_t testid = test_id_generate(); + const rd_kafka_ConsumerGroupDescription_t **results = NULL; + size_t results_cnt; + const rd_kafka_DescribeConsumerGroups_result_t *res; + const char *principal, *sasl_mechanism, *sasl_username; + const rd_kafka_AclOperation_t *authorized_operations; + size_t authorized_operations_cnt; + + SUB_TEST_QUICK("%s DescribeConsumerGroups with %s, request_timeout %d", + rd_kafka_name(rk), what, request_timeout); + + if (!test_needs_auth()) + SUB_TEST_SKIP("Test requires authorization to be setup."); + + sasl_mechanism = test_conf_get(NULL, "sasl.mechanism"); + if (strcmp(sasl_mechanism, "PLAIN") != 0 && + strncmp(sasl_mechanism, "SCRAM", 5) != 0) + SUB_TEST_SKIP("Test requites SASL_PLAIN or SASL_SCRAM, got %s", + sasl_mechanism); + + sasl_username = test_conf_get(NULL, "sasl.username"); + principal = tsprintf("User:%s", sasl_username); + + topic = rd_strdup(test_mk_topic_name(__FUNCTION__, 1)); + + /* Create the topic. */ + test_CreateTopics_simple(rk, NULL, &topic, 1, partitions_cnt, NULL); + test_wait_topic_exists(rk, topic, 10000); + + /* Produce 100 msgs */ + test_produce_msgs_easy(topic, testid, 0, msgs_cnt); + + /* Create and consumer (and consumer group). */ + group_id = rd_strdup(test_mk_topic_name(__FUNCTION__, 1)); + test_consume_msgs_easy(group_id, topic, testid, -1, 100, NULL); + + q = useq ? useq : rd_kafka_queue_new(rk); + + options = rd_kafka_AdminOptions_new( + rk, RD_KAFKA_ADMIN_OP_DESCRIBECONSUMERGROUPS); + + TEST_CALL_ERR__(rd_kafka_AdminOptions_set_request_timeout( + options, request_timeout, errstr, sizeof(errstr))); + TEST_CALL_ERROR__( + rd_kafka_AdminOptions_set_include_authorized_operations(options, + 1)); + + rd_kafka_DescribeConsumerGroups(rk, (const char **)(&group_id), 1, + options, q); + rd_kafka_AdminOptions_destroy(options); + + rkev = test_wait_admin_result( + q, RD_KAFKA_EVENT_DESCRIBECONSUMERGROUPS_RESULT, + tmout_multip(20 * 1000)); + TEST_ASSERT(rkev, "Should receive describe consumer groups event."); + + /* Extract result. 
*/ + res = rd_kafka_event_DescribeConsumerGroups_result(rkev); + TEST_ASSERT(res, "Expected DescribeConsumerGroup result, not %s", + rd_kafka_event_name(rkev)); + + err = rd_kafka_event_error(rkev); + errstr2 = rd_kafka_event_error_string(rkev); + TEST_ASSERT(!err, "Expected success, not %s: %s", + rd_kafka_err2name(err), errstr2); + + results = + rd_kafka_DescribeConsumerGroups_result_groups(res, &results_cnt); + TEST_ASSERT((int)results_cnt == 1, "Expected 1 group, got %d", + (int)results_cnt); + + error = rd_kafka_ConsumerGroupDescription_error(results[0]); + TEST_ASSERT(!error, "Expected no error in describing group, got: %s", + rd_kafka_error_string(error)); + + { + const rd_kafka_AclOperation_t expected[] = { + RD_KAFKA_ACL_OPERATION_DELETE, + RD_KAFKA_ACL_OPERATION_DESCRIBE, + RD_KAFKA_ACL_OPERATION_READ}; + authorized_operations = + rd_kafka_ConsumerGroupDescription_authorized_operations( + results[0], &authorized_operations_cnt); + test_match_authorized_operations(expected, 3, + authorized_operations, + authorized_operations_cnt); + } + + rd_kafka_event_destroy(rkev); + + /* Change authorized operations for the principal which we're using to + * connect to the broker. */ + acl_bindings[0] = rd_kafka_AclBinding_new( + RD_KAFKA_RESOURCE_GROUP, group_id, + RD_KAFKA_RESOURCE_PATTERN_LITERAL, principal, "*", + RD_KAFKA_ACL_OPERATION_READ, RD_KAFKA_ACL_PERMISSION_TYPE_ALLOW, + NULL, 0); + test_CreateAcls_simple(rk, NULL, acl_bindings, 1, NULL); + rd_kafka_AclBinding_destroy(acl_bindings[0]); + + /* It seems to be taking some time on the cluster for the ACLs to + * propagate for a group.*/ + rd_sleep(tmout_multip(2)); + + options = rd_kafka_AdminOptions_new( + rk, RD_KAFKA_ADMIN_OP_DESCRIBECONSUMERGROUPS); + + TEST_CALL_ERR__(rd_kafka_AdminOptions_set_request_timeout( + options, request_timeout, errstr, sizeof(errstr))); + TEST_CALL_ERROR__( + rd_kafka_AdminOptions_set_include_authorized_operations(options, + 1)); + + rd_kafka_DescribeConsumerGroups(rk, (const char **)(&group_id), 1, + options, q); + rd_kafka_AdminOptions_destroy(options); + + rkev = test_wait_admin_result( + q, RD_KAFKA_EVENT_DESCRIBECONSUMERGROUPS_RESULT, + tmout_multip(20 * 1000)); + TEST_ASSERT(rkev, "Should receive describe consumer groups event."); + + /* Extract result. 
*/ + res = rd_kafka_event_DescribeConsumerGroups_result(rkev); + TEST_ASSERT(res, "Expected DescribeConsumerGroup result, not %s ", + rd_kafka_event_name(rkev)); + + err = rd_kafka_event_error(rkev); + errstr2 = rd_kafka_event_error_string(rkev); + TEST_ASSERT(!err, "Expected success, not %s: %s", + rd_kafka_err2name(err), errstr2); + + results = + rd_kafka_DescribeConsumerGroups_result_groups(res, &results_cnt); + TEST_ASSERT((int)results_cnt == 1, "Expected 1 group, got %d", + (int)results_cnt); + + error = rd_kafka_ConsumerGroupDescription_error(results[0]); + TEST_ASSERT(!error, "Expected no error in describing group, got: %s", + rd_kafka_error_string(error)); + + + { + const rd_kafka_AclOperation_t expected[] = { + RD_KAFKA_ACL_OPERATION_DESCRIBE, + RD_KAFKA_ACL_OPERATION_READ}; + authorized_operations = + rd_kafka_ConsumerGroupDescription_authorized_operations( + results[0], &authorized_operations_cnt); + test_match_authorized_operations(expected, 2, + authorized_operations, + authorized_operations_cnt); + } + + rd_kafka_event_destroy(rkev); + + acl_bindings[0] = rd_kafka_AclBinding_new( + RD_KAFKA_RESOURCE_GROUP, group_id, + RD_KAFKA_RESOURCE_PATTERN_LITERAL, principal, "*", + RD_KAFKA_ACL_OPERATION_DELETE, RD_KAFKA_ACL_PERMISSION_TYPE_ALLOW, + NULL, 0); + test_CreateAcls_simple(rk, NULL, acl_bindings, 1, NULL); + rd_kafka_AclBinding_destroy(acl_bindings[0]); + + /* It seems to be taking some time on the cluster for the ACLs to + * propagate for a group.*/ + rd_sleep(tmout_multip(2)); + + test_DeleteGroups_simple(rk, NULL, &group_id, 1, NULL); + test_DeleteTopics_simple(rk, q, &topic, 1, NULL); + + rd_free(topic); + rd_free(group_id); + + if (!useq) + rd_kafka_queue_destroy(q); + + + TEST_LATER_CHECK(); +#undef TEST_DESCRIBE_CONSUMER_GROUPS_CNT + + SUB_TEST_PASS(); +} +/** + * @brief Test deletion of committed offsets. + * + * + */ +static void do_test_DeleteConsumerGroupOffsets(const char *what, + rd_kafka_t *rk, + rd_kafka_queue_t *useq, + int req_timeout_ms, + rd_bool_t sub_consumer) { + rd_kafka_queue_t *q; + rd_kafka_AdminOptions_t *options = NULL; + rd_kafka_topic_partition_list_t *orig_offsets, *offsets, *to_delete, + *committed, *deleted, *subscription = NULL; + rd_kafka_event_t *rkev = NULL; + rd_kafka_resp_err_t err; + char errstr[512]; + const char *errstr2; +#define MY_TOPIC_CNT 3 + int i; + const int partitions_cnt = 3; + char *topics[MY_TOPIC_CNT]; + rd_kafka_metadata_topic_t exp_mdtopics[MY_TOPIC_CNT] = {{0}}; + int exp_mdtopic_cnt = 0; + test_timing_t timing; + rd_kafka_resp_err_t exp_err = RD_KAFKA_RESP_ERR_NO_ERROR; + rd_kafka_DeleteConsumerGroupOffsets_t *cgoffsets; + const rd_kafka_DeleteConsumerGroupOffsets_result_t *res; + const rd_kafka_group_result_t **gres; + size_t gres_cnt; + rd_kafka_t *consumer; + char *groupid; + + SUB_TEST_QUICK( + "%s DeleteConsumerGroupOffsets with %s, req_timeout_ms %d%s", + rd_kafka_name(rk), what, req_timeout_ms, + sub_consumer ? ", with subscribing consumer" : ""); + + if (sub_consumer) + exp_err = RD_KAFKA_RESP_ERR_GROUP_SUBSCRIBED_TO_TOPIC; + + q = useq ? 
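The DeleteConsumerGroupOffsets test that starts here drives the API through the test harness; stripped of the harness, the call it targets can be sketched as below. delete_group_offset is a hypothetical helper and the partition choice is illustrative; note the Admin API currently accepts exactly one rd_kafka_DeleteConsumerGroupOffsets_t per call.

#include <librdkafka/rdkafka.h>

/* Sketch: delete the committed offset for partition 0 of `topic` in
 * `group`. Assumes `rk`/`q` are set up. */
static void delete_group_offset(rd_kafka_t *rk, rd_kafka_queue_t *q,
                                const char *group, const char *topic) {
        rd_kafka_topic_partition_list_t *parts =
            rd_kafka_topic_partition_list_new(1);
        rd_kafka_topic_partition_list_add(parts, topic, 0 /* partition */);

        rd_kafka_DeleteConsumerGroupOffsets_t *cgo =
            rd_kafka_DeleteConsumerGroupOffsets_new(group, parts);

        rd_kafka_DeleteConsumerGroupOffsets(rk, &cgo, 1, NULL /* options */,
                                            q);

        rd_kafka_DeleteConsumerGroupOffsets_destroy(cgo);
        rd_kafka_topic_partition_list_destroy(parts);
        /* Poll `q` for the DELETECONSUMERGROUPOFFSETS_RESULT event; each
         * partition carries its own error, e.g. GROUP_SUBSCRIBED_TO_TOPIC
         * when a consumer still subscribes to the topic, which is exactly
         * the exp_err used in the sub_consumer case above. */
}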
useq : rd_kafka_queue_new(rk); + + if (req_timeout_ms != -1) { + options = rd_kafka_AdminOptions_new( + rk, RD_KAFKA_ADMIN_OP_DELETECONSUMERGROUPOFFSETS); + + err = rd_kafka_AdminOptions_set_request_timeout( + options, req_timeout_ms, errstr, sizeof(errstr)); + TEST_ASSERT(!err, "%s", rd_kafka_err2str(err)); + } + + + subscription = rd_kafka_topic_partition_list_new(MY_TOPIC_CNT); + + for (i = 0; i < MY_TOPIC_CNT; i++) { + char pfx[64]; + char *topic; + + rd_snprintf(pfx, sizeof(pfx), "DCGO-topic%d", i); + topic = rd_strdup(test_mk_topic_name(pfx, 1)); + + topics[i] = topic; + exp_mdtopics[exp_mdtopic_cnt++].topic = topic; + + rd_kafka_topic_partition_list_add(subscription, topic, + RD_KAFKA_PARTITION_UA); + } + + groupid = topics[0]; + + /* Create the topics first. */ test_CreateTopics_simple(rk, NULL, topics, MY_TOPIC_CNT, partitions_cnt, NULL); @@ -3652,6 +4652,501 @@ static void do_test_ListConsumerGroupOffsets(const char *what, SUB_TEST_PASS(); } +static void do_test_UserScramCredentials(const char *what, + rd_kafka_t *rk, + rd_kafka_queue_t *useq, + rd_bool_t null_bytes) { + rd_kafka_event_t *event; + rd_kafka_resp_err_t err; + const rd_kafka_DescribeUserScramCredentials_result_t *describe_result; + const rd_kafka_UserScramCredentialsDescription_t **descriptions; + const rd_kafka_UserScramCredentialsDescription_t *description; + const rd_kafka_AlterUserScramCredentials_result_t *alter_result; + const rd_kafka_AlterUserScramCredentials_result_response_t * + *alter_responses; + const rd_kafka_AlterUserScramCredentials_result_response_t *response; + const rd_kafka_ScramCredentialInfo_t *scram_credential; + rd_kafka_ScramMechanism_t mechanism; + size_t response_cnt; + size_t description_cnt; + size_t num_credentials; + char errstr[512]; + const char *username; + const rd_kafka_error_t *error; + int32_t iterations; + rd_kafka_UserScramCredentialAlteration_t *alterations[1]; + char *salt = tsprintf("%s", "salt"); + size_t salt_size = 4; + char *password = tsprintf("%s", "password"); + size_t password_size = 8; + rd_kafka_queue_t *queue; + const char *users[1]; + users[0] = "testuserforscram"; + + if (null_bytes) { + salt[1] = '\0'; + salt[3] = '\0'; + password[0] = '\0'; + password[3] = '\0'; + } + + SUB_TEST_QUICK("%s, null bytes: %s", what, RD_STR_ToF(null_bytes)); + + queue = useq ? 
useq : rd_kafka_queue_new(rk); + + rd_kafka_AdminOptions_t *options = rd_kafka_AdminOptions_new( + rk, RD_KAFKA_ADMIN_OP_DESCRIBEUSERSCRAMCREDENTIALS); + + TEST_CALL_ERR__(rd_kafka_AdminOptions_set_request_timeout( + options, 30 * 1000 /* 30s */, errstr, sizeof(errstr))); + + /* Describe an unknown user */ + rd_kafka_DescribeUserScramCredentials(rk, users, RD_ARRAY_SIZE(users), + options, queue); + rd_kafka_AdminOptions_destroy(options); + event = rd_kafka_queue_poll(queue, -1 /*indefinitely*/); + + /* Request level error code should be 0*/ + TEST_CALL_ERR__(rd_kafka_event_error(event)); + err = rd_kafka_event_error(event); + TEST_ASSERT(err == RD_KAFKA_RESP_ERR_NO_ERROR, + "Expected NO_ERROR, not %s", rd_kafka_err2name(err)); + + describe_result = + rd_kafka_event_DescribeUserScramCredentials_result(event); + descriptions = + rd_kafka_DescribeUserScramCredentials_result_descriptions( + describe_result, &description_cnt); + + /* Assert num_results should be 1 */ + TEST_ASSERT(description_cnt == 1, + "There should be exactly 1 description, got %" PRIusz, + description_cnt); + + description = descriptions[0]; + username = rd_kafka_UserScramCredentialsDescription_user(description); + error = rd_kafka_UserScramCredentialsDescription_error(description); + err = rd_kafka_error_code(error); + + num_credentials = + rd_kafka_UserScramCredentialsDescription_scramcredentialinfo_count( + description); + /* username should be the same, err should be RESOURCE_NOT_FOUND + * and num_credentials should be 0 */ + TEST_ASSERT(strcmp(users[0], username) == 0, + "Username should be %s, got %s", users[0], username); + TEST_ASSERT(err == RD_KAFKA_RESP_ERR_RESOURCE_NOT_FOUND, + "Error code should be RESOURCE_NOT_FOUND as user " + "does not exist, got %s", + rd_kafka_err2name(err)); + TEST_ASSERT(num_credentials == 0, + "Credentials count should be 0, got %" PRIusz, + num_credentials); + rd_kafka_event_destroy(event); + + /* Create a credential for user 0 */ + mechanism = RD_KAFKA_SCRAM_MECHANISM_SHA_256; + iterations = 10000; + alterations[0] = rd_kafka_UserScramCredentialUpsertion_new( + users[0], mechanism, iterations, (unsigned char *)password, + password_size, (unsigned char *)salt, salt_size); + + options = rd_kafka_AdminOptions_new( + rk, RD_KAFKA_ADMIN_OP_ALTERUSERSCRAMCREDENTIALS); + + TEST_CALL_ERR__(rd_kafka_AdminOptions_set_request_timeout( + options, 30 * 1000 /* 30s */, errstr, sizeof(errstr))); + + rd_kafka_AlterUserScramCredentials( + rk, alterations, RD_ARRAY_SIZE(alterations), options, queue); + rd_kafka_AdminOptions_destroy(options); + rd_kafka_UserScramCredentialAlteration_destroy_array( + alterations, RD_ARRAY_SIZE(alterations)); + + /* Wait for results */ + event = rd_kafka_queue_poll(queue, -1 /*indefinitely*/); + err = rd_kafka_event_error(event); +#if !WITH_SSL + TEST_ASSERT(err == RD_KAFKA_RESP_ERR__INVALID_ARG, + "Expected _INVALID_ARG, not %s", rd_kafka_err2name(err)); + rd_kafka_event_destroy(event); + goto final_checks; +#else + TEST_ASSERT(err == RD_KAFKA_RESP_ERR_NO_ERROR, + "Expected NO_ERROR, not %s", rd_kafka_err2name(err)); + + alter_result = rd_kafka_event_AlterUserScramCredentials_result(event); + alter_responses = rd_kafka_AlterUserScramCredentials_result_responses( + alter_result, &response_cnt); + + /* response_cnt should be 1*/ + TEST_ASSERT(response_cnt == 1, + "There should be exactly 1 response, got %" PRIusz, + response_cnt); + + response = alter_responses[0]; + username = + rd_kafka_AlterUserScramCredentials_result_response_user(response); + error = + 
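A condensed sketch of the upsert/delete cycle this test performs follows (upsert_then_delete_scram is a hypothetical helper and the user name is a placeholder). Password and salt sizes are passed explicitly because they may legally contain NUL bytes, which the null_bytes variant above exercises; this sketch uses plain strings for simplicity. Note that upsertion needs a librdkafka build with SSL support, which is why the !WITH_SSL branch below expects _INVALID_ARG.

#include <librdkafka/rdkafka.h>
#include <string.h>

static void upsert_then_delete_scram(rd_kafka_t *rk, rd_kafka_queue_t *q) {
        const char *user               = "someuser";
        const unsigned char password[] = "secret";
        const unsigned char salt[]     = "somesalt";
        rd_kafka_UserScramCredentialAlteration_t *alt;

        /* Upsert: the broker derives and stores the SCRAM credential. */
        alt = rd_kafka_UserScramCredentialUpsertion_new(
            user, RD_KAFKA_SCRAM_MECHANISM_SHA_256, 10000 /* iterations */,
            password, strlen((const char *)password), salt,
            strlen((const char *)salt));
        rd_kafka_AlterUserScramCredentials(rk, &alt, 1, NULL /* options */, q);
        rd_kafka_UserScramCredentialAlteration_destroy(alt);
        /* ... poll `q` for the ALTERUSERSCRAMCREDENTIALS_RESULT event ... */

        /* Deletion removes the credential for that mechanism only. */
        alt = rd_kafka_UserScramCredentialDeletion_new(
            user, RD_KAFKA_SCRAM_MECHANISM_SHA_256);
        rd_kafka_AlterUserScramCredentials(rk, &alt, 1, NULL, q);
        rd_kafka_UserScramCredentialAlteration_destroy(alt);
}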
rd_kafka_AlterUserScramCredentials_result_response_error(response); + + err = rd_kafka_error_code(error); + /* username should be the same and err should be NO_ERROR*/ + TEST_ASSERT(strcmp(users[0], username) == 0, + "Username should be %s, got %s", users[0], username); + TEST_ASSERT(err == RD_KAFKA_RESP_ERR_NO_ERROR, + "Error code should be NO_ERROR, got %s", + rd_kafka_err2name(err)); + + rd_kafka_event_destroy(event); +#endif + + /* Credential should be retrieved */ + options = rd_kafka_AdminOptions_new( + rk, RD_KAFKA_ADMIN_OP_DESCRIBEUSERSCRAMCREDENTIALS); + + TEST_CALL_ERR__(rd_kafka_AdminOptions_set_request_timeout( + options, 30 * 1000 /* 30s */, errstr, sizeof(errstr))); + + rd_kafka_DescribeUserScramCredentials(rk, users, RD_ARRAY_SIZE(users), + options, queue); + rd_kafka_AdminOptions_destroy(options); + + /* Wait for results */ + event = rd_kafka_queue_poll(queue, -1 /*indefinitely*/); + err = rd_kafka_event_error(event); + TEST_ASSERT(err == RD_KAFKA_RESP_ERR_NO_ERROR, + "Expected NO_ERROR, not %s", rd_kafka_err2name(err)); + + describe_result = + rd_kafka_event_DescribeUserScramCredentials_result(event); + descriptions = + rd_kafka_DescribeUserScramCredentials_result_descriptions( + describe_result, &description_cnt); + /* Assert description_cnt should be 1 , request level error code should + * be 0*/ + TEST_ASSERT(description_cnt == 1, + "There should be exactly 1 description, got %" PRIusz, + description_cnt); + + description = descriptions[0]; + username = rd_kafka_UserScramCredentialsDescription_user(description); + error = rd_kafka_UserScramCredentialsDescription_error(description); + err = rd_kafka_error_code(error); + + num_credentials = + rd_kafka_UserScramCredentialsDescription_scramcredentialinfo_count( + description); + /* username should be the same, err should be NO_ERROR and + * num_credentials should be 1 */ + TEST_ASSERT(strcmp(users[0], username) == 0, + "Username should be %s, got %s", users[0], username); + TEST_ASSERT(err == RD_KAFKA_RESP_ERR_NO_ERROR, + "Error code should be NO_ERROR, got %s", + rd_kafka_err2name(err)); + TEST_ASSERT(num_credentials == 1, + "Credentials count should be 1, got %" PRIusz, + num_credentials); + + scram_credential = + rd_kafka_UserScramCredentialsDescription_scramcredentialinfo( + description, 0); + mechanism = rd_kafka_ScramCredentialInfo_mechanism(scram_credential); + iterations = rd_kafka_ScramCredentialInfo_iterations(scram_credential); + /* mechanism should be SHA 256 and iterations 10000 */ + TEST_ASSERT(mechanism == RD_KAFKA_SCRAM_MECHANISM_SHA_256, + "Mechanism should be %d, got: %d", + RD_KAFKA_SCRAM_MECHANISM_SHA_256, mechanism); + TEST_ASSERT(iterations == 10000, + "Iterations should be 10000, got %" PRId32, iterations); + + rd_kafka_event_destroy(event); + + /* Delete the credential */ + alterations[0] = + rd_kafka_UserScramCredentialDeletion_new(users[0], mechanism); + + options = rd_kafka_AdminOptions_new( + rk, RD_KAFKA_ADMIN_OP_ALTERUSERSCRAMCREDENTIALS); + + TEST_CALL_ERR__(rd_kafka_AdminOptions_set_request_timeout( + options, 30 * 1000 /* 30s */, errstr, sizeof(errstr))); + + rd_kafka_AlterUserScramCredentials( + rk, alterations, RD_ARRAY_SIZE(alterations), options, queue); + rd_kafka_AdminOptions_destroy(options); + rd_kafka_UserScramCredentialAlteration_destroy_array( + alterations, RD_ARRAY_SIZE(alterations)); + + /* Wait for results */ + event = rd_kafka_queue_poll(queue, -1 /*indefinitely*/); + err = rd_kafka_event_error(event); + TEST_ASSERT(err == RD_KAFKA_RESP_ERR_NO_ERROR, + "Expected NO_ERROR, not 
%s", rd_kafka_err2name(err)); + + alter_result = rd_kafka_event_AlterUserScramCredentials_result(event); + alter_responses = rd_kafka_AlterUserScramCredentials_result_responses( + alter_result, &response_cnt); + + /* response_cnt should be 1*/ + TEST_ASSERT(response_cnt == 1, + "There should be exactly 1 response, got %" PRIusz, + response_cnt); + + response = alter_responses[0]; + username = + rd_kafka_AlterUserScramCredentials_result_response_user(response); + error = + rd_kafka_AlterUserScramCredentials_result_response_error(response); + + err = rd_kafka_error_code(error); + /* username should be the same and err should be NO_ERROR*/ + TEST_ASSERT(strcmp(users[0], username) == 0, + "Username should be %s, got %s", users[0], username); + TEST_ASSERT(err == RD_KAFKA_RESP_ERR_NO_ERROR, + "Error code should be NO_ERROR, got %s", + rd_kafka_err2name(err)); + + rd_kafka_event_destroy(event); + +#if !WITH_SSL +final_checks: +#endif + + /* Credential doesn't exist anymore for this user */ + + options = rd_kafka_AdminOptions_new( + rk, RD_KAFKA_ADMIN_OP_DESCRIBEUSERSCRAMCREDENTIALS); + + TEST_CALL_ERR__(rd_kafka_AdminOptions_set_request_timeout( + options, 30 * 1000 /* 30s */, errstr, sizeof(errstr))); + + rd_kafka_DescribeUserScramCredentials(rk, users, RD_ARRAY_SIZE(users), + options, queue); + rd_kafka_AdminOptions_destroy(options); + /* Wait for results */ + event = rd_kafka_queue_poll(queue, -1 /*indefinitely*/); + err = rd_kafka_event_error(event); + TEST_ASSERT(err == RD_KAFKA_RESP_ERR_NO_ERROR, + "Expected NO_ERROR, not %s", rd_kafka_err2name(err)); + + describe_result = + rd_kafka_event_DescribeUserScramCredentials_result(event); + descriptions = + rd_kafka_DescribeUserScramCredentials_result_descriptions( + describe_result, &description_cnt); + /* Assert description_cnt should be 1, request level error code should + * be 0*/ + TEST_ASSERT(description_cnt == 1, + "There should be exactly 1 description, got %" PRIusz, + description_cnt); + + description = descriptions[0]; + username = rd_kafka_UserScramCredentialsDescription_user(description); + error = rd_kafka_UserScramCredentialsDescription_error(description); + err = rd_kafka_error_code(error); + num_credentials = + rd_kafka_UserScramCredentialsDescription_scramcredentialinfo_count( + description); + /* username should be the same, err should be RESOURCE_NOT_FOUND + * and num_credentials should be 0 */ + TEST_ASSERT(strcmp(users[0], username) == 0, + "Username should be %s, got %s", users[0], username); + TEST_ASSERT(err == RD_KAFKA_RESP_ERR_RESOURCE_NOT_FOUND, + "Error code should be RESOURCE_NOT_FOUND, got %s", + rd_kafka_err2name(err)); + TEST_ASSERT(num_credentials == 0, + "Credentials count should be 0, got %" PRIusz, + num_credentials); + + rd_kafka_event_destroy(event); + + if (!useq) + rd_kafka_queue_destroy(queue); + + SUB_TEST_PASS(); +} + +static void do_test_ListOffsets(const char *what, + rd_kafka_t *rk, + rd_kafka_queue_t *useq, + int req_timeout_ms) { + char errstr[512]; + const char *topic = test_mk_topic_name(__FUNCTION__, 1); + char *message = "Message"; + rd_kafka_AdminOptions_t *options; + rd_kafka_event_t *event; + rd_kafka_queue_t *q; + rd_kafka_t *p; + size_t i = 0, cnt = 0; + rd_kafka_topic_partition_list_t *topic_partitions, + *empty_topic_partitions; + const rd_kafka_ListOffsets_result_t *result; + const rd_kafka_ListOffsetsResultInfo_t **result_infos; + int64_t basetimestamp = 10000000; + int64_t timestamps[] = { + basetimestamp + 100, + basetimestamp + 400, + basetimestamp + 250, + }; + struct 
test_fixture_s { + int64_t query; + int64_t expected; + int min_broker_version; + } test_fixtures[] = { + {.query = RD_KAFKA_OFFSET_SPEC_EARLIEST, .expected = 0}, + {.query = RD_KAFKA_OFFSET_SPEC_LATEST, .expected = 3}, + {.query = RD_KAFKA_OFFSET_SPEC_MAX_TIMESTAMP, + .expected = 1, + .min_broker_version = TEST_BRKVER(3, 0, 0, 0)}, + {.query = basetimestamp + 50, .expected = 0}, + {.query = basetimestamp + 300, .expected = 1}, + {.query = basetimestamp + 150, .expected = 1}, + }; + + SUB_TEST_QUICK( + "%s ListOffsets with %s, " + "request_timeout %d", + rd_kafka_name(rk), what, req_timeout_ms); + + q = useq ? useq : rd_kafka_queue_new(rk); + + test_CreateTopics_simple(rk, NULL, (char **)&topic, 1, 1, NULL); + + p = test_create_producer(); + for (i = 0; i < RD_ARRAY_SIZE(timestamps); i++) { + rd_kafka_producev( + /* Producer handle */ + p, + /* Topic name */ + RD_KAFKA_V_TOPIC(topic), + /* Make a copy of the payload. */ + RD_KAFKA_V_MSGFLAGS(RD_KAFKA_MSG_F_COPY), + /* Message value and length */ + RD_KAFKA_V_VALUE(message, strlen(message)), + + RD_KAFKA_V_TIMESTAMP(timestamps[i]), + /* Per-Message opaque, provided in + * delivery report callback as + * msg_opaque. */ + RD_KAFKA_V_OPAQUE(NULL), + /* End sentinel */ + RD_KAFKA_V_END); + } + + rd_kafka_flush(p, 20 * 1000); + rd_kafka_destroy(p); + + /* Set timeout (optional) */ + options = rd_kafka_AdminOptions_new(rk, RD_KAFKA_ADMIN_OP_LISTOFFSETS); + + TEST_CALL_ERR__(rd_kafka_AdminOptions_set_request_timeout( + options, 30 * 1000 /* 30s */, errstr, sizeof(errstr))); + + TEST_CALL_ERROR__(rd_kafka_AdminOptions_set_isolation_level( + options, RD_KAFKA_ISOLATION_LEVEL_READ_COMMITTED)); + + topic_partitions = rd_kafka_topic_partition_list_new(1); + empty_topic_partitions = rd_kafka_topic_partition_list_new(0); + rd_kafka_topic_partition_list_add(topic_partitions, topic, 0); + + /* Call ListOffsets with empty partition list */ + rd_kafka_ListOffsets(rk, empty_topic_partitions, options, q); + rd_kafka_topic_partition_list_destroy(empty_topic_partitions); + /* Wait for results */ + event = rd_kafka_queue_poll(q, -1 /*indefinitely*/); + if (!event) + TEST_FAIL("Event missing"); + + TEST_CALL_ERR__(rd_kafka_event_error(event)); + + result = rd_kafka_event_ListOffsets_result(event); + result_infos = rd_kafka_ListOffsets_result_infos(result, &cnt); + rd_kafka_event_destroy(event); + + TEST_ASSERT(!cnt, + "Expected empty result info array, got %" PRIusz + " result infos", + cnt); + + for (i = 0; i < RD_ARRAY_SIZE(test_fixtures); i++) { + rd_bool_t retry = rd_true; + rd_kafka_topic_partition_list_t *topic_partitions_copy; + + struct test_fixture_s test_fixture = test_fixtures[i]; + if (test_fixture.min_broker_version && + test_broker_version < test_fixture.min_broker_version) { + TEST_SAY("Skipping offset %" PRId64 + ", as not supported\n", + test_fixture.query); + continue; + } + + TEST_SAY("Testing offset %" PRId64 "\n", test_fixture.query); + + topic_partitions_copy = + rd_kafka_topic_partition_list_copy(topic_partitions); + + /* Set OffsetSpec */ + topic_partitions_copy->elems[0].offset = test_fixture.query; + + while (retry) { + size_t j; + rd_kafka_resp_err_t err; + /* Call ListOffsets */ + rd_kafka_ListOffsets(rk, topic_partitions_copy, options, + q); + /* Wait for results */ + event = rd_kafka_queue_poll(q, -1 /*indefinitely*/); + if (!event) + TEST_FAIL("Event missing"); + + err = rd_kafka_event_error(event); + if (err == RD_KAFKA_RESP_ERR__NOENT) { + rd_kafka_event_destroy(event); + /* Still looking for the leader */ + rd_usleep(100000, 0); + 
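The producer loop above pins each message's timestamp so the OFFSET_SPEC_MAX_TIMESTAMP fixture has a known answer: offset 1 carries the largest timestamp, basetimestamp + 400. A minimal hedged sketch of producing with an explicit timestamp, with produce_at as a hypothetical helper:

#include <librdkafka/rdkafka.h>
#include <stdio.h>
#include <string.h>

/* Sketch: produce one message with a client-supplied timestamp.
 * RD_KAFKA_V_TIMESTAMP takes milliseconds since the epoch; with
 * RD_KAFKA_MSG_F_COPY the payload is copied, so the caller's buffer may
 * be reused immediately. Assumes `p` is a producer handle. */
static void produce_at(rd_kafka_t *p, const char *topic, int64_t ts_ms) {
        const char *payload = "Message";
        rd_kafka_resp_err_t err = rd_kafka_producev(
            p, RD_KAFKA_V_TOPIC(topic),
            RD_KAFKA_V_MSGFLAGS(RD_KAFKA_MSG_F_COPY),
            RD_KAFKA_V_VALUE((void *)payload, strlen(payload)),
            RD_KAFKA_V_TIMESTAMP(ts_ms), RD_KAFKA_V_END);
        if (err)
                fprintf(stderr, "producev: %s\n", rd_kafka_err2str(err));
}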
continue; + } else if (err) { + TEST_FAIL("Failed with error: %s", + rd_kafka_err2name(err)); + } + + result = rd_kafka_event_ListOffsets_result(event); + result_infos = + rd_kafka_ListOffsets_result_infos(result, &cnt); + for (j = 0; j < cnt; j++) { + const rd_kafka_topic_partition_t *topic_partition = + rd_kafka_ListOffsetsResultInfo_topic_partition( + result_infos[j]); + TEST_ASSERT( + topic_partition->err == 0, + "Expected error NO_ERROR, got %s", + rd_kafka_err2name(topic_partition->err)); + TEST_ASSERT(topic_partition->offset == + test_fixture.expected, + "Expected offset %" PRId64 + ", got %" PRId64, + test_fixture.expected, + topic_partition->offset); + } + rd_kafka_event_destroy(event); + retry = rd_false; + } + rd_kafka_topic_partition_list_destroy(topic_partitions_copy); + } + + rd_kafka_AdminOptions_destroy(options); + rd_kafka_topic_partition_list_destroy(topic_partitions); + + test_DeleteTopics_simple(rk, NULL, (char **)&topic, 1, NULL); + + if (!useq) + rd_kafka_queue_destroy(q); + + SUB_TEST_PASS(); +} + static void do_test_apis(rd_kafka_type_t cltype) { rd_kafka_t *rk; rd_kafka_conf_t *conf; @@ -3670,6 +5165,7 @@ static void do_test_apis(rd_kafka_type_t cltype) { test_conf_init(&conf, NULL, 180); test_conf_set(conf, "socket.timeout.ms", "10000"); + rk = test_create_handle(cltype, conf); mainq = rd_kafka_queue_get_main(rk); @@ -3713,6 +5209,11 @@ static void do_test_apis(rd_kafka_type_t cltype) { /* AlterConfigs */ do_test_AlterConfigs(rk, mainq); + if (test_broker_version >= TEST_BRKVER(2, 3, 0, 0)) { + /* IncrementalAlterConfigs */ + do_test_IncrementalAlterConfigs(rk, mainq); + } + /* DescribeConfigs */ do_test_DescribeConfigs(rk, mainq); @@ -3728,6 +5229,28 @@ static void do_test_apis(rd_kafka_type_t cltype) { do_test_DescribeConsumerGroups("temp queue", rk, NULL, -1); do_test_DescribeConsumerGroups("main queue", rk, mainq, 1500); + /* Describe topics */ + do_test_DescribeTopics("temp queue", rk, NULL, 15000, rd_false); + do_test_DescribeTopics("main queue", rk, mainq, 15000, rd_false); + + /* Describe cluster */ + do_test_DescribeCluster("temp queue", rk, NULL, 1500, rd_false); + do_test_DescribeCluster("main queue", rk, mainq, 1500, rd_false); + + if (test_broker_version >= TEST_BRKVER(2, 3, 0, 0)) { + /* Describe topics */ + do_test_DescribeTopics("temp queue", rk, NULL, 15000, rd_true); + do_test_DescribeTopics("main queue", rk, mainq, 15000, rd_true); + + do_test_DescribeCluster("temp queue", rk, NULL, 1500, rd_true); + do_test_DescribeCluster("main queue", rk, mainq, 1500, rd_true); + + do_test_DescribeConsumerGroups_with_authorized_ops( + "temp queue", rk, NULL, 1500); + do_test_DescribeConsumerGroups_with_authorized_ops( + "main queue", rk, mainq, 1500); + } + /* Delete groups */ do_test_DeleteGroups("temp queue", rk, NULL, -1); do_test_DeleteGroups("main queue", rk, mainq, 1500); @@ -3741,6 +5264,12 @@ static void do_test_apis(rd_kafka_type_t cltype) { do_test_DeleteConsumerGroupOffsets( "main queue", rk, mainq, 1500, rd_true /*with subscribing consumer*/); + } + + if (test_broker_version >= TEST_BRKVER(2, 5, 0, 0)) { + /* ListOffsets */ + do_test_ListOffsets("temp queue", rk, NULL, -1); + do_test_ListOffsets("main queue", rk, mainq, 1500); /* Alter committed offsets */ do_test_AlterConsumerGroupOffsets("temp queue", rk, NULL, -1, @@ -3754,7 +5283,9 @@ static void do_test_apis(rd_kafka_type_t cltype) { "main queue", rk, mainq, 1500, rd_true, /*with subscribing consumer*/ rd_true); + } + if (test_broker_version >= TEST_BRKVER(2, 0, 0, 0)) { /* List committed 
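Outside the retry harness, the same query-and-read flow can be sketched in isolation (query_latest_offset is a hypothetical helper). The offset spec travels in the `offset` field of the topic partition list, exactly as the fixture loop above assigns test_fixture.query; the special OffsetSpec sentinels and millisecond timestamps are interchangeable there.

#include <librdkafka/rdkafka.h>
#include <inttypes.h>
#include <stdio.h>

/* Sketch: query the latest offset of partition 0 of `topic` via the
 * ListOffsets admin API. Assumes `rk`/`q` are set up. */
static void query_latest_offset(rd_kafka_t *rk, rd_kafka_queue_t *q,
                                const char *topic) {
        size_t cnt = 0, i;
        rd_kafka_topic_partition_list_t *parts =
            rd_kafka_topic_partition_list_new(1);
        rd_kafka_topic_partition_list_add(parts, topic, 0)->offset =
            RD_KAFKA_OFFSET_SPEC_LATEST;

        rd_kafka_ListOffsets(rk, parts, NULL /* options */, q);
        rd_kafka_topic_partition_list_destroy(parts);

        rd_kafka_event_t *ev = rd_kafka_queue_poll(q, 10000);
        if (!ev)
                return;
        const rd_kafka_ListOffsets_result_t *res =
            rd_kafka_event_ListOffsets_result(ev);
        const rd_kafka_ListOffsetsResultInfo_t **infos =
            rd_kafka_ListOffsets_result_infos(res, &cnt);
        for (i = 0; i < cnt; i++) {
                const rd_kafka_topic_partition_t *tp =
                    rd_kafka_ListOffsetsResultInfo_topic_partition(infos[i]);
                printf("[%d] offset %" PRId64 "\n", tp->partition,
                       tp->offset);
        }
        rd_kafka_event_destroy(ev);
}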
offsets */ do_test_ListConsumerGroupOffsets("temp queue", rk, NULL, -1, rd_false, rd_false); @@ -3774,6 +5305,12 @@ static void do_test_apis(rd_kafka_type_t cltype) { rd_true /*with subscribing consumer*/, rd_true); } + if (test_broker_version >= TEST_BRKVER(2, 7, 0, 0)) { + do_test_UserScramCredentials("main queue", rk, mainq, rd_false); + do_test_UserScramCredentials("temp queue", rk, NULL, rd_false); + do_test_UserScramCredentials("main queue", rk, mainq, rd_true); + } + rd_kafka_queue_destroy(mainq); rd_kafka_destroy(rk); @@ -3785,7 +5322,6 @@ static void do_test_apis(rd_kafka_type_t cltype) { int main_0081_admin(int argc, char **argv) { do_test_apis(RD_KAFKA_PRODUCER); - if (test_quick) { TEST_SAY("Skipping further 0081 tests due to quick mode\n"); return 0; diff --git a/lib/librdkafka-2.1.0/tests/0082-fetch_max_bytes.cpp b/lib/librdkafka-2.3.0/tests/0082-fetch_max_bytes.cpp similarity index 97% rename from lib/librdkafka-2.1.0/tests/0082-fetch_max_bytes.cpp rename to lib/librdkafka-2.3.0/tests/0082-fetch_max_bytes.cpp index 16eb5a21a1f..4ecb370f752 100644 --- a/lib/librdkafka-2.1.0/tests/0082-fetch_max_bytes.cpp +++ b/lib/librdkafka-2.3.0/tests/0082-fetch_max_bytes.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2016, Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -71,7 +71,7 @@ static void do_test_fetch_max_bytes(void) { * but due to batching overhead it would result in situations where * the consumer asked for 1000000 bytes and got 1000096 bytes batch, which * was higher than the 1000000 limit. - * See https://github.com/edenhill/librdkafka/issues/1616 + * See https://github.com/confluentinc/librdkafka/issues/1616 * * With the added configuration strictness checks, a user-supplied * value is no longer over-written: diff --git a/lib/librdkafka-2.1.0/tests/0083-cb_event.c b/lib/librdkafka-2.3.0/tests/0083-cb_event.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0083-cb_event.c rename to lib/librdkafka-2.3.0/tests/0083-cb_event.c index 23ce7982085..ec84ee6e998 100644 --- a/lib/librdkafka-2.1.0/tests/0083-cb_event.c +++ b/lib/librdkafka-2.3.0/tests/0083-cb_event.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2018, Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0084-destroy_flags.c b/lib/librdkafka-2.3.0/tests/0084-destroy_flags.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/0084-destroy_flags.c rename to lib/librdkafka-2.3.0/tests/0084-destroy_flags.c index cd8bbf7dedd..df98a742d79 100644 --- a/lib/librdkafka-2.1.0/tests/0084-destroy_flags.c +++ b/lib/librdkafka-2.3.0/tests/0084-destroy_flags.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2018, Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0085-headers.cpp b/lib/librdkafka-2.3.0/tests/0085-headers.cpp similarity index 99% rename from lib/librdkafka-2.1.0/tests/0085-headers.cpp rename to lib/librdkafka-2.3.0/tests/0085-headers.cpp index a342478c158..aa9c4246417 100644 --- a/lib/librdkafka-2.1.0/tests/0085-headers.cpp +++ b/lib/librdkafka-2.3.0/tests/0085-headers.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0086-purge.c b/lib/librdkafka-2.3.0/tests/0086-purge.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0086-purge.c rename to lib/librdkafka-2.3.0/tests/0086-purge.c index 4dbf937f3a4..1bf235a313b 100644 --- a/lib/librdkafka-2.1.0/tests/0086-purge.c +++ b/lib/librdkafka-2.3.0/tests/0086-purge.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0088-produce_metadata_timeout.c b/lib/librdkafka-2.3.0/tests/0088-produce_metadata_timeout.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0088-produce_metadata_timeout.c rename to lib/librdkafka-2.3.0/tests/0088-produce_metadata_timeout.c index c71b5a69fdb..68d02449c1e 100644 --- a/lib/librdkafka-2.1.0/tests/0088-produce_metadata_timeout.c +++ b/lib/librdkafka-2.3.0/tests/0088-produce_metadata_timeout.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0089-max_poll_interval.c b/lib/librdkafka-2.3.0/tests/0089-max_poll_interval.c similarity index 68% rename from lib/librdkafka-2.1.0/tests/0089-max_poll_interval.c rename to lib/librdkafka-2.3.0/tests/0089-max_poll_interval.c index 3d7cbf66fa3..2089af9907f 100644 --- a/lib/librdkafka-2.1.0/tests/0089-max_poll_interval.c +++ b/lib/librdkafka-2.3.0/tests/0089-max_poll_interval.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2018, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -351,8 +352,155 @@ static void do_test_with_log_queue(void) { SUB_TEST_PASS(); } + +/** + * @brief Consumer should be able to rejoin the group just by polling after + * leaving due to a max.poll.interval.ms timeout. The poll does not need to + * go through any special function; any queue containing consumer messages + * should suffice. + * We test with the result of rd_kafka_queue_get_consumer, and an arbitrary + * queue that is forwarded to by the result of rd_kafka_queue_get_consumer. + * We also test with an arbitrary queue that is forwarded to the result of + * rd_kafka_queue_get_consumer.
+ */ +static void +do_test_rejoin_after_interval_expire(rd_bool_t forward_to_another_q, + rd_bool_t forward_to_consumer_q) { + const char *topic = test_mk_topic_name("0089_max_poll_interval", 1); + rd_kafka_conf_t *conf; + char groupid[64]; + rd_kafka_t *rk = NULL; + rd_kafka_queue_t *consumer_queue = NULL; + rd_kafka_queue_t *forwarder_queue = NULL; + rd_kafka_event_t *event = NULL; + rd_kafka_queue_t *polling_queue = NULL; + + SUB_TEST( + "Testing with forward_to_another_q = %d, forward_to_consumer_q = " + "%d", + forward_to_another_q, forward_to_consumer_q); + + test_create_topic(NULL, topic, 1, 1); + + test_str_id_generate(groupid, sizeof(groupid)); + test_conf_init(&conf, NULL, 60); + test_conf_set(conf, "session.timeout.ms", "6000"); + test_conf_set(conf, "max.poll.interval.ms", "10000" /*10s*/); + test_conf_set(conf, "partition.assignment.strategy", "range"); + + /* We need to specify a non-NULL rebalance CB to get events of type + * RD_KAFKA_EVENT_REBALANCE. */ + rk = test_create_consumer(groupid, test_rebalance_cb, conf, NULL); + + consumer_queue = rd_kafka_queue_get_consumer(rk); + + test_consumer_subscribe(rk, topic); + + if (forward_to_another_q) { + polling_queue = rd_kafka_queue_new(rk); + rd_kafka_queue_forward(consumer_queue, polling_queue); + } else if (forward_to_consumer_q) { + forwarder_queue = rd_kafka_queue_new(rk); + rd_kafka_queue_forward(forwarder_queue, consumer_queue); + polling_queue = forwarder_queue; + } else + polling_queue = consumer_queue; + + event = test_wait_event(polling_queue, RD_KAFKA_EVENT_REBALANCE, + (int)(test_timeout_multiplier * 10000)); + TEST_ASSERT(event, + "Did not get a rebalance event for initial group join"); + TEST_ASSERT(rd_kafka_event_error(event) == + RD_KAFKA_RESP_ERR__ASSIGN_PARTITIONS, + "Group join should assign partitions"); + rd_kafka_assign(rk, rd_kafka_event_topic_partition_list(event)); + rd_kafka_event_destroy(event); + + rd_sleep(10 + 1); /* Exceed max.poll.interval.ms. */ + + /* Note that by polling for the group leave, we're also polling the + * consumer queue, and hence it should trigger a rejoin. */ + event = test_wait_event(polling_queue, RD_KAFKA_EVENT_REBALANCE, + (int)(test_timeout_multiplier * 10000)); + TEST_ASSERT(event, "Did not get a rebalance event for the group leave"); + TEST_ASSERT(rd_kafka_event_error(event) == + RD_KAFKA_RESP_ERR__REVOKE_PARTITIONS, + "Group leave should revoke partitions"); + rd_kafka_assign(rk, NULL); + rd_kafka_event_destroy(event); + + event = test_wait_event(polling_queue, RD_KAFKA_EVENT_REBALANCE, + (int)(test_timeout_multiplier * 10000)); + TEST_ASSERT(event, "Should get a rebalance event for the group rejoin"); + TEST_ASSERT(rd_kafka_event_error(event) == + RD_KAFKA_RESP_ERR__ASSIGN_PARTITIONS, + "Group rejoin should assign partitions"); + rd_kafka_assign(rk, rd_kafka_event_topic_partition_list(event)); + rd_kafka_event_destroy(event); + + if (forward_to_another_q) + rd_kafka_queue_destroy(polling_queue); + if (forward_to_consumer_q) + rd_kafka_queue_destroy(forwarder_queue); + rd_kafka_queue_destroy(consumer_queue); + test_consumer_close(rk); + rd_kafka_destroy(rk); + + SUB_TEST_PASS(); +} + +static void consume_cb(rd_kafka_message_t *rkmessage, void *opaque) { + TEST_SAY("Consume callback\n"); +} + +/** + * @brief Test that max.poll.interval.ms is reset when + * rd_kafka_poll is called with consume_cb. + * See issue #4421. 
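The queue plumbing the rejoin test relies on can be sketched on its own (poll_via_forwarded_queue is a hypothetical helper; assumes a subscribed consumer): forwarding the consumer queue to an application-owned queue means polling the latter serves rebalances, messages, and the max.poll.interval.ms keep-alive exactly as polling the consumer queue would.

#include <librdkafka/rdkafka.h>

static void poll_via_forwarded_queue(rd_kafka_t *rk) {
        rd_kafka_queue_t *consumer_q = rd_kafka_queue_get_consumer(rk);
        rd_kafka_queue_t *app_q      = rd_kafka_queue_new(rk);

        /* All events that would land on consumer_q now land on app_q. */
        rd_kafka_queue_forward(consumer_q, app_q);

        rd_kafka_event_t *ev = rd_kafka_queue_poll(app_q, 1000);
        if (ev)
                rd_kafka_event_destroy(ev);

        rd_kafka_queue_forward(consumer_q, NULL); /* undo the forwarding */
        rd_kafka_queue_destroy(app_q);
        rd_kafka_queue_destroy(consumer_q);
}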
+ */ +static void do_test_max_poll_reset_with_consumer_cb(void) { + const char *topic = test_mk_topic_name("0089_max_poll_interval", 1); + rd_kafka_conf_t *conf; + char groupid[64]; + rd_kafka_t *rk = NULL; + + SUB_TEST(); + + test_create_topic(NULL, topic, 1, 1); + uint64_t testid = test_id_generate(); + + test_produce_msgs_easy(topic, testid, -1, 100); + + test_str_id_generate(groupid, sizeof(groupid)); + test_conf_init(&conf, NULL, 60); + test_conf_set(conf, "session.timeout.ms", "10000"); + test_conf_set(conf, "max.poll.interval.ms", "10000" /*10s*/); + test_conf_set(conf, "partition.assignment.strategy", "range"); + rd_kafka_conf_set_consume_cb(conf, consume_cb); + + rk = test_create_consumer(groupid, NULL, conf, NULL); + rd_kafka_poll_set_consumer(rk); + + test_consumer_subscribe(rk, topic); + TEST_SAY("Subscribed to %s and sleeping for 5 s\n", topic); + rd_sleep(5); + rd_kafka_poll(rk, 10); + TEST_SAY( + "Polled and sleeping again for 6s. Max poll should be reset\n"); + rd_sleep(6); + + /* Poll should work */ + rd_kafka_poll(rk, 10); + test_consumer_close(rk); + rd_kafka_destroy(rk); +} + int main_0089_max_poll_interval(int argc, char **argv) { do_test(); do_test_with_log_queue(); + do_test_rejoin_after_interval_expire(rd_false, rd_false); + do_test_rejoin_after_interval_expire(rd_true, rd_false); + do_test_rejoin_after_interval_expire(rd_false, rd_true); + do_test_max_poll_reset_with_consumer_cb(); return 0; } diff --git a/lib/librdkafka-2.1.0/tests/0090-idempotence.c b/lib/librdkafka-2.3.0/tests/0090-idempotence.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0090-idempotence.c rename to lib/librdkafka-2.3.0/tests/0090-idempotence.c index 02d16df565b..c665b5f635c 100644 --- a/lib/librdkafka-2.1.0/tests/0090-idempotence.c +++ b/lib/librdkafka-2.3.0/tests/0090-idempotence.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2018, Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0091-max_poll_interval_timeout.c b/lib/librdkafka-2.3.0/tests/0091-max_poll_interval_timeout.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0091-max_poll_interval_timeout.c rename to lib/librdkafka-2.3.0/tests/0091-max_poll_interval_timeout.c index c1506afd9b5..f736c108a39 100644 --- a/lib/librdkafka-2.1.0/tests/0091-max_poll_interval_timeout.c +++ b/lib/librdkafka-2.3.0/tests/0091-max_poll_interval_timeout.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2018, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0092-mixed_msgver.c b/lib/librdkafka-2.3.0/tests/0092-mixed_msgver.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/0092-mixed_msgver.c rename to lib/librdkafka-2.3.0/tests/0092-mixed_msgver.c index 46308ddf475..877fc48e071 100644 --- a/lib/librdkafka-2.1.0/tests/0092-mixed_msgver.c +++ b/lib/librdkafka-2.3.0/tests/0092-mixed_msgver.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2018, Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill * All rights reserved. 
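A hedged sketch of the callback-based consume path the test above exercises; my_consume_cb and create_cb_consumer are hypothetical names. The point is that after rd_kafka_poll_set_consumer(), a plain rd_kafka_poll() both fires the consume callback and, per the fix referenced above (#4421), resets the max.poll.interval.ms timer.

#include <librdkafka/rdkafka.h>
#include <inttypes.h>
#include <stdio.h>

/* Callback matching the rd_kafka_conf_set_consume_cb() signature. */
static void my_consume_cb(rd_kafka_message_t *rkmessage, void *opaque) {
        if (!rkmessage->err)
                printf("message at offset %" PRId64 "\n", rkmessage->offset);
}

/* Sketch: build a consumer whose fetched messages are delivered through
 * rd_kafka_poll(). Assumes `conf` is otherwise fully configured (group.id,
 * bootstrap.servers, ...); `conf` is consumed by rd_kafka_new(). */
static rd_kafka_t *create_cb_consumer(rd_kafka_conf_t *conf) {
        char errstr[512];
        rd_kafka_conf_set_consume_cb(conf, my_consume_cb);
        rd_kafka_t *rk =
            rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr, sizeof(errstr));
        if (rk)
                rd_kafka_poll_set_consumer(rk);
        /* After rd_kafka_subscribe(), call rd_kafka_poll(rk, timeout)
         * periodically to serve my_consume_cb. */
        return rk;
}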
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0093-holb.c b/lib/librdkafka-2.3.0/tests/0093-holb.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0093-holb.c rename to lib/librdkafka-2.3.0/tests/0093-holb.c index 366deca328d..8e80b1550ec 100644 --- a/lib/librdkafka-2.1.0/tests/0093-holb.c +++ b/lib/librdkafka-2.3.0/tests/0093-holb.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2018, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0094-idempotence_msg_timeout.c b/lib/librdkafka-2.3.0/tests/0094-idempotence_msg_timeout.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0094-idempotence_msg_timeout.c rename to lib/librdkafka-2.3.0/tests/0094-idempotence_msg_timeout.c index 8704adc09c9..4f2b3cbe5f1 100644 --- a/lib/librdkafka-2.1.0/tests/0094-idempotence_msg_timeout.c +++ b/lib/librdkafka-2.3.0/tests/0094-idempotence_msg_timeout.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0095-all_brokers_down.cpp b/lib/librdkafka-2.3.0/tests/0095-all_brokers_down.cpp similarity index 98% rename from lib/librdkafka-2.1.0/tests/0095-all_brokers_down.cpp rename to lib/librdkafka-2.3.0/tests/0095-all_brokers_down.cpp index 6ebd5f500ee..759eb8ffe63 100644 --- a/lib/librdkafka-2.1.0/tests/0095-all_brokers_down.cpp +++ b/lib/librdkafka-2.3.0/tests/0095-all_brokers_down.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2019, Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0097-ssl_verify.cpp b/lib/librdkafka-2.3.0/tests/0097-ssl_verify.cpp similarity index 99% rename from lib/librdkafka-2.1.0/tests/0097-ssl_verify.cpp rename to lib/librdkafka-2.3.0/tests/0097-ssl_verify.cpp index 8a3a0bce510..a5e8885267c 100644 --- a/lib/librdkafka-2.1.0/tests/0097-ssl_verify.cpp +++ b/lib/librdkafka-2.3.0/tests/0097-ssl_verify.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2019, Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0098-consumer-txn.cpp b/lib/librdkafka-2.3.0/tests/0098-consumer-txn.cpp similarity index 99% rename from lib/librdkafka-2.1.0/tests/0098-consumer-txn.cpp rename to lib/librdkafka-2.3.0/tests/0098-consumer-txn.cpp index 1bdb46d0bfb..6045e785a31 100644 --- a/lib/librdkafka-2.1.0/tests/0098-consumer-txn.cpp +++ b/lib/librdkafka-2.3.0/tests/0098-consumer-txn.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2016, Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0099-commit_metadata.c b/lib/librdkafka-2.3.0/tests/0099-commit_metadata.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0099-commit_metadata.c rename to lib/librdkafka-2.3.0/tests/0099-commit_metadata.c index 902849fb241..9acdb07f55b 100644 --- a/lib/librdkafka-2.1.0/tests/0099-commit_metadata.c +++ b/lib/librdkafka-2.3.0/tests/0099-commit_metadata.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0100-thread_interceptors.cpp b/lib/librdkafka-2.3.0/tests/0100-thread_interceptors.cpp similarity index 99% rename from lib/librdkafka-2.1.0/tests/0100-thread_interceptors.cpp rename to lib/librdkafka-2.3.0/tests/0100-thread_interceptors.cpp index a34ccac9809..b428c1a892c 100644 --- a/lib/librdkafka-2.1.0/tests/0100-thread_interceptors.cpp +++ b/lib/librdkafka-2.3.0/tests/0100-thread_interceptors.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2019, Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0101-fetch-from-follower.cpp b/lib/librdkafka-2.3.0/tests/0101-fetch-from-follower.cpp similarity index 99% rename from lib/librdkafka-2.1.0/tests/0101-fetch-from-follower.cpp rename to lib/librdkafka-2.3.0/tests/0101-fetch-from-follower.cpp index 342ec4f8f93..db438b2a7e9 100644 --- a/lib/librdkafka-2.1.0/tests/0101-fetch-from-follower.cpp +++ b/lib/librdkafka-2.3.0/tests/0101-fetch-from-follower.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2019, Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0102-static_group_rebalance.c b/lib/librdkafka-2.3.0/tests/0102-static_group_rebalance.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0102-static_group_rebalance.c rename to lib/librdkafka-2.3.0/tests/0102-static_group_rebalance.c index 231a09065fd..ad8bac4dbb9 100644 --- a/lib/librdkafka-2.1.0/tests/0102-static_group_rebalance.c +++ b/lib/librdkafka-2.3.0/tests/0102-static_group_rebalance.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2019, Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0103-transactions.c b/lib/librdkafka-2.3.0/tests/0103-transactions.c similarity index 94% rename from lib/librdkafka-2.1.0/tests/0103-transactions.c rename to lib/librdkafka-2.3.0/tests/0103-transactions.c index eaab2f217db..c2217cd2557 100644 --- a/lib/librdkafka-2.1.0/tests/0103-transactions.c +++ b/lib/librdkafka-2.3.0/tests/0103-transactions.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2019, Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -1102,6 +1103,90 @@ static void do_test_empty_txn(rd_bool_t send_offsets, rd_bool_t do_commit) { SUB_TEST_PASS(); } + +/** + * @brief A control message should increase the stored offset, and that + * stored offset should have the correct leader epoch + * and be included in the commit. + * See #4384. + */ +static void do_test_txn_abort_control_message_leader_epoch(void) { + const char *topic = test_mk_topic_name(__FUNCTION__, 1); + + rd_kafka_t *p, *c; + rd_kafka_conf_t *p_conf, *c_conf; + test_msgver_t mv; + int exp_msg_cnt = 0; + uint64_t testid = test_id_generate(); + rd_kafka_topic_partition_list_t *offsets; + int r; + + SUB_TEST_QUICK(); + + test_conf_init(&p_conf, NULL, 30); + c_conf = rd_kafka_conf_dup(p_conf); + + test_conf_set(p_conf, "transactional.id", topic); + rd_kafka_conf_set_dr_msg_cb(p_conf, test_dr_msg_cb); + p = test_create_handle(RD_KAFKA_PRODUCER, p_conf); + + test_create_topic(p, topic, 1, 3); + + TEST_CALL_ERROR__(rd_kafka_init_transactions(p, 5000)); + + TEST_CALL_ERROR__(rd_kafka_begin_transaction(p)); + + /* Produce one message */ + test_produce_msgs2(p, topic, testid, RD_KAFKA_PARTITION_UA, 0, 1, NULL, + 0); + + /* Abort the transaction */ + TEST_CALL_ERROR__(rd_kafka_abort_transaction(p, -1)); + + /** + * Create consumer. + */ + test_conf_set(c_conf, "enable.auto.commit", "false"); + test_conf_set(c_conf, "group.id", topic); + test_conf_set(c_conf, "enable.partition.eof", "true"); + test_conf_set(c_conf, "auto.offset.reset", "earliest"); + test_msgver_init(&mv, testid); + c = test_create_consumer(topic, NULL, c_conf, NULL); + + + test_consumer_subscribe(c, topic); + /* Expect 0 messages and 1 EOF */ + r = test_consumer_poll("consume.nothing", c, testid, + /* exp_eof_cnt */ 1, + /* exp_msg_base */ 0, exp_msg_cnt, &mv); + test_msgver_clear(&mv); + + TEST_ASSERT(r == exp_msg_cnt, "expected %d messages, got %d", + exp_msg_cnt, r); + + /* Commits offset 2 (1 aborted message + 1 control message) */ + TEST_CALL_ERR__(rd_kafka_commit(c, NULL, rd_false)); + + offsets = rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(offsets, topic, 0); + rd_kafka_committed(c, offsets, -1); + + /* Committed offset must be 2 */ + TEST_ASSERT(offsets->cnt == 1, "expected 1 partition, got %d", + offsets->cnt); + TEST_ASSERT(offsets->elems[0].offset == 2, + "expected offset 2, got %" PRId64, + offsets->elems[0].offset); + + /* All done */ + test_consumer_close(c); + rd_kafka_topic_partition_list_destroy(offsets); + rd_kafka_destroy(c); + rd_kafka_destroy(p); + + SUB_TEST_PASS(); +} + /** * @returns the high watermark for the given partition.
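The mechanism under test: an aborted transaction still leaves an abort control record in the log, so the next consumable position lies past it (offset 2 above: one aborted message plus one control record). A stripped-down sketch of the producer side, with abort_one_txn as a hypothetical helper and `p` assumed to be a producer created with transactional.id set:

#include <librdkafka/rdkafka.h>
#include <stdio.h>
#include <string.h>

static void abort_one_txn(rd_kafka_t *p, const char *topic) {
        rd_kafka_error_t *error;

        error = rd_kafka_init_transactions(p, 5000);
        if (!error)
                error = rd_kafka_begin_transaction(p);
        if (!error) {
                /* This message is written to the log but never becomes
                 * visible to READ_COMMITTED consumers. */
                rd_kafka_producev(p, RD_KAFKA_V_TOPIC(topic),
                                  RD_KAFKA_V_MSGFLAGS(RD_KAFKA_MSG_F_COPY),
                                  RD_KAFKA_V_VALUE((void *)"aborted", 7),
                                  RD_KAFKA_V_END);
                /* Writes the abort control record, advancing the log. */
                error = rd_kafka_abort_transaction(p, -1 /* no timeout */);
        }
        if (error) {
                fprintf(stderr, "txn: %s\n", rd_kafka_error_string(error));
                rd_kafka_error_destroy(error);
        }
}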
*/ @@ -1219,6 +1304,7 @@ int main_0103_transactions(int argc, char **argv) { do_test_empty_txn(rd_true /*send offsets*/, rd_true /*commit*/); do_test_empty_txn(rd_true /*send offsets*/, rd_false /*abort*/); do_test_wmark_isolation_level(); + do_test_txn_abort_control_message_leader_epoch(); return 0; } diff --git a/lib/librdkafka-2.1.0/tests/0104-fetch_from_follower_mock.c b/lib/librdkafka-2.3.0/tests/0104-fetch_from_follower_mock.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0104-fetch_from_follower_mock.c rename to lib/librdkafka-2.3.0/tests/0104-fetch_from_follower_mock.c index 1ecf99da3fd..5863638da3b 100644 --- a/lib/librdkafka-2.1.0/tests/0104-fetch_from_follower_mock.c +++ b/lib/librdkafka-2.3.0/tests/0104-fetch_from_follower_mock.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2019, Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -595,6 +596,8 @@ int main_0104_fetch_from_follower_mock(int argc, char **argv) { return 0; } + test_timeout_set(50); + do_test_offset_reset("earliest"); do_test_offset_reset("latest"); diff --git a/lib/librdkafka-2.1.0/tests/0105-transactions_mock.c b/lib/librdkafka-2.3.0/tests/0105-transactions_mock.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0105-transactions_mock.c rename to lib/librdkafka-2.3.0/tests/0105-transactions_mock.c index 014642df1df..8d6173c7f09 100644 --- a/lib/librdkafka-2.1.0/tests/0105-transactions_mock.c +++ b/lib/librdkafka-2.3.0/tests/0105-transactions_mock.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2019, Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0106-cgrp_sess_timeout.c b/lib/librdkafka-2.3.0/tests/0106-cgrp_sess_timeout.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0106-cgrp_sess_timeout.c rename to lib/librdkafka-2.3.0/tests/0106-cgrp_sess_timeout.c index 0451e4a00c6..ca0a08c20a3 100644 --- a/lib/librdkafka-2.1.0/tests/0106-cgrp_sess_timeout.c +++ b/lib/librdkafka-2.3.0/tests/0106-cgrp_sess_timeout.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020, Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0107-topic_recreate.c b/lib/librdkafka-2.3.0/tests/0107-topic_recreate.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0107-topic_recreate.c rename to lib/librdkafka-2.3.0/tests/0107-topic_recreate.c index 1f91e2a84df..474ed2f27ac 100644 --- a/lib/librdkafka-2.1.0/tests/0107-topic_recreate.c +++ b/lib/librdkafka-2.3.0/tests/0107-topic_recreate.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020, Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0109-auto_create_topics.cpp b/lib/librdkafka-2.3.0/tests/0109-auto_create_topics.cpp similarity index 99% rename from lib/librdkafka-2.1.0/tests/0109-auto_create_topics.cpp rename to lib/librdkafka-2.3.0/tests/0109-auto_create_topics.cpp index cabee67041c..b64050fee4d 100644 --- a/lib/librdkafka-2.1.0/tests/0109-auto_create_topics.cpp +++ b/lib/librdkafka-2.3.0/tests/0109-auto_create_topics.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020, Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0110-batch_size.cpp b/lib/librdkafka-2.3.0/tests/0110-batch_size.cpp similarity index 99% rename from lib/librdkafka-2.1.0/tests/0110-batch_size.cpp rename to lib/librdkafka-2.3.0/tests/0110-batch_size.cpp index 1f36b3a763a..5b216c28049 100644 --- a/lib/librdkafka-2.1.0/tests/0110-batch_size.cpp +++ b/lib/librdkafka-2.3.0/tests/0110-batch_size.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020, Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0111-delay_create_topics.cpp b/lib/librdkafka-2.3.0/tests/0111-delay_create_topics.cpp similarity index 98% rename from lib/librdkafka-2.1.0/tests/0111-delay_create_topics.cpp rename to lib/librdkafka-2.3.0/tests/0111-delay_create_topics.cpp index 4b6683add9b..a46282bd176 100644 --- a/lib/librdkafka-2.1.0/tests/0111-delay_create_topics.cpp +++ b/lib/librdkafka-2.3.0/tests/0111-delay_create_topics.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020, Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0112-assign_unknown_part.c b/lib/librdkafka-2.3.0/tests/0112-assign_unknown_part.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/0112-assign_unknown_part.c rename to lib/librdkafka-2.3.0/tests/0112-assign_unknown_part.c index d945a2c32c0..a32d8f39ad1 100644 --- a/lib/librdkafka-2.1.0/tests/0112-assign_unknown_part.c +++ b/lib/librdkafka-2.3.0/tests/0112-assign_unknown_part.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2020, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0113-cooperative_rebalance.cpp b/lib/librdkafka-2.3.0/tests/0113-cooperative_rebalance.cpp similarity index 98% rename from lib/librdkafka-2.1.0/tests/0113-cooperative_rebalance.cpp rename to lib/librdkafka-2.3.0/tests/0113-cooperative_rebalance.cpp index 430798d7f77..c54619d714a 100644 --- a/lib/librdkafka-2.1.0/tests/0113-cooperative_rebalance.cpp +++ b/lib/librdkafka-2.3.0/tests/0113-cooperative_rebalance.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020, Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -2914,6 +2914,57 @@ static void r_lost_partitions_commit_illegal_generation_test_local() { test_mock_cluster_destroy(mcluster); } +/** + * @brief Test that the consumer is destroyed without a segfault if + * destruction happens before the first rebalance and there is no assignor + * state. See #4312 + */ +static void s_no_segfault_before_first_rebalance(void) { + rd_kafka_t *c; + rd_kafka_conf_t *conf; + rd_kafka_mock_cluster_t *mcluster; + const char *topic; + const char *bootstraps; + + SUB_TEST_QUICK(); + + TEST_SAY("Creating mock cluster\n"); + mcluster = test_mock_cluster_new(1, &bootstraps); + + topic = test_mk_topic_name("0113_s", 1); + + test_conf_init(&conf, NULL, 60); + test_conf_set(conf, "bootstrap.servers", bootstraps); + test_conf_set(conf, "partition.assignment.strategy", "cooperative-sticky"); + + TEST_SAY("Creating topic %s\n", topic); + TEST_CALL_ERR__(rd_kafka_mock_topic_create( + mcluster, topic, 2 /* partition_cnt */, 1 /* replication_factor */)); + + c = test_create_consumer(topic, NULL, conf, NULL); + + /* Add a 1s delay to the SyncGroup response so the next condition can happen. */ + rd_kafka_mock_broker_push_request_error_rtts( + mcluster, 1 /*Broker 1*/, RD_KAFKAP_SyncGroup, 1, + RD_KAFKA_RESP_ERR_NOT_COORDINATOR, 1000); + + test_consumer_subscribe(c, topic); + + /* Wait for the initial rebalance: 3000 ms (default) + 500 ms for processing + * the JoinGroup response. Consumer close must come between the JoinGroup + * response and the SyncGroup response, so that rkcg_assignor is set, + * but rkcg_assignor_state isn't. */ + TEST_ASSERT(!test_consumer_poll_once(c, NULL, 3500), "poll should time out"); + + rd_kafka_consumer_close(c); + + rd_kafka_destroy(c); + + TEST_SAY("Destroying mock cluster\n"); + test_mock_cluster_destroy(mcluster); + + SUB_TEST_PASS(); +} /** * @brief Rebalance callback for the v_.. test below. @@ -3117,6 +3168,7 @@ int main_0113_cooperative_rebalance_local(int argc, char **argv) { q_lost_partitions_illegal_generation_test(rd_false /*joingroup*/); q_lost_partitions_illegal_generation_test(rd_true /*syncgroup*/); r_lost_partitions_commit_illegal_generation_test_local(); + s_no_segfault_before_first_rebalance(); return 0; } diff --git a/lib/librdkafka-2.1.0/tests/0114-sticky_partitioning.cpp b/lib/librdkafka-2.3.0/tests/0114-sticky_partitioning.cpp similarity index 99% rename from lib/librdkafka-2.1.0/tests/0114-sticky_partitioning.cpp rename to lib/librdkafka-2.3.0/tests/0114-sticky_partitioning.cpp index 8ef88e7df40..f3b33301ef7 100644 --- a/lib/librdkafka-2.1.0/tests/0114-sticky_partitioning.cpp +++ b/lib/librdkafka-2.3.0/tests/0114-sticky_partitioning.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020, Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0115-producer_auth.cpp b/lib/librdkafka-2.3.0/tests/0115-producer_auth.cpp similarity index 99% rename from lib/librdkafka-2.1.0/tests/0115-producer_auth.cpp rename to lib/librdkafka-2.3.0/tests/0115-producer_auth.cpp index c4d1a96aa91..644ff1af245 100644 --- a/lib/librdkafka-2.1.0/tests/0115-producer_auth.cpp +++ b/lib/librdkafka-2.3.0/tests/0115-producer_auth.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020, Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved.
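The fault injection used above can be sketched in isolation (inject_slow_syncgroup is a hypothetical helper; it assumes the mock cluster API from librdkafka's rdkafka_mock.h and the RD_KAFKAP_SyncGroup ApiKey constant from the protocol header the test suite has access to). The variadic tail of the call is `cnt` (error, rtt_ms) pairs.

#include <librdkafka/rdkafka.h>
#include <librdkafka/rdkafka_mock.h>

/* Sketch: have broker 1 answer its next SyncGroup request with
 * NOT_COORDINATOR after a 1000 ms delay. Assumes `mcluster` came from
 * rd_kafka_mock_cluster_new() (or the test harness wrapper used above). */
static void inject_slow_syncgroup(rd_kafka_mock_cluster_t *mcluster) {
        rd_kafka_mock_broker_push_request_error_rtts(
            mcluster, 1 /* broker id */, RD_KAFKAP_SyncGroup, 1 /* cnt */,
            RD_KAFKA_RESP_ERR_NOT_COORDINATOR, 1000 /* rtt ms */);
}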
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0116-kafkaconsumer_close.cpp b/lib/librdkafka-2.3.0/tests/0116-kafkaconsumer_close.cpp similarity index 99% rename from lib/librdkafka-2.1.0/tests/0116-kafkaconsumer_close.cpp rename to lib/librdkafka-2.3.0/tests/0116-kafkaconsumer_close.cpp index c674d4443bb..dd68c99f704 100644 --- a/lib/librdkafka-2.1.0/tests/0116-kafkaconsumer_close.cpp +++ b/lib/librdkafka-2.3.0/tests/0116-kafkaconsumer_close.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020, Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0117-mock_errors.c b/lib/librdkafka-2.3.0/tests/0117-mock_errors.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0117-mock_errors.c rename to lib/librdkafka-2.3.0/tests/0117-mock_errors.c index 7a82f713eaf..b91a3b61e8c 100644 --- a/lib/librdkafka-2.1.0/tests/0117-mock_errors.c +++ b/lib/librdkafka-2.3.0/tests/0117-mock_errors.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020, Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0118-commit_rebalance.c b/lib/librdkafka-2.3.0/tests/0118-commit_rebalance.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0118-commit_rebalance.c rename to lib/librdkafka-2.3.0/tests/0118-commit_rebalance.c index 1cdcda46238..1ca0a683668 100644 --- a/lib/librdkafka-2.1.0/tests/0118-commit_rebalance.c +++ b/lib/librdkafka-2.3.0/tests/0118-commit_rebalance.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020, Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0119-consumer_auth.cpp b/lib/librdkafka-2.3.0/tests/0119-consumer_auth.cpp similarity index 99% rename from lib/librdkafka-2.1.0/tests/0119-consumer_auth.cpp rename to lib/librdkafka-2.3.0/tests/0119-consumer_auth.cpp index 507b673024a..40c81ea32b0 100644 --- a/lib/librdkafka-2.1.0/tests/0119-consumer_auth.cpp +++ b/lib/librdkafka-2.3.0/tests/0119-consumer_auth.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020, Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0120-asymmetric_subscription.c b/lib/librdkafka-2.3.0/tests/0120-asymmetric_subscription.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0120-asymmetric_subscription.c rename to lib/librdkafka-2.3.0/tests/0120-asymmetric_subscription.c index 2031dcba195..11ee5f705ea 100644 --- a/lib/librdkafka-2.1.0/tests/0120-asymmetric_subscription.c +++ b/lib/librdkafka-2.3.0/tests/0120-asymmetric_subscription.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020, Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0121-clusterid.c b/lib/librdkafka-2.3.0/tests/0121-clusterid.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/0121-clusterid.c rename to lib/librdkafka-2.3.0/tests/0121-clusterid.c index 35f5d529e9a..0a463a88d04 100644 --- a/lib/librdkafka-2.1.0/tests/0121-clusterid.c +++ b/lib/librdkafka-2.3.0/tests/0121-clusterid.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020, Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0122-buffer_cleaning_after_rebalance.c b/lib/librdkafka-2.3.0/tests/0122-buffer_cleaning_after_rebalance.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0122-buffer_cleaning_after_rebalance.c rename to lib/librdkafka-2.3.0/tests/0122-buffer_cleaning_after_rebalance.c index 4f8727017f0..9778391e891 100644 --- a/lib/librdkafka-2.1.0/tests/0122-buffer_cleaning_after_rebalance.c +++ b/lib/librdkafka-2.3.0/tests/0122-buffer_cleaning_after_rebalance.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2021, Magnus Edenhill + * Copyright (c) 2021-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0123-connections_max_idle.c b/lib/librdkafka-2.3.0/tests/0123-connections_max_idle.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/0123-connections_max_idle.c rename to lib/librdkafka-2.3.0/tests/0123-connections_max_idle.c index 734467017d5..6c7eb8eef99 100644 --- a/lib/librdkafka-2.1.0/tests/0123-connections_max_idle.c +++ b/lib/librdkafka-2.3.0/tests/0123-connections_max_idle.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2021, Magnus Edenhill + * Copyright (c) 2021-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0124-openssl_invalid_engine.c b/lib/librdkafka-2.3.0/tests/0124-openssl_invalid_engine.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/0124-openssl_invalid_engine.c rename to lib/librdkafka-2.3.0/tests/0124-openssl_invalid_engine.c index 5c61e5318ad..33371f4f0b0 100644 --- a/lib/librdkafka-2.1.0/tests/0124-openssl_invalid_engine.c +++ b/lib/librdkafka-2.3.0/tests/0124-openssl_invalid_engine.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2021, Magnus Edenhill + * Copyright (c) 2021-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0125-immediate_flush.c b/lib/librdkafka-2.3.0/tests/0125-immediate_flush.c similarity index 56% rename from lib/librdkafka-2.1.0/tests/0125-immediate_flush.c rename to lib/librdkafka-2.3.0/tests/0125-immediate_flush.c index 12f36cf191d..c7cbcca174b 100644 --- a/lib/librdkafka-2.1.0/tests/0125-immediate_flush.c +++ b/lib/librdkafka-2.3.0/tests/0125-immediate_flush.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2021, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. 
 *
 * Redistribution and use in source and binary forms, with or without
@@ -33,7 +34,7 @@
  * Verify that flush() overrides the linger.ms time.
  *
  */
-int main_0125_immediate_flush(int argc, char **argv) {
+void do_test_flush_overrides_linger_ms_time() {
         rd_kafka_t *rk;
         rd_kafka_conf_t *conf;
         const char *topic = test_mk_topic_name("0125_immediate_flush", 1);
@@ -73,6 +74,74 @@ int main_0125_immediate_flush(int argc, char **argv) {
 
         /* Verify messages were actually produced by consuming them back. */
         test_consume_msgs_easy(topic, topic, 0, 1, msgcnt, NULL);
+}
+
+/**
+ * @brief Tests whether the first metadata call is able to update the leader
+ * for the topic. If it is not able to update the leader for some partitions,
+ * the flush call waits 1s to refresh the leader, and only then the flush
+ * completes. Ideally, the leader should be updated by the first call itself.
+ *
+ * The number of brokers in the cluster should be greater than the number of
+ * brokers in the bootstrap.servers list for this test case to work correctly.
+ *
+ */
+void do_test_first_flush_immediate() {
+        rd_kafka_mock_cluster_t *mock_cluster;
+        rd_kafka_t *produce_rk;
+        const char *brokers;
+        char *bootstrap_server;
+        test_timing_t t_time;
+        size_t i;
+        rd_kafka_conf_t *conf = NULL;
+        const char *topic = test_mk_topic_name("0125_immediate_flush", 1);
+        size_t partition_cnt = 9;
+        int remains = 0;
+
+        mock_cluster = test_mock_cluster_new(3, &brokers);
+
+        for (i = 0; brokers[i]; i++)
+                if (brokers[i] == ',' || brokers[i] == ' ')
+                        break;
+        bootstrap_server = rd_strndup(brokers, i);
+
+        test_conf_init(&conf, NULL, 30);
+        rd_kafka_conf_set_dr_msg_cb(conf, test_dr_msg_cb);
+        test_conf_set(conf, "bootstrap.servers", bootstrap_server);
+        free(bootstrap_server);
+
+        rd_kafka_mock_topic_create(mock_cluster, topic, partition_cnt, 1);
+
+        produce_rk = test_create_handle(RD_KAFKA_PRODUCER, conf);
+
+        for (i = 0; i < partition_cnt; i++) {
+                test_produce_msgs2_nowait(produce_rk, topic, 0, i, 0, 1, NULL,
+                                          0, &remains);
+        }
+
+        TIMING_START(&t_time, "FLUSH");
+        TEST_CALL_ERR__(rd_kafka_flush(produce_rk, 5000));
+        TIMING_ASSERT(&t_time, 0, 999);
+
+        rd_kafka_destroy(produce_rk);
+        test_mock_cluster_destroy(mock_cluster);
+}
+
+int main_0125_immediate_flush(int argc, char **argv) {
+
+        do_test_flush_overrides_linger_ms_time();
+
+        return 0;
+}
+
+int main_0125_immediate_flush_mock(int argc, char **argv) {
+
+        if (test_needs_auth()) {
+                TEST_SKIP("Mock cluster does not support SSL/SASL\n");
+                return 0;
+        }
+
+        do_test_first_flush_immediate();
 
         return 0;
 }
diff --git a/lib/librdkafka-2.1.0/tests/0126-oauthbearer_oidc.c b/lib/librdkafka-2.3.0/tests/0126-oauthbearer_oidc.c
similarity index 99%
rename from lib/librdkafka-2.1.0/tests/0126-oauthbearer_oidc.c
rename to lib/librdkafka-2.3.0/tests/0126-oauthbearer_oidc.c
index 8eb18706844..0db40ea1dc2 100644
--- a/lib/librdkafka-2.1.0/tests/0126-oauthbearer_oidc.c
+++ b/lib/librdkafka-2.3.0/tests/0126-oauthbearer_oidc.c
@@ -1,7 +1,7 @@
 /*
  * librdkafka - Apache Kafka C library
  *
- * Copyright (c) 2021, Magnus Edenhill
+ * Copyright (c) 2021-2022, Magnus Edenhill
 * All rights reserved.
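Editor's note: the linger-override behaviour verified by `do_test_flush_overrides_linger_ms_time` boils down to a short public-API sequence. A hedged sketch (broker address, topic and timeouts are illustrative, not taken from the test):

```c
#include <stdio.h>
#include <librdkafka/rdkafka.h>

char errstr[512];
rd_kafka_conf_t *conf = rd_kafka_conf_new();
rd_kafka_conf_set(conf, "bootstrap.servers", "localhost:9092", errstr, sizeof(errstr));
/* A large linger.ms would normally hold the message in the accumulator. */
rd_kafka_conf_set(conf, "linger.ms", "10000", errstr, sizeof(errstr));

rd_kafka_t *rk = rd_kafka_new(RD_KAFKA_PRODUCER, conf, errstr, sizeof(errstr));

rd_kafka_producev(rk, RD_KAFKA_V_TOPIC("my_topic"),
                  RD_KAFKA_V_VALUE("hi", 2), RD_KAFKA_V_END);

/* flush() must override linger.ms and return well before it elapses. */
rd_kafka_resp_err_t err = rd_kafka_flush(rk, 5000 /* timeout ms */);
if (err)
        fprintf(stderr, "flush: %s\n", rd_kafka_err2str(err));
rd_kafka_destroy(rk);
```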
 *
 * Redistribution and use in source and binary forms, with or without
diff --git a/lib/librdkafka-2.3.0/tests/0127-fetch_queue_backoff.cpp b/lib/librdkafka-2.3.0/tests/0127-fetch_queue_backoff.cpp
new file mode 100644
index 00000000000..41c2db8c3ba
--- /dev/null
+++ b/lib/librdkafka-2.3.0/tests/0127-fetch_queue_backoff.cpp
@@ -0,0 +1,165 @@
+/*
+ * librdkafka - Apache Kafka C library
+ *
+ * Copyright (c) 2020-2022, Magnus Edenhill
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+#include
+#include
+#include
+#include "testcpp.h"
+extern "C" {
+#include "test.h"
+}
+
+/**
+ * Test consumer fetch.queue.backoff.ms behaviour.
+ *
+ * @param backoff_ms Backoff ms to configure, -1 to rely on the default one.
+ *
+ * 1. Produce N messages, 1 message per batch.
+ * 2. Configure consumer with queued.min.messages=1 and
+ *    fetch.queue.backoff.ms=<backoff_ms>
+ * 3. Verify that the consume() latency is <= fetch.queue.backoff.ms.
+ */
+
+
+static void do_test_queue_backoff(const std::string &topic, int backoff_ms) {
+  SUB_TEST("backoff_ms = %d", backoff_ms);
+
+  /* Create consumer */
+  RdKafka::Conf *conf;
+  Test::conf_init(&conf, NULL, 60);
+  Test::conf_set(conf, "group.id", topic);
+  Test::conf_set(conf, "enable.auto.commit", "false");
+  Test::conf_set(conf, "auto.offset.reset", "beginning");
+  Test::conf_set(conf, "queued.min.messages", "1");
+  if (backoff_ms >= 0) {
+    Test::conf_set(conf, "fetch.queue.backoff.ms", tostr() << backoff_ms);
+  }
+  /* Make sure to include only one message in each fetch.
 * Message size is 10000.
 */
+  Test::conf_set(conf, "fetch.message.max.bytes", "12000");
+
+  if (backoff_ms < 0)
+    /* default */
+    backoff_ms = 1000;
+
+  std::string errstr;
+
+  RdKafka::KafkaConsumer *c = RdKafka::KafkaConsumer::create(conf, errstr);
+  if (!c)
+    Test::Fail("Failed to create KafkaConsumer: " + errstr);
+  delete conf;
+
+  RdKafka::TopicPartition *rktpar = RdKafka::TopicPartition::create(topic, 0);
+  std::vector<RdKafka::TopicPartition *> parts;
+  parts.push_back(rktpar);
+
+  RdKafka::ErrorCode err;
+  if ((err = c->assign(parts)))
+    Test::Fail("assign failed: " + RdKafka::err2str(err));
+  RdKafka::TopicPartition::destroy(parts);
+
+  int received       = 0;
+  int in_profile_cnt = 0;
+  int dmax =
+      (int)((double)backoff_ms * (test_timeout_multiplier > 1 ? 1.5 : 1.2));
+  if (backoff_ms < 15)
+    dmax = 15;
+
+  int64_t ts_consume = test_clock();
+
+  while (received < 5) {
+    /* Wait more than dmax to count out of profile messages.
+     * Different for first message, that is skipped. */
+    int consume_timeout =
+        received == 0 ? 500 * test_timeout_multiplier : dmax * 2;
+    RdKafka::Message *msg = c->consume(consume_timeout);
+
+    rd_ts_t now = test_clock();
+    int latency = (test_clock() - ts_consume) / 1000;
+    ts_consume  = now;
+    bool in_profile = latency <= dmax;
+
+    if (!msg)
+      Test::Fail(tostr() << "No message for " << consume_timeout << "ms");
+    if (msg->err())
+      Test::Fail("Unexpected consumer error: " + msg->errstr());
+
+    Test::Say(tostr() << "Message #" << received << " consumed in " << latency
+                      << "ms (expecting <= " << dmax << "ms)"
+                      << (received == 0 ? ": skipping first" : "")
+                      << (in_profile ? ": in profile" : ": OUT OF PROFILE")
+                      << "\n");
+
+    if (received++ > 0 && in_profile)
+      in_profile_cnt++;
+
+    delete msg;
+  }
+
+  Test::Say(tostr() << in_profile_cnt << "/" << received << " messages were "
+                    << "in profile (<= " << dmax
+                    << ") for backoff_ms=" << backoff_ms << "\n");
+
+  /* first message isn't counted */
+  const int expected_in_profile = received - 1;
+  TEST_ASSERT(expected_in_profile - in_profile_cnt == 0,
+              "Only %d/%d messages were in profile", in_profile_cnt,
+              expected_in_profile);
+
+  delete c;
+
+  SUB_TEST_PASS();
+}
+
+
+extern "C" {
+int main_0127_fetch_queue_backoff(int argc, char **argv) {
+  std::string topic = Test::mk_topic_name("0127_fetch_queue_backoff", 1);
+
+  /* Prime the topic with messages. */
+  RdKafka::Conf *conf;
+  Test::conf_init(&conf, NULL, 10);
+  Test::conf_set(conf, "batch.num.messages", "1");
+  std::string errstr;
+  RdKafka::Producer *p = RdKafka::Producer::create(conf, errstr);
+  if (!p)
+    Test::Fail(tostr() << __FUNCTION__
+                       << ": Failed to create producer: " << errstr);
+  delete conf;
+
+  Test::produce_msgs(p, topic, 0, 100, 10000, true /*flush*/);
+  delete p;
+
+  do_test_queue_backoff(topic, -1);
+  do_test_queue_backoff(topic, 500);
+  do_test_queue_backoff(topic, 10);
+  do_test_queue_backoff(topic, 0);
+  return 0;
+}
}
diff --git a/lib/librdkafka-2.1.0/tests/0128-sasl_callback_queue.cpp b/lib/librdkafka-2.3.0/tests/0128-sasl_callback_queue.cpp
similarity index 98%
rename from lib/librdkafka-2.1.0/tests/0128-sasl_callback_queue.cpp
rename to lib/librdkafka-2.3.0/tests/0128-sasl_callback_queue.cpp
index 784f09bf605..aaf23a081bf 100644
--- a/lib/librdkafka-2.1.0/tests/0128-sasl_callback_queue.cpp
+++ b/lib/librdkafka-2.3.0/tests/0128-sasl_callback_queue.cpp
@@ -1,7 +1,7 @@
 /*
  * librdkafka - Apache Kafka C library
  *
- * Copyright (c) 2021, Magnus Edenhill
+ * Copyright (c) 2021-2022, Magnus Edenhill
 * All rights reserved.
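Editor's note: the property trio exercised by this test can be set like any other consumer configuration. A hedged sketch of the configuration only (group id and values illustrative; `fetch.queue.backoff.ms` exists as of librdkafka 2.3.0):

```c
#include <librdkafka/rdkafka.h>

char errstr[512];
rd_kafka_conf_t *conf = rd_kafka_conf_new();
rd_kafka_conf_set(conf, "group.id", "grp", errstr, sizeof(errstr));
/* Wake the fetcher as soon as the app queue drops below one message... */
rd_kafka_conf_set(conf, "queued.min.messages", "1", errstr, sizeof(errstr));
/* ...but back off this long before issuing the next fetch. */
rd_kafka_conf_set(conf, "fetch.queue.backoff.ms", "10", errstr, sizeof(errstr));
/* Keep each fetch small so at most one 10000-byte message fits. */
rd_kafka_conf_set(conf, "fetch.message.max.bytes", "12000", errstr, sizeof(errstr));
```

The test then asserts that per-message consume latency stays at or below the configured backoff.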
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0129-fetch_aborted_msgs.c b/lib/librdkafka-2.3.0/tests/0129-fetch_aborted_msgs.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/0129-fetch_aborted_msgs.c rename to lib/librdkafka-2.3.0/tests/0129-fetch_aborted_msgs.c index cc150feccbe..7805e6094fe 100644 --- a/lib/librdkafka-2.1.0/tests/0129-fetch_aborted_msgs.c +++ b/lib/librdkafka-2.3.0/tests/0129-fetch_aborted_msgs.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2021, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0130-store_offsets.c b/lib/librdkafka-2.3.0/tests/0130-store_offsets.c similarity index 61% rename from lib/librdkafka-2.1.0/tests/0130-store_offsets.c rename to lib/librdkafka-2.3.0/tests/0130-store_offsets.c index 9fb8d2350a5..e451d7569b2 100644 --- a/lib/librdkafka-2.1.0/tests/0130-store_offsets.c +++ b/lib/librdkafka-2.3.0/tests/0130-store_offsets.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020, Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,8 +31,8 @@ /** - * Verify that offsets_store() is not allowed for unassigned partitions, - * and that those offsets are not committed. + * Verify that offsets_store() commits the right offsets and metadata, + * and is not allowed for unassigned partitions. */ static void do_test_store_unassigned(void) { const char *topic = test_mk_topic_name("0130_store_unassigned", 1); @@ -40,6 +41,7 @@ static void do_test_store_unassigned(void) { rd_kafka_topic_partition_list_t *parts; rd_kafka_resp_err_t err; rd_kafka_message_t *rkmessage; + char metadata[] = "metadata"; const int64_t proper_offset = 900, bad_offset = 300; SUB_TEST_QUICK(); @@ -60,8 +62,13 @@ static void do_test_store_unassigned(void) { TEST_SAY("Consume one message\n"); test_consumer_poll_once(c, NULL, tmout_multip(3000)); - parts->elems[0].offset = proper_offset; - TEST_SAY("Storing offset %" PRId64 " while assigned: should succeed\n", + parts->elems[0].offset = proper_offset; + parts->elems[0].metadata_size = sizeof metadata; + parts->elems[0].metadata = malloc(parts->elems[0].metadata_size); + memcpy(parts->elems[0].metadata, metadata, + parts->elems[0].metadata_size); + TEST_SAY("Storing offset %" PRId64 + " with metadata while assigned: should succeed\n", parts->elems[0].offset); TEST_CALL_ERR__(rd_kafka_offsets_store(c, parts)); @@ -71,7 +78,10 @@ static void do_test_store_unassigned(void) { TEST_SAY("Unassigning partitions and trying to store again\n"); TEST_CALL_ERR__(rd_kafka_assign(c, NULL)); - parts->elems[0].offset = bad_offset; + parts->elems[0].offset = bad_offset; + parts->elems[0].metadata_size = 0; + rd_free(parts->elems[0].metadata); + parts->elems[0].metadata = NULL; TEST_SAY("Storing offset %" PRId64 " while unassigned: should fail\n", parts->elems[0].offset); err = rd_kafka_offsets_store(c, parts); @@ -108,9 +118,50 @@ static void do_test_store_unassigned(void) { "offset %" PRId64 ", not %" PRId64, proper_offset, rkmessage->offset); + TEST_SAY( + "Retrieving committed offsets to verify committed offset " + "metadata\n"); + rd_kafka_topic_partition_list_t *committed_toppar; + committed_toppar = rd_kafka_topic_partition_list_new(1); + 
rd_kafka_topic_partition_list_add(committed_toppar, topic, 0); + TEST_CALL_ERR__( + rd_kafka_committed(c, committed_toppar, tmout_multip(3000))); + TEST_ASSERT(committed_toppar->elems[0].offset == proper_offset, + "Expected committed offset to be %" PRId64 ", not %" PRId64, + proper_offset, committed_toppar->elems[0].offset); + TEST_ASSERT(committed_toppar->elems[0].metadata != NULL, + "Expected metadata to not be NULL"); + TEST_ASSERT(strcmp(committed_toppar->elems[0].metadata, metadata) == 0, + "Expected metadata to be %s, not %s", metadata, + (char *)committed_toppar->elems[0].metadata); + + TEST_SAY("Storing next offset without metadata\n"); + parts->elems[0].offset = proper_offset + 1; + TEST_CALL_ERR__(rd_kafka_offsets_store(c, parts)); + + TEST_SAY("Committing\n"); + TEST_CALL_ERR__(rd_kafka_commit(c, NULL, rd_false /*sync*/)); + + TEST_SAY( + "Retrieving committed offset to verify empty committed offset " + "metadata\n"); + rd_kafka_topic_partition_list_t *committed_toppar_empty; + committed_toppar_empty = rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(committed_toppar_empty, topic, 0); + TEST_CALL_ERR__( + rd_kafka_committed(c, committed_toppar_empty, tmout_multip(3000))); + TEST_ASSERT(committed_toppar_empty->elems[0].offset == + proper_offset + 1, + "Expected committed offset to be %" PRId64 ", not %" PRId64, + proper_offset, committed_toppar_empty->elems[0].offset); + TEST_ASSERT(committed_toppar_empty->elems[0].metadata == NULL, + "Expected metadata to be NULL"); + rd_kafka_message_destroy(rkmessage); rd_kafka_topic_partition_list_destroy(parts); + rd_kafka_topic_partition_list_destroy(committed_toppar); + rd_kafka_topic_partition_list_destroy(committed_toppar_empty); rd_kafka_consumer_close(c); rd_kafka_destroy(c); diff --git a/lib/librdkafka-2.1.0/tests/0131-connect_timeout.c b/lib/librdkafka-2.3.0/tests/0131-connect_timeout.c similarity index 100% rename from lib/librdkafka-2.1.0/tests/0131-connect_timeout.c rename to lib/librdkafka-2.3.0/tests/0131-connect_timeout.c diff --git a/lib/librdkafka-2.1.0/tests/0132-strategy_ordering.c b/lib/librdkafka-2.3.0/tests/0132-strategy_ordering.c similarity index 100% rename from lib/librdkafka-2.1.0/tests/0132-strategy_ordering.c rename to lib/librdkafka-2.3.0/tests/0132-strategy_ordering.c diff --git a/lib/librdkafka-2.1.0/tests/0133-ssl_keys.c b/lib/librdkafka-2.3.0/tests/0133-ssl_keys.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/0133-ssl_keys.c rename to lib/librdkafka-2.3.0/tests/0133-ssl_keys.c index 850fa27613b..4c3e66fee70 100644 --- a/lib/librdkafka-2.1.0/tests/0133-ssl_keys.c +++ b/lib/librdkafka-2.3.0/tests/0133-ssl_keys.c @@ -34,7 +34,7 @@ * file. Decoding it with the correct password or not. * * Ensures it's read correctly on Windows too. 
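Editor's note: the metadata handling added to 0130 above follows a single pattern; a sketch, assuming a consumer `c` that is currently assigned partition 0 of `"topic"` and `enable.auto.offset.store=false` (names and offset illustrative). The list destructor frees the stored metadata pointer, which is why it is heap-allocated:

```c
#include <stdlib.h>
#include <string.h>
#include <librdkafka/rdkafka.h>

const char metadata[] = "metadata";
rd_kafka_topic_partition_list_t *parts = rd_kafka_topic_partition_list_new(1);
rd_kafka_topic_partition_t *p =
    rd_kafka_topic_partition_list_add(parts, "topic", 0);

p->offset        = 900;
p->metadata_size = sizeof(metadata);
p->metadata      = malloc(p->metadata_size);
memcpy(p->metadata, metadata, p->metadata_size);

/* Fails with ERR__STATE for unassigned partitions. */
rd_kafka_resp_err_t err = rd_kafka_offsets_store(c, parts);
if (!err)
        err = rd_kafka_commit(c, NULL, 0 /* 0 = synchronous */);

rd_kafka_topic_partition_list_destroy(parts); /* also frees p->metadata */
```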
- * See https://github.com/edenhill/librdkafka/issues/3992 + * See https://github.com/confluentinc/librdkafka/issues/3992 */ static void do_test_ssl_keys(const char *type, rd_bool_t correct_password) { #define TEST_FIXTURES_FOLDER "./fixtures" diff --git a/lib/librdkafka-2.1.0/tests/0134-ssl_provider.c b/lib/librdkafka-2.3.0/tests/0134-ssl_provider.c similarity index 100% rename from lib/librdkafka-2.1.0/tests/0134-ssl_provider.c rename to lib/librdkafka-2.3.0/tests/0134-ssl_provider.c diff --git a/lib/librdkafka-2.1.0/tests/0135-sasl_credentials.cpp b/lib/librdkafka-2.3.0/tests/0135-sasl_credentials.cpp similarity index 100% rename from lib/librdkafka-2.1.0/tests/0135-sasl_credentials.cpp rename to lib/librdkafka-2.3.0/tests/0135-sasl_credentials.cpp diff --git a/lib/librdkafka-2.1.0/tests/0136-resolve_cb.c b/lib/librdkafka-2.3.0/tests/0136-resolve_cb.c similarity index 100% rename from lib/librdkafka-2.1.0/tests/0136-resolve_cb.c rename to lib/librdkafka-2.3.0/tests/0136-resolve_cb.c diff --git a/lib/librdkafka-2.1.0/tests/0137-barrier_batch_consume.c b/lib/librdkafka-2.3.0/tests/0137-barrier_batch_consume.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/0137-barrier_batch_consume.c rename to lib/librdkafka-2.3.0/tests/0137-barrier_batch_consume.c index 4e3c855d23a..d5c2b32d078 100644 --- a/lib/librdkafka-2.1.0/tests/0137-barrier_batch_consume.c +++ b/lib/librdkafka-2.3.0/tests/0137-barrier_batch_consume.c @@ -2,6 +2,7 @@ * librdkafka - Apache Kafka C library * * Copyright (c) 2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/0138-admin_mock.c b/lib/librdkafka-2.3.0/tests/0138-admin_mock.c similarity index 69% rename from lib/librdkafka-2.1.0/tests/0138-admin_mock.c rename to lib/librdkafka-2.3.0/tests/0138-admin_mock.c index 0f9021de977..32c67c09d7d 100644 --- a/lib/librdkafka-2.1.0/tests/0138-admin_mock.c +++ b/lib/librdkafka-2.3.0/tests/0138-admin_mock.c @@ -175,6 +175,99 @@ static void do_test_AlterConsumerGroupOffsets_errors(int req_timeout_ms) { #undef TEST_ERR_SIZE } +/** + * @brief A leader change should remove metadata cache for a topic + * queried in ListOffsets. 
+ */ +static void do_test_ListOffsets_leader_change(void) { + size_t cnt; + rd_kafka_conf_t *conf; + rd_kafka_mock_cluster_t *mcluster; + const char *bootstraps; + const char *topic = "test"; + rd_kafka_t *rk; + rd_kafka_queue_t *q; + rd_kafka_topic_partition_list_t *to_list; + rd_kafka_event_t *rkev; + rd_kafka_resp_err_t err; + const rd_kafka_ListOffsets_result_t *result; + const rd_kafka_ListOffsetsResultInfo_t **result_infos; + + test_conf_init(&conf, NULL, 60); + + mcluster = test_mock_cluster_new(2, &bootstraps); + rd_kafka_mock_topic_create(mcluster, topic, 1, 2); + rd_kafka_mock_partition_set_leader(mcluster, topic, 0, 1); + test_conf_set(conf, "bootstrap.servers", bootstraps); + + rk = test_create_handle(RD_KAFKA_CONSUMER, conf); + + q = rd_kafka_queue_get_main(rk); + + to_list = rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(to_list, topic, 0)->offset = -1; + + TEST_SAY("First ListOffsets call to leader broker 1\n"); + rd_kafka_ListOffsets(rk, to_list, NULL, q); + + rkev = rd_kafka_queue_poll(q, -1); + + TEST_ASSERT(rd_kafka_event_type(rkev) == + RD_KAFKA_EVENT_LISTOFFSETS_RESULT, + "Expected LISTOFFSETS_RESULT event type, got %d", + rd_kafka_event_type(rkev)); + + TEST_CALL_ERR__(rd_kafka_event_error(rkev)); + + rd_kafka_event_destroy(rkev); + + + rd_kafka_mock_partition_set_leader(mcluster, topic, 0, 2); + + TEST_SAY( + "Second ListOffsets call to leader broker 1, returns " + "NOT_LEADER_OR_FOLLOWER" + " and invalidates cache\n"); + rd_kafka_ListOffsets(rk, to_list, NULL, q); + + rkev = rd_kafka_queue_poll(q, -1); + result = rd_kafka_event_ListOffsets_result(rkev); + result_infos = rd_kafka_ListOffsets_result_infos(result, &cnt); + + TEST_ASSERT(cnt == 1, "Result topic cnt should be 1, got %" PRIusz, + cnt); + err = rd_kafka_ListOffsetsResultInfo_topic_partition(result_infos[0]) + ->err; + TEST_ASSERT(err == RD_KAFKA_RESP_ERR_NOT_LEADER_OR_FOLLOWER, + "Expected event error NOT_LEADER_OR_FOLLOWER, got %s", + rd_kafka_err2name(err)); + + rd_kafka_event_destroy(rkev); + + TEST_SAY( + "Third ListOffsets call to leader broker 2, returns NO_ERROR\n"); + rd_kafka_ListOffsets(rk, to_list, NULL, q); + + rkev = rd_kafka_queue_poll(q, -1); + result = rd_kafka_event_ListOffsets_result(rkev); + result_infos = rd_kafka_ListOffsets_result_infos(result, &cnt); + + TEST_ASSERT(cnt == 1, "Result topic cnt should be 1, got %" PRIusz, + cnt); + err = rd_kafka_ListOffsetsResultInfo_topic_partition(result_infos[0]) + ->err; + TEST_ASSERT(err == RD_KAFKA_RESP_ERR_NO_ERROR, + "Expected event error NO_ERROR, got %s", + rd_kafka_err2name(err)); + + rd_kafka_event_destroy(rkev); + + rd_kafka_topic_partition_list_destroy(to_list); + rd_kafka_queue_destroy(q); + rd_kafka_destroy(rk); + test_mock_cluster_destroy(mcluster); +} + int main_0138_admin_mock(int argc, char **argv) { if (test_needs_auth()) { @@ -185,5 +278,7 @@ int main_0138_admin_mock(int argc, char **argv) { do_test_AlterConsumerGroupOffsets_errors(-1); do_test_AlterConsumerGroupOffsets_errors(1000); + do_test_ListOffsets_leader_change(); + return 0; } diff --git a/lib/librdkafka-2.3.0/tests/0139-offset_validation_mock.c b/lib/librdkafka-2.3.0/tests/0139-offset_validation_mock.c new file mode 100644 index 00000000000..967563fd703 --- /dev/null +++ b/lib/librdkafka-2.3.0/tests/0139-offset_validation_mock.c @@ -0,0 +1,445 @@ +/* + * librdkafka - Apache Kafka C library + * + * Copyright (c) 2023, Confluent Inc. + * All rights reserved. 
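Editor's note: the ListOffsets flow used in `do_test_ListOffsets_leader_change` can be isolated as below; a sketch against an existing handle `rk`, with offset `-1` requesting the latest offset exactly as in the test (topic name illustrative):

```c
#include <librdkafka/rdkafka.h>

rd_kafka_queue_t *q = rd_kafka_queue_get_main(rk);
rd_kafka_topic_partition_list_t *parts = rd_kafka_topic_partition_list_new(1);
rd_kafka_topic_partition_list_add(parts, "test", 0)->offset = -1; /* latest */

rd_kafka_ListOffsets(rk, parts, NULL /* default AdminOptions */, q);

rd_kafka_event_t *ev = rd_kafka_queue_poll(q, -1 /* block */);
if (rd_kafka_event_type(ev) == RD_KAFKA_EVENT_LISTOFFSETS_RESULT) {
        size_t cnt;
        const rd_kafka_ListOffsets_result_t *res =
            rd_kafka_event_ListOffsets_result(ev);
        const rd_kafka_ListOffsetsResultInfo_t **infos =
            rd_kafka_ListOffsets_result_infos(res, &cnt);
        /* Each infos[i] carries the partition, its per-partition error
         * (e.g. NOT_LEADER_OR_FOLLOWER after a leader change) and offset. */
        (void)infos;
}
rd_kafka_event_destroy(ev);
rd_kafka_topic_partition_list_destroy(parts);
rd_kafka_queue_destroy(q);
```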
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "test.h"
+
+#include "../src/rdkafka_proto.h"
+
+
+struct _produce_args {
+        const char *topic;
+        int sleep;
+        rd_kafka_conf_t *conf;
+};
+
+static int produce_concurrent_thread(void *args) {
+        rd_kafka_t *p1;
+        test_curr->exp_dr_err    = RD_KAFKA_RESP_ERR_NO_ERROR;
+        test_curr->exp_dr_status = RD_KAFKA_MSG_STATUS_PERSISTED;
+
+        struct _produce_args *produce_args = args;
+        rd_sleep(produce_args->sleep);
+
+        p1 = test_create_handle(RD_KAFKA_PRODUCER, produce_args->conf);
+        TEST_CALL_ERR__(
+            rd_kafka_producev(p1, RD_KAFKA_V_TOPIC(produce_args->topic),
+                              RD_KAFKA_V_VALUE("hi", 2), RD_KAFKA_V_END));
+        rd_kafka_flush(p1, -1);
+        rd_kafka_destroy(p1);
+        return 0;
+}
+
+/**
+ * @brief Send a produce request in the middle of an offset validation
+ *        and expect that the fetched message is discarded, without producing
+ *        a duplicate when the state becomes active again. See #4249.
+ */
+static void do_test_no_duplicates_during_offset_validation(void) {
+        const char *topic        = test_mk_topic_name(__FUNCTION__, 1);
+        const char *c1_groupid   = topic;
+        rd_kafka_t *c1;
+        rd_kafka_conf_t *conf, *conf_producer;
+        const char *bootstraps;
+        rd_kafka_mock_cluster_t *mcluster;
+        int initial_msg_count = 5;
+        thrd_t thrd;
+        struct _produce_args args = RD_ZERO_INIT;
+        uint64_t testid           = test_id_generate();
+
+        SUB_TEST_QUICK();
+
+        mcluster = test_mock_cluster_new(1, &bootstraps);
+        rd_kafka_mock_topic_create(mcluster, topic, 1, 1);
+
+        /* Slow down OffsetForLeaderEpoch so a produce and
+         * subsequent fetch can happen while it's in-flight */
+        rd_kafka_mock_broker_push_request_error_rtts(
+            mcluster, 1, RD_KAFKAP_OffsetForLeaderEpoch, 1,
+            RD_KAFKA_RESP_ERR_NO_ERROR, 5000);
+
+        test_conf_init(&conf_producer, NULL, 60);
+        test_conf_set(conf_producer, "bootstrap.servers", bootstraps);
+
+
+        /* Seed the topic with messages */
+        test_produce_msgs_easy_v(topic, testid, 0, 0, initial_msg_count, 10,
+                                 "bootstrap.servers", bootstraps,
+                                 "batch.num.messages", "1", NULL);
+
+        args.topic = topic;
+        /* Ensures that the message is produced while an offset validation
+         * is ongoing */
+        args.sleep = 5;
+        args.conf  = conf_producer;
+        /* Spin up concurrent thread */
+        if (thrd_create(&thrd, produce_concurrent_thread, (void *)&args) !=
+            thrd_success)
+                TEST_FAIL("Failed to create thread");
+
+        test_conf_init(&conf, NULL, 60);
+
+        test_conf_set(conf, "bootstrap.servers", bootstraps);
+        /* Ensures that an offset validation happens at the same
+         * time a new message is being produced */
+        test_conf_set(conf, "topic.metadata.refresh.interval.ms", "5000");
+        test_conf_set(conf, "auto.offset.reset", "earliest");
+        test_conf_set(conf, "enable.auto.commit", "false");
+        test_conf_set(conf, "enable.auto.offset.store", "false");
+        test_conf_set(conf, "enable.partition.eof", "true");
+
+        c1 = test_create_consumer(c1_groupid, NULL, conf, NULL);
+        test_consumer_subscribe(c1, topic);
+
+        /* Consume initial messages */
+        test_consumer_poll("MSG_INIT", c1, testid, 0, 0, initial_msg_count,
+                           NULL);
+        /* EOF after initial messages */
+        test_consumer_poll("MSG_EOF", c1, testid, 1, initial_msg_count, 0,
+                           NULL);
+        /* Concurrent producer message and EOF */
+        test_consumer_poll("MSG_AND_EOF", c1, testid, 1, initial_msg_count, 1,
+                           NULL);
+        /* Only an EOF, not a duplicate message */
+        test_consumer_poll("MSG_EOF2", c1, testid, 1, initial_msg_count, 0,
+                           NULL);
+
+        thrd_join(thrd, NULL);
+
+        rd_kafka_destroy(c1);
+
+        test_mock_cluster_destroy(mcluster);
+
+        TEST_LATER_CHECK();
+        SUB_TEST_PASS();
+}
+
+
+/**
+ * @brief Test that a permanent error doesn't cause an offset reset.
+ *        See issues #4293, #4427.
+ * @param err The error OffsetForLeaderEpoch fails with.
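Editor's note: both failure modes in these offset-validation tests are injected through the mock cluster. A short sketch of the two injection calls used above, in isolation (broker id, counts and timings are illustrative):

```c
/* Fail the next OffsetForLeaderEpoch with a (locally) permanent error;
 * the fix makes the client retry validation instead of resetting. */
rd_kafka_mock_push_request_errors(
    mcluster, RD_KAFKAP_OffsetForLeaderEpoch, 1 /* count */,
    RD_KAFKA_RESP_ERR__SSL);

/* Alternatively, keep the request in-flight for 5s without an error,
 * which is how the duplicate-message window above is opened. */
rd_kafka_mock_broker_push_request_error_rtts(
    mcluster, 1 /* broker */, RD_KAFKAP_OffsetForLeaderEpoch, 1 /* count */,
    RD_KAFKA_RESP_ERR_NO_ERROR, 5000 /* rtt in ms */);
```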
+ */
+static void do_test_permanent_error_retried(rd_kafka_resp_err_t err) {
+        rd_kafka_mock_cluster_t *mcluster;
+        rd_kafka_conf_t *conf;
+        const char *bootstraps;
+        const char *topic      = test_mk_topic_name(__FUNCTION__, 1);
+        const char *c1_groupid = topic;
+        rd_kafka_t *c1;
+        rd_kafka_topic_partition_list_t *rktpars;
+        rd_kafka_topic_partition_t *rktpar;
+        int msg_count   = 5;
+        uint64_t testid = test_id_generate();
+
+        SUB_TEST_QUICK("err: %s", rd_kafka_err2name(err));
+
+        mcluster = test_mock_cluster_new(3, &bootstraps);
+        rd_kafka_mock_topic_create(mcluster, topic, 1, 1);
+
+        /* Seed the topic with messages */
+        test_produce_msgs_easy_v(topic, testid, 0, 0, msg_count, 10,
+                                 "bootstrap.servers", bootstraps,
+                                 "batch.num.messages", "1", NULL);
+
+        /* Make OffsetForLeaderEpoch fail with the corresponding error code */
+        rd_kafka_mock_push_request_errors(
+            mcluster, RD_KAFKAP_OffsetForLeaderEpoch, 1, err);
+
+        test_conf_init(&conf, NULL, 60);
+
+        test_conf_set(conf, "bootstrap.servers", bootstraps);
+        test_conf_set(conf, "topic.metadata.refresh.interval.ms", "5000");
+        test_conf_set(conf, "auto.offset.reset", "latest");
+        test_conf_set(conf, "enable.auto.commit", "false");
+        test_conf_set(conf, "enable.auto.offset.store", "false");
+        test_conf_set(conf, "enable.partition.eof", "true");
+
+        c1 = test_create_consumer(c1_groupid, NULL, conf, NULL);
+        test_consumer_subscribe(c1, topic);
+
+        /* EOF because of reset to latest */
+        test_consumer_poll("MSG_EOF", c1, testid, 1, 0, 0, NULL);
+
+        rd_kafka_mock_partition_set_leader(mcluster, topic, 0, 2);
+
+        /* Seek to 0 for validating the offset. */
+        rktpars = rd_kafka_topic_partition_list_new(1);
+        rktpar  = rd_kafka_topic_partition_list_add(rktpars, topic, 0);
+        rktpar->offset = 0;
+
+        /* Will validate the offset when starting to fetch again
+         * from offset 0. */
+        rd_kafka_topic_partition_set_leader_epoch(rktpar, 0);
+        rd_kafka_seek_partitions(c1, rktpars, -1);
+        rd_kafka_topic_partition_list_destroy(rktpars);
+
+        /* Read all messages after the seek to zero.
+         * In case of a permanent error it would instead reset to latest
+         * and get an EOF. */
+        test_consumer_poll("MSG_ALL", c1, testid, 0, 0, 5, NULL);
+
+        rd_kafka_destroy(c1);
+
+        test_mock_cluster_destroy(mcluster);
+
+        TEST_LATER_CHECK();
+        SUB_TEST_PASS();
+}
+
+
+/**
+ * @brief If there's an OffsetForLeaderEpoch request which fails, and the leader
+ * changes meanwhile, we end up in an infinite loop of OffsetForLeaderEpoch
+ * requests.
+ * Specifically:
+ * a. Leader Change - causes OffsetForLeaderEpoch
+ *    request 'A'.
+ * b. Request 'A' fails with a retriable error, and we retry it.
+ * c. While waiting for Request 'A', the leader changes again, and we send a
+ *    Request 'B', but the leader epoch is not updated correctly in this
+ *    request, causing a loop.
+ *
+ * See #4425.
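Editor's note: the seek-with-epoch step that triggers re-validation in `do_test_permanent_error_retried` above is a small, reusable pattern. A sketch, assuming a consumer `c1` assigned partition 0 of `"topic"` (names illustrative):

```c
#include <librdkafka/rdkafka.h>

rd_kafka_topic_partition_list_t *parts = rd_kafka_topic_partition_list_new(1);
rd_kafka_topic_partition_t *p =
    rd_kafka_topic_partition_list_add(parts, "topic", 0);
p->offset = 0;
/* Attaching a leader epoch makes the client validate the offset
 * against the broker before fetching resumes. */
rd_kafka_topic_partition_set_leader_epoch(p, 0);

/* Blocks until the seek has been performed (-1 = infinite timeout). */
rd_kafka_error_t *error = rd_kafka_seek_partitions(c1, parts, -1);
if (error)
        rd_kafka_error_destroy(error);
rd_kafka_topic_partition_list_destroy(parts);
```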
+ */ +static void do_test_two_leader_changes(void) { + const char *topic = test_mk_topic_name(__FUNCTION__, 1); + const char *c1_groupid = topic; + rd_kafka_t *c1; + const char *bootstraps; + rd_kafka_mock_cluster_t *mcluster; + int msg_cnt = 5; + uint64_t testid = test_id_generate(); + rd_kafka_conf_t *conf; + + SUB_TEST_QUICK(); + + mcluster = test_mock_cluster_new(2, &bootstraps); + rd_kafka_mock_topic_create(mcluster, topic, 1, 2); + rd_kafka_mock_partition_set_leader(mcluster, topic, 0, 1); + + /* Seed the topic with messages */ + test_produce_msgs_easy_v(topic, testid, 0, 0, msg_cnt, 10, + "bootstrap.servers", bootstraps, + "batch.num.messages", "1", NULL); + + test_conf_init(&conf, NULL, 60); + test_conf_set(conf, "bootstrap.servers", bootstraps); + test_conf_set(conf, "auto.offset.reset", "earliest"); + + c1 = test_create_consumer(c1_groupid, NULL, conf, NULL); + test_consumer_subscribe(c1, topic); + + /* Consume initial messages and join the group, etc. */ + test_consumer_poll("MSG_INIT", c1, testid, 0, 0, msg_cnt, NULL); + + /* The leader will change from 1->2, and the OffsetForLeaderEpoch will + * be sent to broker 2. We need to first fail it with + * an error, and then give enough time to change the leader before + * returning a success. */ + rd_kafka_mock_broker_push_request_error_rtts( + mcluster, 2, RD_KAFKAP_OffsetForLeaderEpoch, 2, + RD_KAFKA_RESP_ERR_KAFKA_STORAGE_ERROR, 900, + RD_KAFKA_RESP_ERR_NO_ERROR, 1000); + + rd_kafka_mock_partition_set_leader(mcluster, topic, 0, 2); + rd_kafka_poll(c1, 1000); + /* Enough time to make a request, fail with a retriable error, and + * retry. */ + rd_sleep(1); + + /* Reset leader. */ + rd_kafka_mock_partition_set_leader(mcluster, topic, 0, 1); + rd_kafka_poll(c1, 1000); + rd_sleep(1); + + /* There should be no infinite loop of OffsetForLeaderEpoch, and + * consequently, we should be able to consume these messages as a sign + * of success. */ + test_produce_msgs_easy_v(topic, testid, 0, 0, msg_cnt, 10, + "bootstrap.servers", bootstraps, + "batch.num.messages", "1", NULL); + + test_consumer_poll("MSG_INIT", c1, testid, 0, 0, msg_cnt, NULL); + + + rd_kafka_destroy(c1); + + test_mock_cluster_destroy(mcluster); + + TEST_LATER_CHECK(); + SUB_TEST_PASS(); +} + +/** + * @brief Storing an offset without leader epoch should still be allowed + * and the greater than check should apply only to the offset. + * See #4384. + */ +static void do_test_store_offset_without_leader_epoch(void) { + rd_kafka_mock_cluster_t *mcluster; + rd_kafka_conf_t *conf; + const char *bootstraps; + const char *topic = test_mk_topic_name(__FUNCTION__, 1); + const char *c1_groupid = topic; + rd_kafka_t *c1; + rd_kafka_topic_t *rdk_topic; + uint64_t testid = test_id_generate(); + rd_kafka_topic_partition_list_t *rktpars; + rd_kafka_topic_partition_t *rktpar; + int32_t leader_epoch; + + SUB_TEST_QUICK(); + + mcluster = test_mock_cluster_new(3, &bootstraps); + rd_kafka_mock_topic_create(mcluster, topic, 1, 1); + + test_conf_init(&conf, NULL, 60); + test_conf_set(conf, "bootstrap.servers", bootstraps); + test_conf_set(conf, "topic.metadata.refresh.interval.ms", "5000"); + test_conf_set(conf, "auto.offset.reset", "earliest"); + test_conf_set(conf, "enable.auto.commit", "false"); + test_conf_set(conf, "enable.auto.offset.store", "false"); + test_conf_set(conf, "enable.partition.eof", "true"); + + c1 = test_create_consumer(c1_groupid, NULL, conf, NULL); + test_consumer_subscribe(c1, topic); + + /* Leader epoch becomes 1. 
*/ + rd_kafka_mock_partition_set_leader(mcluster, topic, 0, 2); + + /* Read EOF. */ + test_consumer_poll("MSG_ALL", c1, testid, 1, 0, 0, NULL); + + TEST_SAY( + "Storing offset without leader epoch with rd_kafka_offset_store"); + rdk_topic = rd_kafka_topic_new(c1, topic, NULL); + /* Legacy function stores offset + 1 */ + rd_kafka_offset_store(rdk_topic, 0, 1); + rd_kafka_topic_destroy(rdk_topic); + + rd_kafka_commit(c1, NULL, rd_false); + + rktpars = rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(rktpars, topic, 0); + rd_kafka_committed(c1, rktpars, -1); + + TEST_ASSERT(rktpars->elems[0].offset == 2, "expected %d, got %" PRId64, + 2, rktpars->elems[0].offset); + leader_epoch = + rd_kafka_topic_partition_get_leader_epoch(&rktpars->elems[0]); + + /* OffsetFetch returns the leader epoch even if not set. */ + TEST_ASSERT(leader_epoch == 1, "expected %d, got %" PRId32, 1, + leader_epoch); + rd_kafka_topic_partition_list_destroy(rktpars); + + TEST_SAY( + "Storing offset without leader epoch with rd_kafka_offsets_store"); + rktpars = rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(rktpars, topic, 0)->offset = 5; + rd_kafka_offsets_store(c1, rktpars); + rd_kafka_topic_partition_list_destroy(rktpars); + + TEST_CALL_ERR__(rd_kafka_commit(c1, NULL, rd_false)); + + rktpars = rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(rktpars, topic, 0); + rd_kafka_committed(c1, rktpars, -1); + + TEST_ASSERT(rktpars->elems[0].offset == 5, "expected %d, got %" PRId64, + 5, rktpars->elems[0].offset); + leader_epoch = + rd_kafka_topic_partition_get_leader_epoch(&rktpars->elems[0]); + /* OffsetFetch returns the leader epoch even if not set. */ + TEST_ASSERT(leader_epoch == 1, "expected %d, got %" PRId32, 1, + leader_epoch); + rd_kafka_topic_partition_list_destroy(rktpars); + + TEST_SAY( + "While storing offset with leader epoch it should check that value " + "first"); + /* Setting it to (6,1), as last one has epoch -1. */ + rktpars = rd_kafka_topic_partition_list_new(1); + rktpar = rd_kafka_topic_partition_list_add(rktpars, topic, 0); + rktpar->offset = 6; + rd_kafka_topic_partition_set_leader_epoch(rktpar, 1); + rd_kafka_offsets_store(c1, rktpars); + rd_kafka_topic_partition_list_destroy(rktpars); + + rd_kafka_commit(c1, NULL, rd_false); + + /* Trying to store (7,0), it should skip the commit. */ + rktpars = rd_kafka_topic_partition_list_new(1); + rktpar = rd_kafka_topic_partition_list_add(rktpars, topic, 0); + rktpar->offset = 7; + rd_kafka_topic_partition_set_leader_epoch(rktpar, 0); + rd_kafka_offsets_store(c1, rktpars); + rd_kafka_topic_partition_list_destroy(rktpars); + + rd_kafka_commit(c1, NULL, rd_false); + + /* Committed offset is (6,1). 
*/ + rktpars = rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(rktpars, topic, 0); + rd_kafka_committed(c1, rktpars, -1); + + TEST_ASSERT(rktpars->elems[0].offset == 6, "expected %d, got %" PRId64, + 6, rktpars->elems[0].offset); + leader_epoch = + rd_kafka_topic_partition_get_leader_epoch(&rktpars->elems[0]); + TEST_ASSERT(leader_epoch == 1, "expected %d, got %" PRId32, 1, + leader_epoch); + rd_kafka_topic_partition_list_destroy(rktpars); + + rd_kafka_destroy(c1); + + test_mock_cluster_destroy(mcluster); + + TEST_LATER_CHECK(); + SUB_TEST_PASS(); +} + + +int main_0139_offset_validation_mock(int argc, char **argv) { + + if (test_needs_auth()) { + TEST_SKIP("Mock cluster does not support SSL/SASL\n"); + return 0; + } + + do_test_no_duplicates_during_offset_validation(); + + do_test_permanent_error_retried(RD_KAFKA_RESP_ERR__SSL); + do_test_permanent_error_retried(RD_KAFKA_RESP_ERR__RESOLVE); + + do_test_two_leader_changes(); + + do_test_store_offset_without_leader_epoch(); + + return 0; +} diff --git a/lib/librdkafka-2.3.0/tests/0140-commit_metadata.cpp b/lib/librdkafka-2.3.0/tests/0140-commit_metadata.cpp new file mode 100644 index 00000000000..fae655915b8 --- /dev/null +++ b/lib/librdkafka-2.3.0/tests/0140-commit_metadata.cpp @@ -0,0 +1,108 @@ +/* + * librdkafka - Apache Kafka C library + * + * Copyright (c) 2023, Confluent Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#include "testcpp.h" + +using namespace std; + +/** + * @brief Committed metadata should be stored and received back when + * checking committed offsets. 
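Editor's note: the C++ test introduced below has a direct C-API equivalent for reading back committed metadata, which may be useful for comparison. A sketch, assuming a consumer `c` in the same group (topic name and timeout illustrative, and the usual `<stdio.h>`/`<inttypes.h>` headers):

```c
#include <stdio.h>
#include <inttypes.h>
#include <librdkafka/rdkafka.h>

rd_kafka_topic_partition_list_t *committed =
    rd_kafka_topic_partition_list_new(1);
rd_kafka_topic_partition_list_add(committed, "topic", 0);

/* Fills in offset, metadata and leader epoch for each partition. */
rd_kafka_resp_err_t err = rd_kafka_committed(c, committed, 3000 /* ms */);
if (!err && committed->elems[0].metadata)
        printf("offset %" PRId64 " metadata %.*s\n",
               committed->elems[0].offset,
               (int)committed->elems[0].metadata_size,
               (const char *)committed->elems[0].metadata);
rd_kafka_topic_partition_list_destroy(committed);
```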
+ */
+static void test_commit_metadata() {
+  SUB_TEST_QUICK();
+
+  std::string bootstraps;
+  std::string errstr;
+  RdKafka::ErrorCode err;
+
+  RdKafka::Conf *conf;
+  std::string topic = Test::mk_topic_name(__FUNCTION__, 1);
+  Test::conf_init(&conf, NULL, 3000);
+  Test::conf_set(conf, "group.id", topic);
+
+  RdKafka::KafkaConsumer *consumer =
+      RdKafka::KafkaConsumer::create(conf, errstr);
+  if (!consumer)
+    Test::Fail("Failed to create KafkaConsumer: " + errstr);
+  delete conf;
+
+  Test::Say("Create topic.\n");
+  Test::create_topic(consumer, topic.c_str(), 1, 1);
+
+  Test::Say("Commit offsets.\n");
+  std::vector<RdKafka::TopicPartition *> offsets;
+  RdKafka::TopicPartition *offset =
+      RdKafka::TopicPartition::create(topic, 0, 10);
+
+  std::string metadata = "some_metadata";
+  std::vector<unsigned char> metadata_vect(metadata.begin(), metadata.end());
+
+  offset->set_metadata(metadata_vect);
+  offsets.push_back(offset);
+
+  err = consumer->commitSync(offsets);
+  TEST_ASSERT(!err, "commit failed: %s", RdKafka::err2str(err).c_str());
+  RdKafka::TopicPartition::destroy(offsets);
+
+  Test::Say("Read committed offsets.\n");
+  offset = RdKafka::TopicPartition::create(topic, 0, 10);
+  offsets.push_back(offset);
+  err = consumer->committed(offsets, 5000);
+  TEST_ASSERT(!err, "committed offsets failed: %s",
+              RdKafka::err2str(err).c_str());
+  TEST_ASSERT(offsets.size() == 1, "expected offsets size 1, got %" PRIusz,
+              offsets.size());
+
+  Test::Say("Check committed metadata.\n");
+  std::vector<unsigned char> metadata_vect_committed =
+      offsets[0]->get_metadata();
+  std::string metadata_committed(metadata_vect_committed.begin(),
+                                 metadata_vect_committed.end());
+
+  if (metadata != metadata_committed) {
+    Test::Fail(tostr() << "Expecting metadata to be \"" << metadata
+                       << "\", got \"" << metadata_committed << "\"");
+  }
+
+  RdKafka::TopicPartition::destroy(offsets);
+
+  consumer->close();
+
+  delete consumer;
+
+  SUB_TEST_PASS();
+}
+
+extern "C" {
+int main_0140_commit_metadata(int argc, char **argv) {
+  test_commit_metadata();
+  return 0;
+}
}
diff --git a/lib/librdkafka-2.3.0/tests/0142-reauthentication.c b/lib/librdkafka-2.3.0/tests/0142-reauthentication.c
new file mode 100644
index 00000000000..445e8dc8a5a
--- /dev/null
+++ b/lib/librdkafka-2.3.0/tests/0142-reauthentication.c
@@ -0,0 +1,495 @@
+/*
+ * librdkafka - Apache Kafka C library
+ *
+ * Copyright (c) 2023, Confluent Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
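Editor's note: the reauthentication tests that follow all hinge on one error-callback pattern, isolated here as a sketch; the callback name is hypothetical, but the two error codes are the ones the tests treat as authentication-related:

```c
#include <stdio.h>
#include <librdkafka/rdkafka.h>

/* Hypothetical callback: report only auth-related errors. */
static void my_error_cb(rd_kafka_t *rk, int err, const char *reason,
                        void *opaque) {
        if (err == RD_KAFKA_RESP_ERR__AUTHENTICATION ||
            err == RD_KAFKA_RESP_ERR__ALL_BROKERS_DOWN)
                fprintf(stderr, "auth-related error: %s: %s\n",
                        rd_kafka_err2str(err), reason);
}

/* ...during setup, before rd_kafka_new(): */
/* rd_kafka_conf_set_error_cb(conf, my_error_cb); */
```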
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "test.h"
+/* Typical include path would be <librdkafka/rdkafka.h>, but this program
+ * is built from within the librdkafka source tree and thus differs. */
+#include "rdkafka.h" /* for Kafka driver */
+
+static int delivered_msg = 0;
+static int expect_err    = 0;
+static int error_seen    = 0;
+
+static void
+dr_msg_cb(rd_kafka_t *rk, const rd_kafka_message_t *rkmessage, void *opaque) {
+        if (rkmessage->err)
+                TEST_FAIL("Message delivery failed: %s\n",
+                          rd_kafka_err2str(rkmessage->err));
+        else {
+                delivered_msg++;
+        }
+}
+
+static void
+auth_error_cb(rd_kafka_t *rk, int err, const char *reason, void *opaque) {
+        if (expect_err && (err == RD_KAFKA_RESP_ERR__AUTHENTICATION ||
+                           err == RD_KAFKA_RESP_ERR__ALL_BROKERS_DOWN)) {
+                TEST_SAY("Expected error: %s: %s\n", rd_kafka_err2str(err),
+                         reason);
+                error_seen = rd_true;
+        } else
+                TEST_FAIL("Unexpected error: %s: %s", rd_kafka_err2str(err),
+                          reason);
+        rd_kafka_yield(rk);
+}
+
+
+/* Test producer message loss while reauth happens between produces. */
+void do_test_producer(int64_t reauth_time, const char *topic) {
+        rd_kafka_topic_t *rkt  = NULL;
+        rd_kafka_conf_t *conf  = NULL;
+        rd_kafka_t *rk         = NULL;
+        uint64_t testid        = test_id_generate();
+        rd_kafka_resp_err_t err;
+        int msgrate, msgcnt, sent_msg;
+        test_timing_t t_produce;
+
+        msgrate = 200; /* msg/sec */
+        /* Messages should be produced such that at least one reauth happens.
+         * The 1.2 is added as a buffer to avoid flakiness. */
+        msgcnt        = msgrate * reauth_time / 1000 * 1.2;
+        delivered_msg = 0;
+        sent_msg      = 0;
+
+        SUB_TEST("test producer message loss while reauthenticating");
+
+        test_conf_init(&conf, NULL, 30);
+        rd_kafka_conf_set_dr_msg_cb(conf, dr_msg_cb);
+
+        rk  = test_create_handle(RD_KAFKA_PRODUCER, conf);
+        rkt = test_create_producer_topic(rk, topic, NULL);
+
+        /* Create the topic to make sure connections are up and ready. */
+        err = test_auto_create_topic_rkt(rk, rkt, tmout_multip(5000));
+        TEST_ASSERT(!err, "topic creation failed: %s", rd_kafka_err2str(err));
+
+        TIMING_START(&t_produce, "PRODUCE");
+        /* Produce enough messages such that we have time enough for at least
+         * one reauth. */
+        test_produce_msgs_nowait(rk, rkt, testid, 0, 0, msgcnt, NULL, 0,
+                                 msgrate, &sent_msg);
+        TIMING_STOP(&t_produce);
+
+        rd_kafka_flush(rk, 10 * 1000);
+
+        TEST_ASSERT(TIMING_DURATION(&t_produce) >= reauth_time * 1000,
+                    "time enough for one reauth should pass (%ld vs %ld)",
+                    TIMING_DURATION(&t_produce), reauth_time * 1000);
+        TEST_ASSERT(delivered_msg == sent_msg,
+                    "did not deliver as many messages as sent (%d vs %d)",
+                    delivered_msg, sent_msg);
+
+        rd_kafka_topic_destroy(rkt);
+        rd_kafka_destroy(rk);
+
+        SUB_TEST_PASS();
+}
+
+/* Test consumer message loss while reauth happens between consumes.
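Editor's note: the sizing arithmetic used throughout this file is worth spelling out. At 200 msg/s, covering one reauth interval of `reauth_time` ms requires `200 * reauth_time / 1000` messages, and the 1.2 factor pads that by 20% against timing jitter. A worked example with a hypothetical 10-second reauth interval, mirroring the expression in the tests:

```c
int msgrate         = 200;    /* msg/s, as in the tests */
int64_t reauth_time = 10000;  /* connections.max.reauth.ms, example value */
/* 200 msg/s * 10 s = 2000 messages, padded by 20% -> 2400 */
int msgcnt = (int)(msgrate * reauth_time / 1000 * 1.2);
```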
*/ +void do_test_consumer(int64_t reauth_time, const char *topic) { + uint64_t testid; + rd_kafka_t *p1; + rd_kafka_t *c1; + rd_kafka_conf_t *conf; + int64_t start_time = 0; + int64_t wait_time = reauth_time * 1.2 * 1000; + int recv_cnt = 0, sent_cnt = 0; + + SUB_TEST("test consumer message loss while reauthenticating"); + + testid = test_id_generate(); + + test_conf_init(&conf, NULL, 30); + rd_kafka_conf_set_dr_msg_cb(conf, test_dr_msg_cb); + + p1 = test_create_handle(RD_KAFKA_PRODUCER, rd_kafka_conf_dup(conf)); + + test_create_topic(p1, topic, 1, 3); + TEST_SAY("Topic: %s is created\n", topic); + + test_conf_set(conf, "auto.offset.reset", "earliest"); + c1 = test_create_consumer(topic, NULL, conf, NULL); + test_consumer_subscribe(c1, topic); + + start_time = test_clock(); + while ((test_clock() - start_time) <= wait_time) { + /* Produce one message. */ + test_produce_msgs2(p1, topic, testid, 0, 0, 1, NULL, 0); + sent_cnt++; + + rd_kafka_message_t *rkm = rd_kafka_consumer_poll(c1, 100); + if (!rkm || rkm->err) { + /* Ignore errors. Add a flush for good measure so maybe + * we'll have messages in the next iteration. */ + rd_kafka_flush(p1, 50); + continue; + } + recv_cnt++; + rd_kafka_message_destroy(rkm); + + /* An approximate way of maintaining the message rate as 200 + * msg/s */ + rd_usleep(1000 * 50, NULL); + } + + /* Final flush and receive any remaining messages. */ + rd_kafka_flush(p1, 10 * 1000); + recv_cnt += + test_consumer_poll_timeout("timeout", c1, testid, -1, -1, + sent_cnt - recv_cnt, NULL, 10 * 1000); + + test_consumer_close(c1); + + TEST_ASSERT(sent_cnt == recv_cnt, + "did not receive as many messages as sent (%d vs %d)", + sent_cnt, recv_cnt); + + rd_kafka_destroy(p1); + rd_kafka_destroy(c1); + SUB_TEST_PASS(); +} + + + +/* Test produce from a transactional producer while there is a reauth, and check + * consumed messages for a committed or an aborted transaction. */ +void do_test_txn_producer(int64_t reauth_time, + const char *topic, + rd_bool_t abort_txn) { + rd_kafka_topic_t *rkt = NULL; + rd_kafka_conf_t *conf = NULL; + rd_kafka_t *rk = NULL; + uint64_t testid = test_id_generate(); + rd_kafka_resp_err_t err; + int msgrate, msgcnt, sent_msg; + test_timing_t t_produce; + + delivered_msg = 0; + sent_msg = 0; + msgrate = 200; /* msg/sec */ + /* Messages should be produced such that at least one reauth happens. + * The 1.2 is added as a buffer to avoid flakiness. */ + msgcnt = msgrate * reauth_time / 1000 * 1.2; + + SUB_TEST("test reauth in the middle of a txn, txn is %s", + abort_txn ? "aborted" : "committed"); + + test_conf_init(&conf, NULL, 30); + test_conf_set(conf, "transactional.id", topic); + test_conf_set(conf, "transaction.timeout.ms", + tsprintf("%ld", (int64_t)(reauth_time * 1.2 + 60000))); + rd_kafka_conf_set_dr_msg_cb(conf, dr_msg_cb); + + rk = test_create_handle(RD_KAFKA_PRODUCER, conf); + rkt = test_create_producer_topic(rk, topic, NULL); + + err = test_auto_create_topic_rkt(rk, rkt, tmout_multip(5000)); + TEST_ASSERT(!err, "topic creation failed: %s", rd_kafka_err2str(err)); + + TEST_CALL_ERROR__(rd_kafka_init_transactions(rk, -1)); + TEST_CALL_ERROR__(rd_kafka_begin_transaction(rk)); + + + TIMING_START(&t_produce, "PRODUCE"); + /* Produce enough messages such that we have time enough for at least + * one reauth. 
 */
+        test_produce_msgs_nowait(rk, rkt, testid, 0, 0, msgcnt, NULL, 0,
+                                 msgrate, &sent_msg);
+        TIMING_STOP(&t_produce);
+
+        rd_kafka_flush(rk, 10 * 1000);
+
+        TEST_ASSERT(TIMING_DURATION(&t_produce) >= reauth_time * 1000,
+                    "time enough for one reauth should pass (%ld vs %ld)",
+                    TIMING_DURATION(&t_produce), reauth_time * 1000);
+        TEST_ASSERT(delivered_msg == sent_msg,
+                    "did not deliver as many messages as sent (%d vs %d)",
+                    delivered_msg, sent_msg);
+
+        if (abort_txn) {
+                rd_kafka_t *c = NULL;
+
+                TEST_CALL_ERROR__(rd_kafka_abort_transaction(rk, 30 * 1000));
+
+                /* We can reuse conf because the old one's been moved to rk
+                 * already. */
+                test_conf_init(&conf, NULL, 30);
+                test_conf_set(conf, "isolation.level", "read_committed");
+                c = test_create_consumer("mygroup", NULL, conf, NULL);
+                test_consumer_poll_no_msgs("mygroup", c, testid, 10 * 1000);
+
+                rd_kafka_destroy(c);
+        } else {
+                TEST_CALL_ERROR__(rd_kafka_commit_transaction(rk, 30 * 1000));
+                test_consume_txn_msgs_easy("mygroup", topic, testid, -1,
+                                           sent_msg, NULL);
+        }
+
+        rd_kafka_topic_destroy(rkt);
+        rd_kafka_destroy(rk);
+
+        SUB_TEST_PASS();
+}
+
+
+/* Check reauthentication in case of the OAUTHBEARER mechanism, with different
+ * reauth times and token lifetimes. */
+void do_test_oauthbearer(int64_t reauth_time,
+                         const char *topic,
+                         int64_t token_lifetime_ms,
+                         rd_bool_t use_sasl_queue) {
+        rd_kafka_topic_t *rkt = NULL;
+        rd_kafka_conf_t *conf = NULL;
+        rd_kafka_t *rk        = NULL;
+        uint64_t testid       = test_id_generate();
+        rd_kafka_resp_err_t err;
+        char *mechanism;
+        int msgrate, msgcnt, sent_msg;
+        test_timing_t t_produce;
+        int token_lifetime_s = token_lifetime_ms / 1000;
+
+        SUB_TEST(
+            "test reauthentication with oauthbearer, reauth_time = %ld, "
+            "token_lifetime = %ld",
+            reauth_time, token_lifetime_ms);
+
+        test_conf_init(&conf, NULL, 30);
+        rd_kafka_conf_set_dr_msg_cb(conf, dr_msg_cb);
+        rd_kafka_conf_enable_sasl_queue(conf, use_sasl_queue);
+
+        mechanism = test_conf_get(conf, "sasl.mechanism");
+        if (rd_strcasecmp(mechanism, "oauthbearer")) {
+                rd_kafka_conf_destroy(conf);
+                SUB_TEST_SKIP(
+                    "`sasl.mechanism=OAUTHBEARER` is required, have %s\n",
+                    mechanism);
+        }
+
+        test_conf_set(
+            conf, "sasl.oauthbearer.config",
+            tsprintf("principal=admin scope=requiredScope lifeSeconds=%d",
+                     token_lifetime_s));
+        test_conf_set(conf, "enable.sasl.oauthbearer.unsecure.jwt", "true");
+        rk = test_create_handle(RD_KAFKA_PRODUCER, conf);
+
+        /* Enable the background queue since we don't want to poll the SASL
+         * queue. */
+        if (use_sasl_queue)
+                rd_kafka_sasl_background_callbacks_enable(rk);
+
+        rkt = test_create_producer_topic(rk, topic, NULL);
+
+        /* Create the topic to make sure connections are up and ready. */
+        err = test_auto_create_topic_rkt(rk, rkt, tmout_multip(5000));
+        TEST_ASSERT(!err, "topic creation failed: %s", rd_kafka_err2str(err));
+
+        msgrate = 200; /* msg/sec */
+        /* Messages should be produced such that at least one reauth happens.
+         * The 1.2 is added as a buffer to avoid flakiness.
 */
+        msgcnt        = msgrate * reauth_time / 1000 * 1.2;
+        delivered_msg = 0;
+        sent_msg      = 0;
+
+        TIMING_START(&t_produce, "PRODUCE");
+        test_produce_msgs_nowait(rk, rkt, testid, 0, 0, msgcnt, NULL, 0,
+                                 msgrate, &sent_msg);
+        TIMING_STOP(&t_produce);
+
+        rd_kafka_flush(rk, 10 * 1000);
+
+        TEST_ASSERT(TIMING_DURATION(&t_produce) >= reauth_time * 1000,
+                    "time enough for one reauth should pass (%ld vs %ld)",
+                    TIMING_DURATION(&t_produce), reauth_time * 1000);
+        TEST_ASSERT(delivered_msg == sent_msg,
+                    "did not deliver as many messages as sent (%d vs %d)",
+                    delivered_msg, sent_msg);
+
+        rd_kafka_topic_destroy(rkt);
+        rd_kafka_destroy(rk);
+
+        SUB_TEST_PASS();
+}
+
+
+/* Check that changing the credentials to wrong ones causes authentication
+ * errors. */
+void do_test_reauth_failure(int64_t reauth_time, const char *topic) {
+        rd_kafka_topic_t *rkt = NULL;
+        rd_kafka_conf_t *conf = NULL;
+        rd_kafka_t *rk        = NULL;
+        uint64_t testid       = test_id_generate();
+        char *mechanism;
+        rd_kafka_resp_err_t err;
+        int msgrate, msgcnt, sent_msg;
+        test_timing_t t_produce;
+
+        msgrate = 200; /* msg/sec */
+        /* Messages should be produced such that at least one reauth happens.
+         * The 1.2 is added as a buffer to avoid flakiness. */
+        msgcnt     = msgrate * reauth_time / 1000 * 1.2;
+        error_seen = 0;
+        expect_err = 0;
+
+        SUB_TEST("test reauth failure with wrong credentials for reauth");
+
+        test_conf_init(&conf, NULL, 30);
+        rd_kafka_conf_set_dr_msg_cb(conf, dr_msg_cb);
+        rd_kafka_conf_set_error_cb(conf, auth_error_cb);
+
+        mechanism = test_conf_get(conf, "sasl.mechanism");
+
+        if (!rd_strcasecmp(mechanism, "oauthbearer")) {
+                rd_kafka_conf_destroy(conf);
+                SUB_TEST_SKIP(
+                    "PLAIN or SCRAM mechanism is required, have "
+                    "OAUTHBEARER");
+        }
+
+        rk  = test_create_handle(RD_KAFKA_PRODUCER, conf);
+        rkt = test_create_producer_topic(rk, topic, NULL);
+
+        /* Create the topic to make sure connections are up and ready. */
+        err = test_auto_create_topic_rkt(rk, rkt, tmout_multip(5000));
+        TEST_ASSERT(!err, "topic creation failed: %s", rd_kafka_err2str(err));
+
+        rd_kafka_sasl_set_credentials(rk, "somethingwhich", "isnotright");
+        expect_err = 1;
+
+        TIMING_START(&t_produce, "PRODUCE");
+        /* Produce enough messages such that we have time enough for at least
+         * one reauth.
 */
+        test_produce_msgs_nowait(rk, rkt, testid, 0, 0, msgcnt, NULL, 0,
+                                 msgrate, &sent_msg);
+        TIMING_STOP(&t_produce);
+
+        TEST_ASSERT(TIMING_DURATION(&t_produce) >= reauth_time * 1000,
+                    "time enough for one reauth should pass (%ld vs %ld)",
+                    TIMING_DURATION(&t_produce), reauth_time * 1000);
+        TEST_ASSERT(error_seen, "should have had an authentication error");
+
+        rd_kafka_topic_destroy(rkt);
+        rd_kafka_destroy(rk);
+
+        SUB_TEST_PASS();
+}
+
+
+int main_0142_reauthentication(int argc, char **argv) {
+        size_t broker_id_cnt;
+        int32_t *broker_ids = NULL;
+        rd_kafka_conf_t *conf = NULL;
+        const char *security_protocol, *sasl_mechanism;
+
+        size_t i;
+        int64_t reauth_time = INT64_MAX;
+        const char *topic   = test_mk_topic_name(__FUNCTION__ + 5, 1);
+
+        test_conf_init(&conf, NULL, 30);
+        security_protocol = test_conf_get(NULL, "security.protocol");
+
+        if (strncmp(security_protocol, "sasl", 4)) {
+                rd_kafka_conf_destroy(conf);
+                TEST_SKIP("Test requires SASL_PLAINTEXT or SASL_SSL, got %s\n",
+                          security_protocol);
+                return 0;
+        }
+
+        sasl_mechanism = test_conf_get(NULL, "sasl.mechanism");
+        if (!rd_strcasecmp(sasl_mechanism, "oauthbearer"))
+                test_conf_set(conf, "enable.sasl.oauthbearer.unsecure.jwt",
+                              "true");
+
+        rd_kafka_t *rk = test_create_handle(RD_KAFKA_PRODUCER, conf);
+
+        TEST_SAY("Fetching broker IDs\n");
+        broker_ids = test_get_broker_ids(rk, &broker_id_cnt);
+
+        TEST_ASSERT(broker_id_cnt != 0);
+
+        for (i = 0; i < broker_id_cnt; i++) {
+                char *property_value = test_get_broker_config_entry(
+                    rk, broker_ids[i], "connections.max.reauth.ms");
+
+                int64_t parsed_value;
+
+                if (!property_value)
+                        continue;
+
+                parsed_value = strtoll(property_value, NULL, 0);
+                if (parsed_value < reauth_time)
+                        reauth_time = parsed_value;
+
+                free(property_value);
+        }
+
+        if (broker_ids)
+                free(broker_ids);
+        if (rk)
+                rd_kafka_destroy(rk);
+
+        if (reauth_time ==
+                INT64_MAX /* denotes property is unset on all brokers */
+            ||
+            reauth_time == 0 /* denotes at least one broker without timeout */
+        ) {
+                TEST_SKIP(
+                    "Test requires all brokers to have non-zero "
+                    "connections.max.reauth.ms\n");
+                return 0;
+        }
+
+        /* Each test (9 of them) will take slightly more than 1 reauth_time
+         * interval. Additional 30s provide a reasonable buffer. */
+        test_timeout_set(9 * reauth_time / 1000 + 30);
+
+
+        do_test_consumer(reauth_time, topic);
+        do_test_producer(reauth_time, topic);
+        do_test_txn_producer(reauth_time, topic, rd_false /* abort txn */);
+        do_test_txn_producer(reauth_time, topic, rd_true /* abort txn */);
+
+        /* Case when token_lifetime is shorter than the maximum reauth time
+         * configured on the broker.
+         * In this case, the broker returns the time to the next
+         * reauthentication based on the expiry provided in the token.
+         * We should recreate the token and reauthenticate before this
+         * reauth time. */
+        do_test_oauthbearer(reauth_time, topic, reauth_time / 2, rd_true);
+        do_test_oauthbearer(reauth_time, topic, reauth_time / 2, rd_false);
+        /* Case when the token_lifetime is greater than the maximum reauth time
+         * configured.
+         * In this case, the broker returns the maximum reauth time configured.
+         * We don't need to recreate the token, but we need to reauthenticate
+         * using the same token.
*/ + do_test_oauthbearer(reauth_time, topic, reauth_time * 2, rd_true); + do_test_oauthbearer(reauth_time, topic, reauth_time * 2, rd_false); + + do_test_reauth_failure(reauth_time, topic); + + return 0; +} diff --git a/lib/librdkafka-2.3.0/tests/0143-exponential_backoff_mock.c b/lib/librdkafka-2.3.0/tests/0143-exponential_backoff_mock.c new file mode 100644 index 00000000000..80ae817d5ce --- /dev/null +++ b/lib/librdkafka-2.3.0/tests/0143-exponential_backoff_mock.c @@ -0,0 +1,561 @@ +/* + * librdkafka - Apache Kafka C library + * + * Copyright (c) 2023, Confluent Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "test.h" +#include "../src/rdkafka_proto.h" +#include "../src/rdkafka_mock.h" + +const int32_t retry_ms = 100; +const int32_t retry_max_ms = 1000; + +static void free_mock_requests(rd_kafka_mock_request_t **requests, + size_t request_cnt) { + size_t i; + for (i = 0; i < request_cnt; i++) + rd_kafka_mock_request_destroy(requests[i]); + rd_free(requests); +} +/** + * @brief find_coordinator test + * We fail the request with RD_KAFKA_RESP_ERR_GROUP_COORDINATOR_NOT_AVAILABLE, + * so that the request is tried via the intervalled mechanism. The intervalling + * is done at 500 ms, with a 20% jitter. However, the actual code to retry the + * request runs inside rd_kafka_cgrp_serve that is called every one second, + * hence, the retry always happens in about 1 second, no matter what the + * jitter is. This will be fixed once rd_kafka_cgrp_serve is timer triggered. + * The exponential backoff does not apply in this case; we just apply the + * jitter to the backoff of the intervalled query. The retry count is + * non-deterministic, as fresh requests are spawned on their own.
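+ * (Editorial illustration: given the ~1 s cgrp serve tick described above, + * consecutive FindCoordinator requests are expected roughly 1000 ms apart; + * the loop below asserts that each gap lies in (low - buffer, low + buffer) + * = (800, 1200) ms.)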
+ */ +static void test_find_coordinator(rd_kafka_mock_cluster_t *mcluster, + const char *topic, + rd_kafka_conf_t *conf) { + rd_kafka_mock_request_t **requests = NULL; + size_t request_cnt = 0; + int64_t previous_request_ts = -1; + int32_t retry_count = 0; + int32_t num_retries = 4; + const int32_t low = 1000; + int32_t buffer = 200; // 200 ms buffer added + rd_kafka_t *consumer; + rd_kafka_message_t *rkm; + size_t i; + + SUB_TEST(); + test_conf_set(conf, "auto.offset.reset", "earliest"); + test_conf_set(conf, "enable.auto.commit", "false"); + + consumer = test_create_consumer(topic, NULL, conf, NULL); + + rd_kafka_mock_push_request_errors( + mcluster, RD_KAFKAP_FindCoordinator, num_retries, + RD_KAFKA_RESP_ERR_GROUP_COORDINATOR_NOT_AVAILABLE, + RD_KAFKA_RESP_ERR_GROUP_COORDINATOR_NOT_AVAILABLE, + RD_KAFKA_RESP_ERR_GROUP_COORDINATOR_NOT_AVAILABLE, + RD_KAFKA_RESP_ERR_GROUP_COORDINATOR_NOT_AVAILABLE); + /* This will trigger a find_coordinator request */ + rkm = rd_kafka_consumer_poll(consumer, 10 * 1000); + if (rkm) + rd_kafka_message_destroy(rkm); + rd_sleep(4); + requests = rd_kafka_mock_get_requests(mcluster, &request_cnt); + for (i = 0; (i < request_cnt) && (retry_count < num_retries); i++) { + TEST_SAY("Broker Id : %d API Key : %d Timestamp : %" PRId64 + "\n", + rd_kafka_mock_request_id(requests[i]), + rd_kafka_mock_request_api_key(requests[i]), + rd_kafka_mock_request_timestamp(requests[i])); + + if (rd_kafka_mock_request_api_key(requests[i]) != + RD_KAFKAP_FindCoordinator) + continue; + + if (previous_request_ts != -1) { + int64_t time_difference = + (rd_kafka_mock_request_timestamp(requests[i]) - + previous_request_ts) / + 1000; + TEST_ASSERT(((time_difference > low - buffer) && + (time_difference < low + buffer)), + "Time difference should be close " + "to 1 second, it is %" PRId64 + " ms instead.\n", + time_difference); + retry_count++; + } + previous_request_ts = + rd_kafka_mock_request_timestamp(requests[i]); + } + rd_kafka_destroy(consumer); + free_mock_requests(requests, request_cnt); + rd_kafka_mock_clear_requests(mcluster); + SUB_TEST_PASS(); +} + +/** + * The exponential backoff needs to be checked for the given request_type. + * For a correct check, a request of request_type is only counted as a retry + * if a previous one has failed.
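+ * + * (Editorial worked example, derived from the low/high bounds computed + * below: with retry_ms = 100 and retry_max_ms = 1000, retry i is asserted + * to land in ((2^i * 100) * 75 / 100, (2^i * 100) * 125 / 100) ms, each + * bound clamped once it passes the corresponding retry_max_ms bound: gaps + * of (75, 125), (150, 250), (300, 500), (600, 1000) and then (750, 1250) + * ms.)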
+ */ +static void helper_exponential_backoff(rd_kafka_mock_cluster_t *mcluster, + int32_t request_type) { + rd_kafka_mock_request_t **requests = NULL; + size_t request_cnt = 0; + int64_t previous_request_ts = -1; + int32_t retry_count = 0; + size_t i; + requests = rd_kafka_mock_get_requests(mcluster, &request_cnt); + for (i = 0; i < request_cnt; i++) { + TEST_SAY("Broker Id : %d API Key : %d Timestamp : %" PRId64 + "\n", + rd_kafka_mock_request_id(requests[i]), + rd_kafka_mock_request_api_key(requests[i]), + rd_kafka_mock_request_timestamp(requests[i])); + + if (rd_kafka_mock_request_api_key(requests[i]) != request_type) + continue; + + if (previous_request_ts != -1) { + int64_t time_difference = + (rd_kafka_mock_request_timestamp(requests[i]) - + previous_request_ts) / + 1000; + /* Max Jitter is 20 percent each side so buffer chosen + * is 25 percent to account for latency delays */ + int64_t low = + ((1 << retry_count) * (retry_ms)*75) / 100; + int64_t high = + ((1 << retry_count) * (retry_ms)*125) / 100; + if (high > ((retry_max_ms * 125) / 100)) + high = (retry_max_ms * 125) / 100; + if (low > ((retry_max_ms * 75) / 100)) + low = (retry_max_ms * 75) / 100; + TEST_ASSERT((time_difference < high) && + (time_difference > low), + "Time difference is not respected, should " + "be between %" PRId64 " and %" PRId64 + " where time difference is %" PRId64 "\n", + low, high, time_difference); + retry_count++; + } + previous_request_ts = + rd_kafka_mock_request_timestamp(requests[i]); + } + free_mock_requests(requests, request_cnt); +} +/** + * @brief offset_commit test + * We fail the request with RD_KAFKA_RESP_ERR_COORDINATOR_LOAD_IN_PROGRESS so + * that the request is retried with the exponential backoff. The max retries + * allowed is 2 for offset_commit. The RPC calls rd_kafka_buf_retry for its + * retry attempts, so this tests all such RPCs which depend on it for + * retrying. The number of retries is deterministic, i.e. no fresh requests + * are spawned on their own. + */ +static void test_offset_commit(rd_kafka_mock_cluster_t *mcluster, + const char *topic, + rd_kafka_conf_t *conf) { + rd_kafka_t *consumer; + rd_kafka_message_t *rkm; + rd_kafka_topic_partition_list_t *offsets; + rd_kafka_topic_partition_t *rktpar; + SUB_TEST(); + test_conf_set(conf, "auto.offset.reset", "earliest"); + test_conf_set(conf, "enable.auto.commit", "false"); + + consumer = test_create_consumer(topic, NULL, conf, NULL); + test_consumer_subscribe(consumer, topic); + rkm = rd_kafka_consumer_poll(consumer, 10 * 1000); + if (rkm) + rd_kafka_message_destroy(rkm); + rd_sleep(4); + rd_kafka_mock_push_request_errors( + mcluster, RD_KAFKAP_OffsetCommit, 2, + RD_KAFKA_RESP_ERR_COORDINATOR_LOAD_IN_PROGRESS, + RD_KAFKA_RESP_ERR_COORDINATOR_LOAD_IN_PROGRESS); + + offsets = rd_kafka_topic_partition_list_new(1); + rktpar = rd_kafka_topic_partition_list_add(offsets, topic, 0); + /* Setting Offset to an arbitrary number */ + rktpar->offset = 4; + /* rd_kafka_commit will trigger OffsetCommit RPC call */ + rd_kafka_commit(consumer, offsets, 0); + rd_kafka_topic_partition_list_destroy(offsets); + rd_sleep(3); + + helper_exponential_backoff(mcluster, RD_KAFKAP_OffsetCommit); + + + rd_kafka_destroy(consumer); + rd_kafka_mock_clear_requests(mcluster); + SUB_TEST_PASS(); +} + +/** + * @brief produce test + * We fail the request with RD_KAFKA_RESP_ERR_NOT_ENOUGH_REPLICAS so + * that the request is retried with the exponential backoff.
The exponential + * backoff is capped at retry_max_ms, with jitter. The number of retries is + * deterministic, i.e. no fresh requests are spawned on their own. + */ +static void test_produce(rd_kafka_mock_cluster_t *mcluster, + const char *topic, + rd_kafka_conf_t *conf) { + rd_kafka_t *producer; + rd_kafka_topic_t *rkt; + SUB_TEST(); + rd_kafka_conf_set_dr_msg_cb(conf, test_dr_msg_cb); + + producer = test_create_handle(RD_KAFKA_PRODUCER, conf); + rkt = test_create_producer_topic(producer, topic, NULL); + + rd_kafka_mock_push_request_errors( + mcluster, RD_KAFKAP_Produce, 7, + RD_KAFKA_RESP_ERR_NOT_ENOUGH_REPLICAS, + RD_KAFKA_RESP_ERR_NOT_ENOUGH_REPLICAS, + RD_KAFKA_RESP_ERR_NOT_ENOUGH_REPLICAS, + RD_KAFKA_RESP_ERR_NOT_ENOUGH_REPLICAS, + RD_KAFKA_RESP_ERR_NOT_ENOUGH_REPLICAS, + RD_KAFKA_RESP_ERR_NOT_ENOUGH_REPLICAS, + RD_KAFKA_RESP_ERR_NOT_ENOUGH_REPLICAS); + + test_produce_msgs(producer, rkt, 0, RD_KAFKA_PARTITION_UA, 0, 1, + "hello", 5); + rd_sleep(3); + + helper_exponential_backoff(mcluster, RD_KAFKAP_Produce); + + + rd_kafka_topic_destroy(rkt); + rd_kafka_destroy(producer); + rd_kafka_mock_clear_requests(mcluster); + SUB_TEST_PASS(); +} + +/** + * Helper function checking that a FindCoordinator request is triggered after + * a failing request of the given request_type. + */ +static void helper_find_coordinator_trigger(rd_kafka_mock_cluster_t *mcluster, + int32_t request_type) { + rd_kafka_mock_request_t **requests = NULL; + size_t request_cnt = 0; + int32_t num_request = 0; + size_t i; + requests = rd_kafka_mock_get_requests(mcluster, &request_cnt); + for (i = 0; i < request_cnt; i++) { + TEST_SAY("Broker Id : %d API Key : %d Timestamp : %" PRId64 + "\n", + rd_kafka_mock_request_id(requests[i]), + rd_kafka_mock_request_api_key(requests[i]), + rd_kafka_mock_request_timestamp(requests[i])); + if (num_request == 0) { + if (rd_kafka_mock_request_api_key(requests[i]) == + request_type) { + num_request++; + } + } else if (num_request == 1) { + if (rd_kafka_mock_request_api_key(requests[i]) == + RD_KAFKAP_FindCoordinator) { + TEST_SAY( + "FindCoordinator request made after " + "failing request with NOT_COORDINATOR " + "error.\n"); + break; + } else if (rd_kafka_mock_request_api_key(requests[i]) == + request_type) { + num_request++; + TEST_FAIL( + "Second request made without any " + "FindCoordinator request."); + } + } + } + free_mock_requests(requests, request_cnt); + if (num_request != 1) + TEST_FAIL("No request was made."); +} +/** + * @brief heartbeat-find_coordinator test + * We fail the request with RD_KAFKA_RESP_ERR_NOT_COORDINATOR_FOR_GROUP so that + * the FindCoordinator request is triggered.
+ */ +static void test_heartbeat_find_coordinator(rd_kafka_mock_cluster_t *mcluster, + const char *topic, + rd_kafka_conf_t *conf) { + rd_kafka_t *consumer; + rd_kafka_message_t *rkm; + SUB_TEST(); + test_conf_set(conf, "auto.offset.reset", "earliest"); + test_conf_set(conf, "enable.auto.commit", "false"); + + consumer = test_create_consumer(topic, NULL, conf, NULL); + + rd_kafka_mock_push_request_errors( + mcluster, RD_KAFKAP_Heartbeat, 1, + RD_KAFKA_RESP_ERR_NOT_COORDINATOR_FOR_GROUP); + + rd_kafka_mock_clear_requests(mcluster); + test_consumer_subscribe(consumer, topic); + /* This will trigger a find_coordinator request */ + rkm = rd_kafka_consumer_poll(consumer, 10 * 1000); + if (rkm) + rd_kafka_message_destroy(rkm); + rd_sleep(6); + + + helper_find_coordinator_trigger(mcluster, RD_KAFKAP_Heartbeat); + + + rd_kafka_destroy(consumer); + rd_kafka_mock_clear_requests(mcluster); + SUB_TEST_PASS(); +} + +/** + * @brief joingroup-find_coordinator test + * We fail the request with RD_KAFKA_RESP_ERR_NOT_COORDINATOR_FOR_GROUP so that + * the FindCoordinator request is triggered. + */ +static void test_joingroup_find_coordinator(rd_kafka_mock_cluster_t *mcluster, + const char *topic, + rd_kafka_conf_t *conf) { + rd_kafka_t *consumer; + rd_kafka_message_t *rkm; + SUB_TEST(); + test_conf_set(conf, "auto.offset.reset", "earliest"); + test_conf_set(conf, "enable.auto.commit", "false"); + + consumer = test_create_consumer(topic, NULL, conf, NULL); + rd_kafka_mock_push_request_errors( + mcluster, RD_KAFKAP_JoinGroup, 1, + RD_KAFKA_RESP_ERR_NOT_COORDINATOR_FOR_GROUP); + rd_kafka_mock_clear_requests(mcluster); + test_consumer_subscribe(consumer, topic); + /* This will trigger a find_coordinator request */ + rkm = rd_kafka_consumer_poll(consumer, 10 * 1000); + if (rkm) + rd_kafka_message_destroy(rkm); + rd_sleep(4); + + helper_find_coordinator_trigger(mcluster, RD_KAFKAP_JoinGroup); + + rd_kafka_destroy(consumer); + rd_kafka_mock_clear_requests(mcluster); + SUB_TEST_PASS(); +} + +/** + * @brief produce-fast_leader_query test + * We fail a Produce request with RD_KAFKA_RESP_ERR_NOT_LEADER_OR_FOLLOWER, so + * that it triggers a fast leader query (a Metadata request). We don't update + * the leader in this test, so the Metadata is always stale from the client's + * perspective, and the fast leader query carries on, being backed off + * exponentially until the max retry time is reached. The number of retries + * is non-deterministic, as the client keeps retrying until the leader + * changes.
+ */ +static void test_produce_fast_leader_query(rd_kafka_mock_cluster_t *mcluster, + const char *topic, + rd_kafka_conf_t *conf) { + rd_kafka_mock_request_t **requests = NULL; + size_t request_cnt = 0; + int64_t previous_request_ts = -1; + int32_t retry_count = 0; + rd_bool_t produced = rd_false; + rd_kafka_t *producer; + rd_kafka_topic_t *rkt; + size_t i; + SUB_TEST(); + rd_kafka_conf_set_dr_msg_cb(conf, test_dr_msg_cb); + + producer = test_create_handle(RD_KAFKA_PRODUCER, conf); + rkt = test_create_producer_topic(producer, topic, NULL); + + rd_kafka_mock_push_request_errors( + mcluster, RD_KAFKAP_Produce, 1, + RD_KAFKA_RESP_ERR_NOT_LEADER_OR_FOLLOWER); + rd_kafka_mock_clear_requests(mcluster); + test_produce_msgs(producer, rkt, 0, RD_KAFKA_PARTITION_UA, 0, 1, + "hello", 1); + rd_sleep(10); + requests = rd_kafka_mock_get_requests(mcluster, &request_cnt); + + for (i = 0; i < request_cnt; i++) { + TEST_SAY("Broker Id : %d API Key : %d Timestamp : %" PRId64 + "\n", + rd_kafka_mock_request_id(requests[i]), + rd_kafka_mock_request_api_key(requests[i]), + rd_kafka_mock_request_timestamp(requests[i])); + + if (!produced && rd_kafka_mock_request_api_key(requests[i]) == + RD_KAFKAP_Produce) + produced = rd_true; + else if (rd_kafka_mock_request_api_key(requests[i]) == + RD_KAFKAP_Metadata && + produced) { + if (previous_request_ts != -1) { + int64_t time_difference = + (rd_kafka_mock_request_timestamp( + requests[i]) - + previous_request_ts) / + 1000; + /* Max Jitter is 20 percent each side so buffer + * chosen is 25 percent to account for latency + * delays */ + int64_t low = + ((1 << retry_count) * (retry_ms)*75) / 100; + int64_t high = + ((1 << retry_count) * (retry_ms)*125) / 100; + if (high > ((retry_max_ms * 125) / 100)) + high = (retry_max_ms * 125) / 100; + if (low > ((retry_max_ms * 75) / 100)) + low = (retry_max_ms * 75) / 100; + TEST_ASSERT( + (time_difference < high) && + (time_difference > low), + "Time difference is not respected, should " + "be between %" PRId64 " and %" PRId64 + " where time difference is %" PRId64 "\n", + low, high, time_difference); + retry_count++; + } + previous_request_ts = + rd_kafka_mock_request_timestamp(requests[i]); + } + } + rd_kafka_topic_destroy(rkt); + rd_kafka_destroy(producer); + free_mock_requests(requests, request_cnt); + rd_kafka_mock_clear_requests(mcluster); + SUB_TEST_PASS(); +} + +/** + * @brief fetch-fast_leader_query test + * We fail a Fetch request by causing a leader change (the leader is the same, + * but with a different leader epoch). It triggers fast leader query (Metadata + * request). The request is able to obtain an updated leader, and hence, the + * fast leader query terminates after one Metadata request. 
+ */ +static void test_fetch_fast_leader_query(rd_kafka_mock_cluster_t *mcluster, + const char *topic, + rd_kafka_conf_t *conf) { + rd_kafka_mock_request_t **requests = NULL; + size_t request_cnt = 0; + rd_bool_t previous_request_was_Fetch = rd_false; + rd_bool_t Metadata_after_Fetch = rd_false; + rd_kafka_t *consumer; + rd_kafka_message_t *rkm; + size_t i; + SUB_TEST(); + test_conf_set(conf, "auto.offset.reset", "earliest"); + test_conf_set(conf, "enable.auto.commit", "false"); + + consumer = test_create_consumer(topic, NULL, conf, NULL); + + test_consumer_subscribe(consumer, topic); + rkm = rd_kafka_consumer_poll(consumer, 10 * 1000); + + if (rkm) + rd_kafka_message_destroy(rkm); + rd_kafka_mock_clear_requests(mcluster); + + rd_kafka_mock_partition_set_leader(mcluster, topic, 0, 1); + rkm = rd_kafka_consumer_poll(consumer, 10 * 1000); + if (rkm) + rd_kafka_message_destroy(rkm); + rd_sleep(3); + requests = rd_kafka_mock_get_requests(mcluster, &request_cnt); + for (i = 0; i < request_cnt; i++) { + TEST_SAY("Broker Id : %d API Key : %d Timestamp : %" PRId64 + "\n", + rd_kafka_mock_request_id(requests[i]), + rd_kafka_mock_request_api_key(requests[i]), + rd_kafka_mock_request_timestamp(requests[i])); + + if (rd_kafka_mock_request_api_key(requests[i]) == + RD_KAFKAP_Fetch) + previous_request_was_Fetch = rd_true; + else if (rd_kafka_mock_request_api_key(requests[i]) == + RD_KAFKAP_Metadata && + previous_request_was_Fetch) { + Metadata_after_Fetch = rd_true; + break; + } else + previous_request_was_Fetch = rd_false; + } + rd_kafka_destroy(consumer); + free_mock_requests(requests, request_cnt); + rd_kafka_mock_clear_requests(mcluster); + TEST_ASSERT( + Metadata_after_Fetch, + "Metadata Request should have been made after fetch at least once."); + SUB_TEST_PASS(); +} + +/** + * @brief Exponential Backoff (KIP 580) + * We test all the pipelines which affect the retry mechanism, for both + * intervalled queries where jitter is added and backed-off queries where both + * jitter and exponential backoff are applied, with the max being retry_max_ms. + */ +int main_0143_exponential_backoff_mock(int argc, char **argv) { + const char *topic = test_mk_topic_name("topic", 1); + rd_kafka_mock_cluster_t *mcluster; + rd_kafka_conf_t *conf; + const char *bootstraps; + if (test_needs_auth()) { + TEST_SKIP("Mock cluster does not support SSL/SASL.\n"); + return 0; + } + mcluster = test_mock_cluster_new(1, &bootstraps); + rd_kafka_mock_start_request_tracking(mcluster); + rd_kafka_mock_topic_create(mcluster, topic, 1, 1); + + test_conf_init(&conf, NULL, 30); + /* This test may be slower when running with CI or Helgrind; + * restart the timeout.
*/ + test_timeout_set(100); + test_conf_set(conf, "bootstrap.servers", bootstraps); + test_conf_set(conf, "topic.metadata.refresh.interval.ms", "-1"); + + test_produce(mcluster, topic, rd_kafka_conf_dup(conf)); + test_find_coordinator(mcluster, topic, rd_kafka_conf_dup(conf)); + test_offset_commit(mcluster, topic, rd_kafka_conf_dup(conf)); + test_heartbeat_find_coordinator(mcluster, topic, + rd_kafka_conf_dup(conf)); + test_joingroup_find_coordinator(mcluster, topic, + rd_kafka_conf_dup(conf)); + test_fetch_fast_leader_query(mcluster, topic, rd_kafka_conf_dup(conf)); + test_produce_fast_leader_query(mcluster, topic, + rd_kafka_conf_dup(conf)); + test_mock_cluster_destroy(mcluster); + rd_kafka_conf_destroy(conf); + return 0; +} diff --git a/lib/librdkafka-2.3.0/tests/0144-idempotence_mock.c b/lib/librdkafka-2.3.0/tests/0144-idempotence_mock.c new file mode 100644 index 00000000000..62b392cde2e --- /dev/null +++ b/lib/librdkafka-2.3.0/tests/0144-idempotence_mock.c @@ -0,0 +1,376 @@ +/* + * librdkafka - Apache Kafka C library + * + * Copyright (c) 2023, Confluent Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "test.h" + +#include "../src/rdkafka_proto.h" + +#include + + +/** + * @name Idempotent producer tests using the mock cluster + * + */ + + +static int allowed_error; + +/** + * @brief Decide what error_cb's will cause the test to fail. + */ +static int +error_is_fatal_cb(rd_kafka_t *rk, rd_kafka_resp_err_t err, const char *reason) { + if (err == allowed_error || + /* If transport errors are allowed then it is likely + * that we'll also see ALL_BROKERS_DOWN. */ + (allowed_error == RD_KAFKA_RESP_ERR__TRANSPORT && + err == RD_KAFKA_RESP_ERR__ALL_BROKERS_DOWN)) { + TEST_SAY("Ignoring allowed error: %s: %s\n", + rd_kafka_err2name(err), reason); + return 0; + } + return 1; +} + + +static rd_kafka_resp_err_t (*on_response_received_cb)(rd_kafka_t *rk, + int sockfd, + const char *brokername, + int32_t brokerid, + int16_t ApiKey, + int16_t ApiVersion, + int32_t CorrId, + size_t size, + int64_t rtt, + rd_kafka_resp_err_t err, + void *ic_opaque); + +/** + * @brief Simple on_response_received interceptor that simply calls the + * sub-test's on_response_received_cb function, if set. 
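+ * + * (Editorial note: a sub-test would assign on_response_received_cb and then + * pass the special "on_response_received" key to create_idempo_producer() + * below, which installs this trampoline through the on_new_producer + * interceptor; the sub-tests in this file do not appear to set it.)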
+ */ +static rd_kafka_resp_err_t +on_response_received_trampoline(rd_kafka_t *rk, + int sockfd, + const char *brokername, + int32_t brokerid, + int16_t ApiKey, + int16_t ApiVersion, + int32_t CorrId, + size_t size, + int64_t rtt, + rd_kafka_resp_err_t err, + void *ic_opaque) { + TEST_ASSERT(on_response_received_cb != NULL, ""); + return on_response_received_cb(rk, sockfd, brokername, brokerid, ApiKey, + ApiVersion, CorrId, size, rtt, err, + ic_opaque); +} + + +/** + * @brief on_new interceptor to add an on_response_received interceptor. + */ +static rd_kafka_resp_err_t on_new_producer(rd_kafka_t *rk, + const rd_kafka_conf_t *conf, + void *ic_opaque, + char *errstr, + size_t errstr_size) { + rd_kafka_resp_err_t err = RD_KAFKA_RESP_ERR_NO_ERROR; + + if (on_response_received_cb) + err = rd_kafka_interceptor_add_on_response_received( + rk, "on_response_received", on_response_received_trampoline, + ic_opaque); + + return err; +} + + +/** + * @brief Create an idempotent producer and a mock cluster. + * + * The var-arg list is a NULL-terminated list of + * (const char *key, const char *value) config properties. + * + * Special keys: + * "on_response_received", "" - enable the on_response_received_cb + * interceptor, + * which must be assigned prior to + * calling create_idempo_producer(). + */ +static RD_SENTINEL rd_kafka_t * +create_idempo_producer(rd_kafka_mock_cluster_t **mclusterp, + int broker_cnt, + ...) { + rd_kafka_conf_t *conf; + rd_kafka_t *rk; + char numstr[8]; + va_list ap; + const char *key; + rd_bool_t add_interceptors = rd_false; + + rd_snprintf(numstr, sizeof(numstr), "%d", broker_cnt); + + test_conf_init(&conf, NULL, 60); + + test_conf_set(conf, "enable.idempotence", "true"); + /* When mock brokers are set to down state they're still binding + * the port, just not listening to it, which makes connection attempts + * stall until socket.connection.setup.timeout.ms expires. + * To speed up detection of brokers being down we reduce this timeout + * to just a couple of seconds. */ + test_conf_set(conf, "socket.connection.setup.timeout.ms", "5000"); + /* Speed up reconnects */ + test_conf_set(conf, "reconnect.backoff.max.ms", "2000"); + test_conf_set(conf, "test.mock.num.brokers", numstr); + rd_kafka_conf_set_dr_msg_cb(conf, test_dr_msg_cb); + + test_curr->ignore_dr_err = rd_false; + + va_start(ap, broker_cnt); + while ((key = va_arg(ap, const char *))) { + if (!strcmp(key, "on_response_received")) { + add_interceptors = rd_true; + (void)va_arg(ap, const char *); + } else { + test_conf_set(conf, key, va_arg(ap, const char *)); + } + } + va_end(ap); + + /* Add the on_.. interceptors */ + if (add_interceptors) + rd_kafka_conf_interceptor_add_on_new(conf, "on_new_producer", + on_new_producer, NULL); + + rk = test_create_handle(RD_KAFKA_PRODUCER, conf); + + if (mclusterp) { + *mclusterp = rd_kafka_handle_mock_cluster(rk); + TEST_ASSERT(*mclusterp, "failed to create mock cluster"); + + /* Create some of the common consumer "input" topics + * that we must be able to commit to with + * send_offsets_to_transaction(). + * The number depicts the number of partitions in the topic. */ + TEST_CALL_ERR__( + rd_kafka_mock_topic_create(*mclusterp, "srctopic4", 4, 1)); + TEST_CALL_ERR__(rd_kafka_mock_topic_create( + *mclusterp, "srctopic64", 64, 1)); + } + + return rk; +} + +/** + * @brief A possibly persisted error should treat the message as not persisted, + * to avoid increasing the next expected sequence and causing a possible + * fatal error.
+ * n = 1 triggered the "sequence desynchronization" fatal + * error, n > 1 triggered the "rewound sequence number" fatal error. + * See #3584. + * + * @param n Number of messages (1 to 5) to send before disconnection. These + * will fail with a possibly persisted error; + * the rest will be sent before reconnecting. + * + */ +static void +do_test_idempo_possibly_persisted_not_causing_fatal_error(size_t n) { + rd_kafka_t *rk; + rd_kafka_mock_cluster_t *mcluster; + size_t i; + int remains = 0; + + SUB_TEST_QUICK(); + + rk = create_idempo_producer(&mcluster, 1, "batch.num.messages", "1", + "linger.ms", "0", NULL); + test_curr->ignore_dr_err = rd_true; + test_curr->is_fatal_cb = error_is_fatal_cb; + /* Only allow an error from the disconnection below. */ + allowed_error = RD_KAFKA_RESP_ERR__TRANSPORT; + + /* Produce 5 messages without error first, msgids 1->5. */ + test_produce_msgs2(rk, "mytopic", 0, 0, 0, 5, NULL, 64); + rd_kafka_flush(rk, -1); + + /* First sequence is for the immediately produced reply; the + * response is never delivered because of the disconnection. */ + for (i = 0; i < n; i++) { + rd_kafka_mock_broker_push_request_error_rtts( + mcluster, 1, RD_KAFKAP_Produce, 1, + RD_KAFKA_RESP_ERR_NO_ERROR, 750); + } + + /* After disconnection: first message fails with NOT_ENOUGH_REPLICAS, + * rest with OUT_OF_ORDER_SEQUENCE_NUMBER. */ + for (i = 0; i < 5; i++) { + if (i == 0) { + rd_kafka_mock_broker_push_request_error_rtts( + mcluster, 1, RD_KAFKAP_Produce, 1, + RD_KAFKA_RESP_ERR_NOT_ENOUGH_REPLICAS, 750); + } else { + rd_kafka_mock_broker_push_request_error_rtts( + mcluster, 1, RD_KAFKAP_Produce, 1, + RD_KAFKA_RESP_ERR_OUT_OF_ORDER_SEQUENCE_NUMBER, 1); + } + } + + /* Produce n messages that will be retried, msgids 6->(6+n-1). */ + test_produce_msgs2_nowait(rk, "mytopic", 0, 0, 0, n, NULL, 64, + &remains); + + /* Wait until the messages are sent, then take the broker down and up + * again. "Possibly persisted" errors won't increase next_ack, + * but it will be increased when a NO_ERROR is received + * during the second retry after the broker is brought up again. */ + rd_usleep(250000, 0); + rd_kafka_mock_broker_set_down(mcluster, 1); + rd_usleep(250000, 0); + + /* Produce the rest of the (5 - n) messages, which will be enqueued + * after the retried ones, msgids (6+n)->10. */ + if (n < 5) + test_produce_msgs2_nowait(rk, "mytopic", 0, 0, 0, 5 - n, NULL, + 64, &remains); + + rd_kafka_mock_broker_set_up(mcluster, 1); + + /* All done, producer recovers without fatal errors. */ + rd_kafka_flush(rk, -1); + rd_kafka_destroy(rk); + + allowed_error = RD_KAFKA_RESP_ERR_NO_ERROR; + + SUB_TEST_PASS(); +} + +/** + * @brief After a possibly persisted error that caused a retry, messages + * can fail with DUPLICATE_SEQUENCE_NUMBER or succeed, and in both + * cases they'll be considered persisted. + */ +static void +do_test_idempo_duplicate_sequence_number_after_possibly_persisted(void) { + rd_kafka_t *rk; + rd_kafka_mock_cluster_t *mcluster; + int remains = 0; + + SUB_TEST_QUICK(); + + rk = create_idempo_producer(&mcluster, 1, "batch.num.messages", "1", + "linger.ms", "0", NULL); + test_curr->ignore_dr_err = rd_true; + test_curr->is_fatal_cb = error_is_fatal_cb; + /* Only allow an error from the disconnection below. */ + allowed_error = RD_KAFKA_RESP_ERR__TRANSPORT; + + /* Produce 5 messages without error first, msgids 1-5. */ + test_produce_msgs2(rk, "mytopic", 0, 0, 0, 5, NULL, 64); + + + /* Make sure first response comes after disconnection.
*/ + rd_kafka_mock_broker_push_request_error_rtts( + mcluster, 1, RD_KAFKAP_Produce, 5, + RD_KAFKA_RESP_ERR_DUPLICATE_SEQUENCE_NUMBER, 500, + RD_KAFKA_RESP_ERR_NO_ERROR, 0, RD_KAFKA_RESP_ERR_NO_ERROR, 0, + RD_KAFKA_RESP_ERR_NO_ERROR, 0, RD_KAFKA_RESP_ERR_NO_ERROR, 0); + + test_produce_msgs2_nowait(rk, "mytopic", 0, 0, 0, 5, NULL, 64, + &remains); + + /* Let the message fail because of _TRANSPORT (possibly persisted). */ + rd_kafka_mock_broker_set_down(mcluster, 1); + + rd_usleep(250000, 0); + + /* When retrying the first DUPLICATE_SEQUENCE_NUMBER is treated + * as NO_ERROR. */ + rd_kafka_mock_broker_set_up(mcluster, 1); + + /* All done. */ + rd_kafka_flush(rk, -1); + rd_kafka_destroy(rk); + + allowed_error = RD_KAFKA_RESP_ERR_NO_ERROR; + + SUB_TEST_PASS(); +} + +/** + * @brief When a message fails on the broker with a possibly persisted error + * NOT_ENOUGH_REPLICAS_AFTER_APPEND, in case next messages + * succeed, it should be implicitly acked. + */ +static void do_test_idempo_success_after_possibly_persisted(void) { + rd_kafka_t *rk; + rd_kafka_mock_cluster_t *mcluster; + + SUB_TEST_QUICK(); + + rk = create_idempo_producer(&mcluster, 1, "batch.num.messages", "1", + "linger.ms", "0", NULL); + test_curr->ignore_dr_err = rd_true; + test_curr->is_fatal_cb = error_is_fatal_cb; + + /* Make sure first response fails with possibly persisted + * error NOT_ENOUGH_REPLICAS_AFTER_APPEND next messages + * will succeed. */ + rd_kafka_mock_broker_push_request_error_rtts( + mcluster, 1, RD_KAFKAP_Produce, 1, + RD_KAFKA_RESP_ERR_NOT_ENOUGH_REPLICAS_AFTER_APPEND, 0); + + /* Produce 5 messages, msgids 1-5. */ + test_produce_msgs2(rk, "mytopic", 0, 0, 0, 5, NULL, 64); + + /* All done. */ + rd_kafka_flush(rk, -1); + rd_kafka_destroy(rk); + + SUB_TEST_PASS(); +} + +int main_0144_idempotence_mock(int argc, char **argv) { + if (test_needs_auth()) { + TEST_SKIP("Mock cluster does not support SSL/SASL\n"); + return 0; + } + + int i; + for (i = 1; i <= 5; i++) + do_test_idempo_possibly_persisted_not_causing_fatal_error(i); + + do_test_idempo_duplicate_sequence_number_after_possibly_persisted(); + + do_test_idempo_success_after_possibly_persisted(); + + return 0; +} diff --git a/lib/librdkafka-2.1.0/tests/1000-unktopic.c b/lib/librdkafka-2.3.0/tests/1000-unktopic.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/1000-unktopic.c rename to lib/librdkafka-2.3.0/tests/1000-unktopic.c index ad2b7e87096..af4a45a188d 100644 --- a/lib/librdkafka-2.1.0/tests/1000-unktopic.c +++ b/lib/librdkafka-2.3.0/tests/1000-unktopic.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/8000-idle.cpp b/lib/librdkafka-2.3.0/tests/8000-idle.cpp similarity index 97% rename from lib/librdkafka-2.1.0/tests/8000-idle.cpp rename to lib/librdkafka-2.3.0/tests/8000-idle.cpp index 9659ade97a1..3004df406fe 100644 --- a/lib/librdkafka-2.1.0/tests/8000-idle.cpp +++ b/lib/librdkafka-2.3.0/tests/8000-idle.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2016, Magnus Edenhill + * Copyright (c) 2016-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.3.0/tests/8001-fetch_from_follower_mock_manual.c b/lib/librdkafka-2.3.0/tests/8001-fetch_from_follower_mock_manual.c new file mode 100644 index 00000000000..d542be5f919 --- /dev/null +++ b/lib/librdkafka-2.3.0/tests/8001-fetch_from_follower_mock_manual.c @@ -0,0 +1,116 @@ +/* + * librdkafka - Apache Kafka C library + * + * Copyright (c) 2023, Confluent Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "test.h" + +#include "../src/rdkafka_proto.h" + +/** + * @brief Test that the #4195 segfault doesn't happen when preferred replica + * lease expires and the rktp is in fetch state + * RD_KAFKA_TOPPAR_FETCH_OFFSET_WAIT. + */ +static void do_test_fetch_from_follower_offset_retry(void) { + const char *bootstraps; + rd_kafka_mock_cluster_t *mcluster; + rd_kafka_conf_t *conf; + rd_kafka_t *c; + const char *topic = "test"; + rd_kafka_topic_partition_t *rktpar; + rd_kafka_topic_partition_list_t *seek; + int i; + + SUB_TEST_QUICK(); + test_timeout_set(600); + + mcluster = test_mock_cluster_new(3, &bootstraps); + /* Set partition leader to broker 1. */ + rd_kafka_mock_partition_set_leader(mcluster, topic, 0, 1); + rd_kafka_mock_partition_set_follower(mcluster, topic, 0, 2); + + test_conf_init(&conf, NULL, 0); + test_conf_set(conf, "bootstrap.servers", bootstraps); + test_conf_set(conf, "client.rack", "myrack"); + test_conf_set(conf, "auto.offset.reset", "earliest"); + test_conf_set(conf, "fetch.error.backoff.ms", "1000"); + test_conf_set(conf, "fetch.message.max.bytes", "10"); + test_conf_set(conf, "session.timeout.ms", "600000"); + test_conf_set(conf, "topic.metadata.refresh.interval.ms", "600000"); + + c = test_create_consumer("mygroup", NULL, conf, NULL); + + test_consumer_assign_partition( + "do_test_fetch_from_follower_offset_retry", c, topic, 0, + RD_KAFKA_OFFSET_INVALID); + + /* Since there are no messages, this poll only waits for metadata, and + * then sets the preferred replica after the first fetch request. + * Subsequent polls are for waiting up to 5 minutes. 
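+ * (Editorial arithmetic: 7 polls x 40000 ms = 280 s of polling, slightly + * less than the ~5 minute preferred replica lease, so the consumer keeps + * serving callbacks until the lease is about to expire.)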
*/ + for (i = 0; i < 7; i++) { + test_consumer_poll_no_msgs( + "initial metadata and preferred replica set", c, 0, 40000); + } + + + /* Seek to end to trigger ListOffsets */ + seek = rd_kafka_topic_partition_list_new(1); + rktpar = rd_kafka_topic_partition_list_add(seek, topic, 0); + rktpar->offset = RD_KAFKA_OFFSET_END; + + /* Increase RTT for this ListOffsets */ + rd_kafka_mock_broker_push_request_error_rtts( + mcluster, 2, RD_KAFKAP_ListOffsets, 1, RD_KAFKA_RESP_ERR_NO_ERROR, + 40 * 1000); + + rd_kafka_seek_partitions(c, seek, -1); + rd_kafka_topic_partition_list_destroy(seek); + + /* Wait lease expiry */ + rd_sleep(50); + + test_consumer_close(c); + + rd_kafka_destroy(c); + + test_mock_cluster_destroy(mcluster); + + SUB_TEST_PASS(); +} + + +int main_8001_fetch_from_follower_mock_manual(int argc, char **argv) { + + if (test_needs_auth()) { + TEST_SKIP("Mock cluster does not support SSL/SASL\n"); + return 0; + } + + do_test_fetch_from_follower_offset_retry(); + + return 0; +} diff --git a/lib/librdkafka-2.1.0/tests/CMakeLists.txt b/lib/librdkafka-2.3.0/tests/CMakeLists.txt similarity index 94% rename from lib/librdkafka-2.1.0/tests/CMakeLists.txt rename to lib/librdkafka-2.3.0/tests/CMakeLists.txt index a9dccfa5e58..8a4c285e95d 100644 --- a/lib/librdkafka-2.1.0/tests/CMakeLists.txt +++ b/lib/librdkafka-2.3.0/tests/CMakeLists.txt @@ -118,6 +118,7 @@ set( 0124-openssl_invalid_engine.c 0125-immediate_flush.c 0126-oauthbearer_oidc.c + 0127-fetch_queue_backoff.cpp 0128-sasl_callback_queue.cpp 0129-fetch_aborted_msgs.c 0130-store_offsets.c @@ -129,7 +130,13 @@ set( 0136-resolve_cb.c 0137-barrier_batch_consume.c 0138-admin_mock.c + 0139-offset_validation_mock.c + 0140-commit_metadata.cpp + 0142-reauthentication.c + 0143-exponential_backoff_mock.c + 0144-idempotence_mock.c 8000-idle.cpp + 8001-fetch_from_follower_mock_manual.c test.c testcpp.cpp rusage.c diff --git a/lib/librdkafka-2.1.0/tests/LibrdkafkaTestApp.py b/lib/librdkafka-2.3.0/tests/LibrdkafkaTestApp.py similarity index 100% rename from lib/librdkafka-2.1.0/tests/LibrdkafkaTestApp.py rename to lib/librdkafka-2.3.0/tests/LibrdkafkaTestApp.py diff --git a/lib/librdkafka-2.1.0/tests/Makefile b/lib/librdkafka-2.3.0/tests/Makefile similarity index 99% rename from lib/librdkafka-2.1.0/tests/Makefile rename to lib/librdkafka-2.3.0/tests/Makefile index 73eab21406f..543639e49bc 100644 --- a/lib/librdkafka-2.1.0/tests/Makefile +++ b/lib/librdkafka-2.3.0/tests/Makefile @@ -12,7 +12,7 @@ CXXFLAGS += -I../src -I../src-cpp LDFLAGS += -rdynamic -L../src -L../src-cpp # Latest Kafka version -KAFKA_VERSION?=3.1.0 +KAFKA_VERSION?=3.4.0 # Kafka versions for compatibility tests COMPAT_KAFKA_VERSIONS?=0.8.2.2 0.9.0.1 0.11.0.3 1.0.2 2.4.1 2.8.1 $(KAFKA_VERSION) diff --git a/lib/librdkafka-2.1.0/tests/README.md b/lib/librdkafka-2.3.0/tests/README.md similarity index 98% rename from lib/librdkafka-2.1.0/tests/README.md rename to lib/librdkafka-2.3.0/tests/README.md index b0d99b0bbc7..4d2c011ad31 100644 --- a/lib/librdkafka-2.1.0/tests/README.md +++ b/lib/librdkafka-2.3.0/tests/README.md @@ -186,6 +186,10 @@ be it `make`, `run-test.sh`, `until-fail.sh`, etc. with `TESTS=0000`. See [../src/rdunittest.c](../src/rdunittest.c) for unit test names. + * `TESTS_SKIP_BEFORE=0nnn` - skip tests before this test. Tests are skipped + even if they are part of `TESTS` variable. + Usage: `TESTS_SKIP_BEFORE=0030`. All the tests + until test 0030 are skipped. 
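+ For example (editorial illustration, using the `make` runner + mentioned above): `TESTS_SKIP_BEFORE=0030 make` runs the + configured tests but skips 0000-0029.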
Let's say that you run the full test suite and get a failure in test 0061, diff --git a/lib/librdkafka-2.1.0/tests/autotest.sh b/lib/librdkafka-2.3.0/tests/autotest.sh similarity index 100% rename from lib/librdkafka-2.1.0/tests/autotest.sh rename to lib/librdkafka-2.3.0/tests/autotest.sh diff --git a/lib/librdkafka-2.1.0/tests/backtrace.gdb b/lib/librdkafka-2.3.0/tests/backtrace.gdb similarity index 100% rename from lib/librdkafka-2.1.0/tests/backtrace.gdb rename to lib/librdkafka-2.3.0/tests/backtrace.gdb diff --git a/lib/librdkafka-2.1.0/tests/broker_version_tests.py b/lib/librdkafka-2.3.0/tests/broker_version_tests.py similarity index 100% rename from lib/librdkafka-2.1.0/tests/broker_version_tests.py rename to lib/librdkafka-2.3.0/tests/broker_version_tests.py diff --git a/lib/librdkafka-2.1.0/tests/buildbox.sh b/lib/librdkafka-2.3.0/tests/buildbox.sh similarity index 100% rename from lib/librdkafka-2.1.0/tests/buildbox.sh rename to lib/librdkafka-2.3.0/tests/buildbox.sh diff --git a/lib/librdkafka-2.1.0/tests/cleanup-checker-tests.sh b/lib/librdkafka-2.3.0/tests/cleanup-checker-tests.sh similarity index 100% rename from lib/librdkafka-2.1.0/tests/cleanup-checker-tests.sh rename to lib/librdkafka-2.3.0/tests/cleanup-checker-tests.sh diff --git a/lib/librdkafka-2.1.0/tests/cluster_testing.py b/lib/librdkafka-2.3.0/tests/cluster_testing.py similarity index 100% rename from lib/librdkafka-2.1.0/tests/cluster_testing.py rename to lib/librdkafka-2.3.0/tests/cluster_testing.py diff --git a/lib/librdkafka-2.1.0/tests/delete-test-topics.sh b/lib/librdkafka-2.3.0/tests/delete-test-topics.sh similarity index 100% rename from lib/librdkafka-2.1.0/tests/delete-test-topics.sh rename to lib/librdkafka-2.3.0/tests/delete-test-topics.sh diff --git a/lib/librdkafka-2.1.0/tests/fixtures/ssl/.gitignore b/lib/librdkafka-2.3.0/tests/fixtures/ssl/.gitignore similarity index 100% rename from lib/librdkafka-2.1.0/tests/fixtures/ssl/.gitignore rename to lib/librdkafka-2.3.0/tests/fixtures/ssl/.gitignore diff --git a/lib/librdkafka-2.1.0/tests/fixtures/ssl/Makefile b/lib/librdkafka-2.3.0/tests/fixtures/ssl/Makefile similarity index 100% rename from lib/librdkafka-2.1.0/tests/fixtures/ssl/Makefile rename to lib/librdkafka-2.3.0/tests/fixtures/ssl/Makefile diff --git a/lib/librdkafka-2.1.0/tests/fixtures/ssl/README.md b/lib/librdkafka-2.3.0/tests/fixtures/ssl/README.md similarity index 100% rename from lib/librdkafka-2.1.0/tests/fixtures/ssl/README.md rename to lib/librdkafka-2.3.0/tests/fixtures/ssl/README.md diff --git a/lib/librdkafka-2.1.0/tests/fixtures/ssl/client.keystore.p12 b/lib/librdkafka-2.3.0/tests/fixtures/ssl/client.keystore.p12 similarity index 100% rename from lib/librdkafka-2.1.0/tests/fixtures/ssl/client.keystore.p12 rename to lib/librdkafka-2.3.0/tests/fixtures/ssl/client.keystore.p12 diff --git a/lib/librdkafka-2.1.0/tests/fixtures/ssl/client2.certificate.pem b/lib/librdkafka-2.3.0/tests/fixtures/ssl/client2.certificate.pem similarity index 100% rename from lib/librdkafka-2.1.0/tests/fixtures/ssl/client2.certificate.pem rename to lib/librdkafka-2.3.0/tests/fixtures/ssl/client2.certificate.pem diff --git a/lib/librdkafka-2.1.0/tests/fixtures/ssl/client2.key b/lib/librdkafka-2.3.0/tests/fixtures/ssl/client2.key similarity index 100% rename from lib/librdkafka-2.1.0/tests/fixtures/ssl/client2.key rename to lib/librdkafka-2.3.0/tests/fixtures/ssl/client2.key diff --git a/lib/librdkafka-2.1.0/tests/fixtures/ssl/create_keys.sh b/lib/librdkafka-2.3.0/tests/fixtures/ssl/create_keys.sh similarity 
index 100% rename from lib/librdkafka-2.1.0/tests/fixtures/ssl/create_keys.sh rename to lib/librdkafka-2.3.0/tests/fixtures/ssl/create_keys.sh diff --git a/lib/librdkafka-2.1.0/tests/fuzzers/.gitignore b/lib/librdkafka-2.3.0/tests/fuzzers/.gitignore similarity index 100% rename from lib/librdkafka-2.1.0/tests/fuzzers/.gitignore rename to lib/librdkafka-2.3.0/tests/fuzzers/.gitignore diff --git a/lib/librdkafka-2.1.0/tests/fuzzers/Makefile b/lib/librdkafka-2.3.0/tests/fuzzers/Makefile similarity index 100% rename from lib/librdkafka-2.1.0/tests/fuzzers/Makefile rename to lib/librdkafka-2.3.0/tests/fuzzers/Makefile diff --git a/lib/librdkafka-2.1.0/tests/fuzzers/README.md b/lib/librdkafka-2.3.0/tests/fuzzers/README.md similarity index 100% rename from lib/librdkafka-2.1.0/tests/fuzzers/README.md rename to lib/librdkafka-2.3.0/tests/fuzzers/README.md diff --git a/lib/librdkafka-2.1.0/tests/fuzzers/fuzz_regex.c b/lib/librdkafka-2.3.0/tests/fuzzers/fuzz_regex.c similarity index 98% rename from lib/librdkafka-2.1.0/tests/fuzzers/fuzz_regex.c rename to lib/librdkafka-2.3.0/tests/fuzzers/fuzz_regex.c index 2facc19f029..8e75848ddc5 100644 --- a/lib/librdkafka-2.1.0/tests/fuzzers/fuzz_regex.c +++ b/lib/librdkafka-2.3.0/tests/fuzzers/fuzz_regex.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020, Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/fuzzers/helpers.h b/lib/librdkafka-2.3.0/tests/fuzzers/helpers.h similarity index 98% rename from lib/librdkafka-2.1.0/tests/fuzzers/helpers.h rename to lib/librdkafka-2.3.0/tests/fuzzers/helpers.h index cfab037779c..37d956b2333 100644 --- a/lib/librdkafka-2.1.0/tests/fuzzers/helpers.h +++ b/lib/librdkafka-2.3.0/tests/fuzzers/helpers.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020, Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/gen-ssl-certs.sh b/lib/librdkafka-2.3.0/tests/gen-ssl-certs.sh similarity index 100% rename from lib/librdkafka-2.1.0/tests/gen-ssl-certs.sh rename to lib/librdkafka-2.3.0/tests/gen-ssl-certs.sh diff --git a/lib/librdkafka-2.1.0/tests/interactive_broker_version.py b/lib/librdkafka-2.3.0/tests/interactive_broker_version.py similarity index 93% rename from lib/librdkafka-2.1.0/tests/interactive_broker_version.py rename to lib/librdkafka-2.3.0/tests/interactive_broker_version.py index bcd4931f95b..d294b7a61c3 100755 --- a/lib/librdkafka-2.1.0/tests/interactive_broker_version.py +++ b/lib/librdkafka-2.3.0/tests/interactive_broker_version.py @@ -68,11 +68,13 @@ def test_version(version, cmd=None, deploy=True, conf={}, debug=False, # Configure rack & replica selector if broker supports # fetch-from-follower if version_as_number(version) >= 2.4: + curr_conf = defconf.get('conf', list()) defconf.update( { 'conf': [ 'broker.rack=RACK${appid}', - 'replica.selector.class=org.apache.kafka.common.replica.RackAwareReplicaSelector']}) # noqa: E501 + 'replica.selector.class=org.apache.kafka.common.replica.RackAwareReplicaSelector'] + curr_conf}) # noqa: E501 + print('conf broker', str(n), ': ', defconf) brokers.append(KafkaBrokerApp(cluster, defconf)) cmd_env = os.environ.copy() @@ -268,7 +270,12 @@ def test_version(version, cmd=None, deploy=True, conf={}, debug=False, help='Dont deploy applications, ' 'assume already deployed.') parser.add_argument('--conf', type=str, dest='conf', default=None, - help='JSON config object (not file)') + help=''' + JSON config object (not file). + This does not translate to broker configs directly. + If broker config properties are to be specified, + they should be specified with + --conf \'{"conf": ["key=value", "key=value"]}\'''') parser.add_argument('--scenario', type=str, dest='scenario', default='default', help='Test scenario (see scenarios/ directory)') @@ -318,6 +325,14 @@ def test_version(version, cmd=None, deploy=True, conf={}, debug=False, default=None, help='OAUTHBEARER/OIDC method (DEFAULT, OIDC), \ must config SASL mechanism to OAUTHBEARER') + parser.add_argument( + '--max-reauth-ms', + dest='reauth_ms', + type=int, + default='10000', + help=''' + Sets the value of connections.max.reauth.ms on the brokers. 
+ Set 0 to disable.''') args = parser.parse_args() if args.conf is not None: @@ -349,7 +364,13 @@ def test_version(version, cmd=None, deploy=True, conf={}, debug=False, args.conf['sasl_oauthbearer_method'] = \ args.sasl_oauthbearer_method - args.conf.get('conf', list()).append("log.retention.bytes=1000000000") + if 'conf' not in args.conf: + args.conf['conf'] = [] + + args.conf['conf'].append( + "connections.max.reauth.ms={}".format( + args.reauth_ms)) + args.conf['conf'].append("log.retention.bytes=1000000000") for version in args.versions: r = test_version(version, cmd=args.cmd, deploy=args.deploy, diff --git a/lib/librdkafka-2.1.0/tests/interceptor_test/.gitignore b/lib/librdkafka-2.3.0/tests/interceptor_test/.gitignore similarity index 100% rename from lib/librdkafka-2.1.0/tests/interceptor_test/.gitignore rename to lib/librdkafka-2.3.0/tests/interceptor_test/.gitignore diff --git a/lib/librdkafka-2.1.0/tests/interceptor_test/CMakeLists.txt b/lib/librdkafka-2.3.0/tests/interceptor_test/CMakeLists.txt similarity index 100% rename from lib/librdkafka-2.1.0/tests/interceptor_test/CMakeLists.txt rename to lib/librdkafka-2.3.0/tests/interceptor_test/CMakeLists.txt diff --git a/lib/librdkafka-2.1.0/tests/interceptor_test/Makefile b/lib/librdkafka-2.3.0/tests/interceptor_test/Makefile similarity index 100% rename from lib/librdkafka-2.1.0/tests/interceptor_test/Makefile rename to lib/librdkafka-2.3.0/tests/interceptor_test/Makefile diff --git a/lib/librdkafka-2.1.0/tests/interceptor_test/interceptor_test.c b/lib/librdkafka-2.3.0/tests/interceptor_test/interceptor_test.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/interceptor_test/interceptor_test.c rename to lib/librdkafka-2.3.0/tests/interceptor_test/interceptor_test.c index ee8a63ba982..ee1f3978a91 100644 --- a/lib/librdkafka-2.1.0/tests/interceptor_test/interceptor_test.c +++ b/lib/librdkafka-2.3.0/tests/interceptor_test/interceptor_test.c @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2017 Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/interceptor_test/interceptor_test.h b/lib/librdkafka-2.3.0/tests/interceptor_test/interceptor_test.h similarity index 100% rename from lib/librdkafka-2.1.0/tests/interceptor_test/interceptor_test.h rename to lib/librdkafka-2.3.0/tests/interceptor_test/interceptor_test.h diff --git a/lib/librdkafka-2.1.0/tests/java/.gitignore b/lib/librdkafka-2.3.0/tests/java/.gitignore similarity index 100% rename from lib/librdkafka-2.1.0/tests/java/.gitignore rename to lib/librdkafka-2.3.0/tests/java/.gitignore diff --git a/lib/librdkafka-2.1.0/tests/java/IncrementalRebalanceCli.java b/lib/librdkafka-2.3.0/tests/java/IncrementalRebalanceCli.java similarity index 98% rename from lib/librdkafka-2.1.0/tests/java/IncrementalRebalanceCli.java rename to lib/librdkafka-2.3.0/tests/java/IncrementalRebalanceCli.java index de044ae5856..75622f06a7d 100644 --- a/lib/librdkafka-2.1.0/tests/java/IncrementalRebalanceCli.java +++ b/lib/librdkafka-2.3.0/tests/java/IncrementalRebalanceCli.java @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020, Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/java/Makefile b/lib/librdkafka-2.3.0/tests/java/Makefile similarity index 100% rename from lib/librdkafka-2.1.0/tests/java/Makefile rename to lib/librdkafka-2.3.0/tests/java/Makefile diff --git a/lib/librdkafka-2.1.0/tests/java/Murmur2Cli.java b/lib/librdkafka-2.3.0/tests/java/Murmur2Cli.java similarity index 97% rename from lib/librdkafka-2.1.0/tests/java/Murmur2Cli.java rename to lib/librdkafka-2.3.0/tests/java/Murmur2Cli.java index 22444532d25..08105d4e655 100644 --- a/lib/librdkafka-2.1.0/tests/java/Murmur2Cli.java +++ b/lib/librdkafka-2.3.0/tests/java/Murmur2Cli.java @@ -2,7 +2,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020, Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/java/README.md b/lib/librdkafka-2.3.0/tests/java/README.md similarity index 100% rename from lib/librdkafka-2.1.0/tests/java/README.md rename to lib/librdkafka-2.3.0/tests/java/README.md diff --git a/lib/librdkafka-2.1.0/tests/java/TransactionProducerCli.java b/lib/librdkafka-2.3.0/tests/java/TransactionProducerCli.java similarity index 99% rename from lib/librdkafka-2.1.0/tests/java/TransactionProducerCli.java rename to lib/librdkafka-2.3.0/tests/java/TransactionProducerCli.java index f880c1422d2..6bc09712aa7 100644 --- a/lib/librdkafka-2.1.0/tests/java/TransactionProducerCli.java +++ b/lib/librdkafka-2.3.0/tests/java/TransactionProducerCli.java @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2020, Magnus Edenhill + * Copyright (c) 2020-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/java/run-class.sh b/lib/librdkafka-2.3.0/tests/java/run-class.sh similarity index 100% rename from lib/librdkafka-2.1.0/tests/java/run-class.sh rename to lib/librdkafka-2.3.0/tests/java/run-class.sh diff --git a/lib/librdkafka-2.1.0/tests/librdkafka.suppressions b/lib/librdkafka-2.3.0/tests/librdkafka.suppressions similarity index 100% rename from lib/librdkafka-2.1.0/tests/librdkafka.suppressions rename to lib/librdkafka-2.3.0/tests/librdkafka.suppressions diff --git a/lib/librdkafka-2.1.0/tests/lz4_manual_test.sh b/lib/librdkafka-2.3.0/tests/lz4_manual_test.sh similarity index 100% rename from lib/librdkafka-2.1.0/tests/lz4_manual_test.sh rename to lib/librdkafka-2.3.0/tests/lz4_manual_test.sh diff --git a/lib/librdkafka-2.1.0/tests/multi-broker-version-test.sh b/lib/librdkafka-2.3.0/tests/multi-broker-version-test.sh similarity index 100% rename from lib/librdkafka-2.1.0/tests/multi-broker-version-test.sh rename to lib/librdkafka-2.3.0/tests/multi-broker-version-test.sh diff --git a/lib/librdkafka-2.1.0/tests/parse-refcnt.sh b/lib/librdkafka-2.3.0/tests/parse-refcnt.sh similarity index 100% rename from lib/librdkafka-2.1.0/tests/parse-refcnt.sh rename to lib/librdkafka-2.3.0/tests/parse-refcnt.sh diff --git a/lib/librdkafka-2.1.0/tests/performance_plot.py b/lib/librdkafka-2.3.0/tests/performance_plot.py similarity index 100% rename from lib/librdkafka-2.1.0/tests/performance_plot.py rename to lib/librdkafka-2.3.0/tests/performance_plot.py diff --git a/lib/librdkafka-2.1.0/tests/plugin_test/Makefile b/lib/librdkafka-2.3.0/tests/plugin_test/Makefile similarity index 100% rename from lib/librdkafka-2.1.0/tests/plugin_test/Makefile rename to lib/librdkafka-2.3.0/tests/plugin_test/Makefile diff --git a/lib/librdkafka-2.1.0/tests/plugin_test/plugin_test.c b/lib/librdkafka-2.3.0/tests/plugin_test/plugin_test.c similarity index 97% rename from lib/librdkafka-2.1.0/tests/plugin_test/plugin_test.c rename to lib/librdkafka-2.3.0/tests/plugin_test/plugin_test.c index 54639a5a83a..dab8687b659 100644 --- a/lib/librdkafka-2.1.0/tests/plugin_test/plugin_test.c +++ b/lib/librdkafka-2.3.0/tests/plugin_test/plugin_test.c @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2017 Magnus Edenhill + * Copyright (c) 2017-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/requirements.txt b/lib/librdkafka-2.3.0/tests/requirements.txt similarity index 100% rename from lib/librdkafka-2.1.0/tests/requirements.txt rename to lib/librdkafka-2.3.0/tests/requirements.txt diff --git a/lib/librdkafka-2.1.0/tests/run-consumer-tests.sh b/lib/librdkafka-2.3.0/tests/run-consumer-tests.sh similarity index 100% rename from lib/librdkafka-2.1.0/tests/run-consumer-tests.sh rename to lib/librdkafka-2.3.0/tests/run-consumer-tests.sh diff --git a/lib/librdkafka-2.1.0/tests/run-producer-tests.sh b/lib/librdkafka-2.3.0/tests/run-producer-tests.sh similarity index 100% rename from lib/librdkafka-2.1.0/tests/run-producer-tests.sh rename to lib/librdkafka-2.3.0/tests/run-producer-tests.sh diff --git a/lib/librdkafka-2.1.0/tests/run-test.sh b/lib/librdkafka-2.3.0/tests/run-test.sh similarity index 100% rename from lib/librdkafka-2.1.0/tests/run-test.sh rename to lib/librdkafka-2.3.0/tests/run-test.sh diff --git a/lib/librdkafka-2.1.0/tests/rusage.c b/lib/librdkafka-2.3.0/tests/rusage.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/rusage.c rename to lib/librdkafka-2.3.0/tests/rusage.c index 48e702f3f4b..7d4048adb90 100644 --- a/lib/librdkafka-2.1.0/tests/rusage.c +++ b/lib/librdkafka-2.3.0/tests/rusage.c @@ -1,7 +1,7 @@ /* * librdkafka - The Apache Kafka C/C++ library * - * Copyright (c) 2019 Magnus Edenhill + * Copyright (c) 2019-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/sasl_test.py b/lib/librdkafka-2.3.0/tests/sasl_test.py similarity index 100% rename from lib/librdkafka-2.1.0/tests/sasl_test.py rename to lib/librdkafka-2.3.0/tests/sasl_test.py diff --git a/lib/librdkafka-2.1.0/tests/scenarios/README.md b/lib/librdkafka-2.3.0/tests/scenarios/README.md similarity index 100% rename from lib/librdkafka-2.1.0/tests/scenarios/README.md rename to lib/librdkafka-2.3.0/tests/scenarios/README.md diff --git a/lib/librdkafka-2.1.0/tests/scenarios/ak23.json b/lib/librdkafka-2.3.0/tests/scenarios/ak23.json similarity index 100% rename from lib/librdkafka-2.1.0/tests/scenarios/ak23.json rename to lib/librdkafka-2.3.0/tests/scenarios/ak23.json diff --git a/lib/librdkafka-2.1.0/tests/scenarios/default.json b/lib/librdkafka-2.3.0/tests/scenarios/default.json similarity index 100% rename from lib/librdkafka-2.1.0/tests/scenarios/default.json rename to lib/librdkafka-2.3.0/tests/scenarios/default.json diff --git a/lib/librdkafka-2.1.0/tests/scenarios/noautocreate.json b/lib/librdkafka-2.3.0/tests/scenarios/noautocreate.json similarity index 100% rename from lib/librdkafka-2.1.0/tests/scenarios/noautocreate.json rename to lib/librdkafka-2.3.0/tests/scenarios/noautocreate.json diff --git a/lib/librdkafka-2.1.0/tests/sockem.c b/lib/librdkafka-2.3.0/tests/sockem.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/sockem.c rename to lib/librdkafka-2.3.0/tests/sockem.c index 2de01627d8d..bf707a9b27c 100644 --- a/lib/librdkafka-2.1.0/tests/sockem.c +++ b/lib/librdkafka-2.3.0/tests/sockem.c @@ -1,7 +1,7 @@ /* * sockem - socket-level network emulation * - * Copyright (c) 2016, Magnus Edenhill, Andreas Smas + * Copyright (c) 2016-2022, Magnus Edenhill, Andreas Smas * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/sockem.h b/lib/librdkafka-2.3.0/tests/sockem.h similarity index 97% rename from lib/librdkafka-2.1.0/tests/sockem.h rename to lib/librdkafka-2.3.0/tests/sockem.h index 8a2ddcd875c..02fa55fba08 100644 --- a/lib/librdkafka-2.1.0/tests/sockem.h +++ b/lib/librdkafka-2.3.0/tests/sockem.h @@ -1,7 +1,7 @@ /* * sockem - socket-level network emulation * - * Copyright (c) 2016, Magnus Edenhill, Andreas Smas + * Copyright (c) 2016-2022, Magnus Edenhill, Andreas Smas * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/sockem_ctrl.c b/lib/librdkafka-2.3.0/tests/sockem_ctrl.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/sockem_ctrl.c rename to lib/librdkafka-2.3.0/tests/sockem_ctrl.c index c3e8ce92ed9..4396d273a9c 100644 --- a/lib/librdkafka-2.1.0/tests/sockem_ctrl.c +++ b/lib/librdkafka-2.3.0/tests/sockem_ctrl.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2018, Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/sockem_ctrl.h b/lib/librdkafka-2.3.0/tests/sockem_ctrl.h similarity index 97% rename from lib/librdkafka-2.1.0/tests/sockem_ctrl.h rename to lib/librdkafka-2.3.0/tests/sockem_ctrl.h index d33c87fca02..db616d67659 100644 --- a/lib/librdkafka-2.1.0/tests/sockem_ctrl.h +++ b/lib/librdkafka-2.3.0/tests/sockem_ctrl.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2018, Magnus Edenhill + * Copyright (c) 2018-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/test.c b/lib/librdkafka-2.3.0/tests/test.c similarity index 95% rename from lib/librdkafka-2.1.0/tests/test.c rename to lib/librdkafka-2.3.0/tests/test.c index 71180c8f47a..2ef4a36c3a9 100644 --- a/lib/librdkafka-2.1.0/tests/test.c +++ b/lib/librdkafka-2.3.0/tests/test.c @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2013, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -76,6 +77,7 @@ int test_rusage = 0; /**< Check resource usage */ * <1.0: CPU is faster than base line system. 
*/ double test_rusage_cpu_calibration = 1.0; static const char *tests_to_run = NULL; /* all */ +static const char *skip_tests_till = NULL; /* all */ static const char *subtests_to_run = NULL; /* all */ static const char *tests_to_skip = NULL; /* none */ int test_write_report = 0; /**< Write test report file */ @@ -132,6 +134,7 @@ _TEST_DECL(0028_long_topicnames); _TEST_DECL(0029_assign_offset); _TEST_DECL(0030_offset_commit); _TEST_DECL(0031_get_offsets); +_TEST_DECL(0031_get_offsets_mock); _TEST_DECL(0033_regex_subscribe); _TEST_DECL(0033_regex_subscribe_local); _TEST_DECL(0034_offset_reset); @@ -152,6 +155,7 @@ _TEST_DECL(0045_subscribe_update); _TEST_DECL(0045_subscribe_update_topic_remove); _TEST_DECL(0045_subscribe_update_non_exist_and_partchange); _TEST_DECL(0045_subscribe_update_mock); +_TEST_DECL(0045_subscribe_update_racks_mock); _TEST_DECL(0046_rkt_cache); _TEST_DECL(0047_partial_buf_tmout); _TEST_DECL(0048_partitioner); @@ -234,7 +238,9 @@ _TEST_DECL(0122_buffer_cleaning_after_rebalance); _TEST_DECL(0123_connections_max_idle); _TEST_DECL(0124_openssl_invalid_engine); _TEST_DECL(0125_immediate_flush); +_TEST_DECL(0125_immediate_flush_mock); _TEST_DECL(0126_oauthbearer_oidc); +_TEST_DECL(0127_fetch_queue_backoff); _TEST_DECL(0128_sasl_callback_queue); _TEST_DECL(0129_fetch_aborted_msgs); _TEST_DECL(0130_store_offsets); @@ -246,9 +252,15 @@ _TEST_DECL(0135_sasl_credentials); _TEST_DECL(0136_resolve_cb); _TEST_DECL(0137_barrier_batch_consume); _TEST_DECL(0138_admin_mock); +_TEST_DECL(0139_offset_validation_mock); +_TEST_DECL(0140_commit_metadata); +_TEST_DECL(0142_reauthentication); +_TEST_DECL(0143_exponential_backoff_mock); +_TEST_DECL(0144_idempotence_mock); /* Manual tests */ _TEST_DECL(8000_idle); +_TEST_DECL(8001_fetch_from_follower_mock_manual); /* Define test resource usage thresholds if the default limits @@ -315,7 +327,7 @@ struct test tests[] = { _TEST(0028_long_topicnames, TEST_F_KNOWN_ISSUE, TEST_BRKVER(0, 9, 0, 0), - .extra = "https://github.com/edenhill/librdkafka/issues/529"), + .extra = "https://github.com/confluentinc/librdkafka/issues/529"), _TEST(0029_assign_offset, 0), _TEST(0030_offset_commit, 0, @@ -323,6 +335,7 @@ struct test tests[] = { /* Loops over committed() until timeout */ _THRES(.ucpu = 10.0, .scpu = 5.0)), _TEST(0031_get_offsets, 0), + _TEST(0031_get_offsets_mock, TEST_F_LOCAL), _TEST(0033_regex_subscribe, 0, TEST_BRKVER(0, 9, 0, 0)), _TEST(0033_regex_subscribe_local, TEST_F_LOCAL), _TEST(0034_offset_reset, 0), @@ -359,6 +372,7 @@ struct test tests[] = { TEST_BRKVER(0, 9, 0, 0), .scenario = "noautocreate"), _TEST(0045_subscribe_update_mock, TEST_F_LOCAL), + _TEST(0045_subscribe_update_racks_mock, TEST_F_LOCAL), _TEST(0046_rkt_cache, TEST_F_LOCAL), _TEST(0047_partial_buf_tmout, TEST_F_KNOWN_ISSUE), _TEST(0048_partitioner, @@ -478,7 +492,9 @@ struct test tests[] = { _TEST(0123_connections_max_idle, 0), _TEST(0124_openssl_invalid_engine, TEST_F_LOCAL), _TEST(0125_immediate_flush, 0), + _TEST(0125_immediate_flush_mock, TEST_F_LOCAL), _TEST(0126_oauthbearer_oidc, 0, TEST_BRKVER(3, 1, 0, 0)), + _TEST(0127_fetch_queue_backoff, 0), _TEST(0128_sasl_callback_queue, TEST_F_LOCAL, TEST_BRKVER(2, 0, 0, 0)), _TEST(0129_fetch_aborted_msgs, 0, TEST_BRKVER(0, 11, 0, 0)), _TEST(0130_store_offsets, 0), @@ -490,9 +506,16 @@ struct test tests[] = { _TEST(0136_resolve_cb, TEST_F_LOCAL), _TEST(0137_barrier_batch_consume, 0), _TEST(0138_admin_mock, TEST_F_LOCAL, TEST_BRKVER(2, 4, 0, 0)), + _TEST(0139_offset_validation_mock, 0), + _TEST(0140_commit_metadata, 0), + 
_TEST(0142_reauthentication, 0, TEST_BRKVER(2, 2, 0, 0)), + _TEST(0143_exponential_backoff_mock, TEST_F_LOCAL), + _TEST(0144_idempotence_mock, TEST_F_LOCAL, TEST_BRKVER(0, 11, 0, 0)), + /* Manual tests */ _TEST(8000_idle, TEST_F_MANUAL), + _TEST(8001_fetch_from_follower_mock_manual, TEST_F_MANUAL), {NULL}}; @@ -1325,6 +1348,13 @@ static void run_tests(int argc, char **argv) { skip_silent = rd_true; } else if (tests_to_skip && strstr(tests_to_skip, testnum)) skip_reason = "included in TESTS_SKIP list"; + else if (skip_tests_till) { + if (!strcmp(skip_tests_till, testnum)) + skip_tests_till = NULL; + else + skip_reason = + "ignoring test before TESTS_SKIP_BEFORE"; + } if (!skip_reason) { run_test(test, argc, argv); @@ -1650,6 +1680,8 @@ int main(int argc, char **argv) { subtests_to_run = test_getenv("SUBTESTS", NULL); tests_to_skip = test_getenv("TESTS_SKIP", NULL); tmpver = test_getenv("TEST_KAFKA_VERSION", NULL); + skip_tests_till = test_getenv("TESTS_SKIP_BEFORE", NULL); + if (!tmpver) tmpver = test_getenv("KAFKA_VERSION", test_broker_version_str); test_broker_version_str = tmpver; @@ -1824,11 +1856,14 @@ int main(int argc, char **argv) { if (test_concurrent_max > 1) test_timeout_multiplier += (double)test_concurrent_max / 3; - TEST_SAY("Tests to run : %s\n", tests_to_run ? tests_to_run : "all"); + TEST_SAY("Tests to run : %s\n", + tests_to_run ? tests_to_run : "all"); if (subtests_to_run) - TEST_SAY("Sub tests : %s\n", subtests_to_run); + TEST_SAY("Sub tests : %s\n", subtests_to_run); if (tests_to_skip) - TEST_SAY("Skip tests : %s\n", tests_to_skip); + TEST_SAY("Skip tests : %s\n", tests_to_skip); + if (skip_tests_till) + TEST_SAY("Skip tests before: %s\n", skip_tests_till); TEST_SAY("Test mode : %s%s%s\n", test_quick ? "quick, " : "", test_mode, test_on_ci ? ", CI" : ""); TEST_SAY("Test scenario: %s\n", test_scenario); @@ -2310,7 +2345,7 @@ void test_produce_msgs_rate(rd_kafka_t *rk, /** * Create producer, produce \p msgcnt messages to \p topic \p partition, - * destroy consumer, and returns the used testid. + * destroy producer, and returns the used testid. */ uint64_t test_produce_msgs_easy_size(const char *topic, uint64_t testid, @@ -5454,6 +5489,92 @@ int32_t *test_get_broker_ids(rd_kafka_t *use_rk, size_t *cntp) { return ids; } +/** + * @brief Get value of a config property from given broker id. + * + * @param rk Optional instance to use. + * @param broker_id Broker to query. + * @param key Entry key to query. + * + * @return an allocated char* which will be non-NULL if `key` is present + * and there have been no errors. 
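For context, two of the test.c additions above are new harness conveniences. First, the `TESTS_SKIP_BEFORE` environment variable (read via `test_getenv()` into `skip_tests_till`) makes `run_tests()` skip every registered test until it reaches the named test number, so e.g. setting `TESTS_SKIP_BEFORE=0100` before invoking the suite resumes a run at test 0100. Second, `test_get_broker_config_entry()` wraps the DescribeConfigs admin API. A minimal usage sketch for the latter follows; the broker id and config key are illustrative assumptions, not taken from this diff.

    /* Hypothetical caller of the new helper (illustrative values only). */
    static void check_retention_config(void) {
            /* NULL: let the helper create and destroy its own producer. */
            char *value =
                test_get_broker_config_entry(NULL, 1, "log.retention.hours");

            TEST_ASSERT(value != NULL,
                        "expected log.retention.hours to be readable");
            TEST_SAY("broker 1: log.retention.hours = %s\n", value);
            rd_free(value); /* the helper returns an rd_strdup()ed copy */
    }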
+ */ +char *test_get_broker_config_entry(rd_kafka_t *use_rk, + int32_t broker_id, + const char *key) { + rd_kafka_t *rk; + char *entry_value = NULL; + char errstr[128]; + rd_kafka_AdminOptions_t *options = NULL; + rd_kafka_ConfigResource_t *config = NULL; + rd_kafka_queue_t *queue = NULL; + const rd_kafka_DescribeConfigs_result_t *res = NULL; + size_t rconfig_cnt; + const rd_kafka_ConfigResource_t **rconfigs; + rd_kafka_resp_err_t err; + const rd_kafka_ConfigEntry_t **entries; + size_t entry_cnt; + size_t j; + rd_kafka_event_t *rkev; + + if (!(rk = use_rk)) + rk = test_create_producer(); + + queue = rd_kafka_queue_new(rk); + + config = rd_kafka_ConfigResource_new(RD_KAFKA_RESOURCE_BROKER, + tsprintf("%" PRId32, broker_id)); + options = + rd_kafka_AdminOptions_new(rk, RD_KAFKA_ADMIN_OP_DESCRIBECONFIGS); + err = rd_kafka_AdminOptions_set_request_timeout(options, 10000, errstr, + sizeof(errstr)); + TEST_ASSERT(!err, "%s", errstr); + + rd_kafka_DescribeConfigs(rk, &config, 1, options, queue); + rd_kafka_ConfigResource_destroy(config); + rd_kafka_AdminOptions_destroy(options); + + rkev = test_wait_admin_result( + queue, RD_KAFKA_EVENT_DESCRIBECONFIGS_RESULT, 10000); + + res = rd_kafka_event_DescribeConfigs_result(rkev); + TEST_ASSERT(res, "expecting describe config results to be not NULL"); + + err = rd_kafka_event_error(rkev); + TEST_ASSERT(!err, "Expected success, not %s", rd_kafka_err2name(err)); + + rconfigs = rd_kafka_DescribeConfigs_result_resources(res, &rconfig_cnt); + TEST_ASSERT(rconfig_cnt == 1, "Expecting 1 resource, got %" PRIusz, + rconfig_cnt); + + err = rd_kafka_ConfigResource_error(rconfigs[0]); + + + entries = rd_kafka_ConfigResource_configs(rconfigs[0], &entry_cnt); + + for (j = 0; j < entry_cnt; ++j) { + const rd_kafka_ConfigEntry_t *e = entries[j]; + const char *cname = rd_kafka_ConfigEntry_name(e); + + if (!strcmp(cname, key)) { + const char *val = rd_kafka_ConfigEntry_value(e); + + if (val) { + entry_value = rd_strdup(val); + break; + } + } + } + + rd_kafka_event_destroy(rkev); + rd_kafka_queue_destroy(queue); + + if (!use_rk) + rd_kafka_destroy(rk); + + return entry_value; +} + /** @@ -5733,6 +5854,7 @@ rd_kafka_event_t *test_wait_admin_result(rd_kafka_queue_t *q, * * Supported APIs: * - AlterConfigs + * - IncrementalAlterConfigs * - CreatePartitions * - CreateTopics * - DeleteGroups @@ -5756,13 +5878,15 @@ rd_kafka_resp_err_t test_wait_topic_admin_result(rd_kafka_queue_t *q, size_t aclres_cnt = 0; int errcnt = 0; rd_kafka_resp_err_t err; - const rd_kafka_group_result_t **gres = NULL; - size_t gres_cnt = 0; - const rd_kafka_ConsumerGroupDescription_t **gdescs = NULL; - size_t gdescs_cnt = 0; - const rd_kafka_error_t **glists_errors = NULL; - size_t glists_error_cnt = 0; - const rd_kafka_topic_partition_list_t *offsets = NULL; + const rd_kafka_group_result_t **gres = NULL; + size_t gres_cnt = 0; + const rd_kafka_ConsumerGroupDescription_t **gdescs = NULL; + size_t gdescs_cnt = 0; + const rd_kafka_error_t **glists_errors = NULL; + size_t glists_error_cnt = 0; + const rd_kafka_topic_partition_list_t *offsets = NULL; + const rd_kafka_DeleteAcls_result_response_t **delete_aclres = NULL; + size_t delete_aclres_cnt = 0; rkev = test_wait_admin_result(q, evtype, tmout); @@ -5816,6 +5940,17 @@ rd_kafka_resp_err_t test_wait_topic_admin_result(rd_kafka_queue_t *q, cres = rd_kafka_AlterConfigs_result_resources(res, &cres_cnt); + } else if (evtype == RD_KAFKA_EVENT_INCREMENTALALTERCONFIGS_RESULT) { + const rd_kafka_IncrementalAlterConfigs_result_t *res; + + if (!(res = + 
rd_kafka_event_IncrementalAlterConfigs_result(rkev))) + TEST_FAIL( + "Expected a IncrementalAlterConfigs result, not %s", + rd_kafka_event_name(rkev)); + + cres = rd_kafka_IncrementalAlterConfigs_result_resources( + res, &cres_cnt); } else if (evtype == RD_KAFKA_EVENT_CREATEACLS_RESULT) { const rd_kafka_CreateAcls_result_t *res; @@ -5824,6 +5959,15 @@ rd_kafka_resp_err_t test_wait_topic_admin_result(rd_kafka_queue_t *q, rd_kafka_event_name(rkev)); aclres = rd_kafka_CreateAcls_result_acls(res, &aclres_cnt); + } else if (evtype == RD_KAFKA_EVENT_DELETEACLS_RESULT) { + const rd_kafka_DeleteAcls_result_t *res; + + if (!(res = rd_kafka_event_DeleteAcls_result(rkev))) + TEST_FAIL("Expected a DeleteAcls result, not %s", + rd_kafka_event_name(rkev)); + + delete_aclres = rd_kafka_DeleteAcls_result_responses( + res, &delete_aclres_cnt); } else if (evtype == RD_KAFKA_EVENT_LISTCONSUMERGROUPS_RESULT) { const rd_kafka_ListConsumerGroups_result_t *res; if (!(res = rd_kafka_event_ListConsumerGroups_result(rkev))) @@ -5984,6 +6128,20 @@ rd_kafka_resp_err_t test_wait_topic_admin_result(rd_kafka_queue_t *q, } } + /* Check delete ACL errors. */ + for (i = 0; i < delete_aclres_cnt; i++) { + const rd_kafka_DeleteAcls_result_response_t *res_resp = + delete_aclres[i]; + const rd_kafka_error_t *error = + rd_kafka_DeleteAcls_result_response_error(res_resp); + if (error) { + TEST_WARN("DeleteAcls result error: %s\n", + rd_kafka_error_string(error)); + if ((errcnt++) == 0) + err = rd_kafka_error_code(error); + } + } + if (!err && retevent) *retevent = rkev; else @@ -6171,7 +6329,7 @@ rd_kafka_resp_err_t test_DeleteTopics_simple(rd_kafka_t *rk, TEST_SAY("Deleting %" PRIusz " topics\n", topic_cnt); - rd_kafka_DeleteTopics(rk, del_topics, topic_cnt, options, useq); + rd_kafka_DeleteTopics(rk, del_topics, topic_cnt, options, q); rd_kafka_AdminOptions_destroy(options); @@ -6394,7 +6552,7 @@ rd_kafka_resp_err_t test_AlterConfigs_simple(rd_kafka_t *rk, size_t result_cnt; const rd_kafka_ConfigEntry_t **configents; size_t configent_cnt; - + config_cnt = config_cnt * 2; q = rd_kafka_queue_new(rk); @@ -6479,6 +6637,74 @@ rd_kafka_resp_err_t test_AlterConfigs_simple(rd_kafka_t *rk, return err; } +/** + * @brief Delta Incremental Alter configuration for the given resource, + * overwriting/setting the configs provided in \p configs. + * Existing configuration remains intact. + * + * @param configs 'const char *name, const char *op_type', const char *value' + * tuples + * @param config_cnt is the number of tuples in \p configs + */ +rd_kafka_resp_err_t +test_IncrementalAlterConfigs_simple(rd_kafka_t *rk, + rd_kafka_ResourceType_t restype, + const char *resname, + const char **configs, + size_t config_cnt) { + rd_kafka_queue_t *q; + rd_kafka_ConfigResource_t *confres; + size_t i; + rd_kafka_resp_err_t err; + rd_kafka_error_t *error; + + + TEST_SAY("Incrementally altering configuration for %d %s\n", restype, + resname); + + q = rd_kafka_queue_new(rk); + confres = rd_kafka_ConfigResource_new(restype, resname); + config_cnt = config_cnt * 3; + + /* Apply the configuration to change. 
*/ + for (i = 0; i < config_cnt; i += 3) { + const char *confname = configs[i]; + const char *op_string = configs[i + 1]; + const char *confvalue = configs[i + 2]; + rd_kafka_AlterConfigOpType_t op_type = + RD_KAFKA_ALTER_CONFIG_OP_TYPE__CNT; + + if (!strcmp(op_string, "SET")) + op_type = RD_KAFKA_ALTER_CONFIG_OP_TYPE_SET; + else if (!strcmp(op_string, "DELETE")) + op_type = RD_KAFKA_ALTER_CONFIG_OP_TYPE_DELETE; + else if (!strcmp(op_string, "APPEND")) + op_type = RD_KAFKA_ALTER_CONFIG_OP_TYPE_APPEND; + else if (!strcmp(op_string, "SUBTRACT")) + op_type = RD_KAFKA_ALTER_CONFIG_OP_TYPE_SUBTRACT; + else + TEST_FAIL("Unknown op type %s\n", op_string); + + error = rd_kafka_ConfigResource_add_incremental_config( + confres, confname, op_type, confvalue); + TEST_ASSERT(!error, + "Failed to set incremental %s config %s=%s on " + "local resource object", + op_string, confname, confvalue); + } + + rd_kafka_IncrementalAlterConfigs(rk, &confres, 1, NULL, q); + + rd_kafka_ConfigResource_destroy(confres); + + err = test_wait_topic_admin_result( + q, RD_KAFKA_EVENT_INCREMENTALALTERCONFIGS_RESULT, NULL, 15 * 1000); + + rd_kafka_queue_destroy(q); + + return err; +} + /** * @brief Topic Admin API helpers * @@ -6529,6 +6755,56 @@ rd_kafka_resp_err_t test_CreateAcls_simple(rd_kafka_t *rk, return err; } +/** + * @brief Topic Admin API helpers + * + * @param useq Makes the call async and posts the response in this queue. + * If NULL this call will be synchronous and return the error + * result. + * + * @remark Fails the current test on failure. + */ + +rd_kafka_resp_err_t +test_DeleteAcls_simple(rd_kafka_t *rk, + rd_kafka_queue_t *useq, + rd_kafka_AclBindingFilter_t **acl_filters, + size_t acl_filters_cnt, + void *opaque) { + rd_kafka_AdminOptions_t *options; + rd_kafka_queue_t *q; + rd_kafka_resp_err_t err; + const int tmout = 30 * 1000; + + options = rd_kafka_AdminOptions_new(rk, RD_KAFKA_ADMIN_OP_DELETEACLS); + rd_kafka_AdminOptions_set_opaque(options, opaque); + + if (!useq) { + q = rd_kafka_queue_new(rk); + } else { + q = useq; + } + + TEST_SAY("Deleting acls using %" PRIusz " filters\n", acl_filters_cnt); + + rd_kafka_DeleteAcls(rk, acl_filters, acl_filters_cnt, options, q); + + rd_kafka_AdminOptions_destroy(options); + + if (useq) + return RD_KAFKA_RESP_ERR_NO_ERROR; + + err = test_wait_topic_admin_result(q, RD_KAFKA_EVENT_DELETEACLS_RESULT, + NULL, tmout + 5000); + + rd_kafka_queue_destroy(q); + + if (err) + TEST_FAIL("Failed to delete acl(s): %s", rd_kafka_err2str(err)); + + return err; +} + static void test_free_string_array(char **strs, size_t cnt) { size_t i; for (i = 0; i < cnt; i++) diff --git a/lib/librdkafka-2.1.0/tests/test.conf.example b/lib/librdkafka-2.3.0/tests/test.conf.example similarity index 100% rename from lib/librdkafka-2.1.0/tests/test.conf.example rename to lib/librdkafka-2.3.0/tests/test.conf.example diff --git a/lib/librdkafka-2.1.0/tests/test.h b/lib/librdkafka-2.3.0/tests/test.h similarity index 97% rename from lib/librdkafka-2.1.0/tests/test.h rename to lib/librdkafka-2.3.0/tests/test.h index a431f9a255f..671472b43a0 100644 --- a/lib/librdkafka-2.1.0/tests/test.h +++ b/lib/librdkafka-2.3.0/tests/test.h @@ -1,7 +1,8 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill + * 2023, Confluent Inc. * All rights reserved. 
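A hedged sketch of how the new `test_IncrementalAlterConfigs_simple()` helper defined above is intended to be called: the `configs` array is a flat list of (name, op, value) triples and `config_cnt` counts triples (the helper multiplies by 3 internally). The topic name and config values here are illustrative, not from the diff.

    /* Illustrative only: incrementally SET one topic config and APPEND
     * to another on an assumed topic "test_topic". */
    static void alter_topic_configs(rd_kafka_t *rk) {
            const char *configs[] = {
                "max.message.bytes", "SET",    "1000000",
                "cleanup.policy",    "APPEND", "compact",
            };
            rd_kafka_resp_err_t err = test_IncrementalAlterConfigs_simple(
                rk, RD_KAFKA_RESOURCE_TOPIC, "test_topic", configs,
                2 /* number of (name, op, value) triples, not strings */);

            TEST_ASSERT(!err, "IncrementalAlterConfigs failed: %s",
                        rd_kafka_err2str(err));
    }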
* * Redistribution and use in source and binary forms, with or without @@ -756,6 +757,10 @@ void test_headers_dump(const char *what, int32_t *test_get_broker_ids(rd_kafka_t *use_rk, size_t *cntp); +char *test_get_broker_config_entry(rd_kafka_t *use_rk, + int32_t broker_id, + const char *key); + void test_wait_metadata_update(rd_kafka_t *rk, rd_kafka_metadata_topic_t *topics, size_t topic_cnt, @@ -796,6 +801,13 @@ rd_kafka_resp_err_t test_AlterConfigs_simple(rd_kafka_t *rk, const char **configs, size_t config_cnt); +rd_kafka_resp_err_t +test_IncrementalAlterConfigs_simple(rd_kafka_t *rk, + rd_kafka_ResourceType_t restype, + const char *resname, + const char **configs, + size_t config_cnt); + rd_kafka_resp_err_t test_DeleteGroups_simple(rd_kafka_t *rk, rd_kafka_queue_t *useq, char **groups, @@ -821,6 +833,13 @@ rd_kafka_resp_err_t test_CreateAcls_simple(rd_kafka_t *rk, size_t acl_cnt, void *opaque); +rd_kafka_resp_err_t +test_DeleteAcls_simple(rd_kafka_t *rk, + rd_kafka_queue_t *useq, + rd_kafka_AclBindingFilter_t **acl_filters, + size_t acl_filters_cnt, + void *opaque); + rd_kafka_resp_err_t test_delete_all_test_topics(int timeout_ms); void test_mock_cluster_destroy(rd_kafka_mock_cluster_t *mcluster); diff --git a/lib/librdkafka-2.1.0/tests/testcpp.cpp b/lib/librdkafka-2.3.0/tests/testcpp.cpp similarity index 98% rename from lib/librdkafka-2.1.0/tests/testcpp.cpp rename to lib/librdkafka-2.3.0/tests/testcpp.cpp index e965e249f18..c1a7f128104 100644 --- a/lib/librdkafka-2.1.0/tests/testcpp.cpp +++ b/lib/librdkafka-2.3.0/tests/testcpp.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/testcpp.h b/lib/librdkafka-2.3.0/tests/testcpp.h similarity index 99% rename from lib/librdkafka-2.1.0/tests/testcpp.h rename to lib/librdkafka-2.3.0/tests/testcpp.h index 2ecaed39489..1c5bc17d40b 100644 --- a/lib/librdkafka-2.1.0/tests/testcpp.h +++ b/lib/librdkafka-2.3.0/tests/testcpp.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/testshared.h b/lib/librdkafka-2.3.0/tests/testshared.h similarity index 99% rename from lib/librdkafka-2.1.0/tests/testshared.h rename to lib/librdkafka-2.3.0/tests/testshared.h index efdd5d5550c..0ba512b273d 100644 --- a/lib/librdkafka-2.1.0/tests/testshared.h +++ b/lib/librdkafka-2.3.0/tests/testshared.h @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/tools/README.md b/lib/librdkafka-2.3.0/tests/tools/README.md similarity index 100% rename from lib/librdkafka-2.1.0/tests/tools/README.md rename to lib/librdkafka-2.3.0/tests/tools/README.md diff --git a/lib/librdkafka-2.1.0/tests/tools/stats/README.md b/lib/librdkafka-2.3.0/tests/tools/stats/README.md similarity index 100% rename from lib/librdkafka-2.1.0/tests/tools/stats/README.md rename to lib/librdkafka-2.3.0/tests/tools/stats/README.md diff --git a/lib/librdkafka-2.1.0/tests/tools/stats/filter.jq b/lib/librdkafka-2.3.0/tests/tools/stats/filter.jq similarity index 100% rename from lib/librdkafka-2.1.0/tests/tools/stats/filter.jq rename to lib/librdkafka-2.3.0/tests/tools/stats/filter.jq diff --git a/lib/librdkafka-2.1.0/tests/tools/stats/graph.py b/lib/librdkafka-2.3.0/tests/tools/stats/graph.py similarity index 100% rename from lib/librdkafka-2.1.0/tests/tools/stats/graph.py rename to lib/librdkafka-2.3.0/tests/tools/stats/graph.py diff --git a/lib/librdkafka-2.1.0/tests/tools/stats/requirements.txt b/lib/librdkafka-2.3.0/tests/tools/stats/requirements.txt similarity index 100% rename from lib/librdkafka-2.1.0/tests/tools/stats/requirements.txt rename to lib/librdkafka-2.3.0/tests/tools/stats/requirements.txt diff --git a/lib/librdkafka-2.1.0/tests/tools/stats/to_csv.py b/lib/librdkafka-2.3.0/tests/tools/stats/to_csv.py similarity index 100% rename from lib/librdkafka-2.1.0/tests/tools/stats/to_csv.py rename to lib/librdkafka-2.3.0/tests/tools/stats/to_csv.py diff --git a/lib/librdkafka-2.1.0/tests/until-fail.sh b/lib/librdkafka-2.3.0/tests/until-fail.sh similarity index 100% rename from lib/librdkafka-2.1.0/tests/until-fail.sh rename to lib/librdkafka-2.3.0/tests/until-fail.sh diff --git a/lib/librdkafka-2.1.0/tests/xxxx-assign_partition.c b/lib/librdkafka-2.3.0/tests/xxxx-assign_partition.c similarity index 99% rename from lib/librdkafka-2.1.0/tests/xxxx-assign_partition.c rename to lib/librdkafka-2.3.0/tests/xxxx-assign_partition.c index 18431ba7238..801919c3c74 100644 --- a/lib/librdkafka-2.1.0/tests/xxxx-assign_partition.c +++ b/lib/librdkafka-2.3.0/tests/xxxx-assign_partition.c @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2015, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/tests/xxxx-metadata.cpp b/lib/librdkafka-2.3.0/tests/xxxx-metadata.cpp similarity index 99% rename from lib/librdkafka-2.1.0/tests/xxxx-metadata.cpp rename to lib/librdkafka-2.3.0/tests/xxxx-metadata.cpp index 00c31bc8241..163b68f2413 100644 --- a/lib/librdkafka-2.1.0/tests/xxxx-metadata.cpp +++ b/lib/librdkafka-2.3.0/tests/xxxx-metadata.cpp @@ -1,7 +1,7 @@ /* * librdkafka - Apache Kafka C library * - * Copyright (c) 2012-2014, Magnus Edenhill + * Copyright (c) 2012-2022, Magnus Edenhill * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/lib/librdkafka-2.1.0/vcpkg.json b/lib/librdkafka-2.3.0/vcpkg.json similarity index 95% rename from lib/librdkafka-2.1.0/vcpkg.json rename to lib/librdkafka-2.3.0/vcpkg.json index 5e446107a62..f44a3be6829 100644 --- a/lib/librdkafka-2.1.0/vcpkg.json +++ b/lib/librdkafka-2.3.0/vcpkg.json @@ -1,6 +1,6 @@ { "name": "librdkafka", - "version": "2.1.0", + "version": "2.3.0", "dependencies": [ { "name": "zstd", diff --git a/lib/librdkafka-2.1.0/win32/.gitignore b/lib/librdkafka-2.3.0/win32/.gitignore similarity index 100% rename from lib/librdkafka-2.1.0/win32/.gitignore rename to lib/librdkafka-2.3.0/win32/.gitignore diff --git a/lib/librdkafka-2.1.0/win32/README.md b/lib/librdkafka-2.3.0/win32/README.md similarity index 100% rename from lib/librdkafka-2.1.0/win32/README.md rename to lib/librdkafka-2.3.0/win32/README.md diff --git a/lib/librdkafka-2.1.0/win32/build-package.bat b/lib/librdkafka-2.3.0/win32/build-package.bat similarity index 100% rename from lib/librdkafka-2.1.0/win32/build-package.bat rename to lib/librdkafka-2.3.0/win32/build-package.bat diff --git a/lib/librdkafka-2.1.0/win32/build.bat b/lib/librdkafka-2.3.0/win32/build.bat similarity index 100% rename from lib/librdkafka-2.1.0/win32/build.bat rename to lib/librdkafka-2.3.0/win32/build.bat diff --git a/lib/librdkafka-2.1.0/win32/common.vcxproj b/lib/librdkafka-2.3.0/win32/common.vcxproj similarity index 100% rename from lib/librdkafka-2.1.0/win32/common.vcxproj rename to lib/librdkafka-2.3.0/win32/common.vcxproj diff --git a/lib/librdkafka-2.1.0/win32/install-openssl.ps1 b/lib/librdkafka-2.3.0/win32/install-openssl.ps1 similarity index 100% rename from lib/librdkafka-2.1.0/win32/install-openssl.ps1 rename to lib/librdkafka-2.3.0/win32/install-openssl.ps1 diff --git a/lib/librdkafka-2.1.0/win32/interceptor_test/interceptor_test.vcxproj b/lib/librdkafka-2.3.0/win32/interceptor_test/interceptor_test.vcxproj similarity index 100% rename from lib/librdkafka-2.1.0/win32/interceptor_test/interceptor_test.vcxproj rename to lib/librdkafka-2.3.0/win32/interceptor_test/interceptor_test.vcxproj diff --git a/lib/librdkafka-2.1.0/win32/librdkafka.autopkg.template b/lib/librdkafka-2.3.0/win32/librdkafka.autopkg.template similarity index 84% rename from lib/librdkafka-2.1.0/win32/librdkafka.autopkg.template rename to lib/librdkafka-2.3.0/win32/librdkafka.autopkg.template index 5ad8b1026c7..4a4ccfbddc4 100644 --- a/lib/librdkafka-2.1.0/win32/librdkafka.autopkg.template +++ b/lib/librdkafka-2.3.0/win32/librdkafka.autopkg.template @@ -1,9 +1,9 @@ configurations { - Toolset { - key : "PlatformToolset"; + Toolset { + key : "PlatformToolset"; choices: { v120, v140, v142 }; - - // Explicitly Not including pivot variants: "WindowsKernelModeDriver8.0", "WindowsApplicationForDrivers8.0", "WindowsUserModeDriver8.0" + + // Explicitly Not including pivot variants: "WindowsKernelModeDriver8.0", "WindowsApplicationForDrivers8.0", "WindowsUserModeDriver8.0" // We're normalizing out the concept of the v140 platform -- Overloading the $(PlatformToolset) variable for additional pivots was a dumb idea. v140.condition = "( $(PlatformToolset.ToLower().IndexOf('v140')) > -1 Or '$(PlatformToolset.ToLower())' == 'windowskernelmodedriver8.0' Or '$(PlatformToolset.ToLower())' == 'windowsapplicationfordrivers8.0' Or '$(PlatformToolset.ToLower())' == 'windowsusermodedriver8.0' )"; @@ -17,10 +17,9 @@ nuget { // pre-deployment script. 
version : @version; title: "librdkafka"; - authors: {Magnus Edenhill, edenhill}; - owners: {Magnus Edenhill, edenhill}; - licenseUrl: "https://github.com/edenhill/librdkafka/blob/master/LICENSES.txt"; - projectUrl: "https://github.com/edenhill/librdkafka"; + authors: {Magnus Edenhill, edenhill, confluent}; + licenseUrl: "https://github.com/confluentinc/librdkafka/blob/master/LICENSES.txt"; + projectUrl: "https://github.com/confluentinc/librdkafka"; requireLicenseAcceptance: false; summary: "The Apache Kafka C/C++ client library"; description:"The Apache Kafka C/C++ client library"; @@ -52,4 +51,4 @@ nuget { targets { Defines += HAS_LIBRDKAFKA; }; -}; \ No newline at end of file +}; diff --git a/lib/librdkafka-2.1.0/win32/librdkafka.master.testing.targets b/lib/librdkafka-2.3.0/win32/librdkafka.master.testing.targets similarity index 100% rename from lib/librdkafka-2.1.0/win32/librdkafka.master.testing.targets rename to lib/librdkafka-2.3.0/win32/librdkafka.master.testing.targets diff --git a/lib/librdkafka-2.1.0/win32/librdkafka.sln b/lib/librdkafka-2.3.0/win32/librdkafka.sln similarity index 100% rename from lib/librdkafka-2.1.0/win32/librdkafka.sln rename to lib/librdkafka-2.3.0/win32/librdkafka.sln diff --git a/lib/librdkafka-2.1.0/win32/librdkafka.vcxproj b/lib/librdkafka-2.3.0/win32/librdkafka.vcxproj similarity index 97% rename from lib/librdkafka-2.1.0/win32/librdkafka.vcxproj rename to lib/librdkafka-2.3.0/win32/librdkafka.vcxproj index 2735fca9ca1..a7f267e89ed 100644 --- a/lib/librdkafka-2.1.0/win32/librdkafka.vcxproj +++ b/lib/librdkafka-2.3.0/win32/librdkafka.vcxproj @@ -102,6 +102,7 @@ + @@ -174,6 +175,7 @@ + diff --git a/lib/librdkafka-2.1.0/win32/librdkafkacpp/librdkafkacpp.vcxproj b/lib/librdkafka-2.3.0/win32/librdkafkacpp/librdkafkacpp.vcxproj similarity index 100% rename from lib/librdkafka-2.1.0/win32/librdkafkacpp/librdkafkacpp.vcxproj rename to lib/librdkafka-2.3.0/win32/librdkafkacpp/librdkafkacpp.vcxproj diff --git a/lib/librdkafka-2.1.0/win32/msbuild.ps1 b/lib/librdkafka-2.3.0/win32/msbuild.ps1 similarity index 100% rename from lib/librdkafka-2.1.0/win32/msbuild.ps1 rename to lib/librdkafka-2.3.0/win32/msbuild.ps1 diff --git a/lib/librdkafka-2.1.0/win32/openssl_engine_example/openssl_engine_example.vcxproj b/lib/librdkafka-2.3.0/win32/openssl_engine_example/openssl_engine_example.vcxproj similarity index 100% rename from lib/librdkafka-2.1.0/win32/openssl_engine_example/openssl_engine_example.vcxproj rename to lib/librdkafka-2.3.0/win32/openssl_engine_example/openssl_engine_example.vcxproj diff --git a/lib/librdkafka-2.1.0/win32/package-zip.ps1 b/lib/librdkafka-2.3.0/win32/package-zip.ps1 similarity index 100% rename from lib/librdkafka-2.1.0/win32/package-zip.ps1 rename to lib/librdkafka-2.3.0/win32/package-zip.ps1 diff --git a/lib/librdkafka-2.1.0/win32/packages/repositories.config b/lib/librdkafka-2.3.0/win32/packages/repositories.config similarity index 100% rename from lib/librdkafka-2.1.0/win32/packages/repositories.config rename to lib/librdkafka-2.3.0/win32/packages/repositories.config diff --git a/lib/librdkafka-2.1.0/win32/push-package.bat b/lib/librdkafka-2.3.0/win32/push-package.bat similarity index 100% rename from lib/librdkafka-2.1.0/win32/push-package.bat rename to lib/librdkafka-2.3.0/win32/push-package.bat diff --git a/lib/librdkafka-2.1.0/win32/rdkafka_complex_consumer_example_cpp/rdkafka_complex_consumer_example_cpp.vcxproj b/lib/librdkafka-2.3.0/win32/rdkafka_complex_consumer_example_cpp/rdkafka_complex_consumer_example_cpp.vcxproj similarity 
index 100% rename from lib/librdkafka-2.1.0/win32/rdkafka_complex_consumer_example_cpp/rdkafka_complex_consumer_example_cpp.vcxproj rename to lib/librdkafka-2.3.0/win32/rdkafka_complex_consumer_example_cpp/rdkafka_complex_consumer_example_cpp.vcxproj diff --git a/lib/librdkafka-2.1.0/win32/rdkafka_example/rdkafka_example.vcxproj b/lib/librdkafka-2.3.0/win32/rdkafka_example/rdkafka_example.vcxproj similarity index 100% rename from lib/librdkafka-2.1.0/win32/rdkafka_example/rdkafka_example.vcxproj rename to lib/librdkafka-2.3.0/win32/rdkafka_example/rdkafka_example.vcxproj diff --git a/lib/librdkafka-2.1.0/win32/rdkafka_performance/rdkafka_performance.vcxproj b/lib/librdkafka-2.3.0/win32/rdkafka_performance/rdkafka_performance.vcxproj similarity index 100% rename from lib/librdkafka-2.1.0/win32/rdkafka_performance/rdkafka_performance.vcxproj rename to lib/librdkafka-2.3.0/win32/rdkafka_performance/rdkafka_performance.vcxproj diff --git a/lib/librdkafka-2.1.0/win32/setup-msys2.ps1 b/lib/librdkafka-2.3.0/win32/setup-msys2.ps1 similarity index 100% rename from lib/librdkafka-2.1.0/win32/setup-msys2.ps1 rename to lib/librdkafka-2.3.0/win32/setup-msys2.ps1 diff --git a/lib/librdkafka-2.1.0/win32/setup-vcpkg.ps1 b/lib/librdkafka-2.3.0/win32/setup-vcpkg.ps1 similarity index 100% rename from lib/librdkafka-2.1.0/win32/setup-vcpkg.ps1 rename to lib/librdkafka-2.3.0/win32/setup-vcpkg.ps1 diff --git a/lib/librdkafka-2.1.0/win32/tests/.gitignore b/lib/librdkafka-2.3.0/win32/tests/.gitignore similarity index 100% rename from lib/librdkafka-2.1.0/win32/tests/.gitignore rename to lib/librdkafka-2.3.0/win32/tests/.gitignore diff --git a/lib/librdkafka-2.1.0/win32/tests/test.conf.example b/lib/librdkafka-2.3.0/win32/tests/test.conf.example similarity index 100% rename from lib/librdkafka-2.1.0/win32/tests/test.conf.example rename to lib/librdkafka-2.3.0/win32/tests/test.conf.example diff --git a/lib/librdkafka-2.1.0/win32/tests/tests.vcxproj b/lib/librdkafka-2.3.0/win32/tests/tests.vcxproj similarity index 96% rename from lib/librdkafka-2.1.0/win32/tests/tests.vcxproj rename to lib/librdkafka-2.3.0/win32/tests/tests.vcxproj index de69a62d856..6a48f527d88 100644 --- a/lib/librdkafka-2.1.0/win32/tests/tests.vcxproj +++ b/lib/librdkafka-2.3.0/win32/tests/tests.vcxproj @@ -208,6 +208,7 @@ + @@ -219,7 +220,13 @@ + + + + + + diff --git a/lib/librdkafka-2.1.0/win32/win_ssl_cert_store/win_ssl_cert_store.vcxproj b/lib/librdkafka-2.3.0/win32/win_ssl_cert_store/win_ssl_cert_store.vcxproj similarity index 100% rename from lib/librdkafka-2.1.0/win32/win_ssl_cert_store/win_ssl_cert_store.vcxproj rename to lib/librdkafka-2.3.0/win32/win_ssl_cert_store/win_ssl_cert_store.vcxproj diff --git a/lib/librdkafka-2.1.0/win32/wingetopt.c b/lib/librdkafka-2.3.0/win32/wingetopt.c similarity index 100% rename from lib/librdkafka-2.1.0/win32/wingetopt.c rename to lib/librdkafka-2.3.0/win32/wingetopt.c diff --git a/lib/librdkafka-2.1.0/win32/wingetopt.h b/lib/librdkafka-2.3.0/win32/wingetopt.h similarity index 100% rename from lib/librdkafka-2.1.0/win32/wingetopt.h rename to lib/librdkafka-2.3.0/win32/wingetopt.h diff --git a/lib/librdkafka-2.1.0/win32/wintime.h b/lib/librdkafka-2.3.0/win32/wintime.h similarity index 100% rename from lib/librdkafka-2.1.0/win32/wintime.h rename to lib/librdkafka-2.3.0/win32/wintime.h diff --git a/lib/mpack-amalgamation-1.1/AUTHORS.md b/lib/mpack-amalgamation-1.1.1/AUTHORS.md similarity index 87% rename from lib/mpack-amalgamation-1.1/AUTHORS.md rename to lib/mpack-amalgamation-1.1.1/AUTHORS.md index 
b06dbb53b80..1d8e2673cf4 100644 --- a/lib/mpack-amalgamation-1.1/AUTHORS.md +++ b/lib/mpack-amalgamation-1.1.1/AUTHORS.md @@ -12,3 +12,5 @@ | Dirkson | https://github.com/dirkson | | Ethan Li | https://github.com/ethanjli | | Danny Povolotski | https://github.com/israelidanny | +| Weston Schmidt | https://github.com/schmidtw | +| Nikita Danilov | https://github.com/nikitadanilov | diff --git a/lib/mpack-amalgamation-1.1/CHANGELOG.md b/lib/mpack-amalgamation-1.1.1/CHANGELOG.md similarity index 92% rename from lib/mpack-amalgamation-1.1/CHANGELOG.md rename to lib/mpack-amalgamation-1.1.1/CHANGELOG.md index 45976bd5afa..f7adb06730f 100644 --- a/lib/mpack-amalgamation-1.1/CHANGELOG.md +++ b/lib/mpack-amalgamation-1.1.1/CHANGELOG.md @@ -1,3 +1,22 @@ +MPack v1.1.1 +------------ + +Bug Fixes: + +- Fixed a crash that could occur when a builder element was aligned exactly at the end of a page. (See #94) + +- Fixed a crash when encountering an I/O error during builder resolution without an error handler callback. (See #98) + +- Fixed an error destroying a writer while a builder is open. (See #88) + +- Fixed an issue with incorrect NULL checks for 0-length buffers. (See #97) + +- Fixed a string formatting issue on platforms where `int` is not 32 bits. (See #103) + +- Fixed some documentation errors. (See #93, #102) + +- Cleaned up some references to old unit test buildsystem. (See #95) + MPack v1.1 ---------- diff --git a/lib/mpack-amalgamation-1.1/CMakeLists.txt b/lib/mpack-amalgamation-1.1.1/CMakeLists.txt similarity index 100% rename from lib/mpack-amalgamation-1.1/CMakeLists.txt rename to lib/mpack-amalgamation-1.1.1/CMakeLists.txt diff --git a/lib/mpack-amalgamation-1.1/LICENSE b/lib/mpack-amalgamation-1.1.1/LICENSE similarity index 100% rename from lib/mpack-amalgamation-1.1/LICENSE rename to lib/mpack-amalgamation-1.1.1/LICENSE diff --git a/lib/mpack-amalgamation-1.1/README.md b/lib/mpack-amalgamation-1.1.1/README.md similarity index 100% rename from lib/mpack-amalgamation-1.1/README.md rename to lib/mpack-amalgamation-1.1.1/README.md diff --git a/lib/mpack-amalgamation-1.1/src/mpack/mpack.c b/lib/mpack-amalgamation-1.1.1/src/mpack/mpack.c similarity index 98% rename from lib/mpack-amalgamation-1.1/src/mpack/mpack.c rename to lib/mpack-amalgamation-1.1.1/src/mpack/mpack.c index 2557d5b9f0b..4f0dab4aa14 100644 --- a/lib/mpack-amalgamation-1.1/src/mpack/mpack.c +++ b/lib/mpack-amalgamation-1.1.1/src/mpack/mpack.c @@ -24,7 +24,7 @@ */ /* - * This is the MPack 1.1 amalgamation package. + * This is the MPack 1.1.1 amalgamation package. 
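Most of the MPack 1.1.1 fixes listed in the changelog above harden the "builder" API, where map and array element counts are inferred instead of declared up front. A minimal sketch of that code path, assuming the amalgamated header is on the include path and the builder (MPACK_BUILDER) is enabled, which is the default when a malloc is available:

    #include <stdlib.h>
    #include "mpack/mpack.h"

    /* Encode {"key": "value"} without declaring the map count up front;
     * mpack_build_map()/mpack_complete_map() is the builder path the
     * 1.1.1 fixes above harden. */
    static char *encode_example(size_t *sizep) {
        char *data = NULL;
        mpack_writer_t writer;
        mpack_writer_init_growable(&writer, &data, sizep);

        mpack_build_map(&writer); /* count inferred at complete time */
        mpack_write_cstr(&writer, "key");
        mpack_write_cstr(&writer, "value");
        mpack_complete_map(&writer);

        if (mpack_writer_destroy(&writer) != mpack_ok)
            return NULL; /* flush or builder resolution failed */
        return data;     /* caller frees with free() */
    }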
* * http://github.com/ludocode/mpack */ @@ -424,7 +424,7 @@ static void mpack_tag_debug_pseudo_json_bin(mpack_tag_t tag, char* buffer, size_ const char* prefix, size_t prefix_size) { mpack_assert(mpack_tag_type(&tag) == mpack_type_bin); - size_t length = (size_t)mpack_snprintf(buffer, buffer_size, "", tag.v.l); + mpack_snprintf(buffer, buffer_size, "", tag.v.l); return; case mpack_type_bin: mpack_tag_debug_pseudo_json_bin(tag, buffer, buffer_size, prefix, prefix_size); @@ -486,10 +486,10 @@ static void mpack_tag_debug_pseudo_json_impl(mpack_tag_t tag, char* buffer, size #endif case mpack_type_array: - mpack_snprintf(buffer, buffer_size, "", tag.v.n); + mpack_snprintf(buffer, buffer_size, "", tag.v.n); return; case mpack_type_map: - mpack_snprintf(buffer, buffer_size, "", tag.v.n); + mpack_snprintf(buffer, buffer_size, "", tag.v.n); return; } @@ -541,22 +541,22 @@ static void mpack_tag_debug_describe_impl(mpack_tag_t tag, char* buffer, size_t #endif return; case mpack_type_str: - mpack_snprintf(buffer, buffer_size, "str of %u bytes", tag.v.l); + mpack_snprintf(buffer, buffer_size, "str of %" PRIu32 " bytes", tag.v.l); return; case mpack_type_bin: - mpack_snprintf(buffer, buffer_size, "bin of %u bytes", tag.v.l); + mpack_snprintf(buffer, buffer_size, "bin of %" PRIu32 " bytes", tag.v.l); return; #if MPACK_EXTENSIONS case mpack_type_ext: - mpack_snprintf(buffer, buffer_size, "ext of type %i, %u bytes", + mpack_snprintf(buffer, buffer_size, "ext of type %i, %" PRIu32 " bytes", mpack_tag_ext_exttype(&tag), mpack_tag_ext_length(&tag)); return; #endif case mpack_type_array: - mpack_snprintf(buffer, buffer_size, "array of %u elements", tag.v.n); + mpack_snprintf(buffer, buffer_size, "array of %" PRIu32 " elements", tag.v.n); return; case mpack_type_map: - mpack_snprintf(buffer, buffer_size, "map of %u key-value pairs", tag.v.n); + mpack_snprintf(buffer, buffer_size, "map of %" PRIu32 " key-value pairs", tag.v.n); return; } @@ -1034,7 +1034,7 @@ static inline void mpack_writer_track_element(mpack_writer_t* writer) { if (build->nested_compound_elements == 0) { if (build->type != mpack_type_map) { ++build->count; - mpack_log("adding element to build %p, now %u elements\n", (void*)build, build->count); + mpack_log("adding element to build %p, now %" PRIu32 " elements\n", (void*)build, build->count); } else if (build->key_needs_value) { build->key_needs_value = false; ++build->count; @@ -1489,6 +1489,46 @@ mpack_error_t mpack_writer_destroy(mpack_writer_t* writer) { mpack_track_destroy(&writer->track, writer->error != mpack_ok); #endif + #if MPACK_BUILDER + mpack_builder_t* builder = &writer->builder; + if (builder->current_build != NULL) { + // A builder is open! + + // Flag an error, if there's not already an error. You can only skip + // closing any open compound types if a write error occurred. If there + // wasn't already an error, it's a bug, which will assert in debug. + if (mpack_writer_error(writer) == mpack_ok) { + mpack_break("writer cannot be destroyed with an incomplete builder unless " + "an error was flagged!"); + mpack_writer_flag_error(writer, mpack_error_bug); + } + + // Free any remaining builder pages + mpack_builder_page_t* page = builder->pages; + #if MPACK_BUILDER_INTERNAL_STORAGE + mpack_assert(page == (mpack_builder_page_t*)builder->internal); + page = page->next; + #endif + while (page != NULL) { + mpack_builder_page_t* next = page->next; + MPACK_FREE(page); + page = next; + } + + // Restore the stashed pointers. The teardown function may need to free + // them (e.g. 
mpack_growable_writer_teardown().) + writer->buffer = builder->stash_buffer; + writer->position = builder->stash_position; + writer->end = builder->stash_end; + + // Note: It's not necessary to clean up the current_build or other + // pointers at this point because we're guaranteed to be in an error + // state already so a user error callback can't longjmp out. This + // destroy function will complete no matter what so it doesn't matter + // what junk is left in the writer. + } + #endif + // flush any outstanding data if (mpack_writer_error(writer) == mpack_ok && mpack_writer_buffer_used(writer) != 0 && writer->flush != NULL) { writer->flush(writer, writer->buffer, mpack_writer_buffer_used(writer)); @@ -2017,7 +2057,7 @@ void mpack_write_timestamp(mpack_writer_t* writer, int64_t seconds, uint32_t nan #endif if (nanoseconds > MPACK_TIMESTAMP_NANOSECONDS_MAX) { - mpack_break("timestamp nanoseconds out of bounds: %u", nanoseconds); + mpack_break("timestamp nanoseconds out of bounds: %" PRIu32 , nanoseconds); mpack_writer_flag_error(writer, mpack_error_bug); return; } @@ -2160,7 +2200,7 @@ void mpack_start_ext(mpack_writer_t* writer, int8_t exttype, uint32_t count) { */ void mpack_write_str(mpack_writer_t* writer, const char* data, uint32_t count) { - mpack_assert(data != NULL, "data for string of length %i is NULL", (int)count); + mpack_assert(count == 0 || data != NULL, "data for string of length %i is NULL", (int)count); #if MPACK_OPTIMIZE_FOR_SIZE mpack_writer_track_element(writer); @@ -2215,7 +2255,7 @@ void mpack_write_str(mpack_writer_t* writer, const char* data, uint32_t count) { } void mpack_write_bin(mpack_writer_t* writer, const char* data, uint32_t count) { - mpack_assert(data != NULL, "data pointer for bin of %i bytes is NULL", (int)count); + mpack_assert(count == 0 || data != NULL, "data pointer for bin of %i bytes is NULL", (int)count); mpack_start_bin(writer, count); mpack_write_bytes(writer, data, count); mpack_finish_bin(writer); @@ -2223,7 +2263,7 @@ void mpack_write_bin(mpack_writer_t* writer, const char* data, uint32_t count) { #if MPACK_EXTENSIONS void mpack_write_ext(mpack_writer_t* writer, int8_t exttype, const char* data, uint32_t count) { - mpack_assert(data != NULL, "data pointer for ext of type %i and %i bytes is NULL", exttype, (int)count); + mpack_assert(count == 0 || data != NULL, "data pointer for ext of type %i and %i bytes is NULL", exttype, (int)count); mpack_start_ext(writer, exttype, count); mpack_write_bytes(writer, data, count); mpack_finish_ext(writer); @@ -2231,7 +2271,7 @@ void mpack_write_ext(mpack_writer_t* writer, int8_t exttype, const char* data, u #endif void mpack_write_bytes(mpack_writer_t* writer, const char* data, size_t count) { - mpack_assert(data != NULL, "data pointer for %i bytes is NULL", (int)count); + mpack_assert(count == 0 || data != NULL, "data pointer for %i bytes is NULL", (int)count); mpack_writer_track_bytes(writer, count); mpack_write_native(writer, data, count); } @@ -2252,7 +2292,7 @@ void mpack_write_cstr_or_nil(mpack_writer_t* writer, const char* cstr) { } void mpack_write_utf8(mpack_writer_t* writer, const char* str, uint32_t length) { - mpack_assert(str != NULL, "data for string of length %i is NULL", (int)length); + mpack_assert(length == 0 || str != NULL, "data for string of length %i is NULL", (int)length); if (!mpack_utf8_check(str, length)) { mpack_writer_flag_error(writer, mpack_error_invalid); return; @@ -2552,6 +2592,16 @@ MPACK_NOINLINE static void mpack_builder_resolve(mpack_writer_t* writer) { mpack_builder_t* 
builder = &writer->builder; + // We should not have gotten here if we are in an error state. If an error + // occurs with an open builder, the writer will free the open builder pages + // when destroyed. + mpack_assert(mpack_writer_error(writer) == mpack_ok, "can't resolve in error state!"); + + // We don't want the user to longjmp out of any I/O errors while we are + // walking the page list, so defer error callbacks to after we're done. + mpack_writer_error_t error_fn = writer->error_fn; + writer->error_fn = NULL; + // The starting page is the internal storage (if we have it), otherwise // it's the first page in the array mpack_builder_page_t* page = @@ -2584,13 +2634,13 @@ static void mpack_builder_resolve(mpack_writer_t* writer) { // Walk the list of builds, writing everything out in the buffer. Note that // we don't check for errors anywhere. The lower-level write functions will - // all check for errors. We need to walk all pages anyway to free them, so - // there's not much point in optimizing an error path at the expense of the - // normal path. + // all check for errors and do nothing after an error occurs. We need to + // walk all pages anyway to free them, so there's not much point in + // optimizing an error path at the expense of the normal path. while (true) { // write out the container tag - mpack_log("writing out an %s with count %u followed by %zi bytes\n", + mpack_log("writing out an %s with count %" PRIu32 " followed by %zi bytes\n", mpack_type_to_string(build->type), build->count, build->bytes); switch (build->type) { case mpack_type_map: @@ -2640,7 +2690,7 @@ static void mpack_builder_resolve(mpack_writer_t* writer) { // now see if we can find another build. offset = mpack_builder_align_build(offset); - if (offset + sizeof(mpack_build_t) >= mpack_builder_page_size(writer, page)) { + if (offset + sizeof(mpack_build_t) > mpack_builder_page_size(writer, page)) { mpack_log("not enough room in this page for another build\n"); mpack_builder_page_t* next_page = page->next; mpack_builder_free_page(writer, page); @@ -2666,13 +2716,18 @@ static void mpack_builder_resolve(mpack_writer_t* writer) { } mpack_log("done resolve.\n"); + + // We can now restore the error handler and call it if an error occurred. 
+ writer->error_fn = error_fn; + if (writer->error_fn && mpack_writer_error(writer) != mpack_ok) + writer->error_fn(writer, writer->error); } static void mpack_builder_complete(mpack_writer_t* writer, mpack_type_t type) { + mpack_writer_track_pop_builder(writer, type); if (mpack_writer_error(writer) != mpack_ok) return; - mpack_writer_track_pop_builder(writer, type); mpack_builder_t* builder = &writer->builder; mpack_assert(builder->current_build != NULL, "no build in progress!"); mpack_assert(builder->latest_build != NULL, "missing latest build!"); @@ -3113,7 +3168,7 @@ void mpack_skip_bytes(mpack_reader_t* reader, size_t count) { // check if we have enough in the buffer already size_t left = (size_t)(reader->end - reader->data); if (left >= count) { - mpack_log("skipping %u bytes still in buffer\n", (uint32_t)count); + mpack_log("skipping %" PRIu32 " bytes still in buffer\n", (uint32_t)count); reader->data += count; return; } @@ -5002,7 +5057,7 @@ static bool mpack_tree_reserve_fill(mpack_tree_t* tree) { return false; } - mpack_log("read %u more bytes\n", (uint32_t)read); + mpack_log("read %" PRIu32 " more bytes\n", (uint32_t)read); tree->data_length += read; tree->parser.possible_nodes_left += read; } while (tree->parser.possible_nodes_left < bytes); diff --git a/lib/mpack-amalgamation-1.1/src/mpack/mpack.h b/lib/mpack-amalgamation-1.1.1/src/mpack/mpack.h similarity index 99% rename from lib/mpack-amalgamation-1.1/src/mpack/mpack.h rename to lib/mpack-amalgamation-1.1.1/src/mpack/mpack.h index 803f03e41a7..1f2386a82b5 100644 --- a/lib/mpack-amalgamation-1.1/src/mpack/mpack.h +++ b/lib/mpack-amalgamation-1.1.1/src/mpack/mpack.h @@ -24,7 +24,7 @@ */ /* - * This is the MPack 1.1 amalgamation package. + * This is the MPack 1.1.1 amalgamation package. * * http://github.com/ludocode/mpack */ @@ -1893,7 +1893,7 @@ MPACK_EXTERN_C_BEGIN #define MPACK_VERSION_MAJOR 1 /**< The major version number of MPack. */ #define MPACK_VERSION_MINOR 1 /**< The minor version number of MPack. */ -#define MPACK_VERSION_PATCH 0 /**< The patch version number of MPack. */ +#define MPACK_VERSION_PATCH 1 /**< The patch version number of MPack. */ /** A number containing the version number of MPack for comparison purposes. */ #define MPACK_VERSION ((MPACK_VERSION_MAJOR * 10000) + \ @@ -3780,7 +3780,7 @@ MPACK_INLINE void mpack_finish_map(mpack_writer_t* writer) { /** * Starts building an array. * - * Elements must follow, and mpack_complete_map() must be called when done. The + * Elements must follow, and mpack_complete_array() must be called when done. The * number of elements is determined automatically. * * If you know ahead of time the number of elements in the array, it is more @@ -7719,7 +7719,7 @@ size_t mpack_node_bin_size(mpack_node_t node); * * This returns zero if the tree is in an error state. * - * If this node is not a str, bin or map, @ref mpack_error_type is raised and zero + * If this node is not a str, bin or ext, @ref mpack_error_type is raised and zero * is returned. */ uint32_t mpack_node_data_len(mpack_node_t node); @@ -7759,7 +7759,7 @@ const char* mpack_node_str(mpack_node_t node); * * The pointer is valid as long as the data backing the tree is valid. * - * If this node is not of a str, bin or map, @ref mpack_error_type is raised, and + * If this node is not of a str, bin or ext, @ref mpack_error_type is raised, and * @c NULL is returned. 
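The two documentation fixes just below correct "str, bin or map" to "str, bin or ext" for `mpack_node_data_len()` and `mpack_node_str()`. For reference, a short tree-API sketch exercising both; it assumes `data`/`size` hold a MessagePack map such as the one produced by the writer sketch earlier.

    #include <stdio.h>
    #include "mpack/mpack.h"

    /* Parse a buffer and print the string stored under "key". */
    static void decode_example(const char *data, size_t size) {
        mpack_tree_t tree;
        mpack_tree_init_data(&tree, data, size);
        mpack_tree_parse(&tree);

        mpack_node_t value = mpack_node_map_cstr(mpack_tree_root(&tree), "key");
        uint32_t len    = mpack_node_data_len(value); /* str/bin/ext only */
        const char *str = mpack_node_str(value);      /* not NUL-terminated */
        if (str)
            printf("key = %.*s\n", (int)len, str);

        if (mpack_tree_destroy(&tree) != mpack_ok)
            fprintf(stderr, "invalid or truncated MessagePack data\n");
    }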
* * @see mpack_node_copy_cstr() diff --git a/lib/sqlite-amalgamation-3390300/CMakeLists.txt b/lib/sqlite-amalgamation-3440200/CMakeLists.txt similarity index 100% rename from lib/sqlite-amalgamation-3390300/CMakeLists.txt rename to lib/sqlite-amalgamation-3440200/CMakeLists.txt diff --git a/lib/sqlite-amalgamation-3390300/sqlite3.c b/lib/sqlite-amalgamation-3440200/sqlite3.c similarity index 92% rename from lib/sqlite-amalgamation-3390300/sqlite3.c rename to lib/sqlite-amalgamation-3440200/sqlite3.c index 451ca8e6a2e..9443127f46e 100644 --- a/lib/sqlite-amalgamation-3390300/sqlite3.c +++ b/lib/sqlite-amalgamation-3440200/sqlite3.c @@ -1,6 +1,6 @@ /****************************************************************************** ** This file is an amalgamation of many separate C source files from SQLite -** version 3.39.3. By combining all the individual C code files into this +** version 3.44.2. By combining all the individual C code files into this ** single large file, the entire code can be compiled as a single translation ** unit. This allows many compilers to do optimizations that would not be ** possible if the files were compiled separately. Performance improvements @@ -16,6 +16,9 @@ ** if you want a wrapper to interface SQLite with your choice of programming ** language. The code for the "sqlite3" command-line shell is also in a ** separate file. This file contains only code for the core SQLite library. +** +** The content in this amalgamation comes from Fossil check-in +** ebead0e7230cd33bcec9f95d2183069565b9. */ #define SQLITE_CORE 1 #define SQLITE_AMALGAMATION 1 @@ -50,11 +53,11 @@ ** used on lines of code that actually ** implement parts of coverage testing. ** -** OPTIMIZATION-IF-TRUE - This branch is allowed to alway be false +** OPTIMIZATION-IF-TRUE - This branch is allowed to always be false ** and the correct answer is still obtained, ** though perhaps more slowly. ** -** OPTIMIZATION-IF-FALSE - This branch is allowed to alway be true +** OPTIMIZATION-IF-FALSE - This branch is allowed to always be true ** and the correct answer is still obtained, ** though perhaps more slowly. ** @@ -123,6 +126,10 @@ #define SQLITE_4_BYTE_ALIGNED_MALLOC #endif /* defined(_MSC_VER) && !defined(_WIN64) */ +#if !defined(HAVE_LOG2) && defined(_MSC_VER) && _MSC_VER<1800 +#define HAVE_LOG2 0 +#endif /* !defined(HAVE_LOG2) && defined(_MSC_VER) && _MSC_VER<1800 */ + #endif /* SQLITE_MSVC_H */ /************** End of msvc.h ************************************************/ @@ -452,9 +459,9 @@ extern "C" { ** [sqlite3_libversion_number()], [sqlite3_sourceid()], ** [sqlite_version()] and [sqlite_source_id()]. 
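With the vendored SQLite jumping from 3.39.3 to 3.44.2, a cheap runtime guard that the linked library matches the amalgamation header can catch mixed-version builds early. A minimal sketch using the standard version APIs:

    #include <stdio.h>
    #include "sqlite3.h"

    /* Return 0 if the linked SQLite matches the compiled-against header. */
    static int check_sqlite_version(void) {
        if (sqlite3_libversion_number() != SQLITE_VERSION_NUMBER) {
            fprintf(stderr, "SQLite mismatch: header %s, library %s\n",
                    SQLITE_VERSION, sqlite3_libversion());
            return -1;
        }
        return 0;
    }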
*/ -#define SQLITE_VERSION "3.39.3" -#define SQLITE_VERSION_NUMBER 3039003 -#define SQLITE_SOURCE_ID "2022-09-05 11:02:23 4635f4a69c8c2a8df242b384a992aea71224e39a2ccab42d8c0b0602f1e826e8" +#define SQLITE_VERSION "3.44.2" +#define SQLITE_VERSION_NUMBER 3044002 +#define SQLITE_SOURCE_ID "2023-11-24 11:41:44 ebead0e7230cd33bcec9f95d2183069565b9e709bf745c9b5db65cc0cbf92c0f" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -834,6 +841,7 @@ SQLITE_API int sqlite3_exec( #define SQLITE_IOERR_ROLLBACK_ATOMIC (SQLITE_IOERR | (31<<8)) #define SQLITE_IOERR_DATA (SQLITE_IOERR | (32<<8)) #define SQLITE_IOERR_CORRUPTFS (SQLITE_IOERR | (33<<8)) +#define SQLITE_IOERR_IN_PAGE (SQLITE_IOERR | (34<<8)) #define SQLITE_LOCKED_SHAREDCACHE (SQLITE_LOCKED | (1<<8)) #define SQLITE_LOCKED_VTAB (SQLITE_LOCKED | (2<<8)) #define SQLITE_BUSY_RECOVERY (SQLITE_BUSY | (1<<8)) @@ -869,6 +877,7 @@ SQLITE_API int sqlite3_exec( #define SQLITE_CONSTRAINT_DATATYPE (SQLITE_CONSTRAINT |(12<<8)) #define SQLITE_NOTICE_RECOVER_WAL (SQLITE_NOTICE | (1<<8)) #define SQLITE_NOTICE_RECOVER_ROLLBACK (SQLITE_NOTICE | (2<<8)) +#define SQLITE_NOTICE_RBU (SQLITE_NOTICE | (3<<8)) #define SQLITE_WARNING_AUTOINDEX (SQLITE_WARNING | (1<<8)) #define SQLITE_AUTH_USER (SQLITE_AUTH | (1<<8)) #define SQLITE_OK_LOAD_PERMANENTLY (SQLITE_OK | (1<<8)) @@ -976,13 +985,17 @@ SQLITE_API int sqlite3_exec( ** ** SQLite uses one of these integer values as the second ** argument to calls it makes to the xLock() and xUnlock() methods -** of an [sqlite3_io_methods] object. +** of an [sqlite3_io_methods] object. These values are ordered from +** lest restrictive to most restrictive. +** +** The argument to xLock() is always SHARED or higher. The argument to +** xUnlock is either SHARED or NONE. */ -#define SQLITE_LOCK_NONE 0 -#define SQLITE_LOCK_SHARED 1 -#define SQLITE_LOCK_RESERVED 2 -#define SQLITE_LOCK_PENDING 3 -#define SQLITE_LOCK_EXCLUSIVE 4 +#define SQLITE_LOCK_NONE 0 /* xUnlock() only */ +#define SQLITE_LOCK_SHARED 1 /* xLock() or xUnlock() */ +#define SQLITE_LOCK_RESERVED 2 /* xLock() only */ +#define SQLITE_LOCK_PENDING 3 /* xLock() only */ +#define SQLITE_LOCK_EXCLUSIVE 4 /* xLock() only */ /* ** CAPI3REF: Synchronization Type Flags @@ -1060,7 +1073,14 @@ struct sqlite3_file { **
  • [SQLITE_LOCK_PENDING], or **
  • [SQLITE_LOCK_EXCLUSIVE]. ** -** xLock() increases the lock. xUnlock() decreases the lock. +** xLock() upgrades the database file lock. In other words, xLock() moves the +** database file lock in the direction from NONE toward EXCLUSIVE. The argument to +** xLock() is always one of SHARED, RESERVED, PENDING, or EXCLUSIVE, never +** SQLITE_LOCK_NONE. If the database file lock is already at or above the +** requested lock, then the call to xLock() is a no-op. +** xUnlock() downgrades the database file lock to either SHARED or NONE. +** If the lock is already at or below the requested lock state, then the call +** to xUnlock() is a no-op. ** The xCheckReservedLock() method checks whether any database connection, ** either in this process or in some other process, is holding a RESERVED, ** PENDING, or EXCLUSIVE lock on the file. It returns true @@ -1165,9 +1185,8 @@ struct sqlite3_io_methods { ** opcode causes the xFileControl method to write the current state of ** the lock (one of [SQLITE_LOCK_NONE], [SQLITE_LOCK_SHARED], ** [SQLITE_LOCK_RESERVED], [SQLITE_LOCK_PENDING], or [SQLITE_LOCK_EXCLUSIVE]) -** into an integer that the pArg argument points to. This capability -** is used during testing and is only available when the SQLITE_TEST -** compile-time option is used. +** into an integer that the pArg argument points to. +** This capability is only available if SQLite is compiled with [SQLITE_DEBUG]. ** **
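Note that the rewording above also narrows SQLITE_FCNTL_LOCKSTATE from SQLITE_TEST builds to SQLITE_DEBUG builds. A hedged sketch of querying it through sqlite3_file_control():

    #include "sqlite3.h"

    /* Read the current lock state of the "main" database file into
     * *lock_state; only meaningful when SQLite is built with SQLITE_DEBUG. */
    static int get_lock_state(sqlite3 *db, int *lock_state) {
        *lock_state = SQLITE_LOCK_NONE;
        return sqlite3_file_control(db, "main", SQLITE_FCNTL_LOCKSTATE,
                                    lock_state);
    }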
  • [[SQLITE_FCNTL_SIZE_HINT]] ** The [SQLITE_FCNTL_SIZE_HINT] opcode is used by SQLite to give the VFS @@ -1471,7 +1490,6 @@ struct sqlite3_io_methods { ** in wal mode after the client has finished copying pages from the wal ** file to the database file, but before the *-shm file is updated to ** record the fact that the pages have been checkpointed. -** ** **
  • [[SQLITE_FCNTL_EXTERNAL_READER]] ** The EXPERIMENTAL [SQLITE_FCNTL_EXTERNAL_READER] opcode is used to detect @@ -1484,10 +1502,16 @@ struct sqlite3_io_methods { ** the database is not a wal-mode db, or if there is no such connection in any ** other process. This opcode cannot be used to detect transactions opened ** by clients within the current process, only within other processes. -** ** **
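A similar hedged sketch for the EXPERIMENTAL SQLITE_FCNTL_EXTERNAL_READER probe just described (has_external_reader is a hypothetical helper):

    #include "sqlite3.h"

    /* Returns 1 if some other process holds a read transaction on this
    ** wal-mode database, 0 if not or if the probe is unsupported. The
    ** opcode takes a pointer to int as its argument. */
    static int has_external_reader(sqlite3 *db){
      int isReader = 0;
      if( sqlite3_file_control(db, "main", SQLITE_FCNTL_EXTERNAL_READER,
                               &isReader)!=SQLITE_OK ){
        return 0;
      }
      return isReader;
    }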
  • [[SQLITE_FCNTL_CKSM_FILE]] -** Used by the cksmvfs VFS module only. +** The [SQLITE_FCNTL_CKSM_FILE] opcode is for use internally by the +** [checksum VFS shim] only. +** +**
  • [[SQLITE_FCNTL_RESET_CACHE]] +** If there is currently no transaction open on the database, and the +** database is not a temp db, then the [SQLITE_FCNTL_RESET_CACHE] file-control +** purges the contents of the in-memory page cache. If there is an open +** transaction, or if the db is a temp-db, this opcode is a no-op, not an error. ** */ #define SQLITE_FCNTL_LOCKSTATE 1 @@ -1530,6 +1554,7 @@ struct sqlite3_io_methods { #define SQLITE_FCNTL_CKPT_START 39 #define SQLITE_FCNTL_EXTERNAL_READER 40 #define SQLITE_FCNTL_CKSM_FILE 41 +#define SQLITE_FCNTL_RESET_CACHE 42 /* deprecated names */ #define SQLITE_GET_LOCKPROXYFILE SQLITE_FCNTL_GET_LOCKPROXYFILE @@ -1559,6 +1584,26 @@ typedef struct sqlite3_mutex sqlite3_mutex; */ typedef struct sqlite3_api_routines sqlite3_api_routines; +/* +** CAPI3REF: File Name +** +** Type [sqlite3_filename] is used by SQLite to pass filenames to the +** xOpen method of a [VFS]. It may be cast to (const char*) and treated +** as a normal, nul-terminated, UTF-8 buffer containing the filename, but +** may also be passed to special APIs such as: +** +**
      +**
    • sqlite3_filename_database() +**
    • sqlite3_filename_journal() +**
    • sqlite3_filename_wal() +**
    • sqlite3_uri_parameter() +**
    • sqlite3_uri_boolean() +**
    • sqlite3_uri_int64() +**
    • sqlite3_uri_key() +**
    +*/ +typedef const char *sqlite3_filename; + /* ** CAPI3REF: OS Interface Object ** @@ -1737,7 +1782,7 @@ struct sqlite3_vfs { sqlite3_vfs *pNext; /* Next registered VFS */ const char *zName; /* Name of this virtual file system */ void *pAppData; /* Pointer to application-specific data */ - int (*xOpen)(sqlite3_vfs*, const char *zName, sqlite3_file*, + int (*xOpen)(sqlite3_vfs*, sqlite3_filename zName, sqlite3_file*, int flags, int *pOutFlags); int (*xDelete)(sqlite3_vfs*, const char *zName, int syncDir); int (*xAccess)(sqlite3_vfs*, const char *zName, int flags, int *pResOut); @@ -1924,20 +1969,23 @@ SQLITE_API int sqlite3_os_end(void); ** must ensure that no other SQLite interfaces are invoked by other ** threads while sqlite3_config() is running. ** -** The sqlite3_config() interface -** may only be invoked prior to library initialization using -** [sqlite3_initialize()] or after shutdown by [sqlite3_shutdown()]. -** ^If sqlite3_config() is called after [sqlite3_initialize()] and before -** [sqlite3_shutdown()] then it will return SQLITE_MISUSE. -** Note, however, that ^sqlite3_config() can be called as part of the -** implementation of an application-defined [sqlite3_os_init()]. -** ** The first argument to sqlite3_config() is an integer ** [configuration option] that determines ** what property of SQLite is to be configured. Subsequent arguments ** vary depending on the [configuration option] ** in the first argument. ** +** For most configuration options, the sqlite3_config() interface +** may only be invoked prior to library initialization using +** [sqlite3_initialize()] or after shutdown by [sqlite3_shutdown()]. +** The exceptional configuration options that may be invoked at any time +** are called "anytime configuration options". +** ^If sqlite3_config() is called after [sqlite3_initialize()] and before +** [sqlite3_shutdown()] with a first argument that is not an anytime +** configuration option, then the sqlite3_config() call will return SQLITE_MISUSE. +** Note, however, that ^sqlite3_config() can be called as part of the +** implementation of an application-defined [sqlite3_os_init()]. +** ** ^When a configuration option is set, sqlite3_config() returns [SQLITE_OK]. ** ^If the option is unknown or SQLite is unable to set the option ** then this routine returns a non-zero [error code]. @@ -2045,6 +2093,23 @@ struct sqlite3_mem_methods { ** These constants are the available integer configuration options that ** can be passed as the first argument to the [sqlite3_config()] interface. ** +** Most of the configuration options for sqlite3_config() +** will only work if invoked prior to [sqlite3_initialize()] or after +** [sqlite3_shutdown()]. The few exceptions to this rule are called +** "anytime configuration options". +** ^Calling [sqlite3_config()] with a first argument that is not an +** anytime configuration option in between calls to [sqlite3_initialize()] and +** [sqlite3_shutdown()] is a no-op that returns SQLITE_MISUSE. +** +** The set of anytime configuration options can change (by insertions +** and/or deletions) from one release of SQLite to the next. +** As of SQLite version 3.42.0, the complete set of anytime configuration +** options is: +**
      +**
    • SQLITE_CONFIG_LOG +**
    • SQLITE_CONFIG_PCACHE_HDRSZ +**
    +** ** New configuration options may be added in future releases of SQLite. ** Existing configuration options might be discontinued. Applications ** should check the return code from [sqlite3_config()] to make sure that @@ -2375,7 +2440,7 @@ struct sqlite3_mem_methods { ** is stored in each sorted record and the required column values loaded ** from the database as records are returned in sorted order. The default ** value for this option is to never use this optimization. Specifying a -** negative value for this option restores the default behaviour. +** negative value for this option restores the default behavior. ** This option is only available if SQLite is compiled with the ** [SQLITE_ENABLE_SORTER_REFERENCES] compile-time option. ** @@ -2391,28 +2456,28 @@ struct sqlite3_mem_methods { ** compile-time option is not set, then the default maximum is 1073741824. ** */ -#define SQLITE_CONFIG_SINGLETHREAD 1 /* nil */ -#define SQLITE_CONFIG_MULTITHREAD 2 /* nil */ -#define SQLITE_CONFIG_SERIALIZED 3 /* nil */ -#define SQLITE_CONFIG_MALLOC 4 /* sqlite3_mem_methods* */ -#define SQLITE_CONFIG_GETMALLOC 5 /* sqlite3_mem_methods* */ -#define SQLITE_CONFIG_SCRATCH 6 /* No longer used */ -#define SQLITE_CONFIG_PAGECACHE 7 /* void*, int sz, int N */ -#define SQLITE_CONFIG_HEAP 8 /* void*, int nByte, int min */ -#define SQLITE_CONFIG_MEMSTATUS 9 /* boolean */ -#define SQLITE_CONFIG_MUTEX 10 /* sqlite3_mutex_methods* */ -#define SQLITE_CONFIG_GETMUTEX 11 /* sqlite3_mutex_methods* */ -/* previously SQLITE_CONFIG_CHUNKALLOC 12 which is now unused. */ -#define SQLITE_CONFIG_LOOKASIDE 13 /* int int */ -#define SQLITE_CONFIG_PCACHE 14 /* no-op */ -#define SQLITE_CONFIG_GETPCACHE 15 /* no-op */ -#define SQLITE_CONFIG_LOG 16 /* xFunc, void* */ -#define SQLITE_CONFIG_URI 17 /* int */ -#define SQLITE_CONFIG_PCACHE2 18 /* sqlite3_pcache_methods2* */ -#define SQLITE_CONFIG_GETPCACHE2 19 /* sqlite3_pcache_methods2* */ +#define SQLITE_CONFIG_SINGLETHREAD 1 /* nil */ +#define SQLITE_CONFIG_MULTITHREAD 2 /* nil */ +#define SQLITE_CONFIG_SERIALIZED 3 /* nil */ +#define SQLITE_CONFIG_MALLOC 4 /* sqlite3_mem_methods* */ +#define SQLITE_CONFIG_GETMALLOC 5 /* sqlite3_mem_methods* */ +#define SQLITE_CONFIG_SCRATCH 6 /* No longer used */ +#define SQLITE_CONFIG_PAGECACHE 7 /* void*, int sz, int N */ +#define SQLITE_CONFIG_HEAP 8 /* void*, int nByte, int min */ +#define SQLITE_CONFIG_MEMSTATUS 9 /* boolean */ +#define SQLITE_CONFIG_MUTEX 10 /* sqlite3_mutex_methods* */ +#define SQLITE_CONFIG_GETMUTEX 11 /* sqlite3_mutex_methods* */ +/* previously SQLITE_CONFIG_CHUNKALLOC 12 which is now unused. 
*/ +#define SQLITE_CONFIG_LOOKASIDE 13 /* int int */ +#define SQLITE_CONFIG_PCACHE 14 /* no-op */ +#define SQLITE_CONFIG_GETPCACHE 15 /* no-op */ +#define SQLITE_CONFIG_LOG 16 /* xFunc, void* */ +#define SQLITE_CONFIG_URI 17 /* int */ +#define SQLITE_CONFIG_PCACHE2 18 /* sqlite3_pcache_methods2* */ +#define SQLITE_CONFIG_GETPCACHE2 19 /* sqlite3_pcache_methods2* */ #define SQLITE_CONFIG_COVERING_INDEX_SCAN 20 /* int */ -#define SQLITE_CONFIG_SQLLOG 21 /* xSqllog, void* */ -#define SQLITE_CONFIG_MMAP_SIZE 22 /* sqlite3_int64, sqlite3_int64 */ +#define SQLITE_CONFIG_SQLLOG 21 /* xSqllog, void* */ +#define SQLITE_CONFIG_MMAP_SIZE 22 /* sqlite3_int64, sqlite3_int64 */ #define SQLITE_CONFIG_WIN32_HEAPSIZE 23 /* int nByte */ #define SQLITE_CONFIG_PCACHE_HDRSZ 24 /* int *psz */ #define SQLITE_CONFIG_PMASZ 25 /* unsigned int szPma */ @@ -2453,7 +2518,7 @@ struct sqlite3_mem_methods { ** configuration for a database connection can only be changed when that ** connection is not currently using lookaside memory, or in other words ** when the "current value" returned by -** [sqlite3_db_status](D,[SQLITE_CONFIG_LOOKASIDE],...) is zero. +** [sqlite3_db_status](D,[SQLITE_DBSTATUS_LOOKASIDE_USED],...) is zero. ** Any attempt to change the lookaside memory configuration when lookaside ** memory is in use leaves the configuration unchanged and returns ** [SQLITE_BUSY].)^ @@ -2550,7 +2615,7 @@ struct sqlite3_mem_methods { ** database handle, SQLite checks if this will mean that there are now no ** connections at all to the database. If so, it performs a checkpoint ** operation before closing the connection. This option may be used to -** override this behaviour. The first parameter passed to this operation +** override this behavior. The first parameter passed to this operation ** is an integer - positive to disable checkpoints-on-close, or zero (the ** default) to enable them, and negative to leave the setting unchanged. ** The second parameter is a pointer to an integer @@ -2603,8 +2668,12 @@ struct sqlite3_mem_methods { **
  • sqlite3_db_config(db, SQLITE_DBCONFIG_RESET_DATABASE, 0, 0); ** ** Because resetting a database is destructive and irreversible, the -** process requires the use of this obscure API and multiple steps to help -** ensure that it does not happen by accident. +** process requires the use of this obscure API and multiple steps to +** help ensure that it does not happen by accident. Because this +** feature must be capable of resetting corrupt databases, and +** shutting down virtual tables may require access to that corrupt +** storage, the library must abandon any installed virtual tables +** without calling their xDestroy() methods. ** ** [[SQLITE_DBCONFIG_DEFENSIVE]]
    SQLITE_DBCONFIG_DEFENSIVE
    **
    The SQLITE_DBCONFIG_DEFENSIVE option activates or deactivates the @@ -2615,6 +2684,7 @@ struct sqlite3_mem_methods { **
      **
    • The [PRAGMA writable_schema=ON] statement. **
    • The [PRAGMA journal_mode=OFF] statement. +**
    • The [PRAGMA schema_version=N] statement. **
    • Writes to the [sqlite_dbpage] virtual table. **
    • Direct writes to [shadow tables]. **
    @@ -2642,7 +2712,7 @@ struct sqlite3_mem_methods { **
    ** ** [[SQLITE_DBCONFIG_DQS_DML]] -**
    SQLITE_DBCONFIG_DQS_DML +**
    SQLITE_DBCONFIG_DQS_DML
    **
    The SQLITE_DBCONFIG_DQS_DML option activates or deactivates ** the legacy [double-quoted string literal] misfeature for DML statements ** only, that is DELETE, INSERT, SELECT, and UPDATE statements. The @@ -2651,7 +2721,7 @@ struct sqlite3_mem_methods { **
    ** ** [[SQLITE_DBCONFIG_DQS_DDL]] -**
    SQLITE_DBCONFIG_DQS_DDL +**
    SQLITE_DBCONFIG_DQS_DDL
    **
    The SQLITE_DBCONFIG_DQS option activates or deactivates ** the legacy [double-quoted string literal] misfeature for DDL statements, ** such as CREATE TABLE and CREATE INDEX. The @@ -2660,7 +2730,7 @@ struct sqlite3_mem_methods { **
    ** ** [[SQLITE_DBCONFIG_TRUSTED_SCHEMA]] -**
    SQLITE_DBCONFIG_TRUSTED_SCHEMA +**
    SQLITE_DBCONFIG_TRUSTED_SCHEMA
    **
    The SQLITE_DBCONFIG_TRUSTED_SCHEMA option tells SQLite to ** assume that database schemas are untainted by malicious content. ** When the SQLITE_DBCONFIG_TRUSTED_SCHEMA option is disabled, SQLite @@ -2680,7 +2750,7 @@ struct sqlite3_mem_methods { **
    ** ** [[SQLITE_DBCONFIG_LEGACY_FILE_FORMAT]] -**
    SQLITE_DBCONFIG_LEGACY_FILE_FORMAT +**
    SQLITE_DBCONFIG_LEGACY_FILE_FORMAT
    **
    The SQLITE_DBCONFIG_LEGACY_FILE_FORMAT option activates or deactivates ** the legacy file format flag. When activated, this flag causes all newly ** created database file to have a schema format version number (the 4-byte @@ -2689,7 +2759,7 @@ struct sqlite3_mem_methods { ** any SQLite version back to 3.0.0 ([dateof:3.0.0]). Without this setting, ** newly created databases are generally not understandable by SQLite versions ** prior to 3.3.0 ([dateof:3.3.0]). As these words are written, there -** is now scarcely any need to generated database files that are compatible +** is now scarcely any need to generate database files that are compatible ** all the way back to version 3.0.0, and so this setting is of little ** practical use, but is provided so that SQLite can continue to claim the ** ability to generate new database files that are compatible with version @@ -2698,8 +2768,40 @@ struct sqlite3_mem_methods { ** the [VACUUM] command will fail with an obscure error when attempting to ** process a table with generated columns and a descending index. This is ** not considered a bug since SQLite versions 3.3.0 and earlier do not support -** either generated columns or decending indexes. +** either generated columns or descending indexes. +**
    +** +** [[SQLITE_DBCONFIG_STMT_SCANSTATUS]] +**
    SQLITE_DBCONFIG_STMT_SCANSTATUS
    +**
The SQLITE_DBCONFIG_STMT_SCANSTATUS option is only useful in
+** SQLITE_ENABLE_STMT_SCANSTATUS builds. In this case, it sets or clears
+** a flag that enables collection of the sqlite3_stmt_scanstatus_v2()
+** statistics. For statistics to be collected, the flag must be set on
+** the database handle both when the SQL statement is prepared and when it
+** is stepped. The flag is set (collection of statistics is enabled)
+** by default. This option takes two arguments: an integer and a pointer to
+** an integer. The first argument is 1, 0, or -1 to enable, disable, or
+** leave unchanged the statement scanstatus option. If the second argument
+** is not NULL, then the value of the statement scanstatus setting after
+** processing the first argument is written into the integer that the second
+** argument points to.
+**
    +** +** [[SQLITE_DBCONFIG_REVERSE_SCANORDER]] +**
    SQLITE_DBCONFIG_REVERSE_SCANORDER
    +**
The SQLITE_DBCONFIG_REVERSE_SCANORDER option changes the default order
+** in which tables and indexes are scanned so that the scans start at the end
+** and work toward the beginning rather than starting at the beginning and
+** working toward the end. Setting SQLITE_DBCONFIG_REVERSE_SCANORDER is the
+** same as setting [PRAGMA reverse_unordered_selects]. This option takes
+** two arguments, which are an integer and a pointer to an integer. The first
+** argument is 1, 0, or -1 to enable, disable, or leave unchanged the
+** reverse scan order flag, respectively. If the second argument is not NULL,
+** then 0 or 1 is written into the integer that the second argument points to
+** depending on whether the reverse scan order flag is set after processing the
+** first argument.
 **
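Both options above share the (int, int*) argument convention of sqlite3_db_config(). A minimal sketch, with illustrative names, that reads the current settings back without changing them:

    #include <stdio.h>
    #include "sqlite3.h"

    /* Pass -1 to leave each flag unchanged; the current value (0 or 1) is
    ** written back through the int* argument. STMT_SCANSTATUS is only
    ** meaningful in SQLITE_ENABLE_STMT_SCANSTATUS builds. */
    static void show_scan_flags(sqlite3 *db){
      int scanstatus = 0, reverse = 0;
      sqlite3_db_config(db, SQLITE_DBCONFIG_STMT_SCANSTATUS, -1, &scanstatus);
      sqlite3_db_config(db, SQLITE_DBCONFIG_REVERSE_SCANORDER, -1, &reverse);
      printf("stmt_scanstatus=%d reverse_scanorder=%d\n", scanstatus, reverse);
    }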
    +** ** */ #define SQLITE_DBCONFIG_MAINDBNAME 1000 /* const char* */ @@ -2720,7 +2822,9 @@ struct sqlite3_mem_methods { #define SQLITE_DBCONFIG_ENABLE_VIEW 1015 /* int int* */ #define SQLITE_DBCONFIG_LEGACY_FILE_FORMAT 1016 /* int int* */ #define SQLITE_DBCONFIG_TRUSTED_SCHEMA 1017 /* int int* */ -#define SQLITE_DBCONFIG_MAX 1017 /* Largest DBCONFIG */ +#define SQLITE_DBCONFIG_STMT_SCANSTATUS 1018 /* int int* */ +#define SQLITE_DBCONFIG_REVERSE_SCANORDER 1019 /* int int* */ +#define SQLITE_DBCONFIG_MAX 1019 /* Largest DBCONFIG */ /* ** CAPI3REF: Enable Or Disable Extended Result Codes @@ -2942,8 +3046,13 @@ SQLITE_API sqlite3_int64 sqlite3_total_changes64(sqlite3*); ** ^A call to sqlite3_interrupt(D) that occurs when there are no running ** SQL statements is a no-op and has no effect on SQL statements ** that are started after the sqlite3_interrupt() call returns. +** +** ^The [sqlite3_is_interrupted(D)] interface can be used to determine whether +** or not an interrupt is currently in effect for [database connection] D. +** It returns 1 if an interrupt is currently in effect, or 0 otherwise. */ SQLITE_API void sqlite3_interrupt(sqlite3*); +SQLITE_API int sqlite3_is_interrupted(sqlite3*); /* ** CAPI3REF: Determine If An SQL Statement Is Complete @@ -3561,8 +3670,8 @@ SQLITE_API SQLITE_DEPRECATED void *sqlite3_profile(sqlite3*, **
    ^An SQLITE_TRACE_PROFILE callback provides approximately the same ** information as is provided by the [sqlite3_profile()] callback. ** ^The P argument is a pointer to the [prepared statement] and the -** X argument points to a 64-bit integer which is the estimated of -** the number of nanosecond that the prepared statement took to run. +** X argument points to a 64-bit integer which is approximately +** the number of nanoseconds that the prepared statement took to run. ** ^The SQLITE_TRACE_PROFILE callback is invoked when the statement finishes. ** ** [[SQLITE_TRACE_ROW]]
    SQLITE_TRACE_ROW
    @@ -3594,8 +3703,10 @@ SQLITE_API SQLITE_DEPRECATED void *sqlite3_profile(sqlite3*, ** M argument should be the bitwise OR-ed combination of ** zero or more [SQLITE_TRACE] constants. ** -** ^Each call to either sqlite3_trace() or sqlite3_trace_v2() overrides -** (cancels) any prior calls to sqlite3_trace() or sqlite3_trace_v2(). +** ^Each call to either sqlite3_trace(D,X,P) or sqlite3_trace_v2(D,M,X,P) +** overrides (cancels) all prior calls to sqlite3_trace(D,X,P) or +** sqlite3_trace_v2(D,M,X,P) for the [database connection] D. Each +** database connection may have at most one trace callback. ** ** ^The X callback is invoked whenever any of the events identified by ** mask M occur. ^The integer return value from the callback is currently @@ -3625,7 +3736,7 @@ SQLITE_API int sqlite3_trace_v2( ** ** ^The sqlite3_progress_handler(D,N,X,P) interface causes the callback ** function X to be invoked periodically during long running calls to -** [sqlite3_exec()], [sqlite3_step()] and [sqlite3_get_table()] for +** [sqlite3_step()] and [sqlite3_prepare()] and similar for ** database connection D. An example use for this ** interface is to keep a GUI updated during a large query. ** @@ -3650,6 +3761,13 @@ SQLITE_API int sqlite3_trace_v2( ** Note that [sqlite3_prepare_v2()] and [sqlite3_step()] both modify their ** database connections for the meaning of "modify" in this paragraph. ** +** The progress handler callback would originally only be invoked from the +** bytecode engine. It still might be invoked during [sqlite3_prepare()] +** and similar because those routines might force a reparse of the schema +** which involves running the bytecode engine. However, beginning with +** SQLite version 3.41.0, the progress handler callback might also be +** invoked directly from [sqlite3_prepare()] while analyzing and generating +** code for complex queries. */ SQLITE_API void sqlite3_progress_handler(sqlite3*, int, int(*)(void*), void*); @@ -3686,13 +3804,18 @@ SQLITE_API void sqlite3_progress_handler(sqlite3*, int, int(*)(void*), void*); ** **
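A sketch of the progress-handler pattern described above, assuming a cancel flag that some other thread sets (the names are illustrative):

    #include "sqlite3.h"

    /* Invoked roughly every 1000 bytecode operations; returning non-zero
    ** aborts the running statement with SQLITE_INTERRUPT. As noted above,
    ** since 3.41.0 the callback may also fire from sqlite3_prepare(). */
    static int on_progress(void *pArg){
      return *(volatile int*)pArg;
    }

    static void install_cancel_hook(sqlite3 *db, volatile int *pCancel){
      sqlite3_progress_handler(db, 1000, on_progress, (void*)pCancel);
    }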
    ** ^(
    [SQLITE_OPEN_READONLY]
    -**
    The database is opened in read-only mode. If the database does not -** already exist, an error is returned.
    )^ +**
    The database is opened in read-only mode. If the database does +** not already exist, an error is returned.
    )^ ** ** ^(
    [SQLITE_OPEN_READWRITE]
    -**
    The database is opened for reading and writing if possible, or reading -** only if the file is write protected by the operating system. In either -** case the database must already exist, otherwise an error is returned.
    )^ +**
    The database is opened for reading and writing if possible, or +** reading only if the file is write protected by the operating +** system. In either case the database must already exist, otherwise +** an error is returned. For historical reasons, if opening in +** read-write mode fails due to OS-level permissions, an attempt is +** made to open it in read-only mode. [sqlite3_db_readonly()] can be +** used to determine whether the database is actually +** read-write.
    )^ ** ** ^(
    [SQLITE_OPEN_READWRITE] | [SQLITE_OPEN_CREATE]
    **
    The database is opened for reading and writing, and is created if @@ -3730,6 +3853,9 @@ SQLITE_API void sqlite3_progress_handler(sqlite3*, int, int(*)(void*), void*); **
    The database is opened [shared cache] enabled, overriding ** the default shared cache setting provided by ** [sqlite3_enable_shared_cache()].)^ +** The [use of shared cache mode is discouraged] and hence shared cache +** capabilities may be omitted from many builds of SQLite. In such cases, +** this option is a no-op. ** ** ^(
    [SQLITE_OPEN_PRIVATECACHE]
    **
    The database is opened [shared cache] disabled, overriding @@ -3745,7 +3871,7 @@ SQLITE_API void sqlite3_progress_handler(sqlite3*, int, int(*)(void*), void*); ** to return an extended result code.
    ** ** [[OPEN_NOFOLLOW]] ^(
    [SQLITE_OPEN_NOFOLLOW]
    -**
    The database filename is not allowed to be a symbolic link
    +**
    The database filename is not allowed to contain a symbolic link
    **
    )^ ** ** If the 3rd parameter to sqlite3_open_v2() is not one of the @@ -3949,7 +4075,7 @@ SQLITE_API int sqlite3_open_v2( ** as F) must be one of: **
      **
    • A database filename pointer created by the SQLite core and -** passed into the xOpen() method of a VFS implemention, or +** passed into the xOpen() method of a VFS implementation, or **
    • A filename obtained from [sqlite3_db_filename()], or **
    • A new filename constructed using [sqlite3_create_filename()]. **
    @@ -4004,10 +4130,10 @@ SQLITE_API int sqlite3_open_v2( ** ** See the [URI filename] documentation for additional information. */ -SQLITE_API const char *sqlite3_uri_parameter(const char *zFilename, const char *zParam); -SQLITE_API int sqlite3_uri_boolean(const char *zFile, const char *zParam, int bDefault); -SQLITE_API sqlite3_int64 sqlite3_uri_int64(const char*, const char*, sqlite3_int64); -SQLITE_API const char *sqlite3_uri_key(const char *zFilename, int N); +SQLITE_API const char *sqlite3_uri_parameter(sqlite3_filename z, const char *zParam); +SQLITE_API int sqlite3_uri_boolean(sqlite3_filename z, const char *zParam, int bDefault); +SQLITE_API sqlite3_int64 sqlite3_uri_int64(sqlite3_filename, const char*, sqlite3_int64); +SQLITE_API const char *sqlite3_uri_key(sqlite3_filename z, int N); /* ** CAPI3REF: Translate filenames @@ -4036,9 +4162,9 @@ SQLITE_API const char *sqlite3_uri_key(const char *zFilename, int N); ** return value from [sqlite3_db_filename()], then the result is ** undefined and is likely a memory access violation. */ -SQLITE_API const char *sqlite3_filename_database(const char*); -SQLITE_API const char *sqlite3_filename_journal(const char*); -SQLITE_API const char *sqlite3_filename_wal(const char*); +SQLITE_API const char *sqlite3_filename_database(sqlite3_filename); +SQLITE_API const char *sqlite3_filename_journal(sqlite3_filename); +SQLITE_API const char *sqlite3_filename_wal(sqlite3_filename); /* ** CAPI3REF: Database File Corresponding To A Journal @@ -4062,7 +4188,7 @@ SQLITE_API sqlite3_file *sqlite3_database_file_object(const char*); /* ** CAPI3REF: Create and Destroy VFS Filenames ** -** These interfces are provided for use by [VFS shim] implementations and +** These interfaces are provided for use by [VFS shim] implementations and ** are not useful outside of that context. ** ** The sqlite3_create_filename(D,J,W,N,P) allocates memory to hold a version of @@ -4104,14 +4230,14 @@ SQLITE_API sqlite3_file *sqlite3_database_file_object(const char*); ** then the corresponding [sqlite3_module.xClose() method should also be ** invoked prior to calling sqlite3_free_filename(Y). */ -SQLITE_API char *sqlite3_create_filename( +SQLITE_API sqlite3_filename sqlite3_create_filename( const char *zDatabase, const char *zJournal, const char *zWal, int nParam, const char **azParam ); -SQLITE_API void sqlite3_free_filename(char*); +SQLITE_API void sqlite3_free_filename(sqlite3_filename); /* ** CAPI3REF: Error Codes And Messages @@ -4142,6 +4268,7 @@ SQLITE_API void sqlite3_free_filename(char*); ** ** ^The sqlite3_errmsg() and sqlite3_errmsg16() return English-language ** text that describes the error, as either UTF-8 or UTF-16 respectively. +** (See how SQLite handles [invalid UTF] for exceptions to this rule.) ** ^(Memory to hold the error message string is managed internally. ** The application does not need to worry about freeing the result. ** However, the error string might be overwritten or deallocated by @@ -4609,6 +4736,41 @@ SQLITE_API int sqlite3_stmt_readonly(sqlite3_stmt *pStmt); */ SQLITE_API int sqlite3_stmt_isexplain(sqlite3_stmt *pStmt); +/* +** CAPI3REF: Change The EXPLAIN Setting For A Prepared Statement +** METHOD: sqlite3_stmt +** +** The sqlite3_stmt_explain(S,E) interface changes the EXPLAIN +** setting for [prepared statement] S. If E is zero, then S becomes +** a normal prepared statement. If E is 1, then S behaves as if +** its SQL text began with "[EXPLAIN]". If E is 2, then S behaves as if +** its SQL text began with "[EXPLAIN QUERY PLAN]". 
+** +** Calling sqlite3_stmt_explain(S,E) might cause S to be reprepared. +** SQLite tries to avoid a reprepare, but a reprepare might be necessary +** on the first transition into EXPLAIN or EXPLAIN QUERY PLAN mode. +** +** Because of the potential need to reprepare, a call to +** sqlite3_stmt_explain(S,E) will fail with SQLITE_ERROR if S cannot be +** reprepared because it was created using [sqlite3_prepare()] instead of +** the newer [sqlite3_prepare_v2()] or [sqlite3_prepare_v3()] interfaces and +** hence has no saved SQL text with which to reprepare. +** +** Changing the explain setting for a prepared statement does not change +** the original SQL text for the statement. Hence, if the SQL text originally +** began with EXPLAIN or EXPLAIN QUERY PLAN, but sqlite3_stmt_explain(S,0) +** is called to convert the statement into an ordinary statement, the EXPLAIN +** or EXPLAIN QUERY PLAN keywords will still appear in the sqlite3_sql(S) +** output, even though the statement now acts like a normal SQL statement. +** +** This routine returns SQLITE_OK if the explain mode is successfully +** changed, or an error code if the explain mode could not be changed. +** The explain mode cannot be changed while a statement is active. +** Hence, it is good practice to call [sqlite3_reset(S)] +** immediately prior to calling sqlite3_stmt_explain(S,E). +*/ +SQLITE_API int sqlite3_stmt_explain(sqlite3_stmt *pStmt, int eMode); + /* ** CAPI3REF: Determine If A Prepared Statement Has Been Reset ** METHOD: sqlite3_stmt @@ -4772,7 +4934,7 @@ typedef struct sqlite3_context sqlite3_context; ** with it may be passed. ^It is called to dispose of the BLOB or string even ** if the call to the bind API fails, except the destructor is not called if ** the third parameter is a NULL pointer or the fourth parameter is negative. -** ^ (2) The special constant, [SQLITE_STATIC], may be passsed to indicate that +** ^ (2) The special constant, [SQLITE_STATIC], may be passed to indicate that ** the application remains responsible for disposing of the object. ^In this ** case, the object and the provided pointer to it must remain valid until ** either the prepared statement is finalized or the same SQL parameter is @@ -5451,20 +5613,33 @@ SQLITE_API int sqlite3_finalize(sqlite3_stmt *pStmt); ** ^The [sqlite3_reset(S)] interface resets the [prepared statement] S ** back to the beginning of its program. ** -** ^If the most recent call to [sqlite3_step(S)] for the -** [prepared statement] S returned [SQLITE_ROW] or [SQLITE_DONE], -** or if [sqlite3_step(S)] has never before been called on S, -** then [sqlite3_reset(S)] returns [SQLITE_OK]. +** ^The return code from [sqlite3_reset(S)] indicates whether or not +** the previous evaluation of prepared statement S completed successfully. +** ^If [sqlite3_step(S)] has never before been called on S or if +** [sqlite3_step(S)] has not been called since the previous call +** to [sqlite3_reset(S)], then [sqlite3_reset(S)] will return +** [SQLITE_OK]. ** ** ^If the most recent call to [sqlite3_step(S)] for the ** [prepared statement] S indicated an error, then ** [sqlite3_reset(S)] returns an appropriate [error code]. +** ^The [sqlite3_reset(S)] interface might also return an [error code] +** if there were no prior errors but the process of resetting +** the prepared statement caused a new error. 
^For example, if an +** [INSERT] statement with a [RETURNING] clause is only stepped one time, +** that one call to [sqlite3_step(S)] might return SQLITE_ROW but +** the overall statement might still fail and the [sqlite3_reset(S)] call +** might return SQLITE_BUSY if locking constraints prevent the +** database change from committing. Therefore, it is important that +** applications check the return code from [sqlite3_reset(S)] even if +** no prior call to [sqlite3_step(S)] indicated a problem. ** ** ^The [sqlite3_reset(S)] interface does not change the values ** of any [sqlite3_bind_blob|bindings] on the [prepared statement] S. */ SQLITE_API int sqlite3_reset(sqlite3_stmt *pStmt); + /* ** CAPI3REF: Create Or Redefine SQL Functions ** KEYWORDS: {function creation routines} @@ -5670,10 +5845,21 @@ SQLITE_API int sqlite3_create_window_function( ** from top-level SQL, and cannot be used in VIEWs or TRIGGERs nor in ** schema structures such as [CHECK constraints], [DEFAULT clauses], ** [expression indexes], [partial indexes], or [generated columns]. -** The SQLITE_DIRECTONLY flags is a security feature which is recommended -** for all [application-defined SQL functions], and especially for functions -** that have side-effects or that could potentially leak sensitive -** information. +**

    +** The SQLITE_DIRECTONLY flag is recommended for any +** [application-defined SQL function] +** that has side-effects or that could potentially leak sensitive information. +** This will prevent attacks in which an application is tricked +** into using a database file that has had its schema surreptitiously +** modified to invoke the application-defined function in ways that are +** harmful. +**

    +** Some people say it is good practice to set SQLITE_DIRECTONLY on all +** [application-defined SQL functions], regardless of whether or not they +** are security sensitive, as doing so prevents those functions from being used +** inside of the database schema, and thus ensures that the database +** can be inspected and modified using generic tools (such as the [CLI]) +** that do not have access to the application-defined functions. ** ** ** [[SQLITE_INNOCUOUS]]

    SQLITE_INNOCUOUS
    @@ -5700,13 +5886,27 @@ SQLITE_API int sqlite3_create_window_function( **
    ** ** [[SQLITE_SUBTYPE]]
    SQLITE_SUBTYPE
-** The SQLITE_SUBTYPE flag indicates to SQLite that a function may call
+** The SQLITE_SUBTYPE flag indicates to SQLite that a function might call
 ** [sqlite3_value_subtype()] to inspect the sub-types of its arguments.
-** Specifying this flag makes no difference for scalar or aggregate user
-** functions. However, if it is not specified for a user-defined window
-** function, then any sub-types belonging to arguments passed to the window
-** function may be discarded before the window function is called (i.e.
-** sqlite3_value_subtype() will always return 0).
+** This flag instructs SQLite to omit some corner-case optimizations that
+** might disrupt the operation of the [sqlite3_value_subtype()] function,
+** causing it to return zero rather than the correct subtype.
+** SQL functions that invoke [sqlite3_value_subtype()] should have this
+** property. If the SQLITE_SUBTYPE property is omitted, then the return
+** value from [sqlite3_value_subtype()] might sometimes be zero even though
+** a non-zero subtype was specified by the function argument expression.
+**
+** [[SQLITE_RESULT_SUBTYPE]]
    SQLITE_RESULT_SUBTYPE
+** The SQLITE_RESULT_SUBTYPE flag indicates to SQLite that a function might call
+** [sqlite3_result_subtype()] to cause a sub-type to be associated with its
+** result.
+** Every function that invokes [sqlite3_result_subtype()] should have this
+** property. If it does not, then the call to [sqlite3_result_subtype()]
+** might become a no-op if the function is used as a term in an
+** [expression index]. On the other hand, SQL functions that never invoke
+** [sqlite3_result_subtype()] should avoid setting this property, as the
+** purpose of this property is to disable certain optimizations that are
+** incompatible with subtypes.
 **
 **
 */
@@ -5714,6 +5914,7 @@ SQLITE_API int sqlite3_create_window_function(
 #define SQLITE_DIRECTONLY 0x000080000
 #define SQLITE_SUBTYPE 0x000100000
 #define SQLITE_INNOCUOUS 0x000200000
+#define SQLITE_RESULT_SUBTYPE 0x001000000
 /*
 ** CAPI3REF: Deprecated Functions
@@ -5879,6 +6080,28 @@ SQLITE_API int sqlite3_value_numeric_type(sqlite3_value*);
 SQLITE_API int sqlite3_value_nochange(sqlite3_value*);
 SQLITE_API int sqlite3_value_frombind(sqlite3_value*);
+/*
+** CAPI3REF: Report the internal text encoding state of an sqlite3_value object
+** METHOD: sqlite3_value
+**
+** ^(The sqlite3_value_encoding(X) interface returns one of [SQLITE_UTF8],
+** [SQLITE_UTF16BE], or [SQLITE_UTF16LE] according to the current text encoding
+** of the value X, assuming that X has type TEXT.)^ If sqlite3_value_type(X)
+** returns something other than SQLITE_TEXT, then the return value from
+** sqlite3_value_encoding(X) is meaningless. ^Calls to
+** [sqlite3_value_text(X)], [sqlite3_value_text16(X)], [sqlite3_value_text16be(X)],
+** [sqlite3_value_text16le(X)], [sqlite3_value_bytes(X)], or
+** [sqlite3_value_bytes16(X)] might change the encoding of the value X and
+** thus change the return from subsequent calls to sqlite3_value_encoding(X).
+**
+** This routine is intended for use by applications that test and validate
+** the SQLite implementation. This routine is inquiring about the opaque
+** internal state of an [sqlite3_value] object. Ordinary applications should
+** not need to know what the internal state of an sqlite3_value object is and
+** hence should not need to use this interface.
+*/
+SQLITE_API int sqlite3_value_encoding(sqlite3_value*);
+
 /*
 ** CAPI3REF: Finding The Subtype Of SQL Values
 ** METHOD: sqlite3_value
@@ -5888,6 +6111,12 @@ SQLITE_API int sqlite3_value_frombind(sqlite3_value*);
 ** information can be used to pass a limited amount of context from
 ** one SQL function to another. Use the [sqlite3_result_subtype()]
 ** routine to set the subtype for the return value of an SQL function.
+**
+** Every [application-defined SQL function] that invokes this interface
+** should include the [SQLITE_SUBTYPE] property in the text
+** encoding argument when the function is [sqlite3_create_function|registered].
+** If the [SQLITE_SUBTYPE] property is omitted, then sqlite3_value_subtype()
+** might return zero instead of the upstream subtype in some corner cases.
 */
 SQLITE_API unsigned int sqlite3_value_subtype(sqlite3_value*);
@@ -5931,7 +6160,7 @@ SQLITE_API void sqlite3_value_free(sqlite3_value*);
 **
 ** ^The sqlite3_aggregate_context(C,N) routine returns a NULL pointer
 ** when first called if N is less than or equal to zero or if a memory
-** allocate error occurs.
+** allocation error occurs.
 **
 ** ^(The amount of space allocated by sqlite3_aggregate_context(C,N) is
 ** determined by the N parameter on first successful call. Changing the
@@ -5986,48 +6215,56 @@ SQLITE_API sqlite3 *sqlite3_context_db_handle(sqlite3_context*);
 ** METHOD: sqlite3_context
 **
 ** These functions may be used by (non-aggregate) SQL functions to
-** associate metadata with argument values. If the same value is passed to
-** multiple invocations of the same SQL function during query execution, under
-** some circumstances the associated metadata may be preserved. An example
-** of where this might be useful is in a regular-expression matching
-** function. The compiled version of the regular expression can be stored as
-** metadata associated with the pattern string.
+** associate auxiliary data with argument values.
If the same argument +** value is passed to multiple invocations of the same SQL function during +** query execution, under some circumstances the associated auxiliary data +** might be preserved. An example of where this might be useful is in a +** regular-expression matching function. The compiled version of the regular +** expression can be stored as auxiliary data associated with the pattern string. ** Then as long as the pattern string remains the same, ** the compiled regular expression can be reused on multiple ** invocations of the same function. ** -** ^The sqlite3_get_auxdata(C,N) interface returns a pointer to the metadata +** ^The sqlite3_get_auxdata(C,N) interface returns a pointer to the auxiliary data ** associated by the sqlite3_set_auxdata(C,N,P,X) function with the Nth argument ** value to the application-defined function. ^N is zero for the left-most -** function argument. ^If there is no metadata +** function argument. ^If there is no auxiliary data ** associated with the function argument, the sqlite3_get_auxdata(C,N) interface ** returns a NULL pointer. ** -** ^The sqlite3_set_auxdata(C,N,P,X) interface saves P as metadata for the N-th -** argument of the application-defined function. ^Subsequent +** ^The sqlite3_set_auxdata(C,N,P,X) interface saves P as auxiliary data for the +** N-th argument of the application-defined function. ^Subsequent ** calls to sqlite3_get_auxdata(C,N) return P from the most recent -** sqlite3_set_auxdata(C,N,P,X) call if the metadata is still valid or -** NULL if the metadata has been discarded. +** sqlite3_set_auxdata(C,N,P,X) call if the auxiliary data is still valid or +** NULL if the auxiliary data has been discarded. ** ^After each call to sqlite3_set_auxdata(C,N,P,X) where X is not NULL, ** SQLite will invoke the destructor function X with parameter P exactly -** once, when the metadata is discarded. -** SQLite is free to discard the metadata at any time, including:
      +** once, when the auxiliary data is discarded. +** SQLite is free to discard the auxiliary data at any time, including:
        **
      • ^(when the corresponding function parameter changes)^, or **
      • ^(when [sqlite3_reset()] or [sqlite3_finalize()] is called for the ** SQL statement)^, or **
      • ^(when sqlite3_set_auxdata() is invoked again on the same ** parameter)^, or **
      • ^(during the original sqlite3_set_auxdata() call when a memory -** allocation error occurs.)^
      +** allocation error occurs.)^ +**
    • ^(during the original sqlite3_set_auxdata() call if the function +** is evaluated during query planning instead of during query execution, +** as sometimes happens with [SQLITE_ENABLE_STAT4].)^
    ** -** Note the last bullet in particular. The destructor X in +** Note the last two bullets in particular. The destructor X in ** sqlite3_set_auxdata(C,N,P,X) might be called immediately, before the ** sqlite3_set_auxdata() interface even returns. Hence sqlite3_set_auxdata() ** should be called near the end of the function implementation and the ** function implementation should not make any use of P after -** sqlite3_set_auxdata() has been called. -** -** ^(In practice, metadata is preserved between function calls for +** sqlite3_set_auxdata() has been called. Furthermore, a call to +** sqlite3_get_auxdata() that occurs immediately after a corresponding call +** to sqlite3_set_auxdata() might still return NULL if an out-of-memory +** condition occurred during the sqlite3_set_auxdata() call or if the +** function is being evaluated during query planning rather than during +** query execution. +** +** ^(In practice, auxiliary data is preserved between function calls for ** function parameters that are compile-time constants, including literal ** values and [parameters] and expressions composed from the same.)^ ** @@ -6037,10 +6274,67 @@ SQLITE_API sqlite3 *sqlite3_context_db_handle(sqlite3_context*); ** ** These routines must be called from the same thread in which ** the SQL function is running. +** +** See also: [sqlite3_get_clientdata()] and [sqlite3_set_clientdata()]. */ SQLITE_API void *sqlite3_get_auxdata(sqlite3_context*, int N); SQLITE_API void sqlite3_set_auxdata(sqlite3_context*, int N, void*, void (*)(void*)); +/* +** CAPI3REF: Database Connection Client Data +** METHOD: sqlite3 +** +** These functions are used to associate one or more named pointers +** with a [database connection]. +** A call to sqlite3_set_clientdata(D,N,P,X) causes the pointer P +** to be attached to [database connection] D using name N. Subsequent +** calls to sqlite3_get_clientdata(D,N) will return a copy of pointer P +** or a NULL pointer if there were no prior calls to +** sqlite3_set_clientdata() with the same values of D and N. +** Names are compared using strcmp() and are thus case sensitive. +** +** If P and X are both non-NULL, then the destructor X is invoked with +** argument P on the first of the following occurrences: +**
      +**
    • An out-of-memory error occurs during the call to +** sqlite3_set_clientdata() which attempts to register pointer P. +**
    • A subsequent call to sqlite3_set_clientdata(D,N,P,X) is made +** with the same D and N parameters. +**
    • The database connection closes. SQLite does not make any guarantees +** about the order in which destructors are called, only that all +** destructors will be called exactly once at some point during the +** database connection closing process. +**
+**
+** SQLite does not do anything with client data other than invoke
+** destructors on the client data at the appropriate time. The intended
+** use for client data is to provide a mechanism for wrapper libraries
+** to store additional information about an SQLite database connection.
+**
+** There is no limit (other than available memory) on the number of different
+** client data pointers (with different names) that can be attached to a
+** single database connection. However, the implementation is optimized
+** for the case of having only one or two different client data names.
+** Applications and wrapper libraries are discouraged from using more than
+** one client data name each.
+**
+** There is no way to enumerate the client data pointers
+** associated with a database connection. The N parameter can be thought
+** of as a secret key such that only code that knows the secret key is able
+** to access the associated data.
+**
+** Security Warning: These interfaces should not be exposed in scripting
+** languages or in other circumstances where it might be possible for an
+** attacker to invoke them. Any agent that can invoke these interfaces
+** can probably also take control of the process.
+**
+** Database connection client data is only available for SQLite
+** version 3.44.0 ([dateof:3.44.0]) and later.
+**
+** See also: [sqlite3_set_auxdata()] and [sqlite3_get_auxdata()].
+*/
+SQLITE_API void *sqlite3_get_clientdata(sqlite3*,const char*);
+SQLITE_API int sqlite3_set_clientdata(sqlite3*, const char*, void*, void(*)(void*));
 /*
 ** CAPI3REF: Constants Defining Special Destructor Behavior
@@ -6136,9 +6430,10 @@ typedef void (*sqlite3_destructor_type)(void*);
 ** of [SQLITE_UTF8], [SQLITE_UTF16], [SQLITE_UTF16BE], or [SQLITE_UTF16LE].
 ** ^SQLite takes the text result from the application from
 ** the 2nd parameter of the sqlite3_result_text* interfaces.
-** ^If the 3rd parameter to the sqlite3_result_text* interfaces
-** is negative, then SQLite takes result text from the 2nd parameter
-** through the first zero character.
+** ^If the 3rd parameter to any of the sqlite3_result_text* interfaces
+** other than sqlite3_result_text64() is negative, then SQLite computes
+** the string length itself by searching the 2nd parameter for the first
+** zero character.
 ** ^If the 3rd parameter to the sqlite3_result_text* interfaces
 ** is non-negative, then as many bytes (not characters) of the text
 ** pointed to by the 2nd parameter are taken as the application-defined
 **
@@ -6241,6 +6536,20 @@ SQLITE_API int sqlite3_result_zeroblob64(sqlite3_context*, sqlite3_uint64 n);
 ** higher order bits are discarded.
 ** The number of subtype bytes preserved by SQLite might increase
 ** in future releases of SQLite.
+**
+** Every [application-defined SQL function] that invokes this interface
+** should include the [SQLITE_RESULT_SUBTYPE] property in its
+** text encoding argument when the SQL function is
+** [sqlite3_create_function|registered]. If the [SQLITE_RESULT_SUBTYPE]
+** property is omitted from the function that invokes sqlite3_result_subtype(),
+** then in some cases the sqlite3_result_subtype() might fail to set
+** the result subtype.
+**
+** If SQLite is compiled with -DSQLITE_STRICT_SUBTYPE=1, then any
+** SQL function that invokes the sqlite3_result_subtype() interface
+** and that does not have the SQLITE_RESULT_SUBTYPE property will raise
+** an error. Future versions of SQLite might enable -DSQLITE_STRICT_SUBTYPE=1
+** by default.
 */
 SQLITE_API void sqlite3_result_subtype(sqlite3_context*,unsigned int);
@@ -6412,6 +6721,13 @@ SQLITE_API void sqlite3_activate_cerod(
 ** of the default VFS is not implemented correctly, or not implemented at
 ** all, then the behavior of sqlite3_sleep() may deviate from the description
 ** in the previous paragraphs.
+**
+** If a negative argument is passed to sqlite3_sleep(), the results vary by
+** VFS and operating system. Some systems treat a negative argument as an
+** instruction to sleep forever. Others understand it to mean do not sleep
+** at all. ^In SQLite version 3.42.0 and later, a negative
+** argument passed into sqlite3_sleep() is changed to zero before it is relayed
+** down into the xSleep method of the VFS.
 */
 SQLITE_API int sqlite3_sleep(int);
@@ -6634,7 +6950,7 @@ SQLITE_API const char *sqlite3_db_name(sqlite3 *db, int N);
 **
  • [sqlite3_filename_wal()] ** */ -SQLITE_API const char *sqlite3_db_filename(sqlite3 *db, const char *zDbName); +SQLITE_API sqlite3_filename sqlite3_db_filename(sqlite3 *db, const char *zDbName); /* ** CAPI3REF: Determine if a database is read-only @@ -6665,7 +6981,7 @@ SQLITE_API int sqlite3_db_readonly(sqlite3 *db, const char *zDbName); SQLITE_API int sqlite3_txn_state(sqlite3*,const char *zSchema); /* -** CAPI3REF: Allowed return values from [sqlite3_txn_state()] +** CAPI3REF: Allowed return values from sqlite3_txn_state() ** KEYWORDS: {transaction state} ** ** These constants define the current transaction state of a database file. @@ -6771,7 +7087,7 @@ SQLITE_API void *sqlite3_rollback_hook(sqlite3*, void(*)(void *), void*); ** function C that is invoked prior to each autovacuum of the database ** file. ^The callback is passed a copy of the generic data pointer (P), ** the schema-name of the attached database that is being autovacuumed, -** the the size of the database file in pages, the number of free pages, +** the size of the database file in pages, the number of free pages, ** and the number of bytes per page, respectively. The callback should ** return the number of free pages that should be removed by the ** autovacuum. ^If the callback returns zero, then no autovacuum happens. @@ -6797,7 +7113,7 @@ SQLITE_API void *sqlite3_rollback_hook(sqlite3*, void(*)(void *), void*); ** ^Each call to the sqlite3_autovacuum_pages() interface overrides all ** previous invocations for that database connection. ^If the callback ** argument (C) to sqlite3_autovacuum_pages(D,C,P,X) is a NULL pointer, -** then the autovacuum steps callback is cancelled. The return value +** then the autovacuum steps callback is canceled. The return value ** from sqlite3_autovacuum_pages() is normally SQLITE_OK, but might ** be some other error code if something goes wrong. The current ** implementation will only return SQLITE_OK or SQLITE_MISUSE, but other @@ -6892,6 +7208,11 @@ SQLITE_API void *sqlite3_update_hook( ** to the same database. Sharing is enabled if the argument is true ** and disabled if the argument is false.)^ ** +** This interface is omitted if SQLite is compiled with +** [-DSQLITE_OMIT_SHARED_CACHE]. The [-DSQLITE_OMIT_SHARED_CACHE] +** compile-time option is recommended because the +** [use of shared cache mode is discouraged]. +** ** ^Cache sharing is enabled and disabled for an entire process. ** This is a change as of SQLite [version 3.5.0] ([dateof:3.5.0]). ** In prior versions of SQLite, @@ -6990,7 +7311,7 @@ SQLITE_API int sqlite3_db_release_memory(sqlite3*); ** ^The soft heap limit may not be greater than the hard heap limit. ** ^If the hard heap limit is enabled and if sqlite3_soft_heap_limit(N) ** is invoked with a value of N that is greater than the hard heap limit, -** the the soft heap limit is set to the value of the hard heap limit. +** the soft heap limit is set to the value of the hard heap limit. ** ^The soft heap limit is automatically enabled whenever the hard heap ** limit is enabled. ^When sqlite3_hard_heap_limit64(N) is invoked and ** the soft heap limit is outside the range of 1..N, then the soft heap @@ -7251,15 +7572,6 @@ SQLITE_API int sqlite3_cancel_auto_extension(void(*xEntryPoint)(void)); */ SQLITE_API void sqlite3_reset_auto_extension(void); -/* -** The interface to the virtual-table mechanism is currently considered -** to be experimental. The interface might change in incompatible ways. -** If this is a problem for you, do not use the interface at this time. 
-** -** When the virtual-table mechanism stabilizes, we will declare the -** interface fixed, support it indefinitely, and remove this comment. -*/ - /* ** Structures used by the virtual table interface */ @@ -7320,6 +7632,10 @@ struct sqlite3_module { /* The methods above are in versions 1 and 2 of the sqlite_module object. ** Those below are for version 3 and greater. */ int (*xShadowName)(const char*); + /* The methods above are in versions 1 through 3 of the sqlite_module object. + ** Those below are for version 4 and greater. */ + int (*xIntegrity)(sqlite3_vtab *pVTab, const char *zSchema, + const char *zTabName, int mFlags, char **pzErr); }; /* @@ -7378,10 +7694,10 @@ struct sqlite3_module { ** when the omit flag is true there is no guarantee that the constraint will ** not be checked again using byte code.)^ ** -** ^The idxNum and idxPtr values are recorded and passed into the +** ^The idxNum and idxStr values are recorded and passed into the ** [xFilter] method. -** ^[sqlite3_free()] is used to free idxPtr if and only if -** needToFreeIdxPtr is true. +** ^[sqlite3_free()] is used to free idxStr if and only if +** needToFreeIdxStr is true. ** ** ^The orderByConsumed means that output from [xFilter]/[xNext] will occur in ** the correct order to satisfy the ORDER BY clause so that no separate @@ -7501,7 +7817,7 @@ struct sqlite3_index_info { ** the [sqlite3_vtab_collation()] interface. For most real-world virtual ** tables, the collating sequence of constraints does not matter (for example ** because the constraints are numeric) and so the sqlite3_vtab_collation() -** interface is no commonly needed. +** interface is not commonly needed. */ #define SQLITE_INDEX_CONSTRAINT_EQ 2 #define SQLITE_INDEX_CONSTRAINT_GT 4 @@ -7660,16 +7976,6 @@ SQLITE_API int sqlite3_declare_vtab(sqlite3*, const char *zSQL); */ SQLITE_API int sqlite3_overload_function(sqlite3*, const char *zFuncName, int nArg); -/* -** The interface to the virtual-table mechanism defined above (back up -** to a comment remarkably similar to this one) is currently considered -** to be experimental. The interface might change in incompatible ways. -** If this is a problem for you, do not use the interface at this time. -** -** When the virtual-table mechanism stabilizes, we will declare the -** interface fixed, support it indefinitely, and remove this comment. -*/ - /* ** CAPI3REF: A Handle To An Open BLOB ** KEYWORDS: {BLOB handle} {BLOB handles} @@ -7817,7 +8123,7 @@ SQLITE_API int sqlite3_blob_reopen(sqlite3_blob *, sqlite3_int64); ** code is returned and the transaction rolled back. ** ** Calling this function with an argument that is not a NULL pointer or an -** open blob handle results in undefined behaviour. ^Calling this routine +** open blob handle results in undefined behavior. ^Calling this routine ** with a null pointer (such as would be returned by a failed call to ** [sqlite3_blob_open()]) is a harmless no-op. ^Otherwise, if this function ** is passed a valid open blob handle, the values returned by the @@ -8053,9 +8359,9 @@ SQLITE_API int sqlite3_vfs_unregister(sqlite3_vfs*); ** is undefined if the mutex is not currently entered by the ** calling thread or is not currently allocated. ** -** ^If the argument to sqlite3_mutex_enter(), sqlite3_mutex_try(), or -** sqlite3_mutex_leave() is a NULL pointer, then all three routines -** behave as no-ops. 
+** ^If the argument to sqlite3_mutex_enter(), sqlite3_mutex_try(), +** sqlite3_mutex_leave(), or sqlite3_mutex_free() is a NULL pointer, +** then any of the four routines behaves as a no-op. ** ** See also: [sqlite3_mutex_held()] and [sqlite3_mutex_notheld()]. */ @@ -8297,6 +8603,7 @@ SQLITE_API int sqlite3_test_control(int op, ...); #define SQLITE_TESTCTRL_PRNG_SAVE 5 #define SQLITE_TESTCTRL_PRNG_RESTORE 6 #define SQLITE_TESTCTRL_PRNG_RESET 7 /* NOT USED */ +#define SQLITE_TESTCTRL_FK_NO_ACTION 7 #define SQLITE_TESTCTRL_BITVEC_TEST 8 #define SQLITE_TESTCTRL_FAULT_INSTALL 9 #define SQLITE_TESTCTRL_BENIGN_MALLOC_HOOKS 10 @@ -8325,7 +8632,8 @@ SQLITE_API int sqlite3_test_control(int op, ...); #define SQLITE_TESTCTRL_TRACEFLAGS 31 #define SQLITE_TESTCTRL_TUNE 32 #define SQLITE_TESTCTRL_LOGEST 33 -#define SQLITE_TESTCTRL_LAST 33 /* Largest TESTCTRL */ +#define SQLITE_TESTCTRL_USELONGDOUBLE 34 +#define SQLITE_TESTCTRL_LAST 34 /* Largest TESTCTRL */ /* ** CAPI3REF: SQL Keyword Checking @@ -9285,7 +9593,7 @@ typedef struct sqlite3_backup sqlite3_backup; ** if the application incorrectly accesses the destination [database connection] ** and so no error code is reported, but the operations may malfunction ** nevertheless. Use of the destination database connection while a -** backup is in progress might also also cause a mutex deadlock. +** backup is in progress might also cause a mutex deadlock. ** ** If running in [shared cache mode], the application must ** guarantee that the shared cache used by the destination database @@ -9713,7 +10021,7 @@ SQLITE_API int sqlite3_wal_checkpoint_v2( */ #define SQLITE_CHECKPOINT_PASSIVE 0 /* Do as much as possible w/o blocking */ #define SQLITE_CHECKPOINT_FULL 1 /* Wait for writers, then checkpoint */ -#define SQLITE_CHECKPOINT_RESTART 2 /* Like FULL but wait for for readers */ +#define SQLITE_CHECKPOINT_RESTART 2 /* Like FULL but wait for readers */ #define SQLITE_CHECKPOINT_TRUNCATE 3 /* Like RESTART but also truncate WAL */ /* @@ -9781,7 +10089,7 @@ SQLITE_API int sqlite3_vtab_config(sqlite3*, int op, ...); ** [[SQLITE_VTAB_DIRECTONLY]]
    SQLITE_VTAB_DIRECTONLY
    **
Calls of the form
 ** [sqlite3_vtab_config](db,SQLITE_VTAB_DIRECTONLY) from within the
-** the [xConnect] or [xCreate] methods of a [virtual table] implmentation
+** [xConnect] or [xCreate] methods of a [virtual table] implementation
 ** prohibits that virtual table from being used from within triggers and
 ** views.
 **
    @@ -9789,18 +10097,28 @@ SQLITE_API int sqlite3_vtab_config(sqlite3*, int op, ...); ** [[SQLITE_VTAB_INNOCUOUS]]
    SQLITE_VTAB_INNOCUOUS
    **
Calls of the form
 ** [sqlite3_vtab_config](db,SQLITE_VTAB_INNOCUOUS) from within the
-** the [xConnect] or [xCreate] methods of a [virtual table] implmentation
+** [xConnect] or [xCreate] methods of a [virtual table] implementation
 ** identify that virtual table as being safe to use from within triggers
 ** and views. Conceptually, the SQLITE_VTAB_INNOCUOUS tag means that the
 ** virtual table can do no serious harm even if it is controlled by a
 ** malicious hacker. Developers should avoid setting the SQLITE_VTAB_INNOCUOUS
 ** flag unless absolutely necessary.
 **
    +** +** [[SQLITE_VTAB_USES_ALL_SCHEMAS]]
    SQLITE_VTAB_USES_ALL_SCHEMAS
    +**
Calls of the form +** [sqlite3_vtab_config](db,SQLITE_VTAB_USES_ALL_SCHEMAS) from within the +** [xConnect] or [xCreate] methods of a [virtual table] implementation +** instruct the query planner to begin at least a read transaction on +** all schemas ("main", "temp", and any ATTACH-ed databases) whenever the +** virtual table is used. +**
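For orientation, a minimal sketch (not part of this diff; the schema, names, and flag choices are illustrative only) of an xConnect method applying these options:

    #include <string.h>
    #include "sqlite3.h"

    /* Hypothetical xConnect: declare the vtab schema, then tag the table
    ** so it cannot be used from triggers/views (DIRECTONLY) and so the
    ** planner opens a read transaction on every attached schema. */
    static int exampleConnect(
      sqlite3 *db, void *pAux,
      int argc, const char *const *argv,
      sqlite3_vtab **ppVtab, char **pzErr
    ){
      int rc = sqlite3_declare_vtab(db, "CREATE TABLE x(a,b)");
      (void)pAux; (void)argc; (void)argv; (void)pzErr;
      if( rc!=SQLITE_OK ) return rc;
      sqlite3_vtab_config(db, SQLITE_VTAB_DIRECTONLY);
      sqlite3_vtab_config(db, SQLITE_VTAB_USES_ALL_SCHEMAS);
      *ppVtab = sqlite3_malloc(sizeof(**ppVtab));
      if( *ppVtab==0 ) return SQLITE_NOMEM;
      memset(*ppVtab, 0, sizeof(**ppVtab));
      return SQLITE_OK;
    }

DIRECTONLY and INNOCUOUS express opposite trust levels, so a real implementation would set at most one of them.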
    ** */ #define SQLITE_VTAB_CONSTRAINT_SUPPORT 1 #define SQLITE_VTAB_INNOCUOUS 2 #define SQLITE_VTAB_DIRECTONLY 3 +#define SQLITE_VTAB_USES_ALL_SCHEMAS 4 /* ** CAPI3REF: Determine The Virtual Table Conflict Policy @@ -9873,7 +10191,7 @@ SQLITE_API int sqlite3_vtab_nochange(sqlite3_context*); **
  • Otherwise, "BINARY" is returned. ** */ -SQLITE_API SQLITE_EXPERIMENTAL const char *sqlite3_vtab_collation(sqlite3_index_info*,int); +SQLITE_API const char *sqlite3_vtab_collation(sqlite3_index_info*,int); /* ** CAPI3REF: Determine if a virtual table query is DISTINCT @@ -9961,7 +10279,7 @@ SQLITE_API int sqlite3_vtab_distinct(sqlite3_index_info*); ** communicated to the xBestIndex method as a ** [SQLITE_INDEX_CONSTRAINT_EQ] constraint.)^ If xBestIndex wants to use ** this constraint, it must set the corresponding -** aConstraintUsage[].argvIndex to a postive integer. ^(Then, under +** aConstraintUsage[].argvIndex to a positive integer. ^(Then, under ** the usual mode of handling IN operators, SQLite generates [bytecode] ** that invokes the [xFilter|xFilter() method] once for each value ** on the right-hand side of the IN operator.)^ Thus the virtual table @@ -10030,21 +10348,20 @@ SQLITE_API int sqlite3_vtab_in(sqlite3_index_info*, int iCons, int bHandle); ** is undefined and probably harmful. ** ** The X parameter in a call to sqlite3_vtab_in_first(X,P) or -** sqlite3_vtab_in_next(X,P) must be one of the parameters to the +** sqlite3_vtab_in_next(X,P) should be one of the parameters to the ** xFilter method which invokes these routines, and specifically ** a parameter that was previously selected for all-at-once IN constraint ** processing use the [sqlite3_vtab_in()] interface in the ** [xBestIndex|xBestIndex method]. ^(If the X parameter is not ** an xFilter argument that was selected for all-at-once IN constraint -** processing, then these routines return [SQLITE_MISUSE])^ or perhaps -** exhibit some other undefined or harmful behavior. +** processing, then these routines return [SQLITE_ERROR].)^ ** ** ^(Use these routines to access all values on the right-hand side ** of the IN constraint using code like the following: ** **

     **    for(rc=sqlite3_vtab_in_first(pList, &pVal);
    -**        rc==SQLITE_OK && pVal
    +**        rc==SQLITE_OK && pVal;
     **        rc=sqlite3_vtab_in_next(pList, &pVal)
     **    ){
     **      // do something with pVal
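The fragment above is truncated by the hunk; for context, a hedged sketch (names hypothetical, not from this diff) of an xFilter method that drains an IN list selected for all-at-once processing:

    #include "sqlite3.h"

    /* Hypothetical xFilter: argv[0] carries the right-hand side of an IN
    ** constraint for which xBestIndex called sqlite3_vtab_in(pInfo,iCons,1). */
    static int exampleFilter(
      sqlite3_vtab_cursor *pCur,
      int idxNum, const char *idxStr,
      int argc, sqlite3_value **argv
    ){
      sqlite3_value *pList = argv[0];
      sqlite3_value *pVal = 0;
      int rc;
      (void)pCur; (void)idxNum; (void)idxStr; (void)argc;
      for(rc=sqlite3_vtab_in_first(pList, &pVal);
          rc==SQLITE_OK && pVal;
          rc=sqlite3_vtab_in_next(pList, &pVal)
      ){
        /* seek rows matching pVal, e.g. via sqlite3_value_text(pVal) */
      }
      /* On normal exhaustion pVal is NULL and rc is SQLITE_OK. */
      return rc;
    }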
    @@ -10142,6 +10459,10 @@ SQLITE_API int sqlite3_vtab_rhs_value(sqlite3_index_info*, int, sqlite3_value **
     ** managed by the prepared statement S and will be automatically freed when
     ** S is finalized.
     **
    +** Not all values are available for all query elements. When a value is
    +** not available, the output variable is set to -1 if the value is numeric,
    +** or to NULL if it is a string (SQLITE_SCANSTAT_NAME).
    +**
     ** 
    ** [[SQLITE_SCANSTAT_NLOOP]]
    SQLITE_SCANSTAT_NLOOP
    **
    ^The [sqlite3_int64] variable pointed to by the V parameter will be @@ -10169,12 +10490,24 @@ SQLITE_API int sqlite3_vtab_rhs_value(sqlite3_index_info*, int, sqlite3_value ** ** to a zero-terminated UTF-8 string containing the [EXPLAIN QUERY PLAN] ** description for the X-th loop. ** -** [[SQLITE_SCANSTAT_SELECTID]]
    SQLITE_SCANSTAT_SELECT
    +** [[SQLITE_SCANSTAT_SELECTID]]
    SQLITE_SCANSTAT_SELECTID
    **
    ^The "int" variable pointed to by the V parameter will be set to the -** "select-id" for the X-th loop. The select-id identifies which query or -** subquery the loop is part of. The main query has a select-id of zero. -** The select-id is the same value as is output in the first column -** of an [EXPLAIN QUERY PLAN] query. +** id for the X-th query plan element. The id value is unique within the +** statement. The select-id is the same value as is output in the first +** column of an [EXPLAIN QUERY PLAN] query. +** +** [[SQLITE_SCANSTAT_PARENTID]]
    SQLITE_SCANSTAT_PARENTID
    +**
    The "int" variable pointed to by the V parameter will be set to the +** the id of the parent of the current query element, if applicable, or +** to zero if the query element has no parent. This is the same value as +** returned in the second column of an [EXPLAIN QUERY PLAN] query. +** +** [[SQLITE_SCANSTAT_NCYCLE]]
    SQLITE_SCANSTAT_NCYCLE
    +**
The sqlite3_int64 output value is set to the number of cycles, +** according to the processor time-stamp counter, that elapsed while the +** query element was being processed. This value is not available for +** all query elements - if it is unavailable, the output variable is +** set to -1. **
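Putting the new counters together, a hedged sketch (assuming a build with SQLITE_ENABLE_STMT_SCANSTATUS; the helper name is hypothetical) that dumps every query-plan element of a statement that has already been stepped:

    #include <stdio.h>
    #include "sqlite3.h"

    /* Print id, parent id, cycle count, and name for each query element. */
    static void dumpScanStatus(sqlite3_stmt *pStmt){
      int i;
      for(i=0; ; i++){
        const char *zName = 0;
        int iParent = 0;
        sqlite3_int64 nCycle = 0;
        if( sqlite3_stmt_scanstatus_v2(pStmt, i, SQLITE_SCANSTAT_NAME,
                SQLITE_SCANSTAT_COMPLEX, (void*)&zName) ) break; /* idx out of range */
        sqlite3_stmt_scanstatus_v2(pStmt, i, SQLITE_SCANSTAT_PARENTID,
                SQLITE_SCANSTAT_COMPLEX, (void*)&iParent);
        sqlite3_stmt_scanstatus_v2(pStmt, i, SQLITE_SCANSTAT_NCYCLE,
                SQLITE_SCANSTAT_COMPLEX, (void*)&nCycle);
        printf("elem %d parent=%d cycles=%lld name=%s\n",
               i, iParent, (long long)nCycle, zName ? zName : "(n/a)");
      }
    }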
*/ #define SQLITE_SCANSTAT_NLOOP 0 @@ -10183,12 +10516,14 @@ SQLITE_API int sqlite3_vtab_rhs_value(sqlite3_index_info*, int, sqlite3_value ** #define SQLITE_SCANSTAT_NAME 3 #define SQLITE_SCANSTAT_EXPLAIN 4 #define SQLITE_SCANSTAT_SELECTID 5 +#define SQLITE_SCANSTAT_PARENTID 6 +#define SQLITE_SCANSTAT_NCYCLE 7 /* ** CAPI3REF: Prepared Statement Scan Status ** METHOD: sqlite3_stmt ** -** This interface returns information about the predicted and measured +** These interfaces return information about the predicted and measured ** performance for pStmt. Advanced applications can use this ** interface to compare the predicted and the measured performance and ** issue warnings and/or rerun [ANALYZE] if discrepancies are found. @@ -10199,19 +10534,25 @@ SQLITE_API int sqlite3_vtab_rhs_value(sqlite3_index_info*, int, sqlite3_value ** ** ** The "iScanStatusOp" parameter determines which status information to return. ** The "iScanStatusOp" must be one of the [scanstatus options] or the behavior -** of this interface is undefined. -** ^The requested measurement is written into a variable pointed to by -** the "pOut" parameter. -** Parameter "idx" identifies the specific loop to retrieve statistics for. -** Loops are numbered starting from zero. ^If idx is out of range - less than -** zero or greater than or equal to the total number of loops used to implement -** the statement - a non-zero value is returned and the variable that pOut -** points to is unchanged. -** -** ^Statistics might not be available for all loops in all statements. ^In cases -** where there exist loops with no available statistics, this function behaves -** as if the loop did not exist - it returns non-zero and leave the variable -** that pOut points to unchanged. +** of this interface is undefined. ^The requested measurement is written into +** a variable pointed to by the "pOut" parameter. +** +** The "flags" parameter must be passed a mask of flags. At present only +** one flag is defined - SQLITE_SCANSTAT_COMPLEX. If SQLITE_SCANSTAT_COMPLEX +** is specified, then status information is available for all elements +** of a query plan that are reported by "EXPLAIN QUERY PLAN" output. If +** SQLITE_SCANSTAT_COMPLEX is not specified, then only query plan elements +** that correspond to query loops (the "SCAN..." and "SEARCH..." elements of +** the EXPLAIN QUERY PLAN output) are available. Invoking API +** sqlite3_stmt_scanstatus() is equivalent to calling +** sqlite3_stmt_scanstatus_v2() with a zeroed flags parameter. +** +** Parameter "idx" identifies the specific query element to retrieve statistics +** for. Query elements are numbered starting from zero. A value of -1 may be +** passed to query for statistics regarding the entire query. ^If idx is out of range +** - less than -1 or greater than or equal to the total number of query +** elements used to implement the statement - a non-zero value is returned and +** the variable that pOut points to is unchanged. ** ** See also: [sqlite3_stmt_scanstatus_reset()] */ @@ -10221,6 +10562,19 @@ SQLITE_API int sqlite3_stmt_scanstatus( int iScanStatusOp, /* Information desired. 
SQLITE_SCANSTAT_* */ + int flags, /* Mask of flags defined below */ + void *pOut /* Result written here */ +); + +/* +** CAPI3REF: Prepared Statement Scan Status +** KEYWORDS: {scan status flags} +*/ +#define SQLITE_SCANSTAT_COMPLEX 0x0001 /* ** CAPI3REF: Zero Scan-Status Counters @@ -10311,6 +10665,10 @@ SQLITE_API int sqlite3_db_cacheflush(sqlite3*); ** function is not defined for operations on WITHOUT ROWID tables, or for ** DELETE operations on rowid tables. ** +** ^The sqlite3_preupdate_hook(D,C,P) function returns the P argument from +** the previous call on the same [database connection] D, or NULL for +** the first call on D. +** ** The [sqlite3_preupdate_old()], [sqlite3_preupdate_new()], ** [sqlite3_preupdate_count()], and [sqlite3_preupdate_depth()] interfaces ** provide additional information about a preupdate event. These routines @@ -10350,7 +10708,7 @@ SQLITE_API int sqlite3_db_cacheflush(sqlite3*); ** When the [sqlite3_blob_write()] API is used to update a blob column, ** the pre-update hook is invoked with SQLITE_DELETE. This is because, ** in this case, the new values are not available. In this case, when a -** callback made with op==SQLITE_DELETE is actuall a write using the +** callback made with op==SQLITE_DELETE is actually a write using the ** sqlite3_blob_write() API, the [sqlite3_preupdate_blobwrite()] returns ** the index of the column being written. In other cases, where the ** pre-update hook is being invoked for some other reason, including a @@ -10611,6 +10969,13 @@ SQLITE_API SQLITE_EXPERIMENTAL int sqlite3_snapshot_recover(sqlite3 *db, const c ** SQLITE_SERIALIZE_NOCOPY bit is set but no contiguous copy ** of the database exists. ** +** After the call, if the SQLITE_SERIALIZE_NOCOPY bit had been set, +** the returned buffer content will remain accessible and unchanged +** until either the next write operation on the connection or until +** the connection is closed, and applications must not modify the +** buffer. If the bit had been clear, the returned buffer will not +** be accessed by SQLite after the call. +** ** A call to sqlite3_serialize(D,S,P,F) might return NULL even if the ** SQLITE_SERIALIZE_NOCOPY bit is omitted from argument F if a memory ** allocation error occurs. @@ -10659,6 +11024,9 @@ SQLITE_API unsigned char *sqlite3_serialize( ** SQLite will try to increase the buffer size using sqlite3_realloc64() ** if writes on the database cause it to grow larger than M bytes. ** +** Applications must not modify the buffer P or invalidate it before +** the database connection D is closed. +** ** The sqlite3_deserialize() interface will fail with SQLITE_BUSY if the ** database is currently in a read transaction or is involved in a backup ** operation. @@ -10667,6 +11035,13 @@ SQLITE_API unsigned char *sqlite3_serialize( ** S argument to sqlite3_deserialize(D,S,P,N,M,F) is "temp" then the ** function returns SQLITE_ERROR. ** +** The deserialized database should not be in [WAL mode]. If the database +** is in WAL mode, then any attempt to use the database file will result +** in an [SQLITE_CANTOPEN] error. The application can set the +** [file format version numbers] (bytes 18 and 19) of the input database P +** to 0x01 prior to invoking sqlite3_deserialize(D,S,P,N,M,F) to force the +** database file into rollback mode and work around this limitation. +** ** If sqlite3_deserialize(D,S,P,N,M,F) fails for any reason and if the ** SQLITE_DESERIALIZE_FREEONCLOSE bit is set in argument F, then ** [sqlite3_free()] is invoked on argument P prior to returning. 
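As a usage illustration of the serialize/deserialize pair (a sketch under stated assumptions: the helper name is hypothetical, and the build must not define SQLITE_OMIT_DESERIALIZE):

    #include "sqlite3.h"

    /* Copy the "main" database of pFrom into connection pTo in memory.
    ** With FREEONCLOSE, pTo takes ownership of the serialized buffer
    ** and will pass it to sqlite3_free() when appropriate. */
    static int cloneIntoMemory(sqlite3 *pFrom, sqlite3 *pTo){
      sqlite3_int64 n = 0;
      unsigned char *p = sqlite3_serialize(pFrom, "main", &n, 0);
      if( p==0 ) return SQLITE_NOMEM;   /* allocation failed (flags==0) */
      return sqlite3_deserialize(pTo, "main", p, n, n,
          SQLITE_DESERIALIZE_FREEONCLOSE|SQLITE_DESERIALIZE_RESIZEABLE);
    }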
@@ -10716,6 +11091,19 @@ SQLITE_API int sqlite3_deserialize( # undef double #endif +#if defined(__wasi__) +# undef SQLITE_WASI +# define SQLITE_WASI 1 +# undef SQLITE_OMIT_WAL +# define SQLITE_OMIT_WAL 1 /* because it requires shared memory APIs */ +# ifndef SQLITE_OMIT_LOAD_EXTENSION +# define SQLITE_OMIT_LOAD_EXTENSION +# endif +# ifndef SQLITE_THREADSAFE +# define SQLITE_THREADSAFE 0 +# endif +#endif + #if 0 } /* End of the 'extern "C"' block */ #endif @@ -10922,16 +11310,20 @@ SQLITE_API int sqlite3session_create( SQLITE_API void sqlite3session_delete(sqlite3_session *pSession); /* -** CAPIREF: Conigure a Session Object +** CAPI3REF: Configure a Session Object ** METHOD: sqlite3_session ** ** This method is used to configure a session object after it has been -** created. At present the only valid value for the second parameter is -** [SQLITE_SESSION_OBJCONFIG_SIZE]. +** created. At present the only valid values for the second parameter are +** [SQLITE_SESSION_OBJCONFIG_SIZE] and [SQLITE_SESSION_OBJCONFIG_ROWID]. ** -** Arguments for sqlite3session_object_config() +*/ +SQLITE_API int sqlite3session_object_config(sqlite3_session*, int op, void *pArg); + +/* +** CAPI3REF: Options for sqlite3session_object_config ** -** The following values may passed as the the 4th parameter to +** The following values may be passed as the 2nd parameter to ** sqlite3session_object_config(). ** **
    SQLITE_SESSION_OBJCONFIG_SIZE
    @@ -10947,12 +11339,21 @@ SQLITE_API void sqlite3session_delete(sqlite3_session *pSession); ** ** It is an error (SQLITE_MISUSE) to attempt to modify this setting after ** the first table has been attached to the session object. +** +**
    SQLITE_SESSION_OBJCONFIG_ROWID
+** This option is used to set, clear or query the flag that enables +** collection of data for tables with no explicit PRIMARY KEY. +** +** Normally, tables with no explicit PRIMARY KEY are simply ignored +** by the sessions module. However, if this flag is set, it behaves +** as if such tables have a column "_rowid_ INTEGER PRIMARY KEY" inserted +** as their leftmost column. +** +** It is an error (SQLITE_MISUSE) to attempt to modify this setting after +** the first table has been attached to the session object. */ -SQLITE_API int sqlite3session_object_config(sqlite3_session*, int op, void *pArg); - -/* -*/ -#define SQLITE_SESSION_OBJCONFIG_SIZE 1 +#define SQLITE_SESSION_OBJCONFIG_SIZE 1 +#define SQLITE_SESSION_OBJCONFIG_ROWID 2 /* ** CAPI3REF: Enable Or Disable A Session Object @@ -11713,6 +12114,18 @@ SQLITE_API int sqlite3changeset_concat( ); +/* +** CAPI3REF: Upgrade the Schema of a Changeset/Patchset +*/ +SQLITE_API int sqlite3changeset_upgrade( + sqlite3 *db, + const char *zDb, + int nIn, const void *pIn, /* Input changeset */ + int *pnOut, void **ppOut /* OUT: Upgraded changeset */ +); + + + /* ** CAPI3REF: Changegroup Handle ** @@ -11759,6 +12172,38 @@ typedef struct sqlite3_changegroup sqlite3_changegroup; */ SQLITE_API int sqlite3changegroup_new(sqlite3_changegroup **pp); +/* +** CAPI3REF: Add a Schema to a Changegroup +** METHOD: sqlite3_changegroup_schema +** +** This method may be used to optionally enforce the rule that the changesets +** added to the changegroup handle must match the schema of database zDb +** ("main", "temp", or the name of an attached database). If +** sqlite3changegroup_add() is called to add a changeset that is not compatible +** with the configured schema, SQLITE_SCHEMA is returned and the changegroup +** object is left in an undefined state. +** +** A changeset schema is considered compatible with the database schema in +** the same way as for sqlite3changeset_apply(). Specifically, for each +** table in the changeset, there exists a database table with: +** +**
      +**
    • The name identified by the changeset, and +**
    • at least as many columns as recorded in the changeset, and +**
    • the primary key columns in the same position as recorded in +** the changeset. +**
+** +** The output of the changegroup object always has the same schema as the +** database nominated using this function. In cases where changesets passed +** to sqlite3changegroup_add() have fewer columns than the corresponding table +** in the database schema, these are filled in using the default column +** values from the database schema. This makes it possible to combine +** changesets that have different numbers of columns for a single table +** within a changegroup, provided that they are otherwise compatible. +*/ +SQLITE_API int sqlite3changegroup_schema(sqlite3_changegroup*, sqlite3*, const char *zDb); + /* ** CAPI3REF: Add A Changeset To A Changegroup ** METHOD: sqlite3_changegroup @@ -11827,13 +12272,18 @@ SQLITE_API int sqlite3changegroup_new(sqlite3_changegroup **pp); ** If the new changeset contains changes to a table that is already present ** in the changegroup, then the number of columns and the position of the ** primary key columns for the table must be consistent. If this is not the -** case, this function fails with SQLITE_SCHEMA. If the input changeset -** appears to be corrupt and the corruption is detected, SQLITE_CORRUPT is -** returned. Or, if an out-of-memory condition occurs during processing, this -** function returns SQLITE_NOMEM. In all cases, if an error occurs the state -** of the final contents of the changegroup is undefined. +** case, this function fails with SQLITE_SCHEMA. Except, if the changegroup +** object has been configured with a database schema using the +** sqlite3changegroup_schema() API, then it is possible to combine changesets +** with different numbers of columns for a single table, provided that +** they are otherwise compatible. ** -** If no error occurs, SQLITE_OK is returned. +** If the input changeset appears to be corrupt and the corruption is +** detected, SQLITE_CORRUPT is returned. Or, if an out-of-memory condition +** occurs during processing, this function returns SQLITE_NOMEM. +** +** In all cases, if an error occurs the state of the final contents of the +** changegroup is undefined. If no error occurs, SQLITE_OK is returned. */ SQLITE_API int sqlite3changegroup_add(sqlite3_changegroup*, int nData, void *pData); @@ -12085,9 +12535,30 @@ SQLITE_API int sqlite3changeset_apply_v2( ** Invert the changeset before applying it. This is equivalent to inverting ** a changeset using sqlite3changeset_invert() before applying it. It is ** an error to specify this flag with a patchset. +** +**
    SQLITE_CHANGESETAPPLY_IGNORENOOP
    +** Do not invoke the conflict handler callback for any changes that +** would not actually modify the database even if they were applied. +** Specifically, this means that the conflict handler is not invoked +** for: +**
      +**
    • a delete change if the row being deleted cannot be found, +**
    • an update change if the modified fields are already set to +** their new values in the conflicting row, or +**
    • an insert change if all fields of the conflicting row match +** the row being inserted. +**
    +** +**
    SQLITE_CHANGESETAPPLY_FKNOACTION
+** If this flag is set, then all foreign key constraints in the target +** database behave as if they were declared with "ON UPDATE NO ACTION ON +** DELETE NO ACTION", even if they are actually CASCADE, RESTRICT, SET NULL +** or SET DEFAULT. */ #define SQLITE_CHANGESETAPPLY_NOSAVEPOINT 0x0001 #define SQLITE_CHANGESETAPPLY_INVERT 0x0002 +#define SQLITE_CHANGESETAPPLY_IGNORENOOP 0x0004 +#define SQLITE_CHANGESETAPPLY_FKNOACTION 0x0008 /* ** CAPI3REF: Constants Passed To The Conflict Handler @@ -12828,7 +13299,7 @@ struct Fts5PhraseIter { ** See xPhraseFirstColumn above. */ struct Fts5ExtensionApi { - int iVersion; /* Currently always set to 3 */ + int iVersion; /* Currently always set to 2 */ void *(*xUserData)(Fts5Context*); @@ -13057,8 +13528,8 @@ struct Fts5ExtensionApi { ** as separate queries of the FTS index are required for each synonym. ** ** When using methods (2) or (3), it is important that the tokenizer only -** provide synonyms when tokenizing document text (method (2)) or query -** text (method (3)), not both. Doing so will not cause any errors, but is +** provide synonyms when tokenizing document text (method (3)) or query +** text (method (2)), not both. Doing so will not cause any errors, but is ** inefficient. */ typedef struct Fts5Tokenizer Fts5Tokenizer; @@ -13106,7 +13577,7 @@ struct fts5_api { int (*xCreateTokenizer)( fts5_api *pApi, const char *zName, - void *pContext, + void *pUserData, fts5_tokenizer *pTokenizer, void (*xDestroy)(void*) ); @@ -13115,7 +13586,7 @@ struct fts5_api { int (*xFindTokenizer)( fts5_api *pApi, const char *zName, - void **ppContext, + void **ppUserData, fts5_tokenizer *pTokenizer ); @@ -13123,7 +13594,7 @@ struct fts5_api { int (*xCreateFunction)( fts5_api *pApi, const char *zName, - void *pContext, + void *pUserData, fts5_extension_function xFunction, void (*xDestroy)(void*) ); @@ -13154,7 +13625,7 @@ struct fts5_api { ** autoconf-based build */ #if defined(_HAVE_SQLITE_CONFIG_H) && !defined(SQLITECONFIG_H) -#include "config.h" +#include "sqlite_cfg.h" #define SQLITECONFIG_H 1 #endif @@ -13234,7 +13705,7 @@ struct fts5_api { ** level of recursion for each term. A stack overflow can result ** if the number of terms is too large. In practice, most SQL ** never has more than 3 or 4 terms. Use a value of 0 to disable -** any limit on the number of terms in a compount SELECT. +** any limit on the number of terms in a compound SELECT. */ #ifndef SQLITE_MAX_COMPOUND_SELECT # define SQLITE_MAX_COMPOUND_SELECT 500 @@ -13384,8 +13855,8 @@ struct fts5_api { #endif /* -** WAL mode depends on atomic aligned 32-bit loads and stores in a few -** places. The following macros try to make this explicit. +** A few places in the code require atomic load/store of aligned +** integer values. */ #ifndef __has_extension # define __has_extension(x) 0 /* compatibility with non-clang compilers */ #endif @@ -13441,15 +13912,22 @@ struct fts5_api { #endif /* -** A macro to hint to the compiler that a function should not be +** Macros to hint to the compiler that a function should or should not be ** inlined. 
*/ #if defined(__GNUC__) # define SQLITE_NOINLINE __attribute__((noinline)) +# define SQLITE_INLINE __attribute__((always_inline)) inline #elif defined(_MSC_VER) && _MSC_VER>=1310 # define SQLITE_NOINLINE __declspec(noinline) +# define SQLITE_INLINE __forceinline #else # define SQLITE_NOINLINE +# define SQLITE_INLINE +#endif +#if defined(SQLITE_COVERAGE_TEST) || defined(__STRICT_ANSI__) +# undef SQLITE_INLINE +# define SQLITE_INLINE #endif /* @@ -13471,6 +13949,16 @@ struct fts5_api { # endif #endif +/* +** Enable SQLITE_USE_SEH by default on MSVC builds. Only omit +** SEH support if the -DSQLITE_OMIT_SEH option is given. +*/ +#if defined(_MSC_VER) && !defined(SQLITE_OMIT_SEH) +# define SQLITE_USE_SEH 1 +#else +# undef SQLITE_USE_SEH +#endif + /* ** The SQLITE_THREADSAFE macro must be defined as 0, 1, or 2. ** 0 means mutexes are permanently disabled and the library is never @@ -14267,15 +14755,9 @@ typedef INT8_TYPE i8; /* 1-byte signed integer */ /* ** The datatype used to store estimates of the number of rows in a -** table or index. This is an unsigned integer type. For 99.9% of -** the world, a 32-bit integer is sufficient. But a 64-bit integer -** can be used at compile-time if desired. +** table or index. */ -#ifdef SQLITE_64BIT_STATS - typedef u64 tRowcnt; /* 64-bit only if requested at compile-time */ -#else - typedef u32 tRowcnt; /* 32-bit is the default */ -#endif +typedef u64 tRowcnt; /* ** Estimated quantities used for query planning are stored as 16-bit @@ -14336,8 +14818,31 @@ typedef INT16_TYPE LogEst; ** the end of buffer S. This macro returns true if P points to something ** contained within the buffer S. */ -#define SQLITE_WITHIN(P,S,E) (((uptr)(P)>=(uptr)(S))&&((uptr)(P)<(uptr)(E))) +#define SQLITE_WITHIN(P,S,E) (((uptr)(P)>=(uptr)(S))&&((uptr)(P)<(uptr)(E))) +/* +** P is one byte past the end of a large buffer. Return true if a span of bytes +** between S..E crosses the end of that buffer. In other words, return true +** if the sub-buffer S..E-1 overflows the buffer whose last byte is P-1. +** +** S is the start of the span. E is one byte past the end of the span. +** +** P +** |-----------------| FALSE +** |-------| +** S E +** +** P +** |-----------------| +** |-------| TRUE +** S E +** +** P +** |-----------------| +** |-------| FALSE +** S E +*/ +#define SQLITE_OVERFLOW(P,S,E) (((uptr)(S)<(uptr)(P))&&((uptr)(E)>(uptr)(P))) /* ** Macros to determine whether the machine is big or little endian, @@ -14347,16 +14852,33 @@ typedef INT16_TYPE LogEst; ** using C-preprocessor macros. If that is unsuccessful, or if ** -DSQLITE_BYTEORDER=0 is set, then byte-order is determined ** at run-time. +** +** If you are building SQLite on some obscure platform for which the +** following ifdef magic does not work, you can always include either: +** +** -DSQLITE_BYTEORDER=1234 +** +** or +** +** -DSQLITE_BYTEORDER=4321 +** +** to cause the build to work for little-endian or big-endian processors, +** respectively. 
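For what the SQLITE_BYTEORDER=0 fallback means in practice, a small illustrative probe (not the library's code) that computes the same 1234/4321 convention at run time:

    /* Illustrative run-time byte-order probe: returns 1234 on
    ** little-endian hardware and 4321 on big-endian hardware. */
    static int runtimeByteorder(void){
      static const unsigned int probe = 1;
      return *(const unsigned char*)&probe==1 ? 1234 : 4321;
    }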
*/ -#ifndef SQLITE_BYTEORDER -# if defined(i386) || defined(__i386__) || defined(_M_IX86) || \ +#ifndef SQLITE_BYTEORDER /* Replicate changes at tag-20230904a */ +# if defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ +# define SQLITE_BYTEORDER 4321 +# elif defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__ +# define SQLITE_BYTEORDER 1234 +# elif defined(__BIG_ENDIAN__) && __BIG_ENDIAN__==1 +# define SQLITE_BYTEORDER 4321 +# elif defined(i386) || defined(__i386__) || defined(_M_IX86) || \ defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || \ defined(_M_AMD64) || defined(_M_ARM) || defined(__x86) || \ defined(__ARMEL__) || defined(__AARCH64EL__) || defined(_M_ARM64) -# define SQLITE_BYTEORDER 1234 -# elif defined(sparc) || defined(__ppc__) || \ - defined(__ARMEB__) || defined(__AARCH64EB__) -# define SQLITE_BYTEORDER 4321 +# define SQLITE_BYTEORDER 1234 +# elif defined(sparc) || defined(__ARMEB__) || defined(__AARCH64EB__) +# define SQLITE_BYTEORDER 4321 # else # define SQLITE_BYTEORDER 0 # endif @@ -14421,9 +14943,9 @@ typedef INT16_TYPE LogEst; ** pointers. In that case, only verify 4-byte alignment. */ #ifdef SQLITE_4_BYTE_ALIGNED_MALLOC -# define EIGHT_BYTE_ALIGNMENT(X) ((((char*)(X) - (char*)0)&3)==0) +# define EIGHT_BYTE_ALIGNMENT(X) ((((uptr)(X) - (uptr)0)&3)==0) #else -# define EIGHT_BYTE_ALIGNMENT(X) ((((char*)(X) - (char*)0)&7)==0) +# define EIGHT_BYTE_ALIGNMENT(X) ((((uptr)(X) - (uptr)0)&7)==0) #endif /* @@ -14477,15 +14999,38 @@ SQLITE_PRIVATE u32 sqlite3TreeTrace; && (defined(SQLITE_TEST) || defined(SQLITE_ENABLE_SELECTTRACE) \ || defined(SQLITE_ENABLE_TREETRACE)) # define TREETRACE_ENABLED 1 -# define SELECTTRACE(K,P,S,X) \ +# define TREETRACE(K,P,S,X) \ if(sqlite3TreeTrace&(K)) \ sqlite3DebugPrintf("%u/%d/%p: ",(S)->selId,(P)->addrExplain,(S)),\ sqlite3DebugPrintf X #else -# define SELECTTRACE(K,P,S,X) +# define TREETRACE(K,P,S,X) # define TREETRACE_ENABLED 0 #endif +/* TREETRACE flag meanings: +** +** 0x00000001 Beginning and end of SELECT processing +** 0x00000002 WHERE clause processing +** 0x00000004 Query flattener +** 0x00000008 Result-set wildcard expansion +** 0x00000010 Query name resolution +** 0x00000020 Aggregate analysis +** 0x00000040 Window functions +** 0x00000080 Generated column names +** 0x00000100 Move HAVING terms into WHERE +** 0x00000200 Count-of-view optimization +** 0x00000400 Compound SELECT processing +** 0x00000800 Drop superfluous ORDER BY +** 0x00001000 LEFT JOIN simplifies to JOIN +** 0x00002000 Constant propagation +** 0x00004000 Push-down optimization +** 0x00008000 After all FROM-clause analysis +** 0x00010000 Beginning of DELETE/INSERT/UPDATE processing +** 0x00020000 Transform DISTINCT into GROUP BY +** 0x00040000 SELECT tree dump after all code has been generated +*/ + /* ** Macros for "wheretrace" */ @@ -14498,6 +15043,36 @@ SQLITE_PRIVATE u32 sqlite3WhereTrace; # define WHERETRACE(K,X) #endif +/* +** Bits for the sqlite3WhereTrace mask: +** +** (---any--) Top-level block structure +** 0x-------F High-level debug messages +** 0x----FFF- More detail +** 0xFFFF---- Low-level debug messages +** +** 0x00000001 Code generation +** 0x00000002 Solver +** 0x00000004 Solver costs +** 0x00000008 WhereLoop inserts +** +** 0x00000010 Display sqlite3_index_info xBestIndex calls +** 0x00000020 Range and equality scan metrics +** 0x00000040 IN operator decisions +** 0x00000080 WhereLoop cost adjustments +** 0x00000100 +** 0x00000200 Covering index decisions +** 0x00000400 OR optimization +** 0x00000800 Index scanner +** 0x00001000 More details associated with code generation +** 0x00002000 +** 0x00004000 Show all WHERE terms at key points +** 0x00008000 Show the full SELECT statement at key places +** +** 0x00010000 Show more detail when printing WHERE terms +** 0x00020000 Show WHERE terms returned from whereScanNext() +*/ + /* ** An instance of the following structure is used to store the busy-handler @@ -14518,7 +15093,7 @@ struct BusyHandler { /* ** Name of table that holds the database schema. ** -** The PREFERRED names are used whereever possible. But LEGACY is also +** The PREFERRED names are used wherever possible. But LEGACY is also ** used for backwards compatibility. ** ** 1. Queries can use either the PREFERRED or the LEGACY names @@ -14627,16 +15202,19 @@ typedef struct Column Column; typedef struct Cte Cte; typedef struct CteUse CteUse; typedef struct Db Db; +typedef struct DbClientData DbClientData; typedef struct DbFixer DbFixer; typedef struct Schema Schema; typedef struct Expr Expr; typedef struct ExprList ExprList; typedef struct FKey FKey; +typedef struct FpDecode FpDecode; typedef struct FuncDestructor FuncDestructor; typedef struct FuncDef FuncDef; typedef struct FuncDefHash FuncDefHash; typedef struct IdList IdList; typedef struct Index Index; +typedef struct IndexedExpr IndexedExpr; typedef struct IndexSample IndexSample; typedef struct KeyClass KeyClass; typedef struct KeyInfo KeyInfo; @@ -14649,6 +15227,7 @@ typedef struct Parse Parse; typedef struct ParseCleanup ParseCleanup; typedef struct PreUpdate PreUpdate; typedef struct PrintfArguments PrintfArguments; +typedef struct RCStr RCStr; typedef struct RenameToken RenameToken; typedef struct Returning Returning; typedef struct RowSet RowSet; @@ -14702,6 +15281,7 @@ typedef struct With With; #define MASKBIT32(n) (((unsigned int)1)<<(n)) #define SMASKBIT32(n) ((n)<=31?((unsigned int)1)<<(n):0) #define ALLBITS ((Bitmask)-1) +#define TOPBIT (((Bitmask)1)<<(BMS-1)) /* A VList object records a mapping between parameters/variables/wildcards ** in the SQL statement (such as $abc, @pqr, or :xyz) and the integer @@ -14716,6 +15296,331 @@ typedef int VList; ** "BusyHandler" typedefs. vdbe.h also requires a few of the opaque ** pointer types (i.e. FuncDef) defined above. */ +/************** Include os.h in the middle of sqliteInt.h ********************/ +/************** Begin file os.h **********************************************/ +/* +** 2001 September 16 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This header file (together with its companion C source-code file +** "os.c") attempts to abstract the underlying operating system so that +** the SQLite library will work on both POSIX and Windows systems. +** +** This header file is #include-ed by sqliteInt.h and thus ends up +** being included by every source file. +*/ +#ifndef _SQLITE_OS_H_ +#define _SQLITE_OS_H_ + +/* +** Attempt to automatically detect the operating system and setup the +** necessary pre-processor macros for it. 
+*/ +/************** Include os_setup.h in the middle of os.h *********************/ +/************** Begin file os_setup.h ****************************************/ +/* +** 2013 November 25 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains pre-processor directives related to operating system +** detection and/or setup. +*/ +#ifndef SQLITE_OS_SETUP_H +#define SQLITE_OS_SETUP_H + +/* +** Figure out if we are dealing with Unix, Windows, or some other operating +** system. +** +** After the following block of preprocessor macros, all of +** +** SQLITE_OS_KV +** SQLITE_OS_OTHER +** SQLITE_OS_UNIX +** SQLITE_OS_WIN +** +** will be defined to either 1 or 0. One of them will be 1. The others will be 0. +** If none of the macros are initially defined, then select either +** SQLITE_OS_UNIX or SQLITE_OS_WIN depending on the target platform. +** +** If SQLITE_OS_OTHER=1 is specified at compile-time, then the application +** must provide its own VFS implementation together with sqlite3_os_init() +** and sqlite3_os_end() routines. +*/ +#if !defined(SQLITE_OS_KV) && !defined(SQLITE_OS_OTHER) && \ + !defined(SQLITE_OS_UNIX) && !defined(SQLITE_OS_WIN) +# if defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || \ + defined(__MINGW32__) || defined(__BORLANDC__) +# define SQLITE_OS_WIN 1 +# define SQLITE_OS_UNIX 0 +# else +# define SQLITE_OS_WIN 0 +# define SQLITE_OS_UNIX 1 +# endif +#endif +#if SQLITE_OS_OTHER+1>1 +# undef SQLITE_OS_KV +# define SQLITE_OS_KV 0 +# undef SQLITE_OS_UNIX +# define SQLITE_OS_UNIX 0 +# undef SQLITE_OS_WIN +# define SQLITE_OS_WIN 0 +#endif +#if SQLITE_OS_KV+1>1 +# undef SQLITE_OS_OTHER +# define SQLITE_OS_OTHER 0 +# undef SQLITE_OS_UNIX +# define SQLITE_OS_UNIX 0 +# undef SQLITE_OS_WIN +# define SQLITE_OS_WIN 0 +# define SQLITE_OMIT_LOAD_EXTENSION 1 +# define SQLITE_OMIT_WAL 1 +# define SQLITE_OMIT_DEPRECATED 1 +# undef SQLITE_TEMP_STORE +# define SQLITE_TEMP_STORE 3 /* Always use memory for temporary storage */ +# define SQLITE_DQS 0 +# define SQLITE_OMIT_SHARED_CACHE 1 +# define SQLITE_OMIT_AUTOINIT 1 +#endif +#if SQLITE_OS_UNIX+1>1 +# undef SQLITE_OS_KV +# define SQLITE_OS_KV 0 +# undef SQLITE_OS_OTHER +# define SQLITE_OS_OTHER 0 +# undef SQLITE_OS_WIN +# define SQLITE_OS_WIN 0 +#endif +#if SQLITE_OS_WIN+1>1 +# undef SQLITE_OS_KV +# define SQLITE_OS_KV 0 +# undef SQLITE_OS_OTHER +# define SQLITE_OS_OTHER 0 +# undef SQLITE_OS_UNIX +# define SQLITE_OS_UNIX 0 +#endif + + +#endif /* SQLITE_OS_SETUP_H */ + +/************** End of os_setup.h ********************************************/ +/************** Continuing where we left off in os.h *************************/ + +/* If the SET_FULLSYNC macro is not defined above, then make it +** a no-op +*/ +#ifndef SET_FULLSYNC +# define SET_FULLSYNC(x,y) +#endif + +/* Maximum pathname length. Note: FILENAME_MAX defined by stdio.h +*/ +#ifndef SQLITE_MAX_PATHLEN +# define SQLITE_MAX_PATHLEN FILENAME_MAX +#endif + +/* Maximum number of symlinks that will be resolved while trying to +** expand a filename in xFullPathname() in the VFS. 
+*/ +#ifndef SQLITE_MAX_SYMLINK +# define SQLITE_MAX_SYMLINK 200 +#endif + +/* +** The default size of a disk sector +*/ +#ifndef SQLITE_DEFAULT_SECTOR_SIZE +# define SQLITE_DEFAULT_SECTOR_SIZE 4096 +#endif + +/* +** Temporary files are named starting with this prefix followed by 16 random +** alphanumeric characters, and no file extension. They are stored in the +** OS's standard temporary file directory, and are deleted prior to exit. +** If sqlite is being embedded in another program, you may wish to change the +** prefix to reflect your program's name, so that if your program exits +** prematurely, old temporary files can be easily identified. This can be done +** using -DSQLITE_TEMP_FILE_PREFIX=myprefix_ on the compiler command line. +** +** 2006-10-31: The default prefix used to be "sqlite_". But then +** Mcafee started using SQLite in their anti-virus product and it +** started putting files with the "sqlite" name in the c:/temp folder. +** This annoyed many windows users. Those users would then do a +** Google search for "sqlite", find the telephone numbers of the +** developers and call to wake them up at night and complain. +** For this reason, the default name prefix is changed to be "sqlite" +** spelled backwards. So the temp files are still identified, but +** anybody smart enough to figure out the code is also likely smart +** enough to know that calling the developer will not help get rid +** of the file. +*/ +#ifndef SQLITE_TEMP_FILE_PREFIX +# define SQLITE_TEMP_FILE_PREFIX "etilqs_" +#endif + +/* +** The following values may be passed as the second argument to +** sqlite3OsLock(). The various locks exhibit the following semantics: +** +** SHARED: Any number of processes may hold a SHARED lock simultaneously. +** RESERVED: A single process may hold a RESERVED lock on a file at +** any time. Other processes may hold and obtain new SHARED locks. +** PENDING: A single process may hold a PENDING lock on a file at +** any one time. Existing SHARED locks may persist, but no new +** SHARED locks may be obtained by other processes. +** EXCLUSIVE: An EXCLUSIVE lock precludes all other locks. +** +** PENDING_LOCK may not be passed directly to sqlite3OsLock(). Instead, a +** process that requests an EXCLUSIVE lock may actually obtain a PENDING +** lock. This can be upgraded to an EXCLUSIVE lock by a subsequent call to +** sqlite3OsLock(). +*/ +#define NO_LOCK 0 +#define SHARED_LOCK 1 +#define RESERVED_LOCK 2 +#define PENDING_LOCK 3 +#define EXCLUSIVE_LOCK 4 + +/* +** File Locking Notes: (Mostly about windows but also some info for Unix) +** +** We cannot use LockFileEx() or UnlockFileEx() on Win95/98/ME because +** those functions are not available. So we use only LockFile() and +** UnlockFile(). +** +** LockFile() prevents not just writing but also reading by other processes. +** A SHARED_LOCK is obtained by locking a single randomly-chosen +** byte out of a specific range of bytes. The lock byte is obtained at +** random so two separate readers can probably access the file at the +** same time, unless they are unlucky and choose the same lock byte. +** An EXCLUSIVE_LOCK is obtained by locking all bytes in the range. +** There can only be one writer. A RESERVED_LOCK is obtained by locking +** a single byte of the file that is designated as the reserved lock byte. +** A PENDING_LOCK is obtained by locking a designated byte different from +** the RESERVED_LOCK byte. 
+** +** On WinNT/2K/XP systems, LockFileEx() and UnlockFileEx() are available, +** which means we can use reader/writer locks. When reader/writer locks +** are used, the lock is placed on the same range of bytes that is used +** for probabilistic locking in Win95/98/ME. Hence, the locking scheme +** will support two or more Win95 readers or two or more WinNT readers. +** But a single Win95 reader will lock out all WinNT readers and a single +** WinNT reader will lock out all other Win95 readers. +** +** The following #defines specify the range of bytes used for locking. +** SHARED_SIZE is the number of bytes available in the pool from which +** a random byte is selected for a shared lock. The pool of bytes for +** shared locks begins at SHARED_FIRST. +** +** The same locking strategy and +** byte ranges are used for Unix. This leaves open the possibility of having +** clients on win95, winNT, and unix all talking to the same shared file +** and all locking correctly. To do so would require that samba (or whatever +** tool is being used for file sharing) implements locks correctly between +** windows and unix. I'm guessing that isn't likely to happen, but by +** using the same locking range we are at least open to the possibility. +** +** Locking in Windows is mandatory. For this reason, we cannot store +** actual data in the bytes used for locking. The pager therefore never +** allocates the pages involved in locking. SHARED_SIZE is selected so +** that all locks will fit on a single page even at the minimum page size. +** PENDING_BYTE defines the beginning of the locks. By default PENDING_BYTE +** is set high so that we don't have to allocate an unused page except +** for very large databases. But one should test the page skipping logic +** by setting PENDING_BYTE low and running the entire regression suite. +** +** Changing the value of PENDING_BYTE results in a subtly incompatible +** file format. Depending on how it is changed, you might not notice +** the incompatibility right away, even running a full regression test. +** The default location of PENDING_BYTE is the first byte past the +** 1GB boundary. +** +*/ +#ifdef SQLITE_OMIT_WSD +# define PENDING_BYTE (0x40000000) +#else +# define PENDING_BYTE sqlite3PendingByte +#endif +#define RESERVED_BYTE (PENDING_BYTE+1) +#define SHARED_FIRST (PENDING_BYTE+2) +#define SHARED_SIZE 510 + +/* +** Wrapper around OS specific sqlite3_os_init() function. 
+*/ +SQLITE_PRIVATE int sqlite3OsInit(void); + +/* +** Functions for accessing sqlite3_file methods +*/ +SQLITE_PRIVATE void sqlite3OsClose(sqlite3_file*); +SQLITE_PRIVATE int sqlite3OsRead(sqlite3_file*, void*, int amt, i64 offset); +SQLITE_PRIVATE int sqlite3OsWrite(sqlite3_file*, const void*, int amt, i64 offset); +SQLITE_PRIVATE int sqlite3OsTruncate(sqlite3_file*, i64 size); +SQLITE_PRIVATE int sqlite3OsSync(sqlite3_file*, int); +SQLITE_PRIVATE int sqlite3OsFileSize(sqlite3_file*, i64 *pSize); +SQLITE_PRIVATE int sqlite3OsLock(sqlite3_file*, int); +SQLITE_PRIVATE int sqlite3OsUnlock(sqlite3_file*, int); +SQLITE_PRIVATE int sqlite3OsCheckReservedLock(sqlite3_file *id, int *pResOut); +SQLITE_PRIVATE int sqlite3OsFileControl(sqlite3_file*,int,void*); +SQLITE_PRIVATE void sqlite3OsFileControlHint(sqlite3_file*,int,void*); +#define SQLITE_FCNTL_DB_UNCHANGED 0xca093fa0 +SQLITE_PRIVATE int sqlite3OsSectorSize(sqlite3_file *id); +SQLITE_PRIVATE int sqlite3OsDeviceCharacteristics(sqlite3_file *id); +#ifndef SQLITE_OMIT_WAL +SQLITE_PRIVATE int sqlite3OsShmMap(sqlite3_file *,int,int,int,void volatile **); +SQLITE_PRIVATE int sqlite3OsShmLock(sqlite3_file *id, int, int, int); +SQLITE_PRIVATE void sqlite3OsShmBarrier(sqlite3_file *id); +SQLITE_PRIVATE int sqlite3OsShmUnmap(sqlite3_file *id, int); +#endif /* SQLITE_OMIT_WAL */ +SQLITE_PRIVATE int sqlite3OsFetch(sqlite3_file *id, i64, int, void **); +SQLITE_PRIVATE int sqlite3OsUnfetch(sqlite3_file *, i64, void *); + + +/* +** Functions for accessing sqlite3_vfs methods +*/ +SQLITE_PRIVATE int sqlite3OsOpen(sqlite3_vfs *, const char *, sqlite3_file*, int, int *); +SQLITE_PRIVATE int sqlite3OsDelete(sqlite3_vfs *, const char *, int); +SQLITE_PRIVATE int sqlite3OsAccess(sqlite3_vfs *, const char *, int, int *pResOut); +SQLITE_PRIVATE int sqlite3OsFullPathname(sqlite3_vfs *, const char *, int, char *); +#ifndef SQLITE_OMIT_LOAD_EXTENSION +SQLITE_PRIVATE void *sqlite3OsDlOpen(sqlite3_vfs *, const char *); +SQLITE_PRIVATE void sqlite3OsDlError(sqlite3_vfs *, int, char *); +SQLITE_PRIVATE void (*sqlite3OsDlSym(sqlite3_vfs *, void *, const char *))(void); +SQLITE_PRIVATE void sqlite3OsDlClose(sqlite3_vfs *, void *); +#endif /* SQLITE_OMIT_LOAD_EXTENSION */ +SQLITE_PRIVATE int sqlite3OsRandomness(sqlite3_vfs *, int, char *); +SQLITE_PRIVATE int sqlite3OsSleep(sqlite3_vfs *, int); +SQLITE_PRIVATE int sqlite3OsGetLastError(sqlite3_vfs*); +SQLITE_PRIVATE int sqlite3OsCurrentTimeInt64(sqlite3_vfs *, sqlite3_int64*); + +/* +** Convenience functions for opening and closing files using +** sqlite3_malloc() to obtain space for the file-handle structure. 
+*/ +SQLITE_PRIVATE int sqlite3OsOpenMalloc(sqlite3_vfs *, const char *, sqlite3_file **, int,int*); +SQLITE_PRIVATE void sqlite3OsCloseFree(sqlite3_file *); + +#endif /* _SQLITE_OS_H_ */ + +/************** End of os.h **************************************************/ +/************** Continuing where we left off in sqliteInt.h ******************/ /************** Include pager.h in the middle of sqliteInt.h *****************/ /************** Begin file pager.h *******************************************/ /* @@ -14960,6 +15865,10 @@ SQLITE_PRIVATE void sqlite3PagerRefdump(Pager*); # define enable_simulated_io_errors() #endif +#if defined(SQLITE_USE_SEH) && !defined(SQLITE_OMIT_WAL) +SQLITE_PRIVATE int sqlite3PagerWalSystemErrno(Pager*); +#endif + #endif /* SQLITE_PAGER_H */ /************** End of pager.h ***********************************************/ @@ -15151,7 +16060,7 @@ SQLITE_PRIVATE int sqlite3BtreeNewDb(Btree *p); ** reduce network bandwidth. ** ** Note that BTREE_HINT_FLAGS with BTREE_BULKLOAD is the only hint used by -** standard SQLite. The other hints are provided for extentions that use +** standard SQLite. The other hints are provided for extensions that use ** the SQLite parser and code generator but substitute their own storage ** engine. */ @@ -15289,15 +16198,21 @@ SQLITE_PRIVATE int sqlite3BtreePrevious(BtCursor*, int flags); SQLITE_PRIVATE i64 sqlite3BtreeIntegerKey(BtCursor*); SQLITE_PRIVATE void sqlite3BtreeCursorPin(BtCursor*); SQLITE_PRIVATE void sqlite3BtreeCursorUnpin(BtCursor*); -#ifdef SQLITE_ENABLE_OFFSET_SQL_FUNC SQLITE_PRIVATE i64 sqlite3BtreeOffset(BtCursor*); -#endif SQLITE_PRIVATE int sqlite3BtreePayload(BtCursor*, u32 offset, u32 amt, void*); SQLITE_PRIVATE const void *sqlite3BtreePayloadFetch(BtCursor*, u32 *pAmt); SQLITE_PRIVATE u32 sqlite3BtreePayloadSize(BtCursor*); SQLITE_PRIVATE sqlite3_int64 sqlite3BtreeMaxRecordSize(BtCursor*); -SQLITE_PRIVATE char *sqlite3BtreeIntegrityCheck(sqlite3*,Btree*,Pgno*aRoot,int nRoot,int,int*); +SQLITE_PRIVATE int sqlite3BtreeIntegrityCheck( + sqlite3 *db, /* Database connection that is running the check */ + Btree *p, /* The btree to be checked */ + Pgno *aRoot, /* An array of root pages numbers for individual trees */ + int nRoot, /* Number of entries in aRoot[] */ + int mxErr, /* Stop reporting errors after this many */ + int *pnErr, /* OUT: Write number of errors seen to this variable */ + char **pzOut /* OUT: Write the error message string here */ +); SQLITE_PRIVATE struct Pager *sqlite3BtreePager(Btree*); SQLITE_PRIVATE i64 sqlite3BtreeRowCountEst(BtCursor*); @@ -15336,6 +16251,8 @@ SQLITE_PRIVATE int sqlite3BtreeCheckpoint(Btree*, int, int *, int *); SQLITE_PRIVATE int sqlite3BtreeTransferRow(BtCursor*, BtCursor*, i64); +SQLITE_PRIVATE void sqlite3BtreeClearCache(Btree*); + /* ** If we are not using shared cache, then there is no need to ** use mutexes to access the BtShared structures. 
So make the @@ -15452,14 +16369,14 @@ struct VdbeOp { #ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS char *zComment; /* Comment to improve readability */ #endif -#ifdef VDBE_PROFILE - u32 cnt; /* Number of times this instruction was executed */ - u64 cycles; /* Total time spent executing this instruction */ -#endif #ifdef SQLITE_VDBE_COVERAGE u32 iSrcLine; /* Source-code line that generated this opcode ** with flags in the upper 8 bits */ #endif +#if defined(SQLITE_ENABLE_STMT_SCANSTATUS) || defined(VDBE_PROFILE) + u64 nExec; + u64 nCycle; +#endif }; typedef struct VdbeOp VdbeOp; @@ -15560,48 +16477,48 @@ typedef struct VdbeOpList VdbeOpList; #define OP_Vacuum 5 #define OP_VFilter 6 /* jump, synopsis: iplan=r[P3] zplan='P4' */ #define OP_VUpdate 7 /* synopsis: data=r[P3@P2] */ -#define OP_Goto 8 /* jump */ -#define OP_Gosub 9 /* jump */ -#define OP_InitCoroutine 10 /* jump */ -#define OP_Yield 11 /* jump */ -#define OP_MustBeInt 12 /* jump */ -#define OP_Jump 13 /* jump */ -#define OP_Once 14 /* jump */ -#define OP_If 15 /* jump */ -#define OP_IfNot 16 /* jump */ -#define OP_IsNullOrType 17 /* jump, synopsis: if typeof(r[P1]) IN (P3,5) goto P2 */ -#define OP_IfNullRow 18 /* jump, synopsis: if P1.nullRow then r[P3]=NULL, goto P2 */ +#define OP_Init 8 /* jump, synopsis: Start at P2 */ +#define OP_Goto 9 /* jump */ +#define OP_Gosub 10 /* jump */ +#define OP_InitCoroutine 11 /* jump */ +#define OP_Yield 12 /* jump */ +#define OP_MustBeInt 13 /* jump */ +#define OP_Jump 14 /* jump */ +#define OP_Once 15 /* jump */ +#define OP_If 16 /* jump */ +#define OP_IfNot 17 /* jump */ +#define OP_IsType 18 /* jump, synopsis: if typeof(P1.P3) in P5 goto P2 */ #define OP_Not 19 /* same as TK_NOT, synopsis: r[P2]= !r[P1] */ -#define OP_SeekLT 20 /* jump, synopsis: key=r[P3@P4] */ -#define OP_SeekLE 21 /* jump, synopsis: key=r[P3@P4] */ -#define OP_SeekGE 22 /* jump, synopsis: key=r[P3@P4] */ -#define OP_SeekGT 23 /* jump, synopsis: key=r[P3@P4] */ -#define OP_IfNotOpen 24 /* jump, synopsis: if( !csr[P1] ) goto P2 */ -#define OP_IfNoHope 25 /* jump, synopsis: key=r[P3@P4] */ -#define OP_NoConflict 26 /* jump, synopsis: key=r[P3@P4] */ -#define OP_NotFound 27 /* jump, synopsis: key=r[P3@P4] */ -#define OP_Found 28 /* jump, synopsis: key=r[P3@P4] */ -#define OP_SeekRowid 29 /* jump, synopsis: intkey=r[P3] */ -#define OP_NotExists 30 /* jump, synopsis: intkey=r[P3] */ -#define OP_Last 31 /* jump */ -#define OP_IfSmaller 32 /* jump */ -#define OP_SorterSort 33 /* jump */ -#define OP_Sort 34 /* jump */ -#define OP_Rewind 35 /* jump */ -#define OP_SorterNext 36 /* jump */ -#define OP_Prev 37 /* jump */ -#define OP_Next 38 /* jump */ -#define OP_IdxLE 39 /* jump, synopsis: key=r[P3@P4] */ -#define OP_IdxGT 40 /* jump, synopsis: key=r[P3@P4] */ -#define OP_IdxLT 41 /* jump, synopsis: key=r[P3@P4] */ -#define OP_IdxGE 42 /* jump, synopsis: key=r[P3@P4] */ +#define OP_IfNullRow 20 /* jump, synopsis: if P1.nullRow then r[P3]=NULL, goto P2 */ +#define OP_SeekLT 21 /* jump, synopsis: key=r[P3@P4] */ +#define OP_SeekLE 22 /* jump, synopsis: key=r[P3@P4] */ +#define OP_SeekGE 23 /* jump, synopsis: key=r[P3@P4] */ +#define OP_SeekGT 24 /* jump, synopsis: key=r[P3@P4] */ +#define OP_IfNotOpen 25 /* jump, synopsis: if( !csr[P1] ) goto P2 */ +#define OP_IfNoHope 26 /* jump, synopsis: key=r[P3@P4] */ +#define OP_NoConflict 27 /* jump, synopsis: key=r[P3@P4] */ +#define OP_NotFound 28 /* jump, synopsis: key=r[P3@P4] */ +#define OP_Found 29 /* jump, synopsis: key=r[P3@P4] */ +#define OP_SeekRowid 30 /* jump, synopsis: intkey=r[P3] */ 
+#define OP_NotExists 31 /* jump, synopsis: intkey=r[P3] */ +#define OP_Last 32 /* jump */ +#define OP_IfSmaller 33 /* jump */ +#define OP_SorterSort 34 /* jump */ +#define OP_Sort 35 /* jump */ +#define OP_Rewind 36 /* jump */ +#define OP_SorterNext 37 /* jump */ +#define OP_Prev 38 /* jump */ +#define OP_Next 39 /* jump */ +#define OP_IdxLE 40 /* jump, synopsis: key=r[P3@P4] */ +#define OP_IdxGT 41 /* jump, synopsis: key=r[P3@P4] */ +#define OP_IdxLT 42 /* jump, synopsis: key=r[P3@P4] */ #define OP_Or 43 /* same as TK_OR, synopsis: r[P3]=(r[P1] || r[P2]) */ #define OP_And 44 /* same as TK_AND, synopsis: r[P3]=(r[P1] && r[P2]) */ -#define OP_RowSetRead 45 /* jump, synopsis: r[P3]=rowset(P1) */ -#define OP_RowSetTest 46 /* jump, synopsis: if r[P3] in rowset(P1) goto P2 */ -#define OP_Program 47 /* jump */ -#define OP_FkIfZero 48 /* jump, synopsis: if fkctr[P1]==0 goto P2 */ -#define OP_IfPos 49 /* jump, synopsis: if r[P1]>0 then r[P1]-=P3, goto P2 */ +#define OP_IdxGE 45 /* jump, synopsis: key=r[P3@P4] */ +#define OP_RowSetRead 46 /* jump, synopsis: r[P3]=rowset(P1) */ +#define OP_RowSetTest 47 /* jump, synopsis: if r[P3] in rowset(P1) goto P2 */ +#define OP_Program 48 /* jump */ +#define OP_FkIfZero 49 /* jump, synopsis: if fkctr[P1]==0 goto P2 */ #define OP_IsNull 50 /* jump, same as TK_ISNULL, synopsis: if r[P1]==NULL goto P2 */ #define OP_NotNull 51 /* jump, same as TK_NOTNULL, synopsis: if r[P1]!=NULL goto P2 */ #define OP_Ne 52 /* jump, same as TK_NE, synopsis: IF r[P3]!=r[P1] */ @@ -15611,12 +16528,12 @@ typedef struct VdbeOpList VdbeOpList; #define OP_Lt 56 /* jump, same as TK_LT, synopsis: IF r[P3]=r[P1] */ #define OP_ElseEq 58 /* jump, same as TK_ESCAPE */ -#define OP_IfNotZero 59 /* jump, synopsis: if r[P1]!=0 then r[P1]--, goto P2 */ -#define OP_DecrJumpZero 60 /* jump, synopsis: if (--r[P1])==0 goto P2 */ -#define OP_IncrVacuum 61 /* jump */ -#define OP_VNext 62 /* jump */ -#define OP_Filter 63 /* jump, synopsis: if key(P3@P4) not in filter(P1) goto P2 */ -#define OP_Init 64 /* jump, synopsis: Start at P2 */ +#define OP_IfPos 59 /* jump, synopsis: if r[P1]>0 then r[P1]-=P3, goto P2 */ +#define OP_IfNotZero 60 /* jump, synopsis: if r[P1]!=0 then r[P1]--, goto P2 */ +#define OP_DecrJumpZero 61 /* jump, synopsis: if (--r[P1])==0 goto P2 */ +#define OP_IncrVacuum 62 /* jump */ +#define OP_VNext 63 /* jump */ +#define OP_Filter 64 /* jump, synopsis: if key(P3@P4) not in filter(P1) goto P2 */ #define OP_PureFunc 65 /* synopsis: r[P3]=func(r[P2@NP]) */ #define OP_Function 66 /* synopsis: r[P3]=func(r[P2@NP]) */ #define OP_Return 67 @@ -15726,19 +16643,20 @@ typedef struct VdbeOpList VdbeOpList; #define OP_VCreate 171 #define OP_VDestroy 172 #define OP_VOpen 173 -#define OP_VInitIn 174 /* synopsis: r[P2]=ValueList(P1,P3) */ -#define OP_VColumn 175 /* synopsis: r[P3]=vcolumn(P2) */ -#define OP_VRename 176 -#define OP_Pagecount 177 -#define OP_MaxPgcnt 178 -#define OP_ClrSubtype 179 /* synopsis: r[P1].subtype = 0 */ -#define OP_FilterAdd 180 /* synopsis: filter(P1) += key(P3@P4) */ -#define OP_Trace 181 -#define OP_CursorHint 182 -#define OP_ReleaseReg 183 /* synopsis: release r[P1@P2] mask P3 */ -#define OP_Noop 184 -#define OP_Explain 185 -#define OP_Abortable 186 +#define OP_VCheck 174 +#define OP_VInitIn 175 /* synopsis: r[P2]=ValueList(P1,P3) */ +#define OP_VColumn 176 /* synopsis: r[P3]=vcolumn(P2) */ +#define OP_VRename 177 +#define OP_Pagecount 178 +#define OP_MaxPgcnt 179 +#define OP_ClrSubtype 180 /* synopsis: r[P1].subtype = 0 */ +#define OP_FilterAdd 181 /* synopsis: 
filter(P1) += key(P3@P4) */ +#define OP_Trace 182 +#define OP_CursorHint 183 +#define OP_ReleaseReg 184 /* synopsis: release r[P1@P2] mask P3 */ +#define OP_Noop 185 +#define OP_Explain 186 +#define OP_Abortable 187 /* Properties such as "out2" or "jump" that are specified in ** comments following the "case" for each opcode in the vdbe.c @@ -15750,31 +16668,32 @@ typedef struct VdbeOpList VdbeOpList; #define OPFLG_IN3 0x08 /* in3: P3 is an input */ #define OPFLG_OUT2 0x10 /* out2: P2 is an output */ #define OPFLG_OUT3 0x20 /* out3: P3 is an output */ +#define OPFLG_NCYCLE 0x40 /* ncycle:Cycles count against P1 */ #define OPFLG_INITIALIZER {\ -/* 0 */ 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x01, 0x00,\ -/* 8 */ 0x01, 0x01, 0x01, 0x03, 0x03, 0x01, 0x01, 0x03,\ -/* 16 */ 0x03, 0x03, 0x01, 0x12, 0x09, 0x09, 0x09, 0x09,\ -/* 24 */ 0x01, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x01,\ -/* 32 */ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,\ -/* 40 */ 0x01, 0x01, 0x01, 0x26, 0x26, 0x23, 0x0b, 0x01,\ -/* 48 */ 0x01, 0x03, 0x03, 0x03, 0x0b, 0x0b, 0x0b, 0x0b,\ -/* 56 */ 0x0b, 0x0b, 0x01, 0x03, 0x03, 0x01, 0x01, 0x01,\ +/* 0 */ 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x41, 0x00,\ +/* 8 */ 0x01, 0x01, 0x01, 0x01, 0x03, 0x03, 0x01, 0x01,\ +/* 16 */ 0x03, 0x03, 0x01, 0x12, 0x01, 0x49, 0x49, 0x49,\ +/* 24 */ 0x49, 0x01, 0x49, 0x49, 0x49, 0x49, 0x49, 0x49,\ +/* 32 */ 0x41, 0x01, 0x41, 0x41, 0x41, 0x01, 0x41, 0x41,\ +/* 40 */ 0x41, 0x41, 0x41, 0x26, 0x26, 0x41, 0x23, 0x0b,\ +/* 48 */ 0x01, 0x01, 0x03, 0x03, 0x0b, 0x0b, 0x0b, 0x0b,\ +/* 56 */ 0x0b, 0x0b, 0x01, 0x03, 0x03, 0x03, 0x01, 0x41,\ /* 64 */ 0x01, 0x00, 0x00, 0x02, 0x02, 0x08, 0x00, 0x10,\ /* 72 */ 0x10, 0x10, 0x00, 0x10, 0x00, 0x10, 0x10, 0x00,\ /* 80 */ 0x00, 0x10, 0x10, 0x00, 0x00, 0x00, 0x02, 0x02,\ -/* 88 */ 0x02, 0x00, 0x00, 0x12, 0x1e, 0x20, 0x00, 0x00,\ -/* 96 */ 0x00, 0x00, 0x10, 0x10, 0x00, 0x00, 0x26, 0x26,\ +/* 88 */ 0x02, 0x00, 0x00, 0x12, 0x1e, 0x20, 0x40, 0x00,\ +/* 96 */ 0x00, 0x00, 0x10, 0x10, 0x00, 0x40, 0x26, 0x26,\ /* 104 */ 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26,\ -/* 112 */ 0x00, 0x00, 0x12, 0x00, 0x00, 0x10, 0x00, 0x00,\ -/* 120 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x10,\ -/* 128 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10,\ -/* 136 */ 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x10, 0x00,\ +/* 112 */ 0x40, 0x00, 0x12, 0x40, 0x40, 0x10, 0x40, 0x00,\ +/* 120 */ 0x00, 0x00, 0x40, 0x00, 0x40, 0x40, 0x10, 0x10,\ +/* 128 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x50,\ +/* 136 */ 0x00, 0x40, 0x04, 0x04, 0x00, 0x40, 0x50, 0x40,\ /* 144 */ 0x10, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00,\ /* 152 */ 0x00, 0x10, 0x00, 0x00, 0x06, 0x10, 0x00, 0x04,\ /* 160 */ 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\ -/* 168 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00,\ -/* 176 */ 0x00, 0x10, 0x10, 0x02, 0x00, 0x00, 0x00, 0x00,\ -/* 184 */ 0x00, 0x00, 0x00,} +/* 168 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x10, 0x50,\ +/* 176 */ 0x40, 0x00, 0x10, 0x10, 0x02, 0x00, 0x00, 0x00,\ +/* 184 */ 0x00, 0x00, 0x00, 0x00,} /* The resolve3P2Values() routine is able to run faster if it knows ** the value of the largest JUMP opcode. 
The smaller the maximum @@ -15827,14 +16746,20 @@ SQLITE_PRIVATE void sqlite3VdbeNoJumpsOutsideSubrtn(Vdbe*,int,int,int); #endif SQLITE_PRIVATE VdbeOp *sqlite3VdbeAddOpList(Vdbe*, int nOp, VdbeOpList const *aOp,int iLineno); #ifndef SQLITE_OMIT_EXPLAIN -SQLITE_PRIVATE void sqlite3VdbeExplain(Parse*,u8,const char*,...); +SQLITE_PRIVATE int sqlite3VdbeExplain(Parse*,u8,const char*,...); SQLITE_PRIVATE void sqlite3VdbeExplainPop(Parse*); SQLITE_PRIVATE int sqlite3VdbeExplainParent(Parse*); # define ExplainQueryPlan(P) sqlite3VdbeExplain P +# ifdef SQLITE_ENABLE_STMT_SCANSTATUS +# define ExplainQueryPlan2(V,P) (V = sqlite3VdbeExplain P) +# else +# define ExplainQueryPlan2(V,P) ExplainQueryPlan(P) +# endif # define ExplainQueryPlanPop(P) sqlite3VdbeExplainPop(P) # define ExplainQueryPlanParent(P) sqlite3VdbeExplainParent(P) #else # define ExplainQueryPlan(P) +# define ExplainQueryPlan2(V,P) # define ExplainQueryPlanPop(P) # define ExplainQueryPlanParent(P) 0 # define sqlite3ExplainBreakpoint(A,B) /*no-op*/ @@ -15850,6 +16775,7 @@ SQLITE_PRIVATE void sqlite3VdbeChangeP1(Vdbe*, int addr, int P1); SQLITE_PRIVATE void sqlite3VdbeChangeP2(Vdbe*, int addr, int P2); SQLITE_PRIVATE void sqlite3VdbeChangeP3(Vdbe*, int addr, int P3); SQLITE_PRIVATE void sqlite3VdbeChangeP5(Vdbe*, u16 P5); +SQLITE_PRIVATE void sqlite3VdbeTypeofColumn(Vdbe*, int); SQLITE_PRIVATE void sqlite3VdbeJumpHere(Vdbe*, int addr); SQLITE_PRIVATE void sqlite3VdbeJumpHereOrPopInst(Vdbe*, int addr); SQLITE_PRIVATE int sqlite3VdbeChangeToNoop(Vdbe*, int addr); @@ -15864,6 +16790,7 @@ SQLITE_PRIVATE void sqlite3VdbeAppendP4(Vdbe*, void *pP4, int p4type); SQLITE_PRIVATE void sqlite3VdbeSetP4KeyInfo(Parse*, Index*); SQLITE_PRIVATE void sqlite3VdbeUsesBtree(Vdbe*, int); SQLITE_PRIVATE VdbeOp *sqlite3VdbeGetOp(Vdbe*, int); +SQLITE_PRIVATE VdbeOp *sqlite3VdbeGetLastOp(Vdbe*); SQLITE_PRIVATE int sqlite3VdbeMakeLabel(Parse*); SQLITE_PRIVATE void sqlite3VdbeRunOnlyOnce(Vdbe*); SQLITE_PRIVATE void sqlite3VdbeReusable(Vdbe*); @@ -15941,7 +16868,7 @@ SQLITE_PRIVATE void sqlite3VdbeNoopComment(Vdbe*, const char*, ...); ** The VdbeCoverage macros are used to set a coverage testing point ** for VDBE branch instructions. The coverage testing points are line ** numbers in the sqlite3.c source file. VDBE branch coverage testing -** only works with an amalagmation build. That's ok since a VDBE branch +** only works with an amalgamation build. That's ok since a VDBE branch ** coverage build designed for testing the test suite only. No application ** should ever ship with VDBE branch coverage measuring turned on. ** @@ -15959,7 +16886,7 @@ SQLITE_PRIVATE void sqlite3VdbeNoopComment(Vdbe*, const char*, ...); ** // NULL option is not possible ** ** VdbeCoverageEqNe(v) // Previous OP_Jump is only interested -** // in distingishing equal and not-equal. +** // in distinguishing equal and not-equal. ** ** Every VDBE branch operation must be tagged with one of the macros above. ** If not, then when "make test" is run with -DSQLITE_VDBE_COVERAGE and @@ -15969,7 +16896,7 @@ SQLITE_PRIVATE void sqlite3VdbeNoopComment(Vdbe*, const char*, ...); ** During testing, the test application will invoke ** sqlite3_test_control(SQLITE_TESTCTRL_VDBE_COVERAGE,...) to set a callback ** routine that is invoked as each bytecode branch is taken. The callback -** contains the sqlite3.c source line number ov the VdbeCoverage macro and +** contains the sqlite3.c source line number of the VdbeCoverage macro and ** flags to indicate whether or not the branch was taken. 
The test application ** is responsible for keeping track of this and reporting byte-code branches ** that are never taken. @@ -16005,14 +16932,22 @@ SQLITE_PRIVATE void sqlite3VdbeSetLineNumber(Vdbe*,int); #ifdef SQLITE_ENABLE_STMT_SCANSTATUS SQLITE_PRIVATE void sqlite3VdbeScanStatus(Vdbe*, int, int, int, LogEst, const char*); +SQLITE_PRIVATE void sqlite3VdbeScanStatusRange(Vdbe*, int, int, int); +SQLITE_PRIVATE void sqlite3VdbeScanStatusCounters(Vdbe*, int, int, int); #else -# define sqlite3VdbeScanStatus(a,b,c,d,e) +# define sqlite3VdbeScanStatus(a,b,c,d,e,f) +# define sqlite3VdbeScanStatusRange(a,b,c,d) +# define sqlite3VdbeScanStatusCounters(a,b,c,d) #endif #if defined(SQLITE_DEBUG) || defined(VDBE_PROFILE) SQLITE_PRIVATE void sqlite3VdbePrintOp(FILE*, int, VdbeOp*); #endif +#if defined(SQLITE_ENABLE_CURSOR_HINTS) && defined(SQLITE_DEBUG) +SQLITE_PRIVATE int sqlite3CursorRangeHintExprCheck(Walker *pWalker, Expr *pExpr); +#endif + #endif /* SQLITE_VDBE_H */ /************** End of vdbe.h ************************************************/ @@ -16061,7 +16996,7 @@ struct PgHdr { ** private to pcache.c and should not be accessed by other modules. ** pCache is grouped with the public elements for efficiency. */ - i16 nRef; /* Number of users of this page */ + i64 nRef; /* Number of users of this page */ PgHdr *pDirtyNext; /* Next element in list of dirty pages */ PgHdr *pDirtyPrev; /* Previous element in list of dirty pages */ /* NB: pDirtyNext and pDirtyPrev are undefined if the @@ -16142,12 +17077,12 @@ SQLITE_PRIVATE void sqlite3PcacheClearSyncFlags(PCache *); SQLITE_PRIVATE void sqlite3PcacheClear(PCache*); /* Return the total number of outstanding page references */ -SQLITE_PRIVATE int sqlite3PcacheRefCount(PCache*); +SQLITE_PRIVATE i64 sqlite3PcacheRefCount(PCache*); /* Increment the reference count of an existing page */ SQLITE_PRIVATE void sqlite3PcacheRef(PgHdr*); -SQLITE_PRIVATE int sqlite3PcachePageRefcount(PgHdr*); +SQLITE_PRIVATE i64 sqlite3PcachePageRefcount(PgHdr*); /* Return the total number of pages stored in the cache */ SQLITE_PRIVATE int sqlite3PcachePagecount(PCache*); @@ -16212,297 +17147,6 @@ SQLITE_PRIVATE int sqlite3PCacheIsDirty(PCache *pCache); /************** End of pcache.h **********************************************/ /************** Continuing where we left off in sqliteInt.h ******************/ -/************** Include os.h in the middle of sqliteInt.h ********************/ -/************** Begin file os.h **********************************************/ -/* -** 2001 September 16 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -****************************************************************************** -** -** This header file (together with is companion C source-code file -** "os.c") attempt to abstract the underlying operating system so that -** the SQLite library will work on both POSIX and windows systems. -** -** This header file is #include-ed by sqliteInt.h and thus ends up -** being included by every source file. -*/ -#ifndef _SQLITE_OS_H_ -#define _SQLITE_OS_H_ - -/* -** Attempt to automatically detect the operating system and setup the -** necessary pre-processor macros for it. 
-*/ -/************** Include os_setup.h in the middle of os.h *********************/ -/************** Begin file os_setup.h ****************************************/ -/* -** 2013 November 25 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -****************************************************************************** -** -** This file contains pre-processor directives related to operating system -** detection and/or setup. -*/ -#ifndef SQLITE_OS_SETUP_H -#define SQLITE_OS_SETUP_H - -/* -** Figure out if we are dealing with Unix, Windows, or some other operating -** system. -** -** After the following block of preprocess macros, all of SQLITE_OS_UNIX, -** SQLITE_OS_WIN, and SQLITE_OS_OTHER will defined to either 1 or 0. One of -** the three will be 1. The other two will be 0. -*/ -#if defined(SQLITE_OS_OTHER) -# if SQLITE_OS_OTHER==1 -# undef SQLITE_OS_UNIX -# define SQLITE_OS_UNIX 0 -# undef SQLITE_OS_WIN -# define SQLITE_OS_WIN 0 -# else -# undef SQLITE_OS_OTHER -# endif -#endif -#if !defined(SQLITE_OS_UNIX) && !defined(SQLITE_OS_OTHER) -# define SQLITE_OS_OTHER 0 -# ifndef SQLITE_OS_WIN -# if defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || \ - defined(__MINGW32__) || defined(__BORLANDC__) -# define SQLITE_OS_WIN 1 -# define SQLITE_OS_UNIX 0 -# else -# define SQLITE_OS_WIN 0 -# define SQLITE_OS_UNIX 1 -# endif -# else -# define SQLITE_OS_UNIX 0 -# endif -#else -# ifndef SQLITE_OS_WIN -# define SQLITE_OS_WIN 0 -# endif -#endif - -#endif /* SQLITE_OS_SETUP_H */ - -/************** End of os_setup.h ********************************************/ -/************** Continuing where we left off in os.h *************************/ - -/* If the SET_FULLSYNC macro is not defined above, then make it -** a no-op -*/ -#ifndef SET_FULLSYNC -# define SET_FULLSYNC(x,y) -#endif - -/* Maximum pathname length. Note: FILENAME_MAX defined by stdio.h -*/ -#ifndef SQLITE_MAX_PATHLEN -# define SQLITE_MAX_PATHLEN FILENAME_MAX -#endif - -/* Maximum number of symlinks that will be resolved while trying to -** expand a filename in xFullPathname() in the VFS. -*/ -#ifndef SQLITE_MAX_SYMLINK -# define SQLITE_MAX_SYMLINK 200 -#endif - -/* -** The default size of a disk sector -*/ -#ifndef SQLITE_DEFAULT_SECTOR_SIZE -# define SQLITE_DEFAULT_SECTOR_SIZE 4096 -#endif - -/* -** Temporary files are named starting with this prefix followed by 16 random -** alphanumeric characters, and no file extension. They are stored in the -** OS's standard temporary file directory, and are deleted prior to exit. -** If sqlite is being embedded in another program, you may wish to change the -** prefix to reflect your program's name, so that if your program exits -** prematurely, old temporary files can be easily identified. This can be done -** using -DSQLITE_TEMP_FILE_PREFIX=myprefix_ on the compiler command line. -** -** 2006-10-31: The default prefix used to be "sqlite_". But then -** Mcafee started using SQLite in their anti-virus product and it -** started putting files with the "sqlite" name in the c:/temp folder. -** This annoyed many windows users. Those users would then do a -** Google search for "sqlite", find the telephone numbers of the -** developers and call to wake them up at night and complain. -** For this reason, the default name prefix is changed to be "sqlite" -** spelled backwards. 
So the temp files are still identified, but -** anybody smart enough to figure out the code is also likely smart -** enough to know that calling the developer will not help get rid -** of the file. -*/ -#ifndef SQLITE_TEMP_FILE_PREFIX -# define SQLITE_TEMP_FILE_PREFIX "etilqs_" -#endif - -/* -** The following values may be passed as the second argument to -** sqlite3OsLock(). The various locks exhibit the following semantics: -** -** SHARED: Any number of processes may hold a SHARED lock simultaneously. -** RESERVED: A single process may hold a RESERVED lock on a file at -** any time. Other processes may hold and obtain new SHARED locks. -** PENDING: A single process may hold a PENDING lock on a file at -** any one time. Existing SHARED locks may persist, but no new -** SHARED locks may be obtained by other processes. -** EXCLUSIVE: An EXCLUSIVE lock precludes all other locks. -** -** PENDING_LOCK may not be passed directly to sqlite3OsLock(). Instead, a -** process that requests an EXCLUSIVE lock may actually obtain a PENDING -** lock. This can be upgraded to an EXCLUSIVE lock by a subsequent call to -** sqlite3OsLock(). -*/ -#define NO_LOCK 0 -#define SHARED_LOCK 1 -#define RESERVED_LOCK 2 -#define PENDING_LOCK 3 -#define EXCLUSIVE_LOCK 4 - -/* -** File Locking Notes: (Mostly about windows but also some info for Unix) -** -** We cannot use LockFileEx() or UnlockFileEx() on Win95/98/ME because -** those functions are not available. So we use only LockFile() and -** UnlockFile(). -** -** LockFile() prevents not just writing but also reading by other processes. -** A SHARED_LOCK is obtained by locking a single randomly-chosen -** byte out of a specific range of bytes. The lock byte is obtained at -** random so two separate readers can probably access the file at the -** same time, unless they are unlucky and choose the same lock byte. -** An EXCLUSIVE_LOCK is obtained by locking all bytes in the range. -** There can only be one writer. A RESERVED_LOCK is obtained by locking -** a single byte of the file that is designated as the reserved lock byte. -** A PENDING_LOCK is obtained by locking a designated byte different from -** the RESERVED_LOCK byte. -** -** On WinNT/2K/XP systems, LockFileEx() and UnlockFileEx() are available, -** which means we can use reader/writer locks. When reader/writer locks -** are used, the lock is placed on the same range of bytes that is used -** for probabilistic locking in Win95/98/ME. Hence, the locking scheme -** will support two or more Win95 readers or two or more WinNT readers. -** But a single Win95 reader will lock out all WinNT readers and a single -** WinNT reader will lock out all other Win95 readers. -** -** The following #defines specify the range of bytes used for locking. -** SHARED_SIZE is the number of bytes available in the pool from which -** a random byte is selected for a shared lock. The pool of bytes for -** shared locks begins at SHARED_FIRST. -** -** The same locking strategy and -** byte ranges are used for Unix. This leaves open the possibility of having -** clients on win95, winNT, and unix all talking to the same shared file -** and all locking correctly. To do so would require that samba (or whatever -** tool is being used for file sharing) implements locks correctly between -** windows and unix. I'm guessing that isn't likely to happen, but by -** using the same locking range we are at least open to the possibility. -** -** Locking in windows is manditory. 
For this reason, we cannot store -** actual data in the bytes used for locking. The pager never allocates -** the pages involved in locking therefore. SHARED_SIZE is selected so -** that all locks will fit on a single page even at the minimum page size. -** PENDING_BYTE defines the beginning of the locks. By default PENDING_BYTE -** is set high so that we don't have to allocate an unused page except -** for very large databases. But one should test the page skipping logic -** by setting PENDING_BYTE low and running the entire regression suite. -** -** Changing the value of PENDING_BYTE results in a subtly incompatible -** file format. Depending on how it is changed, you might not notice -** the incompatibility right away, even running a full regression test. -** The default location of PENDING_BYTE is the first byte past the -** 1GB boundary. -** -*/ -#ifdef SQLITE_OMIT_WSD -# define PENDING_BYTE (0x40000000) -#else -# define PENDING_BYTE sqlite3PendingByte -#endif -#define RESERVED_BYTE (PENDING_BYTE+1) -#define SHARED_FIRST (PENDING_BYTE+2) -#define SHARED_SIZE 510 - -/* -** Wrapper around OS specific sqlite3_os_init() function. -*/ -SQLITE_PRIVATE int sqlite3OsInit(void); - -/* -** Functions for accessing sqlite3_file methods -*/ -SQLITE_PRIVATE void sqlite3OsClose(sqlite3_file*); -SQLITE_PRIVATE int sqlite3OsRead(sqlite3_file*, void*, int amt, i64 offset); -SQLITE_PRIVATE int sqlite3OsWrite(sqlite3_file*, const void*, int amt, i64 offset); -SQLITE_PRIVATE int sqlite3OsTruncate(sqlite3_file*, i64 size); -SQLITE_PRIVATE int sqlite3OsSync(sqlite3_file*, int); -SQLITE_PRIVATE int sqlite3OsFileSize(sqlite3_file*, i64 *pSize); -SQLITE_PRIVATE int sqlite3OsLock(sqlite3_file*, int); -SQLITE_PRIVATE int sqlite3OsUnlock(sqlite3_file*, int); -SQLITE_PRIVATE int sqlite3OsCheckReservedLock(sqlite3_file *id, int *pResOut); -SQLITE_PRIVATE int sqlite3OsFileControl(sqlite3_file*,int,void*); -SQLITE_PRIVATE void sqlite3OsFileControlHint(sqlite3_file*,int,void*); -#define SQLITE_FCNTL_DB_UNCHANGED 0xca093fa0 -SQLITE_PRIVATE int sqlite3OsSectorSize(sqlite3_file *id); -SQLITE_PRIVATE int sqlite3OsDeviceCharacteristics(sqlite3_file *id); -#ifndef SQLITE_OMIT_WAL -SQLITE_PRIVATE int sqlite3OsShmMap(sqlite3_file *,int,int,int,void volatile **); -SQLITE_PRIVATE int sqlite3OsShmLock(sqlite3_file *id, int, int, int); -SQLITE_PRIVATE void sqlite3OsShmBarrier(sqlite3_file *id); -SQLITE_PRIVATE int sqlite3OsShmUnmap(sqlite3_file *id, int); -#endif /* SQLITE_OMIT_WAL */ -SQLITE_PRIVATE int sqlite3OsFetch(sqlite3_file *id, i64, int, void **); -SQLITE_PRIVATE int sqlite3OsUnfetch(sqlite3_file *, i64, void *); - - -/* -** Functions for accessing sqlite3_vfs methods -*/ -SQLITE_PRIVATE int sqlite3OsOpen(sqlite3_vfs *, const char *, sqlite3_file*, int, int *); -SQLITE_PRIVATE int sqlite3OsDelete(sqlite3_vfs *, const char *, int); -SQLITE_PRIVATE int sqlite3OsAccess(sqlite3_vfs *, const char *, int, int *pResOut); -SQLITE_PRIVATE int sqlite3OsFullPathname(sqlite3_vfs *, const char *, int, char *); -#ifndef SQLITE_OMIT_LOAD_EXTENSION -SQLITE_PRIVATE void *sqlite3OsDlOpen(sqlite3_vfs *, const char *); -SQLITE_PRIVATE void sqlite3OsDlError(sqlite3_vfs *, int, char *); -SQLITE_PRIVATE void (*sqlite3OsDlSym(sqlite3_vfs *, void *, const char *))(void); -SQLITE_PRIVATE void sqlite3OsDlClose(sqlite3_vfs *, void *); -#endif /* SQLITE_OMIT_LOAD_EXTENSION */ -SQLITE_PRIVATE int sqlite3OsRandomness(sqlite3_vfs *, int, char *); -SQLITE_PRIVATE int sqlite3OsSleep(sqlite3_vfs *, int); -SQLITE_PRIVATE int 
sqlite3OsGetLastError(sqlite3_vfs*); -SQLITE_PRIVATE int sqlite3OsCurrentTimeInt64(sqlite3_vfs *, sqlite3_int64*); - -/* -** Convenience functions for opening and closing files using -** sqlite3_malloc() to obtain space for the file-handle structure. -*/ -SQLITE_PRIVATE int sqlite3OsOpenMalloc(sqlite3_vfs *, const char *, sqlite3_file **, int,int*); -SQLITE_PRIVATE void sqlite3OsCloseFree(sqlite3_file *); - -#endif /* _SQLITE_OS_H_ */ - -/************** End of os.h **************************************************/ -/************** Continuing where we left off in sqliteInt.h ******************/ /************** Include mutex.h in the middle of sqliteInt.h *****************/ /************** Begin file mutex.h *******************************************/ /* @@ -16591,7 +17235,7 @@ SQLITE_API int sqlite3_mutex_held(sqlite3_mutex*); /* ** Default synchronous levels. ** -** Note that (for historcal reasons) the PAGER_SYNCHRONOUS_* macros differ +** Note that (for historical reasons) the PAGER_SYNCHRONOUS_* macros differ ** from the SQLITE_DEFAULT_SYNCHRONOUS value by 1. ** ** PAGER_SYNCHRONOUS DEFAULT_SYNCHRONOUS @@ -16630,7 +17274,7 @@ struct Db { ** An instance of the following structure stores a database schema. ** ** Most Schema objects are associated with a Btree. The exception is -** the Schema for the TEMP databaes (sqlite3.aDb[1]) which is free-standing. +** the Schema for the TEMP database (sqlite3.aDb[1]) which is free-standing. ** In shared cache mode, a single Schema object can be shared by multiple ** Btrees that refer to the same underlying BtShared object. ** @@ -16741,13 +17385,14 @@ struct Lookaside { LookasideSlot *pInit; /* List of buffers not previously used */ LookasideSlot *pFree; /* List of available buffers */ #ifndef SQLITE_OMIT_TWOSIZE_LOOKASIDE - LookasideSlot *pSmallInit; /* List of small buffers not prediously used */ + LookasideSlot *pSmallInit; /* List of small buffers not previously used */ LookasideSlot *pSmallFree; /* List of available small buffers */ void *pMiddle; /* First byte past end of full-size buffers and ** the first byte of LOOKASIDE_SMALL buffers */ #endif /* SQLITE_OMIT_TWOSIZE_LOOKASIDE */ void *pStart; /* First byte of available memory space */ void *pEnd; /* First byte past end of available space */ + void *pTrueEnd; /* True value of pEnd, when db->pnBytesFreed!=0 */ }; struct LookasideSlot { LookasideSlot *pNext; /* Next buffer in the list of free buffers */ @@ -16757,7 +17402,7 @@ struct LookasideSlot { #define EnableLookaside db->lookaside.bDisable--;\ db->lookaside.sz=db->lookaside.bDisable?0:db->lookaside.szTrue -/* Size of the smaller allocations in two-size lookside */ +/* Size of the smaller allocations in two-size lookaside */ #ifdef SQLITE_OMIT_TWOSIZE_LOOKASIDE # define LOOKASIDE_SMALL 0 #else @@ -16957,6 +17602,7 @@ struct sqlite3 { i64 nDeferredCons; /* Net deferred constraints this transaction. */ i64 nDeferredImmCons; /* Net deferred immediate constraints */ int *pnBytesFreed; /* If not NULL, increment this in DbFree() */ + DbClientData *pDbData; /* sqlite3_set_clientdata() content */ #ifdef SQLITE_ENABLE_UNLOCK_NOTIFY /* The following variables are all protected by the STATIC_MAIN ** mutex, not by sqlite3.mutex. They are used by code in notify.c. 
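Editor's note: the pDbData member added just above (together with the DbClientData struct defined in a later hunk) backs the sqlite3_set_clientdata() / sqlite3_get_clientdata() API that ships with this SQLite release. A minimal usage sketch, illustrative only and not part of the vendored diff; the AppState type, the "app" key, and appStateFree() are invented for the example:

  #include <stdio.h>
  #include <stdlib.h>
  #include "sqlite3.h"

  /* Hypothetical per-connection state to hang off the db handle. */
  typedef struct AppState { int nQueries; } AppState;

  static void appStateFree(void *p){ free(p); }

  int main(void){
    sqlite3 *db = 0;
    AppState *p, *q;
    if( sqlite3_open(":memory:", &db)!=SQLITE_OK ) return 1;
    p = (AppState*)calloc(1, sizeof(*p));
    /* Attach p under the name "app"; SQLite invokes appStateFree()
    ** when the connection closes or the name is reassigned. */
    sqlite3_set_clientdata(db, "app", p, appStateFree);
    q = (AppState*)sqlite3_get_clientdata(db, "app");
    printf("same pointer: %s\n", p==q ? "yes" : "no"); /* prints "yes" */
    sqlite3_close(db); /* destructor runs here */
    return 0;
  }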
@@ -17012,7 +17658,7 @@ struct sqlite3 { #define SQLITE_NullCallback 0x00000100 /* Invoke the callback once if the */ /* result set is empty */ #define SQLITE_IgnoreChecks 0x00000200 /* Do not enforce check constraints */ -#define SQLITE_ReadUncommit 0x00000400 /* READ UNCOMMITTED in shared-cache */ +#define SQLITE_StmtScanStatus 0x00000400 /* Enable stmt_scanstats() counters */ #define SQLITE_NoCkptOnClose 0x00000800 /* No checkpoint on close()/DETACH */ #define SQLITE_ReverseOrder 0x00001000 /* Reverse unordered SELECTs */ #define SQLITE_RecTriggers 0x00002000 /* Enable recursive triggers */ @@ -17038,6 +17684,8 @@ struct sqlite3 { /* DELETE, or UPDATE and return */ /* the count using a callback. */ #define SQLITE_CorruptRdOnly HI(0x00002) /* Prohibit writes due to error */ +#define SQLITE_ReadUncommit HI(0x00004) /* READ UNCOMMITTED in shared-cache */ +#define SQLITE_FkNoAction HI(0x00008) /* Treat all FK as NO ACTION */ /* Flags used only if debugging */ #ifdef SQLITE_DEBUG @@ -17092,6 +17740,10 @@ struct sqlite3 { #define SQLITE_ReleaseReg 0x00400000 /* Use OP_ReleaseReg for testing */ #define SQLITE_FlttnUnionAll 0x00800000 /* Disable the UNION ALL flattener */ /* TH3 expects this value ^^^^^^^^^^ See flatten04.test */ +#define SQLITE_IndexedExpr 0x01000000 /* Pull exprs from index when able */ +#define SQLITE_Coroutines 0x02000000 /* Co-routines for subqueries */ +#define SQLITE_NullUnusedCols 0x04000000 /* NULL unused columns in subqueries */ +#define SQLITE_OnePass 0x08000000 /* Single-pass DELETE and UPDATE */ #define SQLITE_AllOpts 0xffffffff /* All optimizations */ /* @@ -17174,10 +17826,17 @@ struct FuncDestructor { ** SQLITE_FUNC_ANYORDER == NC_OrderAgg == SF_OrderByReqd ** SQLITE_FUNC_LENGTH == OPFLAG_LENGTHARG ** SQLITE_FUNC_TYPEOF == OPFLAG_TYPEOFARG +** SQLITE_FUNC_BYTELEN == OPFLAG_BYTELENARG ** SQLITE_FUNC_CONSTANT == SQLITE_DETERMINISTIC from the API ** SQLITE_FUNC_DIRECT == SQLITE_DIRECTONLY from the API -** SQLITE_FUNC_UNSAFE == SQLITE_INNOCUOUS +** SQLITE_FUNC_UNSAFE == SQLITE_INNOCUOUS -- opposite meanings!!! ** SQLITE_FUNC_ENCMASK depends on SQLITE_UTF* macros in the API +** +** Note that even though SQLITE_FUNC_UNSAFE and SQLITE_INNOCUOUS have the +** same bit value, their meanings are inverted. SQLITE_FUNC_UNSAFE is +** used internally and if set means that the function has side effects. +** SQLITE_INNOCUOUS is used by application code and means "not unsafe". +** See multiple instances of tag-20230109-1. */ #define SQLITE_FUNC_ENCMASK 0x0003 /* SQLITE_UTF8, SQLITE_UTF16BE or UTF16LE */ #define SQLITE_FUNC_LIKE 0x0004 /* Candidate for the LIKE optimization */ @@ -17186,6 +17845,7 @@ struct FuncDestructor { #define SQLITE_FUNC_NEEDCOLL 0x0020 /* sqlite3GetFuncCollSeq() might be called*/ #define SQLITE_FUNC_LENGTH 0x0040 /* Built-in length() function */ #define SQLITE_FUNC_TYPEOF 0x0080 /* Built-in typeof() function */ +#define SQLITE_FUNC_BYTELEN 0x00c0 /* Built-in octet_length() function */ #define SQLITE_FUNC_COUNT 0x0100 /* Built-in count(*) aggregate */ /* 0x0200 -- available for reuse */ #define SQLITE_FUNC_UNLIKELY 0x0400 /* Built-in unlikely() function */ @@ -17194,14 +17854,15 @@ struct FuncDestructor { #define SQLITE_FUNC_SLOCHNG 0x2000 /* "Slow Change". 
Value constant during a ** single query - might change over time */ #define SQLITE_FUNC_TEST 0x4000 /* Built-in testing functions */ -/* 0x8000 -- available for reuse */ +#define SQLITE_FUNC_RUNONLY 0x8000 /* Cannot be used by valueFromFunction */ #define SQLITE_FUNC_WINDOW 0x00010000 /* Built-in window-only function */ #define SQLITE_FUNC_INTERNAL 0x00040000 /* For use by NestedParse() only */ #define SQLITE_FUNC_DIRECT 0x00080000 /* Not for use in TRIGGERs or VIEWs */ -#define SQLITE_FUNC_SUBTYPE 0x00100000 /* Result likely to have sub-type */ +/* SQLITE_SUBTYPE 0x00100000 // Consumer of subtypes */ #define SQLITE_FUNC_UNSAFE 0x00200000 /* Function has side effects */ #define SQLITE_FUNC_INLINE 0x00400000 /* Functions implemented in-line */ #define SQLITE_FUNC_BUILTIN 0x00800000 /* This is a built-in function */ +/* SQLITE_RESULT_SUBTYPE 0x01000000 // Generator of subtypes */ #define SQLITE_FUNC_ANYORDER 0x08000000 /* count/min/max aggregate */ /* Identifier numbers for each in-line function */ @@ -17293,9 +17954,10 @@ struct FuncDestructor { #define MFUNCTION(zName, nArg, xPtr, xFunc) \ {nArg, SQLITE_FUNC_BUILTIN|SQLITE_FUNC_CONSTANT|SQLITE_UTF8, \ xPtr, 0, xFunc, 0, 0, 0, #zName, {0} } -#define JFUNCTION(zName, nArg, iArg, xFunc) \ - {nArg, SQLITE_FUNC_BUILTIN|SQLITE_DETERMINISTIC|SQLITE_INNOCUOUS|\ - SQLITE_FUNC_CONSTANT|SQLITE_UTF8, \ +#define JFUNCTION(zName, nArg, bUseCache, bWS, bRS, iArg, xFunc) \ + {nArg, SQLITE_FUNC_BUILTIN|SQLITE_DETERMINISTIC|SQLITE_FUNC_CONSTANT|\ + SQLITE_UTF8|((bUseCache)*SQLITE_FUNC_RUNONLY)|\ + ((bRS)*SQLITE_SUBTYPE)|((bWS)*SQLITE_RESULT_SUBTYPE), \ SQLITE_INT_TO_PTR(iArg), 0, xFunc, 0, 0, 0, #zName, {0} } #define INLINE_FUNC(zName, nArg, iArg, mFlags) \ {nArg, SQLITE_FUNC_BUILTIN|\ @@ -17486,6 +18148,7 @@ struct CollSeq { #define SQLITE_AFF_NUMERIC 0x43 /* 'C' */ #define SQLITE_AFF_INTEGER 0x44 /* 'D' */ #define SQLITE_AFF_REAL 0x45 /* 'E' */ +#define SQLITE_AFF_FLEXNUM 0x46 /* 'F' */ #define sqlite3IsNumericAffinity(X) ((X)>=SQLITE_AFF_NUMERIC) @@ -17556,6 +18219,7 @@ struct VTable { sqlite3_vtab *pVtab; /* Pointer to vtab instance */ int nRef; /* Number of pointers to this structure */ u8 bConstraint; /* True if constraints are supported */ + u8 bAllSchemas; /* True if might use any attached schema */ u8 eVtabRisk; /* Riskiness of allowing hacker access */ int iSavepoint; /* Depth of the SAVEPOINT stack */ VTable *pNext; /* Next in linked list (see above) */ @@ -17664,7 +18328,7 @@ struct Table { #ifndef SQLITE_OMIT_VIRTUALTABLE # define IsVirtual(X) ((X)->eTabType==TABTYP_VTAB) # define ExprIsVtab(X) \ - ((X)->op==TK_COLUMN && (X)->y.pTab!=0 && (X)->y.pTab->eTabType==TABTYP_VTAB) + ((X)->op==TK_COLUMN && (X)->y.pTab->eTabType==TABTYP_VTAB) #else # define IsVirtual(X) 0 # define ExprIsVtab(X) 0 @@ -17763,7 +18427,7 @@ struct FKey { ** foreign key. ** ** The OE_Default value is a place holder that means to use whatever -** conflict resolution algorthm is required from context. +** conflict resolution algorithm is required from context. ** ** The following symbolic values are used to record which type ** of conflict resolution action to take. @@ -17881,10 +18545,22 @@ struct UnpackedRecord { ** The Index.onError field determines whether or not the indexed columns ** must be unique and what to do if they are not. When Index.onError=OE_None, ** it means this is not a unique index. 
Otherwise it is a unique index -** and the value of Index.onError indicate the which conflict resolution -** algorithm to employ whenever an attempt is made to insert a non-unique +** and the value of Index.onError indicates which conflict resolution +** algorithm to employ when an attempt is made to insert a non-unique ** element. ** +** The colNotIdxed bitmask is used in combination with SrcItem.colUsed +** for a fast test to see if an index can serve as a covering index. +** colNotIdxed has a 1 bit for every column of the original table that +** is *not* available in the index. Thus the expression +** "colUsed & colNotIdxed" will be non-zero if the index is not a +** covering index. The most significant bit of of colNotIdxed will always +** be true (note-20221022-a). If a column beyond the 63rd column of the +** table is used, the "colUsed & colNotIdxed" test will always be non-zero +** and we have to assume either that the index is not covering, or use +** an alternative (slower) algorithm to determine whether or not +** the index is covering. +** ** While parsing a CREATE TABLE or CREATE INDEX statement in order to ** generate VDBE code (as opposed to parsing one read from an sqlite_schema ** table as part of parsing an existing database schema), transient instances @@ -17920,15 +18596,18 @@ struct Index { unsigned bNoQuery:1; /* Do not use this index to optimize queries */ unsigned bAscKeyBug:1; /* True if the bba7b69f9849b5bf bug applies */ unsigned bHasVCol:1; /* Index references one or more VIRTUAL columns */ + unsigned bHasExpr:1; /* Index contains an expression, either a literal + ** expression, or a reference to a VIRTUAL column */ #ifdef SQLITE_ENABLE_STAT4 int nSample; /* Number of elements in aSample[] */ + int mxSample; /* Number of slots allocated to aSample[] */ int nSampleCol; /* Size of IndexSample.anEq[] and so on */ tRowcnt *aAvgEq; /* Average nEq values for keys not in aSample */ IndexSample *aSample; /* Samples of the left-most key */ tRowcnt *aiRowEst; /* Non-logarithmic stat1 data for this index */ tRowcnt nRowEst0; /* Non-logarithmic number of rows in the index */ #endif - Bitmask colNotIdxed; /* 0 for unindexed columns in pTab */ + Bitmask colNotIdxed; /* Unindexed columns in pTab */ }; /* @@ -18003,16 +18682,15 @@ struct AggInfo { ** from source tables rather than from accumulators */ u8 useSortingIdx; /* In direct mode, reference the sorting index rather ** than the source table */ + u16 nSortingColumn; /* Number of columns in the sorting index */ int sortingIdx; /* Cursor number of the sorting index */ int sortingIdxPTab; /* Cursor number of pseudo-table */ - int nSortingColumn; /* Number of columns in the sorting index */ - int mnReg, mxReg; /* Range of registers allocated for aCol and aFunc */ + int iFirstReg; /* First register in range for aCol[] and aFunc[] */ ExprList *pGroupBy; /* The group by clause */ struct AggInfo_col { /* For each column used in source tables */ Table *pTab; /* Source table */ Expr *pCExpr; /* The original expression */ int iTable; /* Cursor number of the source table */ - int iMem; /* Memory location that acts as accumulator */ i16 iColumn; /* Column number within the source table */ i16 iSorterColumn; /* Column number in the sorting index */ } *aCol; @@ -18023,14 +18701,30 @@ struct AggInfo { struct AggInfo_func { /* For each aggregate function */ Expr *pFExpr; /* Expression encoding the function */ FuncDef *pFunc; /* The aggregate function implementation */ - int iMem; /* Memory location that acts as accumulator */ int 
iDistinct; /* Ephemeral table used to enforce DISTINCT */ int iDistAddr; /* Address of OP_OpenEphemeral */ + int iOBTab; /* Ephemeral table to implement ORDER BY */ + u8 bOBPayload; /* iOBTab has payload columns separate from key */ + u8 bOBUnique; /* Enforce uniqueness on iOBTab keys */ } *aFunc; int nFunc; /* Number of entries in aFunc[] */ u32 selId; /* Select to which this AggInfo belongs */ +#ifdef SQLITE_DEBUG + Select *pSelect; /* SELECT statement that this AggInfo supports */ +#endif }; +/* +** Macros to compute aCol[] and aFunc[] register numbers. +** +** These macros should not be used prior to the call to +** assignAggregateRegisters() that computes the value of pAggInfo->iFirstReg. +** The assert()s that are part of this macro verify that constraint. +*/ +#define AggInfoColumnReg(A,I) (assert((A)->iFirstReg),(A)->iFirstReg+(I)) +#define AggInfoFuncReg(A,I) \ + (assert((A)->iFirstReg),(A)->iFirstReg+(A)->nColumn+(I)) + /* ** The datatype ynVar is a signed integer, either 16-bit or 32-bit. ** Usually it is 16-bits. But if SQLITE_MAX_VARIABLE_NUMBER is greater @@ -18150,7 +18844,7 @@ struct Expr { ** TK_REGISTER: register number ** TK_TRIGGER: 1 -> new, 0 -> old ** EP_Unlikely: 134217728 times likelihood - ** TK_IN: ephemerial table holding RHS + ** TK_IN: ephemeral table holding RHS ** TK_SELECT_COLUMN: Number of columns on the LHS ** TK_SELECT: 1st register of result vector */ ynVar iColumn; /* TK_COLUMN: column index. -1 for rowid. @@ -18196,7 +18890,7 @@ struct Expr { #define EP_Reduced 0x004000 /* Expr struct EXPR_REDUCEDSIZE bytes only */ #define EP_Win 0x008000 /* Contains window functions */ #define EP_TokenOnly 0x010000 /* Expr struct EXPR_TOKENONLYSIZE bytes only */ -#define EP_MemToken 0x020000 /* Need to sqlite3DbFree() Expr.zToken */ +#define EP_FullSize 0x020000 /* Expr structure must remain full sized */ #define EP_IfNullRow 0x040000 /* The TK_IF_NULL_ROW opcode */ #define EP_Unlikely 0x080000 /* unlikely() or likelihood() function */ #define EP_ConstFunc 0x100000 /* A SQLITE_FUNC_CONSTANT or _SLOCHNG function */ @@ -18226,12 +18920,15 @@ struct Expr { #define ExprClearProperty(E,P) (E)->flags&=~(P) #define ExprAlwaysTrue(E) (((E)->flags&(EP_OuterON|EP_IsTrue))==EP_IsTrue) #define ExprAlwaysFalse(E) (((E)->flags&(EP_OuterON|EP_IsFalse))==EP_IsFalse) +#define ExprIsFullSize(E) (((E)->flags&(EP_Reduced|EP_TokenOnly))==0) /* Macros used to ensure that the correct members of unions are accessed ** in Expr. */ #define ExprUseUToken(E) (((E)->flags&EP_IntValue)==0) #define ExprUseUValue(E) (((E)->flags&EP_IntValue)!=0) +#define ExprUseWOfst(E) (((E)->flags&(EP_InnerON|EP_OuterON))==0) +#define ExprUseWJoin(E) (((E)->flags&(EP_InnerON|EP_OuterON))!=0) #define ExprUseXList(E) (((E)->flags&EP_xIsSelect)==0) #define ExprUseXSelect(E) (((E)->flags&EP_xIsSelect)!=0) #define ExprUseYTab(E) (((E)->flags&(EP_WinFunc|EP_Subrtn))==0) @@ -18341,6 +19038,7 @@ struct ExprList { #define ENAME_NAME 0 /* The AS clause of a result set */ #define ENAME_SPAN 1 /* Complete text of the result set expression */ #define ENAME_TAB 2 /* "DB.TABLE.NAME" for the result set */ +#define ENAME_ROWID 3 /* "DB.TABLE._rowid_" for * expansion of rowid */ /* ** An instance of this structure can hold a simple list of identifiers, @@ -18381,6 +19079,14 @@ struct IdList { ** The SrcItem object represents a single term in the FROM clause of a query. ** The SrcList object is mostly an array of SrcItems. 
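+** (Editor's illustrative note, not upstream text: for a statement like
+**     SELECT * FROM t1 JOIN t2 USING(x), t3;
+** the resulting SrcList has nSrc==3, with a[0], a[1], a[2] describing
+** t1, t2, and t3; a[1].fg.isUsing is set and a[1].u3.pUsing holds the
+** identifier list ("x") from the USING clause.)
+**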
**
+** The jointype starts out showing the join type between the current table
+** and the next table on the list. The parser builds the list this way.
+** But sqlite3SrcListShiftJoinType() later shifts the jointypes so that each
+** jointype expresses the join between the table and the previous table.
+**
+** In the colUsed field, the high-order bit (bit 63) is set if the table
+** contains more than 63 columns and the 64-th or later column is used.
+**
** Union member validity:
**
** u1.zIndexedBy fg.isIndexedBy && !fg.isTabFunc
@@ -18412,7 +19118,7 @@ struct SrcItem {
unsigned notCte :1; /* This item may not match a CTE */
unsigned isUsing :1; /* u3.pUsing is valid */
unsigned isOn :1; /* u3.pOn was once valid and non-NULL */
- unsigned isSynthUsing :1; /* u3.pUsing is synthensized from NATURAL */
+ unsigned isSynthUsing :1; /* u3.pUsing is synthesized from NATURAL */
unsigned isNestedFrom :1; /* pSelect is a SF_NestedFrom subquery */
} fg;
int iCursor; /* The VDBE cursor number used to access this table */
@@ -18420,14 +19126,14 @@ struct SrcItem {
Expr *pOn; /* fg.isUsing==0 => The ON clause of a join */
IdList *pUsing; /* fg.isUsing==1 => The USING clause of a join */
} u3;
- Bitmask colUsed; /* Bit N (1<<N) set if column N of pTab is used */
+ Bitmask colUsed; /* Bit N set if column N used. Details above for N>62 */
union {
char *zIndexedBy; /* Identifier from "INDEXED BY <zIndex>" clause */
ExprList *pFuncArg; /* Arguments to table-valued-function */
} u1;
union {
Index *pIBIndex; /* Index structure corresponding to u1.zIndexedBy */
- CteUse *pCteUse; /* CTE Usage info info fg.isCte is true */
+ CteUse *pCteUse; /* CTE Usage info when fg.isCte is true */
} u2;
};
@@ -18441,23 +19147,11 @@ struct OnOrUsing {
};

/*
-** The following structure describes the FROM clause of a SELECT statement.
-** Each table or subquery in the FROM clause is a separate element of
-** the SrcList.a[] array.
-**
-** With the addition of multiple database support, the following structure
-** can also be used to describe a particular table such as the table that
-** is modified by an INSERT, DELETE, or UPDATE statement. In standard SQL,
-** such a table must be a simple name: ID. But in SQLite, the table can
-** now be identified by a database name, a dot, then the table name: ID.ID.
-**
-** The jointype starts out showing the join type between the current table
-** and the next table on the list. The parser builds the list this way.
-** But sqlite3SrcListShiftJoinType() later shifts the jointypes so that each
-** jointype expresses the join between the table and the previous table.
+** This object represents one or more tables that are the source of
+** content for an SQL statement. For example, a single SrcList object
+** is used to hold the FROM clause of a SELECT statement. SrcList also
+** represents the target tables for DELETE, INSERT, and UPDATE statements.
**
-** In the colUsed field, the high-order bit (bit 63) is set if the table
-** contains more than 63 columns and the 64-th or later column is used. 
*/ struct SrcList { int nSrc; /* Number of tables or subqueries in the FROM clause */ @@ -18565,7 +19259,7 @@ struct NameContext { #define NC_HasAgg 0x000010 /* One or more aggregate functions seen */ #define NC_IdxExpr 0x000020 /* True if resolving columns of CREATE INDEX */ #define NC_SelfRef 0x00002e /* Combo: PartIdx, isCheck, GenCol, and IdxExpr */ -#define NC_VarSelect 0x000040 /* A correlated subquery has been seen */ +#define NC_Subquery 0x000040 /* A subquery has been seen */ #define NC_UEList 0x000080 /* True if uNC.pEList is used */ #define NC_UAggInfo 0x000100 /* True if uNC.pAggInfo is used */ #define NC_UUpsert 0x000200 /* True if uNC.pUpsert is used */ @@ -18694,6 +19388,7 @@ struct Select { #define SF_MultiPart 0x2000000 /* Has multiple incompatible PARTITIONs */ #define SF_CopyCte 0x4000000 /* SELECT statement is a copy of a CTE */ #define SF_OrderByReqd 0x8000000 /* The ORDER BY clause may not be omitted */ +#define SF_UpdateFrom 0x10000000 /* Query originates with UPDATE FROM */ /* True if S exists and has SF_NestedFrom */ #define IsNestedFrom(S) ((S)!=0 && ((S)->selFlags&SF_NestedFrom)!=0) @@ -18802,7 +19497,7 @@ struct SelectDest { int iSDParm2; /* A second parameter for the eDest disposal method */ int iSdst; /* Base register where results are written */ int nSdst; /* Number of registers allocated */ - char *zAffSdst; /* Affinity used when eDest==SRT_Set */ + char *zAffSdst; /* Affinity used for SRT_Set */ ExprList *pOrderBy; /* Key columns for SRT_Queue and SRT_DistQueue */ }; @@ -18861,11 +19556,34 @@ struct TriggerPrg { #else typedef unsigned int yDbMask; # define DbMaskTest(M,I) (((M)&(((yDbMask)1)<<(I)))!=0) -# define DbMaskZero(M) (M)=0 -# define DbMaskSet(M,I) (M)|=(((yDbMask)1)<<(I)) -# define DbMaskAllZero(M) (M)==0 -# define DbMaskNonZero(M) (M)!=0 +# define DbMaskZero(M) ((M)=0) +# define DbMaskSet(M,I) ((M)|=(((yDbMask)1)<<(I))) +# define DbMaskAllZero(M) ((M)==0) +# define DbMaskNonZero(M) ((M)!=0) +#endif + +/* +** For each index X that has as one of its arguments either an expression +** or the name of a virtual generated column, and if X is in scope such that +** the value of the expression can simply be read from the index, then +** there is an instance of this object on the Parse.pIdxExpr list. +** +** During code generation, while generating code to evaluate expressions, +** this list is consulted and if a matching expression is found, the value +** is read from the index rather than being recomputed. 
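+**
+** (Editor's illustrative note, not upstream text: given
+**     CREATE INDEX i1 ON t1(a+b);
+** a query that scans t1 through i1 can satisfy references to a+b by
+** reading the stored index column instead of recomputing the sum; the
+** IndexedExpr entry records the expression together with the iIdxCur
+** and iIdxCol coordinates where its value can be found.)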
+*/ +struct IndexedExpr { + Expr *pExpr; /* The expression contained in the index */ + int iDataCur; /* The data cursor associated with the index */ + int iIdxCur; /* The index cursor */ + int iIdxCol; /* The index column that contains value of pExpr */ + u8 bMaybeNullRow; /* True if we need an OP_IfNullRow check */ + u8 aff; /* Affinity of the pExpr expression */ + IndexedExpr *pIENext; /* Next in a list of all indexed expressions */ +#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS + const char *zIdxName; /* Name of index, used only for bytecode comments */ #endif +}; /* ** An instance of the ParseCleanup object specifies an operation that @@ -18908,10 +19626,13 @@ struct Parse { u8 hasCompound; /* Need to invoke convertCompoundSelectToSubquery() */ u8 okConstFactor; /* OK to factor out constants */ u8 disableLookaside; /* Number of times lookaside has been disabled */ - u8 disableVtab; /* Disable all virtual tables for this parse */ + u8 prepFlags; /* SQLITE_PREPARE_* flags */ u8 withinRJSubrtn; /* Nesting level for RIGHT JOIN body subroutines */ #if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST) u8 earlyCleanup; /* OOM inside sqlite3ParserAddCleanup() */ +#endif +#ifdef SQLITE_DEBUG + u8 ifNotExists; /* Might be true if IF NOT EXISTS. Assert()s only */ #endif int nRangeReg; /* Size of the temporary register block */ int iRangeReg; /* First register in temporary register block */ @@ -18925,6 +19646,8 @@ struct Parse { int nLabelAlloc; /* Number of slots in aLabel */ int *aLabel; /* Space to hold the labels */ ExprList *pConstExpr;/* Constant expressions */ + IndexedExpr *pIdxEpr;/* List of expressions used by active indexes */ + IndexedExpr *pIdxPartExpr; /* Exprs constrained by index WHERE clauses */ Token constraintName;/* Name of the constraint currently being parsed */ yDbMask writeMask; /* Start a write transaction on these databases */ yDbMask cookieMask; /* Bitmask of schema verified databases */ @@ -18932,6 +19655,9 @@ struct Parse { int regRoot; /* Register holding root page number for new objects */ int nMaxArg; /* Max args passed to user function by sub-program */ int nSelect; /* Number of SELECT stmts. 
Counter for Select.selId */ +#ifndef SQLITE_OMIT_PROGRESS_CALLBACK + u32 nProgressSteps; /* xProgress steps taken during sqlite3_prepare() */ +#endif #ifndef SQLITE_OMIT_SHARED_CACHE int nTableLock; /* Number of locks in aTableLock */ TableLock *aTableLock; /* Required table locks for shared-cache mode */ @@ -18945,9 +19671,9 @@ struct Parse { int addrCrTab; /* Address of OP_CreateBtree on CREATE TABLE */ Returning *pReturning; /* The RETURNING clause */ } u1; - u32 nQueryLoop; /* Est number of iterations of a query (10*log2(N)) */ u32 oldmask; /* Mask of old.* columns referenced */ u32 newmask; /* Mask of new.* columns referenced */ + LogEst nQueryLoop; /* Est number of iterations of a query (10*log2(N)) */ u8 eTriggerOp; /* TK_UPDATE, TK_INSERT or TK_DELETE */ u8 bReturning; /* Coding a RETURNING trigger */ u8 eOrconf; /* Default ON CONFLICT policy for trigger steps */ @@ -19071,6 +19797,7 @@ struct AuthContext { #define OPFLAG_ISNOOP 0x40 /* OP_Delete does pre-update-hook only */ #define OPFLAG_LENGTHARG 0x40 /* OP_Column only used for length() */ #define OPFLAG_TYPEOFARG 0x80 /* OP_Column only used for typeof() */ +#define OPFLAG_BYTELENARG 0xc0 /* OP_Column only for octet_length() */ #define OPFLAG_BULKCSR 0x01 /* OP_Open** used to open bulk cursor */ #define OPFLAG_SEEKEQ 0x02 /* OP_Open** cursor uses EQ seek only */ #define OPFLAG_FORDELETE 0x08 /* OP_Open should use BTREE_FORDELETE */ @@ -19192,6 +19919,7 @@ struct Returning { int iRetCur; /* Transient table holding RETURNING results */ int nRetCol; /* Number of in pReturnEL after expansion */ int iRetReg; /* Register array for holding a row of RETURNING */ + char zName[40]; /* Name of trigger: "sqlite_returning_%p" */ }; /* @@ -19213,6 +19941,25 @@ struct sqlite3_str { #define isMalloced(X) (((X)->printfFlags & SQLITE_PRINTF_MALLOCED)!=0) +/* +** The following object is the header for an "RCStr" or "reference-counted +** string". An RCStr is passed around and used like any other char* +** that has been dynamically allocated. The important interface +** differences: +** +** 1. RCStr strings are reference counted. They are deallocated +** when the reference count reaches zero. +** +** 2. Use sqlite3RCStrUnref() to free an RCStr string rather than +** sqlite3_free() +** +** 3. Make a (read-only) copy of a read-only RCStr string using +** sqlite3RCStrRef(). +*/ +struct RCStr { + u64 nRCRef; /* Number of references */ + /* Total structure size should be a multiple of 8 bytes for alignment */ +}; /* ** A pointer to this structure is used to communicate information @@ -19239,7 +19986,7 @@ typedef struct { /* Tuning parameters are set using SQLITE_TESTCTRL_TUNE and are controlled ** on debug-builds of the CLI using ".testctrl tune ID VALUE". Tuning ** parameters are for temporary use during development, to help find -** optimial values for parameters in the query planner. The should not +** optimal values for parameters in the query planner. The should not ** be used on trunk check-ins. They are a temporary mechanism available ** for transient development builds only. 
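+**
+** (Editor's illustrative note, not upstream text: in a debug build of
+** the CLI, ".testctrl tune 1 100" stores 100 in the first tuning slot,
+** so a planner heuristic experimentally guarded by Tuning(1) can be
+** adjusted without recompiling.)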
** @@ -19265,6 +20012,7 @@ struct Sqlite3Config { u8 bUseCis; /* Use covering indices for full-scans */ u8 bSmallMalloc; /* Avoid large memory allocations if true */ u8 bExtraSchemaChecks; /* Verify type,name,tbl_name in schema */ + u8 bUseLongDouble; /* Make use of long double */ int mxStrlen; /* Maximum string length */ int neverCorrupt; /* Database is always well-formed */ int szLookaside; /* Default lookaside buffer size */ @@ -19351,6 +20099,7 @@ struct Walker { void (*xSelectCallback2)(Walker*,Select*);/* Second callback for SELECTs */ int walkerDepth; /* Number of subqueries */ u16 eCode; /* A small processing code */ + u16 mWFlags; /* Use-dependent flags */ union { /* Extra data for callback */ NameContext *pNC; /* Naming context */ int n; /* A counter */ @@ -19360,15 +20109,16 @@ struct Walker { struct RefSrcList *pRefSrcList; /* sqlite3ReferencesSrcList() */ int *aiCol; /* array of column indexes */ struct IdxCover *pIdxCover; /* Check for index coverage */ - struct IdxExprTrans *pIdxTrans; /* Convert idxed expr to column */ ExprList *pGroupBy; /* GROUP BY clause */ Select *pSelect; /* HAVING to WHERE clause ctx */ struct WindowRewrite *pRewrite; /* Window rewrite context */ struct WhereConst *pConst; /* WHERE clause constants */ struct RenameCtx *pRename; /* RENAME COLUMN context */ struct Table *pTab; /* Table of generated column */ + struct CoveringIndexCheck *pCovIdxCk; /* Check for covering index */ SrcItem *pSrcItem; /* A single FROM clause item */ - DbFixer *pFix; + DbFixer *pFix; /* See sqlite3FixSelect() */ + Mem *aMem; /* See sqlite3BtreeCursorHint() */ } u; }; @@ -19389,6 +20139,7 @@ struct DbFixer { /* Forward declarations */ SQLITE_PRIVATE int sqlite3WalkExpr(Walker*, Expr*); +SQLITE_PRIVATE int sqlite3WalkExprNN(Walker*, Expr*); SQLITE_PRIVATE int sqlite3WalkExprList(Walker*, ExprList*); SQLITE_PRIVATE int sqlite3WalkSelect(Walker*, Select*); SQLITE_PRIVATE int sqlite3WalkSelectExpr(Walker*, Select*); @@ -19469,6 +20220,16 @@ struct CteUse { }; +/* Client data associated with sqlite3_set_clientdata() and +** sqlite3_get_clientdata(). +*/ +struct DbClientData { + DbClientData *pNext; /* Next in a linked list */ + void *pData; /* The data */ + void (*xDestructor)(void*); /* Destructor. Might be NULL */ + char zName[1]; /* Name of this client data. 
MUST BE LAST */ +}; + #ifdef SQLITE_DEBUG /* ** An instance of the TreeView object is used for printing the content of @@ -19638,6 +20399,8 @@ SQLITE_PRIVATE int sqlite3CorruptPgnoError(int,Pgno); # define sqlite3Isxdigit(x) (sqlite3CtypeMap[(unsigned char)(x)]&0x08) # define sqlite3Tolower(x) (sqlite3UpperToLower[(unsigned char)(x)]) # define sqlite3Isquote(x) (sqlite3CtypeMap[(unsigned char)(x)]&0x80) +# define sqlite3JsonId1(x) (sqlite3CtypeMap[(unsigned char)(x)]&0x42) +# define sqlite3JsonId2(x) (sqlite3CtypeMap[(unsigned char)(x)]&0x46) #else # define sqlite3Toupper(x) toupper((unsigned char)(x)) # define sqlite3Isspace(x) isspace((unsigned char)(x)) @@ -19647,6 +20410,8 @@ SQLITE_PRIVATE int sqlite3CorruptPgnoError(int,Pgno); # define sqlite3Isxdigit(x) isxdigit((unsigned char)(x)) # define sqlite3Tolower(x) tolower((unsigned char)(x)) # define sqlite3Isquote(x) ((x)=='"'||(x)=='\''||(x)=='['||(x)=='`') +# define sqlite3JsonId1(x) (sqlite3IsIdChar(x)&&(x)<'0') +# define sqlite3JsonId2(x) sqlite3IsIdChar(x) #endif SQLITE_PRIVATE int sqlite3IsIdChar(u8); @@ -19674,6 +20439,7 @@ SQLITE_PRIVATE void *sqlite3DbReallocOrFree(sqlite3 *, void *, u64); SQLITE_PRIVATE void *sqlite3DbRealloc(sqlite3 *, void *, u64); SQLITE_PRIVATE void sqlite3DbFree(sqlite3*, void*); SQLITE_PRIVATE void sqlite3DbFreeNN(sqlite3*, void*); +SQLITE_PRIVATE void sqlite3DbNNFreeNN(sqlite3*, void*); SQLITE_PRIVATE int sqlite3MallocSize(const void*); SQLITE_PRIVATE int sqlite3DbMallocSize(sqlite3*, const void*); SQLITE_PRIVATE void *sqlite3PageMalloc(int); @@ -19694,12 +20460,14 @@ SQLITE_PRIVATE int sqlite3HeapNearlyFull(void); */ #ifdef SQLITE_USE_ALLOCA # define sqlite3StackAllocRaw(D,N) alloca(N) -# define sqlite3StackAllocZero(D,N) memset(alloca(N), 0, N) +# define sqlite3StackAllocRawNN(D,N) alloca(N) # define sqlite3StackFree(D,P) +# define sqlite3StackFreeNN(D,P) #else # define sqlite3StackAllocRaw(D,N) sqlite3DbMallocRaw(D,N) -# define sqlite3StackAllocZero(D,N) sqlite3DbMallocZero(D,N) +# define sqlite3StackAllocRawNN(D,N) sqlite3DbMallocRawNN(D,N) # define sqlite3StackFree(D,P) sqlite3DbFree(D,P) +# define sqlite3StackFreeNN(D,P) sqlite3DbFreeNN(D,P) #endif /* Do not allow both MEMSYS5 and MEMSYS3 to be defined together. If they @@ -19763,6 +20531,20 @@ struct PrintfArguments { sqlite3_value **apArg; /* The argument values */ }; +/* +** An instance of this object receives the decoding of a floating point +** value into an approximate decimal representation. 
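+**
+** (Editor's illustrative note, not upstream text: decoding the value
+** 3.14 would be expected to produce sign=='+', z=="314" with n==3
+** significant digits, and iDP==1, i.e. the decimal point falls after
+** the first digit.)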
+*/ +struct FpDecode { + char sign; /* '+' or '-' */ + char isSpecial; /* 1: Infinity 2: NaN */ + int n; /* Significant digits in the decode */ + int iDP; /* Location of the decimal point */ + char *z; /* Start of significant digits */ + char zBuf[24]; /* Storage for significant digits */ +}; + +SQLITE_PRIVATE void sqlite3FpDecode(FpDecode*,double,int,int); SQLITE_PRIVATE char *sqlite3MPrintf(sqlite3*,const char*, ...); SQLITE_PRIVATE char *sqlite3VMPrintf(sqlite3*,const char*, va_list); #if defined(SQLITE_DEBUG) || defined(SQLITE_HAVE_OS_TRACE) @@ -19822,6 +20604,7 @@ SQLITE_PRIVATE void sqlite3ShowWinFunc(const Window*); #endif SQLITE_PRIVATE void sqlite3SetString(char **, sqlite3*, const char*); +SQLITE_PRIVATE void sqlite3ProgressCheck(Parse*); SQLITE_PRIVATE void sqlite3ErrorMsg(Parse*, const char*, ...); SQLITE_PRIVATE int sqlite3ErrorToParser(sqlite3*,int); SQLITE_PRIVATE void sqlite3Dequote(char*); @@ -19836,6 +20619,10 @@ SQLITE_PRIVATE void sqlite3ReleaseTempReg(Parse*,int); SQLITE_PRIVATE int sqlite3GetTempRange(Parse*,int); SQLITE_PRIVATE void sqlite3ReleaseTempRange(Parse*,int,int); SQLITE_PRIVATE void sqlite3ClearTempRegCache(Parse*); +SQLITE_PRIVATE void sqlite3TouchRegister(Parse*,int); +#if defined(SQLITE_ENABLE_STAT4) || defined(SQLITE_DEBUG) +SQLITE_PRIVATE int sqlite3FirstAvailableRegister(Parse*,int); +#endif #ifdef SQLITE_DEBUG SQLITE_PRIVATE int sqlite3NoTempsInRange(Parse*,int,int); #endif @@ -19847,6 +20634,8 @@ SQLITE_PRIVATE void sqlite3PExprAddSelect(Parse*, Expr*, Select*); SQLITE_PRIVATE Expr *sqlite3ExprAnd(Parse*,Expr*, Expr*); SQLITE_PRIVATE Expr *sqlite3ExprSimplifiedAndOr(Expr*); SQLITE_PRIVATE Expr *sqlite3ExprFunction(Parse*,ExprList*, const Token*, int); +SQLITE_PRIVATE void sqlite3ExprAddFunctionOrderBy(Parse*,Expr*,ExprList*); +SQLITE_PRIVATE void sqlite3ExprOrderByAggregateError(Parse*,Expr*); SQLITE_PRIVATE void sqlite3ExprFunctionUsable(Parse*,const Expr*,const FuncDef*); SQLITE_PRIVATE void sqlite3ExprAssignVarNumber(Parse*, Expr*, u32); SQLITE_PRIVATE void sqlite3ExprDelete(sqlite3*, Expr*); @@ -19879,7 +20668,7 @@ SQLITE_PRIVATE const char *sqlite3ColumnColl(Column*); SQLITE_PRIVATE void sqlite3DeleteColumnNames(sqlite3*,Table*); SQLITE_PRIVATE void sqlite3GenerateColumnNames(Parse *pParse, Select *pSelect); SQLITE_PRIVATE int sqlite3ColumnsFromExprList(Parse*,ExprList*,i16*,Column**); -SQLITE_PRIVATE void sqlite3SelectAddColumnTypeAndCollation(Parse*,Table*,Select*,char); +SQLITE_PRIVATE void sqlite3SubqueryColumnTypes(Parse*,Table*,Select*,char); SQLITE_PRIVATE Table *sqlite3ResultSetOfSelect(Parse*,Select*,char); SQLITE_PRIVATE void sqlite3OpenSchemaTable(Parse *, int); SQLITE_PRIVATE Index *sqlite3PrimaryKeyIndex(Table*); @@ -19986,7 +20775,7 @@ SQLITE_PRIVATE Select *sqlite3SelectNew(Parse*,ExprList*,SrcList*,Expr*,ExprList Expr*,ExprList*,u32,Expr*); SQLITE_PRIVATE void sqlite3SelectDelete(sqlite3*, Select*); SQLITE_PRIVATE Table *sqlite3SrcListLookup(Parse*, SrcList*); -SQLITE_PRIVATE int sqlite3IsReadOnly(Parse*, Table*, int); +SQLITE_PRIVATE int sqlite3IsReadOnly(Parse*, Table*, Trigger*); SQLITE_PRIVATE void sqlite3OpenTable(Parse*, int iCur, int iDb, Table*, int); #if defined(SQLITE_ENABLE_UPDATE_DELETE_LIMIT) && !defined(SQLITE_OMIT_SUBQUERY) SQLITE_PRIVATE Expr *sqlite3LimitWhere(Parse*,SrcList*,Expr*,ExprList*,Expr*,char*); @@ -20048,7 +20837,7 @@ SQLITE_PRIVATE int sqlite3ExprCompare(const Parse*,const Expr*,const Expr*, int) SQLITE_PRIVATE int sqlite3ExprCompareSkip(Expr*,Expr*,int); SQLITE_PRIVATE int 
sqlite3ExprListCompare(const ExprList*,const ExprList*, int); SQLITE_PRIVATE int sqlite3ExprImpliesExpr(const Parse*,const Expr*,const Expr*, int); -SQLITE_PRIVATE int sqlite3ExprImpliesNonNullRow(Expr*,int); +SQLITE_PRIVATE int sqlite3ExprImpliesNonNullRow(Expr*,int,int); SQLITE_PRIVATE void sqlite3AggInfoPersistWalkerInit(Walker*,Parse*); SQLITE_PRIVATE void sqlite3ExprAnalyzeAggregates(NameContext*, Expr*); SQLITE_PRIVATE void sqlite3ExprAnalyzeAggList(NameContext*,ExprList*); @@ -20075,7 +20864,7 @@ SQLITE_PRIVATE int sqlite3ExprIsConstantNotJoin(Expr*); SQLITE_PRIVATE int sqlite3ExprIsConstantOrFunction(Expr*, u8); SQLITE_PRIVATE int sqlite3ExprIsConstantOrGroupBy(Parse*, Expr*, ExprList*); SQLITE_PRIVATE int sqlite3ExprIsTableConstant(Expr*,int); -SQLITE_PRIVATE int sqlite3ExprIsTableConstraint(Expr*,const SrcItem*); +SQLITE_PRIVATE int sqlite3ExprIsSingleTableConstraint(Expr*,const SrcList*,int); #ifdef SQLITE_ENABLE_CURSOR_HINTS SQLITE_PRIVATE int sqlite3ExprContainsSubquery(Expr*); #endif @@ -20083,6 +20872,7 @@ SQLITE_PRIVATE int sqlite3ExprIsInteger(const Expr*, int*); SQLITE_PRIVATE int sqlite3ExprCanBeNull(const Expr*); SQLITE_PRIVATE int sqlite3ExprNeedsNoAffinityChange(const Expr*, char); SQLITE_PRIVATE int sqlite3IsRowid(const char*); +SQLITE_PRIVATE const char *sqlite3RowidAlias(Table *pTab); SQLITE_PRIVATE void sqlite3GenerateRowDelete( Parse*,Table*,Trigger*,int,int,int,i16,u8,u8,u8,int); SQLITE_PRIVATE void sqlite3GenerateRowIndexDelete(Parse*, Table*, int, int, int*, int); @@ -20197,8 +20987,10 @@ SQLITE_PRIVATE int sqlite3FixSrcList(DbFixer*, SrcList*); SQLITE_PRIVATE int sqlite3FixSelect(DbFixer*, Select*); SQLITE_PRIVATE int sqlite3FixExpr(DbFixer*, Expr*); SQLITE_PRIVATE int sqlite3FixTriggerStep(DbFixer*, TriggerStep*); + SQLITE_PRIVATE int sqlite3RealSameAsInt(double,sqlite3_int64); -SQLITE_PRIVATE void sqlite3Int64ToText(i64,char*); +SQLITE_PRIVATE i64 sqlite3RealToI64(double); +SQLITE_PRIVATE int sqlite3Int64ToText(i64,char*); SQLITE_PRIVATE int sqlite3AtoF(const char *z, double*, int, u8); SQLITE_PRIVATE int sqlite3GetInt32(const char *, int*); SQLITE_PRIVATE int sqlite3GetUInt32(const char*, u32*); @@ -20243,11 +21035,13 @@ SQLITE_PRIVATE int sqlite3VarintLen(u64 v); SQLITE_PRIVATE const char *sqlite3IndexAffinityStr(sqlite3*, Index*); +SQLITE_PRIVATE char *sqlite3TableAffinityStr(sqlite3*,const Table*); SQLITE_PRIVATE void sqlite3TableAffinity(Vdbe*, Table*, int); SQLITE_PRIVATE char sqlite3CompareAffinity(const Expr *pExpr, char aff2); SQLITE_PRIVATE int sqlite3IndexAffinityOk(const Expr *pExpr, char idx_affinity); SQLITE_PRIVATE char sqlite3TableColumnAffinity(const Table*,int); SQLITE_PRIVATE char sqlite3ExprAffinity(const Expr *pExpr); +SQLITE_PRIVATE int sqlite3ExprDataType(const Expr *pExpr); SQLITE_PRIVATE int sqlite3Atoi64(const char*, i64*, int, u8); SQLITE_PRIVATE int sqlite3DecOrHexToI64(const char*, i64*); SQLITE_PRIVATE void sqlite3ErrorWithMsg(sqlite3*, int, const char*,...); @@ -20264,6 +21058,9 @@ SQLITE_PRIVATE const char *sqlite3ErrName(int); #ifndef SQLITE_OMIT_DESERIALIZE SQLITE_PRIVATE int sqlite3MemdbInit(void); +SQLITE_PRIVATE int sqlite3IsMemdb(const sqlite3_vfs*); +#else +# define sqlite3IsMemdb(X) 0 #endif SQLITE_PRIVATE const char *sqlite3ErrStr(int); @@ -20295,6 +21092,7 @@ SQLITE_PRIVATE void sqlite3FileSuffix3(const char*, char*); SQLITE_PRIVATE u8 sqlite3GetBoolean(const char *z,u8); SQLITE_PRIVATE const void *sqlite3ValueText(sqlite3_value*, u8); +SQLITE_PRIVATE int sqlite3ValueIsOfClass(const sqlite3_value*, void(*)(void*)); 
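+/* (Editor's illustrative sketch, not part of the vendored diff: the
+** RCStr helpers declared in a later hunk follow this usage pattern,
+** with the reference-count header stored just before the returned
+** char* buffer.)
+**
+**    char *z = sqlite3RCStrNew(6);    // fresh buffer, refcount 1
+**    memcpy(z, "hello", 6);           // includes the nul terminator
+**    char *z2 = sqlite3RCStrRef(z);   // same pointer, refcount 2
+**    sqlite3RCStrUnref(z2);           // refcount back to 1
+**    sqlite3RCStrUnref(z);            // refcount 0: buffer is freed
+*/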
SQLITE_PRIVATE int sqlite3ValueBytes(sqlite3_value*, u8); SQLITE_PRIVATE void sqlite3ValueSetStr(sqlite3_value*, int, const void *,u8, void(*)(void*)); @@ -20314,7 +21112,6 @@ SQLITE_PRIVATE const unsigned char sqlite3OpcodeProperty[]; SQLITE_PRIVATE const char sqlite3StrBINARY[]; SQLITE_PRIVATE const unsigned char sqlite3StdTypeLen[]; SQLITE_PRIVATE const char sqlite3StdTypeAffinity[]; -SQLITE_PRIVATE const char sqlite3StdTypeMap[]; SQLITE_PRIVATE const char *sqlite3StdType[]; SQLITE_PRIVATE const unsigned char sqlite3UpperToLower[]; SQLITE_PRIVATE const unsigned char *sqlite3aLTb; @@ -20347,7 +21144,8 @@ SQLITE_PRIVATE int sqlite3MatchEName( const struct ExprList_item*, const char*, const char*, - const char* + const char*, + int* ); SQLITE_PRIVATE Bitmask sqlite3ExprColUsed(Expr*); SQLITE_PRIVATE u8 sqlite3StrIHash(const char*); @@ -20403,8 +21201,13 @@ SQLITE_PRIVATE void sqlite3OomClear(sqlite3*); SQLITE_PRIVATE int sqlite3ApiExit(sqlite3 *db, int); SQLITE_PRIVATE int sqlite3OpenTempDatabase(Parse *); +SQLITE_PRIVATE char *sqlite3RCStrRef(char*); +SQLITE_PRIVATE void sqlite3RCStrUnref(void*); +SQLITE_PRIVATE char *sqlite3RCStrNew(u64); +SQLITE_PRIVATE char *sqlite3RCStrResize(char*,u64); + SQLITE_PRIVATE void sqlite3StrAccumInit(StrAccum*, sqlite3*, char*, int, int); -SQLITE_PRIVATE int sqlite3StrAccumEnlarge(StrAccum*, int); +SQLITE_PRIVATE int sqlite3StrAccumEnlarge(StrAccum*, i64); SQLITE_PRIVATE char *sqlite3StrAccumFinish(StrAccum*); SQLITE_PRIVATE void sqlite3StrAccumSetError(StrAccum*, u8); SQLITE_PRIVATE void sqlite3ResultStrAccum(sqlite3_context*,StrAccum*); @@ -20518,10 +21321,7 @@ SQLITE_PRIVATE int sqlite3VtabCallDestroy(sqlite3*, int, const char *); SQLITE_PRIVATE int sqlite3VtabBegin(sqlite3 *, VTable *); SQLITE_PRIVATE FuncDef *sqlite3VtabOverloadFunction(sqlite3 *,FuncDef*, int nArg, Expr*); -#if (defined(SQLITE_ENABLE_DBPAGE_VTAB) || defined(SQLITE_TEST)) \ - && !defined(SQLITE_OMIT_VIRTUALTABLE) -SQLITE_PRIVATE void sqlite3VtabUsesAllSchemas(sqlite3_index_info*); -#endif +SQLITE_PRIVATE void sqlite3VtabUsesAllSchemas(Parse*); SQLITE_PRIVATE sqlite3_int64 sqlite3StmtCurrentTime(sqlite3_context*); SQLITE_PRIVATE int sqlite3VdbeParameterIndex(Vdbe*, const char*, int); SQLITE_PRIVATE int sqlite3TransferBindings(sqlite3_stmt *, sqlite3_stmt *); @@ -20657,6 +21457,7 @@ SQLITE_PRIVATE int sqlite3ExprCheckHeight(Parse*, int); #define sqlite3SelectExprHeight(x) 0 #define sqlite3ExprCheckHeight(x,y) #endif +SQLITE_PRIVATE void sqlite3ExprSetErrorOffset(Expr*,int); SQLITE_PRIVATE u32 sqlite3Get4byte(const u8*); SQLITE_PRIVATE void sqlite3Put4byte(u8*, u32); @@ -20758,6 +21559,22 @@ SQLITE_PRIVATE void sqlite3VectorErrorMsg(Parse*, Expr*); SQLITE_PRIVATE const char **sqlite3CompileOptions(int *pnOpt); #endif +#if SQLITE_OS_UNIX && defined(SQLITE_OS_KV_OPTIONAL) +SQLITE_PRIVATE int sqlite3KvvfsInit(void); +#endif + +#if defined(VDBE_PROFILE) \ + || defined(SQLITE_PERFORMANCE_TRACE) \ + || defined(SQLITE_ENABLE_STMT_SCANSTATUS) +SQLITE_PRIVATE sqlite3_uint64 sqlite3Hwtime(void); +#endif + +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS +# define IS_STMT_SCANSTATUS(db) (db->flags & SQLITE_StmtScanStatus) +#else +# define IS_STMT_SCANSTATUS(db) 0 +#endif + #endif /* SQLITEINT_H */ /************** End of sqliteInt.h *******************************************/ @@ -20799,101 +21616,6 @@ SQLITE_PRIVATE const char **sqlite3CompileOptions(int *pnOpt); */ #ifdef SQLITE_PERFORMANCE_TRACE -/* -** hwtime.h contains inline assembler code for implementing -** high-performance timing routines. 
-*/ -/************** Include hwtime.h in the middle of os_common.h ****************/ -/************** Begin file hwtime.h ******************************************/ -/* -** 2008 May 27 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -****************************************************************************** -** -** This file contains inline asm code for retrieving "high-performance" -** counters for x86 and x86_64 class CPUs. -*/ -#ifndef SQLITE_HWTIME_H -#define SQLITE_HWTIME_H - -/* -** The following routine only works on pentium-class (or newer) processors. -** It uses the RDTSC opcode to read the cycle count value out of the -** processor and returns that value. This can be used for high-res -** profiling. -*/ -#if !defined(__STRICT_ANSI__) && \ - (defined(__GNUC__) || defined(_MSC_VER)) && \ - (defined(i386) || defined(__i386__) || defined(_M_IX86)) - - #if defined(__GNUC__) - - __inline__ sqlite_uint64 sqlite3Hwtime(void){ - unsigned int lo, hi; - __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); - return (sqlite_uint64)hi << 32 | lo; - } - - #elif defined(_MSC_VER) - - __declspec(naked) __inline sqlite_uint64 __cdecl sqlite3Hwtime(void){ - __asm { - rdtsc - ret ; return value at EDX:EAX - } - } - - #endif - -#elif !defined(__STRICT_ANSI__) && (defined(__GNUC__) && defined(__x86_64__)) - - __inline__ sqlite_uint64 sqlite3Hwtime(void){ - unsigned long val; - __asm__ __volatile__ ("rdtsc" : "=A" (val)); - return val; - } - -#elif !defined(__STRICT_ANSI__) && (defined(__GNUC__) && defined(__ppc__)) - - __inline__ sqlite_uint64 sqlite3Hwtime(void){ - unsigned long long retval; - unsigned long junk; - __asm__ __volatile__ ("\n\ - 1: mftbu %1\n\ - mftb %L0\n\ - mftbu %0\n\ - cmpw %0,%1\n\ - bne 1b" - : "=r" (retval), "=r" (junk)); - return retval; - } - -#else - - /* - ** asm() is needed for hardware timing support. Without asm(), - ** disable the sqlite3Hwtime() routine. - ** - ** sqlite3Hwtime() is only used for some obscure debugging - ** and analysis configurations, not in any deliverable, so this - ** should not be a great loss. 
- */ -SQLITE_PRIVATE sqlite_uint64 sqlite3Hwtime(void){ return ((sqlite_uint64)0); } - -#endif - -#endif /* !defined(SQLITE_HWTIME_H) */ - -/************** End of hwtime.h **********************************************/ -/************** Continuing where we left off in os_common.h ******************/ - static sqlite_uint64 g_start; static sqlite_uint64 g_elapsed; #define TIMER_START g_start=sqlite3Hwtime() @@ -20989,7 +21711,7 @@ SQLITE_API extern int sqlite3_open_file_count; ** autoconf-based build */ #if defined(_HAVE_SQLITE_CONFIG_H) && !defined(SQLITECONFIG_H) -/* #include "config.h" */ +/* #include "sqlite_cfg.h" */ #define SQLITECONFIG_H 1 #endif @@ -21021,9 +21743,6 @@ static const char * const sqlite3azCompileOpt[] = { #ifdef SQLITE_4_BYTE_ALIGNED_MALLOC "4_BYTE_ALIGNED_MALLOC", #endif -#ifdef SQLITE_64BIT_STATS - "64BIT_STATS", -#endif #ifdef SQLITE_ALLOW_COVERING_INDEX_SCAN # if SQLITE_ALLOW_COVERING_INDEX_SCAN != 1 "ALLOW_COVERING_INDEX_SCAN=" CTIMEOPT_VAL(SQLITE_ALLOW_COVERING_INDEX_SCAN), @@ -21154,6 +21873,9 @@ static const char * const sqlite3azCompileOpt[] = { #ifdef SQLITE_DISABLE_SKIPAHEAD_DISTINCT "DISABLE_SKIPAHEAD_DISTINCT", #endif +#ifdef SQLITE_DQS + "DQS=" CTIMEOPT_VAL(SQLITE_DQS), +#endif #ifdef SQLITE_ENABLE_8_3_NAMES "ENABLE_8_3_NAMES=" CTIMEOPT_VAL(SQLITE_ENABLE_8_3_NAMES), #endif @@ -21316,6 +22038,9 @@ static const char * const sqlite3azCompileOpt[] = { #ifdef SQLITE_EXPLAIN_ESTIMATED_ROWS "EXPLAIN_ESTIMATED_ROWS", #endif +#ifdef SQLITE_EXTRA_AUTOEXT + "EXTRA_AUTOEXT=" CTIMEOPT_VAL(SQLITE_EXTRA_AUTOEXT), +#endif #ifdef SQLITE_EXTRA_IFNULLROW "EXTRA_IFNULLROW", #endif @@ -21357,6 +22082,9 @@ static const char * const sqlite3azCompileOpt[] = { #ifdef SQLITE_INTEGRITY_CHECK_ERROR_MAX "INTEGRITY_CHECK_ERROR_MAX=" CTIMEOPT_VAL(SQLITE_INTEGRITY_CHECK_ERROR_MAX), #endif +#ifdef SQLITE_LEGACY_JSON_VALID + "LEGACY_JSON_VALID", +#endif #ifdef SQLITE_LIKE_DOESNT_MATCH_BLOBS "LIKE_DOESNT_MATCH_BLOBS", #endif @@ -21594,6 +22322,9 @@ static const char * const sqlite3azCompileOpt[] = { #ifdef SQLITE_OMIT_SCHEMA_VERSION_PRAGMAS "OMIT_SCHEMA_VERSION_PRAGMAS", #endif +#ifdef SQLITE_OMIT_SEH + "OMIT_SEH", +#endif #ifdef SQLITE_OMIT_SHARED_CACHE "OMIT_SHARED_CACHE", #endif @@ -21644,9 +22375,6 @@ static const char * const sqlite3azCompileOpt[] = { #ifdef SQLITE_OMIT_XFER_OPT "OMIT_XFER_OPT", #endif -#ifdef SQLITE_PCACHE_SEPARATE_HEADER - "PCACHE_SEPARATE_HEADER", -#endif #ifdef SQLITE_PERFORMANCE_TRACE "PERFORMANCE_TRACE", #endif @@ -21848,7 +22576,7 @@ SQLITE_PRIVATE const unsigned char *sqlite3aGTb = &sqlite3UpperToLower[256+12-OP ** isalnum() 0x06 ** isxdigit() 0x08 ** toupper() 0x20 -** SQLite identifier character 0x40 +** SQLite identifier character 0x40 $, _, or non-ascii ** Quote character 0x80 ** ** Bit 0x20 is set if the mapped character requires translation to upper @@ -21994,6 +22722,7 @@ SQLITE_PRIVATE SQLITE_WSD struct Sqlite3Config sqlite3Config = { SQLITE_ALLOW_COVERING_INDEX_SCAN, /* bUseCis */ 0, /* bSmallMalloc */ 1, /* bExtraSchemaChecks */ + sizeof(LONGDOUBLE_TYPE)>8, /* bUseLongDouble */ 0x7ffffffe, /* mxStrlen */ 0, /* neverCorrupt */ SQLITE_DEFAULT_LOOKASIDE, /* szLookaside, nLookaside */ @@ -22042,7 +22771,7 @@ SQLITE_PRIVATE SQLITE_WSD struct Sqlite3Config sqlite3Config = { SQLITE_DEFAULT_SORTERREF_SIZE, /* szSorterRef */ 0, /* iPrngSeed */ #ifdef SQLITE_DEBUG - {0,0,0,0,0,0} /* aTune */ + {0,0,0,0,0,0}, /* aTune */ #endif }; @@ -22126,10 +22855,6 @@ SQLITE_PRIVATE const char sqlite3StrBINARY[] = "BINARY"; ** ** sqlite3StdTypeAffinity[] The affinity 
associated with each entry ** in sqlite3StdType[]. -** -** sqlite3StdTypeMap[] The type value (as returned from -** sqlite3_column_type() or sqlite3_value_type()) -** for each entry in sqlite3StdType[]. */ SQLITE_PRIVATE const unsigned char sqlite3StdTypeLen[] = { 3, 4, 3, 7, 4, 4 }; SQLITE_PRIVATE const char sqlite3StdTypeAffinity[] = { @@ -22140,14 +22865,6 @@ SQLITE_PRIVATE const char sqlite3StdTypeAffinity[] = { SQLITE_AFF_REAL, SQLITE_AFF_TEXT }; -SQLITE_PRIVATE const char sqlite3StdTypeMap[] = { - 0, - SQLITE_BLOB, - SQLITE_INTEGER, - SQLITE_INTEGER, - SQLITE_FLOAT, - SQLITE_TEXT -}; SQLITE_PRIVATE const char *sqlite3StdType[] = { "ANY", "BLOB", @@ -22235,6 +22952,9 @@ typedef struct VdbeSorter VdbeSorter; /* Elements of the linked list at Vdbe.pAuxData */ typedef struct AuxData AuxData; +/* A cache of large TEXT or BLOB values in a VdbeCursor */ +typedef struct VdbeTxtBlbCache VdbeTxtBlbCache; + /* Types of VDBE cursors */ #define CURTYPE_BTREE 0 #define CURTYPE_SORTER 1 @@ -22266,6 +22986,7 @@ struct VdbeCursor { Bool useRandomRowid:1; /* Generate new record numbers semi-randomly */ Bool isOrdered:1; /* True if the table is not BTREE_UNORDERED */ Bool noReuse:1; /* OpenEphemeral may not reuse this cursor */ + Bool colCache:1; /* pCache pointer is initialized and non-NULL */ u16 seekHit; /* See the OP_SeekHit and OP_IfNoHope opcodes */ union { /* pBtx for isEphermeral. pAltMap otherwise */ Btree *pBtx; /* Separate file holding temporary table */ @@ -22306,6 +23027,7 @@ struct VdbeCursor { #ifdef SQLITE_ENABLE_COLUMN_USED_MASK u64 maskUsed; /* Mask of columns used by this cursor */ #endif + VdbeTxtBlbCache *pCache; /* Cache of large TEXT or BLOB values */ /* 2*nField extra array elements allocated for aType[], beyond the one ** static element declared in the structure. nField total array slots for @@ -22318,12 +23040,25 @@ struct VdbeCursor { #define IsNullCursor(P) \ ((P)->eCurType==CURTYPE_PSEUDO && (P)->nullRow && (P)->seekResult==0) - /* ** A value for VdbeCursor.cacheStatus that means the cache is always invalid. */ #define CACHE_STALE 0 +/* +** Large TEXT or BLOB values can be slow to load, so we want to avoid +** loading them more than once. For that reason, large TEXT and BLOB values +** can be stored in a cache defined by this object, and attached to the +** VdbeCursor using the pCache field. +*/ +struct VdbeTxtBlbCache { + char *pCValue; /* A RCStr buffer to hold the value */ + i64 iOffset; /* File offset of the row being cached */ + int iCol; /* Column for which the cache is valid */ + u32 cacheStatus; /* Vdbe.cacheCtr value */ + u32 colCacheCtr; /* Column cache counter */ +}; + /* ** When a sub-program is executed (OP_Program), a structure of this type ** is allocated to store the current value of the program counter, as @@ -22350,7 +23085,6 @@ struct VdbeFrame { Vdbe *v; /* VM this frame belongs to */ VdbeFrame *pParent; /* Parent of this frame, or NULL if parent is main */ Op *aOp; /* Program instructions for parent frame */ - i64 *anExec; /* Event counters from parent frame */ Mem *aMem; /* Array of memory cells for parent frame */ VdbeCursor **apCsr; /* Array of Vdbe cursors for parent frame */ u8 *aOnce; /* Bitmask used by OP_Once */ @@ -22566,10 +23300,19 @@ typedef unsigned bft; /* Bit Field Type */ /* The ScanStatus object holds a single value for the ** sqlite3_stmt_scanstatus() interface. +** +** aAddrRange[]: +** This array is used by ScanStatus elements associated with EQP +** notes that make an SQLITE_SCANSTAT_NCYCLE value available. 
It is +** an array of up to 3 ranges of VM addresses for which the Vdbe.anCycle[] +** values should be summed to calculate the NCYCLE value. Each pair of +** integer addresses is a start and end address (both inclusive) for a range +** instructions. A start value of 0 indicates an empty range. */ typedef struct ScanStatus ScanStatus; struct ScanStatus { int addrExplain; /* OP_Explain for loop */ + int aAddrRange[6]; int addrLoop; /* Address of "loops" counter */ int addrVisit; /* Address of "rows visited" counter */ int iSelectID; /* The "Select-ID" for this loop */ @@ -22599,7 +23342,7 @@ struct DblquoteStr { */ struct Vdbe { sqlite3 *db; /* The database connection that owns this statement */ - Vdbe *pPrev,*pNext; /* Linked list of VDBEs with the same Vdbe.db */ + Vdbe **ppVPrev,*pVNext; /* Linked list of VDBEs with the same Vdbe.db */ Parse *pParse; /* Parsing context used to create this Vdbe */ ynVar nVar; /* Number of entries in aVar[] */ int nMem; /* Number of memory locations currently allocated */ @@ -22625,7 +23368,7 @@ struct Vdbe { int nOp; /* Number of instructions in the program */ int nOpAlloc; /* Slots allocated for aOp[] */ Mem *aColName; /* Column names to return */ - Mem *pResultSet; /* Pointer to an array of results */ + Mem *pResultRow; /* Current output row */ char *zErrMsg; /* Error message written here */ VList *pVList; /* Name of variables */ #ifndef SQLITE_OMIT_TRACE @@ -22636,16 +23379,18 @@ struct Vdbe { u32 nWrite; /* Number of write operations that have occurred */ #endif u16 nResColumn; /* Number of columns in one row of the result set */ + u16 nResAlloc; /* Column slots allocated to aColName[] */ u8 errorAction; /* Recovery action to do in case of an error */ u8 minWriteFileFormat; /* Minimum file format for writable database files */ u8 prepFlags; /* SQLITE_PREPARE_* flags */ u8 eVdbeState; /* On of the VDBE_*_STATE values */ bft expired:2; /* 1: recompile VM immediately 2: when convenient */ - bft explain:2; /* True if EXPLAIN present on SQL command */ + bft explain:2; /* 0: normal, 1: EXPLAIN, 2: EXPLAIN QUERY PLAN */ bft changeCntOn:1; /* True to update the change-counter */ bft usesStmtJournal:1; /* True if uses a statement journal */ bft readOnly:1; /* True for statements that do not write */ bft bIsReader:1; /* True for statements that read */ + bft haveEqpOps:1; /* Bytecode supports EXPLAIN QUERY PLAN */ yDbMask btreeMask; /* Bitmask of db->aDb[] entries referenced */ yDbMask lockMask; /* Subset of btreeMask that requires a lock */ u32 aCounter[9]; /* Counters used by sqlite3_stmt_status() */ @@ -22662,7 +23407,6 @@ struct Vdbe { SubProgram *pProgram; /* Linked list of all sub-programs used by VM */ AuxData *pAuxData; /* Linked list of auxdata allocations */ #ifdef SQLITE_ENABLE_STMT_SCANSTATUS - i64 *anExec; /* Number of times each op has been executed */ int nScan; /* Entries in aScan[] */ ScanStatus *aScan; /* Scan definitions for sqlite3_stmt_scanstatus() */ #endif @@ -22693,7 +23437,7 @@ struct PreUpdate { i64 iKey1; /* First key value passed to hook */ i64 iKey2; /* Second key value passed to hook */ Mem *aNew; /* Array of new.* values */ - Table *pTab; /* Schema object being upated */ + Table *pTab; /* Schema object being updated */ Index *pPk; /* PK index if pTab is WITHOUT ROWID */ }; @@ -22783,6 +23527,7 @@ SQLITE_PRIVATE int sqlite3VdbeMemSetZeroBlob(Mem*,int); SQLITE_PRIVATE int sqlite3VdbeMemIsRowSet(const Mem*); #endif SQLITE_PRIVATE int sqlite3VdbeMemSetRowSet(Mem*); +SQLITE_PRIVATE void sqlite3VdbeMemZeroTerminateIfAble(Mem*); 
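The VdbeTxtBlbCache introduced above holds a single large TEXT or BLOB value per cursor, and that value is only trusted while the row (identified by its file offset), the column number, and both generation counters still match. A minimal sketch of the validity check, with hypothetical names standing in for the real cursor plumbing:

    typedef struct BigValCache {
      char *pCValue;          /* buffer holding the cached value */
      long long iOffset;      /* file offset of the cached row */
      int iCol;               /* column number the cache holds */
      unsigned cacheStatus;   /* row-cache generation at store time */
      unsigned colCacheCtr;   /* column-cache generation at store time */
    } BigValCache;

    /* Return the cached value on a hit, or 0 to make the caller reload
    ** the column from disk and refresh the cache entry. */
    static const char *bigValFetch(const BigValCache *p, long long iOfst,
                                   int iCol, unsigned rowGen, unsigned colGen){
      if( p->pCValue!=0 && p->iOffset==iOfst && p->iCol==iCol
       && p->cacheStatus==rowGen && p->colCacheCtr==colGen ){
        return p->pCValue;    /* hit: skip re-walking the overflow chain */
      }
      return 0;
    }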
SQLITE_PRIVATE int sqlite3VdbeMemMakeWriteable(Mem*); SQLITE_PRIVATE int sqlite3VdbeMemStringify(Mem*, u8, u8); SQLITE_PRIVATE int sqlite3IntFloatCompare(i64,double); @@ -22829,6 +23574,8 @@ SQLITE_PRIVATE int sqlite3VdbeSorterRewind(const VdbeCursor *, int *); SQLITE_PRIVATE int sqlite3VdbeSorterWrite(const VdbeCursor *, Mem *); SQLITE_PRIVATE int sqlite3VdbeSorterCompare(const VdbeCursor *, Mem *, int, int *); +SQLITE_PRIVATE void sqlite3VdbeValueListFree(void*); + #ifdef SQLITE_DEBUG SQLITE_PRIVATE void sqlite3VdbeIncrWriteCounter(Vdbe*, VdbeCursor*); SQLITE_PRIVATE void sqlite3VdbeAssertAbortable(Vdbe*); @@ -23157,6 +23904,8 @@ SQLITE_API int sqlite3_db_status( sqlite3BtreeEnterAll(db); db->pnBytesFreed = &nByte; + assert( db->lookaside.pEnd==db->lookaside.pTrueEnd ); + db->lookaside.pEnd = db->lookaside.pStart; for(i=0; inDb; i++){ Schema *pSchema = db->aDb[i].pSchema; if( ALWAYS(pSchema!=0) ){ @@ -23182,6 +23931,7 @@ SQLITE_API int sqlite3_db_status( } } db->pnBytesFreed = 0; + db->lookaside.pEnd = db->lookaside.pTrueEnd; sqlite3BtreeLeaveAll(db); *pHighwater = 0; @@ -23199,9 +23949,12 @@ SQLITE_API int sqlite3_db_status( int nByte = 0; /* Used to accumulate return value */ db->pnBytesFreed = &nByte; - for(pVdbe=db->pVdbe; pVdbe; pVdbe=pVdbe->pNext){ + assert( db->lookaside.pEnd==db->lookaside.pTrueEnd ); + db->lookaside.pEnd = db->lookaside.pStart; + for(pVdbe=db->pVdbe; pVdbe; pVdbe=pVdbe->pVNext){ sqlite3VdbeDelete(pVdbe); } + db->lookaside.pEnd = db->lookaside.pTrueEnd; db->pnBytesFreed = 0; *pHighwater = 0; /* IMP: R-64479-57858 */ @@ -23338,6 +24091,7 @@ struct DateTime { char validTZ; /* True (1) if tz is valid */ char tzSet; /* Timezone was set explicitly */ char isError; /* An overflow has occurred */ + char useSubsec; /* Display subsecond precision */ }; @@ -23370,8 +24124,8 @@ struct DateTime { */ static int getDigits(const char *zDate, const char *zFormat, ...){ /* The aMx[] array translates the 3rd character of each format - ** spec into a max size: a b c d e f */ - static const u16 aMx[] = { 12, 14, 24, 31, 59, 9999 }; + ** spec into a max size: a b c d e f */ + static const u16 aMx[] = { 12, 14, 24, 31, 59, 14712 }; va_list ap; int cnt = 0; char nextC; @@ -23537,7 +24291,7 @@ static void computeJD(DateTime *p){ p->iJD = (sqlite3_int64)((X1 + X2 + D + B - 1524.5 ) * 86400000); p->validJD = 1; if( p->validHMS ){ - p->iJD += p->h*3600000 + p->m*60000 + (sqlite3_int64)(p->s*1000); + p->iJD += p->h*3600000 + p->m*60000 + (sqlite3_int64)(p->s*1000 + 0.5); if( p->validTZ ){ p->iJD -= p->tz*60000; p->validYMD = 0; @@ -23652,6 +24406,11 @@ static int parseDateOrTime( }else if( sqlite3AtoF(zDate, &r, sqlite3Strlen30(zDate), SQLITE_UTF8)>0 ){ setRawDateNumber(p, r); return 0; + }else if( (sqlite3StrICmp(zDate,"subsec")==0 + || sqlite3StrICmp(zDate,"subsecond")==0) + && sqlite3NotPureFunc(context) ){ + p->useSubsec = 1; + return setDateTimeToCurrent(context, p); } return 1; } @@ -23707,17 +24466,14 @@ static void computeYMD(DateTime *p){ ** Compute the Hour, Minute, and Seconds from the julian day number. 
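**
** For example (a sketch of the arithmetic below): a time-of-day of
** 12:34:56.789 gives day_ms = 45296789, so p->s = (45296789 % 60000)
** / 1000.0 = 56.789, day_min = 45296789/60000 = 754, p->m = 754 % 60
** = 34, and p->h = 754/60 = 12.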
*/ static void computeHMS(DateTime *p){ - int s; + int day_ms, day_min; /* milliseconds, minutes into the day */ if( p->validHMS ) return; computeJD(p); - s = (int)((p->iJD + 43200000) % 86400000); - p->s = s/1000.0; - s = (int)p->s; - p->s -= s; - p->h = s/3600; - s -= p->h*3600; - p->m = s/60; - p->s += s - p->m*60; + day_ms = (int)((p->iJD + 43200000) % 86400000); + p->s = (day_ms % 60000)/1000.0; + day_min = day_ms/60000; + p->m = day_min % 60; + p->h = day_min / 60; p->rawS = 0; p->validHMS = 1; } @@ -23896,6 +24652,25 @@ static const struct { { 4, "year", 14713.0, 31536000.0 }, }; +/* +** If the DateTime p is raw number, try to figure out if it is +** a julian day number of a unix timestamp. Set the p value +** appropriately. +*/ +static void autoAdjustDate(DateTime *p){ + if( !p->rawS || p->validJD ){ + p->rawS = 0; + }else if( p->s>=-21086676*(i64)10000 /* -4713-11-24 12:00:00 */ + && p->s<=(25340230*(i64)10000)+799 /* 9999-12-31 23:59:59 */ + ){ + double r = p->s*1000.0 + 210866760000000.0; + clearYMD_HMS_TZ(p); + p->iJD = (sqlite3_int64)(r + 0.5); + p->validJD = 1; + p->rawS = 0; + } +} + /* ** Process a modifier to a date-time stamp. The modifiers are ** as follows: @@ -23939,19 +24714,8 @@ static int parseModifier( */ if( sqlite3_stricmp(z, "auto")==0 ){ if( idx>1 ) return 1; /* IMP: R-33611-57934 */ - if( !p->rawS || p->validJD ){ - rc = 0; - p->rawS = 0; - }else if( p->s>=-21086676*(i64)10000 /* -4713-11-24 12:00:00 */ - && p->s<=(25340230*(i64)10000)+799 /* 9999-12-31 23:59:59 */ - ){ - r = p->s*1000.0 + 210866760000000.0; - clearYMD_HMS_TZ(p); - p->iJD = (sqlite3_int64)(r + 0.5); - p->validJD = 1; - p->rawS = 0; - rc = 0; - } + autoAdjustDate(p); + rc = 0; } break; } @@ -24010,7 +24774,7 @@ static int parseModifier( i64 iOrigJD; /* Original localtime */ i64 iGuess; /* Guess at the corresponding utc time */ int cnt = 0; /* Safety to prevent infinite loop */ - int iErr; /* Guess is off by this much */ + i64 iErr; /* Guess is off by this much */ computeJD(p); iGuess = iOrigJD = p->iJD; @@ -24046,7 +24810,7 @@ static int parseModifier( */ if( sqlite3_strnicmp(z, "weekday ", 8)==0 && sqlite3AtoF(&z[8], &r, sqlite3Strlen30(&z[8]), SQLITE_UTF8)>0 - && (n=(int)r)==r && n>=0 && r<7 ){ + && r>=0.0 && r<7.0 && (n=(int)r)==r ){ sqlite3_int64 Z; computeYMD_HMS(p); p->validTZ = 0; @@ -24066,8 +24830,22 @@ static int parseModifier( ** ** Move the date backwards to the beginning of the current day, ** or month or year. + ** + ** subsecond + ** subsec + ** + ** Show subsecond precision in the output of datetime() and + ** unixepoch() and strftime('%s'). 
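+ **
+ ** Example (illustrative values): datetime('now') might return
+ ** '2023-01-10 12:00:00' while datetime('now','subsec') returns
+ ** '2023-01-10 12:00:00.123' for the same moment.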
*/ - if( sqlite3_strnicmp(z, "start of ", 9)!=0 ) break; + if( sqlite3_strnicmp(z, "start of ", 9)!=0 ){ + if( sqlite3_stricmp(z, "subsec")==0 + || sqlite3_stricmp(z, "subsecond")==0 + ){ + p->useSubsec = 1; + rc = 0; + } + break; + } if( !p->validJD && !p->validYMD && !p->validHMS ) break; z += 9; computeYMD(p); @@ -24103,18 +24881,73 @@ static int parseModifier( case '9': { double rRounder; int i; - for(n=1; z[n] && z[n]!=':' && !sqlite3Isspace(z[n]); n++){} + int Y,M,D,h,m,x; + const char *z2 = z; + char z0 = z[0]; + for(n=1; z[n]; n++){ + if( z[n]==':' ) break; + if( sqlite3Isspace(z[n]) ) break; + if( z[n]=='-' ){ + if( n==5 && getDigits(&z[1], "40f", &Y)==1 ) break; + if( n==6 && getDigits(&z[1], "50f", &Y)==1 ) break; + } + } if( sqlite3AtoF(z, &r, n, SQLITE_UTF8)<=0 ){ - rc = 1; + assert( rc==1 ); break; } - if( z[n]==':' ){ + if( z[n]=='-' ){ + /* A modifier of the form (+|-)YYYY-MM-DD adds or subtracts the + ** specified number of years, months, and days. MM is limited to + ** the range 0-11 and DD is limited to 0-30. + */ + if( z0!='+' && z0!='-' ) break; /* Must start with +/- */ + if( n==5 ){ + if( getDigits(&z[1], "40f-20a-20d", &Y, &M, &D)!=3 ) break; + }else{ + assert( n==6 ); + if( getDigits(&z[1], "50f-20a-20d", &Y, &M, &D)!=3 ) break; + z++; + } + if( M>=12 ) break; /* M range 0..11 */ + if( D>=31 ) break; /* D range 0..30 */ + computeYMD_HMS(p); + p->validJD = 0; + if( z0=='-' ){ + p->Y -= Y; + p->M -= M; + D = -D; + }else{ + p->Y += Y; + p->M += M; + } + x = p->M>0 ? (p->M-1)/12 : (p->M-12)/12; + p->Y += x; + p->M -= x*12; + computeJD(p); + p->validHMS = 0; + p->validYMD = 0; + p->iJD += (i64)D*86400000; + if( z[11]==0 ){ + rc = 0; + break; + } + if( sqlite3Isspace(z[11]) + && getDigits(&z[12], "20c:20e", &h, &m)==2 + ){ + z2 = &z[12]; + n = 2; + }else{ + break; + } + } + if( z2[n]==':' ){ /* A modifier of the form (+|-)HH:MM:SS.FFF adds (or subtracts) the ** specified number of hours, minutes, seconds, and fractional seconds ** to the time. The ".FFF" may be omitted. The ":SS.FFF" may be ** omitted. */ - const char *z2 = z; + DateTime tx; sqlite3_int64 day; if( !sqlite3Isdigit(*z2) ) z2++; @@ -24124,7 +24957,7 @@ static int parseModifier( tx.iJD -= 43200000; day = tx.iJD/86400000; tx.iJD -= day*86400000; - if( z[0]=='-' ) tx.iJD = -tx.iJD; + if( z0=='-' ) tx.iJD = -tx.iJD; computeJD(p); clearYMD_HMS_TZ(p); p->iJD += tx.iJD; @@ -24140,7 +24973,7 @@ static int parseModifier( if( n>10 || n<3 ) break; if( sqlite3UpperToLower[(u8)z[n-1]]=='s' ) n--; computeJD(p); - rc = 1; + assert( rc==1 ); rRounder = r<0 ? 
-0.5 : +0.5; for(i=0; iM += (int)r; @@ -24265,7 +25097,11 @@ static void unixepochFunc( DateTime x; if( isDate(context, argc, argv, &x)==0 ){ computeJD(&x); - sqlite3_result_int64(context, x.iJD/1000 - 21086676*(i64)10000); + if( x.useSubsec ){ + sqlite3_result_double(context, (x.iJD - 21086676*(i64)10000000)/1000.0); + }else{ + sqlite3_result_int64(context, x.iJD/1000 - 21086676*(i64)10000); + } } } @@ -24281,8 +25117,8 @@ static void datetimeFunc( ){ DateTime x; if( isDate(context, argc, argv, &x)==0 ){ - int Y, s; - char zBuf[24]; + int Y, s, n; + char zBuf[32]; computeYMD_HMS(&x); Y = x.Y; if( Y<0 ) Y = -Y; @@ -24303,15 +25139,28 @@ static void datetimeFunc( zBuf[15] = '0' + (x.m/10)%10; zBuf[16] = '0' + (x.m)%10; zBuf[17] = ':'; - s = (int)x.s; - zBuf[18] = '0' + (s/10)%10; - zBuf[19] = '0' + (s)%10; - zBuf[20] = 0; + if( x.useSubsec ){ + s = (int)(1000.0*x.s + 0.5); + zBuf[18] = '0' + (s/10000)%10; + zBuf[19] = '0' + (s/1000)%10; + zBuf[20] = '.'; + zBuf[21] = '0' + (s/100)%10; + zBuf[22] = '0' + (s/10)%10; + zBuf[23] = '0' + (s)%10; + zBuf[24] = 0; + n = 24; + }else{ + s = (int)x.s; + zBuf[18] = '0' + (s/10)%10; + zBuf[19] = '0' + (s)%10; + zBuf[20] = 0; + n = 20; + } if( x.Y<0 ){ zBuf[0] = '-'; - sqlite3_result_text(context, zBuf, 20, SQLITE_TRANSIENT); + sqlite3_result_text(context, zBuf, n, SQLITE_TRANSIENT); }else{ - sqlite3_result_text(context, &zBuf[1], 19, SQLITE_TRANSIENT); + sqlite3_result_text(context, &zBuf[1], n-1, SQLITE_TRANSIENT); } } } @@ -24328,7 +25177,7 @@ static void timeFunc( ){ DateTime x; if( isDate(context, argc, argv, &x)==0 ){ - int s; + int s, n; char zBuf[16]; computeHMS(&x); zBuf[0] = '0' + (x.h/10)%10; @@ -24337,11 +25186,24 @@ static void timeFunc( zBuf[3] = '0' + (x.m/10)%10; zBuf[4] = '0' + (x.m)%10; zBuf[5] = ':'; - s = (int)x.s; - zBuf[6] = '0' + (s/10)%10; - zBuf[7] = '0' + (s)%10; - zBuf[8] = 0; - sqlite3_result_text(context, zBuf, 8, SQLITE_TRANSIENT); + if( x.useSubsec ){ + s = (int)(1000.0*x.s + 0.5); + zBuf[6] = '0' + (s/10000)%10; + zBuf[7] = '0' + (s/1000)%10; + zBuf[8] = '.'; + zBuf[9] = '0' + (s/100)%10; + zBuf[10] = '0' + (s/10)%10; + zBuf[11] = '0' + (s)%10; + zBuf[12] = 0; + n = 12; + }else{ + s = (int)x.s; + zBuf[6] = '0' + (s/10)%10; + zBuf[7] = '0' + (s)%10; + zBuf[8] = 0; + n = 8; + } + sqlite3_result_text(context, zBuf, n, SQLITE_TRANSIENT); } } @@ -24396,7 +25258,7 @@ static void dateFunc( ** %M minute 00-59 ** %s seconds since 1970-01-01 ** %S seconds 00-59 -** %w day of week 0-6 sunday==0 +** %w day of week 0-6 Sunday==0 ** %W week of year 00-53 ** %Y year 0000-9999 ** %% % @@ -24422,13 +25284,16 @@ static void strftimeFunc( computeJD(&x); computeYMD_HMS(&x); for(i=j=0; zFmt[i]; i++){ + char cf; if( zFmt[i]!='%' ) continue; if( j12 ) h -= 12; + if( h==0 ) h = 12; + sqlite3_str_appendf(&sRes, cf=='I' ? "%02d" : "%2d", h); break; } case 'W': /* Fall thru */ @@ -24450,7 +25328,7 @@ static void strftimeFunc( y.D = 1; computeJD(&y); nDay = (int)((x.iJD-y.iJD+43200000)/86400000); - if( zFmt[i]=='W' ){ + if( cf=='W' ){ int wd; /* 0=Monday, 1=Tuesday, ... 6=Sunday */ wd = (int)(((x.iJD+43200000)/86400000)%7); sqlite3_str_appendf(&sRes,"%02d",(nDay+7-wd)/7); @@ -24471,18 +25349,42 @@ static void strftimeFunc( sqlite3_str_appendf(&sRes,"%02d",x.m); break; } + case 'p': /* Fall thru */ + case 'P': { + if( x.h>=12 ){ + sqlite3_str_append(&sRes, cf=='p' ? "PM" : "pm", 2); + }else{ + sqlite3_str_append(&sRes, cf=='p' ? 
"AM" : "am", 2); + } + break; + } + case 'R': { + sqlite3_str_appendf(&sRes, "%02d:%02d", x.h, x.m); + break; + } case 's': { - i64 iS = (i64)(x.iJD/1000 - 21086676*(i64)10000); - sqlite3_str_appendf(&sRes,"%lld",iS); + if( x.useSubsec ){ + sqlite3_str_appendf(&sRes,"%.3f", + (x.iJD - 21086676*(i64)10000000)/1000.0); + }else{ + i64 iS = (i64)(x.iJD/1000 - 21086676*(i64)10000); + sqlite3_str_appendf(&sRes,"%lld",iS); + } break; } case 'S': { sqlite3_str_appendf(&sRes,"%02d",(int)x.s); break; } + case 'T': { + sqlite3_str_appendf(&sRes,"%02d:%02d:%02d", x.h, x.m, (int)x.s); + break; + } + case 'u': /* Fall thru */ case 'w': { - sqlite3_str_appendchar(&sRes, 1, - (char)(((x.iJD+129600000)/86400000) % 7) + '0'); + char c = (char)(((x.iJD+129600000)/86400000) % 7) + '0'; + if( c=='0' && cf=='u' ) c = '7'; + sqlite3_str_appendchar(&sRes, 1, c); break; } case 'Y': { @@ -24531,6 +25433,117 @@ static void cdateFunc( dateFunc(context, 0, 0); } +/* +** timediff(DATE1, DATE2) +** +** Return the amount of time that must be added to DATE2 in order to +** convert it into DATE2. The time difference format is: +** +** +YYYY-MM-DD HH:MM:SS.SSS +** +** The initial "+" becomes "-" if DATE1 occurs before DATE2. For +** date/time values A and B, the following invariant should hold: +** +** datetime(A) == (datetime(B, timediff(A,B)) +** +** Both DATE arguments must be either a julian day number, or an +** ISO-8601 string. The unix timestamps are not supported by this +** routine. +*/ +static void timediffFunc( + sqlite3_context *context, + int NotUsed1, + sqlite3_value **argv +){ + char sign; + int Y, M; + DateTime d1, d2; + sqlite3_str sRes; + UNUSED_PARAMETER(NotUsed1); + if( isDate(context, 1, &argv[0], &d1) ) return; + if( isDate(context, 1, &argv[1], &d2) ) return; + computeYMD_HMS(&d1); + computeYMD_HMS(&d2); + if( d1.iJD>=d2.iJD ){ + sign = '+'; + Y = d1.Y - d2.Y; + if( Y ){ + d2.Y = d1.Y; + d2.validJD = 0; + computeJD(&d2); + } + M = d1.M - d2.M; + if( M<0 ){ + Y--; + M += 12; + } + if( M!=0 ){ + d2.M = d1.M; + d2.validJD = 0; + computeJD(&d2); + } + while( d1.iJDd2.iJD ){ + M--; + if( M<0 ){ + M = 11; + Y--; + } + d2.M++; + if( d2.M>12 ){ + d2.M = 1; + d2.Y++; + } + d2.validJD = 0; + computeJD(&d2); + } + d1.iJD = d2.iJD - d1.iJD; + d1.iJD += (u64)1486995408 * (u64)100000; + } + d1.validYMD = 0; + d1.validHMS = 0; + d1.validTZ = 0; + computeYMD_HMS(&d1); + sqlite3StrAccumInit(&sRes, 0, 0, 0, 100); + sqlite3_str_appendf(&sRes, "%c%04d-%02d-%02d %02d:%02d:%06.3f", + sign, Y, M, d1.D-1, d1.h, d1.m, d1.s); + sqlite3ResultStrAccum(context, &sRes); +} + + /* ** current_timestamp() ** @@ -24605,6 +25618,7 @@ SQLITE_PRIVATE void sqlite3RegisterDateTimeFunctions(void){ PURE_DATE(time, -1, 0, 0, timeFunc ), PURE_DATE(datetime, -1, 0, 0, datetimeFunc ), PURE_DATE(strftime, -1, 0, 0, strftimeFunc ), + PURE_DATE(timediff, 2, 0, 0, timediffFunc ), DFUNCTION(current_time, 0, 0, 0, ctimeFunc ), DFUNCTION(current_timestamp, 0, 0, 0, ctimestampFunc), DFUNCTION(current_date, 0, 0, 0, cdateFunc ), @@ -24727,9 +25741,11 @@ SQLITE_PRIVATE int sqlite3OsFileSize(sqlite3_file *id, i64 *pSize){ } SQLITE_PRIVATE int sqlite3OsLock(sqlite3_file *id, int lockType){ DO_OS_MALLOC_TEST(id); + assert( lockType>=SQLITE_LOCK_SHARED && lockType<=SQLITE_LOCK_EXCLUSIVE ); return id->pMethods->xLock(id, lockType); } SQLITE_PRIVATE int sqlite3OsUnlock(sqlite3_file *id, int lockType){ + assert( lockType==SQLITE_LOCK_NONE || lockType==SQLITE_LOCK_SHARED ); return id->pMethods->xUnlock(id, lockType); } SQLITE_PRIVATE int 
sqlite3OsCheckReservedLock(sqlite3_file *id, int *pResOut){ @@ -24756,7 +25772,7 @@ SQLITE_PRIVATE int sqlite3OsFileControl(sqlite3_file *id, int op, void *pArg){ /* Faults are not injected into COMMIT_PHASETWO because, assuming SQLite ** is using a regular VFS, it is called after the corresponding ** transaction has been committed. Injecting a fault at this point - ** confuses the test scripts - the COMMIT comand returns SQLITE_NOMEM + ** confuses the test scripts - the COMMIT command returns SQLITE_NOMEM ** but the transaction is committed anyway. ** ** The core must call OsFileControl() though, not OsFileControlHint(), @@ -24844,6 +25860,7 @@ SQLITE_PRIVATE int sqlite3OsOpen( ** down into the VFS layer. Some SQLITE_OPEN_ flags (for example, ** SQLITE_OPEN_FULLMUTEX or SQLITE_OPEN_SHAREDCACHE) are blocked before ** reaching the VFS. */ + assert( zPath || (flags & SQLITE_OPEN_EXCLUSIVE) ); rc = pVfs->xOpen(pVfs, zPath, pFile, flags & 0x1087f7f, pFlagsOut); assert( rc==SQLITE_OK || pFile->pMethods==0 ); return rc; @@ -25376,7 +26393,7 @@ static void *sqlite3MemMalloc(int nByte){ ** or sqlite3MemRealloc(). ** ** For this low-level routine, we already know that pPrior!=0 since -** cases where pPrior==0 will have been intecepted and dealt with +** cases where pPrior==0 will have been intercepted and dealt with ** by higher-level routines. */ static void sqlite3MemFree(void *pPrior){ @@ -25464,7 +26481,7 @@ static int sqlite3MemInit(void *NotUsed){ return SQLITE_OK; } len = sizeof(cpuCount); - /* One usually wants to use hw.acctivecpu for MT decisions, but not here */ + /* One usually wants to use hw.activecpu for MT decisions, but not here */ sysctlbyname("hw.ncpu", &cpuCount, &len, NULL, 0); if( cpuCount>1 ){ /* defer MT decisions to system malloc */ @@ -27159,9 +28176,13 @@ static int memsys5Roundup(int n){ if( n<=mem5.szAtom ) return mem5.szAtom; return mem5.szAtom*2; } - if( n>0x40000000 ) return 0; + if( n>0x10000000 ){ + if( n>0x40000000 ) return 0; + if( n>0x20000000 ) return 0x40000000; + return 0x20000000; + } for(iFullSz=mem5.szAtom*8; iFullSz=n ) return iFullSz/2; + if( (iFullSz/2)>=(i64)n ) return iFullSz/2; return iFullSz; } @@ -27452,7 +28473,7 @@ static void checkMutexFree(sqlite3_mutex *p){ assert( SQLITE_MUTEX_FAST<2 ); assert( SQLITE_MUTEX_WARNONCONTENTION<2 ); -#if SQLITE_ENABLE_API_ARMOR +#ifdef SQLITE_ENABLE_API_ARMOR if( ((CheckMutex*)p)->iType<2 ) #endif { @@ -27927,7 +28948,7 @@ SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3DefaultMutex(void){ /* ** The sqlite3_mutex.id, sqlite3_mutex.nRef, and sqlite3_mutex.owner fields -** are necessary under two condidtions: (1) Debug builds and (2) using +** are necessary under two conditions: (1) Debug builds and (2) using ** home-grown mutexes. Encapsulate these conditions into a single #define. 
*/ #if defined(SQLITE_DEBUG) || defined(SQLITE_HOMEGROWN_RECURSIVE_MUTEX) @@ -28124,7 +29145,7 @@ static sqlite3_mutex *pthreadMutexAlloc(int iType){ */ static void pthreadMutexFree(sqlite3_mutex *p){ assert( p->nRef==0 ); -#if SQLITE_ENABLE_API_ARMOR +#ifdef SQLITE_ENABLE_API_ARMOR if( p->id==SQLITE_MUTEX_FAST || p->id==SQLITE_MUTEX_RECURSIVE ) #endif { @@ -28428,7 +29449,7 @@ struct sqlite3_mutex { CRITICAL_SECTION mutex; /* Mutex controlling the lock */ int id; /* Mutex type */ #ifdef SQLITE_DEBUG - volatile int nRef; /* Number of enterances */ + volatile int nRef; /* Number of entrances */ volatile DWORD owner; /* Thread holding this mutex */ volatile LONG trace; /* True to trace changes */ #endif @@ -28477,7 +29498,7 @@ SQLITE_PRIVATE void sqlite3MemoryBarrier(void){ SQLITE_MEMORY_BARRIER; #elif defined(__GNUC__) __sync_synchronize(); -#elif MSVC_VERSION>=1300 +#elif MSVC_VERSION>=1400 _ReadWriteBarrier(); #elif defined(MemoryBarrier) MemoryBarrier(); @@ -29062,18 +30083,34 @@ static void mallocWithAlarm(int n, void **pp){ *pp = p; } +/* +** Maximum size of any single memory allocation. +** +** This is not a limit on the total amount of memory used. This is +** a limit on the size parameter to sqlite3_malloc() and sqlite3_realloc(). +** +** The upper bound is slightly less than 2GiB: 0x7ffffeff == 2,147,483,391 +** This provides a 256-byte safety margin for defense against 32-bit +** signed integer overflow bugs when computing memory allocation sizes. +** Paranoid applications might want to reduce the maximum allocation size +** further for an even larger safety margin. 0x3fffffff or 0x0fffffff +** or even smaller would be reasonable upper bounds on the size of a memory +** allocations for most applications. +*/ +#ifndef SQLITE_MAX_ALLOCATION_SIZE +# define SQLITE_MAX_ALLOCATION_SIZE 2147483391 +#endif +#if SQLITE_MAX_ALLOCATION_SIZE>2147483391 +# error Maximum size for SQLITE_MAX_ALLOCATION_SIZE is 2147483391 +#endif + /* ** Allocate memory. This routine is like sqlite3_malloc() except that it ** assumes the memory subsystem has already been initialized. */ SQLITE_PRIVATE void *sqlite3Malloc(u64 n){ void *p; - if( n==0 || n>=0x7fffff00 ){ - /* A memory allocation of a number of bytes which is near the maximum - ** signed integer value might cause an integer overflow inside of the - ** xMalloc(). Hence we limit the maximum size to 0x7fffff00, giving - ** 255 bytes of overhead. SQLite itself will never use anything near - ** this amount. 
The only way to reach the limit is with sqlite3_malloc() */ + if( n==0 || n>SQLITE_MAX_ALLOCATION_SIZE ){ p = 0; }else if( sqlite3GlobalConfig.bMemstat ){ sqlite3_mutex_enter(mem0.mutex); @@ -29109,7 +30146,7 @@ SQLITE_API void *sqlite3_malloc64(sqlite3_uint64 n){ */ #ifndef SQLITE_OMIT_LOOKASIDE static int isLookaside(sqlite3 *db, const void *p){ - return SQLITE_WITHIN(p, db->lookaside.pStart, db->lookaside.pEnd); + return SQLITE_WITHIN(p, db->lookaside.pStart, db->lookaside.pTrueEnd); } #else #define isLookaside(A,B) 0 @@ -29133,18 +30170,16 @@ static int lookasideMallocSize(sqlite3 *db, const void *p){ SQLITE_PRIVATE int sqlite3DbMallocSize(sqlite3 *db, const void *p){ assert( p!=0 ); #ifdef SQLITE_DEBUG - if( db==0 || !isLookaside(db,p) ){ - if( db==0 ){ - assert( sqlite3MemdebugNoType(p, (u8)~MEMTYPE_HEAP) ); - assert( sqlite3MemdebugHasType(p, MEMTYPE_HEAP) ); - }else{ - assert( sqlite3MemdebugHasType(p, (MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) ); - assert( sqlite3MemdebugNoType(p, (u8)~(MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) ); - } + if( db==0 ){ + assert( sqlite3MemdebugNoType(p, (u8)~MEMTYPE_HEAP) ); + assert( sqlite3MemdebugHasType(p, MEMTYPE_HEAP) ); + }else if( !isLookaside(db,p) ){ + assert( sqlite3MemdebugHasType(p, (MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) ); + assert( sqlite3MemdebugNoType(p, (u8)~(MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) ); } #endif if( db ){ - if( ((uptr)p)<(uptr)(db->lookaside.pEnd) ){ + if( ((uptr)p)<(uptr)(db->lookaside.pTrueEnd) ){ #ifndef SQLITE_OMIT_TWOSIZE_LOOKASIDE if( ((uptr)p)>=(uptr)(db->lookaside.pMiddle) ){ assert( sqlite3_mutex_held(db->mutex) ); @@ -29200,14 +30235,11 @@ SQLITE_PRIVATE void sqlite3DbFreeNN(sqlite3 *db, void *p){ assert( db==0 || sqlite3_mutex_held(db->mutex) ); assert( p!=0 ); if( db ){ - if( db->pnBytesFreed ){ - measureAllocationSize(db, p); - return; - } if( ((uptr)p)<(uptr)(db->lookaside.pEnd) ){ #ifndef SQLITE_OMIT_TWOSIZE_LOOKASIDE if( ((uptr)p)>=(uptr)(db->lookaside.pMiddle) ){ LookasideSlot *pBuf = (LookasideSlot*)p; + assert( db->pnBytesFreed==0 ); #ifdef SQLITE_DEBUG memset(p, 0xaa, LOOKASIDE_SMALL); /* Trash freed content */ #endif @@ -29218,6 +30250,7 @@ SQLITE_PRIVATE void sqlite3DbFreeNN(sqlite3 *db, void *p){ #endif /* SQLITE_OMIT_TWOSIZE_LOOKASIDE */ if( ((uptr)p)>=(uptr)(db->lookaside.pStart) ){ LookasideSlot *pBuf = (LookasideSlot*)p; + assert( db->pnBytesFreed==0 ); #ifdef SQLITE_DEBUG memset(p, 0xaa, db->lookaside.szTrue); /* Trash freed content */ #endif @@ -29226,6 +30259,10 @@ SQLITE_PRIVATE void sqlite3DbFreeNN(sqlite3 *db, void *p){ return; } } + if( db->pnBytesFreed ){ + measureAllocationSize(db, p); + return; + } } assert( sqlite3MemdebugHasType(p, (MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) ); assert( sqlite3MemdebugNoType(p, (u8)~(MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) ); @@ -29233,6 +30270,43 @@ SQLITE_PRIVATE void sqlite3DbFreeNN(sqlite3 *db, void *p){ sqlite3MemdebugSetType(p, MEMTYPE_HEAP); sqlite3_free(p); } +SQLITE_PRIVATE void sqlite3DbNNFreeNN(sqlite3 *db, void *p){ + assert( db!=0 ); + assert( sqlite3_mutex_held(db->mutex) ); + assert( p!=0 ); + if( ((uptr)p)<(uptr)(db->lookaside.pEnd) ){ +#ifndef SQLITE_OMIT_TWOSIZE_LOOKASIDE + if( ((uptr)p)>=(uptr)(db->lookaside.pMiddle) ){ + LookasideSlot *pBuf = (LookasideSlot*)p; + assert( db->pnBytesFreed==0 ); +#ifdef SQLITE_DEBUG + memset(p, 0xaa, LOOKASIDE_SMALL); /* Trash freed content */ +#endif + pBuf->pNext = db->lookaside.pSmallFree; + db->lookaside.pSmallFree = pBuf; + return; + } +#endif /* SQLITE_OMIT_TWOSIZE_LOOKASIDE */ + if( ((uptr)p)>=(uptr)(db->lookaside.pStart) ){ + 
LookasideSlot *pBuf = (LookasideSlot*)p; + assert( db->pnBytesFreed==0 ); +#ifdef SQLITE_DEBUG + memset(p, 0xaa, db->lookaside.szTrue); /* Trash freed content */ +#endif + pBuf->pNext = db->lookaside.pFree; + db->lookaside.pFree = pBuf; + return; + } + } + if( db->pnBytesFreed ){ + measureAllocationSize(db, p); + return; + } + assert( sqlite3MemdebugHasType(p, (MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) ); + assert( sqlite3MemdebugNoType(p, (u8)~(MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) ); + sqlite3MemdebugSetType(p, MEMTYPE_HEAP); + sqlite3_free(p); +} SQLITE_PRIVATE void sqlite3DbFree(sqlite3 *db, void *p){ assert( db==0 || sqlite3_mutex_held(db->mutex) ); if( p ) sqlite3DbFreeNN(db, p); @@ -29532,9 +30606,14 @@ SQLITE_PRIVATE char *sqlite3DbStrNDup(sqlite3 *db, const char *z, u64 n){ */ SQLITE_PRIVATE char *sqlite3DbSpanDup(sqlite3 *db, const char *zStart, const char *zEnd){ int n; +#ifdef SQLITE_DEBUG + /* Because of the way the parser works, the span is guaranteed to contain + ** at least one non-space character */ + for(n=0; sqlite3Isspace(zStart[n]); n++){ assert( &zStart[n]0) && sqlite3Isspace(zStart[n-1]) ) n--; + while( sqlite3Isspace(zStart[n-1]) ) n--; return sqlite3DbStrNDup(db, zStart, n); } @@ -29630,7 +30709,7 @@ SQLITE_PRIVATE int sqlite3ApiExit(sqlite3* db, int rc){ if( db->mallocFailed || rc ){ return apiHandleError(db, rc); } - return rc & db->errMask; + return 0; } /************** End of malloc.c **********************************************/ @@ -29742,43 +30821,6 @@ static const et_info fmtinfo[] = { ** %!S Like %S but prefer the zName over the zAlias */ -/* Floating point constants used for rounding */ -static const double arRound[] = { - 5.0e-01, 5.0e-02, 5.0e-03, 5.0e-04, 5.0e-05, - 5.0e-06, 5.0e-07, 5.0e-08, 5.0e-09, 5.0e-10, -}; - -/* -** If SQLITE_OMIT_FLOATING_POINT is defined, then none of the floating point -** conversions will work. -*/ -#ifndef SQLITE_OMIT_FLOATING_POINT -/* -** "*val" is a double such that 0.1 <= *val < 10.0 -** Return the ascii code for the leading digit of *val, then -** multiply "*val" by 10.0 to renormalize. -** -** Example: -** input: *val = 3.14159 -** output: *val = 1.4159 function return = '3' -** -** The counter *cnt is incremented each time. After counter exceeds -** 16 (the number of significant digits in a 64-bit float) '0' is -** always returned. -*/ -static char et_getdigit(LONGDOUBLE_TYPE *val, int *cnt){ - int digit; - LONGDOUBLE_TYPE d; - if( (*cnt)<=0 ) return '0'; - (*cnt)--; - digit = (int)*val; - d = digit; - digit += '0'; - *val = (*val - d)*10.0; - return (char)digit; -} -#endif /* SQLITE_OMIT_FLOATING_POINT */ - /* ** Set the StrAccum object to an error mode. */ @@ -29870,18 +30912,15 @@ SQLITE_API void sqlite3_str_vappendf( u8 bArgList; /* True for SQLITE_PRINTF_SQLFUNC */ char prefix; /* Prefix character. "+" or "-" or " " or '\0'. 
*/ sqlite_uint64 longvalue; /* Value for integer types */ - LONGDOUBLE_TYPE realvalue; /* Value for real types */ + double realvalue; /* Value for real types */ const et_info *infop; /* Pointer to the appropriate info structure */ char *zOut; /* Rendering buffer */ int nOut; /* Size of the rendering buffer */ char *zExtra = 0; /* Malloced memory used by some conversion */ -#ifndef SQLITE_OMIT_FLOATING_POINT - int exp, e2; /* exponent of real numbers */ - int nsd; /* Number of significant digits returned */ - double rounder; /* Used for rounding floating point values */ + int exp, e2; /* exponent of real numbers */ etByte flag_dp; /* True if decimal point should be shown */ etByte flag_rtz; /* True if trailing zeros should be removed */ -#endif + PrintfArguments *pArgList = 0; /* Arguments for SQLITE_PRINTF_SQLFUNC */ char buf[etBUFSIZE]; /* Conversion buffer */ @@ -30156,73 +31195,66 @@ SQLITE_API void sqlite3_str_vappendf( break; case etFLOAT: case etEXP: - case etGENERIC: + case etGENERIC: { + FpDecode s; + int iRound; + int j; + if( bArgList ){ realvalue = getDoubleArg(pArgList); }else{ realvalue = va_arg(ap,double); } -#ifdef SQLITE_OMIT_FLOATING_POINT - length = 0; -#else if( precision<0 ) precision = 6; /* Set default precision */ #ifdef SQLITE_FP_PRECISION_LIMIT if( precision>SQLITE_FP_PRECISION_LIMIT ){ precision = SQLITE_FP_PRECISION_LIMIT; } #endif - if( realvalue<0.0 ){ - realvalue = -realvalue; - prefix = '-'; - }else{ - prefix = flag_prefix; - } - if( xtype==etGENERIC && precision>0 ) precision--; - testcase( precision>0xfff ); - idx = precision & 0xfff; - rounder = arRound[idx%10]; - while( idx>=10 ){ rounder *= 1.0e-10; idx -= 10; } if( xtype==etFLOAT ){ - double rx = (double)realvalue; - sqlite3_uint64 u; - int ex; - memcpy(&u, &rx, sizeof(u)); - ex = -1023 + (int)((u>>52)&0x7ff); - if( precision+(ex/3) < 15 ) rounder += realvalue*3e-16; - realvalue += rounder; - } - /* Normalize realvalue to within 10.0 > realvalue >= 1.0 */ - exp = 0; - if( sqlite3IsNaN((double)realvalue) ){ - bufpt = "NaN"; - length = 3; - break; + iRound = -precision; + }else if( xtype==etGENERIC ){ + iRound = precision; + }else{ + iRound = precision+1; } - if( realvalue>0.0 ){ - LONGDOUBLE_TYPE scale = 1.0; - while( realvalue>=1e100*scale && exp<=350 ){ scale *= 1e100;exp+=100;} - while( realvalue>=1e10*scale && exp<=350 ){ scale *= 1e10; exp+=10; } - while( realvalue>=10.0*scale && exp<=350 ){ scale *= 10.0; exp++; } - realvalue /= scale; - while( realvalue<1e-8 ){ realvalue *= 1e8; exp-=8; } - while( realvalue<1.0 ){ realvalue *= 10.0; exp--; } - if( exp>350 ){ + sqlite3FpDecode(&s, realvalue, iRound, flag_altform2 ? 26 : 16); + if( s.isSpecial ){ + if( s.isSpecial==2 ){ + bufpt = flag_zeropad ? "null" : "NaN"; + length = sqlite3Strlen30(bufpt); + break; + }else if( flag_zeropad ){ + s.z[0] = '9'; + s.iDP = 1000; + s.n = 1; + }else{ + memcpy(buf, "-Inf", 5); bufpt = buf; - buf[0] = prefix; - memcpy(buf+(prefix!=0),"Inf",4); - length = 3+(prefix!=0); + if( s.sign=='-' ){ + /* no-op */ + }else if( flag_prefix ){ + buf[0] = flag_prefix; + }else{ + bufpt++; + } + length = sqlite3Strlen30(bufpt); break; } } - bufpt = buf; + if( s.sign=='-' ){ + prefix = '-'; + }else{ + prefix = flag_prefix; + } + + exp = s.iDP-1; + if( xtype==etGENERIC && precision>0 ) precision--; + /* ** If the field type is etGENERIC, then convert to either etEXP ** or etFLOAT, as appropriate. 
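**
** For instance, with the default precision of 6, %g renders 123456.0
** as '123456' (etFLOAT) but 1234567.0 as '1.23457e+06' and 0.0000123
** as '1.23e-05' (etEXP), matching the exp<-4 || exp>precision test
** applied below.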
*/ - if( xtype!=etFLOAT ){ - realvalue += rounder; - if( realvalue>=10.0 ){ realvalue *= 0.1; exp++; } - } if( xtype==etGENERIC ){ flag_rtz = !flag_alternateform; if( exp<-4 || exp>precision ){ @@ -30237,29 +31269,32 @@ SQLITE_API void sqlite3_str_vappendf( if( xtype==etEXP ){ e2 = 0; }else{ - e2 = exp; + e2 = s.iDP - 1; } + bufpt = buf; { i64 szBufNeeded; /* Size of a temporary buffer needed */ szBufNeeded = MAX(e2,0)+(i64)precision+(i64)width+15; + if( cThousand && e2>0 ) szBufNeeded += (e2+2)/3; if( szBufNeeded > etBUFSIZE ){ bufpt = zExtra = printfTempBuf(pAccum, szBufNeeded); if( bufpt==0 ) return; } } zOut = bufpt; - nsd = 16 + flag_altform2*10; flag_dp = (precision>0 ?1:0) | flag_alternateform | flag_altform2; /* The sign in front of the number */ if( prefix ){ *(bufpt++) = prefix; } /* Digits prior to the decimal point */ + j = 0; if( e2<0 ){ *(bufpt++) = '0'; }else{ for(; e2>=0; e2--){ - *(bufpt++) = et_getdigit(&realvalue,&nsd); + *(bufpt++) = j1 ) *(bufpt++) = ','; } } /* The decimal point */ @@ -30268,13 +31303,12 @@ SQLITE_API void sqlite3_str_vappendf( } /* "0" digits after the decimal point but before the first ** significant digit of the number */ - for(e2++; e2<0; precision--, e2++){ - assert( precision>0 ); + for(e2++; e2<0 && precision>0; precision--, e2++){ *(bufpt++) = '0'; } /* Significant digits after the decimal point */ while( (precision--)>0 ){ - *(bufpt++) = et_getdigit(&realvalue,&nsd); + *(bufpt++) = jcharset]; if( exp<0 ){ *(bufpt++) = '-'; exp = -exp; @@ -30323,8 +31358,8 @@ SQLITE_API void sqlite3_str_vappendf( while( nPad-- ) bufpt[i++] = '0'; length = width; } -#endif /* !defined(SQLITE_OMIT_FLOATING_POINT) */ break; + } case etSIZE: if( !bArgList ){ *(va_arg(ap,int*)) = pAccum->nChar; @@ -30373,13 +31408,26 @@ SQLITE_API void sqlite3_str_vappendf( } } if( precision>1 ){ + i64 nPrior = 1; width -= precision-1; if( width>1 && !flag_leftjustify ){ sqlite3_str_appendchar(pAccum, width-1, ' '); width = 0; } - while( precision-- > 1 ){ - sqlite3_str_append(pAccum, buf, length); + sqlite3_str_append(pAccum, buf, length); + precision--; + while( precision > 1 ){ + i64 nCopyBytes; + if( nPrior > precision-1 ) nPrior = precision - 1; + nCopyBytes = length*nPrior; + if( nCopyBytes + pAccum->nChar >= pAccum->nAlloc ){ + sqlite3StrAccumEnlarge(pAccum, nCopyBytes); + } + if( pAccum->accError ) break; + sqlite3_str_append(pAccum, + &pAccum->zText[pAccum->nChar-nCopyBytes], nCopyBytes); + precision -= nPrior; + nPrior *= 2; } } bufpt = buf; @@ -30607,9 +31655,9 @@ SQLITE_PRIVATE void sqlite3RecordErrorOffsetOfExpr(sqlite3 *db, const Expr *pExp ** Return the number of bytes of text that StrAccum is able to accept ** after the attempted enlargement. The value returned might be zero. */ -SQLITE_PRIVATE int sqlite3StrAccumEnlarge(StrAccum *p, int N){ +SQLITE_PRIVATE int sqlite3StrAccumEnlarge(StrAccum *p, i64 N){ char *zNew; - assert( p->nChar+(i64)N >= p->nAlloc ); /* Only called if really needed */ + assert( p->nChar+N >= p->nAlloc ); /* Only called if really needed */ if( p->accError ){ testcase(p->accError==SQLITE_TOOBIG); testcase(p->accError==SQLITE_NOMEM); @@ -30620,8 +31668,7 @@ SQLITE_PRIVATE int sqlite3StrAccumEnlarge(StrAccum *p, int N){ return p->nAlloc - p->nChar - 1; }else{ char *zOld = isMalloced(p) ? 
p->zText : 0; - i64 szNew = p->nChar; - szNew += (sqlite3_int64)N + 1; + i64 szNew = p->nChar + N + 1; if( szNew+p->nChar<=p->mxAlloc ){ /* Force exponential buffer size growth as long as it does not overflow, ** to avoid having to call this routine too often */ @@ -30651,7 +31698,8 @@ SQLITE_PRIVATE int sqlite3StrAccumEnlarge(StrAccum *p, int N){ return 0; } } - return N; + assert( N>=0 && N<=0x7fffffff ); + return (int)N; } /* @@ -30942,12 +31990,22 @@ SQLITE_API char *sqlite3_vsnprintf(int n, char *zBuf, const char *zFormat, va_li return zBuf; } SQLITE_API char *sqlite3_snprintf(int n, char *zBuf, const char *zFormat, ...){ - char *z; + StrAccum acc; va_list ap; + if( n<=0 ) return zBuf; +#ifdef SQLITE_ENABLE_API_ARMOR + if( zBuf==0 || zFormat==0 ) { + (void)SQLITE_MISUSE_BKPT; + if( zBuf ) zBuf[0] = 0; + return zBuf; + } +#endif + sqlite3StrAccumInit(&acc, 0, zBuf, n, 0); va_start(ap,zFormat); - z = sqlite3_vsnprintf(n, zBuf, zFormat, ap); + sqlite3_str_vappendf(&acc, zFormat, ap); va_end(ap); - return z; + zBuf[acc.nChar] = 0; + return zBuf; } /* @@ -31025,6 +32083,75 @@ SQLITE_API void sqlite3_str_appendf(StrAccum *p, const char *zFormat, ...){ va_end(ap); } + +/***************************************************************************** +** Reference counted string storage +*****************************************************************************/ + +/* +** Increase the reference count of the string by one. +** +** The input parameter is returned. +*/ +SQLITE_PRIVATE char *sqlite3RCStrRef(char *z){ + RCStr *p = (RCStr*)z; + assert( p!=0 ); + p--; + p->nRCRef++; + return z; +} + +/* +** Decrease the reference count by one. Free the string when the +** reference count reaches zero. +*/ +SQLITE_PRIVATE void sqlite3RCStrUnref(void *z){ + RCStr *p = (RCStr*)z; + assert( p!=0 ); + p--; + assert( p->nRCRef>0 ); + if( p->nRCRef>=2 ){ + p->nRCRef--; + }else{ + sqlite3_free(p); + } +} + +/* +** Create a new string that is capable of holding N bytes of text, not counting +** the zero byte at the end. The string is uninitialized. +** +** The reference count is initially 1. Call sqlite3RCStrUnref() to free the +** newly allocated string. +** +** This routine returns 0 on an OOM. +*/ +SQLITE_PRIVATE char *sqlite3RCStrNew(u64 N){ + RCStr *p = sqlite3_malloc64( N + sizeof(*p) + 1 ); + if( p==0 ) return 0; + p->nRCRef = 1; + return (char*)&p[1]; +} + +/* +** Change the size of the string so that it is able to hold N bytes. +** The string might be reallocated, so return the new allocation. 
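+**
+** Illustrative call sequence (not from the amalgamation); note that
+** sqlite3RCStrResize() may only be used while the reference count is
+** exactly 1, as the assert() below enforces:
+**
+**    char *z = sqlite3RCStrNew(5);      .. refcount is 1
+**    memcpy(z, "abcd", 5);
+**    sqlite3RCStrRef(z);                .. refcount is 2
+**    sqlite3RCStrUnref(z);              .. refcount back to 1
+**    z = sqlite3RCStrResize(z, 10);     .. legal: refcount is 1
+**    sqlite3RCStrUnref(z);              .. refcount 0, string freed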
+*/ +SQLITE_PRIVATE char *sqlite3RCStrResize(char *z, u64 N){ + RCStr *p = (RCStr*)z; + RCStr *pNew; + assert( p!=0 ); + p--; + assert( p->nRCRef==1 ); + pNew = sqlite3_realloc64(p, N+sizeof(RCStr)+1); + if( pNew==0 ){ + sqlite3_free(p); + return 0; + }else{ + return (char*)&pNew[1]; + } +} + /************** End of printf.c **********************************************/ /************** Begin file treeview.c ****************************************/ /* @@ -31247,6 +32374,13 @@ SQLITE_PRIVATE void sqlite3TreeViewSrcList(TreeView *pView, const SrcList *pSrc) if( pItem->fg.isOn || (pItem->fg.isUsing==0 && pItem->u3.pOn!=0) ){ sqlite3_str_appendf(&x, " ON"); } + if( pItem->fg.isTabFunc ) sqlite3_str_appendf(&x, " isTabFunc"); + if( pItem->fg.isCorrelated ) sqlite3_str_appendf(&x, " isCorrelated"); + if( pItem->fg.isMaterialized ) sqlite3_str_appendf(&x, " isMaterialized"); + if( pItem->fg.viaCoroutine ) sqlite3_str_appendf(&x, " viaCoroutine"); + if( pItem->fg.notCte ) sqlite3_str_appendf(&x, " notCte"); + if( pItem->fg.isNestedFrom ) sqlite3_str_appendf(&x, " isNestedFrom"); + sqlite3StrAccumFinish(&x); sqlite3TreeViewItem(pView, zLine, inSrc-1); n = 0; @@ -31434,6 +32568,7 @@ SQLITE_PRIVATE void sqlite3TreeViewWindow(TreeView *pView, const Window *pWin, u sqlite3TreeViewItem(pView, "FILTER", 1); sqlite3TreeViewExpr(pView, pWin->pFilter, 0); sqlite3TreeViewPop(&pView); + if( pWin->eFrmType==TK_FILTER ) return; } sqlite3TreeViewPush(&pView, more); if( pWin->zName ){ @@ -31443,7 +32578,7 @@ SQLITE_PRIVATE void sqlite3TreeViewWindow(TreeView *pView, const Window *pWin, u } if( pWin->zBase ) nElement++; if( pWin->pOrderBy ) nElement++; - if( pWin->eFrmType ) nElement++; + if( pWin->eFrmType!=0 && pWin->eFrmType!=TK_FILTER ) nElement++; if( pWin->eExclude ) nElement++; if( pWin->zBase ){ sqlite3TreeViewPush(&pView, (--nElement)>0); @@ -31456,7 +32591,7 @@ SQLITE_PRIVATE void sqlite3TreeViewWindow(TreeView *pView, const Window *pWin, u if( pWin->pOrderBy ){ sqlite3TreeViewExprList(pView, pWin->pOrderBy, (--nElement)>0, "ORDER-BY"); } - if( pWin->eFrmType ){ + if( pWin->eFrmType!=0 && pWin->eFrmType!=TK_FILTER ){ char zBuf[30]; const char *zFrmType = "ROWS"; if( pWin->eFrmType==TK_RANGE ) zFrmType = "RANGE"; @@ -31516,7 +32651,7 @@ SQLITE_PRIVATE void sqlite3TreeViewExpr(TreeView *pView, const Expr *pExpr, u8 m sqlite3TreeViewPop(&pView); return; } - if( pExpr->flags || pExpr->affExpr || pExpr->vvaFlags ){ + if( pExpr->flags || pExpr->affExpr || pExpr->vvaFlags || pExpr->pAggInfo ){ StrAccum x; sqlite3StrAccumInit(&x, 0, zFlgs, sizeof(zFlgs), 0); sqlite3_str_appendf(&x, " fg.af=%x.%c", @@ -31533,6 +32668,9 @@ SQLITE_PRIVATE void sqlite3TreeViewExpr(TreeView *pView, const Expr *pExpr, u8 m if( ExprHasVVAProperty(pExpr, EP_Immutable) ){ sqlite3_str_appendf(&x, " IMMUTABLE"); } + if( pExpr->pAggInfo!=0 ){ + sqlite3_str_appendf(&x, " agg-column[%d]", pExpr->iAgg); + } sqlite3StrAccumFinish(&x); }else{ zFlgs[0] = 0; @@ -31662,7 +32800,8 @@ SQLITE_PRIVATE void sqlite3TreeViewExpr(TreeView *pView, const Expr *pExpr, u8 m }; assert( pExpr->op2==TK_IS || pExpr->op2==TK_ISNOT ); assert( pExpr->pRight ); - assert( sqlite3ExprSkipCollate(pExpr->pRight)->op==TK_TRUEFALSE ); + assert( sqlite3ExprSkipCollateAndLikely(pExpr->pRight)->op + == TK_TRUEFALSE ); x = (pExpr->op2==TK_ISNOT)*2 + sqlite3ExprTruthValue(pExpr->pRight); zUniOp = azOp[x]; break; @@ -31700,7 +32839,7 @@ SQLITE_PRIVATE void sqlite3TreeViewExpr(TreeView *pView, const Expr *pExpr, u8 m assert( ExprUseXList(pExpr) ); pFarg = pExpr->x.pList; #ifndef 
SQLITE_OMIT_WINDOWFUNC - pWin = ExprHasProperty(pExpr, EP_WinFunc) ? pExpr->y.pWin : 0; + pWin = IsWindowFunc(pExpr) ? pExpr->y.pWin : 0; #else pWin = 0; #endif @@ -31726,7 +32865,13 @@ SQLITE_PRIVATE void sqlite3TreeViewExpr(TreeView *pView, const Expr *pExpr, u8 m sqlite3TreeViewLine(pView, "FUNCTION %Q%s", pExpr->u.zToken, zFlgs); } if( pFarg ){ - sqlite3TreeViewExprList(pView, pFarg, pWin!=0, 0); + sqlite3TreeViewExprList(pView, pFarg, pWin!=0 || pExpr->pLeft, 0); + if( pExpr->pLeft ){ + Expr *pOB = pExpr->pLeft; + assert( pOB->op==TK_ORDER ); + assert( ExprUseXList(pOB) ); + sqlite3TreeViewExprList(pView, pOB->x.pList, pWin!=0, "ORDERBY"); + } } #ifndef SQLITE_OMIT_WINDOWFUNC if( pWin ){ @@ -31735,6 +32880,10 @@ SQLITE_PRIVATE void sqlite3TreeViewExpr(TreeView *pView, const Expr *pExpr, u8 m #endif break; } + case TK_ORDER: { + sqlite3TreeViewExprList(pView, pExpr->x.pList, 0, "ORDERBY"); + break; + } #ifndef SQLITE_OMIT_SUBQUERY case TK_EXISTS: { assert( ExprUseXSelect(pExpr) ); @@ -32344,16 +33493,41 @@ SQLITE_PRIVATE void sqlite3ShowWinFunc(const Window *p){ sqlite3TreeViewWinFunc( ** This structure is the current state of the generator. */ static SQLITE_WSD struct sqlite3PrngType { - unsigned char isInit; /* True if initialized */ - unsigned char i, j; /* State variables */ - unsigned char s[256]; /* State variables */ + u32 s[16]; /* 64 bytes of chacha20 state */ + u8 out[64]; /* Output bytes */ + u8 n; /* Output bytes remaining */ } sqlite3Prng; + +/* The RFC-7539 ChaCha20 block function +*/ +#define ROTL(a,b) (((a) << (b)) | ((a) >> (32 - (b)))) +#define QR(a, b, c, d) ( \ + a += b, d ^= a, d = ROTL(d,16), \ + c += d, b ^= c, b = ROTL(b,12), \ + a += b, d ^= a, d = ROTL(d, 8), \ + c += d, b ^= c, b = ROTL(b, 7)) +static void chacha_block(u32 *out, const u32 *in){ + int i; + u32 x[16]; + memcpy(x, in, 64); + for(i=0; i<10; i++){ + QR(x[0], x[4], x[ 8], x[12]); + QR(x[1], x[5], x[ 9], x[13]); + QR(x[2], x[6], x[10], x[14]); + QR(x[3], x[7], x[11], x[15]); + QR(x[0], x[5], x[10], x[15]); + QR(x[1], x[6], x[11], x[12]); + QR(x[2], x[7], x[ 8], x[13]); + QR(x[3], x[4], x[ 9], x[14]); + } + for(i=0; i<16; i++) out[i] = x[i]+in[i]; +} + /* ** Return N random bytes. */ SQLITE_API void sqlite3_randomness(int N, void *pBuf){ - unsigned char t; unsigned char *zBuf = pBuf; /* The "wsdPrng" macro will resolve to the pseudo-random number generator @@ -32383,53 +33557,46 @@ SQLITE_API void sqlite3_randomness(int N, void *pBuf){ sqlite3_mutex_enter(mutex); if( N<=0 || pBuf==0 ){ - wsdPrng.isInit = 0; + wsdPrng.s[0] = 0; sqlite3_mutex_leave(mutex); return; } /* Initialize the state of the random number generator once, - ** the first time this routine is called. The seed value does - ** not need to contain a lot of randomness since we are not - ** trying to do secure encryption or anything like that... - ** - ** Nothing in this file or anywhere else in SQLite does any kind of - ** encryption. The RC4 algorithm is being used as a PRNG (pseudo-random - ** number generator) not as an encryption device. + ** the first time this routine is called. 
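+ **
+ ** The 16-word state follows the RFC-7539 ChaCha20 layout: words 0-3
+ ** hold the "expand 32-byte k" constants, words 4-11 the 256-bit key,
+ ** word 12 the block counter, and words 13-15 the nonce. The code below
+ ** seeds 44 bytes of key and nonce material from the VFS xRandomness
+ ** method, relocates the random word that landed in the counter slot
+ ** into s[15], and zeroes the counter.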
*/ - if( !wsdPrng.isInit ){ + if( wsdPrng.s[0]==0 ){ sqlite3_vfs *pVfs = sqlite3_vfs_find(0); - int i; - char k[256]; - wsdPrng.j = 0; - wsdPrng.i = 0; + static const u32 chacha20_init[] = { + 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 + }; + memcpy(&wsdPrng.s[0], chacha20_init, 16); if( NEVER(pVfs==0) ){ - memset(k, 0, sizeof(k)); + memset(&wsdPrng.s[4], 0, 44); }else{ - sqlite3OsRandomness(pVfs, 256, k); - } - for(i=0; i<256; i++){ - wsdPrng.s[i] = (u8)i; + sqlite3OsRandomness(pVfs, 44, (char*)&wsdPrng.s[4]); } - for(i=0; i<256; i++){ - wsdPrng.j += wsdPrng.s[i] + k[i]; - t = wsdPrng.s[wsdPrng.j]; - wsdPrng.s[wsdPrng.j] = wsdPrng.s[i]; - wsdPrng.s[i] = t; - } - wsdPrng.isInit = 1; + wsdPrng.s[15] = wsdPrng.s[12]; + wsdPrng.s[12] = 0; + wsdPrng.n = 0; } assert( N>0 ); - do{ - wsdPrng.i++; - t = wsdPrng.s[wsdPrng.i]; - wsdPrng.j += t; - wsdPrng.s[wsdPrng.i] = wsdPrng.s[wsdPrng.j]; - wsdPrng.s[wsdPrng.j] = t; - t += wsdPrng.s[wsdPrng.i]; - *(zBuf++) = wsdPrng.s[t]; - }while( --N ); + while( 1 /* exit by break */ ){ + if( N<=wsdPrng.n ){ + memcpy(zBuf, &wsdPrng.out[wsdPrng.n-N], N); + wsdPrng.n -= N; + break; + } + if( wsdPrng.n>0 ){ + memcpy(zBuf, wsdPrng.out, wsdPrng.n); + N -= wsdPrng.n; + zBuf += wsdPrng.n; + } + wsdPrng.s[12]++; + chacha_block((u32*)wsdPrng.out, wsdPrng.s); + wsdPrng.n = 64; + } sqlite3_mutex_leave(mutex); } @@ -33303,7 +34470,7 @@ SQLITE_PRIVATE void sqlite3UtfSelfTest(void){ /* ** Calls to sqlite3FaultSim() are used to simulate a failure during testing, ** or to bypass normal error detection during testing in order to let -** execute proceed futher downstream. +** execute proceed further downstream. ** ** In deployment, sqlite3FaultSim() *always* return SQLITE_OK (0). The ** sqlite3FaultSim() function only returns non-zero during testing. @@ -33420,6 +34587,23 @@ SQLITE_PRIVATE void sqlite3ErrorClear(sqlite3 *db){ */ SQLITE_PRIVATE void sqlite3SystemError(sqlite3 *db, int rc){ if( rc==SQLITE_IOERR_NOMEM ) return; +#ifdef SQLITE_USE_SEH + if( rc==SQLITE_IOERR_IN_PAGE ){ + int ii; + int iErr; + sqlite3BtreeEnterAll(db); + for(ii=0; iinDb; ii++){ + if( db->aDb[ii].pBt ){ + iErr = sqlite3PagerWalSystemErrno(sqlite3BtreePager(db->aDb[ii].pBt)); + if( iErr ){ + db->iSysErrno = iErr; + } + } + } + sqlite3BtreeLeaveAll(db); + return; + } +#endif rc &= 0xff; if( rc==SQLITE_CANTOPEN || rc==SQLITE_IOERR ){ db->iSysErrno = sqlite3OsGetLastError(db->pVfs); @@ -33454,6 +34638,30 @@ SQLITE_PRIVATE void sqlite3ErrorWithMsg(sqlite3 *db, int err_code, const char *z } } +/* +** Check for interrupts and invoke progress callback. +*/ +SQLITE_PRIVATE void sqlite3ProgressCheck(Parse *p){ + sqlite3 *db = p->db; + if( AtomicLoad(&db->u1.isInterrupted) ){ + p->nErr++; + p->rc = SQLITE_INTERRUPT; + } +#ifndef SQLITE_OMIT_PROGRESS_CALLBACK + if( db->xProgress ){ + if( p->rc==SQLITE_INTERRUPT ){ + p->nProgressSteps = 0; + }else if( (++p->nProgressSteps)>=db->nProgressOps ){ + if( db->xProgress(db->pProgressArg) ){ + p->nErr++; + p->rc = SQLITE_INTERRUPT; + } + p->nProgressSteps = 0; + } + } +#endif +} + /* ** Add an error message to pParse->zErrMsg and increment pParse->nErr. ** @@ -33645,43 +34853,40 @@ SQLITE_PRIVATE u8 sqlite3StrIHash(const char *z){ return h; } -/* -** Compute 10 to the E-th power. Examples: E==1 results in 10. -** E==2 results in 100. E==50 results in 1.0e50. +/* Double-Double multiplication. (x[0],x[1]) *= (y,yy) ** -** This routine only works for values of E between 1 and 341. +** Reference: +** T. J. 
Dekker, "A Floating-Point Technique for Extending the +** Available Precision". 1971-07-26. */ -static LONGDOUBLE_TYPE sqlite3Pow10(int E){ -#if defined(_MSC_VER) - static const LONGDOUBLE_TYPE x[] = { - 1.0e+001L, - 1.0e+002L, - 1.0e+004L, - 1.0e+008L, - 1.0e+016L, - 1.0e+032L, - 1.0e+064L, - 1.0e+128L, - 1.0e+256L - }; - LONGDOUBLE_TYPE r = 1.0; - int i; - assert( E>=0 && E<=307 ); - for(i=0; E!=0; i++, E >>=1){ - if( E & 1 ) r *= x[i]; - } - return r; -#else - LONGDOUBLE_TYPE x = 10.0; - LONGDOUBLE_TYPE r = 1.0; - while(1){ - if( E & 1 ) r *= x; - E >>= 1; - if( E==0 ) break; - x *= x; - } - return r; -#endif +static void dekkerMul2(volatile double *x, double y, double yy){ + /* + ** The "volatile" keywords on parameter x[] and on local variables + ** below are needed force intermediate results to be truncated to + ** binary64 rather than be carried around in an extended-precision + ** format. The truncation is necessary for the Dekker algorithm to + ** work. Intel x86 floating point might omit the truncation without + ** the use of volatile. + */ + volatile double tx, ty, p, q, c, cc; + double hx, hy; + u64 m; + memcpy(&m, (void*)&x[0], 8); + m &= 0xfffffffffc000000LL; + memcpy(&hx, &m, 8); + tx = x[0] - hx; + memcpy(&m, &y, 8); + m &= 0xfffffffffc000000LL; + memcpy(&hy, &m, 8); + ty = y - hy; + p = hx*hy; + q = hx*ty + tx*hy; + c = p+q; + cc = p - c + q + tx*ty; + cc = x[0]*yy + x[1]*y + cc; + x[0] = c + cc; + x[1] = c - x[0]; + x[1] += cc; } /* @@ -33722,12 +34927,11 @@ SQLITE_PRIVATE int sqlite3AtoF(const char *z, double *pResult, int length, u8 en const char *zEnd; /* sign * significand * (10 ^ (esign * exponent)) */ int sign = 1; /* sign of significand */ - i64 s = 0; /* significand */ + u64 s = 0; /* significand */ int d = 0; /* adjust exponent for shifting decimal point */ int esign = 1; /* sign of exponent */ int e = 0; /* exponent */ int eValid = 1; /* True exponent is either not used or is well-formed */ - double result; int nDigit = 0; /* Number of digits processed */ int eType = 1; /* 1: pure integer, 2+: fractional -1 or less: bad UTF16 */ @@ -33767,7 +34971,7 @@ SQLITE_PRIVATE int sqlite3AtoF(const char *z, double *pResult, int length, u8 en while( z=((LARGEST_INT64-9)/10) ){ + if( s>=((LARGEST_UINT64-9)/10) ){ /* skip non-significant significand digits ** (increase exponent by d to shift decimal left) */ while( z0 ){ /*OPTIMIZATION-IF-TRUE*/ - if( esign>0 ){ - if( s>=(LARGEST_INT64/10) ) break; /*OPTIMIZATION-IF-FALSE*/ - s *= 10; - }else{ - if( s%10!=0 ) break; /*OPTIMIZATION-IF-FALSE*/ - s /= 10; - } - e--; - } + /* adjust exponent by d, and update sign */ + e = (e*esign) + d; - /* adjust the sign of significand */ - s = sign<0 ? 
-s : s; + /* Try to adjust the exponent to make it smaller */ + while( e>0 && s<(LARGEST_UINT64/10) ){ + s *= 10; + e--; + } + while( e<0 && (s%10)==0 ){ + s /= 10; + e++; + } - if( e==0 ){ /*OPTIMIZATION-IF-TRUE*/ - result = (double)s; + if( e==0 ){ + *pResult = s; + }else if( sqlite3Config.bUseLongDouble ){ + LONGDOUBLE_TYPE r = (LONGDOUBLE_TYPE)s; + if( e>0 ){ + while( e>=100 ){ e-=100; r *= 1.0e+100L; } + while( e>=10 ){ e-=10; r *= 1.0e+10L; } + while( e>=1 ){ e-=1; r *= 1.0e+01L; } }else{ - /* attempt to handle extremely small/large numbers better */ - if( e>307 ){ /*OPTIMIZATION-IF-TRUE*/ - if( e<342 ){ /*OPTIMIZATION-IF-TRUE*/ - LONGDOUBLE_TYPE scale = sqlite3Pow10(e-308); - if( esign<0 ){ - result = s / scale; - result /= 1.0e+308; - }else{ - result = s * scale; - result *= 1.0e+308; - } - }else{ assert( e>=342 ); - if( esign<0 ){ - result = 0.0*s; - }else{ + while( e<=-100 ){ e+=100; r *= 1.0e-100L; } + while( e<=-10 ){ e+=10; r *= 1.0e-10L; } + while( e<=-1 ){ e+=1; r *= 1.0e-01L; } + } + assert( r>=0.0 ); + if( r>+1.7976931348623157081452742373e+308L ){ #ifdef INFINITY - result = INFINITY*s; + *pResult = +INFINITY; #else - result = 1e308*1e308*s; /* Infinity */ + *pResult = 1.0e308*10.0; #endif - } - } - }else{ - LONGDOUBLE_TYPE scale = sqlite3Pow10(e); - if( esign<0 ){ - result = s / scale; - }else{ - result = s * scale; - } + }else{ + *pResult = (double)r; + } + }else{ + double rr[2]; + u64 s2; + rr[0] = (double)s; + s2 = (u64)rr[0]; + rr[1] = s>=s2 ? (double)(s - s2) : -(double)(s2 - s); + if( e>0 ){ + while( e>=100 ){ + e -= 100; + dekkerMul2(rr, 1.0e+100, -1.5902891109759918046e+83); + } + while( e>=10 ){ + e -= 10; + dekkerMul2(rr, 1.0e+10, 0.0); + } + while( e>=1 ){ + e -= 1; + dekkerMul2(rr, 1.0e+01, 0.0); + } + }else{ + while( e<=-100 ){ + e += 100; + dekkerMul2(rr, 1.0e-100, -1.99918998026028836196e-117); + } + while( e<=-10 ){ + e += 10; + dekkerMul2(rr, 1.0e-10, -3.6432197315497741579e-27); + } + while( e<=-1 ){ + e += 1; + dekkerMul2(rr, 1.0e-01, -5.5511151231257827021e-18); } } + *pResult = rr[0]+rr[1]; + if( sqlite3IsNaN(*pResult) ) *pResult = 1e300*1e300; } + if( sign<0 ) *pResult = -*pResult; + assert( !sqlite3IsNaN(*pResult) ); - /* store the result */ - *pResult = result; - - /* return true if number and no extra non-whitespace chracters after */ +atof_return: + /* return true if number and no extra non-whitespace characters after */ if( z==zEnd && nDigit>0 && eValid && eType>0 ){ return eType; }else if( eType>=2 && (eType==3 || eValid) && nDigit>0 ){ @@ -33911,11 +35125,14 @@ SQLITE_PRIVATE int sqlite3AtoF(const char *z, double *pResult, int length, u8 en #endif /* -** Render an signed 64-bit integer as text. Store the result in zOut[]. +** Render an signed 64-bit integer as text. Store the result in zOut[] and +** return the length of the string that was stored, in bytes. The value +** returned does not include the zero terminator at the end of the output +** string. ** ** The caller must ensure that zOut[] is at least 21 bytes in size. 
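dekkerMul2() above depends on splitting each double at the 26th stored significand bit so that partial products carry almost no rounding error. Here is a standalone sketch of that split and of the head/tail product, with an invented helper name (split26); the real code additionally uses volatile to stop x87 extended precision from defeating the truncation.

#include <stdio.h>
#include <string.h>
#include <stdint.h>

/* Keep the sign, exponent, and top 26 stored significand bits in *hi;
** the remainder *lo is computed exactly, since x and *hi share an
** exponent, so hi+lo reconstructs x with no loss. */
static void split26(double x, double *hi, double *lo){
  uint64_t m;
  memcpy(&m, &x, 8);
  m &= 0xfffffffffc000000ULL;
  memcpy(hi, &m, 8);
  *lo = x - *hi;
}

int main(void){
  double x = 1.0e+10, y = 1.0/3.0;
  double hx, tx, hy, ty, p, q, c, cc;
  split26(x, &hx, &tx);
  split26(y, &hy, &ty);
  p  = hx*hy;               /* high product of the two heads           */
  q  = hx*ty + tx*hy;       /* cross terms                             */
  c  = p + q;               /* head of x*y                             */
  cc = p - c + q + tx*ty;   /* tail: the error rounded off of the head */
  printf("split check: %d %d\n", hx+tx==x, hy+ty==y);   /* 1 1 */
  printf("head=%.17g tail=%.17g\n", c, cc);
  return 0;
}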
*/ -SQLITE_PRIVATE void sqlite3Int64ToText(i64 v, char *zOut){ +SQLITE_PRIVATE int sqlite3Int64ToText(i64 v, char *zOut){ int i; u64 x; char zTemp[22]; @@ -33926,12 +35143,15 @@ SQLITE_PRIVATE void sqlite3Int64ToText(i64 v, char *zOut){ } i = sizeof(zTemp)-2; zTemp[sizeof(zTemp)-1] = 0; - do{ - zTemp[i--] = (x%10) + '0'; + while( 1 /*exit-by-break*/ ){ + zTemp[i] = (x%10) + '0'; x = x/10; - }while( x ); - if( v<0 ) zTemp[i--] = '-'; - memcpy(zOut, &zTemp[i+1], sizeof(zTemp)-1-i); + if( x==0 ) break; + i--; + }; + if( v<0 ) zTemp[--i] = '-'; + memcpy(zOut, &zTemp[i], sizeof(zTemp)-i); + return sizeof(zTemp)-1-i; } /* @@ -34024,7 +35244,7 @@ SQLITE_PRIVATE int sqlite3Atoi64(const char *zNum, i64 *pNum, int length, u8 enc /* This test and assignment is needed only to suppress UB warnings ** from clang and -fsanitize=undefined. This test and assignment make ** the code a little larger and slower, and no harm comes from omitting - ** them, but we must appaise the undefined-behavior pharisees. */ + ** them, but we must appease the undefined-behavior pharisees. */ *pNum = neg ? SMALLEST_INT64 : LARGEST_INT64; }else if( neg ){ *pNum = -(i64)u; @@ -34096,11 +35316,15 @@ SQLITE_PRIVATE int sqlite3DecOrHexToI64(const char *z, i64 *pOut){ u = u*16 + sqlite3HexToInt(z[k]); } memcpy(pOut, &u, 8); - return (z[k]==0 && k-i<=16) ? 0 : 2; + if( k-i>16 ) return 2; + if( z[k]!=0 ) return 1; + return 0; }else #endif /* SQLITE_OMIT_HEX_INTEGER */ { - return sqlite3Atoi64(z, pOut, sqlite3Strlen30(z), SQLITE_UTF8); + int n = (int)(0x3fffffff&strspn(z,"+- \n\t0123456789")); + if( z[n] ) n++; + return sqlite3Atoi64(z, pOut, n, SQLITE_UTF8); } } @@ -34132,7 +35356,7 @@ SQLITE_PRIVATE int sqlite3GetInt32(const char *zNum, int *pValue){ u32 u = 0; zNum += 2; while( zNum[0]=='0' ) zNum++; - for(i=0; sqlite3Isxdigit(zNum[i]) && i<8; i++){ + for(i=0; i<8 && sqlite3Isxdigit(zNum[i]); i++){ u = u*16 + sqlite3HexToInt(zNum[i]); } if( (u&0x80000000)==0 && sqlite3Isxdigit(zNum[i])==0 ){ @@ -34179,6 +35403,153 @@ SQLITE_PRIVATE int sqlite3Atoi(const char *z){ return x; } +/* +** Decode a floating-point value into an approximate decimal +** representation. +** +** Round the decimal representation to n significant digits if +** n is positive. Or round to -n signficant digits after the +** decimal point if n is negative. No rounding is performed if +** n is zero. +** +** The significant digits of the decimal representation are +** stored in p->z[] which is a often (but not always) a pointer +** into the middle of p->zBuf[]. There are p->n significant digits. +** The p->z[] array is *not* zero-terminated. +*/ +SQLITE_PRIVATE void sqlite3FpDecode(FpDecode *p, double r, int iRound, int mxRound){ + int i; + u64 v; + int e, exp = 0; + p->isSpecial = 0; + p->z = p->zBuf; + + /* Convert negative numbers to positive. Deal with Infinity, 0.0, and + ** NaN. */ + if( r<0.0 ){ + p->sign = '-'; + r = -r; + }else if( r==0.0 ){ + p->sign = '+'; + p->n = 1; + p->iDP = 1; + p->z = "0"; + return; + }else{ + p->sign = '+'; + } + memcpy(&v,&r,8); + e = v>>52; + if( (e&0x7ff)==0x7ff ){ + p->isSpecial = 1 + (v!=0x7ff0000000000000LL); + p->n = 0; + p->iDP = 0; + return; + } + + /* Multiply r by powers of ten until it lands somewhere in between + ** 1.0e+19 and 1.0e+17. 
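A worked sketch of that normalization step, using the long-double branch for brevity (the real code strides by powers of 100, 10, and 1, and falls back to dekkerMul2-based double-double scaling when long double is no wider than double):

#include <stdio.h>
#include <stdint.h>

/* Scale r into [1e+17, 1e+19) while tracking the decimal exponent, so
** the integer part of the scaled value holds 18-19 significant digits.
** Invariant throughout both loops: r == rr * 10^exp. */
int main(void){
  double r = 3.141592653589793;
  long double rr = r;
  int exp = 0;
  uint64_t v;
  while( rr>=1.0e+19L ){ exp++; rr *= 1.0e-1L; }
  while( rr< 1.0e+17L ){ exp--; rr *= 1.0e+1L; }
  v = (uint64_t)rr;
  /* Prints the ~18 leading decimal digits of r and the exponent that
  ** restores its magnitude; sqlite3FpDecode then rounds and strips
  ** trailing zeros from exactly this digit string. */
  printf("significand=%llu exponent=%d\n", (unsigned long long)v, exp);
  return 0;
}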
+ */ + if( sqlite3Config.bUseLongDouble ){ + LONGDOUBLE_TYPE rr = r; + if( rr>=1.0e+19 ){ + while( rr>=1.0e+119L ){ exp+=100; rr *= 1.0e-100L; } + while( rr>=1.0e+29L ){ exp+=10; rr *= 1.0e-10L; } + while( rr>=1.0e+19L ){ exp++; rr *= 1.0e-1L; } + }else{ + while( rr<1.0e-97L ){ exp-=100; rr *= 1.0e+100L; } + while( rr<1.0e+07L ){ exp-=10; rr *= 1.0e+10L; } + while( rr<1.0e+17L ){ exp--; rr *= 1.0e+1L; } + } + v = (u64)rr; + }else{ + /* If high-precision floating point is not available using "long double", + ** then use Dekker-style double-double computation to increase the + ** precision. + ** + ** The error terms on constants like 1.0e+100 computed using the + ** decimal extension, for example as follows: + ** + ** SELECT decimal_exp(decimal_sub('1.0e+100',decimal(1.0e+100))); + */ + double rr[2]; + rr[0] = r; + rr[1] = 0.0; + if( rr[0]>9.223372036854774784e+18 ){ + while( rr[0]>9.223372036854774784e+118 ){ + exp += 100; + dekkerMul2(rr, 1.0e-100, -1.99918998026028836196e-117); + } + while( rr[0]>9.223372036854774784e+28 ){ + exp += 10; + dekkerMul2(rr, 1.0e-10, -3.6432197315497741579e-27); + } + while( rr[0]>9.223372036854774784e+18 ){ + exp += 1; + dekkerMul2(rr, 1.0e-01, -5.5511151231257827021e-18); + } + }else{ + while( rr[0]<9.223372036854774784e-83 ){ + exp -= 100; + dekkerMul2(rr, 1.0e+100, -1.5902891109759918046e+83); + } + while( rr[0]<9.223372036854774784e+07 ){ + exp -= 10; + dekkerMul2(rr, 1.0e+10, 0.0); + } + while( rr[0]<9.22337203685477478e+17 ){ + exp -= 1; + dekkerMul2(rr, 1.0e+01, 0.0); + } + } + v = rr[1]<0.0 ? (u64)rr[0]-(u64)(-rr[1]) : (u64)rr[0]+(u64)rr[1]; + } + + + /* Extract significant digits. */ + i = sizeof(p->zBuf)-1; + assert( v>0 ); + while( v ){ p->zBuf[i--] = (v%10) + '0'; v /= 10; } + assert( i>=0 && izBuf)-1 ); + p->n = sizeof(p->zBuf) - 1 - i; + assert( p->n>0 ); + assert( p->nzBuf) ); + p->iDP = p->n + exp; + if( iRound<0 ){ + iRound = p->iDP - iRound; + if( iRound==0 && p->zBuf[i+1]>='5' ){ + iRound = 1; + p->zBuf[i--] = '0'; + p->n++; + p->iDP++; + } + } + if( iRound>0 && (iRoundn || p->n>mxRound) ){ + char *z = &p->zBuf[i+1]; + if( iRound>mxRound ) iRound = mxRound; + p->n = iRound; + if( z[iRound]>='5' ){ + int j = iRound-1; + while( 1 /*exit-by-break*/ ){ + z[j]++; + if( z[j]<='9' ) break; + z[j] = '0'; + if( j==0 ){ + p->z[i--] = '1'; + p->n++; + p->iDP++; + break; + }else{ + j--; + } + } + } + } + p->z = &p->zBuf[i+1]; + assert( i+p->n < sizeof(p->zBuf) ); + while( ALWAYS(p->n>0) && p->z[p->n-1]=='0' ){ p->n--; } +} + /* ** Try to convert z into an unsigned 32-bit integer. Return true on ** success and false if there is an error. @@ -34442,121 +35813,32 @@ SQLITE_PRIVATE u8 sqlite3GetVarint(const unsigned char *p, u64 *v){ ** this function assumes the single-byte case has already been handled. */ SQLITE_PRIVATE u8 sqlite3GetVarint32(const unsigned char *p, u32 *v){ - u32 a,b; + u64 v64; + u8 n; - /* The 1-byte case. Overwhelmingly the most common. 
Handled inline - ** by the getVarin32() macro */ - a = *p; - /* a: p0 (unmasked) */ -#ifndef getVarint32 - if (!(a&0x80)) - { - /* Values between 0 and 127 */ - *v = a; - return 1; - } -#endif + /* Assume that the single-byte case has already been handled by + ** the getVarint32() macro */ + assert( (p[0] & 0x80)!=0 ); - /* The 2-byte case */ - p++; - b = *p; - /* b: p1 (unmasked) */ - if (!(b&0x80)) - { - /* Values between 128 and 16383 */ - a &= 0x7f; - a = a<<7; - *v = a | b; + if( (p[1] & 0x80)==0 ){ + /* This is the two-byte case */ + *v = ((p[0]&0x7f)<<7) | p[1]; return 2; } - - /* The 3-byte case */ - p++; - a = a<<14; - a |= *p; - /* a: p0<<14 | p2 (unmasked) */ - if (!(a&0x80)) - { - /* Values between 16384 and 2097151 */ - a &= (0x7f<<14)|(0x7f); - b &= 0x7f; - b = b<<7; - *v = a | b; + if( (p[2] & 0x80)==0 ){ + /* This is the three-byte case */ + *v = ((p[0]&0x7f)<<14) | ((p[1]&0x7f)<<7) | p[2]; return 3; } - - /* A 32-bit varint is used to store size information in btrees. - ** Objects are rarely larger than 2MiB limit of a 3-byte varint. - ** A 3-byte varint is sufficient, for example, to record the size - ** of a 1048569-byte BLOB or string. - ** - ** We only unroll the first 1-, 2-, and 3- byte cases. The very - ** rare larger cases can be handled by the slower 64-bit varint - ** routine. - */ -#if 1 - { - u64 v64; - u8 n; - - n = sqlite3GetVarint(p-2, &v64); - assert( n>3 && n<=9 ); - if( (v64 & SQLITE_MAX_U32)!=v64 ){ - *v = 0xffffffff; - }else{ - *v = (u32)v64; - } - return n; - } - -#else - /* For following code (kept for historical record only) shows an - ** unrolling for the 3- and 4-byte varint cases. This code is - ** slightly faster, but it is also larger and much harder to test. - */ - p++; - b = b<<14; - b |= *p; - /* b: p1<<14 | p3 (unmasked) */ - if (!(b&0x80)) - { - /* Values between 2097152 and 268435455 */ - b &= (0x7f<<14)|(0x7f); - a &= (0x7f<<14)|(0x7f); - a = a<<7; - *v = a | b; - return 4; - } - - p++; - a = a<<14; - a |= *p; - /* a: p0<<28 | p2<<14 | p4 (unmasked) */ - if (!(a&0x80)) - { - /* Values between 268435456 and 34359738367 */ - a &= SLOT_4_2_0; - b &= SLOT_4_2_0; - b = b<<7; - *v = a | b; - return 5; - } - - /* We can only reach this point when reading a corrupt database - ** file. In that case we are not in any hurry. Use the (relatively - ** slow) general-purpose sqlite3GetVarint() routine to extract the - ** value. */ - { - u64 v64; - u8 n; - - p -= 4; - n = sqlite3GetVarint(p, &v64); - assert( n>5 && n<=9 ); + /* four or more bytes */ + n = sqlite3GetVarint(p, &v64); + assert( n>3 && n<=9 ); + if( (v64 & SQLITE_MAX_U32)!=v64 ){ + *v = 0xffffffff; + }else{ *v = (u32)v64; - return n; } -#endif + return n; } /* @@ -34707,7 +35989,7 @@ SQLITE_PRIVATE int sqlite3SafetyCheckSickOrOk(sqlite3 *db){ } /* -** Attempt to add, substract, or multiply the 64-bit signed value iB against +** Attempt to add, subtract, or multiply the 64-bit signed value iB against ** the other 64-bit signed integer at *pA and store the result in *pA. ** Return 0 on success. Or if the operation would have resulted in an ** overflow, leave *pA unchanged and return 1. @@ -34993,6 +36275,104 @@ SQLITE_PRIVATE int sqlite3VListNameToNum(VList *pIn, const char *zName, int nNam return 0; } +/* +** High-resolution hardware timer used for debugging and testing only. 
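The rewritten sqlite3GetVarint32() above keeps only the 2- and 3-byte fast paths inline (the 1-byte case is already handled by a macro at the call sites) and defers longer encodings to the general 64-bit routine. A self-contained sketch of the same big-endian, 7-bits-per-byte format, folding the 1-byte case back in:

#include <stdio.h>
#include <stdint.h>

/* Decode the 1-, 2- and 3-byte cases of SQLite's varint: the high bit
** of each byte is a continuation flag and the low seven bits carry
** data, most-significant group first. */
static int varint32_decode(const uint8_t *p, uint32_t *v){
  if( (p[0] & 0x80)==0 ){                   /* 0..127 */
    *v = p[0];
    return 1;
  }
  if( (p[1] & 0x80)==0 ){                   /* 128..16383 */
    *v = ((uint32_t)(p[0]&0x7f)<<7) | p[1];
    return 2;
  }
  if( (p[2] & 0x80)==0 ){                   /* 16384..2097151 */
    *v = ((uint32_t)(p[0]&0x7f)<<14) | ((uint32_t)(p[1]&0x7f)<<7) | p[2];
    return 3;
  }
  return 0;  /* longer encodings: defer to a general 64-bit decoder */
}

int main(void){
  const uint8_t three[] = { 0x81, 0x80, 0x00 };  /* encodes 16384 */
  uint32_t v;
  int n = varint32_decode(three, &v);
  printf("%d bytes -> %u\n", n, v);   /* prints "3 bytes -> 16384" */
  return 0;
}

As the deleted comment notes, 3 bytes already cover b-tree cell sizes up to 2MiB, which is why only these cases merit inlining.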
+*/ +#if defined(VDBE_PROFILE) \ + || defined(SQLITE_PERFORMANCE_TRACE) \ + || defined(SQLITE_ENABLE_STMT_SCANSTATUS) +/************** Include hwtime.h in the middle of util.c *********************/ +/************** Begin file hwtime.h ******************************************/ +/* +** 2008 May 27 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains inline asm code for retrieving "high-performance" +** counters for x86 and x86_64 class CPUs. +*/ +#ifndef SQLITE_HWTIME_H +#define SQLITE_HWTIME_H + +/* +** The following routine only works on Pentium-class (or newer) processors. +** It uses the RDTSC opcode to read the cycle count value out of the +** processor and returns that value. This can be used for high-res +** profiling. +*/ +#if !defined(__STRICT_ANSI__) && \ + (defined(__GNUC__) || defined(_MSC_VER)) && \ + (defined(i386) || defined(__i386__) || defined(_M_IX86)) + + #if defined(__GNUC__) + + __inline__ sqlite_uint64 sqlite3Hwtime(void){ + unsigned int lo, hi; + __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); + return (sqlite_uint64)hi << 32 | lo; + } + + #elif defined(_MSC_VER) + + __declspec(naked) __inline sqlite_uint64 __cdecl sqlite3Hwtime(void){ + __asm { + rdtsc + ret ; return value at EDX:EAX + } + } + + #endif + +#elif !defined(__STRICT_ANSI__) && (defined(__GNUC__) && defined(__x86_64__)) + + __inline__ sqlite_uint64 sqlite3Hwtime(void){ + unsigned int lo, hi; + __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); + return (sqlite_uint64)hi << 32 | lo; + } + +#elif !defined(__STRICT_ANSI__) && (defined(__GNUC__) && defined(__ppc__)) + + __inline__ sqlite_uint64 sqlite3Hwtime(void){ + unsigned long long retval; + unsigned long junk; + __asm__ __volatile__ ("\n\ + 1: mftbu %1\n\ + mftb %L0\n\ + mftbu %0\n\ + cmpw %0,%1\n\ + bne 1b" + : "=r" (retval), "=r" (junk)); + return retval; + } + +#else + + /* + ** asm() is needed for hardware timing support. Without asm(), + ** disable the sqlite3Hwtime() routine. + ** + ** sqlite3Hwtime() is only used for some obscure debugging + ** and analysis configurations, not in any deliverable, so this + ** should not be a great loss. + */ +SQLITE_PRIVATE sqlite_uint64 sqlite3Hwtime(void){ return ((sqlite_uint64)0); } + +#endif + +#endif /* !defined(SQLITE_HWTIME_H) */ + +/************** End of hwtime.h **********************************************/ +/************** Continuing where we left off in util.c ***********************/ +#endif + /************** End of util.c ************************************************/ /************** Begin file hash.c ********************************************/ /* @@ -35094,7 +36474,7 @@ static void insertElement( } -/* Resize the hash table so that it cantains "new_size" buckets. +/* Resize the hash table so that it contains "new_size" buckets. ** ** The hash table might fail to resize if sqlite3_malloc() fails or ** if the new size is the same as the prior size. 
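The next hunk reshapes findElementWithHash()'s chain walk into a count-guarded while loop. A minimal standalone model of that bounded, case-insensitive bucket scan (all names invented for this sketch):

#include <stdio.h>
#include <ctype.h>

typedef struct Elem Elem;
struct Elem { const char *pKey; int data; Elem *next; };

/* ASCII case-insensitive compare, standing in for sqlite3StrICmp. */
static int icmp(const char *a, const char *b){
  while( *a && tolower((unsigned char)*a)==tolower((unsigned char)*b) ){
    a++; b++;
  }
  return tolower((unsigned char)*a) - tolower((unsigned char)*b);
}

/* Walk at most `count` links of the chain; the real routine returns a
** static nullElement rather than NULL when nothing matches. */
static Elem *find(Elem *chain, unsigned count, const char *pKey){
  while( count ){
    if( icmp(chain->pKey, pKey)==0 ) return chain;
    chain = chain->next;
    count--;
  }
  return 0;
}

int main(void){
  Elem c = { "balance", 3, 0 };
  Elem b = { "Name",    2, &c };
  Elem a = { "id",      1, &b };
  Elem *e = find(&a, 3, "NAME");
  printf("%s -> %d\n", e ? e->pKey : "(none)", e ? e->data : -1);
  return 0;
}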
@@ -35163,12 +36543,13 @@ static HashElem *findElementWithHash( count = pH->count; } if( pHash ) *pHash = h; - while( count-- ){ + while( count ){ assert( elem!=0 ); if( sqlite3StrICmp(elem->pKey,pKey)==0 ){ return elem; } elem = elem->next; + count--; } return &nullElement; } @@ -35287,48 +36668,48 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){ /* 5 */ "Vacuum" OpHelp(""), /* 6 */ "VFilter" OpHelp("iplan=r[P3] zplan='P4'"), /* 7 */ "VUpdate" OpHelp("data=r[P3@P2]"), - /* 8 */ "Goto" OpHelp(""), - /* 9 */ "Gosub" OpHelp(""), - /* 10 */ "InitCoroutine" OpHelp(""), - /* 11 */ "Yield" OpHelp(""), - /* 12 */ "MustBeInt" OpHelp(""), - /* 13 */ "Jump" OpHelp(""), - /* 14 */ "Once" OpHelp(""), - /* 15 */ "If" OpHelp(""), - /* 16 */ "IfNot" OpHelp(""), - /* 17 */ "IsNullOrType" OpHelp("if typeof(r[P1]) IN (P3,5) goto P2"), - /* 18 */ "IfNullRow" OpHelp("if P1.nullRow then r[P3]=NULL, goto P2"), + /* 8 */ "Init" OpHelp("Start at P2"), + /* 9 */ "Goto" OpHelp(""), + /* 10 */ "Gosub" OpHelp(""), + /* 11 */ "InitCoroutine" OpHelp(""), + /* 12 */ "Yield" OpHelp(""), + /* 13 */ "MustBeInt" OpHelp(""), + /* 14 */ "Jump" OpHelp(""), + /* 15 */ "Once" OpHelp(""), + /* 16 */ "If" OpHelp(""), + /* 17 */ "IfNot" OpHelp(""), + /* 18 */ "IsType" OpHelp("if typeof(P1.P3) in P5 goto P2"), /* 19 */ "Not" OpHelp("r[P2]= !r[P1]"), - /* 20 */ "SeekLT" OpHelp("key=r[P3@P4]"), - /* 21 */ "SeekLE" OpHelp("key=r[P3@P4]"), - /* 22 */ "SeekGE" OpHelp("key=r[P3@P4]"), - /* 23 */ "SeekGT" OpHelp("key=r[P3@P4]"), - /* 24 */ "IfNotOpen" OpHelp("if( !csr[P1] ) goto P2"), - /* 25 */ "IfNoHope" OpHelp("key=r[P3@P4]"), - /* 26 */ "NoConflict" OpHelp("key=r[P3@P4]"), - /* 27 */ "NotFound" OpHelp("key=r[P3@P4]"), - /* 28 */ "Found" OpHelp("key=r[P3@P4]"), - /* 29 */ "SeekRowid" OpHelp("intkey=r[P3]"), - /* 30 */ "NotExists" OpHelp("intkey=r[P3]"), - /* 31 */ "Last" OpHelp(""), - /* 32 */ "IfSmaller" OpHelp(""), - /* 33 */ "SorterSort" OpHelp(""), - /* 34 */ "Sort" OpHelp(""), - /* 35 */ "Rewind" OpHelp(""), - /* 36 */ "SorterNext" OpHelp(""), - /* 37 */ "Prev" OpHelp(""), - /* 38 */ "Next" OpHelp(""), - /* 39 */ "IdxLE" OpHelp("key=r[P3@P4]"), - /* 40 */ "IdxGT" OpHelp("key=r[P3@P4]"), - /* 41 */ "IdxLT" OpHelp("key=r[P3@P4]"), - /* 42 */ "IdxGE" OpHelp("key=r[P3@P4]"), + /* 20 */ "IfNullRow" OpHelp("if P1.nullRow then r[P3]=NULL, goto P2"), + /* 21 */ "SeekLT" OpHelp("key=r[P3@P4]"), + /* 22 */ "SeekLE" OpHelp("key=r[P3@P4]"), + /* 23 */ "SeekGE" OpHelp("key=r[P3@P4]"), + /* 24 */ "SeekGT" OpHelp("key=r[P3@P4]"), + /* 25 */ "IfNotOpen" OpHelp("if( !csr[P1] ) goto P2"), + /* 26 */ "IfNoHope" OpHelp("key=r[P3@P4]"), + /* 27 */ "NoConflict" OpHelp("key=r[P3@P4]"), + /* 28 */ "NotFound" OpHelp("key=r[P3@P4]"), + /* 29 */ "Found" OpHelp("key=r[P3@P4]"), + /* 30 */ "SeekRowid" OpHelp("intkey=r[P3]"), + /* 31 */ "NotExists" OpHelp("intkey=r[P3]"), + /* 32 */ "Last" OpHelp(""), + /* 33 */ "IfSmaller" OpHelp(""), + /* 34 */ "SorterSort" OpHelp(""), + /* 35 */ "Sort" OpHelp(""), + /* 36 */ "Rewind" OpHelp(""), + /* 37 */ "SorterNext" OpHelp(""), + /* 38 */ "Prev" OpHelp(""), + /* 39 */ "Next" OpHelp(""), + /* 40 */ "IdxLE" OpHelp("key=r[P3@P4]"), + /* 41 */ "IdxGT" OpHelp("key=r[P3@P4]"), + /* 42 */ "IdxLT" OpHelp("key=r[P3@P4]"), /* 43 */ "Or" OpHelp("r[P3]=(r[P1] || r[P2])"), /* 44 */ "And" OpHelp("r[P3]=(r[P1] && r[P2])"), - /* 45 */ "RowSetRead" OpHelp("r[P3]=rowset(P1)"), - /* 46 */ "RowSetTest" OpHelp("if r[P3] in rowset(P1) goto P2"), - /* 47 */ "Program" OpHelp(""), - /* 48 */ "FkIfZero" OpHelp("if fkctr[P1]==0 goto P2"), - 
/* 49 */ "IfPos" OpHelp("if r[P1]>0 then r[P1]-=P3, goto P2"), + /* 45 */ "IdxGE" OpHelp("key=r[P3@P4]"), + /* 46 */ "RowSetRead" OpHelp("r[P3]=rowset(P1)"), + /* 47 */ "RowSetTest" OpHelp("if r[P3] in rowset(P1) goto P2"), + /* 48 */ "Program" OpHelp(""), + /* 49 */ "FkIfZero" OpHelp("if fkctr[P1]==0 goto P2"), /* 50 */ "IsNull" OpHelp("if r[P1]==NULL goto P2"), /* 51 */ "NotNull" OpHelp("if r[P1]!=NULL goto P2"), /* 52 */ "Ne" OpHelp("IF r[P3]!=r[P1]"), @@ -35338,12 +36719,12 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){ /* 56 */ "Lt" OpHelp("IF r[P3]=r[P1]"), /* 58 */ "ElseEq" OpHelp(""), - /* 59 */ "IfNotZero" OpHelp("if r[P1]!=0 then r[P1]--, goto P2"), - /* 60 */ "DecrJumpZero" OpHelp("if (--r[P1])==0 goto P2"), - /* 61 */ "IncrVacuum" OpHelp(""), - /* 62 */ "VNext" OpHelp(""), - /* 63 */ "Filter" OpHelp("if key(P3@P4) not in filter(P1) goto P2"), - /* 64 */ "Init" OpHelp("Start at P2"), + /* 59 */ "IfPos" OpHelp("if r[P1]>0 then r[P1]-=P3, goto P2"), + /* 60 */ "IfNotZero" OpHelp("if r[P1]!=0 then r[P1]--, goto P2"), + /* 61 */ "DecrJumpZero" OpHelp("if (--r[P1])==0 goto P2"), + /* 62 */ "IncrVacuum" OpHelp(""), + /* 63 */ "VNext" OpHelp(""), + /* 64 */ "Filter" OpHelp("if key(P3@P4) not in filter(P1) goto P2"), /* 65 */ "PureFunc" OpHelp("r[P3]=func(r[P2@NP])"), /* 66 */ "Function" OpHelp("r[P3]=func(r[P2@NP])"), /* 67 */ "Return" OpHelp(""), @@ -35453,25 +36834,1008 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){ /* 171 */ "VCreate" OpHelp(""), /* 172 */ "VDestroy" OpHelp(""), /* 173 */ "VOpen" OpHelp(""), - /* 174 */ "VInitIn" OpHelp("r[P2]=ValueList(P1,P3)"), - /* 175 */ "VColumn" OpHelp("r[P3]=vcolumn(P2)"), - /* 176 */ "VRename" OpHelp(""), - /* 177 */ "Pagecount" OpHelp(""), - /* 178 */ "MaxPgcnt" OpHelp(""), - /* 179 */ "ClrSubtype" OpHelp("r[P1].subtype = 0"), - /* 180 */ "FilterAdd" OpHelp("filter(P1) += key(P3@P4)"), - /* 181 */ "Trace" OpHelp(""), - /* 182 */ "CursorHint" OpHelp(""), - /* 183 */ "ReleaseReg" OpHelp("release r[P1@P2] mask P3"), - /* 184 */ "Noop" OpHelp(""), - /* 185 */ "Explain" OpHelp(""), - /* 186 */ "Abortable" OpHelp(""), + /* 174 */ "VCheck" OpHelp(""), + /* 175 */ "VInitIn" OpHelp("r[P2]=ValueList(P1,P3)"), + /* 176 */ "VColumn" OpHelp("r[P3]=vcolumn(P2)"), + /* 177 */ "VRename" OpHelp(""), + /* 178 */ "Pagecount" OpHelp(""), + /* 179 */ "MaxPgcnt" OpHelp(""), + /* 180 */ "ClrSubtype" OpHelp("r[P1].subtype = 0"), + /* 181 */ "FilterAdd" OpHelp("filter(P1) += key(P3@P4)"), + /* 182 */ "Trace" OpHelp(""), + /* 183 */ "CursorHint" OpHelp(""), + /* 184 */ "ReleaseReg" OpHelp("release r[P1@P2] mask P3"), + /* 185 */ "Noop" OpHelp(""), + /* 186 */ "Explain" OpHelp(""), + /* 187 */ "Abortable" OpHelp(""), }; return azName[i]; } #endif /************** End of opcodes.c *********************************************/ +/************** Begin file os_kv.c *******************************************/ +/* +** 2022-09-06 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains an experimental VFS layer that operates on a +** Key/Value storage engine where both keys and values must be pure +** text. 
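Before the storage details that follow: this VFS keeps each database page under its decimal page number, the file size under "sz", and the rollback journal under "jrnl", all prefixed with a namespace. A sketch of the key scheme (the helper and constant names here are invented; the real helper is kvstorageMakeKey with KVSTORAGE_KEY_SZ):

#include <stdio.h>

#define KEY_SZ 32   /* mirrors KVSTORAGE_KEY_SZ below */

/* Build the storage key in the "kvvfs-<class>-<key>" form, where class
** is either "local" or "session". */
static void make_key(const char *zClass, const char *zKey, char *zOut){
  snprintf(zOut, KEY_SZ, "kvvfs-%s-%s", zClass, zKey);
}

int main(void){
  char zKey[KEY_SZ];
  make_key("local",   "3",    zKey); printf("%s\n", zKey); /* page 3    */
  make_key("local",   "sz",   zKey); printf("%s\n", zKey); /* file size */
  make_key("session", "jrnl", zKey); printf("%s\n", zKey); /* journal   */
  return 0;
}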
+*/ +/* #include */ +#if SQLITE_OS_KV || (SQLITE_OS_UNIX && defined(SQLITE_OS_KV_OPTIONAL)) + +/***************************************************************************** +** Debugging logic +*/ + +/* SQLITE_KV_TRACE() is used for tracing calls to kvstorage routines. */ +#if 0 +#define SQLITE_KV_TRACE(X) printf X +#else +#define SQLITE_KV_TRACE(X) +#endif + +/* SQLITE_KV_LOG() is used for tracing calls to the VFS interface */ +#if 0 +#define SQLITE_KV_LOG(X) printf X +#else +#define SQLITE_KV_LOG(X) +#endif + + +/* +** Forward declaration of objects used by this VFS implementation +*/ +typedef struct KVVfsFile KVVfsFile; + +/* A single open file. There are only two files represented by this +** VFS - the database and the rollback journal. +*/ +struct KVVfsFile { + sqlite3_file base; /* IO methods */ + const char *zClass; /* Storage class */ + int isJournal; /* True if this is a journal file */ + unsigned int nJrnl; /* Space allocated for aJrnl[] */ + char *aJrnl; /* Journal content */ + int szPage; /* Last known page size */ + sqlite3_int64 szDb; /* Database file size. -1 means unknown */ + char *aData; /* Buffer to hold page data */ +}; +#define SQLITE_KVOS_SZ 133073 + +/* +** Methods for KVVfsFile +*/ +static int kvvfsClose(sqlite3_file*); +static int kvvfsReadDb(sqlite3_file*, void*, int iAmt, sqlite3_int64 iOfst); +static int kvvfsReadJrnl(sqlite3_file*, void*, int iAmt, sqlite3_int64 iOfst); +static int kvvfsWriteDb(sqlite3_file*,const void*,int iAmt, sqlite3_int64); +static int kvvfsWriteJrnl(sqlite3_file*,const void*,int iAmt, sqlite3_int64); +static int kvvfsTruncateDb(sqlite3_file*, sqlite3_int64 size); +static int kvvfsTruncateJrnl(sqlite3_file*, sqlite3_int64 size); +static int kvvfsSyncDb(sqlite3_file*, int flags); +static int kvvfsSyncJrnl(sqlite3_file*, int flags); +static int kvvfsFileSizeDb(sqlite3_file*, sqlite3_int64 *pSize); +static int kvvfsFileSizeJrnl(sqlite3_file*, sqlite3_int64 *pSize); +static int kvvfsLock(sqlite3_file*, int); +static int kvvfsUnlock(sqlite3_file*, int); +static int kvvfsCheckReservedLock(sqlite3_file*, int *pResOut); +static int kvvfsFileControlDb(sqlite3_file*, int op, void *pArg); +static int kvvfsFileControlJrnl(sqlite3_file*, int op, void *pArg); +static int kvvfsSectorSize(sqlite3_file*); +static int kvvfsDeviceCharacteristics(sqlite3_file*); + +/* +** Methods for sqlite3_vfs +*/ +static int kvvfsOpen(sqlite3_vfs*, const char *, sqlite3_file*, int , int *); +static int kvvfsDelete(sqlite3_vfs*, const char *zName, int syncDir); +static int kvvfsAccess(sqlite3_vfs*, const char *zName, int flags, int *); +static int kvvfsFullPathname(sqlite3_vfs*, const char *zName, int, char *zOut); +static void *kvvfsDlOpen(sqlite3_vfs*, const char *zFilename); +static int kvvfsRandomness(sqlite3_vfs*, int nByte, char *zOut); +static int kvvfsSleep(sqlite3_vfs*, int microseconds); +static int kvvfsCurrentTime(sqlite3_vfs*, double*); +static int kvvfsCurrentTimeInt64(sqlite3_vfs*, sqlite3_int64*); + +static sqlite3_vfs sqlite3OsKvvfsObject = { + 1, /* iVersion */ + sizeof(KVVfsFile), /* szOsFile */ + 1024, /* mxPathname */ + 0, /* pNext */ + "kvvfs", /* zName */ + 0, /* pAppData */ + kvvfsOpen, /* xOpen */ + kvvfsDelete, /* xDelete */ + kvvfsAccess, /* xAccess */ + kvvfsFullPathname, /* xFullPathname */ + kvvfsDlOpen, /* xDlOpen */ + 0, /* xDlError */ + 0, /* xDlSym */ + 0, /* xDlClose */ + kvvfsRandomness, /* xRandomness */ + kvvfsSleep, /* xSleep */ + kvvfsCurrentTime, /* xCurrentTime */ + 0, /* xGetLastError */ + kvvfsCurrentTimeInt64 /* 
xCurrentTimeInt64 */ +}; + +/* Methods for sqlite3_file objects referencing a database file +*/ +static sqlite3_io_methods kvvfs_db_io_methods = { + 1, /* iVersion */ + kvvfsClose, /* xClose */ + kvvfsReadDb, /* xRead */ + kvvfsWriteDb, /* xWrite */ + kvvfsTruncateDb, /* xTruncate */ + kvvfsSyncDb, /* xSync */ + kvvfsFileSizeDb, /* xFileSize */ + kvvfsLock, /* xLock */ + kvvfsUnlock, /* xUnlock */ + kvvfsCheckReservedLock, /* xCheckReservedLock */ + kvvfsFileControlDb, /* xFileControl */ + kvvfsSectorSize, /* xSectorSize */ + kvvfsDeviceCharacteristics, /* xDeviceCharacteristics */ + 0, /* xShmMap */ + 0, /* xShmLock */ + 0, /* xShmBarrier */ + 0, /* xShmUnmap */ + 0, /* xFetch */ + 0 /* xUnfetch */ +}; + +/* Methods for sqlite3_file objects referencing a rollback journal +*/ +static sqlite3_io_methods kvvfs_jrnl_io_methods = { + 1, /* iVersion */ + kvvfsClose, /* xClose */ + kvvfsReadJrnl, /* xRead */ + kvvfsWriteJrnl, /* xWrite */ + kvvfsTruncateJrnl, /* xTruncate */ + kvvfsSyncJrnl, /* xSync */ + kvvfsFileSizeJrnl, /* xFileSize */ + kvvfsLock, /* xLock */ + kvvfsUnlock, /* xUnlock */ + kvvfsCheckReservedLock, /* xCheckReservedLock */ + kvvfsFileControlJrnl, /* xFileControl */ + kvvfsSectorSize, /* xSectorSize */ + kvvfsDeviceCharacteristics, /* xDeviceCharacteristics */ + 0, /* xShmMap */ + 0, /* xShmLock */ + 0, /* xShmBarrier */ + 0, /* xShmUnmap */ + 0, /* xFetch */ + 0 /* xUnfetch */ +}; + +/****** Storage subsystem **************************************************/ +#include +#include +#include + +/* Forward declarations for the low-level storage engine +*/ +static int kvstorageWrite(const char*, const char *zKey, const char *zData); +static int kvstorageDelete(const char*, const char *zKey); +static int kvstorageRead(const char*, const char *zKey, char *zBuf, int nBuf); +#define KVSTORAGE_KEY_SZ 32 + +/* Expand the key name with an appropriate prefix and put the result +** zKeyOut[]. The zKeyOut[] buffer is assumed to hold at least +** KVSTORAGE_KEY_SZ bytes. +*/ +static void kvstorageMakeKey( + const char *zClass, + const char *zKeyIn, + char *zKeyOut +){ + sqlite3_snprintf(KVSTORAGE_KEY_SZ, zKeyOut, "kvvfs-%s-%s", zClass, zKeyIn); +} + +/* Write content into a key. zClass is the particular namespace of the +** underlying key/value store to use - either "local" or "session". +** +** Both zKey and zData are zero-terminated pure text strings. +** +** Return the number of errors. +*/ +static int kvstorageWrite( + const char *zClass, + const char *zKey, + const char *zData +){ + FILE *fd; + char zXKey[KVSTORAGE_KEY_SZ]; + kvstorageMakeKey(zClass, zKey, zXKey); + fd = fopen(zXKey, "wb"); + if( fd ){ + SQLITE_KV_TRACE(("KVVFS-WRITE %-15s (%d) %.50s%s\n", zXKey, + (int)strlen(zData), zData, + strlen(zData)>50 ? "..." : "")); + fputs(zData, fd); + fclose(fd); + return 0; + }else{ + return 1; + } +} + +/* Delete a key (with its corresponding data) from the key/value +** namespace given by zClass. If the key does not previously exist, +** this routine is a no-op. +*/ +static int kvstorageDelete(const char *zClass, const char *zKey){ + char zXKey[KVSTORAGE_KEY_SZ]; + kvstorageMakeKey(zClass, zKey, zXKey); + unlink(zXKey); + SQLITE_KV_TRACE(("KVVFS-DELETE %-15s\n", zXKey)); + return 0; +} + +/* Read the value associated with a zKey from the key/value namespace given +** by zClass and put the text data associated with that key in the first +** nBuf bytes of zBuf[]. The value might be truncated if zBuf is not large +** enough to hold it all. 
The value put into zBuf must always be zero +** terminated, even if it gets truncated because nBuf is not large enough. +** +** Return the total number of bytes in the data, without truncation, and +** not counting the final zero terminator. Return -1 if the key does +** not exist. +** +** If nBuf<=0 then this routine simply returns the size of the data without +** actually reading it. +*/ +static int kvstorageRead( + const char *zClass, + const char *zKey, + char *zBuf, + int nBuf +){ + FILE *fd; + struct stat buf; + char zXKey[KVSTORAGE_KEY_SZ]; + kvstorageMakeKey(zClass, zKey, zXKey); + if( access(zXKey, R_OK)!=0 + || stat(zXKey, &buf)!=0 + || !S_ISREG(buf.st_mode) + ){ + SQLITE_KV_TRACE(("KVVFS-READ %-15s (-1)\n", zXKey)); + return -1; + } + if( nBuf<=0 ){ + return (int)buf.st_size; + }else if( nBuf==1 ){ + zBuf[0] = 0; + SQLITE_KV_TRACE(("KVVFS-READ %-15s (%d)\n", zXKey, + (int)buf.st_size)); + return (int)buf.st_size; + } + if( nBuf > buf.st_size + 1 ){ + nBuf = buf.st_size + 1; + } + fd = fopen(zXKey, "rb"); + if( fd==0 ){ + SQLITE_KV_TRACE(("KVVFS-READ %-15s (-1)\n", zXKey)); + return -1; + }else{ + sqlite3_int64 n = fread(zBuf, 1, nBuf-1, fd); + fclose(fd); + zBuf[n] = 0; + SQLITE_KV_TRACE(("KVVFS-READ %-15s (%lld) %.50s%s\n", zXKey, + n, zBuf, n>50 ? "..." : "")); + return (int)n; + } +} + +/* +** An internal level of indirection which enables us to replace the +** kvvfs i/o methods with JavaScript implementations in WASM builds. +** Maintenance reminder: if this struct changes in any way, the JSON +** rendering of its structure must be updated in +** sqlite3_wasm_enum_json(). There are no binary compatibility +** concerns, so it does not need an iVersion member. This file is +** necessarily always compiled together with sqlite3_wasm_enum_json(), +** and JS code dynamically creates the mapping of members based on +** that JSON description. +*/ +typedef struct sqlite3_kvvfs_methods sqlite3_kvvfs_methods; +struct sqlite3_kvvfs_methods { + int (*xRead)(const char *zClass, const char *zKey, char *zBuf, int nBuf); + int (*xWrite)(const char *zClass, const char *zKey, const char *zData); + int (*xDelete)(const char *zClass, const char *zKey); + const int nKeySize; +}; + +/* +** This object holds the kvvfs I/O methods which may be swapped out +** for JavaScript-side implementations in WASM builds. In such builds +** it cannot be const, but in native builds it should be so that +** the compiler can hopefully optimize this level of indirection out. +** That said, kvvfs is intended primarily for use in WASM builds. +** +** Note that this is not explicitly flagged as static because the +** amalgamation build will tag it with SQLITE_PRIVATE. +*/ +#ifndef SQLITE_WASM +const +#endif +SQLITE_PRIVATE sqlite3_kvvfs_methods sqlite3KvvfsMethods = { +kvstorageRead, +kvstorageWrite, +kvstorageDelete, +KVSTORAGE_KEY_SZ +}; + +/****** Utility subroutines ************************************************/ + +/* +** Encode binary into the text encoded used to persist on disk. +** The output text is stored in aOut[], which must be at least +** nData+1 bytes in length. +** +** Return the actual length of the encoded text, not counting the +** zero terminator at the end. +** +** Encoding format +** --------------- +** +** * Non-zero bytes are encoded as upper-case hexadecimal +** +** * A sequence of one or more zero-bytes that are not at the +** beginning of the buffer are encoded as a little-endian +** base-26 number using a..z. "a" means 0. "b" means 1, +** "z" means 25. "ab" means 26. "ac" means 52. 
And so forth. +** +** * Because there is no overlap between the encoding characters +** of hexadecimal and base-26 numbers, it is always clear where +** one stops and the next begins. +*/ +static int kvvfsEncode(const char *aData, int nData, char *aOut){ + int i, j; + const unsigned char *a = (const unsigned char*)aData; + for(i=j=0; i>4]; + aOut[j++] = "0123456789ABCDEF"[c&0xf]; + }else{ + /* A sequence of 1 or more zeros is stored as a little-endian + ** base-26 number using a..z as the digits. So one zero is "b". + ** Two zeros is "c". 25 zeros is "z", 26 zeros is "ab", 27 is "bb", + ** and so forth. + */ + int k; + for(k=1; i+k0 ){ + aOut[j++] = 'a'+(k%26); + k /= 26; + } + } + } + aOut[j] = 0; + return j; +} + +static const signed char kvvfsHexValue[256] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 +}; + +/* +** Decode the text encoding back to binary. The binary content is +** written into pOut, which must be at least nOut bytes in length. +** +** The return value is the number of bytes actually written into aOut[]. +*/ +static int kvvfsDecode(const char *a, char *aOut, int nOut){ + int i, j; + int c; + const unsigned char *aIn = (const unsigned char*)a; + i = 0; + j = 0; + while( 1 ){ + c = kvvfsHexValue[aIn[i]]; + if( c<0 ){ + int n = 0; + int mult = 1; + c = aIn[i]; + if( c==0 ) break; + while( c>='a' && c<='z' ){ + n += (c - 'a')*mult; + mult *= 26; + c = aIn[++i]; + } + if( j+n>nOut ) return -1; + memset(&aOut[j], 0, n); + j += n; + if( c==0 || mult==1 ) break; /* progress stalled if mult==1 */ + }else{ + aOut[j] = c<<4; + c = kvvfsHexValue[aIn[++i]]; + if( c<0 ) break; + aOut[j++] += c; + i++; + } + } + return j; +} + +/* +** Decode a complete journal file. Allocate space in pFile->aJrnl +** and store the decoding there. Or leave pFile->aJrnl set to NULL +** if an error is encountered. +** +** The first few characters of the text encoding will be a little-endian +** base-26 number (digits a..z) that is the total number of bytes +** in the decoded journal file image. This base-26 number is followed +** by a single space, then the encoding of the journal. The space +** separator is required to act as a terminator for the base-26 number. +*/ +static void kvvfsDecodeJournal( + KVVfsFile *pFile, /* Store decoding in pFile->aJrnl */ + const char *zTxt, /* Text encoding. 
Zero-terminated */ + int nTxt /* Bytes in zTxt, excluding zero terminator */ +){ + unsigned int n = 0; + int c, i, mult; + i = 0; + mult = 1; + while( (c = zTxt[i++])>='a' && c<='z' ){ + n += (zTxt[i] - 'a')*mult; + mult *= 26; + } + sqlite3_free(pFile->aJrnl); + pFile->aJrnl = sqlite3_malloc64( n ); + if( pFile->aJrnl==0 ){ + pFile->nJrnl = 0; + return; + } + pFile->nJrnl = n; + n = kvvfsDecode(zTxt+i, pFile->aJrnl, pFile->nJrnl); + if( nnJrnl ){ + sqlite3_free(pFile->aJrnl); + pFile->aJrnl = 0; + pFile->nJrnl = 0; + } +} + +/* +** Read or write the "sz" element, containing the database file size. +*/ +static sqlite3_int64 kvvfsReadFileSize(KVVfsFile *pFile){ + char zData[50]; + zData[0] = 0; + sqlite3KvvfsMethods.xRead(pFile->zClass, "sz", zData, sizeof(zData)-1); + return strtoll(zData, 0, 0); +} +static int kvvfsWriteFileSize(KVVfsFile *pFile, sqlite3_int64 sz){ + char zData[50]; + sqlite3_snprintf(sizeof(zData), zData, "%lld", sz); + return sqlite3KvvfsMethods.xWrite(pFile->zClass, "sz", zData); +} + +/****** sqlite3_io_methods methods ******************************************/ + +/* +** Close an kvvfs-file. +*/ +static int kvvfsClose(sqlite3_file *pProtoFile){ + KVVfsFile *pFile = (KVVfsFile *)pProtoFile; + + SQLITE_KV_LOG(("xClose %s %s\n", pFile->zClass, + pFile->isJournal ? "journal" : "db")); + sqlite3_free(pFile->aJrnl); + sqlite3_free(pFile->aData); + return SQLITE_OK; +} + +/* +** Read from the -journal file. +*/ +static int kvvfsReadJrnl( + sqlite3_file *pProtoFile, + void *zBuf, + int iAmt, + sqlite_int64 iOfst +){ + KVVfsFile *pFile = (KVVfsFile*)pProtoFile; + assert( pFile->isJournal ); + SQLITE_KV_LOG(("xRead('%s-journal',%d,%lld)\n", pFile->zClass, iAmt, iOfst)); + if( pFile->aJrnl==0 ){ + int szTxt = kvstorageRead(pFile->zClass, "jrnl", 0, 0); + char *aTxt; + if( szTxt<=4 ){ + return SQLITE_IOERR; + } + aTxt = sqlite3_malloc64( szTxt+1 ); + if( aTxt==0 ) return SQLITE_NOMEM; + kvstorageRead(pFile->zClass, "jrnl", aTxt, szTxt+1); + kvvfsDecodeJournal(pFile, aTxt, szTxt); + sqlite3_free(aTxt); + if( pFile->aJrnl==0 ) return SQLITE_IOERR; + } + if( iOfst+iAmt>pFile->nJrnl ){ + return SQLITE_IOERR_SHORT_READ; + } + memcpy(zBuf, pFile->aJrnl+iOfst, iAmt); + return SQLITE_OK; +} + +/* +** Read from the database file. 
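A standalone sketch of the text encoding described above: uppercase hex for nonzero bytes, and little-endian base-26 run lengths in 'a'..'z' for zeros. Since the two alphabets are disjoint, the decoder always knows where a zero run begins. (The journal blob additionally gets a base-26 length prefix and a space separator, which kvvfsSyncJrnl writes and kvvfsDecodeJournal parses.)

#include <stdio.h>

/* Encode a byte buffer in the style of kvvfsEncode: each nonzero byte
** becomes two hex digits, and each run of k zero bytes becomes k in
** little-endian base-26 ('a'=0, 'b'=1, ..., "ab"=26). */
static int encode(const unsigned char *a, int n, char *out){
  int i, j = 0;
  for(i=0; i<n; i++){
    if( a[i] ){
      out[j++] = "0123456789ABCDEF"[a[i]>>4];
      out[j++] = "0123456789ABCDEF"[a[i]&0xf];
    }else{
      int k;
      for(k=1; i+k<n && a[i+k]==0; k++){}
      i += k-1;                        /* skip the whole zero run */
      while( k>0 ){ out[j++] = 'a' + (k%26); k /= 26; }
    }
  }
  out[j] = 0;
  return j;
}

int main(void){
  const unsigned char data[] = { 0xDE, 0xAD, 0, 0, 0, 0xBE, 0xEF };
  char txt[64];
  encode(data, (int)sizeof(data), txt);
  printf("%s\n", txt);   /* DEADdBEEF: "d" encodes the 3-byte zero run */
  return 0;
}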
+*/ +static int kvvfsReadDb( + sqlite3_file *pProtoFile, + void *zBuf, + int iAmt, + sqlite_int64 iOfst +){ + KVVfsFile *pFile = (KVVfsFile*)pProtoFile; + unsigned int pgno; + int got, n; + char zKey[30]; + char *aData = pFile->aData; + assert( iOfst>=0 ); + assert( iAmt>=0 ); + SQLITE_KV_LOG(("xRead('%s-db',%d,%lld)\n", pFile->zClass, iAmt, iOfst)); + if( iOfst+iAmt>=512 ){ + if( (iOfst % iAmt)!=0 ){ + return SQLITE_IOERR_READ; + } + if( (iAmt & (iAmt-1))!=0 || iAmt<512 || iAmt>65536 ){ + return SQLITE_IOERR_READ; + } + pFile->szPage = iAmt; + pgno = 1 + iOfst/iAmt; + }else{ + pgno = 1; + } + sqlite3_snprintf(sizeof(zKey), zKey, "%u", pgno); + got = sqlite3KvvfsMethods.xRead(pFile->zClass, zKey, + aData, SQLITE_KVOS_SZ-1); + if( got<0 ){ + n = 0; + }else{ + aData[got] = 0; + if( iOfst+iAmt<512 ){ + int k = iOfst+iAmt; + aData[k*2] = 0; + n = kvvfsDecode(aData, &aData[2000], SQLITE_KVOS_SZ-2000); + if( n>=iOfst+iAmt ){ + memcpy(zBuf, &aData[2000+iOfst], iAmt); + n = iAmt; + }else{ + n = 0; + } + }else{ + n = kvvfsDecode(aData, zBuf, iAmt); + } + } + if( nzClass, iAmt, iOfst)); + if( iEnd>=0x10000000 ) return SQLITE_FULL; + if( pFile->aJrnl==0 || pFile->nJrnlaJrnl, iEnd); + if( aNew==0 ){ + return SQLITE_IOERR_NOMEM; + } + pFile->aJrnl = aNew; + if( pFile->nJrnlaJrnl+pFile->nJrnl, 0, iOfst-pFile->nJrnl); + } + pFile->nJrnl = iEnd; + } + memcpy(pFile->aJrnl+iOfst, zBuf, iAmt); + return SQLITE_OK; +} + +/* +** Write into the database file. +*/ +static int kvvfsWriteDb( + sqlite3_file *pProtoFile, + const void *zBuf, + int iAmt, + sqlite_int64 iOfst +){ + KVVfsFile *pFile = (KVVfsFile*)pProtoFile; + unsigned int pgno; + char zKey[30]; + char *aData = pFile->aData; + SQLITE_KV_LOG(("xWrite('%s-db',%d,%lld)\n", pFile->zClass, iAmt, iOfst)); + assert( iAmt>=512 && iAmt<=65536 ); + assert( (iAmt & (iAmt-1))==0 ); + assert( pFile->szPage<0 || pFile->szPage==iAmt ); + pFile->szPage = iAmt; + pgno = 1 + iOfst/iAmt; + sqlite3_snprintf(sizeof(zKey), zKey, "%u", pgno); + kvvfsEncode(zBuf, iAmt, aData); + if( sqlite3KvvfsMethods.xWrite(pFile->zClass, zKey, aData) ){ + return SQLITE_IOERR; + } + if( iOfst+iAmt > pFile->szDb ){ + pFile->szDb = iOfst + iAmt; + } + return SQLITE_OK; +} + +/* +** Truncate an kvvfs-file. +*/ +static int kvvfsTruncateJrnl(sqlite3_file *pProtoFile, sqlite_int64 size){ + KVVfsFile *pFile = (KVVfsFile *)pProtoFile; + SQLITE_KV_LOG(("xTruncate('%s-journal',%lld)\n", pFile->zClass, size)); + assert( size==0 ); + sqlite3KvvfsMethods.xDelete(pFile->zClass, "jrnl"); + sqlite3_free(pFile->aJrnl); + pFile->aJrnl = 0; + pFile->nJrnl = 0; + return SQLITE_OK; +} +static int kvvfsTruncateDb(sqlite3_file *pProtoFile, sqlite_int64 size){ + KVVfsFile *pFile = (KVVfsFile *)pProtoFile; + if( pFile->szDb>size + && pFile->szPage>0 + && (size % pFile->szPage)==0 + ){ + char zKey[50]; + unsigned int pgno, pgnoMax; + SQLITE_KV_LOG(("xTruncate('%s-db',%lld)\n", pFile->zClass, size)); + pgno = 1 + size/pFile->szPage; + pgnoMax = 2 + pFile->szDb/pFile->szPage; + while( pgno<=pgnoMax ){ + sqlite3_snprintf(sizeof(zKey), zKey, "%u", pgno); + sqlite3KvvfsMethods.xDelete(pFile->zClass, zKey); + pgno++; + } + pFile->szDb = size; + return kvvfsWriteFileSize(pFile, size) ? SQLITE_IOERR : SQLITE_OK; + } + return SQLITE_IOERR; +} + +/* +** Sync an kvvfs-file. 
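A small model of the offset-to-key mapping used by kvvfsReadDb()/kvvfsWriteDb() above: aligned full-page I/O maps to page 1 + iOfst/iAmt, while sub-512-byte reads (SQLite probing the database header before the page size is known) always land on page 1. A sketch, with an invented helper name:

#include <stdio.h>

/* Return the page number a (offset, amount) request maps to, or 0 if
** the request shape is rejected, mirroring the checks in kvvfsReadDb. */
static unsigned page_for(long long iOfst, int iAmt){
  if( iOfst+iAmt>=512 ){
    if( (iOfst % iAmt)!=0 ) return 0;                    /* misaligned */
    if( (iAmt & (iAmt-1))!=0 || iAmt<512 || iAmt>65536 ) return 0;
    return (unsigned)(1 + iOfst/iAmt);
  }
  return 1;                            /* header probe: always page 1 */
}

int main(void){
  printf("%u\n", page_for(0, 100));      /* 1: header probe           */
  printf("%u\n", page_for(12288, 4096)); /* 4: third 4KiB page past 1 */
  printf("%u\n", page_for(100, 4096));   /* 0: rejected as misaligned */
  return 0;
}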
+*/ +static int kvvfsSyncJrnl(sqlite3_file *pProtoFile, int flags){ + int i, n; + KVVfsFile *pFile = (KVVfsFile *)pProtoFile; + char *zOut; + SQLITE_KV_LOG(("xSync('%s-journal')\n", pFile->zClass)); + if( pFile->nJrnl<=0 ){ + return kvvfsTruncateJrnl(pProtoFile, 0); + } + zOut = sqlite3_malloc64( pFile->nJrnl*2 + 50 ); + if( zOut==0 ){ + return SQLITE_IOERR_NOMEM; + } + n = pFile->nJrnl; + i = 0; + do{ + zOut[i++] = 'a' + (n%26); + n /= 26; + }while( n>0 ); + zOut[i++] = ' '; + kvvfsEncode(pFile->aJrnl, pFile->nJrnl, &zOut[i]); + i = sqlite3KvvfsMethods.xWrite(pFile->zClass, "jrnl", zOut); + sqlite3_free(zOut); + return i ? SQLITE_IOERR : SQLITE_OK; +} +static int kvvfsSyncDb(sqlite3_file *pProtoFile, int flags){ + return SQLITE_OK; +} + +/* +** Return the current file-size of an kvvfs-file. +*/ +static int kvvfsFileSizeJrnl(sqlite3_file *pProtoFile, sqlite_int64 *pSize){ + KVVfsFile *pFile = (KVVfsFile *)pProtoFile; + SQLITE_KV_LOG(("xFileSize('%s-journal')\n", pFile->zClass)); + *pSize = pFile->nJrnl; + return SQLITE_OK; +} +static int kvvfsFileSizeDb(sqlite3_file *pProtoFile, sqlite_int64 *pSize){ + KVVfsFile *pFile = (KVVfsFile *)pProtoFile; + SQLITE_KV_LOG(("xFileSize('%s-db')\n", pFile->zClass)); + if( pFile->szDb>=0 ){ + *pSize = pFile->szDb; + }else{ + *pSize = kvvfsReadFileSize(pFile); + } + return SQLITE_OK; +} + +/* +** Lock an kvvfs-file. +*/ +static int kvvfsLock(sqlite3_file *pProtoFile, int eLock){ + KVVfsFile *pFile = (KVVfsFile *)pProtoFile; + assert( !pFile->isJournal ); + SQLITE_KV_LOG(("xLock(%s,%d)\n", pFile->zClass, eLock)); + + if( eLock!=SQLITE_LOCK_NONE ){ + pFile->szDb = kvvfsReadFileSize(pFile); + } + return SQLITE_OK; +} + +/* +** Unlock an kvvfs-file. +*/ +static int kvvfsUnlock(sqlite3_file *pProtoFile, int eLock){ + KVVfsFile *pFile = (KVVfsFile *)pProtoFile; + assert( !pFile->isJournal ); + SQLITE_KV_LOG(("xUnlock(%s,%d)\n", pFile->zClass, eLock)); + if( eLock==SQLITE_LOCK_NONE ){ + pFile->szDb = -1; + } + return SQLITE_OK; +} + +/* +** Check if another file-handle holds a RESERVED lock on an kvvfs-file. +*/ +static int kvvfsCheckReservedLock(sqlite3_file *pProtoFile, int *pResOut){ + SQLITE_KV_LOG(("xCheckReservedLock\n")); + *pResOut = 0; + return SQLITE_OK; +} + +/* +** File control method. For custom operations on an kvvfs-file. +*/ +static int kvvfsFileControlJrnl(sqlite3_file *pProtoFile, int op, void *pArg){ + SQLITE_KV_LOG(("xFileControl(%d) on journal\n", op)); + return SQLITE_NOTFOUND; +} +static int kvvfsFileControlDb(sqlite3_file *pProtoFile, int op, void *pArg){ + SQLITE_KV_LOG(("xFileControl(%d) on database\n", op)); + if( op==SQLITE_FCNTL_SYNC ){ + KVVfsFile *pFile = (KVVfsFile *)pProtoFile; + int rc = SQLITE_OK; + SQLITE_KV_LOG(("xSync('%s-db')\n", pFile->zClass)); + if( pFile->szDb>0 && 0!=kvvfsWriteFileSize(pFile, pFile->szDb) ){ + rc = SQLITE_IOERR; + } + return rc; + } + return SQLITE_NOTFOUND; +} + +/* +** Return the sector-size in bytes for an kvvfs-file. +*/ +static int kvvfsSectorSize(sqlite3_file *pFile){ + return 512; +} + +/* +** Return the device characteristic flags supported by an kvvfs-file. +*/ +static int kvvfsDeviceCharacteristics(sqlite3_file *pProtoFile){ + return 0; +} + +/****** sqlite3_vfs methods *************************************************/ + +/* +** Open an kvvfs file handle. 
+*/ +static int kvvfsOpen( + sqlite3_vfs *pProtoVfs, + const char *zName, + sqlite3_file *pProtoFile, + int flags, + int *pOutFlags +){ + KVVfsFile *pFile = (KVVfsFile*)pProtoFile; + if( zName==0 ) zName = ""; + SQLITE_KV_LOG(("xOpen(\"%s\")\n", zName)); + if( strcmp(zName, "local")==0 + || strcmp(zName, "session")==0 + ){ + pFile->isJournal = 0; + pFile->base.pMethods = &kvvfs_db_io_methods; + }else + if( strcmp(zName, "local-journal")==0 + || strcmp(zName, "session-journal")==0 + ){ + pFile->isJournal = 1; + pFile->base.pMethods = &kvvfs_jrnl_io_methods; + }else{ + return SQLITE_CANTOPEN; + } + if( zName[0]=='s' ){ + pFile->zClass = "session"; + }else{ + pFile->zClass = "local"; + } + pFile->aData = sqlite3_malloc64(SQLITE_KVOS_SZ); + if( pFile->aData==0 ){ + return SQLITE_NOMEM; + } + pFile->aJrnl = 0; + pFile->nJrnl = 0; + pFile->szPage = -1; + pFile->szDb = -1; + return SQLITE_OK; +} + +/* +** Delete the file located at zPath. If the dirSync argument is true, +** ensure the file-system modifications are synced to disk before +** returning. +*/ +static int kvvfsDelete(sqlite3_vfs *pVfs, const char *zPath, int dirSync){ + if( strcmp(zPath, "local-journal")==0 ){ + sqlite3KvvfsMethods.xDelete("local", "jrnl"); + }else + if( strcmp(zPath, "session-journal")==0 ){ + sqlite3KvvfsMethods.xDelete("session", "jrnl"); + } + return SQLITE_OK; +} + +/* +** Test for access permissions. Return true if the requested permission +** is available, or false otherwise. +*/ +static int kvvfsAccess( + sqlite3_vfs *pProtoVfs, + const char *zPath, + int flags, + int *pResOut +){ + SQLITE_KV_LOG(("xAccess(\"%s\")\n", zPath)); + if( strcmp(zPath, "local-journal")==0 ){ + *pResOut = sqlite3KvvfsMethods.xRead("local", "jrnl", 0, 0)>0; + }else + if( strcmp(zPath, "session-journal")==0 ){ + *pResOut = sqlite3KvvfsMethods.xRead("session", "jrnl", 0, 0)>0; + }else + if( strcmp(zPath, "local")==0 ){ + *pResOut = sqlite3KvvfsMethods.xRead("local", "sz", 0, 0)>0; + }else + if( strcmp(zPath, "session")==0 ){ + *pResOut = sqlite3KvvfsMethods.xRead("session", "sz", 0, 0)>0; + }else + { + *pResOut = 0; + } + SQLITE_KV_LOG(("xAccess returns %d\n",*pResOut)); + return SQLITE_OK; +} + +/* +** Populate buffer zOut with the full canonical pathname corresponding +** to the pathname in zPath. zOut is guaranteed to point to a buffer +** of at least (INST_MAX_PATHNAME+1) bytes. +*/ +static int kvvfsFullPathname( + sqlite3_vfs *pVfs, + const char *zPath, + int nOut, + char *zOut +){ + size_t nPath; +#ifdef SQLITE_OS_KV_ALWAYS_LOCAL + zPath = "local"; +#endif + nPath = strlen(zPath); + SQLITE_KV_LOG(("xFullPathname(\"%s\")\n", zPath)); + if( nOut +static int kvvfsCurrentTimeInt64(sqlite3_vfs *pVfs, sqlite3_int64 *pTimeOut){ + static const sqlite3_int64 unixEpoch = 24405875*(sqlite3_int64)8640000; + struct timeval sNow; + (void)gettimeofday(&sNow, 0); /* Cannot fail given valid arguments */ + *pTimeOut = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_usec/1000; + return SQLITE_OK; +} +#endif /* SQLITE_OS_KV || SQLITE_OS_UNIX */ + +#if SQLITE_OS_KV +/* +** This routine is called initialize the KV-vfs as the default VFS. 
+*/ +SQLITE_API int sqlite3_os_init(void){ + return sqlite3_vfs_register(&sqlite3OsKvvfsObject, 1); +} +SQLITE_API int sqlite3_os_end(void){ + return SQLITE_OK; +} +#endif /* SQLITE_OS_KV */ + +#if SQLITE_OS_UNIX && defined(SQLITE_OS_KV_OPTIONAL) +SQLITE_PRIVATE int sqlite3KvvfsInit(void){ + return sqlite3_vfs_register(&sqlite3OsKvvfsObject, 0); +} +#endif + +/************** End of os_kv.c ***********************************************/ /************** Begin file os_unix.c *****************************************/ /* ** 2004 May 22 @@ -35497,7 +37861,7 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){ ** This source file is organized into divisions where the logic for various ** subfunctions is contained within the appropriate division. PLEASE ** KEEP THE STRUCTURE OF THIS FILE INTACT. New code should be placed -** in the correct division and should be clearly labeled. +** in the correct division and should be clearly labelled. ** ** The layout of divisions is as follows: ** @@ -35547,7 +37911,7 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){ #endif /* Use pread() and pwrite() if they are available */ -#if defined(__APPLE__) +#if defined(__APPLE__) || defined(__linux__) # define HAVE_PREAD 1 # define HAVE_PWRITE 1 #endif @@ -35562,15 +37926,16 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){ /* ** standard include files. */ -#include -#include +#include /* amalgamator: keep */ +#include /* amalgamator: keep */ #include #include -#include +#include /* amalgamator: keep */ /* #include */ -#include +#include /* amalgamator: keep */ #include -#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 +#if (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0) \ + && !defined(SQLITE_WASI) # include #endif @@ -35658,9 +38023,46 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){ */ #define SQLITE_MAX_SYMLINKS 100 +/* +** Remove and stub certain info for WASI (WebAssembly System +** Interface) builds. +*/ +#ifdef SQLITE_WASI +# undef HAVE_FCHMOD +# undef HAVE_FCHOWN +# undef HAVE_MREMAP +# define HAVE_MREMAP 0 +# ifndef SQLITE_DEFAULT_UNIX_VFS +# define SQLITE_DEFAULT_UNIX_VFS "unix-dotfile" + /* ^^^ should SQLITE_DEFAULT_UNIX_VFS be "unix-none"? */ +# endif +# ifndef F_RDLCK +# define F_RDLCK 0 +# define F_WRLCK 1 +# define F_UNLCK 2 +# if __LONG_MAX == 0x7fffffffL +# define F_GETLK 12 +# define F_SETLK 13 +# define F_SETLKW 14 +# else +# define F_GETLK 5 +# define F_SETLK 6 +# define F_SETLKW 7 +# endif +# endif +#else /* !SQLITE_WASI */ +# ifndef HAVE_FCHMOD +# define HAVE_FCHMOD +# endif +#endif /* SQLITE_WASI */ + +#ifdef SQLITE_WASI +# define osGetpid(X) (pid_t)1 +#else /* Always cast the getpid() return type for compatibility with ** kernel modules in VxWorks. 
*/ -#define osGetpid(X) (pid_t)getpid() +# define osGetpid(X) (pid_t)getpid() +#endif /* ** Only set the lastErrno if the error code is a real error and not @@ -35932,7 +38334,11 @@ static struct unix_syscall { #define osPwrite64 ((ssize_t(*)(int,const void*,size_t,off64_t))\ aSyscall[13].pCurrent) +#if defined(HAVE_FCHMOD) { "fchmod", (sqlite3_syscall_ptr)fchmod, 0 }, +#else + { "fchmod", (sqlite3_syscall_ptr)0, 0 }, +#endif #define osFchmod ((int(*)(int,mode_t))aSyscall[14].pCurrent) #if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE @@ -35968,14 +38374,16 @@ static struct unix_syscall { #endif #define osGeteuid ((uid_t(*)(void))aSyscall[21].pCurrent) -#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 +#if (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0) \ + && !defined(SQLITE_WASI) { "mmap", (sqlite3_syscall_ptr)mmap, 0 }, #else { "mmap", (sqlite3_syscall_ptr)0, 0 }, #endif #define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[22].pCurrent) -#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 +#if (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0) \ + && !defined(SQLITE_WASI) { "munmap", (sqlite3_syscall_ptr)munmap, 0 }, #else { "munmap", (sqlite3_syscall_ptr)0, 0 }, @@ -36040,7 +38448,7 @@ static int robustFchown(int fd, uid_t uid, gid_t gid){ /* ** This is the xSetSystemCall() method of sqlite3_vfs for all of the -** "unix" VFSes. Return SQLITE_OK opon successfully updating the +** "unix" VFSes. Return SQLITE_OK upon successfully updating the ** system call pointer, or SQLITE_NOTFOUND if there is no configurable ** system call named zName. */ @@ -36161,6 +38569,9 @@ static int robust_open(const char *z, int f, mode_t m){ break; } if( fd>=SQLITE_MINIMUM_FILE_DESCRIPTOR ) break; + if( (f & (O_EXCL|O_CREAT))==(O_EXCL|O_CREAT) ){ + (void)osUnlink(z); + } osClose(fd); sqlite3_log(SQLITE_WARNING, "attempt to open \"%s\" as file descriptor %d", z, fd); @@ -36559,7 +38970,7 @@ static void vxworksReleaseFileId(struct vxworksFileId *pId){ ** If you close a file descriptor that points to a file that has locks, ** all locks on that file that are owned by the current process are ** released. To work around this problem, each unixInodeInfo object -** maintains a count of the number of pending locks on tha inode. +** maintains a count of the number of pending locks on the inode. ** When an attempt is made to close an unixFile, if there are ** other unixFile open on the same inode that are holding locks, the call ** to close() the file descriptor is deferred until all of the locks clear. @@ -36573,7 +38984,7 @@ static void vxworksReleaseFileId(struct vxworksFileId *pId){ ** not posix compliant. Under LinuxThreads, a lock created by thread ** A cannot be modified or overridden by a different thread B. ** Only thread A can modify the lock. Locking behavior is correct -** if the appliation uses the newer Native Posix Thread Library (NPTL) +** if the application uses the newer Native Posix Thread Library (NPTL) ** on linux - with NPTL a lock created by thread A can override locks ** in thread B. But there is no way to know at compile-time which ** threading library is being used. So there is no way to know at @@ -36775,7 +39186,7 @@ static void storeLastErrno(unixFile *pFile, int error){ } /* -** Close all file descriptors accumuated in the unixInodeInfo->pUnused list. +** Close all file descriptors accumulated in the unixInodeInfo->pUnused list. 
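A subtle point in the robust_open() hunk above: when open() lands on a standard-I/O descriptor and the file was just created with O_CREAT|O_EXCL, the new osUnlink() call makes the retry possible, since a second O_EXCL open of a file that now exists would fail. The sketch below shows the same keep-descriptors-0-through-2-free policy using F_DUPFD instead of a retry loop; this is a simpler alternative for illustration, not SQLite's actual approach.

#include <fcntl.h>
#include <unistd.h>

/* Never hand out descriptors 0-2, so a later close() cannot stomp
** stdin/stdout/stderr.  Assumes POSIX fcntl(). */
static int open_high_fd(const char *zPath, int flags, mode_t mode){
  int fd = open(zPath, flags, mode);
  if( fd>=0 && fd<3 ){
    int fd2 = fcntl(fd, F_DUPFD, 3);  /* duplicate onto some fd >= 3 */
    close(fd);
    fd = fd2;
  }
  return fd;
}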
*/ static void closePendingFds(unixFile *pFile){ unixInodeInfo *pInode = pFile->pInode; @@ -37123,7 +39534,7 @@ static int unixFileLock(unixFile *pFile, struct flock *pLock){ ** ** UNLOCKED -> SHARED ** SHARED -> RESERVED -** SHARED -> (PENDING) -> EXCLUSIVE +** SHARED -> EXCLUSIVE ** RESERVED -> (PENDING) -> EXCLUSIVE ** PENDING -> EXCLUSIVE ** @@ -37138,7 +39549,7 @@ static int unixLock(sqlite3_file *id, int eFileLock){ ** slightly in order to be compatible with Windows95 systems simultaneously ** accessing the same database file, in case that is ever required. ** - ** Symbols defined in os.h indentify the 'pending byte' and the 'reserved + ** Symbols defined in os.h identify the 'pending byte' and the 'reserved ** byte', each single bytes at well known offsets, and the 'shared byte ** range', a range of 510 bytes at a well known offset. ** @@ -37146,7 +39557,7 @@ static int unixLock(sqlite3_file *id, int eFileLock){ ** byte'. If this is successful, 'shared byte range' is read-locked ** and the lock on the 'pending byte' released. (Legacy note: When ** SQLite was first developed, Windows95 systems were still very common, - ** and Widnows95 lacks a shared-lock capability. So on Windows95, a + ** and Windows95 lacks a shared-lock capability. So on Windows95, a ** single randomly selected by from the 'shared byte range' is locked. ** Windows95 is now pretty much extinct, but this work-around for the ** lack of shared-locks on Windows95 lives on, for backwards @@ -37156,19 +39567,20 @@ static int unixLock(sqlite3_file *id, int eFileLock){ ** A RESERVED lock is implemented by grabbing a write-lock on the ** 'reserved byte'. ** - ** A process may only obtain a PENDING lock after it has obtained a - ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock - ** on the 'pending byte'. This ensures that no new SHARED locks can be - ** obtained, but existing SHARED locks are allowed to persist. A process - ** does not have to obtain a RESERVED lock on the way to a PENDING lock. - ** This property is used by the algorithm for rolling back a journal file - ** after a crash. + ** An EXCLUSIVE lock may only be requested after either a SHARED or + ** RESERVED lock is held. An EXCLUSIVE lock is implemented by obtaining + ** a write-lock on the entire 'shared byte range'. Since all other locks + ** require a read-lock on one of the bytes within this range, this ensures + ** that no other locks are held on the database. ** - ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is - ** implemented by obtaining a write-lock on the entire 'shared byte - ** range'. Since all other locks require a read-lock on one of the bytes - ** within this range, this ensures that no other locks are held on the - ** database. + ** If a process that holds a RESERVED lock requests an EXCLUSIVE, then + ** a PENDING lock is obtained first. A PENDING lock is implemented by + ** obtaining a write-lock on the 'pending byte'. This ensures that no new + ** SHARED locks can be obtained, but existing SHARED locks are allowed to + ** persist. If the call to this function fails to obtain the EXCLUSIVE + ** lock in this case, it holds the PENDING lock instead. The client may + ** then re-attempt the EXCLUSIVE lock later on, after existing SHARED + ** locks have cleared. */ int rc = SQLITE_OK; unixFile *pFile = (unixFile*)id; @@ -37194,7 +39606,7 @@ static int unixLock(sqlite3_file *id, int eFileLock){ /* Make sure the locking sequence is correct. 
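The comments above describe SQLite's cross-process protocol in terms of three well-known file regions: the pending byte, the reserved byte, and the 510-byte shared range. A sketch of how the 'shared byte range' read-lock maps onto a plain POSIX advisory lock; the offsets shown are SQLite's defaults, and the real unixLock() additionally handles the pending-byte hand-off and per-inode bookkeeping.

#include <fcntl.h>
#include <string.h>

#define PENDING_BYTE  0x40000000       /* default; settable at build time */
#define SHARED_FIRST  (PENDING_BYTE+2)
#define SHARED_SIZE   510

/* Take a read-lock on the 'shared byte range' described above. */
static int read_lock_shared_range(int fd){
  struct flock lk;
  memset(&lk, 0, sizeof(lk));
  lk.l_type   = F_RDLCK;
  lk.l_whence = SEEK_SET;
  lk.l_start  = SHARED_FIRST;
  lk.l_len    = SHARED_SIZE;
  return fcntl(fd, F_SETLK, &lk);      /* 0 on success, -1 if contended */
}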
** (1) We never move from unlocked to anything higher than shared lock. - ** (2) SQLite never explicitly requests a pendig lock. + ** (2) SQLite never explicitly requests a pending lock. ** (3) A shared lock is always held when a reserve lock is requested. */ assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK ); @@ -37239,7 +39651,7 @@ static int unixLock(sqlite3_file *id, int eFileLock){ lock.l_len = 1L; lock.l_whence = SEEK_SET; if( eFileLock==SHARED_LOCK - || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLockeFileLock==RESERVED_LOCK) ){ lock.l_type = (eFileLock==SHARED_LOCK?F_RDLCK:F_WRLCK); lock.l_start = PENDING_BYTE; @@ -37250,6 +39662,9 @@ static int unixLock(sqlite3_file *id, int eFileLock){ storeLastErrno(pFile, tErrno); } goto end_lock; + }else if( eFileLock==EXCLUSIVE_LOCK ){ + pFile->eFileLock = PENDING_LOCK; + pInode->eFileLock = PENDING_LOCK; } } @@ -37337,13 +39752,9 @@ static int unixLock(sqlite3_file *id, int eFileLock){ } #endif - if( rc==SQLITE_OK ){ pFile->eFileLock = eFileLock; pInode->eFileLock = eFileLock; - }else if( eFileLock==EXCLUSIVE_LOCK ){ - pFile->eFileLock = PENDING_LOCK; - pInode->eFileLock = PENDING_LOCK; } end_lock: @@ -38413,7 +40824,7 @@ static int afpLock(sqlite3_file *id, int eFileLock){ /* Make sure the locking sequence is correct ** (1) We never move from unlocked to anything higher than shared lock. - ** (2) SQLite never explicitly requests a pendig lock. + ** (2) SQLite never explicitly requests a pending lock. ** (3) A shared lock is always held when a reserve lock is requested. */ assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK ); @@ -38529,7 +40940,7 @@ static int afpLock(sqlite3_file *id, int eFileLock){ if( !(failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST + pInode->sharedByte, 1, 0)) ){ int failed2 = SQLITE_OK; - /* now attemmpt to get the exclusive lock range */ + /* now attempt to get the exclusive lock range */ failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST, SHARED_SIZE, 1); if( failed && (failed2 = afpSetLock(context->dbPath, pFile, @@ -38578,9 +40989,6 @@ static int afpUnlock(sqlite3_file *id, int eFileLock) { unixInodeInfo *pInode; afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; int skipShared = 0; -#ifdef SQLITE_TEST - int h = pFile->h; -#endif assert( pFile ); OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (afp)\n", pFile->h, eFileLock, @@ -38596,9 +41004,6 @@ static int afpUnlock(sqlite3_file *id, int eFileLock) { assert( pInode->nShared!=0 ); if( pFile->eFileLock>SHARED_LOCK ){ assert( pInode->eFileLock==pFile->eFileLock ); - SimulateIOErrorBenign(1); - SimulateIOError( h=(-1) ) - SimulateIOErrorBenign(0); #ifdef SQLITE_DEBUG /* When reducing a lock such that other processes can start @@ -38647,9 +41052,6 @@ static int afpUnlock(sqlite3_file *id, int eFileLock) { unsigned long long sharedLockByte = SHARED_FIRST+pInode->sharedByte; pInode->nShared--; if( pInode->nShared==0 ){ - SimulateIOErrorBenign(1); - SimulateIOError( h=(-1) ) - SimulateIOErrorBenign(0); if( !skipShared ){ rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 0); } @@ -38750,12 +41152,6 @@ static int nfsUnlock(sqlite3_file *id, int eFileLock){ ** Seek to the offset passed as the second argument, then read cnt ** bytes into pBuf. Return the number of bytes actually read. ** -** NB: If you define USE_PREAD or USE_PREAD64, then it might also -** be necessary to define _XOPEN_SOURCE to be 500. This varies from -** one system to another. 
Since SQLite does not define USE_PREAD -** in any form by default, we will not attempt to define _XOPEN_SOURCE. -** See tickets #2741 and #2681. -** ** To avoid stomping the errno value on a failed read the lastErrno value ** is set before returning. */ @@ -38830,7 +41226,7 @@ static int unixRead( #endif #if SQLITE_MAX_MMAP_SIZE>0 - /* Deal with as much of this read request as possible by transfering + /* Deal with as much of this read request as possible by transferring ** data from the memory mapping using memcpy(). */ if( offsetmmapSize ){ if( offset+amt <= pFile->mmapSize ){ @@ -38982,7 +41378,7 @@ static int unixWrite( #endif #if defined(SQLITE_MMAP_READWRITE) && SQLITE_MAX_MMAP_SIZE>0 - /* Deal with as much of this write request as possible by transfering + /* Deal with as much of this write request as possible by transferring ** data from the memory mapping using memcpy(). */ if( offsetmmapSize ){ if( offset+amt <= pFile->mmapSize ){ @@ -39104,7 +41500,7 @@ static int full_fsync(int fd, int fullSync, int dataOnly){ /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a ** no-op. But go ahead and call fstat() to validate the file ** descriptor as we need a method to provoke a failure during - ** coverate testing. + ** coverage testing. */ #ifdef SQLITE_NO_SYNC { @@ -41934,12 +44330,10 @@ static void appendOnePathElement( if( zName[0]=='.' ){ if( nName==1 ) return; if( zName[1]=='.' && nName==2 ){ - if( pPath->nUsed<=1 ){ - pPath->rc = SQLITE_ERROR; - return; + if( pPath->nUsed>1 ){ + assert( pPath->zOut[0]=='/' ); + while( pPath->zOut[--pPath->nUsed]!='/' ){} } - assert( pPath->zOut[0]=='/' ); - while( pPath->zOut[--pPath->nUsed]!='/' ){} return; } } @@ -42151,12 +44545,17 @@ static int unixRandomness(sqlite3_vfs *NotUsed, int nBuf, char *zBuf){ ** than the argument. */ static int unixSleep(sqlite3_vfs *NotUsed, int microseconds){ -#if OS_VXWORKS +#if !defined(HAVE_NANOSLEEP) || HAVE_NANOSLEEP+0 struct timespec sp; - sp.tv_sec = microseconds / 1000000; sp.tv_nsec = (microseconds % 1000000) * 1000; + + /* Almost all modern unix systems support nanosleep(). But if you are + ** compiling for one of the rare exceptions, you can use + ** -DHAVE_NANOSLEEP=0 (perhaps in conjuction with -DHAVE_USLEEP if + ** usleep() is available) in order to bypass the use of nanosleep() */ nanosleep(&sp, NULL); + UNUSED_PARAMETER(NotUsed); return microseconds; #elif defined(HAVE_USLEEP) && HAVE_USLEEP @@ -43533,8 +45932,16 @@ SQLITE_API int sqlite3_os_init(void){ /* Register all VFSes defined in the aVfs[] array */ for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ +#ifdef SQLITE_DEFAULT_UNIX_VFS + sqlite3_vfs_register(&aVfs[i], + 0==strcmp(aVfs[i].zName,SQLITE_DEFAULT_UNIX_VFS)); +#else sqlite3_vfs_register(&aVfs[i], i==0); +#endif } +#ifdef SQLITE_OS_KV_OPTIONAL + sqlite3KvvfsInit(); +#endif unixBigLock = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1); #ifndef SQLITE_OMIT_WAL @@ -44738,7 +47145,7 @@ static struct win_syscall { /* ** This is the xSetSystemCall() method of sqlite3_vfs for all of the -** "win32" VFSes. Return SQLITE_OK opon successfully updating the +** "win32" VFSes. Return SQLITE_OK upon successfully updating the ** system call pointer, or SQLITE_NOTFOUND if there is no configurable ** system call named zName. 
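In the unixRead()/unixWrite() hunks above, the fast path applies whenever the requested offset lies below mmapSize: everything inside the mapping is served with memcpy(), and only the remainder falls through to pread()/pwrite(). A sketch of that split, with a hypothetical MapFile standing in for unixFile.

#include <string.h>
#include <stdint.h>

typedef struct MapFile {        /* hypothetical stand-in for unixFile */
  void *pMapRegion;             /* start of the mmap'ed region */
  int64_t mmapSize;             /* number of mapped bytes */
} MapFile;

/* Copy what the mapping can satisfy; return the byte count the caller
** must still read from the file descriptor. */
static int read_via_map(MapFile *p, void *pBuf, int amt, int64_t offset){
  if( offset < p->mmapSize ){
    int64_t nCopy = p->mmapSize - offset;
    if( nCopy>amt ) nCopy = amt;
    memcpy(pBuf, (uint8_t*)p->pMapRegion + offset, (size_t)nCopy);
    return amt - (int)nCopy;    /* remainder straddling the map boundary */
  }
  return amt;
}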
*/ @@ -45497,8 +47904,9 @@ SQLITE_API int sqlite3_win32_set_directory8( const char *zValue /* New value for directory being set or reset */ ){ char **ppDirectory = 0; + int rc; #ifndef SQLITE_OMIT_AUTOINIT - int rc = sqlite3_initialize(); + rc = sqlite3_initialize(); if( rc ) return rc; #endif sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_TEMPDIR)); @@ -46317,7 +48725,7 @@ static int winRead( pFile->h, pBuf, amt, offset, pFile->locktype)); #if SQLITE_MAX_MMAP_SIZE>0 - /* Deal with as much of this read request as possible by transfering + /* Deal with as much of this read request as possible by transferring ** data from the memory mapping using memcpy(). */ if( offsetmmapSize ){ if( offset+amt <= pFile->mmapSize ){ @@ -46395,7 +48803,7 @@ static int winWrite( pFile->h, pBuf, amt, offset, pFile->locktype)); #if defined(SQLITE_MMAP_READWRITE) && SQLITE_MAX_MMAP_SIZE>0 - /* Deal with as much of this write request as possible by transfering + /* Deal with as much of this write request as possible by transferring ** data from the memory mapping using memcpy(). */ if( offsetmmapSize ){ if( offset+amt <= pFile->mmapSize ){ @@ -46505,7 +48913,7 @@ static int winTruncate(sqlite3_file *id, sqlite3_int64 nByte){ ** all references to memory-mapped content are closed. That is doable, ** but involves adding a few branches in the common write code path which ** could slow down normal operations slightly. Hence, we have decided for - ** now to simply make trancations a no-op if there are pending reads. We + ** now to simply make transactions a no-op if there are pending reads. We ** can maybe revisit this decision in the future. */ return SQLITE_OK; @@ -46564,7 +48972,7 @@ static int winTruncate(sqlite3_file *id, sqlite3_int64 nByte){ #ifdef SQLITE_TEST /* ** Count the number of fullsyncs and normal syncs. This is used to test -** that syncs and fullsyncs are occuring at the right times. +** that syncs and fullsyncs are occurring at the right times. */ SQLITE_API int sqlite3_sync_count = 0; SQLITE_API int sqlite3_fullsync_count = 0; @@ -46921,7 +49329,7 @@ static int winLock(sqlite3_file *id, int locktype){ */ if( locktype==EXCLUSIVE_LOCK && res ){ assert( pFile->locktype>=SHARED_LOCK ); - res = winUnlockReadLock(pFile); + (void)winUnlockReadLock(pFile); res = winLockFile(&pFile->h, SQLITE_LOCKFILE_FLAGS, SHARED_FIRST, 0, SHARED_SIZE, 0); if( res ){ @@ -48303,9 +50711,10 @@ static int winMakeEndInDirSep(int nBuf, char *zBuf){ } /* -** If sqlite3_temp_directory is not, take the mutex and return true. +** If sqlite3_temp_directory is defined, take the mutex and return true. ** -** If sqlite3_temp_directory is NULL, omit the mutex and return false. +** If sqlite3_temp_directory is NULL (undefined), omit the mutex and +** return false. 
*/ static int winTempDirDefined(void){ sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_TEMPDIR)); @@ -48324,6 +50733,7 @@ static int winGetTempname(sqlite3_vfs *pVfs, char **pzBuf){ "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "0123456789"; size_t i, j; + DWORD pid; int nPre = sqlite3Strlen30(SQLITE_TEMP_FILE_PREFIX); int nMax, nBuf, nDir, nLen; char *zBuf; @@ -48536,7 +50946,10 @@ static int winGetTempname(sqlite3_vfs *pVfs, char **pzBuf){ j = sqlite3Strlen30(zBuf); sqlite3_randomness(15, &zBuf[j]); + pid = osGetCurrentProcessId(); for(i=0; i<15; i++, j++){ + zBuf[j] += pid & 0xff; + pid >>= 8; zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ]; } zBuf[j] = 0; @@ -48774,7 +51187,7 @@ static int winOpen( if( isReadWrite ){ int rc2, isRO = 0; sqlite3BeginBenignMalloc(); - rc2 = winAccess(pVfs, zName, SQLITE_ACCESS_READ, &isRO); + rc2 = winAccess(pVfs, zUtf8Name, SQLITE_ACCESS_READ, &isRO); sqlite3EndBenignMalloc(); if( rc2==SQLITE_OK && isRO ) break; } @@ -48791,7 +51204,7 @@ static int winOpen( if( isReadWrite ){ int rc2, isRO = 0; sqlite3BeginBenignMalloc(); - rc2 = winAccess(pVfs, zName, SQLITE_ACCESS_READ, &isRO); + rc2 = winAccess(pVfs, zUtf8Name, SQLITE_ACCESS_READ, &isRO); sqlite3EndBenignMalloc(); if( rc2==SQLITE_OK && isRO ) break; } @@ -48811,7 +51224,7 @@ static int winOpen( if( isReadWrite ){ int rc2, isRO = 0; sqlite3BeginBenignMalloc(); - rc2 = winAccess(pVfs, zName, SQLITE_ACCESS_READ, &isRO); + rc2 = winAccess(pVfs, zUtf8Name, SQLITE_ACCESS_READ, &isRO); sqlite3EndBenignMalloc(); if( rc2==SQLITE_OK && isRO ) break; } @@ -49034,6 +51447,13 @@ static int winAccess( OSTRACE(("ACCESS name=%s, flags=%x, pResOut=%p\n", zFilename, flags, pResOut)); + if( zFilename==0 ){ + *pResOut = 0; + OSTRACE(("ACCESS name=%s, pResOut=%p, *pResOut=%d, rc=SQLITE_OK\n", + zFilename, pResOut, *pResOut)); + return SQLITE_OK; + } + zConverted = winConvertFromUtf8Filename(zFilename); if( zConverted==0 ){ OSTRACE(("ACCESS name=%s, rc=SQLITE_IOERR_NOMEM\n", zFilename)); @@ -49341,7 +51761,8 @@ static int winFullPathname( char *zFull /* Output buffer */ ){ int rc; - sqlite3_mutex *pMutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_TEMPDIR); + MUTEX_LOGIC( sqlite3_mutex *pMutex; ) + MUTEX_LOGIC( pMutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_TEMPDIR); ) sqlite3_mutex_enter(pMutex); rc = winFullPathnameNoMutex(pVfs, zRelative, nFull, zFull); sqlite3_mutex_leave(pMutex); @@ -49883,6 +52304,7 @@ static int memdbTruncate(sqlite3_file*, sqlite3_int64 size); static int memdbSync(sqlite3_file*, int flags); static int memdbFileSize(sqlite3_file*, sqlite3_int64 *pSize); static int memdbLock(sqlite3_file*, int); +static int memdbUnlock(sqlite3_file*, int); /* static int memdbCheckReservedLock(sqlite3_file*, int *pResOut);// not used */ static int memdbFileControl(sqlite3_file*, int op, void *pArg); /* static int memdbSectorSize(sqlite3_file*); // not used */ @@ -49941,7 +52363,7 @@ static const sqlite3_io_methods memdb_io_methods = { memdbSync, /* xSync */ memdbFileSize, /* xFileSize */ memdbLock, /* xLock */ - memdbLock, /* xUnlock - same as xLock in this case */ + memdbUnlock, /* xUnlock */ 0, /* memdbCheckReservedLock, */ /* xCheckReservedLock */ memdbFileControl, /* xFileControl */ 0, /* memdbSectorSize,*/ /* xSectorSize */ @@ -50142,39 +52564,81 @@ static int memdbLock(sqlite3_file *pFile, int eLock){ MemFile *pThis = (MemFile*)pFile; MemStore *p = pThis->pStore; int rc = SQLITE_OK; - if( eLock==pThis->eLock ) return SQLITE_OK; + if( eLock<=pThis->eLock ) return SQLITE_OK; memdbEnter(p); - if( 
eLock>SQLITE_LOCK_SHARED ){ - if( p->mFlags & SQLITE_DESERIALIZE_READONLY ){ - rc = SQLITE_READONLY; - }else if( pThis->eLock<=SQLITE_LOCK_SHARED ){ - if( p->nWrLock ){ - rc = SQLITE_BUSY; - }else{ - p->nWrLock = 1; + + assert( p->nWrLock==0 || p->nWrLock==1 ); + assert( pThis->eLock<=SQLITE_LOCK_SHARED || p->nWrLock==1 ); + assert( pThis->eLock==SQLITE_LOCK_NONE || p->nRdLock>=1 ); + + if( eLock>SQLITE_LOCK_SHARED && (p->mFlags & SQLITE_DESERIALIZE_READONLY) ){ + rc = SQLITE_READONLY; + }else{ + switch( eLock ){ + case SQLITE_LOCK_SHARED: { + assert( pThis->eLock==SQLITE_LOCK_NONE ); + if( p->nWrLock>0 ){ + rc = SQLITE_BUSY; + }else{ + p->nRdLock++; + } + break; + }; + + case SQLITE_LOCK_RESERVED: + case SQLITE_LOCK_PENDING: { + assert( pThis->eLock>=SQLITE_LOCK_SHARED ); + if( ALWAYS(pThis->eLock==SQLITE_LOCK_SHARED) ){ + if( p->nWrLock>0 ){ + rc = SQLITE_BUSY; + }else{ + p->nWrLock = 1; + } + } + break; + } + + default: { + assert( eLock==SQLITE_LOCK_EXCLUSIVE ); + assert( pThis->eLock>=SQLITE_LOCK_SHARED ); + if( p->nRdLock>1 ){ + rc = SQLITE_BUSY; + }else if( pThis->eLock==SQLITE_LOCK_SHARED ){ + p->nWrLock = 1; + } + break; } } - }else if( eLock==SQLITE_LOCK_SHARED ){ - if( pThis->eLock > SQLITE_LOCK_SHARED ){ - assert( p->nWrLock==1 ); - p->nWrLock = 0; - }else if( p->nWrLock ){ - rc = SQLITE_BUSY; - }else{ - p->nRdLock++; + } + if( rc==SQLITE_OK ) pThis->eLock = eLock; + memdbLeave(p); + return rc; +} + +/* +** Unlock an memdb-file. +*/ +static int memdbUnlock(sqlite3_file *pFile, int eLock){ + MemFile *pThis = (MemFile*)pFile; + MemStore *p = pThis->pStore; + if( eLock>=pThis->eLock ) return SQLITE_OK; + memdbEnter(p); + + assert( eLock==SQLITE_LOCK_SHARED || eLock==SQLITE_LOCK_NONE ); + if( eLock==SQLITE_LOCK_SHARED ){ + if( ALWAYS(pThis->eLock>SQLITE_LOCK_SHARED) ){ + p->nWrLock--; } }else{ - assert( eLock==SQLITE_LOCK_NONE ); if( pThis->eLock>SQLITE_LOCK_SHARED ){ - assert( p->nWrLock==1 ); - p->nWrLock = 0; + p->nWrLock--; } - assert( p->nRdLock>0 ); p->nRdLock--; } - if( rc==SQLITE_OK ) pThis->eLock = eLock; + + pThis->eLock = eLock; memdbLeave(p); - return rc; + return SQLITE_OK; } #if 0 @@ -50284,7 +52748,7 @@ static int memdbOpen( memset(pFile, 0, sizeof(*pFile)); szName = sqlite3Strlen30(zName); - if( szName>1 && zName[0]=='/' ){ + if( szName>1 && (zName[0]=='/' || zName[0]=='\\') ){ int i; #ifndef SQLITE_MUTEX_OMIT sqlite3_mutex *pVfsMutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1); @@ -50631,6 +53095,13 @@ SQLITE_API int sqlite3_deserialize( return rc; } +/* +** Return true if the VFS is the memvfs. +*/ +SQLITE_PRIVATE int sqlite3IsMemdb(const sqlite3_vfs *pVfs){ + return pVfs==&memdb_vfs; +} + /* ** This routine is called when the extension is loaded. ** Register the new VFS. @@ -50843,7 +53314,7 @@ SQLITE_PRIVATE int sqlite3BitvecSet(Bitvec *p, u32 i){ h = BITVEC_HASH(i++); /* if there wasn't a hash collision, and this doesn't */ /* completely fill the hash, then just add it without */ - /* worring about sub-dividing and re-hashing. */ + /* worrying about sub-dividing and re-hashing. 
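The memdbLock()/memdbUnlock() split above replaces one handler that served both directions with explicit reader/writer accounting: nRdLock counts SHARED holders and nWrLock is 0 or 1. A stripped-down sketch of the same state machine; the real memdb code additionally distinguishes RESERVED/PENDING and honors the SQLITE_DESERIALIZE_READONLY flag.

#include "sqlite3.h"

typedef struct MemLock { int nRdLock; int nWrLock; } MemLock;

static int demo_lock_shared(MemLock *p){
  if( p->nWrLock ) return SQLITE_BUSY;   /* a writer is active */
  p->nRdLock++;
  return SQLITE_OK;
}
static int demo_lock_exclusive(MemLock *p){
  if( p->nRdLock>1 ) return SQLITE_BUSY; /* readers besides ourselves */
  p->nWrLock = 1;
  return SQLITE_OK;
}
static void demo_unlock(MemLock *p, int toShared){
  if( p->nWrLock ) p->nWrLock = 0;       /* drop any write lock */
  if( !toShared ) p->nRdLock--;          /* dropping all the way to NONE */
}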
*/ if( !p->u.aHash[h] ){ if (p->nSet<(BITVEC_NINT-1)) { goto bitvec_set_end; @@ -51110,7 +53581,7 @@ SQLITE_PRIVATE int sqlite3BitvecBuiltinTest(int sz, int *aOp){ struct PCache { PgHdr *pDirty, *pDirtyTail; /* List of dirty pages in LRU order */ PgHdr *pSynced; /* Last synced page in dirty page list */ - int nRefSum; /* Sum of ref counts over all pages */ + i64 nRefSum; /* Sum of ref counts over all pages */ int szCache; /* Configured cache size */ int szSpill; /* Size before spilling occurs */ int szPage; /* Size of every page in this cache */ @@ -51135,12 +53606,24 @@ struct PCache { int sqlite3PcacheTrace = 2; /* 0: off 1: simple 2: cache dumps */ int sqlite3PcacheMxDump = 9999; /* Max cache entries for pcacheDump() */ # define pcacheTrace(X) if(sqlite3PcacheTrace){sqlite3DebugPrintf X;} - void pcacheDump(PCache *pCache){ - int N; - int i, j; - sqlite3_pcache_page *pLower; + static void pcachePageTrace(int i, sqlite3_pcache_page *pLower){ PgHdr *pPg; unsigned char *a; + int j; + if( pLower==0 ){ + printf("%3d: NULL\n", i); + }else{ + pPg = (PgHdr*)pLower->pExtra; + printf("%3d: nRef %2lld flgs %02x data ", i, pPg->nRef, pPg->flags); + a = (unsigned char *)pLower->pBuf; + for(j=0; j<12; j++) printf("%02x", a[j]); + printf(" ptr %p\n", pPg); + } + } + static void pcacheDump(PCache *pCache){ + int N; + int i; + sqlite3_pcache_page *pLower; if( sqlite3PcacheTrace<2 ) return; if( pCache->pCache==0 ) return; @@ -51148,22 +53631,42 @@ struct PCache { if( N>sqlite3PcacheMxDump ) N = sqlite3PcacheMxDump; for(i=1; i<=N; i++){ pLower = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, i, 0); - if( pLower==0 ) continue; - pPg = (PgHdr*)pLower->pExtra; - printf("%3d: nRef %2d flgs %02x data ", i, pPg->nRef, pPg->flags); - a = (unsigned char *)pLower->pBuf; - for(j=0; j<12; j++) printf("%02x", a[j]); - printf("\n"); - if( pPg->pPage==0 ){ + pcachePageTrace(i, pLower); + if( pLower && ((PgHdr*)pLower)->pPage==0 ){ sqlite3GlobalConfig.pcache2.xUnpin(pCache->pCache, pLower, 0); } } } - #else +#else # define pcacheTrace(X) +# define pcachePageTrace(PGNO, X) # define pcacheDump(X) #endif +/* +** Return 1 if pPg is on the dirty list for pCache. Return 0 if not. +** This routine runs inside of assert() statements only. +*/ +#if defined(SQLITE_ENABLE_EXPENSIVE_ASSERT) +static int pageOnDirtyList(PCache *pCache, PgHdr *pPg){ + PgHdr *p; + for(p=pCache->pDirty; p; p=p->pDirtyNext){ + if( p==pPg ) return 1; + } + return 0; +} +static int pageNotOnDirtyList(PCache *pCache, PgHdr *pPg){ + PgHdr *p; + for(p=pCache->pDirty; p; p=p->pDirtyNext){ + if( p==pPg ) return 0; + } + return 1; +} +#else +# define pageOnDirtyList(A,B) 1 +# define pageNotOnDirtyList(A,B) 1 +#endif + /* ** Check invariants on a PgHdr entry. Return true if everything is OK. ** Return false if any invariant is violated. 
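pageOnDirtyList()/pageNotOnDirtyList() above follow a common debug-build idiom: an O(n) membership scan compiled in only under SQLITE_ENABLE_EXPENSIVE_ASSERT, and reduced to the constant 1 otherwise so that the enclosing assert() costs nothing in release builds. The generic pattern, with a hypothetical Node list:

typedef struct Node Node;
struct Node { Node *pNext; };

#if defined(SQLITE_ENABLE_EXPENSIVE_ASSERT)
static int onList(Node *pHead, Node *pTarget){
  Node *p;
  for(p=pHead; p; p=p->pNext){
    if( p==pTarget ) return 1;
  }
  return 0;
}
#else
# define onList(A,B) 1   /* assert( onList(h,n) ) compiles away */
#endif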
@@ -51182,8 +53685,13 @@ SQLITE_PRIVATE int sqlite3PcachePageSanity(PgHdr *pPg){ assert( pCache!=0 ); /* Every page has an associated PCache */ if( pPg->flags & PGHDR_CLEAN ){ assert( (pPg->flags & PGHDR_DIRTY)==0 );/* Cannot be both CLEAN and DIRTY */ - assert( pCache->pDirty!=pPg ); /* CLEAN pages not on dirty list */ - assert( pCache->pDirtyTail!=pPg ); + assert( pageNotOnDirtyList(pCache, pPg) );/* CLEAN pages not on dirtylist */ + }else{ + assert( (pPg->flags & PGHDR_DIRTY)!=0 );/* If not CLEAN must be DIRTY */ + assert( pPg->pDirtyNext==0 || pPg->pDirtyNext->pDirtyPrev==pPg ); + assert( pPg->pDirtyPrev==0 || pPg->pDirtyPrev->pDirtyNext==pPg ); + assert( pPg->pDirtyPrev!=0 || pCache->pDirty==pPg ); + assert( pageOnDirtyList(pCache, pPg) ); } /* WRITEABLE pages must also be DIRTY */ if( pPg->flags & PGHDR_WRITEABLE ){ @@ -51313,7 +53821,7 @@ static int numberOfCachePages(PCache *p){ return p->szCache; }else{ i64 n; - /* IMPLEMANTATION-OF: R-59858-46238 If the argument N is negative, then the + /* IMPLEMENTATION-OF: R-59858-46238 If the argument N is negative, then the ** number of cache pages is adjusted to be a number of pages that would ** use approximately abs(N*1024) bytes of memory based on the current ** page size. */ @@ -51457,8 +53965,9 @@ SQLITE_PRIVATE sqlite3_pcache_page *sqlite3PcacheFetch( assert( createFlag==0 || pCache->eCreate==eCreate ); assert( createFlag==0 || eCreate==1+(!pCache->bPurgeable||!pCache->pDirty) ); pRes = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, pgno, eCreate); - pcacheTrace(("%p.FETCH %d%s (result: %p)\n",pCache,pgno, + pcacheTrace(("%p.FETCH %d%s (result: %p) ",pCache,pgno, createFlag?" create":"",pRes)); + pcachePageTrace(pgno, pRes); return pRes; } @@ -51586,6 +54095,7 @@ SQLITE_PRIVATE void SQLITE_NOINLINE sqlite3PcacheRelease(PgHdr *p){ pcacheUnpin(p); }else{ pcacheManageDirtyList(p, PCACHE_DIRTYLIST_FRONT); + assert( sqlite3PcachePageSanity(p) ); } } } @@ -51629,6 +54139,7 @@ SQLITE_PRIVATE void sqlite3PcacheMakeDirty(PgHdr *p){ pcacheTrace(("%p.DIRTY %d\n",p->pCache,p->pgno)); assert( (p->flags & (PGHDR_DIRTY|PGHDR_CLEAN))==PGHDR_DIRTY ); pcacheManageDirtyList(p, PCACHE_DIRTYLIST_ADD); + assert( sqlite3PcachePageSanity(p) ); } assert( sqlite3PcachePageSanity(p) ); } @@ -51691,14 +54202,24 @@ SQLITE_PRIVATE void sqlite3PcacheClearSyncFlags(PCache *pCache){ */ SQLITE_PRIVATE void sqlite3PcacheMove(PgHdr *p, Pgno newPgno){ PCache *pCache = p->pCache; + sqlite3_pcache_page *pOther; assert( p->nRef>0 ); assert( newPgno>0 ); assert( sqlite3PcachePageSanity(p) ); pcacheTrace(("%p.MOVE %d -> %d\n",pCache,p->pgno,newPgno)); + pOther = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, newPgno, 0); + if( pOther ){ + PgHdr *pXPage = (PgHdr*)pOther->pExtra; + assert( pXPage->nRef==0 ); + pXPage->nRef++; + pCache->nRefSum++; + sqlite3PcacheDrop(pXPage); + } sqlite3GlobalConfig.pcache2.xRekey(pCache->pCache, p->pPage, p->pgno,newPgno); p->pgno = newPgno; if( (p->flags&PGHDR_DIRTY) && (p->flags&PGHDR_NEED_SYNC) ){ pcacheManageDirtyList(p, PCACHE_DIRTYLIST_FRONT); + assert( sqlite3PcachePageSanity(p) ); } } @@ -51788,7 +54309,7 @@ static PgHdr *pcacheMergeDirtyList(PgHdr *pA, PgHdr *pB){ } /* -** Sort the list of pages in accending order by pgno. Pages are +** Sort the list of pages in ascending order by pgno. Pages are ** connected by pDirty pointers. The pDirtyPrev pointers are ** corrupted by this sort. 
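The sort referenced above is a classic linked-list merge sort; its core is the merge of two pgno-sorted lists threaded through pDirty. A self-contained sketch of that merge step with a local PgHdr stand-in:

typedef struct PgHdr PgHdr;
struct PgHdr { unsigned pgno; PgHdr *pDirty; };

/* Merge two lists already sorted by pgno into one sorted list. */
static PgHdr *merge_dirty(PgHdr *pA, PgHdr *pB){
  PgHdr head, *pTail = &head;
  while( pA && pB ){
    if( pA->pgno < pB->pgno ){
      pTail->pDirty = pA; pTail = pA; pA = pA->pDirty;
    }else{
      pTail->pDirty = pB; pTail = pB; pB = pB->pDirty;
    }
  }
  pTail->pDirty = pA ? pA : pB;   /* append whichever list remains */
  return head.pDirty;
}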
** @@ -51847,14 +54368,14 @@ SQLITE_PRIVATE PgHdr *sqlite3PcacheDirtyList(PCache *pCache){ ** This is not the total number of pages referenced, but the sum of the ** reference count for all pages. */ -SQLITE_PRIVATE int sqlite3PcacheRefCount(PCache *pCache){ +SQLITE_PRIVATE i64 sqlite3PcacheRefCount(PCache *pCache){ return pCache->nRefSum; } /* ** Return the number of references to the page supplied as an argument. */ -SQLITE_PRIVATE int sqlite3PcachePageRefcount(PgHdr *p){ +SQLITE_PRIVATE i64 sqlite3PcachePageRefcount(PgHdr *p){ return p->nRef; } @@ -51996,12 +54517,13 @@ SQLITE_PRIVATE void sqlite3PcacheIterateDirty(PCache *pCache, void (*xIter)(PgHd ** size can vary according to architecture, compile-time options, and ** SQLite library version number. ** -** If SQLITE_PCACHE_SEPARATE_HEADER is defined, then the extension is obtained -** using a separate memory allocation from the database page content. This -** seeks to overcome the "clownshoe" problem (also called "internal -** fragmentation" in academic literature) of allocating a few bytes more -** than a power of two with the memory allocator rounding up to the next -** power of two, and leaving the rounded-up space unused. +** Historical note: It used to be that if the SQLITE_PCACHE_SEPARATE_HEADER +** was defined, then the page content would be held in a separate memory +** allocation from the PgHdr1. This was intended to avoid clownshoe memory +** allocations. However, the btree layer needs a small (16-byte) overrun +** area after the page content buffer. The header serves as that overrun +** area. Therefore SQLITE_PCACHE_SEPARATE_HEADER was discontinued to avoid +** any possibility of a memory error. ** ** This module tracks pointers to PgHdr1 objects. Only pcache.c communicates ** with this module. Information is passed back and forth as PgHdr1 pointers. @@ -52027,7 +54549,7 @@ SQLITE_PRIVATE void sqlite3PcacheIterateDirty(PCache *pCache, void (*xIter)(PgHd ** If N is positive, then N pages worth of memory are allocated using a single ** sqlite3Malloc() call and that memory is used for the first N pages allocated. ** Or if N is negative, then -1024*N bytes of memory are allocated and used -** for as many pages as can be accomodated. +** for as many pages as can be accommodated. ** ** Only one of (2) or (3) can be used. Once the memory available to (2) or ** (3) is exhausted, subsequent allocations fail over to the general-purpose @@ -52046,30 +54568,40 @@ typedef struct PGroup PGroup; /* ** Each cache entry is represented by an instance of the following -** structure. Unless SQLITE_PCACHE_SEPARATE_HEADER is defined, a buffer of -** PgHdr1.pCache->szPage bytes is allocated directly before this structure -** in memory. +** structure. A buffer of PgHdr1.pCache->szPage bytes is allocated +** directly before this structure and is used to cache the page content. ** -** Note: Variables isBulkLocal and isAnchor were once type "u8". That works, +** When reading a corrupt database file, it is possible that SQLite might +** read a few bytes (no more than 16 bytes) past the end of the page buffer. +** It will only read past the end of the page buffer, never write. This +** object is positioned immediately after the page buffer to serve as an +** overrun area, so that overreads are harmless. +** +** Variables isBulkLocal and isAnchor were once type "u8". That works, ** but causes a 2-byte gap in the structure for most architectures (since ** pointers must be either 4 or 8-byte aligned). 
As this structure is located ** in memory directly after the associated page data, if the database is ** corrupt, code at the b-tree layer may overread the page buffer and ** read part of this structure before the corruption is detected. This -** can cause a valgrind error if the unitialized gap is accessed. Using u16 -** ensures there is no such gap, and therefore no bytes of unitialized memory -** in the structure. +** can cause a valgrind error if the uninitialized gap is accessed. Using u16 +** ensures there is no such gap, and therefore no bytes of uninitialized +** memory in the structure. +** +** The pLruNext and pLruPrev pointers form a double-linked circular list +** of all pages that are unpinned. The PGroup.lru element (which should be +** the only element on the list with PgHdr1.isAnchor set to 1) forms the +** beginning and the end of the list. */ struct PgHdr1 { - sqlite3_pcache_page page; /* Base class. Must be first. pBuf & pExtra */ - unsigned int iKey; /* Key value (page number) */ - u16 isBulkLocal; /* This page from bulk local storage */ - u16 isAnchor; /* This is the PGroup.lru element */ - PgHdr1 *pNext; /* Next in hash table chain */ - PCache1 *pCache; /* Cache that currently owns this page */ - PgHdr1 *pLruNext; /* Next in LRU list of unpinned pages */ - PgHdr1 *pLruPrev; /* Previous in LRU list of unpinned pages */ - /* NB: pLruPrev is only valid if pLruNext!=0 */ + sqlite3_pcache_page page; /* Base class. Must be first. pBuf & pExtra */ + unsigned int iKey; /* Key value (page number) */ + u16 isBulkLocal; /* This page from bulk local storage */ + u16 isAnchor; /* This is the PGroup.lru element */ + PgHdr1 *pNext; /* Next in hash table chain */ + PCache1 *pCache; /* Cache that currently owns this page */ + PgHdr1 *pLruNext; /* Next in circular LRU list of unpinned pages */ + PgHdr1 *pLruPrev; /* Previous in LRU list of unpinned pages */ + /* NB: pLruPrev is only valid if pLruNext!=0 */ }; /* @@ -52395,25 +54927,13 @@ static PgHdr1 *pcache1AllocPage(PCache1 *pCache, int benignMalloc){ pcache1LeaveMutex(pCache->pGroup); #endif if( benignMalloc ){ sqlite3BeginBenignMalloc(); } -#ifdef SQLITE_PCACHE_SEPARATE_HEADER - pPg = pcache1Alloc(pCache->szPage); - p = sqlite3Malloc(sizeof(PgHdr1) + pCache->szExtra); - if( !pPg || !p ){ - pcache1Free(pPg); - sqlite3_free(p); - pPg = 0; - } -#else pPg = pcache1Alloc(pCache->szAlloc); -#endif if( benignMalloc ){ sqlite3EndBenignMalloc(); } #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT pcache1EnterMutex(pCache->pGroup); #endif if( pPg==0 ) return 0; -#ifndef SQLITE_PCACHE_SEPARATE_HEADER p = (PgHdr1 *)&((u8 *)pPg)[pCache->szPage]; -#endif p->page.pBuf = pPg; p->page.pExtra = &p[1]; p->isBulkLocal = 0; @@ -52437,9 +54957,6 @@ static void pcache1FreePage(PgHdr1 *p){ pCache->pFree = p; }else{ pcache1Free(p->page.pBuf); -#ifdef SQLITE_PCACHE_SEPARATE_HEADER - sqlite3_free(p); -#endif } (*pCache->pnPurgeable)--; } @@ -53080,23 +55597,26 @@ static void pcache1Rekey( PCache1 *pCache = (PCache1 *)p; PgHdr1 *pPage = (PgHdr1 *)pPg; PgHdr1 **pp; - unsigned int h; + unsigned int hOld, hNew; assert( pPage->iKey==iOld ); assert( pPage->pCache==pCache ); + assert( iOld!=iNew ); /* The page number really is changing */ pcache1EnterMutex(pCache->pGroup); - h = iOld%pCache->nHash; - pp = &pCache->apHash[h]; + assert( pcache1FetchNoMutex(p, iOld, 0)==pPage ); /* pPg really is iOld */ + hOld = iOld%pCache->nHash; + pp = &pCache->apHash[hOld]; while( (*pp)!=pPage ){ pp = &(*pp)->pNext; } *pp = pPage->pNext; - h = iNew%pCache->nHash; + assert( 
pcache1FetchNoMutex(p, iNew, 0)==0 ); /* iNew not in cache */ + hNew = iNew%pCache->nHash; pPage->iKey = iNew; - pPage->pNext = pCache->apHash[h]; - pCache->apHash[h] = pPage; + pPage->pNext = pCache->apHash[hNew]; + pCache->apHash[hNew] = pPage; if( iNew>pCache->iMaxKey ){ pCache->iMaxKey = iNew; } @@ -53203,9 +55723,6 @@ SQLITE_PRIVATE int sqlite3PcacheReleaseMemory(int nReq){ && p->isAnchor==0 ){ nFree += pcache1MemSize(p->page.pBuf); -#ifdef SQLITE_PCACHE_SEPARATE_HEADER - nFree += sqlite3MemSize(p); -#endif assert( PAGE_IS_UNPINNED(p) ); pcache1PinPage(p); pcache1RemoveFromHash(p, 1); @@ -53286,7 +55803,7 @@ SQLITE_PRIVATE void sqlite3PcacheStats( ** The TEST primitive includes a "batch" number. The TEST primitive ** will only see elements that were inserted before the last change ** in the batch number. In other words, if an INSERT occurs between -** two TESTs where the TESTs have the same batch nubmer, then the +** two TESTs where the TESTs have the same batch number, then the ** value added by the INSERT will not be visible to the second TEST. ** The initial batch number is zero, so if the very first TEST contains ** a non-zero batch number, it will see all prior INSERTs. @@ -53818,6 +56335,7 @@ SQLITE_PRIVATE int sqlite3RowSetTest(RowSet *pRowSet, int iBatch, sqlite3_int64 # define sqlite3WalFramesize(z) 0 # define sqlite3WalFindFrame(x,y,z) 0 # define sqlite3WalFile(x) 0 +# undef SQLITE_USE_SEH #else #define WAL_SAVEPOINT_NDATA 4 @@ -53924,6 +56442,10 @@ SQLITE_PRIVATE int sqlite3WalWriteLock(Wal *pWal, int bLock); SQLITE_PRIVATE void sqlite3WalDb(Wal *pWal, sqlite3 *db); #endif +#ifdef SQLITE_USE_SEH +SQLITE_PRIVATE int sqlite3WalSystemErrno(Wal*); +#endif + #endif /* ifndef SQLITE_OMIT_WAL */ #endif /* SQLITE_WAL_H */ @@ -54209,7 +56731,7 @@ int sqlite3PagerTrace=1; /* True to enable tracing */ ** outstanding transactions have been abandoned, the pager is able to ** transition back to OPEN state, discarding the contents of the ** page-cache and any other in-memory state at the same time. Everything -** is reloaded from disk (and, if necessary, hot-journal rollback peformed) +** is reloaded from disk (and, if necessary, hot-journal rollback performed) ** when a read-transaction is next opened on the pager (transitioning ** the pager into READER state). At that point the system has recovered ** from the error. @@ -55400,9 +57922,32 @@ static int writeJournalHdr(Pager *pPager){ memset(zHeader, 0, sizeof(aJournalMagic)+4); } + + /* The random check-hash initializer */ - sqlite3_randomness(sizeof(pPager->cksumInit), &pPager->cksumInit); + if( pPager->journalMode!=PAGER_JOURNALMODE_MEMORY ){ + sqlite3_randomness(sizeof(pPager->cksumInit), &pPager->cksumInit); + } +#ifdef SQLITE_DEBUG + else{ + /* The Pager.cksumInit variable is usually randomized above to protect + ** against there being existing records in the journal file. This is + ** dangerous, as following a crash they may be mistaken for records + ** written by the current transaction and rolled back into the database + ** file, causing corruption. The following assert statements verify + ** that this is not required in "journal_mode=memory" mode, as in that + ** case the journal file is always 0 bytes in size at this point. + ** It is advantageous to avoid the sqlite3_randomness() call if possible + ** as it takes the global PRNG mutex. 
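The pcache1Rekey() hunk above moves an entry between hash buckets: walk the old chain to find the link that points at the page, unlink it, then push it onto the chain for the new key. The same operation in isolation, with a hypothetical Entry type; like the original, it assumes the entry really is on its old chain.

typedef struct Entry Entry;
struct Entry { unsigned iKey; Entry *pNext; };

static void rekey(Entry **apHash, unsigned nHash, Entry *pE, unsigned iNew){
  Entry **pp = &apHash[pE->iKey % nHash];
  while( *pp!=pE ){ pp = &(*pp)->pNext; }   /* find the link to pE */
  *pp = pE->pNext;                          /* unlink from the old chain */
  pE->iKey = iNew;
  pE->pNext = apHash[iNew % nHash];         /* push onto the new chain */
  apHash[iNew % nHash] = pE;
}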
*/ + i64 sz = 0; + sqlite3OsFileSize(pPager->jfd, &sz); + assert( sz==0 ); + assert( pPager->journalOff==journalHdrOffset(pPager) ); + assert( sqlite3JournalIsInMemory(pPager->jfd) ); + } +#endif put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit); + /* The initial database size */ put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbOrigSize); /* The assumed sector size for this process */ @@ -55582,7 +58127,7 @@ static int readJournalHdr( ** + 4 bytes: super-journal name checksum. ** + 8 bytes: aJournalMagic[]. ** -** The super-journal page checksum is the sum of the bytes in thesuper-journal +** The super-journal page checksum is the sum of the bytes in the super-journal ** name, where each byte is interpreted as a signed 8-bit integer. ** ** If zSuper is a NULL pointer (occurs for a single database transaction), @@ -55635,7 +58180,7 @@ static int writeSuperJournal(Pager *pPager, const char *zSuper){ } pPager->journalOff += (nSuper+20); - /* If the pager is in peristent-journal mode, then the physical + /* If the pager is in persistent-journal mode, then the physical ** journal-file may extend past the end of the super-journal name ** and 8 bytes of magic data just written to the file. This is ** dangerous because the code to rollback a hot-journal file @@ -55805,7 +58350,7 @@ static void pager_unlock(Pager *pPager){ /* ** This function is called whenever an IOERR or FULL error that requires -** the pager to transition into the ERROR state may ahve occurred. +** the pager to transition into the ERROR state may have occurred. ** The first argument is a pointer to the pager structure, the second ** the error-code about to be returned by a pager API function. The ** value returned is a copy of the second argument to this function. @@ -56046,6 +58591,9 @@ static int pager_end_transaction(Pager *pPager, int hasSuper, int bCommit){ return (rc==SQLITE_OK?rc2:rc); } +/* Forward reference */ +static int pager_playback(Pager *pPager, int isHot); + /* ** Execute a rollback if a transaction is active and unlock the ** database file. @@ -56074,13 +58622,28 @@ static void pagerUnlockAndRollback(Pager *pPager){ assert( pPager->eState==PAGER_READER ); pager_end_transaction(pPager, 0, 0); } + }else if( pPager->eState==PAGER_ERROR + && pPager->journalMode==PAGER_JOURNALMODE_MEMORY + && isOpen(pPager->jfd) + ){ + /* Special case for a ROLLBACK due to I/O error with an in-memory + ** journal: We have to rollback immediately, before the journal is + ** closed, because once it is closed, all content is forgotten. */ + int errCode = pPager->errCode; + u8 eLock = pPager->eLock; + pPager->eState = PAGER_OPEN; + pPager->errCode = SQLITE_OK; + pPager->eLock = EXCLUSIVE_LOCK; + pager_playback(pPager, 1); + pPager->errCode = errCode; + pPager->eLock = eLock; } pager_unlock(pPager); } /* ** Parameter aData must point to a buffer of pPager->pageSize bytes -** of data. Compute and return a checksum based ont the contents of the +** of data. Compute and return a checksum based on the contents of the ** page of data and the current value of pPager->cksumInit. ** ** This is not a real checksum. 
It is really just the sum of the @@ -56513,6 +59076,8 @@ static int pager_truncate(Pager *pPager, Pgno nPage){ int rc = SQLITE_OK; assert( pPager->eState!=PAGER_ERROR ); assert( pPager->eState!=PAGER_READER ); + PAGERTRACE(("Truncate %d npage %u\n", PAGERID(pPager), nPage)); + if( isOpen(pPager->fd) && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN) @@ -56843,7 +59408,7 @@ static int pager_playback(Pager *pPager, int isHot){ ** see if it is possible to delete the super-journal. */ assert( zSuper==&pPager->pTmpSpace[4] ); - memset(&zSuper[-4], 0, 4); + memset(pPager->pTmpSpace, 0, 4); rc = pager_delsuper(pPager, zSuper); testcase( rc!=SQLITE_OK ); } @@ -57044,7 +59609,7 @@ static int pagerWalFrames( assert( pPager->pWal ); assert( pList ); #ifdef SQLITE_DEBUG - /* Verify that the page list is in accending order */ + /* Verify that the page list is in ascending order */ for(p=pList; p && p->pDirty; p=p->pDirty){ assert( p->pgno < p->pDirty->pgno ); } @@ -57175,7 +59740,7 @@ static int pagerPagecount(Pager *pPager, Pgno *pnPage){ #ifndef SQLITE_OMIT_WAL /* ** Check if the *-wal file that corresponds to the database opened by pPager -** exists if the database is not empy, or verify that the *-wal file does +** exists if the database is not empty, or verify that the *-wal file does ** not exist (by deleting it) if the database file is empty. ** ** If the database is not empty and the *-wal file exists, open the pager @@ -57464,7 +60029,6 @@ SQLITE_PRIVATE void sqlite3PagerShrink(Pager *pPager){ ** Numeric values associated with these states are OFF==1, NORMAL=2, ** and FULL=3. */ -#ifndef SQLITE_OMIT_PAGER_PRAGMAS SQLITE_PRIVATE void sqlite3PagerSetFlags( Pager *pPager, /* The pager to set safety level for */ unsigned pgFlags /* Various flags */ @@ -57499,7 +60063,6 @@ SQLITE_PRIVATE void sqlite3PagerSetFlags( pPager->doNotSpill |= SPILLFLAG_OFF; } } -#endif /* ** The following global variable is incremented whenever the library @@ -58587,11 +61150,7 @@ SQLITE_PRIVATE int sqlite3PagerOpen( int rc = SQLITE_OK; /* Return code */ int tempFile = 0; /* True for temp files (incl. in-memory files) */ int memDb = 0; /* True if this is an in-memory file */ -#ifndef SQLITE_OMIT_DESERIALIZE int memJM = 0; /* Memory journal mode */ -#else -# define memJM 0 -#endif int readOnly = 0; /* True if this is a read-only file */ int journalFileSize; /* Bytes to allocate for each journal fd */ char *zPathname = 0; /* Full path to database file */ @@ -58601,7 +61160,6 @@ SQLITE_PRIVATE int sqlite3PagerOpen( u32 szPageDflt = SQLITE_DEFAULT_PAGE_SIZE; /* Default page size */ const char *zUri = 0; /* URI args to copy */ int nUriByte = 1; /* Number of bytes of URI args at *zUri */ - int nUri = 0; /* Number of URI parameters */ /* Figure out how much space is required for each journal file-handle ** (there are two of them, the main journal and the sub-journal). */ @@ -58649,7 +61207,6 @@ SQLITE_PRIVATE int sqlite3PagerOpen( while( *z ){ z += strlen(z)+1; z += strlen(z)+1; - nUri++; } nUriByte = (int)(&z[1] - zUri); assert( nUriByte>=1 ); @@ -58712,12 +61269,13 @@ SQLITE_PRIVATE int sqlite3PagerOpen( ** specific formatting and order of the various filenames, so if the format ** changes here, be sure to change it there as well. 
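pager_cksum(), described above, is deliberately cheap: the per-transaction cksumInit plus every 200th byte of the page. That is enough to catch a journal record torn by a crash mid-write, which is all the rollback journal needs. A sketch matching that description:

typedef unsigned int u32;   /* matches the amalgamation's typedef */

static u32 journal_cksum(u32 cksumInit, const unsigned char *aData,
                         int pageSize){
  u32 cksum = cksumInit;
  int i = pageSize - 200;
  while( i>0 ){
    cksum += aData[i];      /* sample one byte out of every 200 */
    i -= 200;
  }
  return cksum;
}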
*/ + assert( SQLITE_PTRSIZE==sizeof(Pager*) ); pPtr = (u8 *)sqlite3MallocZero( ROUND8(sizeof(*pPager)) + /* Pager structure */ ROUND8(pcacheSize) + /* PCache object */ ROUND8(pVfs->szOsFile) + /* The main db file */ journalFileSize * 2 + /* The two journal files */ - sizeof(pPager) + /* Space to hold a pointer */ + SQLITE_PTRSIZE + /* Space to hold a pointer */ 4 + /* Database prefix */ nPathname + 1 + /* database filename */ nUriByte + /* query parameters */ @@ -58738,7 +61296,7 @@ SQLITE_PRIVATE int sqlite3PagerOpen( pPager->sjfd = (sqlite3_file*)pPtr; pPtr += journalFileSize; pPager->jfd = (sqlite3_file*)pPtr; pPtr += journalFileSize; assert( EIGHT_BYTE_ALIGNMENT(pPager->jfd) ); - memcpy(pPtr, &pPager, sizeof(pPager)); pPtr += sizeof(pPager); + memcpy(pPtr, &pPager, SQLITE_PTRSIZE); pPtr += SQLITE_PTRSIZE; /* Fill in the Pager.zFilename and pPager.zQueryParam fields */ pPtr += 4; /* Skip zero prefix */ @@ -58792,9 +61350,7 @@ SQLITE_PRIVATE int sqlite3PagerOpen( int fout = 0; /* VFS flags returned by xOpen() */ rc = sqlite3OsOpen(pVfs, pPager->zFilename, pPager->fd, vfsFlags, &fout); assert( !memDb ); -#ifndef SQLITE_OMIT_DESERIALIZE pPager->memVfs = memJM = (fout&SQLITE_OPEN_MEMORY)!=0; -#endif readOnly = (fout&SQLITE_OPEN_READONLY)!=0; /* If the file was successfully opened for read/write access, @@ -58905,18 +61461,7 @@ SQLITE_PRIVATE int sqlite3PagerOpen( pPager->memDb = (u8)memDb; pPager->readOnly = (u8)readOnly; assert( useJournal || pPager->tempFile ); - pPager->noSync = pPager->tempFile; - if( pPager->noSync ){ - assert( pPager->fullSync==0 ); - assert( pPager->extraSync==0 ); - assert( pPager->syncFlags==0 ); - assert( pPager->walSyncFlags==0 ); - }else{ - pPager->fullSync = 1; - pPager->extraSync = 0; - pPager->syncFlags = SQLITE_SYNC_NORMAL; - pPager->walSyncFlags = SQLITE_SYNC_NORMAL | (SQLITE_SYNC_NORMAL<<2); - } + sqlite3PagerSetFlags(pPager, (SQLITE_DEFAULT_SYNCHRONOUS+1)|PAGER_CACHESPILL); /* pPager->pFirst = 0; */ /* pPager->pFirstSynced = 0; */ /* pPager->pLast = 0; */ @@ -58942,15 +61487,18 @@ SQLITE_PRIVATE int sqlite3PagerOpen( /* ** Return the sqlite3_file for the main database given the name -** of the corresonding WAL or Journal name as passed into +** of the corresponding WAL or Journal name as passed into ** xOpen. 
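sqlite3_database_file_object(), described just above, works because sqlite3PagerOpen() lays the Pager pointer and a four-byte zero prefix immediately before the filename inside one allocation. A sketch of that layout with a hypothetical Obj in place of Pager; the real routine also scans backwards past a journal/WAL suffix before reading the pointer.

#include <stdlib.h>
#include <string.h>

typedef struct Obj Obj;   /* opaque; stands in for Pager */

/* Build "[Obj*][0,0,0,0][name\0]\0" and return a pointer to the name. */
static char *name_with_backptr(Obj *pObj, const char *zName){
  size_t n = strlen(zName);
  char *p = malloc(sizeof(Obj*) + 4 + n + 2);
  if( p==0 ) return 0;
  memcpy(p, &pObj, sizeof(Obj*));            /* hidden back-pointer */
  memset(p + sizeof(Obj*), 0, 4);            /* zero-byte prefix */
  memcpy(p + sizeof(Obj*) + 4, zName, n+1);  /* the visible name */
  p[sizeof(Obj*) + 4 + n + 1] = 0;           /* double-zero terminator */
  return p + sizeof(Obj*) + 4;
}

/* Recover the owning object from the name pointer alone. */
static Obj *obj_from_name(const char *zName){
  Obj *pObj;
  memcpy(&pObj, zName - 4 - sizeof(Obj*), sizeof(Obj*));
  return pObj;
}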
*/ SQLITE_API sqlite3_file *sqlite3_database_file_object(const char *zName){ Pager *pPager; + const char *p; while( zName[-1]!=0 || zName[-2]!=0 || zName[-3]!=0 || zName[-4]!=0 ){ zName--; } - pPager = *(Pager**)(zName - 4 - sizeof(Pager*)); + p = zName - 4 - sizeof(Pager*); + assert( EIGHT_BYTE_ALIGNMENT(p) ); + pPager = *(Pager**)p; return pPager->fd; } @@ -59445,6 +61993,10 @@ static int getPageNormal( if( !isOpen(pPager->fd) || pPager->dbSizepPager->mxPgno ){ rc = SQLITE_FULL; + if( pgno<=pPager->dbSize ){ + sqlite3PcacheRelease(pPg); + pPg = 0; + } goto pager_acquire_err; } if( noContent ){ @@ -59580,8 +62132,20 @@ SQLITE_PRIVATE int sqlite3PagerGet( DbPage **ppPage, /* Write a pointer to the page here */ int flags /* PAGER_GET_XXX flags */ ){ - /* printf("PAGE %u\n", pgno); fflush(stdout); */ +#if 0 /* Trace page fetch by setting to 1 */ + int rc; + printf("PAGE %u\n", pgno); + fflush(stdout); + rc = pPager->xGet(pPager, pgno, ppPage, flags); + if( rc ){ + printf("PAGE %u failed with 0x%02x\n", pgno, rc); + fflush(stdout); + } + return rc; +#else + /* Normal, high-speed version of sqlite3PagerGet() */ return pPager->xGet(pPager, pgno, ppPage, flags); +#endif } /* @@ -59609,10 +62173,12 @@ SQLITE_PRIVATE DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){ /* ** Release a page reference. ** -** The sqlite3PagerUnref() and sqlite3PagerUnrefNotNull() may only be -** used if we know that the page being released is not the last page. +** The sqlite3PagerUnref() and sqlite3PagerUnrefNotNull() may only be used +** if we know that the page being released is not the last reference to page1. ** The btree layer always holds page1 open until the end, so these first -** to routines can be used to release any page other than BtShared.pPage1. +** two routines can be used to release any page other than BtShared.pPage1. +** The assert() at tag-20230419-2 proves that this constraint is always +** honored. ** ** Use sqlite3PagerUnrefPageOne() to release page1. 
This latter routine ** checks the total number of outstanding pages and if the number of @@ -59628,7 +62194,7 @@ SQLITE_PRIVATE void sqlite3PagerUnrefNotNull(DbPage *pPg){ sqlite3PcacheRelease(pPg); } /* Do not use this routine to release the last reference to page1 */ - assert( sqlite3PcacheRefCount(pPager->pPCache)>0 ); + assert( sqlite3PcacheRefCount(pPager->pPCache)>0 ); /* tag-20230419-2 */ } SQLITE_PRIVATE void sqlite3PagerUnref(DbPage *pPg){ if( pPg ) sqlite3PagerUnrefNotNull(pPg); @@ -59694,6 +62260,7 @@ static int pager_open_journal(Pager *pPager){ if( pPager->tempFile ){ flags |= (SQLITE_OPEN_DELETEONCLOSE|SQLITE_OPEN_TEMP_JOURNAL); + flags |= SQLITE_OPEN_EXCLUSIVE; nSpill = sqlite3Config.nStmtSpill; }else{ flags |= SQLITE_OPEN_MAIN_JOURNAL; @@ -60176,7 +62743,7 @@ static int pager_incr_changecounter(Pager *pPager, int isDirectMode){ # define DIRECT_MODE isDirectMode #endif - if( !pPager->changeCountDone && ALWAYS(pPager->dbSize>0) ){ + if( !pPager->changeCountDone && pPager->dbSize>0 ){ PgHdr *pPgHdr; /* Reference to page 1 */ assert( !pPager->tempFile && isOpen(pPager->fd) ); @@ -60454,6 +63021,13 @@ SQLITE_PRIVATE int sqlite3PagerCommitPhaseOne( rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_BEGIN_ATOMIC_WRITE, 0); if( rc==SQLITE_OK ){ rc = pager_write_pagelist(pPager, pList); + if( rc==SQLITE_OK && pPager->dbSize>pPager->dbFileSize ){ + char *pTmp = pPager->pTmpSpace; + int szPage = (int)pPager->pageSize; + memset(pTmp, 0, szPage); + rc = sqlite3OsWrite(pPager->fd, pTmp, szPage, + ((i64)pPager->dbSize*pPager->pageSize)-szPage); + } if( rc==SQLITE_OK ){ rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_COMMIT_ATOMIC_WRITE, 0); } @@ -60916,7 +63490,11 @@ SQLITE_PRIVATE int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint){ */ SQLITE_PRIVATE const char *sqlite3PagerFilename(const Pager *pPager, int nullIfMemDb){ static const char zFake[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; - return (nullIfMemDb && pPager->memDb) ? &zFake[4] : pPager->zFilename; + if( nullIfMemDb && (pPager->memDb || sqlite3IsMemdb(pPager->pVfs)) ){ + return &zFake[4]; + }else{ + return pPager->zFilename; + } } /* @@ -61216,7 +63794,7 @@ SQLITE_PRIVATE int sqlite3PagerSetJournalMode(Pager *pPager, int eMode){ assert( pPager->eState!=PAGER_ERROR ); pPager->journalMode = (u8)eMode; - /* When transistioning from TRUNCATE or PERSIST to any other journal + /* When transitioning from TRUNCATE or PERSIST to any other journal ** mode except WAL, unless the pager is in locking_mode=exclusive mode, ** delete the journal file. 
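The block added to sqlite3PagerCommitPhaseOne() above makes sure the file physically reaches its final size before SQLITE_FCNTL_COMMIT_ATOMIC_WRITE is issued, by writing one zeroed page at the tail. The same idea in isolation, as a sketch assuming POSIX pwrite():

#include <stdlib.h>
#include <unistd.h>

/* Extend an open file to exactly nSize bytes by writing a zeroed page
** ending at that offset. */
static int extend_with_zero_page(int fd, long long nSize, int szPage){
  char *z = calloc(1, (size_t)szPage);
  ssize_t n;
  if( z==0 ) return -1;
  n = pwrite(fd, z, (size_t)szPage, (off_t)(nSize - szPage));
  free(z);
  return n==szPage ? 0 : -1;
}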
*/ @@ -61261,7 +63839,7 @@ SQLITE_PRIVATE int sqlite3PagerSetJournalMode(Pager *pPager, int eMode){ } assert( state==pPager->eState ); } - }else if( eMode==PAGER_JOURNALMODE_OFF ){ + }else if( eMode==PAGER_JOURNALMODE_OFF || eMode==PAGER_JOURNALMODE_MEMORY ){ sqlite3OsClose(pPager->jfd); } } @@ -61285,7 +63863,7 @@ SQLITE_PRIVATE int sqlite3PagerGetJournalMode(Pager *pPager){ SQLITE_PRIVATE int sqlite3PagerOkToChangeJournalMode(Pager *pPager){ assert( assert_pager_state(pPager) ); if( pPager->eState>=PAGER_WRITER_CACHEMOD ) return 0; - if( isOpen(pPager->jfd) && pPager->journalOff>0 ) return 0; + if( NEVER(isOpen(pPager->jfd) && pPager->journalOff>0) ) return 0; return 1; } @@ -61383,13 +63961,15 @@ SQLITE_PRIVATE int sqlite3PagerWalSupported(Pager *pPager){ */ static int pagerExclusiveLock(Pager *pPager){ int rc; /* Return code */ + u8 eOrigLock; /* Original lock */ - assert( pPager->eLock==SHARED_LOCK || pPager->eLock==EXCLUSIVE_LOCK ); + assert( pPager->eLock>=SHARED_LOCK ); + eOrigLock = pPager->eLock; rc = pagerLockDb(pPager, EXCLUSIVE_LOCK); if( rc!=SQLITE_OK ){ /* If the attempt to grab the exclusive lock failed, release the ** pending lock that may have been obtained instead. */ - pagerUnlockDb(pPager, SHARED_LOCK); + pagerUnlockDb(pPager, eOrigLock); } return rc; @@ -61642,6 +64222,12 @@ SQLITE_PRIVATE int sqlite3PagerWalFramesize(Pager *pPager){ } #endif +#ifdef SQLITE_USE_SEH +SQLITE_PRIVATE int sqlite3PagerWalSystemErrno(Pager *pPager){ + return sqlite3WalSystemErrno(pPager->pWal); +} +#endif + #endif /* SQLITE_OMIT_DISKIO */ /************** End of pager.c ***********************************************/ @@ -61932,7 +64518,7 @@ SQLITE_PRIVATE int sqlite3WalTrace = 0; ** ** Technically, the various VFSes are free to implement these locks however ** they see fit. However, compatibility is encouraged so that VFSes can -** interoperate. The standard implemention used on both unix and windows +** interoperate. The standard implementation used on both unix and windows ** is for the index number to indicate a byte offset into the ** WalCkptInfo.aLock[] array in the wal-index header. In other words, all ** locks are on the shm file. The WALINDEX_LOCK_OFFSET constant (which @@ -62008,7 +64594,7 @@ struct WalIndexHdr { ** the mxFrame for that reader. The value READMARK_NOT_USED (0xffffffff) ** for any aReadMark[] means that entry is unused. aReadMark[0] is ** a special case; its value is never used and it exists as a place-holder -** to avoid having to offset aReadMark[] indexs by one. Readers holding +** to avoid having to offset aReadMark[] indexes by one. Readers holding ** WAL_READ_LOCK(0) always ignore the entire WAL and read all content ** directly from the database. 
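Each aReadMark[] slot described above records the WAL frame count that some reader is using, so the checkpointer knows how far it may safely copy frames back into the database. A loose sketch of how a reader might choose a slot; the real walTryBeginRead() also takes the corresponding read-lock and recycles stale marks, so this is illustrative only.

#define READMARK_NOT_USED 0xffffffff

/* Return the index of the usable slot whose mark is largest without
** exceeding mxFrame, or -1 if none qualifies. */
static int pick_read_slot(volatile unsigned *aReadMark, int nSlot,
                          unsigned mxFrame){
  int i, best = -1;
  unsigned bestMark = 0;
  for(i=1; i<nSlot; i++){          /* slot 0 means "ignore the WAL" */
    unsigned m = aReadMark[i];
    if( m!=READMARK_NOT_USED && m<=mxFrame && (best<0 || m>bestMark) ){
      best = i;
      bestMark = m;
    }
  }
  return best;
}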
** @@ -62176,7 +64762,15 @@ struct Wal { u32 iReCksum; /* On commit, recalculate checksums from here */ const char *zWalName; /* Name of WAL file */ u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ +#ifdef SQLITE_USE_SEH + u32 lockMask; /* Mask of locks held */ + void *pFree; /* Pointer to sqlite3_free() if exception thrown */ + u32 *pWiValue; /* Value to write into apWiData[iWiPg] */ + int iWiPg; /* Write pWiValue into apWiData[iWiPg] */ + int iSysErrno; /* System error code following exception */ +#endif #ifdef SQLITE_DEBUG + int nSehTry; /* Number of nested SEH_TRY{} blocks */ u8 lockError; /* True if a locking error has occurred */ #endif #ifdef SQLITE_ENABLE_SNAPSHOT @@ -62258,6 +64852,113 @@ struct WalIterator { sizeof(ht_slot)*HASHTABLE_NSLOT + HASHTABLE_NPAGE*sizeof(u32) \ ) +/* +** Structured Exception Handling (SEH) is a Windows-specific technique +** for catching exceptions raised while accessing memory-mapped files. +** +** The -DSQLITE_USE_SEH compile-time option means to use SEH to catch and +** deal with system-level errors that arise during WAL -shm file processing. +** Without this compile-time option, any system-level faults that appear +** while accessing the memory-mapped -shm file will cause a process-wide +** signal to be deliver, which will more than likely cause the entire +** process to exit. +*/ +#ifdef SQLITE_USE_SEH +#include + +/* Beginning of a block of code in which an exception might occur */ +# define SEH_TRY __try { \ + assert( walAssertLockmask(pWal) && pWal->nSehTry==0 ); \ + VVA_ONLY(pWal->nSehTry++); + +/* The end of a block of code in which an exception might occur */ +# define SEH_EXCEPT(X) \ + VVA_ONLY(pWal->nSehTry--); \ + assert( pWal->nSehTry==0 ); \ + } __except( sehExceptionFilter(pWal, GetExceptionCode(), GetExceptionInformation() ) ){ X } + +/* Simulate a memory-mapping fault in the -shm file for testing purposes */ +# define SEH_INJECT_FAULT sehInjectFault(pWal) + +/* +** The second argument is the return value of GetExceptionCode() for the +** current exception. Return EXCEPTION_EXECUTE_HANDLER if the exception code +** indicates that the exception may have been caused by accessing the *-shm +** file mapping. Or EXCEPTION_CONTINUE_SEARCH otherwise. +*/ +static int sehExceptionFilter(Wal *pWal, int eCode, EXCEPTION_POINTERS *p){ + VVA_ONLY(pWal->nSehTry--); + if( eCode==EXCEPTION_IN_PAGE_ERROR ){ + if( p && p->ExceptionRecord && p->ExceptionRecord->NumberParameters>=3 ){ + /* From MSDN: For this type of exception, the first element of the + ** ExceptionInformation[] array is a read-write flag - 0 if the exception + ** was thrown while reading, 1 if while writing. The second element is + ** the virtual address being accessed. The "third array element specifies + ** the underlying NTSTATUS code that resulted in the exception". */ + pWal->iSysErrno = (int)p->ExceptionRecord->ExceptionInformation[2]; + } + return EXCEPTION_EXECUTE_HANDLER; + } + return EXCEPTION_CONTINUE_SEARCH; +} + +/* +** If one is configured, invoke the xTestCallback callback with 650 as +** the argument. If it returns true, throw the same exception that is +** thrown by the system if the *-shm file mapping is accessed after it +** has been invalidated. 
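The SEH_TRY/SEH_EXCEPT machinery above compiles down to MSVC's structured exception handling. A minimal standalone example of the underlying pattern: touch a mapped view inside __try and convert EXCEPTION_IN_PAGE_ERROR into an error code instead of letting the fault take down the whole process. MSVC-only; other compilers need a different mechanism.

#include <windows.h>

static int read_mapped_byte(volatile unsigned char *p, unsigned char *pOut){
  __try {
    *pOut = *p;            /* may fault if the mapping has been revoked */
    return 0;
  }
  __except( GetExceptionCode()==EXCEPTION_IN_PAGE_ERROR
              ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH ){
    return -1;             /* report as an I/O error instead of crashing */
  }
}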
+*/ +static void sehInjectFault(Wal *pWal){ + int res; + assert( pWal->nSehTry>0 ); + + res = sqlite3FaultSim(650); + if( res!=0 ){ + ULONG_PTR aArg[3]; + aArg[0] = 0; + aArg[1] = 0; + aArg[2] = (ULONG_PTR)res; + RaiseException(EXCEPTION_IN_PAGE_ERROR, 0, 3, (const ULONG_PTR*)aArg); + } +} + +/* +** There are two ways to use this macro. To set a pointer to be freed +** if an exception is thrown: +** +** SEH_FREE_ON_ERROR(0, pPtr); +** +** and to cancel the same: +** +** SEH_FREE_ON_ERROR(pPtr, 0); +** +** In the first case, there must not already be a pointer registered to +** be freed. In the second case, pPtr must be the registered pointer. +*/ +#define SEH_FREE_ON_ERROR(X,Y) \ + assert( (X==0 || Y==0) && pWal->pFree==X ); pWal->pFree = Y + +/* +** There are two ways to use this macro. To arrange for pWal->apWiData[iPg] +** to be set to pValue if an exception is thrown: +** +** SEH_SET_ON_ERROR(iPg, pValue); +** +** and to cancel the same: +** +** SEH_SET_ON_ERROR(0, 0); +*/ +#define SEH_SET_ON_ERROR(X,Y) pWal->iWiPg = X; pWal->pWiValue = Y + +#else +# define SEH_TRY VVA_ONLY(pWal->nSehTry++); +# define SEH_EXCEPT(X) VVA_ONLY(pWal->nSehTry--); assert( pWal->nSehTry==0 ); +# define SEH_INJECT_FAULT assert( pWal->nSehTry>0 ); +# define SEH_FREE_ON_ERROR(X,Y) +# define SEH_SET_ON_ERROR(X,Y) +#endif /* ifdef SQLITE_USE_SEH */ + + /* ** Obtain a pointer to the iPage'th page of the wal-index. The wal-index ** is broken into pages of WALINDEX_PGSZ bytes. Wal-index pages are @@ -62330,6 +65031,7 @@ static int walIndexPage( int iPage, /* The page we seek */ volatile u32 **ppPage /* Write the page pointer here */ ){ + SEH_INJECT_FAULT; if( pWal->nWiData<=iPage || (*ppPage = pWal->apWiData[iPage])==0 ){ return walIndexPageRealloc(pWal, iPage, ppPage); } @@ -62341,6 +65043,7 @@ static int walIndexPage( */ static volatile WalCkptInfo *walCkptInfo(Wal *pWal){ assert( pWal->nWiData>0 && pWal->apWiData[0] ); + SEH_INJECT_FAULT; return (volatile WalCkptInfo*)&(pWal->apWiData[0][sizeof(WalIndexHdr)/2]); } @@ -62349,6 +65052,7 @@ static volatile WalCkptInfo *walCkptInfo(Wal *pWal){ */ static volatile WalIndexHdr *walIndexHdr(Wal *pWal){ assert( pWal->nWiData>0 && pWal->apWiData[0] ); + SEH_INJECT_FAULT; return (volatile WalIndexHdr*)pWal->apWiData[0]; } @@ -62394,19 +65098,40 @@ static void walChecksumBytes( assert( nByte>=8 ); assert( (nByte&0x00000007)==0 ); assert( nByte<=65536 ); + assert( nByte%4==0 ); - if( nativeCksum ){ + if( !nativeCksum ){ do { + s1 += BYTESWAP32(aData[0]) + s2; + s2 += BYTESWAP32(aData[1]) + s1; + aData += 2; + }while( aDatalockError = (u8)(rc!=SQLITE_OK && (rc&0xFF)!=SQLITE_BUSY); ) +#ifdef SQLITE_USE_SEH + if( rc==SQLITE_OK ) pWal->lockMask |= (1 << lockIdx); +#endif return rc; } static void walUnlockShared(Wal *pWal, int lockIdx){ if( pWal->exclusiveMode ) return; (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1, SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED); +#ifdef SQLITE_USE_SEH + pWal->lockMask &= ~(1 << lockIdx); +#endif WALTRACE(("WAL%p: release SHARED-%s\n", pWal, walLockName(lockIdx))); } static int walLockExclusive(Wal *pWal, int lockIdx, int n){ @@ -62601,12 +65332,20 @@ static int walLockExclusive(Wal *pWal, int lockIdx, int n){ WALTRACE(("WAL%p: acquire EXCLUSIVE-%s cnt=%d %s\n", pWal, walLockName(lockIdx), n, rc ? 
"failed" : "ok")); VVA_ONLY( pWal->lockError = (u8)(rc!=SQLITE_OK && (rc&0xFF)!=SQLITE_BUSY); ) +#ifdef SQLITE_USE_SEH + if( rc==SQLITE_OK ){ + pWal->lockMask |= (((1<exclusiveMode ) return; (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, n, SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE); +#ifdef SQLITE_USE_SEH + pWal->lockMask &= ~(((1<apWiData[0][WALINDEX_HDR_SIZE/sizeof(u32) + iFrame - 1]; } @@ -62957,6 +65697,7 @@ static int walIndexRecover(Wal *pWal){ /* Malloc a buffer to read frames into. */ szFrame = szPage + WAL_FRAME_HDRSIZE; aFrame = (u8 *)sqlite3_malloc64(szFrame + WALINDEX_PGSZ); + SEH_FREE_ON_ERROR(0, aFrame); if( !aFrame ){ rc = SQLITE_NOMEM_BKPT; goto recovery_error; @@ -62975,6 +65716,7 @@ static int walIndexRecover(Wal *pWal){ rc = walIndexPage(pWal, iPg, (volatile u32**)&aShare); assert( aShare!=0 || rc!=SQLITE_OK ); if( aShare==0 ) break; + SEH_SET_ON_ERROR(iPg, aShare); pWal->apWiData[iPg] = aPrivate; for(iFrame=iFirst; iFrame<=iLast; iFrame++){ @@ -63002,6 +65744,7 @@ static int walIndexRecover(Wal *pWal){ } } pWal->apWiData[iPg] = aShare; + SEH_SET_ON_ERROR(0,0); nHdr = (iPg==0 ? WALINDEX_HDR_SIZE : 0); nHdr32 = nHdr / sizeof(u32); #ifndef SQLITE_SAFER_WALINDEX_RECOVERY @@ -63032,9 +65775,11 @@ static int walIndexRecover(Wal *pWal){ } } #endif + SEH_INJECT_FAULT; if( iFrame<=iLast ) break; } + SEH_FREE_ON_ERROR(aFrame, 0); sqlite3_free(aFrame); } @@ -63062,6 +65807,7 @@ static int walIndexRecover(Wal *pWal){ }else{ pInfo->aReadMark[i] = READMARK_NOT_USED; } + SEH_INJECT_FAULT; walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); }else if( rc!=SQLITE_BUSY ){ goto recovery_error; @@ -63219,7 +65965,7 @@ SQLITE_PRIVATE int sqlite3WalOpen( } /* -** Change the size to which the WAL file is trucated on each reset. +** Change the size to which the WAL file is truncated on each reset. */ SQLITE_PRIVATE void sqlite3WalLimit(Wal *pWal, i64 iLimit){ if( pWal ) pWal->mxWalSize = iLimit; @@ -63445,23 +66191,16 @@ static int walIteratorInit(Wal *pWal, u32 nBackfill, WalIterator **pp){ nByte = sizeof(WalIterator) + (nSegment-1)*sizeof(struct WalSegment) + iLast*sizeof(ht_slot); - p = (WalIterator *)sqlite3_malloc64(nByte); + p = (WalIterator *)sqlite3_malloc64(nByte + + sizeof(ht_slot) * (iLast>HASHTABLE_NPAGE?HASHTABLE_NPAGE:iLast) + ); if( !p ){ return SQLITE_NOMEM_BKPT; } memset(p, 0, nByte); p->nSegment = nSegment; - - /* Allocate temporary space used by the merge-sort routine. This block - ** of memory will be freed before this function returns. - */ - aTmp = (ht_slot *)sqlite3_malloc64( - sizeof(ht_slot) * (iLast>HASHTABLE_NPAGE?HASHTABLE_NPAGE:iLast) - ); - if( !aTmp ){ - rc = SQLITE_NOMEM_BKPT; - } - + aTmp = (ht_slot*)&(((u8*)p)[nByte]); + SEH_FREE_ON_ERROR(0, p); for(i=walFramePage(nBackfill+1); rc==SQLITE_OK && iaSegment[i].aPgno = (u32 *)sLoc.aPgno; } } - sqlite3_free(aTmp); - if( rc!=SQLITE_OK ){ + SEH_FREE_ON_ERROR(p, 0); walIteratorFree(p); p = 0; } @@ -63717,13 +66455,13 @@ static int walCheckpoint( mxSafeFrame = pWal->hdr.mxFrame; mxPage = pWal->hdr.nPage; for(i=1; iaReadMark+i); + u32 y = AtomicLoad(pInfo->aReadMark+i); SEH_INJECT_FAULT; if( mxSafeFrame>y ){ assert( y<=pWal->hdr.mxFrame ); rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1); if( rc==SQLITE_OK ){ u32 iMark = (i==1 ? 
mxSafeFrame : READMARK_NOT_USED);
-          AtomicStore(pInfo->aReadMark+i, iMark);
+          AtomicStore(pInfo->aReadMark+i, iMark); SEH_INJECT_FAULT;
           walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
         }else if( rc==SQLITE_BUSY ){
           mxSafeFrame = y;
@@ -63744,8 +66482,7 @@ static int walCheckpoint(
      && (rc = walBusyLock(pWal,xBusy,pBusyArg,WAL_READ_LOCK(0),1))==SQLITE_OK
     ){
       u32 nBackfill = pInfo->nBackfill;
-
-      pInfo->nBackfillAttempted = mxSafeFrame;
+      pInfo->nBackfillAttempted = mxSafeFrame; SEH_INJECT_FAULT;
 
       /* Sync the WAL to disk */
       rc = sqlite3OsSync(pWal->pWalFd, CKPT_SYNC_FLAGS(sync_flags));
@@ -63776,6 +66513,7 @@
         while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){
           i64 iOffset;
           assert( walFramePgno(pWal, iFrame)==iDbpage );
+          SEH_INJECT_FAULT;
           if( AtomicLoad(&db->u1.isInterrupted) ){
             rc = db->mallocFailed ? SQLITE_NOMEM_BKPT : SQLITE_INTERRUPT;
             break;
@@ -63805,7 +66543,7 @@
         }
       }
       if( rc==SQLITE_OK ){
-        AtomicStore(&pInfo->nBackfill, mxSafeFrame);
+        AtomicStore(&pInfo->nBackfill, mxSafeFrame); SEH_INJECT_FAULT;
       }
     }
@@ -63827,6 +66565,7 @@
   */
   if( rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){
     assert( pWal->writeLock );
+    SEH_INJECT_FAULT;
     if( pInfo->nBackfill<pWal->hdr.mxFrame ){
       rc = SQLITE_BUSY;
     }else if( eMode>=SQLITE_CHECKPOINT_RESTART ){
@@ -63858,6 +66597,7 @@ static int walCheckpoint(
   }
 
  walcheckpoint_out:
+  SEH_FREE_ON_ERROR(pIter, 0);
   walIteratorFree(pIter);
   return rc;
 }
@@ -63880,6 +66620,93 @@ static void walLimitSize(Wal *pWal, i64 nMax){
   }
 }
 
+#ifdef SQLITE_USE_SEH
+/*
+** This is the "standard" exception handler used in a few places to handle
+** an exception thrown by reading from the *-shm mapping after it has become
+** invalid in SQLITE_USE_SEH builds. It is used as follows:
+**
+**   SEH_TRY { ... }
+**   SEH_EXCEPT( rc = walHandleException(pWal); )
+**
+** This function does four things:
+**
+**   1) Determines the locks that should be held, based on the contents of
+**      the Wal.readLock, Wal.writeLock and Wal.ckptLock variables. All other
+**      held locks are assumed to be transient locks that would have been
+**      released had the exception not been thrown and are dropped.
+**
+**   2) Frees the pointer at Wal.pFree, if any, using sqlite3_free().
+**
+**   3) Sets pWal->apWiData[pWal->iWiPg] to pWal->pWiValue if not NULL
+**
+**   4) Returns SQLITE_IOERR.
+*/
+static int walHandleException(Wal *pWal){
+  if( pWal->exclusiveMode==0 ){
+    static const int S = 1;
+    static const int E = (1<<SQLITE_SHM_NLOCK);
+    int ii;
+    u32 mUnlock = pWal->lockMask & ~(
+        (pWal->readLock<0 ? 0 : (S << WAL_READ_LOCK(pWal->readLock)))
+      | (pWal->writeLock ? (E << WAL_WRITE_LOCK) : 0)
+      | (pWal->ckptLock ? (E << WAL_CKPT_LOCK) : 0)
+    );
+    for(ii=0; ii<SQLITE_SHM_NLOCK; ii++){
+      if( (S<<ii) & mUnlock ) walUnlockShared(pWal, ii);
+      if( (E<<ii) & mUnlock ) walUnlockExclusive(pWal, ii, 1);
+    }
+  }
+  sqlite3_free(pWal->pFree);
+  pWal->pFree = 0;
+  if( pWal->pWiValue ){
+    pWal->apWiData[pWal->iWiPg] = pWal->pWiValue;
+    pWal->pWiValue = 0;
+  }
+  return SQLITE_IOERR_IN_PAGE;
+}
+
+/*
+** Assert that the Wal.lockMask mask, which indicates the locks held
+** by the connection, is consistent with the Wal.readLock, Wal.writeLock
+** and Wal.ckptLock variables. To be used as:
+**
+**   assert( walAssertLockmask(pWal) );
+*/
+static int walAssertLockmask(Wal *pWal){
+  if( pWal->exclusiveMode==0 ){
+    static const int S = 1;
+    static const int E = (1<<SQLITE_SHM_NLOCK);
+    u32 mExpect = (
+        (pWal->readLock<0 ? 0 : (S << WAL_READ_LOCK(pWal->readLock)))
+      | (pWal->writeLock ? (E << WAL_WRITE_LOCK) : 0)
+      | (pWal->ckptLock ? (E << WAL_CKPT_LOCK) : 0)
+#ifdef SQLITE_ENABLE_SNAPSHOT
+      | (pWal->pSnapshot ?
(pWal->lockMask & (1 << WAL_CKPT_LOCK)) : 0) +#endif + ); + assert( mExpect==pWal->lockMask ); + } + return 1; +} + +/* +** Return and zero the "system error" field set when an +** EXCEPTION_IN_PAGE_ERROR exception is caught. +*/ +SQLITE_PRIVATE int sqlite3WalSystemErrno(Wal *pWal){ + int iRet = 0; + if( pWal ){ + iRet = pWal->iSysErrno; + pWal->iSysErrno = 0; + } + return iRet; +} + +#else +# define walAssertLockmask(x) 1 +#endif /* ifdef SQLITE_USE_SEH */ + /* ** Close a connection to a log file. */ @@ -63894,6 +66721,8 @@ SQLITE_PRIVATE int sqlite3WalClose( if( pWal ){ int isDelete = 0; /* True to unlink wal and wal-index files */ + assert( walAssertLockmask(pWal) ); + /* If an EXCLUSIVE lock can be obtained on the database file (using the ** ordinary, rollback-mode locking methods, this guarantees that the ** connection associated with this log file is the only connection to @@ -63918,7 +66747,7 @@ SQLITE_PRIVATE int sqlite3WalClose( ); if( bPersist!=1 ){ /* Try to delete the WAL file if the checkpoint completed and - ** fsyned (rc==SQLITE_OK) and if we are not in persistent-wal + ** fsynced (rc==SQLITE_OK) and if we are not in persistent-wal ** mode (!bPersist) */ isDelete = 1; }else if( pWal->mxWalSize>=0 ){ @@ -63985,7 +66814,7 @@ static SQLITE_NO_TSAN int walIndexTryHdr(Wal *pWal, int *pChanged){ ** give false-positive warnings about these accesses because the tools do not ** account for the double-read and the memory barrier. The use of mutexes ** here would be problematic as the memory being accessed is potentially - ** shared among multiple processes and not all mutex implementions work + ** shared among multiple processes and not all mutex implementations work ** reliably in that environment. */ aHdr = walIndexHdr(pWal); @@ -64436,6 +67265,7 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ assert( pWal->nWiData>0 ); assert( pWal->apWiData[0]!=0 ); pInfo = walCkptInfo(pWal); + SEH_INJECT_FAULT; if( !useWal && AtomicLoad(&pInfo->nBackfill)==pWal->hdr.mxFrame #ifdef SQLITE_ENABLE_SNAPSHOT && (pWal->pSnapshot==0 || pWal->hdr.mxFrame==0) @@ -64485,7 +67315,7 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ } #endif for(i=1; iaReadMark+i); + u32 thisMark = AtomicLoad(pInfo->aReadMark+i); SEH_INJECT_FAULT; if( mxReadMark<=thisMark && thisMark<=mxFrame ){ assert( thisMark!=READMARK_NOT_USED ); mxReadMark = thisMark; @@ -64551,7 +67381,7 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ ** we can guarantee that the checkpointer that set nBackfill could not ** see any pages past pWal->hdr.mxFrame, this problem does not come up. */ - pWal->minFrame = AtomicLoad(&pInfo->nBackfill)+1; + pWal->minFrame = AtomicLoad(&pInfo->nBackfill)+1; SEH_INJECT_FAULT; walShmBarrier(pWal); if( AtomicLoad(pInfo->aReadMark+mxI)!=mxReadMark || memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) @@ -64566,6 +67396,54 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ } #ifdef SQLITE_ENABLE_SNAPSHOT +/* +** This function does the work of sqlite3WalSnapshotRecover(). 
+*/ +static int walSnapshotRecover( + Wal *pWal, /* WAL handle */ + void *pBuf1, /* Temp buffer pWal->szPage bytes in size */ + void *pBuf2 /* Temp buffer pWal->szPage bytes in size */ +){ + int szPage = (int)pWal->szPage; + int rc; + i64 szDb; /* Size of db file in bytes */ + + rc = sqlite3OsFileSize(pWal->pDbFd, &szDb); + if( rc==SQLITE_OK ){ + volatile WalCkptInfo *pInfo = walCkptInfo(pWal); + u32 i = pInfo->nBackfillAttempted; + for(i=pInfo->nBackfillAttempted; i>AtomicLoad(&pInfo->nBackfill); i--){ + WalHashLoc sLoc; /* Hash table location */ + u32 pgno; /* Page number in db file */ + i64 iDbOff; /* Offset of db file entry */ + i64 iWalOff; /* Offset of wal file entry */ + + rc = walHashGet(pWal, walFramePage(i), &sLoc); + if( rc!=SQLITE_OK ) break; + assert( i - sLoc.iZero - 1 >=0 ); + pgno = sLoc.aPgno[i-sLoc.iZero-1]; + iDbOff = (i64)(pgno-1) * szPage; + + if( iDbOff+szPage<=szDb ){ + iWalOff = walFrameOffset(i, szPage) + WAL_FRAME_HDRSIZE; + rc = sqlite3OsRead(pWal->pWalFd, pBuf1, szPage, iWalOff); + + if( rc==SQLITE_OK ){ + rc = sqlite3OsRead(pWal->pDbFd, pBuf2, szPage, iDbOff); + } + + if( rc!=SQLITE_OK || 0==memcmp(pBuf1, pBuf2, szPage) ){ + break; + } + } + + pInfo->nBackfillAttempted = i-1; + } + } + + return rc; +} + /* ** Attempt to reduce the value of the WalCkptInfo.nBackfillAttempted ** variable so that older snapshots can be accessed. To do this, loop @@ -64591,50 +67469,21 @@ SQLITE_PRIVATE int sqlite3WalSnapshotRecover(Wal *pWal){ assert( pWal->readLock>=0 ); rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1); if( rc==SQLITE_OK ){ - volatile WalCkptInfo *pInfo = walCkptInfo(pWal); - int szPage = (int)pWal->szPage; - i64 szDb; /* Size of db file in bytes */ - - rc = sqlite3OsFileSize(pWal->pDbFd, &szDb); - if( rc==SQLITE_OK ){ - void *pBuf1 = sqlite3_malloc(szPage); - void *pBuf2 = sqlite3_malloc(szPage); - if( pBuf1==0 || pBuf2==0 ){ - rc = SQLITE_NOMEM; - }else{ - u32 i = pInfo->nBackfillAttempted; - for(i=pInfo->nBackfillAttempted; i>AtomicLoad(&pInfo->nBackfill); i--){ - WalHashLoc sLoc; /* Hash table location */ - u32 pgno; /* Page number in db file */ - i64 iDbOff; /* Offset of db file entry */ - i64 iWalOff; /* Offset of wal file entry */ - - rc = walHashGet(pWal, walFramePage(i), &sLoc); - if( rc!=SQLITE_OK ) break; - assert( i - sLoc.iZero - 1 >=0 ); - pgno = sLoc.aPgno[i-sLoc.iZero-1]; - iDbOff = (i64)(pgno-1) * szPage; - - if( iDbOff+szPage<=szDb ){ - iWalOff = walFrameOffset(i, szPage) + WAL_FRAME_HDRSIZE; - rc = sqlite3OsRead(pWal->pWalFd, pBuf1, szPage, iWalOff); - - if( rc==SQLITE_OK ){ - rc = sqlite3OsRead(pWal->pDbFd, pBuf2, szPage, iDbOff); - } - - if( rc!=SQLITE_OK || 0==memcmp(pBuf1, pBuf2, szPage) ){ - break; - } - } - - pInfo->nBackfillAttempted = i-1; - } + void *pBuf1 = sqlite3_malloc(pWal->szPage); + void *pBuf2 = sqlite3_malloc(pWal->szPage); + if( pBuf1==0 || pBuf2==0 ){ + rc = SQLITE_NOMEM; + }else{ + pWal->ckptLock = 1; + SEH_TRY { + rc = walSnapshotRecover(pWal, pBuf1, pBuf2); } - - sqlite3_free(pBuf1); - sqlite3_free(pBuf2); + SEH_EXCEPT( rc = SQLITE_IOERR_IN_PAGE; ) + pWal->ckptLock = 0; } + + sqlite3_free(pBuf1); + sqlite3_free(pBuf2); walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1); } @@ -64643,28 +67492,20 @@ SQLITE_PRIVATE int sqlite3WalSnapshotRecover(Wal *pWal){ #endif /* SQLITE_ENABLE_SNAPSHOT */ /* -** Begin a read transaction on the database. -** -** This routine used to be called sqlite3OpenSnapshot() and with good reason: -** it takes a snapshot of the state of the WAL and wal-index for the current -** instant in time. 
The current thread will continue to use this snapshot. -** Other threads might append new content to the WAL and wal-index but -** that extra content is ignored by the current thread. -** -** If the database contents have changes since the previous read -** transaction, then *pChanged is set to 1 before returning. The -** Pager layer will use this to know that its cache is stale and -** needs to be flushed. +** This function does the work of sqlite3WalBeginReadTransaction() (see +** below). That function simply calls this one inside an SEH_TRY{...} block. */ -SQLITE_PRIVATE int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){ +static int walBeginReadTransaction(Wal *pWal, int *pChanged){ int rc; /* Return code */ int cnt = 0; /* Number of TryBeginRead attempts */ #ifdef SQLITE_ENABLE_SNAPSHOT + int ckptLock = 0; int bChanged = 0; WalIndexHdr *pSnapshot = pWal->pSnapshot; #endif assert( pWal->ckptLock==0 ); + assert( pWal->nSehTry>0 ); #ifdef SQLITE_ENABLE_SNAPSHOT if( pSnapshot ){ @@ -64687,7 +67528,7 @@ SQLITE_PRIVATE int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){ if( rc!=SQLITE_OK ){ return rc; } - pWal->ckptLock = 1; + ckptLock = 1; } #endif @@ -64751,15 +67592,37 @@ SQLITE_PRIVATE int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){ } /* Release the shared CKPT lock obtained above. */ - if( pWal->ckptLock ){ + if( ckptLock ){ assert( pSnapshot ); walUnlockShared(pWal, WAL_CKPT_LOCK); - pWal->ckptLock = 0; } #endif return rc; } +/* +** Begin a read transaction on the database. +** +** This routine used to be called sqlite3OpenSnapshot() and with good reason: +** it takes a snapshot of the state of the WAL and wal-index for the current +** instant in time. The current thread will continue to use this snapshot. +** Other threads might append new content to the WAL and wal-index but +** that extra content is ignored by the current thread. +** +** If the database contents have changes since the previous read +** transaction, then *pChanged is set to 1 before returning. The +** Pager layer will use this to know that its cache is stale and +** needs to be flushed. +*/ +SQLITE_PRIVATE int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){ + int rc; + SEH_TRY { + rc = walBeginReadTransaction(pWal, pChanged); + } + SEH_EXCEPT( rc = walHandleException(pWal); ) + return rc; +} + /* ** Finish with a read transaction. All this does is release the ** read-lock. @@ -64780,7 +67643,7 @@ SQLITE_PRIVATE void sqlite3WalEndReadTransaction(Wal *pWal){ ** Return SQLITE_OK if successful, or an error code if an error occurs. If an ** error does occur, the final value of *piRead is undefined. */ -SQLITE_PRIVATE int sqlite3WalFindFrame( +static int walFindFrame( Wal *pWal, /* WAL handle */ Pgno pgno, /* Database page number to read data for */ u32 *piRead /* OUT: Frame number (or zero) */ @@ -64843,6 +67706,7 @@ SQLITE_PRIVATE int sqlite3WalFindFrame( } nCollide = HASHTABLE_NSLOT; iKey = walHash(pgno); + SEH_INJECT_FAULT; while( (iH = AtomicLoad(&sLoc.aHash[iKey]))!=0 ){ u32 iFrame = iH + sLoc.iZero; if( iFrame<=iLast && iFrame>=pWal->minFrame && sLoc.aPgno[iH-1]==pgno ){ @@ -64879,6 +67743,30 @@ SQLITE_PRIVATE int sqlite3WalFindFrame( return SQLITE_OK; } +/* +** Search the wal file for page pgno. If found, set *piRead to the frame that +** contains the page. Otherwise, if pgno is not in the wal file, set *piRead +** to zero. +** +** Return SQLITE_OK if successful, or an error code if an error occurs. If an +** error does occur, the final value of *piRead is undefined. 
+** +** The difference between this function and walFindFrame() is that this +** function wraps walFindFrame() in an SEH_TRY{...} block. +*/ +SQLITE_PRIVATE int sqlite3WalFindFrame( + Wal *pWal, /* WAL handle */ + Pgno pgno, /* Database page number to read data for */ + u32 *piRead /* OUT: Frame number (or zero) */ +){ + int rc; + SEH_TRY { + rc = walFindFrame(pWal, pgno, piRead); + } + SEH_EXCEPT( rc = SQLITE_IOERR_IN_PAGE; ) + return rc; +} + /* ** Read the contents of frame iRead from the wal file into buffer pOut ** (which is nOut bytes in size). Return SQLITE_OK if successful, or an @@ -64960,12 +67848,17 @@ SQLITE_PRIVATE int sqlite3WalBeginWriteTransaction(Wal *pWal){ ** time the read transaction on this connection was started, then ** the write is disallowed. */ - if( memcmp(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr))!=0 ){ + SEH_TRY { + if( memcmp(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr))!=0 ){ + rc = SQLITE_BUSY_SNAPSHOT; + } + } + SEH_EXCEPT( rc = SQLITE_IOERR_IN_PAGE; ) + + if( rc!=SQLITE_OK ){ walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); pWal->writeLock = 0; - rc = SQLITE_BUSY_SNAPSHOT; } - return rc; } @@ -65001,30 +67894,33 @@ SQLITE_PRIVATE int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *p Pgno iMax = pWal->hdr.mxFrame; Pgno iFrame; - /* Restore the clients cache of the wal-index header to the state it - ** was in before the client began writing to the database. - */ - memcpy(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr)); - - for(iFrame=pWal->hdr.mxFrame+1; - ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; - iFrame++ - ){ - /* This call cannot fail. Unless the page for which the page number - ** is passed as the second argument is (a) in the cache and - ** (b) has an outstanding reference, then xUndo is either a no-op - ** (if (a) is false) or simply expels the page from the cache (if (b) - ** is false). - ** - ** If the upper layer is doing a rollback, it is guaranteed that there - ** are no outstanding references to any page other than page 1. And - ** page 1 is never written to the log until the transaction is - ** committed. As a result, the call to xUndo may not fail. + SEH_TRY { + /* Restore the clients cache of the wal-index header to the state it + ** was in before the client began writing to the database. */ - assert( walFramePgno(pWal, iFrame)!=1 ); - rc = xUndo(pUndoCtx, walFramePgno(pWal, iFrame)); + memcpy(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr)); + + for(iFrame=pWal->hdr.mxFrame+1; + ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; + iFrame++ + ){ + /* This call cannot fail. Unless the page for which the page number + ** is passed as the second argument is (a) in the cache and + ** (b) has an outstanding reference, then xUndo is either a no-op + ** (if (a) is false) or simply expels the page from the cache (if (b) + ** is false). + ** + ** If the upper layer is doing a rollback, it is guaranteed that there + ** are no outstanding references to any page other than page 1. And + ** page 1 is never written to the log until the transaction is + ** committed. As a result, the call to xUndo may not fail. 
+ */ + assert( walFramePgno(pWal, iFrame)!=1 ); + rc = xUndo(pUndoCtx, walFramePgno(pWal, iFrame)); + } + if( iMax!=pWal->hdr.mxFrame ) walCleanupHash(pWal); } - if( iMax!=pWal->hdr.mxFrame ) walCleanupHash(pWal); + SEH_EXCEPT( rc = SQLITE_IOERR_IN_PAGE; ) } return rc; } @@ -65068,7 +67964,10 @@ SQLITE_PRIVATE int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){ pWal->hdr.mxFrame = aWalData[0]; pWal->hdr.aFrameCksum[0] = aWalData[1]; pWal->hdr.aFrameCksum[1] = aWalData[2]; - walCleanupHash(pWal); + SEH_TRY { + walCleanupHash(pWal); + } + SEH_EXCEPT( rc = SQLITE_IOERR_IN_PAGE; ) } return rc; @@ -65249,7 +68148,7 @@ static int walRewriteChecksums(Wal *pWal, u32 iLast){ ** Write a set of frames to the log. The caller must hold the write-lock ** on the log file (obtained using sqlite3WalBeginWriteTransaction()). */ -SQLITE_PRIVATE int sqlite3WalFrames( +static int walFrames( Wal *pWal, /* Wal handle to write to */ int szPage, /* Database page-size in bytes */ PgHdr *pList, /* List of dirty pages to write */ @@ -65337,7 +68236,9 @@ SQLITE_PRIVATE int sqlite3WalFrames( if( rc ) return rc; } } - assert( (int)pWal->szPage==szPage ); + if( (int)pWal->szPage!=szPage ){ + return SQLITE_CORRUPT_BKPT; /* TH3 test case: cov1/corrupt155.test */ + } /* Setup information needed to write frames into the WAL */ w.pWal = pWal; @@ -65358,7 +68259,7 @@ SQLITE_PRIVATE int sqlite3WalFrames( ** checksums must be recomputed when the transaction is committed. */ if( iFirst && (p->pDirty || isCommit==0) ){ u32 iWrite = 0; - VVA_ONLY(rc =) sqlite3WalFindFrame(pWal, p->pgno, &iWrite); + VVA_ONLY(rc =) walFindFrame(pWal, p->pgno, &iWrite); assert( rc==SQLITE_OK || iWrite==0 ); if( iWrite>=iFirst ){ i64 iOff = walFrameOffset(iWrite, szPage) + WAL_FRAME_HDRSIZE; @@ -65477,6 +68378,29 @@ SQLITE_PRIVATE int sqlite3WalFrames( return rc; } +/* +** Write a set of frames to the log. The caller must hold the write-lock +** on the log file (obtained using sqlite3WalBeginWriteTransaction()). +** +** The difference between this function and walFrames() is that this +** function wraps walFrames() in an SEH_TRY{...} block. +*/ +SQLITE_PRIVATE int sqlite3WalFrames( + Wal *pWal, /* Wal handle to write to */ + int szPage, /* Database page-size in bytes */ + PgHdr *pList, /* List of dirty pages to write */ + Pgno nTruncate, /* Database size after this commit */ + int isCommit, /* True if this is a commit */ + int sync_flags /* Flags to pass to OsSync() (or 0) */ +){ + int rc; + SEH_TRY { + rc = walFrames(pWal, szPage, pList, nTruncate, isCommit, sync_flags); + } + SEH_EXCEPT( rc = walHandleException(pWal); ) + return rc; +} + /* ** This routine is called to implement sqlite3_wal_checkpoint() and ** related interfaces. @@ -65556,30 +68480,33 @@ SQLITE_PRIVATE int sqlite3WalCheckpoint( /* Read the wal-index header. */ - if( rc==SQLITE_OK ){ - walDisableBlocking(pWal); - rc = walIndexReadHdr(pWal, &isChanged); - (void)walEnableBlocking(pWal); - if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){ - sqlite3OsUnfetch(pWal->pDbFd, 0, 0); + SEH_TRY { + if( rc==SQLITE_OK ){ + walDisableBlocking(pWal); + rc = walIndexReadHdr(pWal, &isChanged); + (void)walEnableBlocking(pWal); + if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){ + sqlite3OsUnfetch(pWal->pDbFd, 0, 0); + } } - } - - /* Copy data from the log to the database file. 
*/ - if( rc==SQLITE_OK ){ - if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){ - rc = SQLITE_CORRUPT_BKPT; - }else{ - rc = walCheckpoint(pWal, db, eMode2, xBusy2, pBusyArg, sync_flags, zBuf); - } + /* Copy data from the log to the database file. */ + if( rc==SQLITE_OK ){ + if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){ + rc = SQLITE_CORRUPT_BKPT; + }else{ + rc = walCheckpoint(pWal, db, eMode2, xBusy2, pBusyArg, sync_flags,zBuf); + } - /* If no error occurred, set the output variables. */ - if( rc==SQLITE_OK || rc==SQLITE_BUSY ){ - if( pnLog ) *pnLog = (int)pWal->hdr.mxFrame; - if( pnCkpt ) *pnCkpt = (int)(walCkptInfo(pWal)->nBackfill); + /* If no error occurred, set the output variables. */ + if( rc==SQLITE_OK || rc==SQLITE_BUSY ){ + if( pnLog ) *pnLog = (int)pWal->hdr.mxFrame; + SEH_INJECT_FAULT; + if( pnCkpt ) *pnCkpt = (int)(walCkptInfo(pWal)->nBackfill); + } } } + SEH_EXCEPT( rc = walHandleException(pWal); ) if( isChanged ){ /* If a new wal-index header was loaded before the checkpoint was @@ -65656,7 +68583,9 @@ SQLITE_PRIVATE int sqlite3WalExclusiveMode(Wal *pWal, int op){ ** locks are taken in this case). Nor should the pager attempt to ** upgrade to exclusive-mode following such an error. */ +#ifndef SQLITE_USE_SEH assert( pWal->readLock>=0 || pWal->lockError ); +#endif assert( pWal->readLock>=0 || (op<=0 && pWal->exclusiveMode==0) ); if( op==0 ){ @@ -65757,16 +68686,19 @@ SQLITE_API int sqlite3_snapshot_cmp(sqlite3_snapshot *p1, sqlite3_snapshot *p2){ */ SQLITE_PRIVATE int sqlite3WalSnapshotCheck(Wal *pWal, sqlite3_snapshot *pSnapshot){ int rc; - rc = walLockShared(pWal, WAL_CKPT_LOCK); - if( rc==SQLITE_OK ){ - WalIndexHdr *pNew = (WalIndexHdr*)pSnapshot; - if( memcmp(pNew->aSalt, pWal->hdr.aSalt, sizeof(pWal->hdr.aSalt)) - || pNew->mxFramenBackfillAttempted - ){ - rc = SQLITE_ERROR_SNAPSHOT; - walUnlockShared(pWal, WAL_CKPT_LOCK); + SEH_TRY { + rc = walLockShared(pWal, WAL_CKPT_LOCK); + if( rc==SQLITE_OK ){ + WalIndexHdr *pNew = (WalIndexHdr*)pSnapshot; + if( memcmp(pNew->aSalt, pWal->hdr.aSalt, sizeof(pWal->hdr.aSalt)) + || pNew->mxFramenBackfillAttempted + ){ + rc = SQLITE_ERROR_SNAPSHOT; + walUnlockShared(pWal, WAL_CKPT_LOCK); + } } } + SEH_EXCEPT( rc = walHandleException(pWal); ) return rc; } @@ -65997,7 +68929,7 @@ SQLITE_PRIVATE sqlite3_file *sqlite3WalFile(Wal *pWal){ ** byte are used. The integer consists of all bytes that have bit 8 set and ** the first byte with bit 8 clear. The most significant byte of the integer ** appears first. A variable-length integer may not be more than 9 bytes long. -** As a special case, all 8 bytes of the 9th byte are used as data. This +** As a special case, all 8 bits of the 9th byte are used as data. This ** allows a 64-bit integer to be encoded in 9 bytes. ** ** 0x00 becomes 0x00000000 @@ -66005,7 +68937,7 @@ SQLITE_PRIVATE sqlite3_file *sqlite3WalFile(Wal *pWal){ ** 0x81 0x00 becomes 0x00000080 ** 0x82 0x00 becomes 0x00000100 ** 0x80 0x7f becomes 0x0000007f -** 0x8a 0x91 0xd1 0xac 0x78 becomes 0x12345678 +** 0x81 0x91 0xd1 0xac 0x78 becomes 0x12345678 ** 0x81 0x81 0x81 0x81 0x01 becomes 0x10204081 ** ** Variable length integers are used for rowids and to hold the number of @@ -66088,7 +69020,7 @@ typedef struct CellInfo CellInfo; ** page that has been loaded into memory. The information in this object ** is derived from the raw on-disk page content. 
** -** As each database page is loaded into memory, the pager allocats an +** As each database page is loaded into memory, the pager allocates an ** instance of this object and zeros the first 8 bytes. (This is the ** "extra" information associated with each page of the pager.) ** @@ -66381,7 +69313,7 @@ struct BtCursor { #define BTCF_WriteFlag 0x01 /* True if a write cursor */ #define BTCF_ValidNKey 0x02 /* True if info.nKey is valid */ #define BTCF_ValidOvfl 0x04 /* True if aOverflow is valid */ -#define BTCF_AtLast 0x08 /* Cursor is pointing ot the last entry */ +#define BTCF_AtLast 0x08 /* Cursor is pointing to the last entry */ #define BTCF_Incrblob 0x10 /* True if an incremental I/O handle */ #define BTCF_Multiple 0x20 /* Maybe another cursor on the same btree */ #define BTCF_Pinned 0x40 /* Cursor is busy and cannot be moved */ @@ -66499,15 +69431,15 @@ struct BtCursor { ** So, this macro is defined instead. */ #ifndef SQLITE_OMIT_AUTOVACUUM -#define ISAUTOVACUUM (pBt->autoVacuum) +#define ISAUTOVACUUM(pBt) (pBt->autoVacuum) #else -#define ISAUTOVACUUM 0 +#define ISAUTOVACUUM(pBt) 0 #endif /* -** This structure is passed around through all the sanity checking routines -** in order to keep track of some global state information. +** This structure is passed around through all the PRAGMA integrity_check +** checking routines in order to keep track of some global state information. ** ** The aRef[] array is allocated so that there is 1 bit for each page in ** the database. As the integrity-check proceeds, for each page used in @@ -66520,13 +69452,15 @@ struct IntegrityCk { BtShared *pBt; /* The tree being checked out */ Pager *pPager; /* The associated pager. Also accessible by pBt->pPager */ u8 *aPgRef; /* 1 bit per page in the db (see above) */ - Pgno nPage; /* Number of pages in the database */ + Pgno nCkPage; /* Pages in the database. 0 for partial check */ int mxErr; /* Stop accumulating errors when this reaches zero */ int nErr; /* Number of messages written to zErrMsg so far */ - int bOomFault; /* A memory allocation error has occurred */ + int rc; /* SQLITE_OK, SQLITE_NOMEM, or SQLITE_INTERRUPT */ + u32 nStep; /* Number of steps into the integrity_check process */ const char *zPfx; /* Error message prefix */ - Pgno v1; /* Value for first %u substitution in zPfx */ - int v2; /* Value for second %d substitution in zPfx */ + Pgno v0; /* Value for first %u substitution in zPfx (root page) */ + Pgno v1; /* Value for second %u substitution in zPfx (current pg) */ + int v2; /* Value for third %d substitution in zPfx */ StrAccum errMsg; /* Accumulate the error message text here */ u32 *heap; /* Min-heap used for analyzing cell coverage */ sqlite3 *db; /* Database connection running the check */ @@ -66542,7 +69476,7 @@ struct IntegrityCk { /* ** get2byteAligned(), unlike get2byte(), requires that its argument point to a -** two-byte aligned address. get2bytea() is only used for accessing the +** two-byte aligned address. get2byteAligned() is only used for accessing the ** cell addresses in a btree header. */ #if SQLITE_BYTEORDER==4321 @@ -66719,7 +69653,7 @@ SQLITE_PRIVATE int sqlite3BtreeHoldsMutex(Btree *p){ ** ** There is a corresponding leave-all procedures. ** -** Enter the mutexes in accending order by BtShared pointer address +** Enter the mutexes in ascending order by BtShared pointer address ** to avoid the possibility of deadlock when two threads with ** two or more btrees in common both try to lock all their btrees ** at the same instant. 
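The variable-length integer format documented above is easy to sanity-check, and the byte sequence the patch corrects (0x81 0x91 0xd1 0xac 0x78 for 0x12345678) makes a good test vector. Below is a minimal decoder written against only the stated rules — up to eight bytes contributing 7 bits each while bit 8 is set, with all 8 bits of a 9th byte used as data. It is an illustration, not SQLite's optimized sqlite3GetVarint():

```c
#include <assert.h>
#include <stdint.h>

/* Decode a big-endian base-128 varint as described in the comment above.
** Returns the number of bytes consumed (1..9); stores the value in *pVal. */
static int varint_decode(const uint8_t *p, uint64_t *pVal){
  uint64_t v = 0;
  int i;
  for(i=0; i<8; i++){
    v = (v<<7) | (p[i] & 0x7f);
    if( (p[i] & 0x80)==0 ){ *pVal = v; return i+1; }
  }
  *pVal = (v<<8) | p[8];   /* 9th byte: all 8 bits are data */
  return 9;
}

int main(void){
  uint64_t v;
  const uint8_t a[] = { 0x81, 0x91, 0xd1, 0xac, 0x78 };  /* corrected example */
  assert( varint_decode(a, &v)==5 && v==0x12345678 );
  return 0;
}
```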
@@ -66793,6 +69727,7 @@ SQLITE_PRIVATE int sqlite3BtreeHoldsAllMutexes(sqlite3 *db){ SQLITE_PRIVATE int sqlite3SchemaMutexHeld(sqlite3 *db, int iDb, Schema *pSchema){ Btree *p; assert( db!=0 ); + if( db->pVfs==0 && db->nDb==0 ) return 1; if( pSchema ) iDb = sqlite3SchemaToIndex(db, pSchema); assert( iDb>=0 && iDbnDb ); if( !sqlite3_mutex_held(db->mutex) ) return 0; @@ -66988,8 +69923,8 @@ SQLITE_PRIVATE sqlite3_uint64 sqlite3BtreeSeekCount(Btree *pBt){ int corruptPageError(int lineno, MemPage *p){ char *zMsg; sqlite3BeginBenignMalloc(); - zMsg = sqlite3_mprintf("database corruption page %d of %s", - (int)p->pgno, sqlite3PagerFilename(p->pBt->pPager, 0) + zMsg = sqlite3_mprintf("database corruption page %u of %s", + p->pgno, sqlite3PagerFilename(p->pBt->pPager, 0) ); sqlite3EndBenignMalloc(); if( zMsg ){ @@ -67798,8 +70733,25 @@ SQLITE_PRIVATE int sqlite3BtreeCursorRestore(BtCursor *pCur, int *pDifferentRow) */ SQLITE_PRIVATE void sqlite3BtreeCursorHint(BtCursor *pCur, int eHintType, ...){ /* Used only by system that substitute their own storage engine */ +#ifdef SQLITE_DEBUG + if( ALWAYS(eHintType==BTREE_HINT_RANGE) ){ + va_list ap; + Expr *pExpr; + Walker w; + memset(&w, 0, sizeof(w)); + w.xExprCallback = sqlite3CursorRangeHintExprCheck; + va_start(ap, eHintType); + pExpr = va_arg(ap, Expr*); + w.u.aMem = va_arg(ap, Mem*); + va_end(ap); + assert( pExpr!=0 ); + assert( w.u.aMem!=0 ); + sqlite3WalkExpr(&w, pExpr); + } +#endif /* SQLITE_DEBUG */ } -#endif +#endif /* SQLITE_ENABLE_CURSOR_HINTS */ + /* ** Provide flag hints to the cursor. @@ -67884,7 +70836,7 @@ static void ptrmapPut(BtShared *pBt, Pgno key, u8 eType, Pgno parent, int *pRC){ pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage); if( eType!=pPtrmap[offset] || get4byte(&pPtrmap[offset+1])!=parent ){ - TRACE(("PTRMAP_UPDATE: %d->(%d,%d)\n", key, eType, parent)); + TRACE(("PTRMAP_UPDATE: %u->(%u,%u)\n", key, eType, parent)); *pRC= rc = sqlite3PagerWrite(pDbPage); if( rc==SQLITE_OK ){ pPtrmap[offset] = eType; @@ -68083,27 +71035,31 @@ static void btreeParseCellPtr( iKey = *pIter; if( iKey>=0x80 ){ u8 x; - iKey = ((iKey&0x7f)<<7) | ((x = *++pIter) & 0x7f); + iKey = (iKey<<7) ^ (x = *++pIter); if( x>=0x80 ){ - iKey = (iKey<<7) | ((x =*++pIter) & 0x7f); + iKey = (iKey<<7) ^ (x = *++pIter); if( x>=0x80 ){ - iKey = (iKey<<7) | ((x = *++pIter) & 0x7f); + iKey = (iKey<<7) ^ 0x10204000 ^ (x = *++pIter); if( x>=0x80 ){ - iKey = (iKey<<7) | ((x = *++pIter) & 0x7f); + iKey = (iKey<<7) ^ 0x4000 ^ (x = *++pIter); if( x>=0x80 ){ - iKey = (iKey<<7) | ((x = *++pIter) & 0x7f); + iKey = (iKey<<7) ^ 0x4000 ^ (x = *++pIter); if( x>=0x80 ){ - iKey = (iKey<<7) | ((x = *++pIter) & 0x7f); + iKey = (iKey<<7) ^ 0x4000 ^ (x = *++pIter); if( x>=0x80 ){ - iKey = (iKey<<7) | ((x = *++pIter) & 0x7f); + iKey = (iKey<<7) ^ 0x4000 ^ (x = *++pIter); if( x>=0x80 ){ - iKey = (iKey<<8) | (*++pIter); + iKey = (iKey<<8) ^ 0x8000 ^ (*++pIter); } } } } } + }else{ + iKey ^= 0x204000; } + }else{ + iKey ^= 0x4000; } } pIter++; @@ -68180,10 +71136,11 @@ static void btreeParseCell( ** ** cellSizePtrNoPayload() => table internal nodes ** cellSizePtrTableLeaf() => table leaf nodes -** cellSizePtr() => all index nodes & table leaf nodes +** cellSizePtr() => index internal nodes +** cellSizeIdxLeaf() => index leaf nodes */ static u16 cellSizePtr(MemPage *pPage, u8 *pCell){ - u8 *pIter = pCell + pPage->childPtrSize; /* For looping over bytes of pCell */ + u8 *pIter = pCell + 4; /* For looping over bytes of pCell */ u8 *pEnd; /* End mark for a varint */ u32 nSize; /* Size value to return */ 
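The btreeParseCellPtr() rewrite above drops the per-byte `& 0x7f` masking in favour of bare shift-and-XOR, then cancels the accumulated continuation bits with a single constant chosen per encoding length (0x4000 for the two-byte case, 0x204000 for three bytes, and so on). A sketch of why that works for the two-byte case, assuming only what the hunk shows — every continuation byte has bit 7 set, so the stray bits it deposits form a fixed, pre-computable pattern, and XOR equals OR wherever the operands cannot overlap:

```c
#include <assert.h>
#include <stdint.h>

/* Two-byte key decode, written both ways. This illustrates the idea behind
** the rewritten btreeParseCellPtr(); it is not a copy of it. */
static uint64_t decode2_masked(const uint8_t *p){
  return ((uint64_t)(p[0]&0x7f)<<7) | (p[1]&0x7f);
}
static uint64_t decode2_xor(const uint8_t *p){
  uint64_t iKey = p[0];      /* bit 7 known to be set on a continuation byte */
  iKey = (iKey<<7) ^ p[1];   /* deposits a stray 0x4000 from p[0]'s bit 7;
                             ** p[1]<0x80, so XOR acts as OR on the low bits */
  return iKey ^ 0x4000;      /* cancel the stray bit in one operation */
}

int main(void){
  const uint8_t a[2] = { 0x81, 0x7f };   /* encodes 0xff */
  assert( decode2_masked(a)==0xff );
  assert( decode2_xor(a)==0xff );
  return 0;
}
```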
@@ -68196,6 +71153,49 @@ static u16 cellSizePtr(MemPage *pPage, u8 *pCell){ pPage->xParseCell(pPage, pCell, &debuginfo); #endif + assert( pPage->childPtrSize==4 ); + nSize = *pIter; + if( nSize>=0x80 ){ + pEnd = &pIter[8]; + nSize &= 0x7f; + do{ + nSize = (nSize<<7) | (*++pIter & 0x7f); + }while( *(pIter)>=0x80 && pItermaxLocal ); + testcase( nSize==(u32)pPage->maxLocal+1 ); + if( nSize<=pPage->maxLocal ){ + nSize += (u32)(pIter - pCell); + assert( nSize>4 ); + }else{ + int minLocal = pPage->minLocal; + nSize = minLocal + (nSize - minLocal) % (pPage->pBt->usableSize - 4); + testcase( nSize==pPage->maxLocal ); + testcase( nSize==(u32)pPage->maxLocal+1 ); + if( nSize>pPage->maxLocal ){ + nSize = minLocal; + } + nSize += 4 + (u16)(pIter - pCell); + } + assert( nSize==debuginfo.nSize || CORRUPT_DB ); + return (u16)nSize; +} +static u16 cellSizePtrIdxLeaf(MemPage *pPage, u8 *pCell){ + u8 *pIter = pCell; /* For looping over bytes of pCell */ + u8 *pEnd; /* End mark for a varint */ + u32 nSize; /* Size value to return */ + +#ifdef SQLITE_DEBUG + /* The value returned by this function should always be the same as + ** the (CellInfo.nSize) value found by doing a full parse of the + ** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of + ** this function verifies that this invariant is not violated. */ + CellInfo debuginfo; + pPage->xParseCell(pPage, pCell, &debuginfo); +#endif + + assert( pPage->childPtrSize==0 ); nSize = *pIter; if( nSize>=0x80 ){ pEnd = &pIter[8]; @@ -68320,7 +71320,7 @@ static void ptrmapPutOvflPtr(MemPage *pPage, MemPage *pSrc, u8 *pCell,int *pRC){ pPage->xParseCell(pPage, pCell, &info); if( info.nLocalaDataEnd, pCell, pCell+info.nLocal) ){ + if( SQLITE_OVERFLOW(pSrc->aDataEnd, pCell, pCell+info.nLocal) ){ testcase( pSrc!=pPage ); *pRC = SQLITE_CORRUPT_BKPT; return; @@ -68365,8 +71365,7 @@ static int defragmentPage(MemPage *pPage, int nMaxFrag){ assert( pPage->pBt->usableSize <= SQLITE_MAX_PAGE_SIZE ); assert( pPage->nOverflow==0 ); assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - temp = 0; - src = data = pPage->aData; + data = pPage->aData; hdr = pPage->hdrOffset; cellOffset = pPage->cellOffset; nCell = pPage->nCell; @@ -68400,7 +71399,7 @@ static int defragmentPage(MemPage *pPage, int nMaxFrag){ if( iFree2+sz2 > usableSize ) return SQLITE_CORRUPT_PAGE(pPage); memmove(&data[iFree+sz+sz2], &data[iFree+sz], iFree2-(iFree+sz)); sz += sz2; - }else if( NEVER(iFree+sz>usableSize) ){ + }else if( iFree+sz>usableSize ){ return SQLITE_CORRUPT_PAGE(pPage); } @@ -68420,39 +71419,38 @@ static int defragmentPage(MemPage *pPage, int nMaxFrag){ cbrk = usableSize; iCellLast = usableSize - 4; iCellStart = get2byte(&data[hdr+5]); - for(i=0; iiCellLast ){ - return SQLITE_CORRUPT_PAGE(pPage); - } - assert( pc>=iCellStart && pc<=iCellLast ); - size = pPage->xCellSize(pPage, &src[pc]); - cbrk -= size; - if( cbrkusableSize ){ - return SQLITE_CORRUPT_PAGE(pPage); - } - assert( cbrk+size<=usableSize && cbrk>=iCellStart ); - testcase( cbrk+size==usableSize ); - testcase( pc+size==usableSize ); - put2byte(pAddr, cbrk); - if( temp==0 ){ - if( cbrk==pc ) continue; - temp = sqlite3PagerTempSpace(pPage->pBt->pPager); - memcpy(&temp[iCellStart], &data[iCellStart], usableSize - iCellStart); - src = temp; + if( nCell>0 ){ + temp = sqlite3PagerTempSpace(pPage->pBt->pPager); + memcpy(temp, data, usableSize); + src = temp; + for(i=0; iiCellLast ){ + return SQLITE_CORRUPT_PAGE(pPage); + } + assert( pc>=0 && pc<=iCellLast ); + size = pPage->xCellSize(pPage, &src[pc]); + cbrk -= size; + if( 
cbrkusableSize ){ + return SQLITE_CORRUPT_PAGE(pPage); + } + assert( cbrk+size<=usableSize && cbrk>=iCellStart ); + testcase( cbrk+size==usableSize ); + testcase( pc+size==usableSize ); + put2byte(pAddr, cbrk); + memcpy(&data[cbrk], &src[pc], size); } - memcpy(&data[cbrk], &src[pc], size); } data[hdr+7] = 0; - defragment_out: +defragment_out: assert( pPage->nFree>=0 ); if( data[hdr+7]+cbrk-iCellFirst!=pPage->nFree ){ return SQLITE_CORRUPT_PAGE(pPage); @@ -68509,7 +71507,6 @@ static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc){ ** fragmented bytes within the page. */ memcpy(&aData[iAddr], &aData[pc], 2); aData[hdr+7] += (u8)x; - testcase( pc+x>maxPC ); return &aData[pc]; }else if( x+pc > maxPC ){ /* This slot extends off the end of the usable part of the page */ @@ -68525,9 +71522,9 @@ static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc){ iAddr = pc; pTmp = &aData[pc]; pc = get2byte(pTmp); - if( pc<=iAddr+size ){ + if( pc<=iAddr ){ if( pc ){ - /* The next slot in the chain is not past the end of the current slot */ + /* The next slot in the chain comes before the current slot */ *pRc = SQLITE_CORRUPT_PAGE(pPg); } return 0; @@ -68553,7 +71550,7 @@ static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc){ ** allocation is being made in order to insert a new cell, so we will ** also end up needing a new cell pointer. */ -static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ +static SQLITE_INLINE int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ const int hdr = pPage->hdrOffset; /* Local cache of pPage->hdrOffset */ u8 * const data = pPage->aData; /* Local cache of pPage->aData */ int top; /* First byte of cell content area */ @@ -68579,13 +71576,14 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ ** integer, so a value of 0 is used in its place. */ pTmp = &data[hdr+5]; top = get2byte(pTmp); - assert( top<=(int)pPage->pBt->usableSize ); /* by btreeComputeFreeSpace() */ if( gap>top ){ if( top==0 && pPage->pBt->usableSize==65536 ){ top = 65536; }else{ return SQLITE_CORRUPT_PAGE(pPage); } + }else if( top>(int)pPage->pBt->usableSize ){ + return SQLITE_CORRUPT_PAGE(pPage); } /* If there is enough space between gap and top for one more cell pointer, @@ -68647,7 +71645,7 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ ** ** Even though the freeblock list was checked by btreeComputeFreeSpace(), ** that routine will not detect overlap between cells or freeblocks. Nor -** does it detect cells or freeblocks that encrouch into the reserved bytes +** does it detect cells or freeblocks that encroach into the reserved bytes ** at the end of the page. So do additional corruption checks inside this ** routine and return SQLITE_CORRUPT if any problems are found. */ @@ -68668,7 +71666,7 @@ static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){ assert( CORRUPT_DB || iEnd <= pPage->pBt->usableSize ); assert( sqlite3_mutex_held(pPage->pBt->mutex) ); assert( iSize>=4 ); /* Minimum cell size is 4 */ - assert( iStart<=pPage->pBt->usableSize-4 ); + assert( CORRUPT_DB || iStart<=pPage->pBt->usableSize-4 ); /* The list of freeblocks must be in ascending order. Find the ** spot on the list where iStart should be inserted. 
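pageFindSlot() above now flags a freeblock whose next pointer fails to advance (pc<=iAddr), and freeSpace() keeps the chain sorted as it inserts. The helper below is a hypothetical walker over the same on-page structure — a 2-byte big-endian head pointer at byte 1 of the page header, and in each freeblock a 2-byte next pointer followed by a 2-byte size — showing how the forward-progress check also bounds the loop:

```c
#include <stdint.h>

#define GET2(a) ( ((a)[0]<<8) | (a)[1] )   /* 2-byte big-endian read */

/* Hypothetical helper, not SQLite code: count the freeblocks on a btree
** page, enforcing the chain invariants used above.  hdr is 100 for page 1
** and 0 otherwise.  Returns -1 on a corrupt chain. */
static int countFreeblocks(const uint8_t *aData, int hdr, int usableSize){
  int pc = GET2(&aData[hdr+1]);   /* header byte 1: first freeblock offset */
  int n = 0;
  while( pc ){
    int next;
    if( pc>usableSize-4 ) return -1;   /* freeblock runs off the page */
    next = GET2(&aData[pc]);           /* first 2 bytes: next freeblock */
    if( next && next<=pc ) return -1;  /* chain must move strictly forward */
    pc = next;                         /* strict progress => loop terminates */
    n++;
  }
  return n;
}
```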
@@ -68679,7 +71677,7 @@ static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){ iFreeBlk = 0; /* Shortcut for the case when the freelist is empty */ }else{ while( (iFreeBlk = get2byte(&data[iPtr]))pBt->btsFlags & BTS_FAST_SECURE ){ + /* Overwrite deleted information with zeros when the secure_delete + ** option is enabled */ + memset(&data[iStart], 0, iSize); + } if( iStart<=x ){ /* The new freeblock is at the beginning of the cell content area, ** so just extend the cell content area rather than create another @@ -68736,14 +71739,9 @@ static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){ }else{ /* Insert the new freeblock into the freelist */ put2byte(&data[iPtr], iStart); + put2byte(&data[iStart], iFreeBlk); + put2byte(&data[iStart+2], iSize); } - if( pPage->pBt->btsFlags & BTS_FAST_SECURE ){ - /* Overwrite deleted information with zeros when the secure_delete - ** option is enabled */ - memset(&data[iStart], 0, iSize); - } - put2byte(&data[iStart], iFreeBlk); - put2byte(&data[iStart+2], iSize); pPage->nFree += iOrigSize; return SQLITE_OK; } @@ -68755,62 +71753,67 @@ static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){ ** Only the following combinations are supported. Anything different ** indicates a corrupt database files: ** -** PTF_ZERODATA -** PTF_ZERODATA | PTF_LEAF -** PTF_LEAFDATA | PTF_INTKEY -** PTF_LEAFDATA | PTF_INTKEY | PTF_LEAF +** PTF_ZERODATA (0x02, 2) +** PTF_LEAFDATA | PTF_INTKEY (0x05, 5) +** PTF_ZERODATA | PTF_LEAF (0x0a, 10) +** PTF_LEAFDATA | PTF_INTKEY | PTF_LEAF (0x0d, 13) */ static int decodeFlags(MemPage *pPage, int flagByte){ BtShared *pBt; /* A copy of pPage->pBt */ assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) ); assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - pPage->leaf = (u8)(flagByte>>3); assert( PTF_LEAF == 1<<3 ); - flagByte &= ~PTF_LEAF; - pPage->childPtrSize = 4-4*pPage->leaf; pBt = pPage->pBt; - if( flagByte==(PTF_LEAFDATA | PTF_INTKEY) ){ - /* EVIDENCE-OF: R-07291-35328 A value of 5 (0x05) means the page is an - ** interior table b-tree page. */ - assert( (PTF_LEAFDATA|PTF_INTKEY)==5 ); - /* EVIDENCE-OF: R-26900-09176 A value of 13 (0x0d) means the page is a - ** leaf table b-tree page. 
*/ - assert( (PTF_LEAFDATA|PTF_INTKEY|PTF_LEAF)==13 ); - pPage->intKey = 1; - if( pPage->leaf ){ + pPage->max1bytePayload = pBt->max1bytePayload; + if( flagByte>=(PTF_ZERODATA | PTF_LEAF) ){ + pPage->childPtrSize = 0; + pPage->leaf = 1; + if( flagByte==(PTF_LEAFDATA | PTF_INTKEY | PTF_LEAF) ){ pPage->intKeyLeaf = 1; pPage->xCellSize = cellSizePtrTableLeaf; pPage->xParseCell = btreeParseCellPtr; + pPage->intKey = 1; + pPage->maxLocal = pBt->maxLeaf; + pPage->minLocal = pBt->minLeaf; + }else if( flagByte==(PTF_ZERODATA | PTF_LEAF) ){ + pPage->intKey = 0; + pPage->intKeyLeaf = 0; + pPage->xCellSize = cellSizePtrIdxLeaf; + pPage->xParseCell = btreeParseCellPtrIndex; + pPage->maxLocal = pBt->maxLocal; + pPage->minLocal = pBt->minLocal; }else{ + pPage->intKey = 0; + pPage->intKeyLeaf = 0; + pPage->xCellSize = cellSizePtrIdxLeaf; + pPage->xParseCell = btreeParseCellPtrIndex; + return SQLITE_CORRUPT_PAGE(pPage); + } + }else{ + pPage->childPtrSize = 4; + pPage->leaf = 0; + if( flagByte==(PTF_ZERODATA) ){ + pPage->intKey = 0; + pPage->intKeyLeaf = 0; + pPage->xCellSize = cellSizePtr; + pPage->xParseCell = btreeParseCellPtrIndex; + pPage->maxLocal = pBt->maxLocal; + pPage->minLocal = pBt->minLocal; + }else if( flagByte==(PTF_LEAFDATA | PTF_INTKEY) ){ pPage->intKeyLeaf = 0; pPage->xCellSize = cellSizePtrNoPayload; pPage->xParseCell = btreeParseCellPtrNoPayload; + pPage->intKey = 1; + pPage->maxLocal = pBt->maxLeaf; + pPage->minLocal = pBt->minLeaf; + }else{ + pPage->intKey = 0; + pPage->intKeyLeaf = 0; + pPage->xCellSize = cellSizePtr; + pPage->xParseCell = btreeParseCellPtrIndex; + return SQLITE_CORRUPT_PAGE(pPage); } - pPage->maxLocal = pBt->maxLeaf; - pPage->minLocal = pBt->minLeaf; - }else if( flagByte==PTF_ZERODATA ){ - /* EVIDENCE-OF: R-43316-37308 A value of 2 (0x02) means the page is an - ** interior index b-tree page. */ - assert( (PTF_ZERODATA)==2 ); - /* EVIDENCE-OF: R-59615-42828 A value of 10 (0x0a) means the page is a - ** leaf index b-tree page. */ - assert( (PTF_ZERODATA|PTF_LEAF)==10 ); - pPage->intKey = 0; - pPage->intKeyLeaf = 0; - pPage->xCellSize = cellSizePtr; - pPage->xParseCell = btreeParseCellPtrIndex; - pPage->maxLocal = pBt->maxLocal; - pPage->minLocal = pBt->minLocal; - }else{ - /* EVIDENCE-OF: R-47608-56469 Any other value for the b-tree page type is - ** an error. */ - pPage->intKey = 0; - pPage->intKeyLeaf = 0; - pPage->xCellSize = cellSizePtr; - pPage->xParseCell = btreeParseCellPtrIndex; - return SQLITE_CORRUPT_PAGE(pPage); } - pPage->max1bytePayload = pBt->max1bytePayload; return SQLITE_OK; } @@ -69101,70 +72104,41 @@ SQLITE_PRIVATE Pgno sqlite3BtreeLastPage(Btree *p){ /* ** Get a page from the pager and initialize it. -** -** If pCur!=0 then the page is being fetched as part of a moveToChild() -** call. Do additional sanity checking on the page in this case. -** And if the fetch fails, this routine must decrement pCur->iPage. -** -** The page is fetched as read-write unless pCur is not NULL and is -** a read-only cursor. -** -** If an error occurs, then *ppPage is undefined. It -** may remain unchanged, or it may be set to an invalid value. 
*/ static int getAndInitPage( BtShared *pBt, /* The database file */ Pgno pgno, /* Number of the page to get */ MemPage **ppPage, /* Write the page pointer here */ - BtCursor *pCur, /* Cursor to receive the page, or NULL */ int bReadOnly /* True for a read-only page */ ){ int rc; DbPage *pDbPage; + MemPage *pPage; assert( sqlite3_mutex_held(pBt->mutex) ); - assert( pCur==0 || ppPage==&pCur->pPage ); - assert( pCur==0 || bReadOnly==pCur->curPagerFlags ); - assert( pCur==0 || pCur->iPage>0 ); if( pgno>btreePagecount(pBt) ){ - rc = SQLITE_CORRUPT_BKPT; - goto getAndInitPage_error1; + *ppPage = 0; + return SQLITE_CORRUPT_BKPT; } rc = sqlite3PagerGet(pBt->pPager, pgno, (DbPage**)&pDbPage, bReadOnly); if( rc ){ - goto getAndInitPage_error1; + *ppPage = 0; + return rc; } - *ppPage = (MemPage*)sqlite3PagerGetExtra(pDbPage); - if( (*ppPage)->isInit==0 ){ + pPage = (MemPage*)sqlite3PagerGetExtra(pDbPage); + if( pPage->isInit==0 ){ btreePageFromDbPage(pDbPage, pgno, pBt); - rc = btreeInitPage(*ppPage); + rc = btreeInitPage(pPage); if( rc!=SQLITE_OK ){ - goto getAndInitPage_error2; + releasePage(pPage); + *ppPage = 0; + return rc; } } - assert( (*ppPage)->pgno==pgno || CORRUPT_DB ); - assert( (*ppPage)->aData==sqlite3PagerGetData(pDbPage) ); - - /* If obtaining a child page for a cursor, we must verify that the page is - ** compatible with the root page. */ - if( pCur && ((*ppPage)->nCell<1 || (*ppPage)->intKey!=pCur->curIntKey) ){ - rc = SQLITE_CORRUPT_PGNO(pgno); - goto getAndInitPage_error2; - } + assert( pPage->pgno==pgno || CORRUPT_DB ); + assert( pPage->aData==sqlite3PagerGetData(pDbPage) ); + *ppPage = pPage; return SQLITE_OK; - -getAndInitPage_error2: - releasePage(*ppPage); -getAndInitPage_error1: - if( pCur ){ - pCur->iPage--; - pCur->pPage = pCur->apPage[pCur->iPage]; - } - testcase( pgno==0 ); - assert( pgno!=0 || rc==SQLITE_CORRUPT - || rc==SQLITE_IOERR_NOMEM - || rc==SQLITE_NOMEM ); - return rc; } /* @@ -69247,7 +72221,7 @@ static void pageReinit(DbPage *pData){ ** call to btreeInitPage() will likely return SQLITE_CORRUPT. ** But no harm is done by this. And it is very important that ** btreeInitPage() be called on every btree page so we make - ** the call for every page that comes in for re-initing. */ + ** the call for every page that comes in for re-initializing. */ btreeInitPage(pPage); } } @@ -69426,6 +72400,9 @@ SQLITE_PRIVATE int sqlite3BtreeOpen( assert( sizeof(u16)==2 ); assert( sizeof(Pgno)==4 ); + /* Suppress false-positive compiler warning from PVS-Studio */ + memset(&zDbHeader[16], 0, 8); + pBt = sqlite3MallocZero( sizeof(*pBt) ); if( pBt==0 ){ rc = SQLITE_NOMEM_BKPT; @@ -69642,7 +72619,7 @@ static SQLITE_NOINLINE int allocateTempSpace(BtShared *pBt){ ** can mean that fillInCell() only initializes the first 2 or 3 ** bytes of pTmpSpace, but that the first 4 bytes are copied from ** it into a database page. This is not actually a problem, but it - ** does cause a valgrind error when the 1 or 2 bytes of unitialized + ** does cause a valgrind error when the 1 or 2 bytes of uninitialized ** data is passed to system call write(). So to avoid this error, ** zero the first 4 bytes of temp space here. ** @@ -69877,7 +72854,7 @@ SQLITE_PRIVATE int sqlite3BtreeGetReserveNoMutex(Btree *p){ /* ** Return the number of bytes of space at the end of every page that -** are intentually left unused. This is the "reserved" space that is +** are intentionally left unused. This is the "reserved" space that is ** sometimes used by extensions. 
** ** The value returned is the larger of the current reserve size and @@ -70124,7 +73101,6 @@ static int lockBtree(BtShared *pBt){ ){ goto page1_init_failed; } - pBt->btsFlags |= BTS_PAGESIZE_FIXED; assert( (pageSize & 7)==0 ); /* EVIDENCE-OF: R-59310-51205 The "reserved space" size in the 1-byte ** integer at offset 20 is the number of bytes of space at the end of @@ -70144,6 +73120,7 @@ static int lockBtree(BtShared *pBt){ releasePageOne(pPage1); pBt->usableSize = usableSize; pBt->pageSize = pageSize; + pBt->btsFlags |= BTS_PAGESIZE_FIXED; freeTempSpace(pBt); rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, pageSize-usableSize); @@ -70163,6 +73140,7 @@ static int lockBtree(BtShared *pBt){ if( usableSize<480 ){ goto page1_init_failed; } + pBt->btsFlags |= BTS_PAGESIZE_FIXED; pBt->pageSize = pageSize; pBt->usableSize = usableSize; #ifndef SQLITE_OMIT_AUTOVACUUM @@ -70341,7 +73319,11 @@ SQLITE_PRIVATE int sqlite3BtreeNewDb(Btree *p){ ** when A already has a read lock, we encourage A to give up and let B ** proceed. */ -SQLITE_PRIVATE int sqlite3BtreeBeginTrans(Btree *p, int wrflag, int *pSchemaVersion){ +static SQLITE_NOINLINE int btreeBeginTrans( + Btree *p, /* The btree in which to start the transaction */ + int wrflag, /* True to start a write transaction */ + int *pSchemaVersion /* Put schema version number here, if not NULL */ +){ BtShared *pBt = p->pBt; Pager *pPager = pBt->pPager; int rc = SQLITE_OK; @@ -70513,6 +73495,28 @@ SQLITE_PRIVATE int sqlite3BtreeBeginTrans(Btree *p, int wrflag, int *pSchemaVers sqlite3BtreeLeave(p); return rc; } +SQLITE_PRIVATE int sqlite3BtreeBeginTrans(Btree *p, int wrflag, int *pSchemaVersion){ + BtShared *pBt; + if( p->sharable + || p->inTrans==TRANS_NONE + || (p->inTrans==TRANS_READ && wrflag!=0) + ){ + return btreeBeginTrans(p,wrflag,pSchemaVersion); + } + pBt = p->pBt; + if( pSchemaVersion ){ + *pSchemaVersion = get4byte(&pBt->pPage1->aData[40]); + } + if( wrflag ){ + /* This call makes sure that the pager has the correct number of + ** open savepoints. If the second parameter is greater than 0 and + ** the sub-journal is not already open, then it will be opened here. + */ + return sqlite3PagerOpenSavepoint(pBt->pPager, p->db->nSavepoint); + }else{ + return SQLITE_OK; + } +} #ifndef SQLITE_OMIT_AUTOVACUUM @@ -70599,6 +73603,9 @@ static int modifyPagePointer(MemPage *pPage, Pgno iFrom, Pgno iTo, u8 eType){ } } }else{ + if( pCell+4 > pPage->aData+pPage->pBt->usableSize ){ + return SQLITE_CORRUPT_PAGE(pPage); + } if( get4byte(pCell)==iFrom ){ put4byte(pCell, iTo); break; @@ -70647,7 +73654,7 @@ static int relocatePage( if( iDbPage<3 ) return SQLITE_CORRUPT_BKPT; /* Move page iDbPage from its current location to page number iFreePage */ - TRACE(("AUTOVACUUM: Moving %d to free page %d (ptr page %d type %d)\n", + TRACE(("AUTOVACUUM: Moving %u to free page %u (ptr page %u type %u)\n", iDbPage, iFreePage, iPtrPage, eType)); rc = sqlite3PagerMovepage(pPager, pDbPage->pDbPage, iFreePage, isCommit); if( rc!=SQLITE_OK ){ @@ -71605,7 +74612,6 @@ SQLITE_PRIVATE void sqlite3BtreeCursorUnpin(BtCursor *pCur){ pCur->curFlags &= ~BTCF_Pinned; } -#ifdef SQLITE_ENABLE_OFFSET_SQL_FUNC /* ** Return the offset into the database file for the start of the ** payload to which the cursor is pointing. 
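The sqlite3BtreeBeginTrans() change above is a fast-path/slow-path split: a short wrapper handles the common case where an adequate transaction is already open, and the full locking logic moves to a worker marked SQLITE_NOINLINE so the wrapper stays small. A generic sketch of the pattern — Conn, beginTransSlow and the GCC/Clang noinline spelling are illustrative stand-ins, not SQLite names:

```c
enum { TRANS_NONE, TRANS_READ, TRANS_WRITE };
struct Conn { int inTrans; };

/* Keep the slow path out of line so the short wrapper can be inlined into
** its callers without dragging the heavy logic with it. */
static __attribute__((noinline)) int beginTransSlow(struct Conn *p, int wrflag){
  /* ... take file locks, read the schema cookie, upgrade state ... */
  p->inTrans = wrflag ? TRANS_WRITE : TRANS_READ;
  return 0;
}

static int beginTrans(struct Conn *p, int wrflag){
  if( p->inTrans==TRANS_WRITE || (p->inTrans==TRANS_READ && !wrflag) ){
    return 0;               /* fast path: the open transaction suffices */
  }
  return beginTransSlow(p, wrflag);
}

int main(void){
  struct Conn c = { TRANS_NONE };
  return beginTrans(&c, 1) | beginTrans(&c, 0);  /* slow, then fast path */
}
```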
@@ -71617,7 +74623,6 @@ SQLITE_PRIVATE i64 sqlite3BtreeOffset(BtCursor *pCur){ return (i64)pCur->pBt->pageSize*((i64)pCur->pPage->pgno - 1) + (i64)(pCur->info.pPayload - pCur->pPage->aData); } -#endif /* SQLITE_ENABLE_OFFSET_SQL_FUNC */ /* ** Return the number of bytes of payload for the entry that pCur is @@ -71643,7 +74648,7 @@ SQLITE_PRIVATE u32 sqlite3BtreePayloadSize(BtCursor *pCur){ ** routine always returns 2147483647 (which is the largest record ** that SQLite can handle) or more. But returning a smaller value might ** prevent large memory allocations when trying to interpret a -** corrupt datrabase. +** corrupt database. ** ** The current implementation merely returns the size of the underlying ** database file. @@ -72105,8 +75110,7 @@ SQLITE_PRIVATE const void *sqlite3BtreePayloadFetch(BtCursor *pCur, u32 *pAmt){ ** vice-versa). */ static int moveToChild(BtCursor *pCur, u32 newPgno){ - BtShared *pBt = pCur->pBt; - + int rc; assert( cursorOwnsBtShared(pCur) ); assert( pCur->eState==CURSOR_VALID ); assert( pCur->iPageapPage[pCur->iPage] = pCur->pPage; pCur->ix = 0; pCur->iPage++; - return getAndInitPage(pBt, newPgno, &pCur->pPage, pCur, pCur->curPagerFlags); + rc = getAndInitPage(pCur->pBt, newPgno, &pCur->pPage, pCur->curPagerFlags); + assert( pCur->pPage!=0 || rc!=SQLITE_OK ); + if( rc==SQLITE_OK + && (pCur->pPage->nCell<1 || pCur->pPage->intKey!=pCur->curIntKey) + ){ + releasePage(pCur->pPage); + rc = SQLITE_CORRUPT_PGNO(newPgno); + } + if( rc ){ + pCur->pPage = pCur->apPage[--pCur->iPage]; + } + return rc; } #ifdef SQLITE_DEBUG @@ -72226,8 +75241,8 @@ static int moveToRoot(BtCursor *pCur){ } sqlite3BtreeClearCursor(pCur); } - rc = getAndInitPage(pCur->pBtree->pBt, pCur->pgnoRoot, &pCur->pPage, - 0, pCur->curPagerFlags); + rc = getAndInitPage(pCur->pBt, pCur->pgnoRoot, &pCur->pPage, + pCur->curPagerFlags); if( rc!=SQLITE_OK ){ pCur->eState = CURSOR_INVALID; return rc; @@ -72339,7 +75354,7 @@ SQLITE_PRIVATE int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){ *pRes = 0; rc = moveToLeftmost(pCur); }else if( rc==SQLITE_EMPTY ){ - assert( pCur->pgnoRoot==0 || pCur->pPage->nCell==0 ); + assert( pCur->pgnoRoot==0 || (pCur->pPage!=0 && pCur->pPage->nCell==0) ); *pRes = 1; rc = SQLITE_OK; } @@ -72350,9 +75365,25 @@ SQLITE_PRIVATE int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){ ** on success. Set *pRes to 0 if the cursor actually points to something ** or set *pRes to 1 if the table is empty. 
*/ +static SQLITE_NOINLINE int btreeLast(BtCursor *pCur, int *pRes){ + int rc = moveToRoot(pCur); + if( rc==SQLITE_OK ){ + assert( pCur->eState==CURSOR_VALID ); + *pRes = 0; + rc = moveToRightmost(pCur); + if( rc==SQLITE_OK ){ + pCur->curFlags |= BTCF_AtLast; + }else{ + pCur->curFlags &= ~BTCF_AtLast; + } + }else if( rc==SQLITE_EMPTY ){ + assert( pCur->pgnoRoot==0 || pCur->pPage->nCell==0 ); + *pRes = 1; + rc = SQLITE_OK; + } + return rc; +} SQLITE_PRIVATE int sqlite3BtreeLast(BtCursor *pCur, int *pRes){ - int rc; - assert( cursorOwnsBtShared(pCur) ); assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); @@ -72373,23 +75404,7 @@ SQLITE_PRIVATE int sqlite3BtreeLast(BtCursor *pCur, int *pRes){ *pRes = 0; return SQLITE_OK; } - - rc = moveToRoot(pCur); - if( rc==SQLITE_OK ){ - assert( pCur->eState==CURSOR_VALID ); - *pRes = 0; - rc = moveToRightmost(pCur); - if( rc==SQLITE_OK ){ - pCur->curFlags |= BTCF_AtLast; - }else{ - pCur->curFlags &= ~BTCF_AtLast; - } - }else if( rc==SQLITE_EMPTY ){ - assert( pCur->pgnoRoot==0 || pCur->pPage->nCell==0 ); - *pRes = 1; - rc = SQLITE_OK; - } - return rc; + return btreeLast(pCur, pRes); } /* Move the cursor so that it points to an entry in a table (a.k.a INTKEY) @@ -72444,7 +75459,7 @@ SQLITE_PRIVATE int sqlite3BtreeTableMoveto( /* If the requested key is one more than the previous key, then ** try to get there using sqlite3BtreeNext() rather than a full ** binary search. This is an optimization only. The correct answer - ** is still obtained without this case, only a little more slowely */ + ** is still obtained without this case, only a little more slowly. */ if( pCur->info.nKey+1==intKey ){ *pRes = 0; rc = sqlite3BtreeNext(pCur, 0); @@ -72840,10 +75855,36 @@ SQLITE_PRIVATE int sqlite3BtreeIndexMoveto( }else{ chldPg = get4byte(findCell(pPage, lwr)); } - pCur->ix = (u16)lwr; - rc = moveToChild(pCur, chldPg); - if( rc ) break; - } + + /* This block is similar to an in-lined version of: + ** + ** pCur->ix = (u16)lwr; + ** rc = moveToChild(pCur, chldPg); + ** if( rc ) break; + */ + pCur->info.nSize = 0; + pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); + if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){ + return SQLITE_CORRUPT_BKPT; + } + pCur->aiIdx[pCur->iPage] = (u16)lwr; + pCur->apPage[pCur->iPage] = pCur->pPage; + pCur->ix = 0; + pCur->iPage++; + rc = getAndInitPage(pCur->pBt, chldPg, &pCur->pPage, pCur->curPagerFlags); + if( rc==SQLITE_OK + && (pCur->pPage->nCell<1 || pCur->pPage->intKey!=pCur->curIntKey) + ){ + releasePage(pCur->pPage); + rc = SQLITE_CORRUPT_PGNO(chldPg); + } + if( rc ){ + pCur->pPage = pCur->apPage[--pCur->iPage]; + break; + } + /* + ***** End of in-lined moveToChild() call */ + } moveto_index_finish: pCur->info.nSize = 0; assert( (pCur->curFlags & BTCF_ValidOvfl)==0 ); @@ -72934,14 +75975,8 @@ static SQLITE_NOINLINE int btreeNext(BtCursor *pCur){ pPage = pCur->pPage; idx = ++pCur->ix; - if( !pPage->isInit || sqlite3FaultSim(412) ){ - /* The only known way for this to happen is for there to be a - ** recursive SQL function that does a DELETE operation as part of a - ** SELECT which deletes content out from under an active cursor - ** in a corrupt database file where the table being DELETE-ed from - ** has pages in common with the table being queried. See TH3 - ** module cov1/btree78.test testcase 220 (2018-06-08) for an - ** example. 
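Reviewer note: the sqlite3BtreeTableMoveto() comment above describes a sequential-key shortcut: when the requested rowid is exactly one more than the previous one, a single step forward replaces a full root-to-leaf binary search. A generic sketch of that idea over a sorted array; all names here are invented and none of this is SQLite code.

#include <stdio.h>

typedef struct Cur { const long *aKey; int n; int i; } Cur;

static int seekBinary(Cur *c, long key){      /* full binary search */
  int lo = 0, hi = c->n - 1;
  while( lo<=hi ){
    int mid = (lo+hi)/2;
    if( c->aKey[mid]==key ){ c->i = mid; return 0; }
    if( c->aKey[mid]<key ) lo = mid+1; else hi = mid-1;
  }
  return 1;
}

static int seek(Cur *c, long key){
  /* Fast path: the requested key is one past the current key and the
  ** next slot really holds it, so just advance the cursor. */
  if( c->i>=0 && c->i+1<c->n && c->aKey[c->i]+1==key
   && c->aKey[c->i+1]==key ){
    c->i++;
    return 0;
  }
  return seekBinary(c, key);     /* slow path: full descent */
}

int main(void){
  const long keys[] = {1,2,3,4,10};
  Cur c = {keys, 5, -1};
  seekBinary(&c, 2);             /* position cursor on key 2 */
  if( seek(&c, 3)==0 ) printf("found %ld at slot %d\n", keys[c.i], c.i);
  return 0;
}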
*/ + if( sqlite3FaultSim(412) ) pPage->isInit = 0; + if( !pPage->isInit ){ return SQLITE_CORRUPT_BKPT; } @@ -73117,8 +76152,8 @@ static int allocateBtreePage( assert( eMode==BTALLOC_ANY || (nearby>0 && IfNotOmitAV(pBt->autoVacuum)) ); pPage1 = pBt->pPage1; mxPage = btreePagecount(pBt); - /* EVIDENCE-OF: R-05119-02637 The 4-byte big-endian integer at offset 36 - ** stores stores the total number of pages on the freelist. */ + /* EVIDENCE-OF: R-21003-45125 The 4-byte big-endian integer at offset 36 + ** stores the total number of pages on the freelist. */ n = get4byte(&pPage1->aData[36]); testcase( n==mxPage-1 ); if( n>=mxPage ){ @@ -73204,7 +76239,7 @@ static int allocateBtreePage( memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4); *ppPage = pTrunk; pTrunk = 0; - TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1)); + TRACE(("ALLOCATE: %u trunk - %u free pages left\n", *pPgno, n-1)); }else if( k>(u32)(pBt->usableSize/4 - 2) ){ /* Value of k is out of range. Database corruption */ rc = SQLITE_CORRUPT_PGNO(iTrunk); @@ -73270,7 +76305,7 @@ static int allocateBtreePage( } } pTrunk = 0; - TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1)); + TRACE(("ALLOCATE: %u trunk - %u free pages left\n", *pPgno, n-1)); #endif }else if( k>0 ){ /* Extract a leaf from the trunk */ @@ -73315,8 +76350,8 @@ static int allocateBtreePage( ){ int noContent; *pPgno = iPage; - TRACE(("ALLOCATE: %d was leaf %d of %d on trunk %d" - ": %d more free pages\n", + TRACE(("ALLOCATE: %u was leaf %u of %u on trunk %u" + ": %u more free pages\n", *pPgno, closest+1, k, pTrunk->pgno, n-1)); rc = sqlite3PagerWrite(pTrunk->pDbPage); if( rc ) goto end_allocate_page; @@ -73372,7 +76407,7 @@ static int allocateBtreePage( ** becomes a new pointer-map page, the second is used by the caller. */ MemPage *pPg = 0; - TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", pBt->nPage)); + TRACE(("ALLOCATE: %u from end of file (pointer-map page)\n", pBt->nPage)); assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) ); rc = btreeGetUnusedPage(pBt, pBt->nPage, &pPg, bNoContent); if( rc==SQLITE_OK ){ @@ -73395,7 +76430,7 @@ static int allocateBtreePage( releasePage(*ppPage); *ppPage = 0; } - TRACE(("ALLOCATE: %d from end of file\n", *pPgno)); + TRACE(("ALLOCATE: %u from end of file\n", *pPgno)); } assert( CORRUPT_DB || *pPgno!=PENDING_BYTE_PAGE(pBt) ); @@ -73463,7 +76498,7 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){ /* If the database supports auto-vacuum, write an entry in the pointer-map ** to indicate that the page is free. */ - if( ISAUTOVACUUM ){ + if( ISAUTOVACUUM(pBt) ){ ptrmapPut(pBt, iPage, PTRMAP_FREEPAGE, 0, &rc); if( rc ) goto freepage_out; } @@ -73523,7 +76558,7 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){ } rc = btreeSetHasContent(pBt, iPage); } - TRACE(("FREE-PAGE: %d leaf on trunk page %d\n",pPage->pgno,pTrunk->pgno)); + TRACE(("FREE-PAGE: %u leaf on trunk page %u\n",pPage->pgno,pTrunk->pgno)); goto freepage_out; } } @@ -73544,7 +76579,7 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){ put4byte(pPage->aData, iTrunk); put4byte(&pPage->aData[4], 0); put4byte(&pPage1->aData[32], iPage); - TRACE(("FREE-PAGE: %d new trunk page replacing %d\n", pPage->pgno, iTrunk)); + TRACE(("FREE-PAGE: %u new trunk page replacing %u\n", pPage->pgno, iTrunk)); freepage_out: if( pPage ){ @@ -73633,7 +76668,7 @@ static SQLITE_NOINLINE int clearCellOverflow( /* Call xParseCell to compute the size of a cell. 
If the cell contains ** overflow, then invoke cellClearOverflow to clear out that overflow. -** STore the result code (SQLITE_OK or some error code) in rc. +** Store the result code (SQLITE_OK or some error code) in rc. ** ** Implemented as macro to force inlining for performance. */ @@ -73867,12 +76902,6 @@ static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){ assert( pPage->pBt->usableSize > (u32)(ptr-data) ); pc = get2byte(ptr); hdr = pPage->hdrOffset; -#if 0 /* Not required. Omit for efficiency */ - if( pcnCell*2 ){ - *pRC = SQLITE_CORRUPT_BKPT; - return; - } -#endif testcase( pc==(u32)get2byte(&data[hdr+5]) ); testcase( pc+sz==pPage->pBt->usableSize ); if( pc+sz > pPage->pBt->usableSize ){ @@ -73910,23 +76939,27 @@ static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){ ** Allocating a new entry in pPage->aCell[] implies that ** pPage->nOverflow is incremented. ** -** *pRC must be SQLITE_OK when this routine is called. +** The insertCellFast() routine below works exactly the same as +** insertCell() except that it lacks the pTemp and iChild parameters +** which are assumed zero. Other than that, the two routines are the +** same. +** +** Fixes or enhancements to this routine should be reflected in +** insertCellFast()! */ -static void insertCell( +static int insertCell( MemPage *pPage, /* Page into which we are copying */ int i, /* New cell becomes the i-th cell of the page */ u8 *pCell, /* Content of the new cell */ int sz, /* Bytes of content in pCell */ u8 *pTemp, /* Temp storage space for pCell, if needed */ - Pgno iChild, /* If non-zero, replace first 4 bytes with this value */ - int *pRC /* Read and write return code from here */ + Pgno iChild /* If non-zero, replace first 4 bytes with this value */ ){ int idx = 0; /* Where to write new cell content in data[] */ int j; /* Loop counter */ u8 *data; /* The content of the whole page */ u8 *pIns; /* The point in pPage->aCellIdx[] where no cell inserted */ - assert( *pRC==SQLITE_OK ); assert( i>=0 && i<=pPage->nCell+pPage->nOverflow ); assert( MX_CELL(pPage->pBt)<=10921 ); assert( pPage->nCell<=MX_CELL(pPage->pBt) || CORRUPT_DB ); @@ -73935,14 +76968,103 @@ static void insertCell( assert( sqlite3_mutex_held(pPage->pBt->mutex) ); assert( sz==pPage->xCellSize(pPage, pCell) || CORRUPT_DB ); assert( pPage->nFree>=0 ); + assert( iChild>0 ); if( pPage->nOverflow || sz+2>pPage->nFree ){ if( pTemp ){ memcpy(pTemp, pCell, sz); pCell = pTemp; } - if( iChild ){ - put4byte(pCell, iChild); + put4byte(pCell, iChild); + j = pPage->nOverflow++; + /* Comparison against ArraySize-1 since we hold back one extra slot + ** as a contingency. In other words, never need more than 3 overflow + ** slots but 4 are allocated, just to be safe. */ + assert( j < ArraySize(pPage->apOvfl)-1 ); + pPage->apOvfl[j] = pCell; + pPage->aiOvfl[j] = (u16)i; + + /* When multiple overflows occur, they are always sequential and in + ** sorted order. This invariants arise because multiple overflows can + ** only occur when inserting divider cells into the parent page during + ** balancing, and the dividers are adjacent and sorted. 
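Reviewer note: the rewritten insertCell() above patches the child page number into the first four bytes of the cell with put4byte(), because b-tree pages store page numbers as 4-byte big-endian integers. Standalone equivalents of those helpers for reference (the amalgamation's own get4byte()/put4byte() may additionally use compiler intrinsics):

#include <stdio.h>
#include <stdint.h>

static uint32_t get4(const unsigned char *p){
  return ((uint32_t)p[0]<<24) | ((uint32_t)p[1]<<16)
       | ((uint32_t)p[2]<<8)  |  (uint32_t)p[3];
}
static void put4(unsigned char *p, uint32_t v){
  p[0] = (unsigned char)(v>>24);
  p[1] = (unsigned char)(v>>16);
  p[2] = (unsigned char)(v>>8);
  p[3] = (unsigned char)v;
}

int main(void){
  unsigned char cell[8] = {0};
  put4(cell, 12345);            /* e.g. rewrite a child pointer in place */
  printf("%u\n", get4(cell));   /* -> 12345 */
  return 0;
}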
+ */ + assert( j==0 || pPage->aiOvfl[j-1]<(u16)i ); /* Overflows in sorted order */ + assert( j==0 || i==pPage->aiOvfl[j-1]+1 ); /* Overflows are sequential */ + }else{ + int rc = sqlite3PagerWrite(pPage->pDbPage); + if( NEVER(rc!=SQLITE_OK) ){ + return rc; } + assert( sqlite3PagerIswriteable(pPage->pDbPage) ); + data = pPage->aData; + assert( &data[pPage->cellOffset]==pPage->aCellIdx ); + rc = allocateSpace(pPage, sz, &idx); + if( rc ){ return rc; } + /* The allocateSpace() routine guarantees the following properties + ** if it returns successfully */ + assert( idx >= 0 ); + assert( idx >= pPage->cellOffset+2*pPage->nCell+2 || CORRUPT_DB ); + assert( idx+sz <= (int)pPage->pBt->usableSize ); + pPage->nFree -= (u16)(2 + sz); + /* In a corrupt database where an entry in the cell index section of + ** a btree page has a value of 3 or less, the pCell value might point + ** as many as 4 bytes in front of the start of the aData buffer for + ** the source page. Make sure this does not cause problems by not + ** reading the first 4 bytes */ + memcpy(&data[idx+4], pCell+4, sz-4); + put4byte(&data[idx], iChild); + pIns = pPage->aCellIdx + i*2; + memmove(pIns+2, pIns, 2*(pPage->nCell - i)); + put2byte(pIns, idx); + pPage->nCell++; + /* increment the cell count */ + if( (++data[pPage->hdrOffset+4])==0 ) data[pPage->hdrOffset+3]++; + assert( get2byte(&data[pPage->hdrOffset+3])==pPage->nCell || CORRUPT_DB ); +#ifndef SQLITE_OMIT_AUTOVACUUM + if( pPage->pBt->autoVacuum ){ + int rc2 = SQLITE_OK; + /* The cell may contain a pointer to an overflow page. If so, write + ** the entry for the overflow page into the pointer map. + */ + ptrmapPutOvflPtr(pPage, pPage, pCell, &rc2); + if( rc2 ) return rc2; + } +#endif + } + return SQLITE_OK; +} + +/* +** This variant of insertCell() assumes that the pTemp and iChild +** parameters are both zero. Use this variant in sqlite3BtreeInsert() +** for performance improvement, and also so that this variant is only +** called from that one place, and is thus inlined, and thus runs must +** faster. +** +** Fixes or enhancements to this routine should be reflected into +** the insertCell() routine. +*/ +static int insertCellFast( + MemPage *pPage, /* Page into which we are copying */ + int i, /* New cell becomes the i-th cell of the page */ + u8 *pCell, /* Content of the new cell */ + int sz /* Bytes of content in pCell */ +){ + int idx = 0; /* Where to write new cell content in data[] */ + int j; /* Loop counter */ + u8 *data; /* The content of the whole page */ + u8 *pIns; /* The point in pPage->aCellIdx[] where no cell inserted */ + + assert( i>=0 && i<=pPage->nCell+pPage->nOverflow ); + assert( MX_CELL(pPage->pBt)<=10921 ); + assert( pPage->nCell<=MX_CELL(pPage->pBt) || CORRUPT_DB ); + assert( pPage->nOverflow<=ArraySize(pPage->apOvfl) ); + assert( ArraySize(pPage->apOvfl)==ArraySize(pPage->aiOvfl) ); + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( sz==pPage->xCellSize(pPage, pCell) || CORRUPT_DB ); + assert( pPage->nFree>=0 ); + assert( pPage->nOverflow==0 ); + if( sz+2>pPage->nFree ){ j = pPage->nOverflow++; /* Comparison against ArraySize-1 since we hold back one extra slot ** as a contingency. 
In other words, never need more than 3 overflow @@ -73961,31 +77083,20 @@ static void insertCell( }else{ int rc = sqlite3PagerWrite(pPage->pDbPage); if( rc!=SQLITE_OK ){ - *pRC = rc; - return; + return rc; } assert( sqlite3PagerIswriteable(pPage->pDbPage) ); data = pPage->aData; assert( &data[pPage->cellOffset]==pPage->aCellIdx ); rc = allocateSpace(pPage, sz, &idx); - if( rc ){ *pRC = rc; return; } + if( rc ){ return rc; } /* The allocateSpace() routine guarantees the following properties ** if it returns successfully */ assert( idx >= 0 ); assert( idx >= pPage->cellOffset+2*pPage->nCell+2 || CORRUPT_DB ); assert( idx+sz <= (int)pPage->pBt->usableSize ); pPage->nFree -= (u16)(2 + sz); - if( iChild ){ - /* In a corrupt database where an entry in the cell index section of - ** a btree page has a value of 3 or less, the pCell value might point - ** as many as 4 bytes in front of the start of the aData buffer for - ** the source page. Make sure this does not cause problems by not - ** reading the first 4 bytes */ - memcpy(&data[idx+4], pCell+4, sz-4); - put4byte(&data[idx], iChild); - }else{ - memcpy(&data[idx], pCell, sz); - } + memcpy(&data[idx], pCell, sz); pIns = pPage->aCellIdx + i*2; memmove(pIns+2, pIns, 2*(pPage->nCell - i)); put2byte(pIns, idx); @@ -73995,13 +77106,16 @@ static void insertCell( assert( get2byte(&data[pPage->hdrOffset+3])==pPage->nCell || CORRUPT_DB ); #ifndef SQLITE_OMIT_AUTOVACUUM if( pPage->pBt->autoVacuum ){ + int rc2 = SQLITE_OK; /* The cell may contain a pointer to an overflow page. If so, write ** the entry for the overflow page into the pointer map. */ - ptrmapPutOvflPtr(pPage, pPage, pCell, pRC); + ptrmapPutOvflPtr(pPage, pPage, pCell, &rc2); + if( rc2 ) return rc2; } #endif } + return SQLITE_OK; } /* @@ -74102,14 +77216,16 @@ struct CellArray { ** computed. */ static void populateCellCache(CellArray *p, int idx, int N){ + MemPage *pRef = p->pRef; + u16 *szCell = p->szCell; assert( idx>=0 && idx+N<=p->nCell ); while( N>0 ){ assert( p->apCell[idx]!=0 ); - if( p->szCell[idx]==0 ){ - p->szCell[idx] = p->pRef->xCellSize(p->pRef, p->apCell[idx]); + if( szCell[idx]==0 ){ + szCell[idx] = pRef->xCellSize(pRef, p->apCell[idx]); }else{ assert( CORRUPT_DB || - p->szCell[idx]==p->pRef->xCellSize(p->pRef, p->apCell[idx]) ); + szCell[idx]==pRef->xCellSize(pRef, p->apCell[idx]) ); } idx++; N--; @@ -74163,12 +77279,13 @@ static int rebuildPage( int k; /* Current slot in pCArray->apEnd[] */ u8 *pSrcEnd; /* Current pCArray->apEnd[k] value */ + assert( nCell>0 ); assert( i(u32)usableSize ){ j = 0; } memcpy(&pTmp[j], &aData[j], usableSize - j); - for(k=0; pCArray->ixNx[k]<=i && ALWAYS(kixNx[k]<=i; k++){} pSrcEnd = pCArray->apEnd[k]; pData = pEnd; @@ -74231,7 +77348,7 @@ static int rebuildPage( ** Finally, argument pBegin points to the byte immediately following the ** end of the space required by this page for the cell-pointer area (for ** all cells - not just those inserted by the current call). If the content -** area must be extended to before this point in order to accomodate all +** area must be extended to before this point in order to accommodate all ** cells in apCell[], then the cells do not fit and non-zero is returned. 
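Reviewer note: the pageFreeArray() rewrite above drops the old "merge only if exactly adjacent to the previous free" logic in favor of small aOfst[]/aAfter[] buffers: each freed cell range is merged into a buffered range whenever the two abut, and the buffer is flushed to freeSpace() when it fills. A simplified sketch of the coalescing idea, with invented names; the real routine also bounds-checks each range and flushes before inserting when the buffer is full.

#include <stdio.h>

#define NSLOT 10
static int aOfst[NSLOT], aAfter[NSLOT], nFree = 0;

static void flushOne(int j){
  printf("free bytes [%d..%d)\n", aOfst[j], aAfter[j]);
}

static void freeRange(int ofst, int after){
  int j;
  for(j=0; j<nFree; j++){
    if( aAfter[j]==ofst ){ aAfter[j] = after; return; } /* extend right */
    if( aOfst[j]==after ){ aOfst[j] = ofst;  return; }  /* extend left */
  }
  if( nFree==NSLOT ){           /* buffer full: flush everything */
    for(j=0; j<nFree; j++) flushOne(j);
    nFree = 0;
  }
  aOfst[nFree] = ofst; aAfter[nFree] = after; nFree++;
}

int main(void){
  freeRange(100, 120);
  freeRange(120, 150);   /* abuts the first range: coalesced, no new slot */
  for(int j=0; j<nFree; j++) flushOne(j);  /* -> free bytes [100..150) */
  return 0;
}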
*/ static int pageInsertArray( @@ -74251,7 +77368,7 @@ static int pageInsertArray( u8 *pEnd; /* Maximum extent of cell data */ assert( CORRUPT_DB || pPg->hdrOffset==0 ); /* Never called on page 1 */ if( iEnd<=iFirst ) return 0; - for(k=0; pCArray->ixNx[k]<=i && ALWAYS(kixNx[k]<=i ; k++){} pEnd = pCArray->apEnd[k]; while( 1 /*Exit by break*/ ){ int sz, rc; @@ -74309,39 +77426,50 @@ static int pageFreeArray( u8 * const pEnd = &aData[pPg->pBt->usableSize]; u8 * const pStart = &aData[pPg->hdrOffset + 8 + pPg->childPtrSize]; int nRet = 0; - int i; + int i, j; int iEnd = iFirst + nCell; - u8 *pFree = 0; - int szFree = 0; + int nFree = 0; + int aOfst[10]; + int aAfter[10]; for(i=iFirst; iapCell[i]; if( SQLITE_WITHIN(pCell, pStart, pEnd) ){ int sz; + int iAfter; + int iOfst; /* No need to use cachedCellSize() here. The sizes of all cells that ** are to be freed have already been computing while deciding which ** cells need freeing */ sz = pCArray->szCell[i]; assert( sz>0 ); - if( pFree!=(pCell + sz) ){ - if( pFree ){ - assert( pFree>aData && (pFree - aData)<65536 ); - freeSpace(pPg, (u16)(pFree - aData), szFree); - } - pFree = pCell; - szFree = sz; - if( pFree+sz>pEnd ){ - return 0; + iOfst = (u16)(pCell - aData); + iAfter = iOfst+sz; + for(j=0; j=nFree ){ + if( nFree>=(int)(sizeof(aOfst)/sizeof(aOfst[0])) ){ + for(j=0; jpEnd ) return 0; + nFree++; } nRet++; } } - if( pFree ){ - assert( pFree>aData && (pFree - aData)<65536 ); - freeSpace(pPg, (u16)(pFree - aData), szFree); + for(j=0; jpPg->aDataEnd ) goto editpage_fail; + if( NEVER(pData>pPg->aDataEnd) ) goto editpage_fail; /* Add cells to the start of the page */ if( iNewpgno, &rc); if( szCell>pNew->minLocal ){ ptrmapPutOvflPtr(pNew, pNew, pCell, &rc); @@ -74568,8 +77697,8 @@ static int balance_quick(MemPage *pParent, MemPage *pPage, u8 *pSpace){ /* Insert the new divider cell into pParent. */ if( rc==SQLITE_OK ){ - insertCell(pParent, pParent->nCell, pSpace, (int)(pOut-pSpace), - 0, pPage->pgno, &rc); + rc = insertCell(pParent, pParent->nCell, pSpace, (int)(pOut-pSpace), + 0, pPage->pgno); } /* Set the right-child pointer of pParent to point to the new page. */ @@ -74678,7 +77807,7 @@ static void copyNodeContent(MemPage *pFrom, MemPage *pTo, int *pRC){ /* If this is an auto-vacuum database, update the pointer-map entries ** for any b-tree or overflow pages that pTo now contains the pointers to. 
*/ - if( ISAUTOVACUUM ){ + if( ISAUTOVACUUM(pBt) ){ *pRC = setChildPtrmaps(pTo); } } @@ -74756,8 +77885,6 @@ static int balance_nonroot( Pgno pgno; /* Temp var to store a page number in */ u8 abDone[NB+2]; /* True after i'th new page is populated */ Pgno aPgno[NB+2]; /* Page numbers of new pages before shuffling */ - Pgno aPgOrder[NB+2]; /* Copy of aPgno[] used for sorting pages */ - u16 aPgFlags[NB+2]; /* flags field of new pages before shuffling */ CellArray b; /* Parsed information on cells being balanced */ memset(abDone, 0, sizeof(abDone)); @@ -74813,7 +77940,7 @@ static int balance_nonroot( pgno = get4byte(pRight); while( 1 ){ if( rc==SQLITE_OK ){ - rc = getAndInitPage(pBt, pgno, &apOld[i], 0, 0); + rc = getAndInitPage(pBt, pgno, &apOld[i], 0); } if( rc ){ memset(apOld, 0, (i+1)*sizeof(MemPage*)); @@ -75104,15 +78231,17 @@ static int balance_nonroot( d = r + 1 - leafData; (void)cachedCellSize(&b, d); do{ + int szR, szD; assert( d szLeft-(b.szCell[r]+(i==k-1?0:2)))){ + && (bBulk || szRight+szD+2 > szLeft-(szR+(i==k-1?0:2)))){ break; } - szRight += b.szCell[d] + 2; - szLeft -= b.szCell[r] + 2; + szRight += szD + 2; + szLeft -= szR + 2; cntNew[i-1] = r; r--; d--; @@ -75125,7 +78254,7 @@ static int balance_nonroot( } } - /* Sanity check: For a non-corrupt database file one of the follwing + /* Sanity check: For a non-corrupt database file one of the following ** must be true: ** (1) We found one or more cells (cntNew[0])>0), or ** (2) pPage is a virtual root page. A virtual root page is when @@ -75133,7 +78262,7 @@ static int balance_nonroot( ** that page. */ assert( cntNew[0]>0 || (pParent->pgno==1 && pParent->nCell==0) || CORRUPT_DB); - TRACE(("BALANCE: old: %d(nc=%d) %d(nc=%d) %d(nc=%d)\n", + TRACE(("BALANCE: old: %u(nc=%u) %u(nc=%u) %u(nc=%u)\n", apOld[0]->pgno, apOld[0]->nCell, nOld>=2 ? apOld[1]->pgno : 0, nOld>=2 ? apOld[1]->nCell : 0, nOld>=3 ? apOld[2]->pgno : 0, nOld>=3 ? apOld[2]->nCell : 0 @@ -75166,7 +78295,7 @@ static int balance_nonroot( cntOld[i] = b.nCell; /* Set the pointer-map entry for the new sibling page. */ - if( ISAUTOVACUUM ){ + if( ISAUTOVACUUM(pBt) ){ ptrmapPut(pBt, pNew->pgno, PTRMAP_BTREE, pParent->pgno, &rc); if( rc!=SQLITE_OK ){ goto balance_cleanup; @@ -75181,47 +78310,44 @@ static int balance_nonroot( ** of the table is closer to a linear scan through the file. That in turn ** helps the operating system to deliver pages from the disk more rapidly. ** - ** An O(n^2) insertion sort algorithm is used, but since n is never more - ** than (NB+2) (a small constant), that should not be a problem. + ** An O(N*N) sort algorithm is used, but since N is never more than NB+2 + ** (5), that is not a performance concern. ** ** When NB==3, this one optimization makes the database about 25% faster ** for large insertions and deletions. */ for(i=0; ipgno; - aPgFlags[i] = apNew[i]->pDbPage->flags; - for(j=0; jpgno; + assert( apNew[i]->pDbPage->flags & PGHDR_WRITEABLE ); + assert( apNew[i]->pDbPage->flags & PGHDR_DIRTY ); } - for(i=0; ii ){ - sqlite3PagerRekey(apNew[iBest]->pDbPage, pBt->nPage+iBest+1, 0); - } - sqlite3PagerRekey(apNew[i]->pDbPage, pgno, aPgFlags[iBest]); - apNew[i]->pgno = pgno; + for(i=0; ipgno < apNew[iB]->pgno ) iB = j; } - } - TRACE(("BALANCE: new: %d(%d nc=%d) %d(%d nc=%d) %d(%d nc=%d) " - "%d(%d nc=%d) %d(%d nc=%d)\n", + /* If apNew[i] has a page number that is bigger than any of the + ** subsequence apNew[i] entries, then swap apNew[i] with the subsequent + ** entry that has the smallest page number (which we know to be + ** entry apNew[iB]). 
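Reviewer note: the page-shuffling hunk above sorts the new sibling pages by page number with a selection sort, and because two live pages may never share a page number even transiently, each swap routes one of the numbers through a spare value (the pending-byte page's number, which never holds content). A sketch with invented types; sqlite3PagerRekey() is reduced to a plain assignment.

#include <stdio.h>

typedef struct Pg { unsigned pgno; } Pg;

static void rekey(Pg *p, unsigned pgno){ p->pgno = pgno; }

static void sortByPgno(Pg **ap, int n, unsigned pgnoTemp){
  int i, j;
  for(i=0; i<n-1; i++){
    int iB = i;
    for(j=i+1; j<n; j++){
      if( ap[j]->pgno < ap[iB]->pgno ) iB = j;
    }
    if( iB!=i ){
      unsigned a = ap[i]->pgno, b = ap[iB]->pgno;
      rekey(ap[i], pgnoTemp);   /* park entry i on the spare number */
      rekey(ap[iB], a);         /* now a is free for entry iB */
      rekey(ap[i], b);          /* and b is free for entry i */
    }
  }
}

int main(void){
  Pg p0={7}, p1={3}, p2={9};
  Pg *ap[] = {&p0, &p1, &p2};
  sortByPgno(ap, 3, 0xFFFFFFFF);
  printf("%u %u %u\n", ap[0]->pgno, ap[1]->pgno, ap[2]->pgno); /* 3 7 9 */
  return 0;
}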
+ */ + if( iB!=i ){ + Pgno pgnoA = apNew[i]->pgno; + Pgno pgnoB = apNew[iB]->pgno; + Pgno pgnoTemp = (PENDING_BYTE/pBt->pageSize)+1; + u16 fgA = apNew[i]->pDbPage->flags; + u16 fgB = apNew[iB]->pDbPage->flags; + sqlite3PagerRekey(apNew[i]->pDbPage, pgnoTemp, fgB); + sqlite3PagerRekey(apNew[iB]->pDbPage, pgnoA, fgA); + sqlite3PagerRekey(apNew[i]->pDbPage, pgnoB, fgB); + apNew[i]->pgno = pgnoB; + apNew[iB]->pgno = pgnoA; + } + } + + TRACE(("BALANCE: new: %u(%u nc=%u) %u(%u nc=%u) %u(%u nc=%u) " + "%u(%u nc=%u) %u(%u nc=%u)\n", apNew[0]->pgno, szNew[0], cntNew[0], nNew>=2 ? apNew[1]->pgno : 0, nNew>=2 ? szNew[1] : 0, nNew>=2 ? cntNew[1] - cntNew[0] - !leafData : 0, @@ -75262,7 +78388,7 @@ static int balance_nonroot( ** updated. This happens below, after the sibling pages have been ** populated, not here. */ - if( ISAUTOVACUUM ){ + if( ISAUTOVACUUM(pBt) ){ MemPage *pOld; MemPage *pNew = pOld = apNew[0]; int cntOldNext = pNew->nCell + pNew->nOverflow; @@ -75353,13 +78479,13 @@ static int balance_nonroot( iOvflSpace += sz; assert( sz<=pBt->maxLocal+23 ); assert( iOvflSpace <= (int)pBt->pageSize ); - for(k=0; b.ixNx[k]<=j && ALWAYS(kpgno, &rc); + rc = insertCell(pParent, nxDiv+i, pCell, sz, pTemp, pNew->pgno); if( rc!=SQLITE_OK ) goto balance_cleanup; assert( sqlite3PagerIswriteable(pParent->pDbPage) ); } @@ -75389,6 +78515,8 @@ static int balance_nonroot( for(i=1-nNew; i=0 && iPg=1 || i>=0 ); + assert( iPg=0 /* On the upwards pass, or... */ || cntOld[iPg-1]>=cntNew[iPg-1] /* Condition (1) is true */ @@ -75455,7 +78583,7 @@ static int balance_nonroot( ); copyNodeContent(apNew[0], pParent, &rc); freePage(apNew[0], &rc); - }else if( ISAUTOVACUUM && !leafCorrection ){ + }else if( ISAUTOVACUUM(pBt) && !leafCorrection ){ /* Fix the pointer map entries associated with the right-child of each ** sibling page. All other pointer map entries have already been taken ** care of. */ @@ -75466,7 +78594,7 @@ static int balance_nonroot( } assert( pParent->isInit ); - TRACE(("BALANCE: finished: old=%d new=%d cells=%d\n", + TRACE(("BALANCE: finished: old=%u new=%u cells=%u\n", nOld, nNew, b.nCell)); /* Free any old pages that were not reused as new pages. @@ -75476,7 +78604,7 @@ static int balance_nonroot( } #if 0 - if( ISAUTOVACUUM && rc==SQLITE_OK && apNew[0]->isInit ){ + if( ISAUTOVACUUM(pBt) && rc==SQLITE_OK && apNew[0]->isInit ){ /* The ptrmapCheckPages() contains assert() statements that verify that ** all pointer map pages are set correctly. This is helpful while ** debugging. This is usually disabled because a corrupt database may @@ -75538,7 +78666,7 @@ static int balance_deeper(MemPage *pRoot, MemPage **ppChild){ if( rc==SQLITE_OK ){ rc = allocateBtreePage(pBt,&pChild,&pgnoChild,pRoot->pgno,0); copyNodeContent(pRoot, pChild, &rc); - if( ISAUTOVACUUM ){ + if( ISAUTOVACUUM(pBt) ){ ptrmapPut(pBt, pgnoChild, PTRMAP_BTREE, pRoot->pgno, &rc); } } @@ -75551,7 +78679,7 @@ static int balance_deeper(MemPage *pRoot, MemPage **ppChild){ assert( sqlite3PagerIswriteable(pRoot->pDbPage) ); assert( pChild->nCell==pRoot->nCell || CORRUPT_DB ); - TRACE(("BALANCE: copy root %d into %d\n", pRoot->pgno, pChild->pgno)); + TRACE(("BALANCE: copy root %u into %u\n", pRoot->pgno, pChild->pgno)); /* Copy the overflow cells from pRoot to pChild */ memcpy(pChild->aiOvfl, pRoot->aiOvfl, @@ -75642,6 +78770,11 @@ static int balance(BtCursor *pCur){ }else{ break; } + }else if( sqlite3PagerPageRefcount(pPage->pDbPage)>1 ){ + /* The page being written is not a root page, and there is currently + ** more than one reference to it. 
This only happens if the page is one + ** of its own ancestor pages. Corruption. */ + rc = SQLITE_CORRUPT_BKPT; }else{ MemPage * const pParent = pCur->apPage[iPage-1]; int const iIdx = pCur->aiIdx[iPage-1]; @@ -75740,7 +78873,7 @@ static int btreeOverwriteContent( ){ int nData = pX->nData - iOffset; if( nData<=0 ){ - /* Overwritting with zeros */ + /* Overwriting with zeros */ int i; for(i=0; ipData to write */ int nTotal = pX->nData + pX->nZero; /* Total bytes of to write */ int rc; /* Return code */ @@ -75783,16 +78920,12 @@ static int btreeOverwriteCell(BtCursor *pCur, const BtreePayload *pX){ Pgno ovflPgno; /* Next overflow page to write */ u32 ovflPageSize; /* Size to write on overflow page */ - if( pCur->info.pPayload + pCur->info.nLocal > pPage->aDataEnd - || pCur->info.pPayload < pPage->aData + pPage->cellOffset - ){ - return SQLITE_CORRUPT_BKPT; - } + assert( pCur->info.nLocalinfo.pPayload, pX, 0, pCur->info.nLocal); if( rc ) return rc; - if( pCur->info.nLocal==nTotal ) return SQLITE_OK; /* Now overwrite the overflow pages */ iOffset = pCur->info.nLocal; @@ -75822,6 +78955,29 @@ static int btreeOverwriteCell(BtCursor *pCur, const BtreePayload *pX){ return SQLITE_OK; } +/* +** Overwrite the cell that cursor pCur is pointing to with fresh content +** contained in pX. +*/ +static int btreeOverwriteCell(BtCursor *pCur, const BtreePayload *pX){ + int nTotal = pX->nData + pX->nZero; /* Total bytes of to write */ + MemPage *pPage = pCur->pPage; /* Page being written */ + + if( pCur->info.pPayload + pCur->info.nLocal > pPage->aDataEnd + || pCur->info.pPayload < pPage->aData + pPage->cellOffset + ){ + return SQLITE_CORRUPT_BKPT; + } + if( pCur->info.nLocal==nTotal ){ + /* The entire cell is local */ + return btreeOverwriteContent(pPage, pCur->info.pPayload, pX, + 0, pCur->info.nLocal); + }else{ + /* The cell contains overflow content */ + return btreeOverwriteOverflowCell(pCur, pX); + } +} + /* ** Insert a new record into the BTree. The content of the new record @@ -75865,7 +79021,6 @@ SQLITE_PRIVATE int sqlite3BtreeInsert( int idx; MemPage *pPage; Btree *p = pCur->pBtree; - BtShared *pBt = p->pBt; unsigned char *oldCell; unsigned char *newCell = 0; @@ -75884,7 +79039,7 @@ SQLITE_PRIVATE int sqlite3BtreeInsert( ** not to clear the cursor here. */ if( pCur->curFlags & BTCF_Multiple ){ - rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); + rc = saveAllCursors(p->pBt, pCur->pgnoRoot, pCur); if( rc ) return rc; if( loc && pCur->iPage<0 ){ /* This can only happen if the schema is corrupt such that there is more @@ -75908,8 +79063,8 @@ SQLITE_PRIVATE int sqlite3BtreeInsert( assert( cursorOwnsBtShared(pCur) ); assert( (pCur->curFlags & BTCF_WriteFlag)!=0 - && pBt->inTransaction==TRANS_WRITE - && (pBt->btsFlags & BTS_READ_ONLY)==0 ); + && p->pBt->inTransaction==TRANS_WRITE + && (p->pBt->btsFlags & BTS_READ_ONLY)==0 ); assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) ); /* Assert that the caller has been consistent. 
If this cursor was opened @@ -76007,7 +79162,7 @@ SQLITE_PRIVATE int sqlite3BtreeInsert( } } assert( pCur->eState==CURSOR_VALID - || (pCur->eState==CURSOR_INVALID && loc) ); + || (pCur->eState==CURSOR_INVALID && loc) || CORRUPT_DB ); pPage = pCur->pPage; assert( pPage->intKey || pX->nKey>=0 || (flags & BTREE_PREFORMAT) ); @@ -76022,31 +79177,34 @@ SQLITE_PRIVATE int sqlite3BtreeInsert( if( rc ) return rc; } - TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n", + TRACE(("INSERT: table=%u nkey=%lld ndata=%u page=%u %s\n", pCur->pgnoRoot, pX->nKey, pX->nData, pPage->pgno, loc==0 ? "overwrite" : "new entry")); assert( pPage->isInit || CORRUPT_DB ); - newCell = pBt->pTmpSpace; + newCell = p->pBt->pTmpSpace; assert( newCell!=0 ); + assert( BTREE_PREFORMAT==OPFLAG_PREFORMAT ); if( flags & BTREE_PREFORMAT ){ rc = SQLITE_OK; - szNew = pBt->nPreformatSize; + szNew = p->pBt->nPreformatSize; if( szNew<4 ) szNew = 4; - if( ISAUTOVACUUM && szNew>pPage->maxLocal ){ + if( ISAUTOVACUUM(p->pBt) && szNew>pPage->maxLocal ){ CellInfo info; pPage->xParseCell(pPage, newCell, &info); if( info.nPayload!=info.nLocal ){ Pgno ovfl = get4byte(&newCell[szNew-4]); - ptrmapPut(pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno, &rc); + ptrmapPut(p->pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno, &rc); + if( NEVER(rc) ) goto end_insert; } } }else{ rc = fillInCell(pPage, newCell, pX, &szNew); + if( rc ) goto end_insert; } - if( rc ) goto end_insert; assert( szNew==pPage->xCellSize(pPage, newCell) ); - assert( szNew <= MX_CELL_SIZE(pBt) ); + assert( szNew <= MX_CELL_SIZE(p->pBt) ); idx = pCur->ix; + pCur->info.nSize = 0; if( loc==0 ){ CellInfo info; assert( idx>=0 ); @@ -76065,7 +79223,7 @@ SQLITE_PRIVATE int sqlite3BtreeInsert( testcase( pCur->curFlags & BTCF_ValidOvfl ); invalidateOverflowCache(pCur); if( info.nSize==szNew && info.nLocal==info.nPayload - && (!ISAUTOVACUUM || szNewminLocal) + && (!ISAUTOVACUUM(p->pBt) || szNewminLocal) ){ /* Overwrite the old cell with the new if they are the same size. ** We could also try to do this if the old cell is smaller, then add @@ -76095,7 +79253,7 @@ SQLITE_PRIVATE int sqlite3BtreeInsert( }else{ assert( pPage->leaf ); } - insertCell(pPage, idx, newCell, szNew, 0, 0, &rc); + rc = insertCellFast(pPage, idx, newCell, szNew); assert( pPage->nOverflow==0 || rc==SQLITE_OK ); assert( rc!=SQLITE_OK || pPage->nCell>0 || pPage->nOverflow>0 ); @@ -76119,7 +79277,6 @@ SQLITE_PRIVATE int sqlite3BtreeInsert( ** larger than the largest existing key, it is possible to insert the ** row without seeking the cursor. This can be a big performance boost. */ - pCur->info.nSize = 0; if( pPage->nOverflow ){ assert( rc==SQLITE_OK ); pCur->curFlags &= ~(BTCF_ValidNKey); @@ -76168,7 +79325,6 @@ SQLITE_PRIVATE int sqlite3BtreeInsert( ** SQLITE_OK is returned if successful, or an SQLite error code otherwise. 
*/ SQLITE_PRIVATE int sqlite3BtreeTransferRow(BtCursor *pDest, BtCursor *pSrc, i64 iKey){ - int rc = SQLITE_OK; BtShared *pBt = pDest->pBt; u8 *aOut = pBt->pTmpSpace; /* Pointer to next output buffer */ const u8 *aIn; /* Pointer to next input buffer */ @@ -76191,7 +79347,9 @@ SQLITE_PRIVATE int sqlite3BtreeTransferRow(BtCursor *pDest, BtCursor *pSrc, i64 if( nIn==nRem && nInpPage->maxLocal ){ memcpy(aOut, aIn, nIn); pBt->nPreformatSize = nIn + (aOut - pBt->pTmpSpace); + return SQLITE_OK; }else{ + int rc = SQLITE_OK; Pager *pSrcPager = pSrc->pBt->pPager; u8 *pPgnoOut = 0; Pgno ovflIn = 0; @@ -76243,7 +79401,7 @@ SQLITE_PRIVATE int sqlite3BtreeTransferRow(BtCursor *pDest, BtCursor *pSrc, i64 MemPage *pNew = 0; rc = allocateBtreePage(pBt, &pNew, &pgnoNew, 0, 0); put4byte(pPgnoOut, pgnoNew); - if( ISAUTOVACUUM && pPageOut ){ + if( ISAUTOVACUUM(pBt) && pPageOut ){ ptrmapPut(pBt, pgnoNew, PTRMAP_OVERFLOW2, pPageOut->pgno, &rc); } releasePage(pPageOut); @@ -76259,9 +79417,8 @@ SQLITE_PRIVATE int sqlite3BtreeTransferRow(BtCursor *pDest, BtCursor *pSrc, i64 releasePage(pPageOut); sqlite3PagerUnref(pPageIn); + return rc; } - - return rc; } /* @@ -76320,6 +79477,9 @@ SQLITE_PRIVATE int sqlite3BtreeDelete(BtCursor *pCur, u8 flags){ if( pPage->nFree<0 && btreeComputeFreeSpace(pPage) ){ return SQLITE_CORRUPT_BKPT; } + if( pCell<&pPage->aCellIdx[pPage->nCell] ){ + return SQLITE_CORRUPT_BKPT; + } /* If the BTREE_SAVEPOSITION bit is on, then the cursor position must ** be preserved following this delete operation. If the current delete @@ -76416,7 +79576,7 @@ SQLITE_PRIVATE int sqlite3BtreeDelete(BtCursor *pCur, u8 flags){ assert( pTmp!=0 ); rc = sqlite3PagerWrite(pLeaf->pDbPage); if( rc==SQLITE_OK ){ - insertCell(pPage, iCellIdx, pCell-4, nCell+4, pTmp, n, &rc); + rc = insertCell(pPage, iCellIdx, pCell-4, nCell+4, pTmp, n); } dropCell(pLeaf, pLeaf->nCell-1, nCell, &rc); if( rc ) return rc; @@ -76496,7 +79656,7 @@ static int btreeCreateTable(Btree *p, Pgno *piTable, int createTabFlags){ MemPage *pRoot; Pgno pgnoRoot; int rc; - int ptfFlags; /* Page-type flage for the root page of new table */ + int ptfFlags; /* Page-type flags for the root page of new table */ assert( sqlite3BtreeHoldsMutex(p) ); assert( pBt->inTransaction==TRANS_WRITE ); @@ -76665,7 +79825,7 @@ static int clearDatabasePage( if( pgno>btreePagecount(pBt) ){ return SQLITE_CORRUPT_BKPT; } - rc = getAndInitPage(pBt, pgno, &pPage, 0, 0); + rc = getAndInitPage(pBt, pgno, &pPage, 0); if( rc ) return rc; if( (pBt->openFlags & BTREE_SINGLE)==0 && sqlite3PagerPageRefcount(pPage->pDbPage) != (1 + (pgno==1)) @@ -77015,6 +80175,41 @@ SQLITE_PRIVATE Pager *sqlite3BtreePager(Btree *p){ } #ifndef SQLITE_OMIT_INTEGRITY_CHECK +/* +** Record an OOM error during integrity_check +*/ +static void checkOom(IntegrityCk *pCheck){ + pCheck->rc = SQLITE_NOMEM; + pCheck->mxErr = 0; /* Causes integrity_check processing to stop */ + if( pCheck->nErr==0 ) pCheck->nErr++; +} + +/* +** Invoke the progress handler, if appropriate. Also check for an +** interrupt. 
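Reviewer note: the new checkProgress() above wires PRAGMA integrity_check into the database's progress handler, so a long check can now be interrupted. A usage sketch against the public API (error handling trimmed for brevity; returning nonzero from the callback aborts the statement with SQLITE_INTERRUPT):

#include <stdio.h>
#include "sqlite3.h"

static int onProgress(void *pArg){
  int *pnCalls = (int*)pArg;
  (*pnCalls)++;
  return 0;          /* return 1 instead to cancel the running statement */
}

int main(void){
  sqlite3 *db;
  int nCalls = 0;
  if( sqlite3_open(":memory:", &db)!=SQLITE_OK ) return 1;
  /* invoke onProgress roughly every 1000 VM instructions */
  sqlite3_progress_handler(db, 1000, onProgress, &nCalls);
  sqlite3_exec(db, "PRAGMA integrity_check;", 0, 0, 0);
  printf("progress callbacks: %d\n", nCalls);
  sqlite3_close(db);
  return 0;
}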
+*/ +static void checkProgress(IntegrityCk *pCheck){ + sqlite3 *db = pCheck->db; + if( AtomicLoad(&db->u1.isInterrupted) ){ + pCheck->rc = SQLITE_INTERRUPT; + pCheck->nErr++; + pCheck->mxErr = 0; + } +#ifndef SQLITE_OMIT_PROGRESS_CALLBACK + if( db->xProgress ){ + assert( db->nProgressOps>0 ); + pCheck->nStep++; + if( (pCheck->nStep % db->nProgressOps)==0 + && db->xProgress(db->pProgressArg) + ){ + pCheck->rc = SQLITE_INTERRUPT; + pCheck->nErr++; + pCheck->mxErr = 0; + } + } +#endif +} + /* ** Append a message to the error message string. */ @@ -77024,6 +80219,7 @@ static void checkAppendMsg( ... ){ va_list ap; + checkProgress(pCheck); if( !pCheck->mxErr ) return; pCheck->mxErr--; pCheck->nErr++; @@ -77032,12 +80228,13 @@ static void checkAppendMsg( sqlite3_str_append(&pCheck->errMsg, "\n", 1); } if( pCheck->zPfx ){ - sqlite3_str_appendf(&pCheck->errMsg, pCheck->zPfx, pCheck->v1, pCheck->v2); + sqlite3_str_appendf(&pCheck->errMsg, pCheck->zPfx, + pCheck->v0, pCheck->v1, pCheck->v2); } sqlite3_str_vappendf(&pCheck->errMsg, zFormat, ap); va_end(ap); if( pCheck->errMsg.accError==SQLITE_NOMEM ){ - pCheck->bOomFault = 1; + checkOom(pCheck); } } #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ @@ -77049,7 +80246,8 @@ static void checkAppendMsg( ** corresponds to page iPg is already set. */ static int getPageReferenced(IntegrityCk *pCheck, Pgno iPg){ - assert( iPg<=pCheck->nPage && sizeof(pCheck->aPgRef[0])==1 ); + assert( pCheck->aPgRef!=0 ); + assert( iPg<=pCheck->nCkPage && sizeof(pCheck->aPgRef[0])==1 ); return (pCheck->aPgRef[iPg/8] & (1 << (iPg & 0x07))); } @@ -77057,7 +80255,8 @@ static int getPageReferenced(IntegrityCk *pCheck, Pgno iPg){ ** Set the bit in the IntegrityCk.aPgRef[] array that corresponds to page iPg. */ static void setPageReferenced(IntegrityCk *pCheck, Pgno iPg){ - assert( iPg<=pCheck->nPage && sizeof(pCheck->aPgRef[0])==1 ); + assert( pCheck->aPgRef!=0 ); + assert( iPg<=pCheck->nCkPage && sizeof(pCheck->aPgRef[0])==1 ); pCheck->aPgRef[iPg/8] |= (1 << (iPg & 0x07)); } @@ -77071,15 +80270,14 @@ static void setPageReferenced(IntegrityCk *pCheck, Pgno iPg){ ** Also check that the page number is in bounds. 
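Reviewer note: getPageReferenced()/setPageReferenced() above track one bit per page in the aPgRef[] byte array (page iPg lives at byte iPg/8, bit iPg&7), which is why the checker allocates (nCkPage/8)+1 bytes for it. A standalone version of the bit twiddling:

#include <stdio.h>
#include <string.h>

static unsigned char aPgRef[(100/8)+1];   /* enough bits for 100 pages */

static int getRef(unsigned iPg){
  return (aPgRef[iPg/8] >> (iPg & 7)) & 1;
}
static void setRef(unsigned iPg){
  aPgRef[iPg/8] |= (unsigned char)(1 << (iPg & 7));
}

int main(void){
  memset(aPgRef, 0, sizeof(aPgRef));
  setRef(42);
  printf("%d %d\n", getRef(42), getRef(43));   /* -> 1 0 */
  return 0;
}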
*/ static int checkRef(IntegrityCk *pCheck, Pgno iPage){ - if( iPage>pCheck->nPage || iPage==0 ){ - checkAppendMsg(pCheck, "invalid page number %d", iPage); + if( iPage>pCheck->nCkPage || iPage==0 ){ + checkAppendMsg(pCheck, "invalid page number %u", iPage); return 1; } if( getPageReferenced(pCheck, iPage) ){ - checkAppendMsg(pCheck, "2nd reference to page %d", iPage); + checkAppendMsg(pCheck, "2nd reference to page %u", iPage); return 1; } - if( AtomicLoad(&pCheck->db->u1.isInterrupted) ) return 1; setPageReferenced(pCheck, iPage); return 0; } @@ -77102,14 +80300,14 @@ static void checkPtrmap( rc = ptrmapGet(pCheck->pBt, iChild, &ePtrmapType, &iPtrmapParent); if( rc!=SQLITE_OK ){ - if( rc==SQLITE_NOMEM || rc==SQLITE_IOERR_NOMEM ) pCheck->bOomFault = 1; - checkAppendMsg(pCheck, "Failed to read ptrmap key=%d", iChild); + if( rc==SQLITE_NOMEM || rc==SQLITE_IOERR_NOMEM ) checkOom(pCheck); + checkAppendMsg(pCheck, "Failed to read ptrmap key=%u", iChild); return; } if( ePtrmapType!=eType || iPtrmapParent!=iParent ){ checkAppendMsg(pCheck, - "Bad ptr map entry key=%d expected=(%d,%d) got=(%d,%d)", + "Bad ptr map entry key=%u expected=(%u,%u) got=(%u,%u)", iChild, eType, iParent, ePtrmapType, iPtrmapParent); } } @@ -77134,7 +80332,7 @@ static void checkList( if( checkRef(pCheck, iPage) ) break; N--; if( sqlite3PagerGet(pCheck->pPager, (Pgno)iPage, &pOvflPage, 0) ){ - checkAppendMsg(pCheck, "failed to get page %d", iPage); + checkAppendMsg(pCheck, "failed to get page %u", iPage); break; } pOvflData = (unsigned char *)sqlite3PagerGetData(pOvflPage); @@ -77147,7 +80345,7 @@ static void checkList( #endif if( n>pCheck->pBt->usableSize/4-2 ){ checkAppendMsg(pCheck, - "freelist leaf count too big on page %d", iPage); + "freelist leaf count too big on page %u", iPage); N--; }else{ for(i=0; i<(int)n; i++){ @@ -77179,7 +80377,7 @@ static void checkList( } if( N && nErrAtStart==pCheck->nErr ){ checkAppendMsg(pCheck, - "%s is %d but should be %d", + "%s is %u but should be %u", isFreeList ? "size" : "overflow list length", expected-N, expected); } @@ -77209,7 +80407,9 @@ static void checkList( ** lower 16 bits are the index of the last byte of that range. */ static void btreeHeapInsert(u32 *aHeap, u32 x){ - u32 j, i = ++aHeap[0]; + u32 j, i; + assert( aHeap!=0 ); + i = ++aHeap[0]; aHeap[i] = x; while( (j = i/2)>0 && aHeap[j]>aHeap[i] ){ x = aHeap[j]; @@ -77286,15 +80486,18 @@ static int checkTreePage( /* Check that the page exists */ + checkProgress(pCheck); + if( pCheck->mxErr==0 ) goto end_of_check; pBt = pCheck->pBt; usableSize = pBt->usableSize; if( iPage==0 ) return 0; if( checkRef(pCheck, iPage) ) return 0; - pCheck->zPfx = "Page %u: "; + pCheck->zPfx = "Tree %u page %u: "; pCheck->v1 = iPage; if( (rc = btreeGetPage(pBt, iPage, &pPage, 0))!=0 ){ checkAppendMsg(pCheck, "unable to get the page. 
error code=%d", rc); + if( rc==SQLITE_IOERR_NOMEM ) pCheck->rc = SQLITE_NOMEM; goto end_of_check; } @@ -77317,7 +80520,7 @@ static int checkTreePage( hdr = pPage->hdrOffset; /* Set up for cell analysis */ - pCheck->zPfx = "On tree page %u cell %d: "; + pCheck->zPfx = "Tree %u page %u cell %u: "; contentOffset = get2byteNotZero(&data[hdr+5]); assert( contentOffset<=usableSize ); /* Enforced by btreeInitPage() */ @@ -77337,7 +80540,7 @@ static int checkTreePage( pgno = get4byte(&data[hdr+8]); #ifndef SQLITE_OMIT_AUTOVACUUM if( pBt->autoVacuum ){ - pCheck->zPfx = "On page %u at right child: "; + pCheck->zPfx = "Tree %u page %u right child: "; checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage); } #endif @@ -77361,7 +80564,7 @@ static int checkTreePage( pc = get2byteAligned(pCellIdx); pCellIdx -= 2; if( pcusableSize-4 ){ - checkAppendMsg(pCheck, "Offset %d out of range %d..%d", + checkAppendMsg(pCheck, "Offset %u out of range %u..%u", pc, contentOffset, usableSize-4); doCoverageCheck = 0; continue; @@ -77493,7 +80696,7 @@ static int checkTreePage( */ if( heap[0]==0 && nFrag!=data[hdr+7] ){ checkAppendMsg(pCheck, - "Fragmentation of %d bytes reported as %d on page %u", + "Fragmentation of %u bytes reported as %u on page %u", nFrag, data[hdr+7], iPage); } } @@ -77531,13 +80734,14 @@ static int checkTreePage( ** the unverified btrees. Except, if aRoot[1] is 1, then the freelist ** checks are still performed. */ -SQLITE_PRIVATE char *sqlite3BtreeIntegrityCheck( +SQLITE_PRIVATE int sqlite3BtreeIntegrityCheck( sqlite3 *db, /* Database connection that is running the check */ Btree *p, /* The btree to be checked */ Pgno *aRoot, /* An array of root pages numbers for individual trees */ int nRoot, /* Number of entries in aRoot[] */ int mxErr, /* Stop reporting errors after this many */ - int *pnErr /* Write number of errors seen to this variable */ + int *pnErr, /* OUT: Write number of errors seen to this variable */ + char **pzOut /* OUT: Write the error message string here */ ){ Pgno i; IntegrityCk sCheck; @@ -77560,42 +80764,36 @@ SQLITE_PRIVATE char *sqlite3BtreeIntegrityCheck( assert( p->inTrans>TRANS_NONE && pBt->inTransaction>TRANS_NONE ); VVA_ONLY( nRef = sqlite3PagerRefcount(pBt->pPager) ); assert( nRef>=0 ); + memset(&sCheck, 0, sizeof(sCheck)); sCheck.db = db; sCheck.pBt = pBt; sCheck.pPager = pBt->pPager; - sCheck.nPage = btreePagecount(sCheck.pBt); + sCheck.nCkPage = btreePagecount(sCheck.pBt); sCheck.mxErr = mxErr; - sCheck.nErr = 0; - sCheck.bOomFault = 0; - sCheck.zPfx = 0; - sCheck.v1 = 0; - sCheck.v2 = 0; - sCheck.aPgRef = 0; - sCheck.heap = 0; sqlite3StrAccumInit(&sCheck.errMsg, 0, zErr, sizeof(zErr), SQLITE_MAX_LENGTH); sCheck.errMsg.printfFlags = SQLITE_PRINTF_INTERNAL; - if( sCheck.nPage==0 ){ + if( sCheck.nCkPage==0 ){ goto integrity_ck_cleanup; } - sCheck.aPgRef = sqlite3MallocZero((sCheck.nPage / 8)+ 1); + sCheck.aPgRef = sqlite3MallocZero((sCheck.nCkPage / 8)+ 1); if( !sCheck.aPgRef ){ - sCheck.bOomFault = 1; + checkOom(&sCheck); goto integrity_ck_cleanup; } sCheck.heap = (u32*)sqlite3PageMalloc( pBt->pageSize ); if( sCheck.heap==0 ){ - sCheck.bOomFault = 1; + checkOom(&sCheck); goto integrity_ck_cleanup; } i = PENDING_BYTE_PAGE(pBt); - if( i<=sCheck.nPage ) setPageReferenced(&sCheck, i); + if( i<=sCheck.nCkPage ) setPageReferenced(&sCheck, i); /* Check the integrity of the freelist */ if( bCkFreelist ){ - sCheck.zPfx = "Main freelist: "; + sCheck.zPfx = "Freelist: "; checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]), get4byte(&pBt->pPage1->aData[36])); sCheck.zPfx = 0; @@ 
-77612,7 +80810,7 @@ SQLITE_PRIVATE char *sqlite3BtreeIntegrityCheck( mxInHdr = get4byte(&pBt->pPage1->aData[52]); if( mx!=mxInHdr ){ checkAppendMsg(&sCheck, - "max rootpage (%d) disagrees with header (%d)", + "max rootpage (%u) disagrees with header (%u)", mx, mxInHdr ); } @@ -77633,6 +80831,7 @@ SQLITE_PRIVATE char *sqlite3BtreeIntegrityCheck( checkPtrmap(&sCheck, aRoot[i], PTRMAP_ROOTPAGE, 0); } #endif + sCheck.v0 = aRoot[i]; checkTreePage(&sCheck, aRoot[i], ¬Used, LARGEST_INT64); } pBt->db->flags = savedDbFlags; @@ -77640,10 +80839,10 @@ SQLITE_PRIVATE char *sqlite3BtreeIntegrityCheck( /* Make sure every page in the file is referenced */ if( !bPartial ){ - for(i=1; i<=sCheck.nPage && sCheck.mxErr; i++){ + for(i=1; i<=sCheck.nCkPage && sCheck.mxErr; i++){ #ifdef SQLITE_OMIT_AUTOVACUUM if( getPageReferenced(&sCheck, i)==0 ){ - checkAppendMsg(&sCheck, "Page %d is never used", i); + checkAppendMsg(&sCheck, "Page %u: never used", i); } #else /* If the database supports auto-vacuum, make sure no tables contain @@ -77651,11 +80850,11 @@ SQLITE_PRIVATE char *sqlite3BtreeIntegrityCheck( */ if( getPageReferenced(&sCheck, i)==0 && (PTRMAP_PAGENO(pBt, i)!=i || !pBt->autoVacuum) ){ - checkAppendMsg(&sCheck, "Page %d is never used", i); + checkAppendMsg(&sCheck, "Page %u: never used", i); } if( getPageReferenced(&sCheck, i)!=0 && (PTRMAP_PAGENO(pBt, i)==i && pBt->autoVacuum) ){ - checkAppendMsg(&sCheck, "Pointer map page %d is referenced", i); + checkAppendMsg(&sCheck, "Page %u: pointer map referenced", i); } #endif } @@ -77666,16 +80865,17 @@ SQLITE_PRIVATE char *sqlite3BtreeIntegrityCheck( integrity_ck_cleanup: sqlite3PageFree(sCheck.heap); sqlite3_free(sCheck.aPgRef); - if( sCheck.bOomFault ){ + *pnErr = sCheck.nErr; + if( sCheck.nErr==0 ){ sqlite3_str_reset(&sCheck.errMsg); - sCheck.nErr++; + *pzOut = 0; + }else{ + *pzOut = sqlite3StrAccumFinish(&sCheck.errMsg); } - *pnErr = sCheck.nErr; - if( sCheck.nErr==0 ) sqlite3_str_reset(&sCheck.errMsg); /* Make sure this analysis did not leave any unref() pages. */ assert( nRef==sqlite3PagerRefcount(pBt->pPager) ); sqlite3BtreeLeave(p); - return sqlite3StrAccumFinish(&sCheck.errMsg); + return sCheck.rc; } #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ @@ -77940,6 +81140,17 @@ SQLITE_PRIVATE int sqlite3BtreeIsReadonly(Btree *p){ */ SQLITE_PRIVATE int sqlite3HeaderSizeBtree(void){ return ROUND8(sizeof(MemPage)); } +/* +** If no transaction is active and the database is not a temp-db, clear +** the in-memory pager cache. +*/ +SQLITE_PRIVATE void sqlite3BtreeClearCache(Btree *p){ + BtShared *pBt = p->pBt; + if( pBt->inTransaction==TRANS_NONE ){ + sqlite3PagerClearCache(pBt->pPager); + } +} + #if !defined(SQLITE_OMIT_SHARED_CACHE) /* ** Return true if the Btree passed as the only argument is sharable. @@ -78205,13 +81416,7 @@ static int backupOnePage( assert( !isFatalError(p->rc) ); assert( iSrcPg!=PENDING_BYTE_PAGE(p->pSrc->pBt) ); assert( zSrcData ); - - /* Catch the case where the destination is an in-memory database and the - ** page sizes of the source and destination differ. - */ - if( nSrcPgsz!=nDestPgsz && sqlite3PagerIsMemdb(pDestPager) ){ - rc = SQLITE_READONLY; - } + assert( nSrcPgsz==nDestPgsz || sqlite3PagerIsMemdb(pDestPager)==0 ); /* This loop runs once for each destination page spanned by the source ** page. 
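Reviewer note: btreeHeapInsert() in the integrity-check hunks above maintains a 1-based binary min-heap whose slot 0 holds the element count; the checker packs each cell's byte range into a single u32 (start in the high 16 bits, last byte in the low 16), so pulling values in ascending order exposes overlapping cells. A standalone sketch of insert and pull; the amalgamation's btreeHeapPull() differs in detail.

#include <stdio.h>

static void heapInsert(unsigned *aHeap, unsigned x){
  unsigned j, i = ++aHeap[0];
  aHeap[i] = x;
  while( (j = i/2)>0 && aHeap[j]>aHeap[i] ){    /* sift up */
    unsigned t = aHeap[j]; aHeap[j] = aHeap[i]; aHeap[i] = t;
    i = j;
  }
}

static int heapPull(unsigned *aHeap, unsigned *pOut){
  unsigned j, i, x;
  if( (x = aHeap[0])==0 ) return 0;
  *pOut = aHeap[1];           /* minimum is always at slot 1 */
  aHeap[1] = aHeap[x];
  aHeap[0]--;
  i = 1;
  while( (j = i*2)<=aHeap[0] ){                 /* sift down */
    if( j<aHeap[0] && aHeap[j+1]<aHeap[j] ) j++;
    if( aHeap[i]<=aHeap[j] ) break;
    x = aHeap[i]; aHeap[i] = aHeap[j]; aHeap[j] = x;
    i = j;
  }
  return 1;
}

int main(void){
  unsigned aHeap[16] = {0}, v;
  heapInsert(aHeap, 30); heapInsert(aHeap, 10); heapInsert(aHeap, 20);
  while( heapPull(aHeap, &v) ) printf("%u ", v);   /* -> 10 20 30 */
  printf("\n");
  return 0;
}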
For each iteration, variable iOff is set to the byte offset @@ -78344,7 +81549,10 @@ SQLITE_API int sqlite3_backup_step(sqlite3_backup *p, int nPage){ pgszSrc = sqlite3BtreeGetPageSize(p->pSrc); pgszDest = sqlite3BtreeGetPageSize(p->pDest); destMode = sqlite3PagerGetJournalMode(sqlite3BtreePager(p->pDest)); - if( SQLITE_OK==rc && destMode==PAGER_JOURNALMODE_WAL && pgszSrc!=pgszDest ){ + if( SQLITE_OK==rc + && (destMode==PAGER_JOURNALMODE_WAL || sqlite3PagerIsMemdb(pDestPager)) + && pgszSrc!=pgszDest + ){ rc = SQLITE_READONLY; } @@ -78850,9 +82058,9 @@ static void vdbeMemRenderNum(int sz, char *zBuf, Mem *p){ i64 x; assert( (p->flags&MEM_Int)*2==sizeof(x) ); memcpy(&x, (char*)&p->u, (p->flags&MEM_Int)*2); - sqlite3Int64ToText(x, zBuf); + p->n = sqlite3Int64ToText(x, zBuf); #else - sqlite3Int64ToText(p->u.i, zBuf); + p->n = sqlite3Int64ToText(p->u.i, zBuf); #endif }else{ sqlite3StrAccumInit(&acc, 0, zBuf, sz, 0); @@ -78860,6 +82068,7 @@ static void vdbeMemRenderNum(int sz, char *zBuf, Mem *p){ (p->flags & MEM_IntReal)!=0 ? (double)p->u.i : p->u.r); assert( acc.zText==zBuf && acc.mxAlloc<=0 ); zBuf[acc.nChar] = 0; /* Fast version of sqlite3StrAccumFinish(&acc) */ + p->n = acc.nChar; } } @@ -78887,10 +82096,12 @@ static void vdbeMemRenderNum(int sz, char *zBuf, Mem *p){ ** This routine is for use inside of assert() statements only. */ SQLITE_PRIVATE int sqlite3VdbeMemValidStrRep(Mem *p){ + Mem tmp; char zBuf[100]; char *z; int i, j, incr; if( (p->flags & MEM_Str)==0 ) return 1; + if( p->db && p->db->mallocFailed ) return 1; if( p->flags & MEM_Term ){ /* Insure that the string is properly zero-terminated. Pay particular ** attention to the case where p->n is odd */ @@ -78903,7 +82114,8 @@ SQLITE_PRIVATE int sqlite3VdbeMemValidStrRep(Mem *p){ assert( p->enc==SQLITE_UTF8 || p->z[((p->n+1)&~1)+1]==0 ); } if( (p->flags & (MEM_Int|MEM_Real|MEM_IntReal))==0 ) return 1; - vdbeMemRenderNum(sizeof(zBuf), zBuf, p); + memcpy(&tmp, p, sizeof(tmp)); + vdbeMemRenderNum(sizeof(zBuf), zBuf, &tmp); z = p->z; i = j = 0; incr = 1; @@ -79046,6 +82258,40 @@ SQLITE_PRIVATE int sqlite3VdbeMemClearAndResize(Mem *pMem, int szNew){ return SQLITE_OK; } +/* +** If pMem is already a string, detect if it is a zero-terminated +** string, or make it into one if possible, and mark it as such. +** +** This is an optimization. Correct operation continues even if +** this routine is a no-op. +*/ +SQLITE_PRIVATE void sqlite3VdbeMemZeroTerminateIfAble(Mem *pMem){ + if( (pMem->flags & (MEM_Str|MEM_Term|MEM_Ephem|MEM_Static))!=MEM_Str ){ + /* pMem must be a string, and it cannot be an ephemeral or static string */ + return; + } + if( pMem->enc!=SQLITE_UTF8 ) return; + if( NEVER(pMem->z==0) ) return; + if( pMem->flags & MEM_Dyn ){ + if( pMem->xDel==sqlite3_free + && sqlite3_msize(pMem->z) >= (u64)(pMem->n+1) + ){ + pMem->z[pMem->n] = 0; + pMem->flags |= MEM_Term; + return; + } + if( pMem->xDel==sqlite3RCStrUnref ){ + /* Blindly assume that all RCStr objects are zero-terminated */ + pMem->flags |= MEM_Term; + return; + } + }else if( pMem->szMalloc >= pMem->n+1 ){ + pMem->z[pMem->n] = 0; + pMem->flags |= MEM_Term; + return; + } +} + /* ** It is already known that pMem contains an unterminated string. ** Add the zero terminator. 
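Reviewer note: the new sqlite3VdbeMemZeroTerminateIfAble() above leans on sqlite3_msize(), which reports the true size of a sqlite3_malloc() block, to zero-terminate a string in place whenever the allocation happens to have a spare byte, with no reallocation. A usage sketch of that trick against the public allocator API:

#include <stdio.h>
#include <string.h>
#include "sqlite3.h"

int main(void){
  int n = 5;
  char *z = sqlite3_malloc(8);          /* allocator may round this up */
  if( z==0 ) return 1;
  memcpy(z, "hello", n);                /* unterminated; length n known */
  if( sqlite3_msize(z) >= (sqlite3_uint64)(n+1) ){
    z[n] = 0;                           /* safe: room for a terminator */
    printf("%s\n", z);
  }
  sqlite3_free(z);
  return 0;
}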
@@ -79172,7 +82418,7 @@ SQLITE_PRIVATE int sqlite3VdbeMemStringify(Mem *pMem, u8 enc, u8 bForce){ vdbeMemRenderNum(nByte, pMem->z, pMem); assert( pMem->z!=0 ); - pMem->n = sqlite3Strlen30NN(pMem->z); + assert( pMem->n==(int)sqlite3Strlen30NN(pMem->z) ); pMem->enc = SQLITE_UTF8; pMem->flags |= MEM_Str|MEM_Term; if( bForce ) pMem->flags &= ~(MEM_Int|MEM_Real|MEM_IntReal); @@ -79307,36 +82553,6 @@ SQLITE_PRIVATE void sqlite3VdbeMemReleaseMalloc(Mem *p){ if( p->szMalloc ) vdbeMemClear(p); } -/* -** Convert a 64-bit IEEE double into a 64-bit signed integer. -** If the double is out of range of a 64-bit signed integer then -** return the closest available 64-bit signed integer. -*/ -static SQLITE_NOINLINE i64 doubleToInt64(double r){ -#ifdef SQLITE_OMIT_FLOATING_POINT - /* When floating-point is omitted, double and int64 are the same thing */ - return r; -#else - /* - ** Many compilers we encounter do not define constants for the - ** minimum and maximum 64-bit integers, or they define them - ** inconsistently. And many do not understand the "LL" notation. - ** So we define our own static constants here using nothing - ** larger than a 32-bit integer constant. - */ - static const i64 maxInt = LARGEST_INT64; - static const i64 minInt = SMALLEST_INT64; - - if( r<=(double)minInt ){ - return minInt; - }else if( r>=(double)maxInt ){ - return maxInt; - }else{ - return (i64)r; - } -#endif -} - /* ** Return some kind of integer value which is the best we can do ** at representing the value that *pMem describes as an integer. @@ -79363,7 +82579,7 @@ SQLITE_PRIVATE i64 sqlite3VdbeIntValue(const Mem *pMem){ testcase( flags & MEM_IntReal ); return pMem->u.i; }else if( flags & MEM_Real ){ - return doubleToInt64(pMem->u.r); + return sqlite3RealToI64(pMem->u.r); }else if( (flags & (MEM_Str|MEM_Blob))!=0 && pMem->z!=0 ){ return memIntValue(pMem); }else{ @@ -79412,32 +82628,35 @@ SQLITE_PRIVATE int sqlite3VdbeBooleanValue(Mem *pMem, int ifNull){ } /* -** The MEM structure is already a MEM_Real. Try to also make it a -** MEM_Int if we can. +** The MEM structure is already a MEM_Real or MEM_IntReal. Try to +** make it a MEM_Int if we can. */ SQLITE_PRIVATE void sqlite3VdbeIntegerAffinity(Mem *pMem){ - i64 ix; assert( pMem!=0 ); - assert( pMem->flags & MEM_Real ); + assert( pMem->flags & (MEM_Real|MEM_IntReal) ); assert( !sqlite3VdbeMemIsRowSet(pMem) ); assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) ); assert( EIGHT_BYTE_ALIGNMENT(pMem) ); - ix = doubleToInt64(pMem->u.r); - - /* Only mark the value as an integer if - ** - ** (1) the round-trip conversion real->int->real is a no-op, and - ** (2) The integer is neither the largest nor the smallest - ** possible integer (ticket #3922) - ** - ** The second and third terms in the following conditional enforces - ** the second condition under the assumption that addition overflow causes - ** values to wrap around. - */ - if( pMem->u.r==ix && ix>SMALLEST_INT64 && ixu.i = ix; + if( pMem->flags & MEM_IntReal ){ MemSetTypeFlag(pMem, MEM_Int); + }else{ + i64 ix = sqlite3RealToI64(pMem->u.r); + + /* Only mark the value as an integer if + ** + ** (1) the round-trip conversion real->int->real is a no-op, and + ** (2) The integer is neither the largest nor the smallest + ** possible integer (ticket #3922) + ** + ** The second and third terms in the following conditional enforces + ** the second condition under the assumption that addition overflow causes + ** values to wrap around. 
+ */ + if( pMem->u.r==ix && ix>SMALLEST_INT64 && ixu.i = ix; + MemSetTypeFlag(pMem, MEM_Int); + } } } @@ -79485,6 +82704,16 @@ SQLITE_PRIVATE int sqlite3RealSameAsInt(double r1, sqlite3_int64 i){ && i >= -2251799813685248LL && i < 2251799813685248LL); } +/* Convert a floating point value to its closest integer. Do so in +** a way that avoids 'outside the range of representable values' warnings +** from UBSAN. +*/ +SQLITE_PRIVATE i64 sqlite3RealToI64(double r){ + if( r<-9223372036854774784.0 ) return SMALLEST_INT64; + if( r>+9223372036854774784.0 ) return LARGEST_INT64; + return (i64)r; +} + /* ** Convert pMem so that it has type MEM_Real or MEM_Int. ** Invalidate any prior representations. @@ -79506,7 +82735,7 @@ SQLITE_PRIVATE int sqlite3VdbeMemNumerify(Mem *pMem){ assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) ); rc = sqlite3AtoF(pMem->z, &pMem->u.r, pMem->n, pMem->enc); if( ((rc==0 || rc==1) && sqlite3Atoi64(pMem->z, &ix, pMem->n, pMem->enc)<=1) - || sqlite3RealSameAsInt(pMem->u.r, (ix = (i64)pMem->u.r)) + || sqlite3RealSameAsInt(pMem->u.r, (ix = sqlite3RealToI64(pMem->u.r))) ){ pMem->u.i = ix; MemSetTypeFlag(pMem, MEM_Int); @@ -79552,13 +82781,17 @@ SQLITE_PRIVATE int sqlite3VdbeMemCast(Mem *pMem, u8 aff, u8 encoding){ break; } default: { + int rc; assert( aff==SQLITE_AFF_TEXT ); assert( MEM_Str==(MEM_Blob>>3) ); pMem->flags |= (pMem->flags&MEM_Blob)>>3; sqlite3ValueApplyAffinity(pMem, SQLITE_AFF_TEXT, encoding); assert( pMem->flags & MEM_Str || pMem->db->mallocFailed ); pMem->flags &= ~(MEM_Int|MEM_Real|MEM_IntReal|MEM_Blob|MEM_Zero); - return sqlite3VdbeChangeEncoding(pMem, encoding); + if( encoding!=SQLITE_UTF8 ) pMem->n &= ~1; + rc = sqlite3VdbeChangeEncoding(pMem, encoding); + if( rc ) return rc; + sqlite3VdbeMemZeroTerminateIfAble(pMem); } } return SQLITE_OK; @@ -80082,6 +83315,24 @@ SQLITE_PRIVATE const void *sqlite3ValueText(sqlite3_value* pVal, u8 enc){ return valueToText(pVal, enc); } +/* Return true if sqlit3_value object pVal is a string or blob value +** that uses the destructor specified in the second argument. +** +** TODO: Maybe someday promote this interface into a published API so +** that third-party extensions can get access to it? +*/ +SQLITE_PRIVATE int sqlite3ValueIsOfClass(const sqlite3_value *pVal, void(*xFree)(void*)){ + if( ALWAYS(pVal!=0) + && ALWAYS((pVal->flags & (MEM_Str|MEM_Blob))!=0) + && (pVal->flags & MEM_Dyn)!=0 + && pVal->xDel==xFree + ){ + return 1; + }else{ + return 0; + } +} + /* ** Create a new sqlite3_value object. 
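Reviewer note: sqlite3RealToI64() above replaces the old doubleToInt64() and clamps before casting, since converting an out-of-range double to a 64-bit integer is undefined behavior (the UBSAN warning its comment mentions). 9223372036854774784.0 is the largest double strictly below 2^63. A quick standalone check of the same clamp:

#include <stdio.h>
#include <stdint.h>

#define I64_MIN (-9223372036854775807LL - 1)
#define I64_MAX 9223372036854775807LL

static int64_t realToI64(double r){
  if( r < -9223372036854774784.0 ) return I64_MIN;   /* saturate low */
  if( r > +9223372036854774784.0 ) return I64_MAX;   /* saturate high */
  return (int64_t)r;                                 /* in-range cast */
}

int main(void){
  printf("%lld\n", (long long)realToI64(1.5e19));   /* -> I64_MAX */
  printf("%lld\n", (long long)realToI64(-1.5e19));  /* -> I64_MIN */
  printf("%lld\n", (long long)realToI64(42.9));     /* -> 42 */
  return 0;
}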
*/ @@ -80149,6 +83400,7 @@ static sqlite3_value *valueNew(sqlite3 *db, struct ValueNewStat4Ctx *p){ } pRec->nField = p->iVal+1; + sqlite3VdbeMemSetNull(&pRec->aMem[p->iVal]); return &pRec->aMem[p->iVal]; } #else @@ -80202,9 +83454,12 @@ static int valueFromFunction( if( pList ) nVal = pList->nExpr; assert( !ExprHasProperty(p, EP_IntValue) ); pFunc = sqlite3FindFunction(db, p->u.zToken, nVal, enc, 0); +#ifdef SQLITE_ENABLE_UNKNOWN_SQL_FUNCTION + if( pFunc==0 ) return SQLITE_OK; +#endif assert( pFunc ); if( (pFunc->funcFlags & (SQLITE_FUNC_CONSTANT|SQLITE_FUNC_SLOCHNG))==0 - || (pFunc->funcFlags & SQLITE_FUNC_NEEDCOLL) + || (pFunc->funcFlags & (SQLITE_FUNC_NEEDCOLL|SQLITE_FUNC_RUNONLY))!=0 ){ return SQLITE_OK; } @@ -80227,8 +83482,6 @@ static int valueFromFunction( goto value_from_function_out; } - testcase( pCtx->pParse->rc==SQLITE_ERROR ); - testcase( pCtx->pParse->rc==SQLITE_OK ); memset(&ctx, 0, sizeof(ctx)); ctx.pOut = pVal; ctx.pFunc = pFunc; @@ -80241,16 +83494,16 @@ static int valueFromFunction( sqlite3ValueApplyAffinity(pVal, aff, SQLITE_UTF8); assert( rc==SQLITE_OK ); rc = sqlite3VdbeChangeEncoding(pVal, enc); - if( rc==SQLITE_OK && sqlite3VdbeMemTooBig(pVal) ){ + if( NEVER(rc==SQLITE_OK && sqlite3VdbeMemTooBig(pVal)) ){ rc = SQLITE_TOOBIG; pCtx->pParse->nErr++; } } - pCtx->pParse->rc = rc; value_from_function_out: if( rc!=SQLITE_OK ){ pVal = 0; + pCtx->pParse->rc = rc; } if( apVal ){ for(i=0; ipLeft, enc, aff, ppVal, pCtx); testcase( rc!=SQLITE_OK ); if( *ppVal ){ +#ifdef SQLITE_ENABLE_STAT4 + rc = ExpandBlob(*ppVal); +#else + /* zero-blobs only come from functions, not literal values. And + ** functions are only processed under STAT4 */ + assert( (ppVal[0][0].flags & MEM_Zero)==0 ); +#endif sqlite3VdbeMemCast(*ppVal, aff, enc); sqlite3ValueApplyAffinity(*ppVal, affinity, enc); } @@ -80400,6 +83660,7 @@ static int valueFromExpr( if( pVal ){ pVal->flags = MEM_Int; pVal->u.i = pExpr->u.zToken[4]==0; + sqlite3ValueApplyAffinity(pVal, affinity, enc); } } @@ -80693,6 +83954,9 @@ SQLITE_PRIVATE int sqlite3ValueBytes(sqlite3_value *pVal, u8 enc){ if( (p->flags & MEM_Str)!=0 && pVal->enc==enc ){ return p->n; } + if( (p->flags & MEM_Str)!=0 && enc!=SQLITE_UTF8 && pVal->enc!=SQLITE_UTF8 ){ + return p->n; + } if( (p->flags & MEM_Blob)!=0 ){ if( p->flags & MEM_Zero ){ return p->n + p->u.nZero; @@ -80738,10 +84002,10 @@ SQLITE_PRIVATE Vdbe *sqlite3VdbeCreate(Parse *pParse){ memset(&p->aOp, 0, sizeof(Vdbe)-offsetof(Vdbe,aOp)); p->db = db; if( db->pVdbe ){ - db->pVdbe->pPrev = p; + db->pVdbe->ppVPrev = &p->pVNext; } - p->pNext = db->pVdbe; - p->pPrev = 0; + p->pVNext = db->pVdbe; + p->ppVPrev = &db->pVdbe; db->pVdbe = p; assert( p->eVdbeState==VDBE_INIT_STATE ); p->pParse = pParse; @@ -80823,21 +84087,28 @@ SQLITE_PRIVATE int sqlite3VdbeUsesDoubleQuotedString( #endif /* -** Swap all content between two VDBE structures. +** Swap byte-code between two VDBE structures. +** +** This happens after pB was previously run and returned +** SQLITE_SCHEMA. The statement was then reprepared in pA. +** This routine transfers the new bytecode in pA over to pB +** so that pB can be run again. The old pB byte code is +** moved back to pA so that it will be cleaned up when pA is +** finalized. 
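Reviewer note: the Vdbe-list hunks above (sqlite3VdbeCreate() and sqlite3VdbeSwap()) replace the pPrev back-pointer with ppVPrev, a pointer to whatever pointer currently points at the node, so unlinking needs no special case for the list head. A sketch of that intrusive-list idiom with an invented node type:

#include <stdio.h>

typedef struct Node Node;
struct Node {
  int val;
  Node *pNext;      /* next node, or NULL */
  Node **ppPrev;    /* address of the pointer that points to this node */
};

static void insertHead(Node **ppHead, Node *p){
  if( *ppHead ) (*ppHead)->ppPrev = &p->pNext;
  p->pNext = *ppHead;
  p->ppPrev = ppHead;
  *ppHead = p;
}

static void unlink_(Node *p){
  *p->ppPrev = p->pNext;               /* works even for the head */
  if( p->pNext ) p->pNext->ppPrev = p->ppPrev;
}

int main(void){
  Node *pHead = 0;
  Node a = {1,0,0}, b = {2,0,0};
  insertHead(&pHead, &a);
  insertHead(&pHead, &b);              /* list is now: b, a */
  unlink_(&b);                         /* head removal, no branching */
  printf("%d\n", pHead->val);          /* -> 1 */
  return 0;
}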
*/ SQLITE_PRIVATE void sqlite3VdbeSwap(Vdbe *pA, Vdbe *pB){ - Vdbe tmp, *pTmp; + Vdbe tmp, *pTmp, **ppTmp; char *zTmp; assert( pA->db==pB->db ); tmp = *pA; *pA = *pB; *pB = tmp; - pTmp = pA->pNext; - pA->pNext = pB->pNext; - pB->pNext = pTmp; - pTmp = pA->pPrev; - pA->pPrev = pB->pPrev; - pB->pPrev = pTmp; + pTmp = pA->pVNext; + pA->pVNext = pB->pVNext; + pB->pVNext = pTmp; + ppTmp = pA->ppVPrev; + pA->ppVPrev = pB->ppVPrev; + pB->ppVPrev = ppTmp; zTmp = pA->zSql; pA->zSql = pB->zSql; pB->zSql = zTmp; @@ -80912,11 +84183,43 @@ static int growOpArray(Vdbe *v, int nOp){ ** sqlite3CantopenError(lineno) */ static void test_addop_breakpoint(int pc, Op *pOp){ - static int n = 0; + static u64 n = 0; + (void)pc; + (void)pOp; n++; + if( n==LARGEST_UINT64 ) abort(); /* so that n is used, preventing a warning */ } #endif +/* +** Slow paths for sqlite3VdbeAddOp3() and sqlite3VdbeAddOp4Int() for the +** unusual case when we need to increase the size of the Vdbe.aOp[] array +** before adding the new opcode. +*/ +static SQLITE_NOINLINE int growOp3(Vdbe *p, int op, int p1, int p2, int p3){ + assert( p->nOpAlloc<=p->nOp ); + if( growOpArray(p, 1) ) return 1; + assert( p->nOpAlloc>p->nOp ); + return sqlite3VdbeAddOp3(p, op, p1, p2, p3); +} +static SQLITE_NOINLINE int addOp4IntSlow( + Vdbe *p, /* Add the opcode to this VM */ + int op, /* The new opcode */ + int p1, /* The P1 operand */ + int p2, /* The P2 operand */ + int p3, /* The P3 operand */ + int p4 /* The P4 operand as an integer */ +){ + int addr = sqlite3VdbeAddOp3(p, op, p1, p2, p3); + if( p->db->mallocFailed==0 ){ + VdbeOp *pOp = &p->aOp[addr]; + pOp->p4type = P4_INT32; + pOp->p4.i = p4; + } + return addr; +} + + /* ** Add a new instruction to the list of instructions current in the ** VDBE. Return the address of the new instruction. @@ -80927,17 +84230,16 @@ static void test_addop_breakpoint(int pc, Op *pOp){ ** ** op The opcode for this instruction ** -** p1, p2, p3 Operands -** -** Use the sqlite3VdbeResolveLabel() function to fix an address and -** the sqlite3VdbeChangeP4() function to change the value of the P4 -** operand. 
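The pPrev-to-ppVPrev conversion that runs through these hunks replaces a conventional back-pointer with a pointer to the previous link field, which removes the head-of-list special case from both the swap above and the unlink in sqlite3VdbeDelete() further down. A minimal sketch of the idiom, independent of the Vdbe type (all names illustrative):

typedef struct Node Node;
struct Node {
  Node *pNext;    // next element, or NULL
  Node **ppPrev;  // address of the pointer that points at this node
};

// Link pNew at the front of the list whose head pointer is *ppHead.
static void link_front(Node **ppHead, Node *pNew){
  if( *ppHead ) (*ppHead)->ppPrev = &pNew->pNext;
  pNew->pNext = *ppHead;
  pNew->ppPrev = ppHead;   // may be the list head or some pNext field
  *ppHead = pNew;
}

// Unlink in O(1) with no "is this the head?" branch:
static void unlink_node(Node *p){
  *p->ppPrev = p->pNext;
  if( p->pNext ) p->pNext->ppPrev = p->ppPrev;
}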
+** p1, p2, p3, p4 Operands */ -static SQLITE_NOINLINE int growOp3(Vdbe *p, int op, int p1, int p2, int p3){ - assert( p->nOpAlloc<=p->nOp ); - if( growOpArray(p, 1) ) return 1; - assert( p->nOpAlloc>p->nOp ); - return sqlite3VdbeAddOp3(p, op, p1, p2, p3); +SQLITE_PRIVATE int sqlite3VdbeAddOp0(Vdbe *p, int op){ + return sqlite3VdbeAddOp3(p, op, 0, 0, 0); +} +SQLITE_PRIVATE int sqlite3VdbeAddOp1(Vdbe *p, int op, int p1){ + return sqlite3VdbeAddOp3(p, op, p1, 0, 0); +} +SQLITE_PRIVATE int sqlite3VdbeAddOp2(Vdbe *p, int op, int p1, int p2){ + return sqlite3VdbeAddOp3(p, op, p1, p2, 0); } SQLITE_PRIVATE int sqlite3VdbeAddOp3(Vdbe *p, int op, int p1, int p2, int p3){ int i; @@ -80960,32 +84262,78 @@ SQLITE_PRIVATE int sqlite3VdbeAddOp3(Vdbe *p, int op, int p1, int p2, int p3){ pOp->p3 = p3; pOp->p4.p = 0; pOp->p4type = P4_NOTUSED; + + /* Replicate this logic in sqlite3VdbeAddOp4Int() + ** vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv */ #ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS pOp->zComment = 0; #endif +#if defined(SQLITE_ENABLE_STMT_SCANSTATUS) || defined(VDBE_PROFILE) + pOp->nExec = 0; + pOp->nCycle = 0; +#endif #ifdef SQLITE_DEBUG if( p->db->flags & SQLITE_VdbeAddopTrace ){ sqlite3VdbePrintOp(0, i, &p->aOp[i]); test_addop_breakpoint(i, &p->aOp[i]); } #endif -#ifdef VDBE_PROFILE - pOp->cycles = 0; - pOp->cnt = 0; -#endif #ifdef SQLITE_VDBE_COVERAGE pOp->iSrcLine = 0; #endif + /* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + ** Replicate in sqlite3VdbeAddOp4Int() */ + return i; } -SQLITE_PRIVATE int sqlite3VdbeAddOp0(Vdbe *p, int op){ - return sqlite3VdbeAddOp3(p, op, 0, 0, 0); -} -SQLITE_PRIVATE int sqlite3VdbeAddOp1(Vdbe *p, int op, int p1){ - return sqlite3VdbeAddOp3(p, op, p1, 0, 0); -} -SQLITE_PRIVATE int sqlite3VdbeAddOp2(Vdbe *p, int op, int p1, int p2){ - return sqlite3VdbeAddOp3(p, op, p1, p2, 0); +SQLITE_PRIVATE int sqlite3VdbeAddOp4Int( + Vdbe *p, /* Add the opcode to this VM */ + int op, /* The new opcode */ + int p1, /* The P1 operand */ + int p2, /* The P2 operand */ + int p3, /* The P3 operand */ + int p4 /* The P4 operand as an integer */ +){ + int i; + VdbeOp *pOp; + + i = p->nOp; + if( p->nOpAlloc<=i ){ + return addOp4IntSlow(p, op, p1, p2, p3, p4); + } + p->nOp++; + pOp = &p->aOp[i]; + assert( pOp!=0 ); + pOp->opcode = (u8)op; + pOp->p5 = 0; + pOp->p1 = p1; + pOp->p2 = p2; + pOp->p3 = p3; + pOp->p4.i = p4; + pOp->p4type = P4_INT32; + + /* Replicate this logic in sqlite3VdbeAddOp3() + ** vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv */ +#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS + pOp->zComment = 0; +#endif +#if defined(SQLITE_ENABLE_STMT_SCANSTATUS) || defined(VDBE_PROFILE) + pOp->nExec = 0; + pOp->nCycle = 0; +#endif +#ifdef SQLITE_DEBUG + if( p->db->flags & SQLITE_VdbeAddopTrace ){ + sqlite3VdbePrintOp(0, i, &p->aOp[i]); + test_addop_breakpoint(i, &p->aOp[i]); + } +#endif +#ifdef SQLITE_VDBE_COVERAGE + pOp->iSrcLine = 0; +#endif + /* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + ** Replicate in sqlite3VdbeAddOp3() */ + + return i; } /* Generate code for an unconditional jump to instruction iDest @@ -81140,11 +84488,12 @@ SQLITE_PRIVATE void sqlite3ExplainBreakpoint(const char *z1, const char *z2){ ** If the bPush flag is true, then make this opcode the parent for ** subsequent Explains until sqlite3VdbeExplainPop() is called. 
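The restructuring above splits each opcode-append into an inlineable hot path plus a SQLITE_NOINLINE cold path (growOp3(), addOp4IntSlow()) that only runs when aOp[] must grow; keeping the rare branch out of line keeps the hot function small. A generic sketch of the same split under assumed names (push/push_slow; __attribute__((noinline)) is the GCC/Clang spelling):

#include <stdlib.h>

typedef struct IntVec { int *a; int n, nAlloc; } IntVec;  // zero-init before use

// Cold path: grow the array, then finish the append. Marked noinline
// so it does not bloat every caller of push().
__attribute__((noinline)) static int push_slow(IntVec *p, int v){
  int nNew = p->nAlloc ? p->nAlloc*2 : 16;
  int *a = realloc(p->a, nNew*sizeof(int));
  if( a==0 ) return -1;
  p->a = a;
  p->nAlloc = nNew;
  p->a[p->n] = v;
  return p->n++;
}

// Hot path: one bounds check, one store.
static int push(IntVec *p, int v){
  if( p->n >= p->nAlloc ) return push_slow(p, v);
  p->a[p->n] = v;
  return p->n++;
}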
*/ -SQLITE_PRIVATE void sqlite3VdbeExplain(Parse *pParse, u8 bPush, const char *zFmt, ...){ -#ifndef SQLITE_DEBUG +SQLITE_PRIVATE int sqlite3VdbeExplain(Parse *pParse, u8 bPush, const char *zFmt, ...){ + int addr = 0; +#if !defined(SQLITE_DEBUG) /* Always include the OP_Explain opcodes if SQLITE_DEBUG is defined. ** But omit them (for performance) during production builds */ - if( pParse->explain==2 ) + if( pParse->explain==2 || IS_STMT_SCANSTATUS(pParse->db) ) #endif { char *zMsg; @@ -81156,13 +84505,15 @@ SQLITE_PRIVATE void sqlite3VdbeExplain(Parse *pParse, u8 bPush, const char *zFmt va_end(ap); v = pParse->pVdbe; iThis = v->nOp; - sqlite3VdbeAddOp4(v, OP_Explain, iThis, pParse->addrExplain, 0, + addr = sqlite3VdbeAddOp4(v, OP_Explain, iThis, pParse->addrExplain, 0, zMsg, P4_DYNAMIC); - sqlite3ExplainBreakpoint(bPush?"PUSH":"", sqlite3VdbeGetOp(v,-1)->p4.z); + sqlite3ExplainBreakpoint(bPush?"PUSH":"", sqlite3VdbeGetLastOp(v)->p4.z); if( bPush){ pParse->addrExplain = iThis; } + sqlite3VdbeScanStatus(v, iThis, -1, -1, 0, 0); } + return addr; } /* @@ -81190,26 +84541,6 @@ SQLITE_PRIVATE void sqlite3VdbeAddParseSchemaOp(Vdbe *p, int iDb, char *zWhere, sqlite3MayAbort(p->pParse); } -/* -** Add an opcode that includes the p4 value as an integer. -*/ -SQLITE_PRIVATE int sqlite3VdbeAddOp4Int( - Vdbe *p, /* Add the opcode to this VM */ - int op, /* The new opcode */ - int p1, /* The P1 operand */ - int p2, /* The P2 operand */ - int p3, /* The P3 operand */ - int p4 /* The P4 operand as an integer */ -){ - int addr = sqlite3VdbeAddOp3(p, op, p1, p2, p3); - if( p->db->mallocFailed==0 ){ - VdbeOp *pOp = &p->aOp[addr]; - pOp->p4type = P4_INT32; - pOp->p4.i = p4; - } - return addr; -} - /* Insert the end of a co-routine */ SQLITE_PRIVATE void sqlite3VdbeEndCoroutine(Vdbe *v, int regYield){ @@ -81270,6 +84601,9 @@ static SQLITE_NOINLINE void resizeResolveLabel(Parse *p, Vdbe *v, int j){ int i; for(i=p->nLabelAlloc; iaLabel[i] = -1; #endif + if( nNewSize>=100 && (nNewSize/100)>(p->nLabelAlloc/100) ){ + sqlite3ProgressCheck(p); + } p->nLabelAlloc = nNewSize; p->aLabel[j] = v->nOp; } @@ -81513,11 +84847,13 @@ static void resolveP2Values(Vdbe *p, int *pMaxFuncArgs){ Op *pOp; Parse *pParse = p->pParse; int *aLabel = pParse->aLabel; + + assert( pParse->db->mallocFailed==0 ); /* tag-20230419-1 */ p->readOnly = 1; p->bIsReader = 0; pOp = &p->aOp[p->nOp-1]; - while(1){ - + assert( p->aOp[0].opcode==OP_Init ); + while( 1 /* Loop terminates when it reaches the OP_Init opcode */ ){ /* Only JUMP opcodes and the short list of special opcodes in the switch ** below need to be considered. The mkopcodeh.tcl generator script groups ** all these opcodes together near the front of the opcode list. Skip @@ -81546,6 +84882,10 @@ static void resolveP2Values(Vdbe *p, int *pMaxFuncArgs){ p->bIsReader = 1; break; } + case OP_Init: { + assert( pOp->p2>=0 ); + goto resolve_p2_values_loop_exit; + } #ifndef SQLITE_OMIT_VIRTUALTABLE case OP_VUpdate: { if( pOp->p2>nMaxArgs ) nMaxArgs = pOp->p2; @@ -81568,6 +84908,7 @@ static void resolveP2Values(Vdbe *p, int *pMaxFuncArgs){ ** have non-negative values for P2. */ assert( (sqlite3OpcodeProperty[pOp->opcode] & OPFLG_JUMP)!=0 ); assert( ADDR(pOp->p2)<-pParse->nLabel ); + assert( aLabel!=0 ); /* True because of tag-20230419-1 */ pOp->p2 = aLabel[ADDR(pOp->p2)]; } break; @@ -81578,11 +84919,12 @@ static void resolveP2Values(Vdbe *p, int *pMaxFuncArgs){ ** have non-negative values for P2. 
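resolveP2Values() above patches jumps in one backward pass: while code is emitted, a jump to a not-yet-placed target stores a negative label token in P2, and the pass later rewrites it through aLabel[ADDR(p2)] once every label is bound. A toy version of the two-pass scheme; the negative encoding below is illustrative and simpler than SQLite's actual ADDR() macro:

// Toy two-pass label scheme: emit jumps to labels, resolve at the end.
#define N_OPS 64
static int aP2[N_OPS];    // jump operand per instruction
static int aLabel[8];     // resolved address per label
static int nOp = 0;

static int make_label(int id){ return -(id+1); }   // encode label as < 0
static void emit_jump(int label){ aP2[nOp++] = label; }
static void bind_label(int id, int addr){ aLabel[id] = addr; }

// Final pass: any negative operand is a label reference.
static void resolve(void){
  for(int i=0; i<nOp; i++){
    if( aP2[i] < 0 ) aP2[i] = aLabel[-(aP2[i]+1)];
  }
}

// Usage: emit_jump(make_label(0)); ...; bind_label(0, nOp); resolve();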
*/ assert( (sqlite3OpcodeProperty[pOp->opcode]&OPFLG_JUMP)==0 || pOp->p2>=0); } - if( pOp==p->aOp ) break; + assert( pOp>p->aOp ); pOp--; } +resolve_p2_values_loop_exit: if( aLabel ){ - sqlite3DbFreeNN(p->db, pParse->aLabel); + sqlite3DbNNFreeNN(p->db, pParse->aLabel); pParse->aLabel = 0; } pParse->nLabel = 0; @@ -81633,6 +84975,10 @@ SQLITE_PRIVATE void sqlite3VdbeNoJumpsOutsideSubrtn( int iDest = pOp->p2; /* Jump destination */ if( iDest==0 ) continue; if( pOp->opcode==OP_Gosub ) continue; + if( pOp->p3==20230325 && pOp->opcode==OP_NotNull ){ + /* This is a deliberately taken illegal branch. tag-20230325-2 */ + continue; + } if( iDest<0 ){ int j = ADDR(iDest); assert( j>=0 ); @@ -81810,20 +85156,83 @@ SQLITE_PRIVATE void sqlite3VdbeScanStatus( LogEst nEst, /* Estimated number of output rows */ const char *zName /* Name of table or index being scanned */ ){ - sqlite3_int64 nByte = (p->nScan+1) * sizeof(ScanStatus); - ScanStatus *aNew; - aNew = (ScanStatus*)sqlite3DbRealloc(p->db, p->aScan, nByte); - if( aNew ){ - ScanStatus *pNew = &aNew[p->nScan++]; - pNew->addrExplain = addrExplain; - pNew->addrLoop = addrLoop; - pNew->addrVisit = addrVisit; - pNew->nEst = nEst; - pNew->zName = sqlite3DbStrDup(p->db, zName); - p->aScan = aNew; + if( IS_STMT_SCANSTATUS(p->db) ){ + sqlite3_int64 nByte = (p->nScan+1) * sizeof(ScanStatus); + ScanStatus *aNew; + aNew = (ScanStatus*)sqlite3DbRealloc(p->db, p->aScan, nByte); + if( aNew ){ + ScanStatus *pNew = &aNew[p->nScan++]; + memset(pNew, 0, sizeof(ScanStatus)); + pNew->addrExplain = addrExplain; + pNew->addrLoop = addrLoop; + pNew->addrVisit = addrVisit; + pNew->nEst = nEst; + pNew->zName = sqlite3DbStrDup(p->db, zName); + p->aScan = aNew; + } } } -#endif + +/* +** Add the range of instructions from addrStart to addrEnd (inclusive) to +** the set of those corresponding to the sqlite3_stmt_scanstatus() counters +** associated with the OP_Explain instruction at addrExplain. The +** sum of the sqlite3Hwtime() values for each of these instructions +** will be returned for SQLITE_SCANSTAT_NCYCLE requests. +*/ +SQLITE_PRIVATE void sqlite3VdbeScanStatusRange( + Vdbe *p, + int addrExplain, + int addrStart, + int addrEnd +){ + if( IS_STMT_SCANSTATUS(p->db) ){ + ScanStatus *pScan = 0; + int ii; + for(ii=p->nScan-1; ii>=0; ii--){ + pScan = &p->aScan[ii]; + if( pScan->addrExplain==addrExplain ) break; + pScan = 0; + } + if( pScan ){ + if( addrEnd<0 ) addrEnd = sqlite3VdbeCurrentAddr(p)-1; + for(ii=0; iiaAddrRange); ii+=2){ + if( pScan->aAddrRange[ii]==0 ){ + pScan->aAddrRange[ii] = addrStart; + pScan->aAddrRange[ii+1] = addrEnd; + break; + } + } + } + } +} + +/* +** Set the addresses for the SQLITE_SCANSTAT_NLOOP and SQLITE_SCANSTAT_NROW +** counters for the query element associated with the OP_Explain at +** addrExplain. +*/ +SQLITE_PRIVATE void sqlite3VdbeScanStatusCounters( + Vdbe *p, + int addrExplain, + int addrLoop, + int addrVisit +){ + if( IS_STMT_SCANSTATUS(p->db) ){ + ScanStatus *pScan = 0; + int ii; + for(ii=p->nScan-1; ii>=0; ii--){ + pScan = &p->aScan[ii]; + if( pScan->addrExplain==addrExplain ) break; + pScan = 0; + } + if( pScan ){ + if( addrLoop>0 ) pScan->addrLoop = addrLoop; + if( addrVisit>0 ) pScan->addrVisit = addrVisit; + } + } +} +#endif /* defined(SQLITE_ENABLE_STMT_SCANSTATUS) */ /* @@ -81831,15 +85240,19 @@ SQLITE_PRIVATE void sqlite3VdbeScanStatus( ** for a specific instruction. 
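sqlite3VdbeScanStatusRange() above records (start,end) address pairs two slots at a time in the fixed aAddrRange[] array, a zero start marking the first free pair; the SQLITE_SCANSTAT_NCYCLE path later sums the per-opcode cycle counters over every recorded range. The same record-then-sum shape in isolation (types and names illustrative):

#define MAX_RANGES 4
typedef struct Scan { int aAddrRange[2*MAX_RANGES]; } Scan;

// Record [start,end]; silently drop the range if the array is full,
// mirroring how the ScanStatus version simply stops recording.
static void add_range(Scan *p, int start, int end){
  for(int i=0; i<2*MAX_RANGES; i+=2){
    if( p->aAddrRange[i]==0 ){
      p->aAddrRange[i] = start;
      p->aAddrRange[i+1] = end;
      break;
    }
  }
}

// Sum a per-instruction counter over all recorded ranges.
static long long sum_ranges(const Scan *p, const long long *aCycle){
  long long res = 0;
  for(int i=0; i<2*MAX_RANGES && p->aAddrRange[i]; i+=2){
    for(int a=p->aAddrRange[i]; a<=p->aAddrRange[i+1]; a++) res += aCycle[a];
  }
  return res;
}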
*/ SQLITE_PRIVATE void sqlite3VdbeChangeOpcode(Vdbe *p, int addr, u8 iNewOpcode){ + assert( addr>=0 ); sqlite3VdbeGetOp(p,addr)->opcode = iNewOpcode; } SQLITE_PRIVATE void sqlite3VdbeChangeP1(Vdbe *p, int addr, int val){ + assert( addr>=0 ); sqlite3VdbeGetOp(p,addr)->p1 = val; } SQLITE_PRIVATE void sqlite3VdbeChangeP2(Vdbe *p, int addr, int val){ + assert( addr>=0 || p->db->mallocFailed ); sqlite3VdbeGetOp(p,addr)->p2 = val; } SQLITE_PRIVATE void sqlite3VdbeChangeP3(Vdbe *p, int addr, int val){ + assert( addr>=0 ); sqlite3VdbeGetOp(p,addr)->p3 = val; } SQLITE_PRIVATE void sqlite3VdbeChangeP5(Vdbe *p, u16 p5){ @@ -81847,6 +85260,18 @@ SQLITE_PRIVATE void sqlite3VdbeChangeP5(Vdbe *p, u16 p5){ if( p->nOp>0 ) p->aOp[p->nOp-1].p5 = p5; } +/* +** If the previous opcode is an OP_Column that delivers results +** into register iDest, then add the OPFLAG_TYPEOFARG flag to that +** opcode. +*/ +SQLITE_PRIVATE void sqlite3VdbeTypeofColumn(Vdbe *p, int iDest){ + VdbeOp *pOp = sqlite3VdbeGetLastOp(p); + if( pOp->p3==iDest && pOp->opcode==OP_Column ){ + pOp->p5 |= OPFLAG_TYPEOFARG; + } +} + /* ** Change the P2 operand of instruction addr so that it points to ** the address of the next instruction to be coded. @@ -81875,7 +85300,7 @@ SQLITE_PRIVATE void sqlite3VdbeJumpHereOrPopInst(Vdbe *p, int addr){ || p->aOp[addr].opcode==OP_FkIfZero ); assert( p->aOp[addr].p4type==0 ); #ifdef SQLITE_VDBE_COVERAGE - sqlite3VdbeGetOp(p,-1)->iSrcLine = 0; /* Erase VdbeCoverage() macros */ + sqlite3VdbeGetLastOp(p)->iSrcLine = 0; /* Erase VdbeCoverage() macros */ #endif p->nOp--; }else{ @@ -81886,11 +85311,12 @@ SQLITE_PRIVATE void sqlite3VdbeJumpHereOrPopInst(Vdbe *p, int addr){ /* ** If the input FuncDef structure is ephemeral, then free it. If -** the FuncDef is not ephermal, then do nothing. +** the FuncDef is not ephemeral, then do nothing. */ static void freeEphemeralFunction(sqlite3 *db, FuncDef *pDef){ + assert( db!=0 ); if( (pDef->funcFlags & SQLITE_FUNC_EPHEM)!=0 ){ - sqlite3DbFreeNN(db, pDef); + sqlite3DbNNFreeNN(db, pDef); } } @@ -81899,11 +85325,12 @@ static void freeEphemeralFunction(sqlite3 *db, FuncDef *pDef){ */ static SQLITE_NOINLINE void freeP4Mem(sqlite3 *db, Mem *p){ if( p->szMalloc ) sqlite3DbFree(db, p->zMalloc); - sqlite3DbFreeNN(db, p); + sqlite3DbNNFreeNN(db, p); } static SQLITE_NOINLINE void freeP4FuncCtx(sqlite3 *db, sqlite3_context *p){ + assert( db!=0 ); freeEphemeralFunction(db, p->pFunc); - sqlite3DbFreeNN(db, p); + sqlite3DbNNFreeNN(db, p); } static void freeP4(sqlite3 *db, int p4type, void *p4){ assert( db ); @@ -81916,7 +85343,7 @@ static void freeP4(sqlite3 *db, int p4type, void *p4){ case P4_INT64: case P4_DYNAMIC: case P4_INTARRAY: { - sqlite3DbFree(db, p4); + if( p4 ) sqlite3DbNNFreeNN(db, p4); break; } case P4_KEYINFO: { @@ -81955,6 +85382,7 @@ static void freeP4(sqlite3 *db, int p4type, void *p4){ */ static void vdbeFreeOpArray(sqlite3 *db, Op *aOp, int nOp){ assert( nOp>=0 ); + assert( db!=0 ); if( aOp ){ Op *pOp = &aOp[nOp-1]; while(1){ /* Exit via break */ @@ -81965,7 +85393,7 @@ static void vdbeFreeOpArray(sqlite3 *db, Op *aOp, int nOp){ if( pOp==aOp ) break; pOp--; } - sqlite3DbFreeNN(db, aOp); + sqlite3DbNNFreeNN(db, aOp); } } @@ -82047,7 +85475,6 @@ SQLITE_PRIVATE void sqlite3VdbeReleaseRegisters( } #endif /* SQLITE_DEBUG */ - /* ** Change the value of the P4 operand for a specific instruction. 
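sqlite3VdbeTypeofColumn() above is a single-instruction peephole: rather than re-scanning the program, it inspects only the most recently emitted opcode and, if that is an OP_Column targeting the right register, sets OPFLAG_TYPEOFARG so the runtime can skip decoding column content that typeof() does not need. A stripped-down sketch of that emit-time peephole (opcode and flag names illustrative):

enum { OP_COLUMN = 1 };
#define FLAG_TYPEOF 0x01

typedef struct Op { int opcode, p3; unsigned flags; } Op;
typedef struct Prog { Op *aOp; int nOp; } Prog;

// If the last emitted op is a column load into register iDest, mark it
// so the runtime only fetches the serial type, not the full value.
// Otherwise leave the program unchanged.
static void mark_typeof_column(Prog *p, int iDest){
  if( p->nOp>0 ){
    Op *pLast = &p->aOp[p->nOp-1];
    if( pLast->opcode==OP_COLUMN && pLast->p3==iDest ){
      pLast->flags |= FLAG_TYPEOF;
    }
  }
}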
** This routine is useful when a large program is loaded from a @@ -82134,7 +85561,7 @@ SQLITE_PRIVATE void sqlite3VdbeAppendP4(Vdbe *p, void *pP4, int n){ if( p->db->mallocFailed ){ freeP4(p->db, n, pP4); }else{ - assert( pP4!=0 ); + assert( pP4!=0 || n==P4_DYNAMIC ); assert( p->nOp>0 ); pOp = &p->aOp[p->nOp-1]; assert( pOp->p4type==P4_NOTUSED ); @@ -82196,13 +85623,13 @@ SQLITE_PRIVATE void sqlite3VdbeNoopComment(Vdbe *p, const char *zFormat, ...){ ** Set the value if the iSrcLine field for the previously coded instruction. */ SQLITE_PRIVATE void sqlite3VdbeSetLineNumber(Vdbe *v, int iLine){ - sqlite3VdbeGetOp(v,-1)->iSrcLine = iLine; + sqlite3VdbeGetLastOp(v)->iSrcLine = iLine; } #endif /* SQLITE_VDBE_COVERAGE */ /* -** Return the opcode for a given address. If the address is -1, then -** return the most recently inserted opcode. +** Return the opcode for a given address. The address must be non-negative. +** See sqlite3VdbeGetLastOp() to get the most recently added opcode. ** ** If a memory allocation error has occurred prior to the calling of this ** routine, then a pointer to a dummy VdbeOp will be returned. That opcode @@ -82218,9 +85645,6 @@ SQLITE_PRIVATE VdbeOp *sqlite3VdbeGetOp(Vdbe *p, int addr){ ** zeros, which is correct. MSVC generates a warning, nevertheless. */ static VdbeOp dummy; /* Ignore the MSVC warning about no initializer */ assert( p->eVdbeState==VDBE_INIT_STATE ); - if( addr<0 ){ - addr = p->nOp - 1; - } assert( (addr>=0 && addrnOp) || p->db->mallocFailed ); if( p->db->mallocFailed ){ return (VdbeOp*)&dummy; @@ -82229,6 +85653,12 @@ SQLITE_PRIVATE VdbeOp *sqlite3VdbeGetOp(Vdbe *p, int addr){ } } +/* Return the most recently added opcode +*/ +SQLITE_PRIVATE VdbeOp *sqlite3VdbeGetLastOp(Vdbe *p){ + return sqlite3VdbeGetOp(p, p->nOp - 1); +} + #if defined(SQLITE_ENABLE_EXPLAIN_COMMENTS) /* ** Return an integer value for one of the parameters to the opcode pOp @@ -82716,7 +86146,7 @@ static void releaseMemArray(Mem *p, int N){ sqlite3VdbeMemRelease(p); p->flags = MEM_Undefined; }else if( p->szMalloc ){ - sqlite3DbFreeNN(db, p->zMalloc); + sqlite3DbNNFreeNN(db, p->zMalloc); p->szMalloc = 0; p->flags = MEM_Undefined; } @@ -82930,7 +86360,6 @@ SQLITE_PRIVATE int sqlite3VdbeList( ** sqlite3_column_text16(), causing a translation to UTF-16 encoding. */ releaseMemArray(pMem, 8); - p->pResultSet = 0; if( p->rc==SQLITE_NOMEM ){ /* This happens if a malloc() inside a call to sqlite3_column_text() or @@ -82966,7 +86395,7 @@ SQLITE_PRIVATE int sqlite3VdbeList( sqlite3VdbeMemSetInt64(pMem+1, pOp->p2); sqlite3VdbeMemSetInt64(pMem+2, pOp->p3); sqlite3VdbeMemSetStr(pMem+3, zP4, -1, SQLITE_UTF8, sqlite3_free); - p->nResColumn = 4; + assert( p->nResColumn==4 ); }else{ sqlite3VdbeMemSetInt64(pMem+0, i); sqlite3VdbeMemSetStr(pMem+1, (char*)sqlite3OpcodeName(pOp->opcode), @@ -82985,9 +86414,9 @@ SQLITE_PRIVATE int sqlite3VdbeList( sqlite3VdbeMemSetNull(pMem+7); #endif sqlite3VdbeMemSetStr(pMem+5, zP4, -1, SQLITE_UTF8, sqlite3_free); - p->nResColumn = 8; + assert( p->nResColumn==8 ); } - p->pResultSet = pMem; + p->pResultRow = pMem; if( db->mallocFailed ){ p->rc = SQLITE_NOMEM; rc = SQLITE_ERROR; @@ -83098,7 +86527,7 @@ static void *allocSpace( ** running it. 
*/ SQLITE_PRIVATE void sqlite3VdbeRewind(Vdbe *p){ -#if defined(SQLITE_DEBUG) || defined(VDBE_PROFILE) +#if defined(SQLITE_DEBUG) int i; #endif assert( p!=0 ); @@ -83127,8 +86556,8 @@ SQLITE_PRIVATE void sqlite3VdbeRewind(Vdbe *p){ p->nFkConstraint = 0; #ifdef VDBE_PROFILE for(i=0; inOp; i++){ - p->aOp[i].cnt = 0; - p->aOp[i].cycles = 0; + p->aOp[i].nExec = 0; + p->aOp[i].nCycle = 0; } #endif } @@ -83199,26 +86628,9 @@ SQLITE_PRIVATE void sqlite3VdbeMakeReady( resolveP2Values(p, &nArg); p->usesStmtJournal = (u8)(pParse->isMultiWrite && pParse->mayAbort); if( pParse->explain ){ - static const char * const azColName[] = { - "addr", "opcode", "p1", "p2", "p3", "p4", "p5", "comment", - "id", "parent", "notused", "detail" - }; - int iFirst, mx, i; if( nMem<10 ) nMem = 10; p->explain = pParse->explain; - if( pParse->explain==2 ){ - sqlite3VdbeSetNumCols(p, 4); - iFirst = 8; - mx = 12; - }else{ - sqlite3VdbeSetNumCols(p, 8); - iFirst = 0; - mx = 8; - } - for(i=iFirst; inResColumn = 12 - 4*p->explain; } p->expired = 0; @@ -83237,9 +86649,6 @@ SQLITE_PRIVATE void sqlite3VdbeMakeReady( p->aVar = allocSpace(&x, 0, nVar*sizeof(Mem)); p->apArg = allocSpace(&x, 0, nArg*sizeof(Mem*)); p->apCsr = allocSpace(&x, 0, nCursor*sizeof(VdbeCursor*)); -#ifdef SQLITE_ENABLE_STMT_SCANSTATUS - p->anExec = allocSpace(&x, 0, p->nOp*sizeof(i64)); -#endif if( x.nNeeded ){ x.pSpace = p->pFree = sqlite3DbMallocRawNN(db, x.nNeeded); x.nFree = x.nNeeded; @@ -83248,9 +86657,6 @@ SQLITE_PRIVATE void sqlite3VdbeMakeReady( p->aVar = allocSpace(&x, p->aVar, nVar*sizeof(Mem)); p->apArg = allocSpace(&x, p->apArg, nArg*sizeof(Mem*)); p->apCsr = allocSpace(&x, p->apCsr, nCursor*sizeof(VdbeCursor*)); -#ifdef SQLITE_ENABLE_STMT_SCANSTATUS - p->anExec = allocSpace(&x, p->anExec, p->nOp*sizeof(i64)); -#endif } } @@ -83265,9 +86671,6 @@ SQLITE_PRIVATE void sqlite3VdbeMakeReady( p->nMem = nMem; initMemArray(p->aMem, nMem, db, MEM_Undefined); memset(p->apCsr, 0, nCursor*sizeof(VdbeCursor*)); -#ifdef SQLITE_ENABLE_STMT_SCANSTATUS - memset(p->anExec, 0, p->nOp*sizeof(i64)); -#endif } sqlite3VdbeRewind(p); } @@ -83279,7 +86682,23 @@ SQLITE_PRIVATE void sqlite3VdbeMakeReady( SQLITE_PRIVATE void sqlite3VdbeFreeCursor(Vdbe *p, VdbeCursor *pCx){ if( pCx ) sqlite3VdbeFreeCursorNN(p,pCx); } +static SQLITE_NOINLINE void freeCursorWithCache(Vdbe *p, VdbeCursor *pCx){ + VdbeTxtBlbCache *pCache = pCx->pCache; + assert( pCx->colCache ); + pCx->colCache = 0; + pCx->pCache = 0; + if( pCache->pCValue ){ + sqlite3RCStrUnref(pCache->pCValue); + pCache->pCValue = 0; + } + sqlite3DbFree(p->db, pCache); + sqlite3VdbeFreeCursorNN(p, pCx); +} SQLITE_PRIVATE void sqlite3VdbeFreeCursorNN(Vdbe *p, VdbeCursor *pCx){ + if( pCx->colCache ){ + freeCursorWithCache(p, pCx); + return; + } switch( pCx->eCurType ){ case CURTYPE_SORTER: { sqlite3VdbeSorterClose(p->db, pCx); @@ -83325,9 +86744,6 @@ static void closeCursorsInFrame(Vdbe *p){ SQLITE_PRIVATE int sqlite3VdbeFrameRestore(VdbeFrame *pFrame){ Vdbe *v = pFrame->v; closeCursorsInFrame(v); -#ifdef SQLITE_ENABLE_STMT_SCANSTATUS - v->anExec = pFrame->anExec; -#endif v->aOp = pFrame->aOp; v->nOp = pFrame->nOp; v->aMem = pFrame->aMem; @@ -83383,12 +86799,12 @@ SQLITE_PRIVATE void sqlite3VdbeSetNumCols(Vdbe *p, int nResColumn){ int n; sqlite3 *db = p->db; - if( p->nResColumn ){ - releaseMemArray(p->aColName, p->nResColumn*COLNAME_N); + if( p->nResAlloc ){ + releaseMemArray(p->aColName, p->nResAlloc*COLNAME_N); sqlite3DbFree(db, p->aColName); } n = nResColumn*COLNAME_N; - p->nResColumn = (u16)nResColumn; + p->nResColumn = 
p->nResAlloc = (u16)nResColumn; p->aColName = (Mem*)sqlite3DbMallocRawNN(db, sizeof(Mem)*n ); if( p->aColName==0 ) return; initMemArray(p->aColName, n, db, MEM_Null); @@ -83413,14 +86829,14 @@ SQLITE_PRIVATE int sqlite3VdbeSetColName( ){ int rc; Mem *pColName; - assert( idxnResColumn ); + assert( idxnResAlloc ); assert( vardb->mallocFailed ){ assert( !zName || xDel!=SQLITE_DYNAMIC ); return SQLITE_NOMEM_BKPT; } assert( p->aColName!=0 ); - pColName = &(p->aColName[idx+var*p->nResColumn]); + pColName = &(p->aColName[idx+var*p->nResAlloc]); rc = sqlite3VdbeMemSetStr(pColName, zName, -1, SQLITE_UTF8, xDel); assert( rc!=0 || !zName || (pColName->flags&MEM_Term)!=0 ); return rc; @@ -83708,7 +87124,7 @@ static void checkActiveVdbeCnt(sqlite3 *db){ if( p->readOnly==0 ) nWrite++; if( p->bIsReader ) nRead++; } - p = p->pNext; + p = p->pVNext; } assert( cnt==db->nVdbeActive ); assert( nWrite==db->nVdbeWrite ); @@ -83933,6 +87349,7 @@ SQLITE_PRIVATE int sqlite3VdbeHalt(Vdbe *p){ sqlite3VdbeLeave(p); return SQLITE_BUSY; }else if( rc!=SQLITE_OK ){ + sqlite3SystemError(db, rc); p->rc = rc; sqlite3RollbackAll(db, SQLITE_OK); p->nChange = 0; @@ -83942,6 +87359,8 @@ SQLITE_PRIVATE int sqlite3VdbeHalt(Vdbe *p){ db->flags &= ~(u64)SQLITE_DeferFKs; sqlite3CommitInternalChanges(db); } + }else if( p->rc==SQLITE_SCHEMA && db->nVdbeActive>1 ){ + p->nChange = 0; }else{ sqlite3RollbackAll(db, SQLITE_OK); p->nChange = 0; @@ -84131,7 +87550,7 @@ SQLITE_PRIVATE int sqlite3VdbeReset(Vdbe *p){ sqlite3DbFree(db, p->zErrMsg); p->zErrMsg = 0; } - p->pResultSet = 0; + p->pResultRow = 0; #ifdef SQLITE_DEBUG p->nWrite = 0; #endif @@ -84159,10 +87578,12 @@ SQLITE_PRIVATE int sqlite3VdbeReset(Vdbe *p){ } for(i=0; inOp; i++){ char zHdr[100]; + i64 cnt = p->aOp[i].nExec; + i64 cycles = p->aOp[i].nCycle; sqlite3_snprintf(sizeof(zHdr), zHdr, "%6u %12llu %8llu ", - p->aOp[i].cnt, - p->aOp[i].cycles, - p->aOp[i].cnt>0 ? p->aOp[i].cycles/p->aOp[i].cnt : 0 + cnt, + cycles, + cnt>0 ? 
cycles/cnt : 0 ); fprintf(out, "%s", zHdr); sqlite3VdbePrintOp(out, i, &p->aOp[i]); @@ -84237,10 +87658,11 @@ SQLITE_PRIVATE void sqlite3VdbeDeleteAuxData(sqlite3 *db, AuxData **pp, int iOp, */ static void sqlite3VdbeClearObject(sqlite3 *db, Vdbe *p){ SubProgram *pSub, *pNext; + assert( db!=0 ); assert( p->db==0 || p->db==db ); if( p->aColName ){ - releaseMemArray(p->aColName, p->nResColumn*COLNAME_N); - sqlite3DbFreeNN(db, p->aColName); + releaseMemArray(p->aColName, p->nResAlloc*COLNAME_N); + sqlite3DbNNFreeNN(db, p->aColName); } for(pSub=p->pProgram; pSub; pSub=pNext){ pNext = pSub->pNext; @@ -84249,17 +87671,17 @@ static void sqlite3VdbeClearObject(sqlite3 *db, Vdbe *p){ } if( p->eVdbeState!=VDBE_INIT_STATE ){ releaseMemArray(p->aVar, p->nVar); - if( p->pVList ) sqlite3DbFreeNN(db, p->pVList); - if( p->pFree ) sqlite3DbFreeNN(db, p->pFree); + if( p->pVList ) sqlite3DbNNFreeNN(db, p->pVList); + if( p->pFree ) sqlite3DbNNFreeNN(db, p->pFree); } vdbeFreeOpArray(db, p->aOp, p->nOp); - sqlite3DbFree(db, p->zSql); + if( p->zSql ) sqlite3DbNNFreeNN(db, p->zSql); #ifdef SQLITE_ENABLE_NORMALIZE sqlite3DbFree(db, p->zNormSql); { - DblquoteStr *pThis, *pNext; - for(pThis=p->pDblStr; pThis; pThis=pNext){ - pNext = pThis->pNextStr; + DblquoteStr *pThis, *pNxt; + for(pThis=p->pDblStr; pThis; pThis=pNxt){ + pNxt = pThis->pNextStr; sqlite3DbFree(db, pThis); } } @@ -84283,20 +87705,17 @@ SQLITE_PRIVATE void sqlite3VdbeDelete(Vdbe *p){ assert( p!=0 ); db = p->db; + assert( db!=0 ); assert( sqlite3_mutex_held(db->mutex) ); sqlite3VdbeClearObject(db, p); if( db->pnBytesFreed==0 ){ - if( p->pPrev ){ - p->pPrev->pNext = p->pNext; - }else{ - assert( db->pVdbe==p ); - db->pVdbe = p->pNext; - } - if( p->pNext ){ - p->pNext->pPrev = p->pPrev; + assert( p->ppVPrev!=0 ); + *p->ppVPrev = p->pVNext; + if( p->pVNext ){ + p->pVNext->ppVPrev = p->ppVPrev; } } - sqlite3DbFreeNN(db, p); + sqlite3DbNNFreeNN(db, p); } /* @@ -84842,6 +88261,15 @@ static int vdbeRecordCompareDebug( if( d1+(u64)serial_type1+2>(u64)nKey1 && d1+(u64)sqlite3VdbeSerialTypeLen(serial_type1)>(u64)nKey1 ){ + if( serial_type1>=1 + && serial_type1<=7 + && d1+(u64)sqlite3VdbeSerialTypeLen(serial_type1)<=(u64)nKey1+8 + && CORRUPT_DB + ){ + return 1; /* corrupt record not detected by + ** sqlite3VdbeRecordCompareWithSkip(). Return true + ** to avoid firing the assert() */ + } break; } @@ -85010,20 +88438,33 @@ SQLITE_PRIVATE SQLITE_NOINLINE int sqlite3BlobCompare(const Mem *pB1, const Mem return n1 - n2; } +/* The following two functions are used only within testcase() to prove +** test coverage. These functions do no exist for production builds. +** We must use separate SQLITE_NOINLINE functions here, since otherwise +** optimizer code movement causes gcov to become very confused. +*/ +#if defined(SQLITE_COVERAGE_TEST) || defined(SQLITE_DEBUG) +static int SQLITE_NOINLINE doubleLt(double a, double b){ return a8 ){ + if( sqlite3IsNaN(r) ){ + /* SQLite considers NaN to be a NULL. 
And all integer values are greater + ** than NULL */ + return 1; + } + if( sqlite3Config.bUseLongDouble ){ LONGDOUBLE_TYPE x = (LONGDOUBLE_TYPE)i; testcase( xr ); testcase( x==r ); - if( xr ) return +1; /*NO_TEST*/ /* work around bugs in gcov */ - return 0; /*NO_TEST*/ /* work around bugs in gcov */ + return (xr); }else{ i64 y; double s; @@ -85033,9 +88474,10 @@ SQLITE_PRIVATE int sqlite3IntFloatCompare(i64 i, double r){ if( iy ) return +1; s = (double)i; - if( sr ) return +1; - return 0; + testcase( doubleLt(s,r) ); + testcase( doubleLt(r,s) ); + testcase( doubleEq(r,s) ); + return (sr); } } @@ -85251,7 +88693,7 @@ SQLITE_PRIVATE int sqlite3VdbeRecordCompareWithSkip( assert( pPKey2->pKeyInfo->aSortFlags!=0 ); assert( pPKey2->pKeyInfo->nKeyField>0 ); assert( idx1<=szHdr1 || CORRUPT_DB ); - do{ + while( 1 /*exit-by-break*/ ){ u32 serial_type; /* RHS is an integer */ @@ -85261,7 +88703,7 @@ SQLITE_PRIVATE int sqlite3VdbeRecordCompareWithSkip( serial_type = aKey1[idx1]; testcase( serial_type==12 ); if( serial_type>=10 ){ - rc = +1; + rc = serial_type==10 ? -1 : +1; }else if( serial_type==0 ){ rc = -1; }else if( serial_type==7 ){ @@ -85285,8 +88727,8 @@ SQLITE_PRIVATE int sqlite3VdbeRecordCompareWithSkip( /* Serial types 12 or greater are strings and blobs (greater than ** numbers). Types 10 and 11 are currently "reserved for future ** use", so it doesn't really matter what the results of comparing - ** them to numberic values are. */ - rc = +1; + ** them to numeric values are. */ + rc = serial_type==10 ? -1 : +1; }else if( serial_type==0 ){ rc = -1; }else{ @@ -85367,7 +88809,7 @@ SQLITE_PRIVATE int sqlite3VdbeRecordCompareWithSkip( /* RHS is null */ else{ serial_type = aKey1[idx1]; - rc = (serial_type!=0); + rc = (serial_type!=0 && serial_type!=10); } if( rc!=0 ){ @@ -85389,8 +88831,13 @@ SQLITE_PRIVATE int sqlite3VdbeRecordCompareWithSkip( if( i==pPKey2->nField ) break; pRhs++; d1 += sqlite3VdbeSerialTypeLen(serial_type); + if( d1>(unsigned)nKey1 ) break; idx1 += sqlite3VarintLen(serial_type); - }while( idx1<(unsigned)szHdr1 && d1<=(unsigned)nKey1 ); + if( idx1>=(unsigned)szHdr1 ){ + pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT; + return 0; /* Corrupt index */ + } + } /* No memory allocation is ever used on mem1. Prove this using ** the following assert(). If the assert() fails, it indicates a @@ -85791,7 +89238,7 @@ SQLITE_PRIVATE void sqlite3VdbeCountChanges(Vdbe *v){ */ SQLITE_PRIVATE void sqlite3ExpirePreparedStatements(sqlite3 *db, int iCode){ Vdbe *p; - for(p = db->pVdbe; p; p=p->pNext){ + for(p = db->pVdbe; p; p=p->pVNext){ p->expired = iCode+1; } } @@ -85884,6 +89331,20 @@ SQLITE_PRIVATE int sqlite3NotPureFunc(sqlite3_context *pCtx){ return 1; } +#if defined(SQLITE_ENABLE_CURSOR_HINTS) && defined(SQLITE_DEBUG) +/* +** This Walker callback is used to help verify that calls to +** sqlite3BtreeCursorHint() with opcode BTREE_HINT_RANGE have +** byte-code register values correctly initialized. +*/ +SQLITE_PRIVATE int sqlite3CursorRangeHintExprCheck(Walker *pWalker, Expr *pExpr){ + if( pExpr->op==TK_REGISTER ){ + assert( (pWalker->u.aMem[pExpr->iTable].flags & MEM_Undefined)==0 ); + } + return WRC_Continue; +} +#endif /* SQLITE_ENABLE_CURSOR_HINTS && SQLITE_DEBUG */ + #ifndef SQLITE_OMIT_VIRTUALTABLE /* ** Transfer error message text from an sqlite3_vtab.zErrMsg (text stored @@ -85912,13 +89373,14 @@ SQLITE_PRIVATE void sqlite3VtabImportErrmsg(Vdbe *p, sqlite3_vtab *pVtab){ ** the vdbeUnpackRecord() function found in vdbeapi.c. 
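The reworked sqlite3IntFloatCompare() above packs several cases into a few branches: NaN sorts like NULL (below every integer), an out-of-range double decides immediately, and otherwise the integer parts are compared first, with a double/double comparison only as the fractional tiebreak. A self-contained restatement of the non-long-double branch (cmp_i64_double is an illustrative name):

#include <stdint.h>
#include <math.h>

// Returns <0, 0, >0 as i sorts before, equal to, or after r.
static int cmp_i64_double(int64_t i, double r){
  if( isnan(r) ) return +1;  // NaN ranks as NULL: below every integer
  if( r < -9223372036854775808.0 ) return +1;  // r below all int64 values
  if( r >= 9223372036854775808.0 ) return -1;  // r above all int64 values
  int64_t y = (int64_t)r;        // truncation is safe after the range check
  if( i < y ) return -1;
  if( i > y ) return +1;
  double s = (double)i;          // same integer part: compare as doubles
  return (s < r) ? -1 : (s > r); // fractional part of r breaks the tie
}

// E.g. cmp_i64_double(-5, -5.5) > 0, since -5 sorts after -5.5.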
*/ static void vdbeFreeUnpacked(sqlite3 *db, int nField, UnpackedRecord *p){ + assert( db!=0 ); if( p ){ int i; for(i=0; iaMem[i]; if( pMem->zMalloc ) sqlite3VdbeMemReleaseMalloc(pMem); } - sqlite3DbFreeNN(db, p); + sqlite3DbNNFreeNN(db, p); } } #endif /* SQLITE_ENABLE_PREUPDATE_HOOK */ @@ -85945,6 +89407,16 @@ SQLITE_PRIVATE void sqlite3VdbePreUpdateHook( PreUpdate preupdate; const char *zTbl = pTab->zName; static const u8 fakeSortOrder = 0; +#ifdef SQLITE_DEBUG + int nRealCol; + if( pTab->tabFlags & TF_WithoutRowid ){ + nRealCol = sqlite3PrimaryKeyIndex(pTab)->nColumn; + }else if( pTab->tabFlags & TF_HasVirtual ){ + nRealCol = pTab->nNVCol; + }else{ + nRealCol = pTab->nCol; + } +#endif assert( db->pPreUpdate==0 ); memset(&preupdate, 0, sizeof(PreUpdate)); @@ -85961,8 +89433,8 @@ SQLITE_PRIVATE void sqlite3VdbePreUpdateHook( assert( pCsr!=0 ); assert( pCsr->eCurType==CURTYPE_BTREE ); - assert( pCsr->nField==pTab->nCol - || (pCsr->nField==pTab->nCol+1 && op==SQLITE_DELETE && iReg==-1) + assert( pCsr->nField==nRealCol + || (pCsr->nField==nRealCol+1 && op==SQLITE_DELETE && iReg==-1) ); preupdate.v = v; @@ -85989,7 +89461,7 @@ SQLITE_PRIVATE void sqlite3VdbePreUpdateHook( for(i=0; inField; i++){ sqlite3VdbeMemRelease(&preupdate.aNew[i]); } - sqlite3DbFreeNN(db, preupdate.aNew); + sqlite3DbNNFreeNN(db, preupdate.aNew); } } #endif /* SQLITE_ENABLE_PREUPDATE_HOOK */ @@ -86013,6 +89485,7 @@ SQLITE_PRIVATE void sqlite3VdbePreUpdateHook( */ /* #include "sqliteInt.h" */ /* #include "vdbeInt.h" */ +/* #include "opcodes.h" */ #ifndef SQLITE_OMIT_DEPRECATED /* @@ -86106,7 +89579,9 @@ SQLITE_API int sqlite3_finalize(sqlite3_stmt *pStmt){ if( vdbeSafety(v) ) return SQLITE_MISUSE_BKPT; sqlite3_mutex_enter(db->mutex); checkProfileCallback(db, v); - rc = sqlite3VdbeFinalize(v); + assert( v->eVdbeState>=VDBE_READY_STATE ); + rc = sqlite3VdbeReset(v); + sqlite3VdbeDelete(v); rc = sqlite3ApiExit(db, rc); sqlite3LeaveMutexAndCloseZombie(db); } @@ -86266,7 +89741,7 @@ SQLITE_API int sqlite3_value_type(sqlite3_value* pVal){ SQLITE_NULL, /* 0x1f (not possible) */ SQLITE_FLOAT, /* 0x20 INTREAL */ SQLITE_NULL, /* 0x21 (not possible) */ - SQLITE_TEXT, /* 0x22 INTREAL + TEXT */ + SQLITE_FLOAT, /* 0x22 INTREAL + TEXT */ SQLITE_NULL, /* 0x23 (not possible) */ SQLITE_FLOAT, /* 0x24 (not possible) */ SQLITE_NULL, /* 0x25 (not possible) */ @@ -86314,6 +89789,9 @@ SQLITE_API int sqlite3_value_type(sqlite3_value* pVal){ #endif return aType[pVal->flags&MEM_AffMask]; } +SQLITE_API int sqlite3_value_encoding(sqlite3_value *pVal){ + return pVal->enc; +} /* Return true if a parameter to xUpdate represents an unchanged column */ SQLITE_API int sqlite3_value_nochange(sqlite3_value *pVal){ @@ -86367,7 +89845,7 @@ SQLITE_API void sqlite3_value_free(sqlite3_value *pOld){ ** is too big or if an OOM occurs. ** ** The invokeValueDestructor(P,X) routine invokes destructor function X() -** on value P is not going to be used and need to be destroyed. +** on value P if P is not going to be used and need to be destroyed. 
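Among the public additions here is sqlite3_value_encoding(), which reports the encoding a text value currently carries (SQLITE_UTF8, SQLITE_UTF16LE, or SQLITE_UTF16BE). A hedged usage sketch inside an application-defined function; probing the encoding first lets the function ask for the representation the value already has instead of forcing a conversion (textbytes_func is an illustrative name):

#include "sqlite3.h"

// App-defined scalar: byte length of the argument's text, without
// forcing a UTF-8 <-> UTF-16 conversion where one isn't needed.
static void textbytes_func(sqlite3_context *ctx, int argc, sqlite3_value **argv){
  (void)argc;
  if( sqlite3_value_type(argv[0])!=SQLITE_TEXT ){
    sqlite3_result_null(ctx);
  }else if( sqlite3_value_encoding(argv[0])==SQLITE_UTF8 ){
    // Already UTF-8: sqlite3_value_bytes() converts nothing.
    sqlite3_result_int(ctx, sqlite3_value_bytes(argv[0]));
  }else{
    // UTF-16 of either endianness: stay in UTF-16.
    sqlite3_result_int(ctx, sqlite3_value_bytes16(argv[0]));
  }
}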
*/ static void setResultStrOrError( sqlite3_context *pCtx, /* Function context */ @@ -86397,7 +89875,7 @@ static void setResultStrOrError( static int invokeValueDestructor( const void *p, /* Value to destroy */ void (*xDel)(void*), /* The destructor */ - sqlite3_context *pCtx /* Set a SQLITE_TOOBIG error if no NULL */ + sqlite3_context *pCtx /* Set a SQLITE_TOOBIG error if not NULL */ ){ assert( xDel!=SQLITE_DYNAMIC ); if( xDel==0 ){ @@ -86407,7 +89885,14 @@ static int invokeValueDestructor( }else{ xDel((void*)p); } +#ifdef SQLITE_ENABLE_API_ARMOR + if( pCtx!=0 ){ + sqlite3_result_error_toobig(pCtx); + } +#else + assert( pCtx!=0 ); sqlite3_result_error_toobig(pCtx); +#endif return SQLITE_TOOBIG; } SQLITE_API void sqlite3_result_blob( @@ -86416,6 +89901,12 @@ SQLITE_API void sqlite3_result_blob( int n, void (*xDel)(void *) ){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( pCtx==0 || n<0 ){ + invokeValueDestructor(z, xDel, pCtx); + return; + } +#endif assert( n>=0 ); assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); setResultStrOrError(pCtx, z, n, 0, xDel); @@ -86426,8 +89917,14 @@ SQLITE_API void sqlite3_result_blob64( sqlite3_uint64 n, void (*xDel)(void *) ){ - assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); assert( xDel!=SQLITE_DYNAMIC ); +#ifdef SQLITE_ENABLE_API_ARMOR + if( pCtx==0 ){ + invokeValueDestructor(z, xDel, 0); + return; + } +#endif + assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); if( n>0x7fffffff ){ (void)invokeValueDestructor(z, xDel, pCtx); }else{ @@ -86435,30 +89932,48 @@ SQLITE_API void sqlite3_result_blob64( } } SQLITE_API void sqlite3_result_double(sqlite3_context *pCtx, double rVal){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( pCtx==0 ) return; +#endif assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); sqlite3VdbeMemSetDouble(pCtx->pOut, rVal); } SQLITE_API void sqlite3_result_error(sqlite3_context *pCtx, const char *z, int n){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( pCtx==0 ) return; +#endif assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); pCtx->isError = SQLITE_ERROR; sqlite3VdbeMemSetStr(pCtx->pOut, z, n, SQLITE_UTF8, SQLITE_TRANSIENT); } #ifndef SQLITE_OMIT_UTF16 SQLITE_API void sqlite3_result_error16(sqlite3_context *pCtx, const void *z, int n){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( pCtx==0 ) return; +#endif assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); pCtx->isError = SQLITE_ERROR; sqlite3VdbeMemSetStr(pCtx->pOut, z, n, SQLITE_UTF16NATIVE, SQLITE_TRANSIENT); } #endif SQLITE_API void sqlite3_result_int(sqlite3_context *pCtx, int iVal){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( pCtx==0 ) return; +#endif assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); sqlite3VdbeMemSetInt64(pCtx->pOut, (i64)iVal); } SQLITE_API void sqlite3_result_int64(sqlite3_context *pCtx, i64 iVal){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( pCtx==0 ) return; +#endif assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); sqlite3VdbeMemSetInt64(pCtx->pOut, iVal); } SQLITE_API void sqlite3_result_null(sqlite3_context *pCtx){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( pCtx==0 ) return; +#endif assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); sqlite3VdbeMemSetNull(pCtx->pOut); } @@ -86468,14 +89983,37 @@ SQLITE_API void sqlite3_result_pointer( const char *zPType, void (*xDestructor)(void*) ){ - Mem *pOut = pCtx->pOut; + Mem *pOut; +#ifdef SQLITE_ENABLE_API_ARMOR + if( pCtx==0 ){ + invokeValueDestructor(pPtr, xDestructor, 0); + return; + } +#endif + pOut = pCtx->pOut; assert( sqlite3_mutex_held(pOut->db->mutex) ); sqlite3VdbeMemRelease(pOut); pOut->flags = MEM_Null; sqlite3VdbeMemSetPointer(pOut, 
pPtr, zPType, xDestructor); } SQLITE_API void sqlite3_result_subtype(sqlite3_context *pCtx, unsigned int eSubtype){ - Mem *pOut = pCtx->pOut; + Mem *pOut; +#ifdef SQLITE_ENABLE_API_ARMOR + if( pCtx==0 ) return; +#endif +#if defined(SQLITE_STRICT_SUBTYPE) && SQLITE_STRICT_SUBTYPE+0!=0 + if( pCtx->pFunc!=0 + && (pCtx->pFunc->funcFlags & SQLITE_RESULT_SUBTYPE)==0 + ){ + char zErr[200]; + sqlite3_snprintf(sizeof(zErr), zErr, + "misuse of sqlite3_result_subtype() by %s()", + pCtx->pFunc->zName); + sqlite3_result_error(pCtx, zErr, -1); + return; + } +#endif /* SQLITE_STRICT_SUBTYPE */ + pOut = pCtx->pOut; assert( sqlite3_mutex_held(pOut->db->mutex) ); pOut->eSubtype = eSubtype & 0xff; pOut->flags |= MEM_Subtype; @@ -86486,6 +90024,12 @@ SQLITE_API void sqlite3_result_text( int n, void (*xDel)(void *) ){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( pCtx==0 ){ + invokeValueDestructor(z, xDel, 0); + return; + } +#endif assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); setResultStrOrError(pCtx, z, n, SQLITE_UTF8, xDel); } @@ -86496,13 +90040,23 @@ SQLITE_API void sqlite3_result_text64( void (*xDel)(void *), unsigned char enc ){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( pCtx==0 ){ + invokeValueDestructor(z, xDel, 0); + return; + } +#endif assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); assert( xDel!=SQLITE_DYNAMIC ); - if( enc==SQLITE_UTF16 ) enc = SQLITE_UTF16NATIVE; + if( enc!=SQLITE_UTF8 ){ + if( enc==SQLITE_UTF16 ) enc = SQLITE_UTF16NATIVE; + n &= ~(u64)1; + } if( n>0x7fffffff ){ (void)invokeValueDestructor(z, xDel, pCtx); }else{ setResultStrOrError(pCtx, z, (int)n, enc, xDel); + sqlite3VdbeMemZeroTerminateIfAble(pCtx->pOut); } } #ifndef SQLITE_OMIT_UTF16 @@ -86513,7 +90067,7 @@ SQLITE_API void sqlite3_result_text16( void (*xDel)(void *) ){ assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); - setResultStrOrError(pCtx, z, n, SQLITE_UTF16NATIVE, xDel); + setResultStrOrError(pCtx, z, n & ~(u64)1, SQLITE_UTF16NATIVE, xDel); } SQLITE_API void sqlite3_result_text16be( sqlite3_context *pCtx, @@ -86522,7 +90076,7 @@ SQLITE_API void sqlite3_result_text16be( void (*xDel)(void *) ){ assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); - setResultStrOrError(pCtx, z, n, SQLITE_UTF16BE, xDel); + setResultStrOrError(pCtx, z, n & ~(u64)1, SQLITE_UTF16BE, xDel); } SQLITE_API void sqlite3_result_text16le( sqlite3_context *pCtx, @@ -86531,11 +90085,20 @@ SQLITE_API void sqlite3_result_text16le( void (*xDel)(void *) ){ assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); - setResultStrOrError(pCtx, z, n, SQLITE_UTF16LE, xDel); + setResultStrOrError(pCtx, z, n & ~(u64)1, SQLITE_UTF16LE, xDel); } #endif /* SQLITE_OMIT_UTF16 */ SQLITE_API void sqlite3_result_value(sqlite3_context *pCtx, sqlite3_value *pValue){ - Mem *pOut = pCtx->pOut; + Mem *pOut; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( pCtx==0 ) return; + if( pValue==0 ){ + sqlite3_result_null(pCtx); + return; + } +#endif + pOut = pCtx->pOut; assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); sqlite3VdbeMemCopy(pOut, pValue); sqlite3VdbeChangeEncoding(pOut, pCtx->enc); @@ -86547,7 +90110,12 @@ SQLITE_API void sqlite3_result_zeroblob(sqlite3_context *pCtx, int n){ sqlite3_result_zeroblob64(pCtx, n>0 ? 
n : 0); } SQLITE_API int sqlite3_result_zeroblob64(sqlite3_context *pCtx, u64 n){ - Mem *pOut = pCtx->pOut; + Mem *pOut; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( pCtx==0 ) return SQLITE_MISUSE_BKPT; +#endif + pOut = pCtx->pOut; assert( sqlite3_mutex_held(pOut->db->mutex) ); if( n>(u64)pOut->db->aLimit[SQLITE_LIMIT_LENGTH] ){ sqlite3_result_error_toobig(pCtx); @@ -86561,6 +90129,9 @@ SQLITE_API int sqlite3_result_zeroblob64(sqlite3_context *pCtx, u64 n){ #endif } SQLITE_API void sqlite3_result_error_code(sqlite3_context *pCtx, int errCode){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( pCtx==0 ) return; +#endif pCtx->isError = errCode ? errCode : -1; #ifdef SQLITE_DEBUG if( pCtx->pVdbe ) pCtx->pVdbe->rcApp = errCode; @@ -86573,6 +90144,9 @@ SQLITE_API void sqlite3_result_error_code(sqlite3_context *pCtx, int errCode){ /* Force an SQLITE_TOOBIG error. */ SQLITE_API void sqlite3_result_error_toobig(sqlite3_context *pCtx){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( pCtx==0 ) return; +#endif assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); pCtx->isError = SQLITE_TOOBIG; sqlite3VdbeMemSetStr(pCtx->pOut, "string or blob too big", -1, @@ -86581,6 +90155,9 @@ SQLITE_API void sqlite3_result_error_toobig(sqlite3_context *pCtx){ /* An SQLITE_NOMEM error. */ SQLITE_API void sqlite3_result_error_nomem(sqlite3_context *pCtx){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( pCtx==0 ) return; +#endif assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); sqlite3VdbeMemSetNull(pCtx->pOut); pCtx->isError = SQLITE_NOMEM_BKPT; @@ -86742,7 +90319,7 @@ static int sqlite3Step(Vdbe *p){ /* If the statement completed successfully, invoke the profile callback */ checkProfileCallback(db, p); #endif - + p->pResultRow = 0; if( rc==SQLITE_DONE && db->autoCommit ){ assert( p->rc==SQLITE_OK ); p->rc = doWalCallbacks(db); @@ -86833,7 +90410,11 @@ SQLITE_API int sqlite3_step(sqlite3_stmt *pStmt){ ** pointer to it. */ SQLITE_API void *sqlite3_user_data(sqlite3_context *p){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( p==0 ) return 0; +#else assert( p && p->pFunc ); +#endif return p->pFunc->pUserData; } @@ -86848,7 +90429,11 @@ SQLITE_API void *sqlite3_user_data(sqlite3_context *p){ ** application defined function. */ SQLITE_API sqlite3 *sqlite3_context_db_handle(sqlite3_context *p){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( p==0 ) return 0; +#else assert( p && p->pOut ); +#endif return p->pOut->db; } @@ -86867,10 +90452,25 @@ SQLITE_API sqlite3 *sqlite3_context_db_handle(sqlite3_context *p){ ** value, as a signal to the xUpdate routine that the column is unchanged. */ SQLITE_API int sqlite3_vtab_nochange(sqlite3_context *p){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( p==0 ) return 0; +#else assert( p ); +#endif return sqlite3_value_nochange(p->pOut); } +/* +** The destructor function for a ValueList object. This needs to be +** a separate function, unknowable to the application, to ensure that +** calls to sqlite3_vtab_in_first()/sqlite3_vtab_in_next() that are not +** preceded by activation of IN processing via sqlite3_vtab_int() do not +** try to access a fake ValueList object inserted by a hostile extension. +*/ +SQLITE_PRIVATE void sqlite3VdbeValueListFree(void *pToDelete){ + sqlite3_free(pToDelete); +} + /* ** Implementation of sqlite3_vtab_in_first() (if bNext==0) and ** sqlite3_vtab_in_next() (if bNext!=0). 
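The hardened valueFromValueList() in the next hunk only accepts values whose destructor is the private sqlite3VdbeValueListFree(), so a hostile extension cannot forge a ValueList via sqlite3_bind_pointer(). For contrast, the legitimate consumer side is the documented xFilter loop; a sketch under the assumption that xBestIndex enabled IN processing for this constraint with sqlite3_vtab_in(pIdxInfo, iCons, 1):

#include "sqlite3.h"

// Inside a virtual table's xFilter: walk every value on the RHS of a
// "col IN (...)" constraint that xBestIndex opted into.
static int filter_in_list(sqlite3_value *pVal){
  sqlite3_value *pV;
  int rc;
  for(rc = sqlite3_vtab_in_first(pVal, &pV);
      rc==SQLITE_OK && pV!=0;
      rc = sqlite3_vtab_in_next(pVal, &pV)){
    // ... use pV, e.g. sqlite3_value_text(pV), to probe the index ...
  }
  return (rc==SQLITE_OK || rc==SQLITE_DONE) ? SQLITE_OK : rc;
}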
@@ -86884,9 +90484,16 @@ static int valueFromValueList( ValueList *pRhs; *ppOut = 0; - if( pVal==0 ) return SQLITE_MISUSE; - pRhs = (ValueList*)sqlite3_value_pointer(pVal, "ValueList"); - if( pRhs==0 ) return SQLITE_MISUSE; + if( pVal==0 ) return SQLITE_MISUSE_BKPT; + if( (pVal->flags & MEM_Dyn)==0 || pVal->xDel!=sqlite3VdbeValueListFree ){ + return SQLITE_ERROR; + }else{ + assert( (pVal->flags&(MEM_TypeMask|MEM_Term|MEM_Subtype)) == + (MEM_Null|MEM_Term|MEM_Subtype) ); + assert( pVal->eSubtype=='p' ); + assert( pVal->u.zPType!=0 && strcmp(pVal->u.zPType,"ValueList")==0 ); + pRhs = (ValueList*)pVal->z; + } if( bNext ){ rc = sqlite3BtreeNext(pRhs->pCsr, 0); }else{ @@ -87008,6 +90615,9 @@ SQLITE_API void *sqlite3_aggregate_context(sqlite3_context *p, int nByte){ SQLITE_API void *sqlite3_get_auxdata(sqlite3_context *pCtx, int iArg){ AuxData *pAuxData; +#ifdef SQLITE_ENABLE_API_ARMOR + if( pCtx==0 ) return 0; +#endif assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); #if SQLITE_ENABLE_STAT4 if( pCtx->pVdbe==0 ) return 0; @@ -87040,8 +90650,12 @@ SQLITE_API void sqlite3_set_auxdata( void (*xDelete)(void*) ){ AuxData *pAuxData; - Vdbe *pVdbe = pCtx->pVdbe; + Vdbe *pVdbe; +#ifdef SQLITE_ENABLE_API_ARMOR + if( pCtx==0 ) return; +#endif + pVdbe= pCtx->pVdbe; assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); #ifdef SQLITE_ENABLE_STAT4 if( pVdbe==0 ) goto failed; @@ -87097,7 +90711,8 @@ SQLITE_API int sqlite3_aggregate_count(sqlite3_context *p){ */ SQLITE_API int sqlite3_column_count(sqlite3_stmt *pStmt){ Vdbe *pVm = (Vdbe *)pStmt; - return pVm ? pVm->nResColumn : 0; + if( pVm==0 ) return 0; + return pVm->nResColumn; } /* @@ -87106,7 +90721,7 @@ SQLITE_API int sqlite3_column_count(sqlite3_stmt *pStmt){ */ SQLITE_API int sqlite3_data_count(sqlite3_stmt *pStmt){ Vdbe *pVm = (Vdbe *)pStmt; - if( pVm==0 || pVm->pResultSet==0 ) return 0; + if( pVm==0 || pVm->pResultRow==0 ) return 0; return pVm->nResColumn; } @@ -87161,8 +90776,8 @@ static Mem *columnMem(sqlite3_stmt *pStmt, int i){ if( pVm==0 ) return (Mem*)columnNullValue(); assert( pVm->db ); sqlite3_mutex_enter(pVm->db->mutex); - if( pVm->pResultSet!=0 && inResColumn && i>=0 ){ - pOut = &pVm->pResultSet[i]; + if( pVm->pResultRow!=0 && inResColumn && i>=0 ){ + pOut = &pVm->pResultRow[i]; }else{ sqlite3Error(pVm->db, SQLITE_RANGE); pOut = (Mem*)columnNullValue(); @@ -87186,7 +90801,7 @@ static Mem *columnMem(sqlite3_stmt *pStmt, int i){ ** sqlite3_column_real() ** sqlite3_column_bytes() ** sqlite3_column_bytes16() -** sqiite3_column_blob() +** sqlite3_column_blob() */ static void columnMallocFailure(sqlite3_stmt *pStmt) { @@ -87270,6 +90885,32 @@ SQLITE_API int sqlite3_column_type(sqlite3_stmt *pStmt, int i){ return iType; } +/* +** Column names appropriate for EXPLAIN or EXPLAIN QUERY PLAN. 
+*/ +static const char * const azExplainColNames8[] = { + "addr", "opcode", "p1", "p2", "p3", "p4", "p5", "comment", /* EXPLAIN */ + "id", "parent", "notused", "detail" /* EQP */ +}; +static const u16 azExplainColNames16data[] = { + /* 0 */ 'a', 'd', 'd', 'r', 0, + /* 5 */ 'o', 'p', 'c', 'o', 'd', 'e', 0, + /* 12 */ 'p', '1', 0, + /* 15 */ 'p', '2', 0, + /* 18 */ 'p', '3', 0, + /* 21 */ 'p', '4', 0, + /* 24 */ 'p', '5', 0, + /* 27 */ 'c', 'o', 'm', 'm', 'e', 'n', 't', 0, + /* 35 */ 'i', 'd', 0, + /* 38 */ 'p', 'a', 'r', 'e', 'n', 't', 0, + /* 45 */ 'n', 'o', 't', 'u', 's', 'e', 'd', 0, + /* 53 */ 'd', 'e', 't', 'a', 'i', 'l', 0 +}; +static const u8 iExplainColNames16[] = { + 0, 5, 12, 15, 18, 21, 24, 27, + 35, 38, 45, 53 +}; + /* ** Convert the N-th element of pStmt->pColName[] into a string using ** xFunc() then return that string. If N is out of range, return 0. @@ -87302,15 +90943,29 @@ static const void *columnName( return 0; } #endif + if( N<0 ) return 0; ret = 0; p = (Vdbe *)pStmt; db = p->db; assert( db!=0 ); - n = sqlite3_column_count(pStmt); - if( N=0 ){ + sqlite3_mutex_enter(db->mutex); + + if( p->explain ){ + if( useType>0 ) goto columnName_end; + n = p->explain==1 ? 8 : 4; + if( N>=n ) goto columnName_end; + if( useUtf16 ){ + int i = iExplainColNames16[N + 8*p->explain - 8]; + ret = (void*)&azExplainColNames16data[i]; + }else{ + ret = (void*)azExplainColNames8[N + 8*p->explain - 8]; + } + goto columnName_end; + } + n = p->nResColumn; + if( NmallocFailed; N += useType*n; - sqlite3_mutex_enter(db->mutex); - assert( db->mallocFailed==0 ); #ifndef SQLITE_OMIT_UTF16 if( useUtf16 ){ ret = sqlite3_value_text16((sqlite3_value*)&p->aColName[N]); @@ -87322,12 +90977,14 @@ static const void *columnName( /* A malloc may have failed inside of the _text() call. If this ** is the case, clear the mallocFailed flag and return NULL. */ - if( db->mallocFailed ){ + assert( db->mallocFailed==0 || db->mallocFailed==1 ); + if( db->mallocFailed > prior_mallocFailed ){ sqlite3OomClear(db); ret = 0; } - sqlite3_mutex_leave(db->mutex); } +columnName_end: + sqlite3_mutex_leave(db->mutex); return ret; } @@ -87420,7 +91077,7 @@ SQLITE_API const void *sqlite3_column_origin_name16(sqlite3_stmt *pStmt, int N){ /* ** Unbind the value bound to variable i in virtual machine p. This is the ** the same as binding a NULL value to the column. If the "i" parameter is -** out of range, then SQLITE_RANGE is returned. Othewise SQLITE_OK. +** out of range, then SQLITE_RANGE is returned. Otherwise SQLITE_OK. ** ** A successful evaluation of this routine acquires the mutex on p. ** the mutex is released if any kind of error occurs. @@ -87428,25 +91085,24 @@ SQLITE_API const void *sqlite3_column_origin_name16(sqlite3_stmt *pStmt, int N){ ** The error code stored in database p->db is overwritten with the return ** value in any case. 
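The static name tables above pair with two bits of arithmetic used elsewhere in this patch: nResColumn = 12 - 4*explain (8 columns for EXPLAIN, 4 for EXPLAIN QUERY PLAN) and the table index N + 8*explain - 8 (EXPLAIN names start at slot 0, EQP names at slot 8). Worked out as assertions, purely illustrative:

#include <assert.h>

static void explain_column_math(void){
  // explain==1 (EXPLAIN): 8 result columns, names start at table slot 0.
  assert( 12 - 4*1 == 8 );
  assert( 0 + 8*1 - 8 == 0 );   // N==0 -> "addr"
  // explain==2 (EXPLAIN QUERY PLAN): 4 columns, names start at slot 8.
  assert( 12 - 4*2 == 4 );
  assert( 0 + 8*2 - 8 == 8 );   // N==0 -> "id"
}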
*/ -static int vdbeUnbind(Vdbe *p, int i){ +static int vdbeUnbind(Vdbe *p, unsigned int i){ Mem *pVar; if( vdbeSafetyNotNull(p) ){ return SQLITE_MISUSE_BKPT; } sqlite3_mutex_enter(p->db->mutex); if( p->eVdbeState!=VDBE_READY_STATE ){ - sqlite3Error(p->db, SQLITE_MISUSE); + sqlite3Error(p->db, SQLITE_MISUSE_BKPT); sqlite3_mutex_leave(p->db->mutex); sqlite3_log(SQLITE_MISUSE, "bind on a busy prepared statement: [%s]", p->zSql); return SQLITE_MISUSE_BKPT; } - if( i<1 || i>p->nVar ){ + if( i>=(unsigned int)p->nVar ){ sqlite3Error(p->db, SQLITE_RANGE); sqlite3_mutex_leave(p->db->mutex); return SQLITE_RANGE; } - i--; pVar = &p->aVar[i]; sqlite3VdbeMemRelease(pVar); pVar->flags = MEM_Null; @@ -87483,7 +91139,7 @@ static int bindText( Mem *pVar; int rc; - rc = vdbeUnbind(p, i); + rc = vdbeUnbind(p, (u32)(i-1)); if( rc==SQLITE_OK ){ if( zData!=0 ){ pVar = &p->aVar[i-1]; @@ -87532,7 +91188,7 @@ SQLITE_API int sqlite3_bind_blob64( SQLITE_API int sqlite3_bind_double(sqlite3_stmt *pStmt, int i, double rValue){ int rc; Vdbe *p = (Vdbe *)pStmt; - rc = vdbeUnbind(p, i); + rc = vdbeUnbind(p, (u32)(i-1)); if( rc==SQLITE_OK ){ sqlite3VdbeMemSetDouble(&p->aVar[i-1], rValue); sqlite3_mutex_leave(p->db->mutex); @@ -87545,7 +91201,7 @@ SQLITE_API int sqlite3_bind_int(sqlite3_stmt *p, int i, int iValue){ SQLITE_API int sqlite3_bind_int64(sqlite3_stmt *pStmt, int i, sqlite_int64 iValue){ int rc; Vdbe *p = (Vdbe *)pStmt; - rc = vdbeUnbind(p, i); + rc = vdbeUnbind(p, (u32)(i-1)); if( rc==SQLITE_OK ){ sqlite3VdbeMemSetInt64(&p->aVar[i-1], iValue); sqlite3_mutex_leave(p->db->mutex); @@ -87555,7 +91211,7 @@ SQLITE_API int sqlite3_bind_int64(sqlite3_stmt *pStmt, int i, sqlite_int64 iValu SQLITE_API int sqlite3_bind_null(sqlite3_stmt *pStmt, int i){ int rc; Vdbe *p = (Vdbe*)pStmt; - rc = vdbeUnbind(p, i); + rc = vdbeUnbind(p, (u32)(i-1)); if( rc==SQLITE_OK ){ sqlite3_mutex_leave(p->db->mutex); } @@ -87570,7 +91226,7 @@ SQLITE_API int sqlite3_bind_pointer( ){ int rc; Vdbe *p = (Vdbe*)pStmt; - rc = vdbeUnbind(p, i); + rc = vdbeUnbind(p, (u32)(i-1)); if( rc==SQLITE_OK ){ sqlite3VdbeMemSetPointer(&p->aVar[i-1], pPtr, zPTtype, xDestructor); sqlite3_mutex_leave(p->db->mutex); @@ -87597,7 +91253,10 @@ SQLITE_API int sqlite3_bind_text64( unsigned char enc ){ assert( xDel!=SQLITE_DYNAMIC ); - if( enc==SQLITE_UTF16 ) enc = SQLITE_UTF16NATIVE; + if( enc!=SQLITE_UTF8 ){ + if( enc==SQLITE_UTF16 ) enc = SQLITE_UTF16NATIVE; + nData &= ~(u16)1; + } return bindText(pStmt, i, zData, nData, xDel, enc); } #ifndef SQLITE_OMIT_UTF16 @@ -87605,10 +91264,10 @@ SQLITE_API int sqlite3_bind_text16( sqlite3_stmt *pStmt, int i, const void *zData, - int nData, + int n, void (*xDel)(void*) ){ - return bindText(pStmt, i, zData, nData, xDel, SQLITE_UTF16NATIVE); + return bindText(pStmt, i, zData, n & ~(u64)1, xDel, SQLITE_UTF16NATIVE); } #endif /* SQLITE_OMIT_UTF16 */ SQLITE_API int sqlite3_bind_value(sqlite3_stmt *pStmt, int i, const sqlite3_value *pValue){ @@ -87648,7 +91307,7 @@ SQLITE_API int sqlite3_bind_value(sqlite3_stmt *pStmt, int i, const sqlite3_valu SQLITE_API int sqlite3_bind_zeroblob(sqlite3_stmt *pStmt, int i, int n){ int rc; Vdbe *p = (Vdbe *)pStmt; - rc = vdbeUnbind(p, i); + rc = vdbeUnbind(p, (u32)(i-1)); if( rc==SQLITE_OK ){ #ifndef SQLITE_OMIT_INCRBLOB sqlite3VdbeMemSetZeroBlob(&p->aVar[i-1], n); @@ -87662,6 +91321,9 @@ SQLITE_API int sqlite3_bind_zeroblob(sqlite3_stmt *pStmt, int i, int n){ SQLITE_API int sqlite3_bind_zeroblob64(sqlite3_stmt *pStmt, int i, sqlite3_uint64 n){ int rc; Vdbe *p = (Vdbe *)pStmt; +#ifdef 
SQLITE_ENABLE_API_ARMOR + if( p==0 ) return SQLITE_MISUSE_BKPT; +#endif sqlite3_mutex_enter(p->db->mutex); if( n>(u64)p->db->aLimit[SQLITE_LIMIT_LENGTH] ){ rc = SQLITE_TOOBIG; @@ -87782,6 +91444,42 @@ SQLITE_API int sqlite3_stmt_isexplain(sqlite3_stmt *pStmt){ return pStmt ? ((Vdbe*)pStmt)->explain : 0; } +/* +** Set the explain mode for a statement. +*/ +SQLITE_API int sqlite3_stmt_explain(sqlite3_stmt *pStmt, int eMode){ + Vdbe *v = (Vdbe*)pStmt; + int rc; +#ifdef SQLITE_ENABLE_API_ARMOR + if( pStmt==0 ) return SQLITE_MISUSE_BKPT; +#endif + sqlite3_mutex_enter(v->db->mutex); + if( ((int)v->explain)==eMode ){ + rc = SQLITE_OK; + }else if( eMode<0 || eMode>2 ){ + rc = SQLITE_ERROR; + }else if( (v->prepFlags & SQLITE_PREPARE_SAVESQL)==0 ){ + rc = SQLITE_ERROR; + }else if( v->eVdbeState!=VDBE_READY_STATE ){ + rc = SQLITE_BUSY; + }else if( v->nMem>=10 && (eMode!=2 || v->haveEqpOps) ){ + /* No reprepare necessary */ + v->explain = eMode; + rc = SQLITE_OK; + }else{ + v->explain = eMode; + rc = sqlite3Reprepare(v); + v->haveEqpOps = eMode==2; + } + if( v->explain ){ + v->nResColumn = 12 - 4*v->explain; + }else{ + v->nResColumn = v->nResAlloc; + } + sqlite3_mutex_leave(v->db->mutex); + return rc; +} + /* ** Return true if the prepared statement is in need of being reset. */ @@ -87808,7 +91506,7 @@ SQLITE_API sqlite3_stmt *sqlite3_next_stmt(sqlite3 *pDb, sqlite3_stmt *pStmt){ if( pStmt==0 ){ pNext = (sqlite3_stmt*)pDb->pVdbe; }else{ - pNext = (sqlite3_stmt*)((Vdbe*)pStmt)->pNext; + pNext = (sqlite3_stmt*)((Vdbe*)pStmt)->pVNext; } sqlite3_mutex_leave(pDb->mutex); return pNext; @@ -87833,8 +91531,11 @@ SQLITE_API int sqlite3_stmt_status(sqlite3_stmt *pStmt, int op, int resetFlag){ sqlite3_mutex_enter(db->mutex); v = 0; db->pnBytesFreed = (int*)&v; + assert( db->lookaside.pEnd==db->lookaside.pTrueEnd ); + db->lookaside.pEnd = db->lookaside.pStart; sqlite3VdbeDelete(pVdbe); db->pnBytesFreed = 0; + db->lookaside.pEnd = db->lookaside.pTrueEnd; sqlite3_mutex_leave(db->mutex); }else{ v = pVdbe->aCounter[op]; @@ -87918,10 +91619,16 @@ static UnpackedRecord *vdbeUnpackRecord( ** a field of the row currently being updated or deleted. */ SQLITE_API int sqlite3_preupdate_old(sqlite3 *db, int iIdx, sqlite3_value **ppValue){ - PreUpdate *p = db->pPreUpdate; + PreUpdate *p; Mem *pMem; int rc = SQLITE_OK; +#ifdef SQLITE_ENABLE_API_ARMOR + if( db==0 || ppValue==0 ){ + return SQLITE_MISUSE_BKPT; + } +#endif + p = db->pPreUpdate; /* Test that this call is being made from within an SQLITE_DELETE or ** SQLITE_UPDATE pre-update callback, and that iIdx is within range. */ if( !p || p->op==SQLITE_INSERT ){ @@ -87982,7 +91689,12 @@ SQLITE_API int sqlite3_preupdate_old(sqlite3 *db, int iIdx, sqlite3_value **ppVa ** the number of columns in the row being updated, deleted or inserted. */ SQLITE_API int sqlite3_preupdate_count(sqlite3 *db){ - PreUpdate *p = db->pPreUpdate; + PreUpdate *p; +#ifdef SQLITE_ENABLE_API_ARMOR + p = db!=0 ? db->pPreUpdate : 0; +#else + p = db->pPreUpdate; +#endif return (p ? p->keyinfo.nKeyField : 0); } #endif /* SQLITE_ENABLE_PREUPDATE_HOOK */ @@ -88000,7 +91712,12 @@ SQLITE_API int sqlite3_preupdate_count(sqlite3 *db){ ** or SET DEFAULT action is considered a trigger. */ SQLITE_API int sqlite3_preupdate_depth(sqlite3 *db){ - PreUpdate *p = db->pPreUpdate; + PreUpdate *p; +#ifdef SQLITE_ENABLE_API_ARMOR + p = db!=0 ? db->pPreUpdate : 0; +#else + p = db->pPreUpdate; +#endif return (p ? 
p->v->nFrame : 0); } #endif /* SQLITE_ENABLE_PREUPDATE_HOOK */ @@ -88011,7 +91728,12 @@ SQLITE_API int sqlite3_preupdate_depth(sqlite3 *db){ ** only. */ SQLITE_API int sqlite3_preupdate_blobwrite(sqlite3 *db){ - PreUpdate *p = db->pPreUpdate; + PreUpdate *p; +#ifdef SQLITE_ENABLE_API_ARMOR + p = db!=0 ? db->pPreUpdate : 0; +#else + p = db->pPreUpdate; +#endif return (p ? p->iBlobWrite : -1); } #endif @@ -88022,10 +91744,16 @@ SQLITE_API int sqlite3_preupdate_blobwrite(sqlite3 *db){ ** a field of the row currently being updated or inserted. */ SQLITE_API int sqlite3_preupdate_new(sqlite3 *db, int iIdx, sqlite3_value **ppValue){ - PreUpdate *p = db->pPreUpdate; + PreUpdate *p; int rc = SQLITE_OK; Mem *pMem; +#ifdef SQLITE_ENABLE_API_ARMOR + if( db==0 || ppValue==0 ){ + return SQLITE_MISUSE_BKPT; + } +#endif + p = db->pPreUpdate; if( !p || p->op==SQLITE_DELETE ){ rc = SQLITE_MISUSE_BKPT; goto preupdate_new_out; @@ -88096,23 +91824,78 @@ SQLITE_API int sqlite3_preupdate_new(sqlite3 *db, int iIdx, sqlite3_value **ppVa /* ** Return status data for a single loop within query pStmt. */ -SQLITE_API int sqlite3_stmt_scanstatus( +SQLITE_API int sqlite3_stmt_scanstatus_v2( sqlite3_stmt *pStmt, /* Prepared statement being queried */ - int idx, /* Index of loop to report on */ + int iScan, /* Index of loop to report on */ int iScanStatusOp, /* Which metric to return */ + int flags, void *pOut /* OUT: Write the answer here */ ){ Vdbe *p = (Vdbe*)pStmt; - ScanStatus *pScan; - if( idx<0 || idx>=p->nScan ) return 1; - pScan = &p->aScan[idx]; + VdbeOp *aOp; + int nOp; + ScanStatus *pScan = 0; + int idx; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( p==0 || pOut==0 + || iScanStatusOpSQLITE_SCANSTAT_NCYCLE ){ + return 1; + } +#endif + aOp = p->aOp; + nOp = p->nOp; + if( p->pFrame ){ + VdbeFrame *pFrame; + for(pFrame=p->pFrame; pFrame->pParent; pFrame=pFrame->pParent); + aOp = pFrame->aOp; + nOp = pFrame->nOp; + } + + if( iScan<0 ){ + int ii; + if( iScanStatusOp==SQLITE_SCANSTAT_NCYCLE ){ + i64 res = 0; + for(ii=0; iiaScan[idx]; + }else{ + /* If the COMPLEX flag is clear, then this function must ignore any + ** ScanStatus structures with ScanStatus.addrLoop set to 0. 
*/ + for(idx=0; idx<p->nScan; idx++){ + pScan = &p->aScan[idx]; + if( pScan->zName ){ + iScan--; + if( iScan<0 ) break; + } + } + } + if( idx>=p->nScan ) return 1; + switch( iScanStatusOp ){ case SQLITE_SCANSTAT_NLOOP: { - *(sqlite3_int64*)pOut = p->anExec[pScan->addrLoop]; + if( pScan->addrLoop>0 ){ + *(sqlite3_int64*)pOut = aOp[pScan->addrLoop].nExec; + }else{ + *(sqlite3_int64*)pOut = -1; + } break; } case SQLITE_SCANSTAT_NVISIT: { - *(sqlite3_int64*)pOut = p->anExec[pScan->addrVisit]; + if( pScan->addrVisit>0 ){ + *(sqlite3_int64*)pOut = aOp[pScan->addrVisit].nExec; + }else{ + *(sqlite3_int64*)pOut = -1; + } break; } case SQLITE_SCANSTAT_EST: { @@ -88131,7 +91914,7 @@ SQLITE_API int sqlite3_stmt_scanstatus( } case SQLITE_SCANSTAT_EXPLAIN: { if( pScan->addrExplain ){ - *(const char**)pOut = p->aOp[ pScan->addrExplain ].p4.z; + *(const char**)pOut = aOp[ pScan->addrExplain ].p4.z; }else{ *(const char**)pOut = 0; } @@ -88139,12 +91922,51 @@ SQLITE_API int sqlite3_stmt_scanstatus( } case SQLITE_SCANSTAT_SELECTID: { if( pScan->addrExplain ){ - *(int*)pOut = p->aOp[ pScan->addrExplain ].p1; + *(int*)pOut = aOp[ pScan->addrExplain ].p1; + }else{ + *(int*)pOut = -1; + } + break; + } + case SQLITE_SCANSTAT_PARENTID: { + if( pScan->addrExplain ){ + *(int*)pOut = aOp[ pScan->addrExplain ].p2; }else{ *(int*)pOut = -1; } break; } + case SQLITE_SCANSTAT_NCYCLE: { + i64 res = 0; + if( pScan->aAddrRange[0]==0 ){ + res = -1; + }else{ + int ii; + for(ii=0; ii<ArraySize(pScan->aAddrRange); ii+=2){ + int iIns = pScan->aAddrRange[ii]; + int iEnd = pScan->aAddrRange[ii+1]; + if( iIns==0 ) break; + if( iIns>0 ){ + while( iIns<=iEnd ){ + res += aOp[iIns].nCycle; + iIns++; + } + }else{ + int iOp; + for(iOp=0; iOp<nOp; iOp++){ + Op *pOp = &aOp[iOp]; + if( pOp->p1!=iEnd ) continue; + if( (sqlite3OpcodeProperty[pOp->opcode] & OPFLG_NCYCLE)==0 ){ + continue; + } + res += aOp[iOp].nCycle; + } + } + } + } + *(i64*)pOut = res; + break; + } default: { return 1; } @@ -88152,12 +91974,29 @@ SQLITE_API int sqlite3_stmt_scanstatus( return 0; } +/* +** Return status data for a single loop within query pStmt. +*/ +SQLITE_API int sqlite3_stmt_scanstatus( + sqlite3_stmt *pStmt, /* Prepared statement being queried */ + int iScan, /* Index of loop to report on */ + int iScanStatusOp, /* Which metric to return */ + void *pOut /* OUT: Write the answer here */ +){ + return sqlite3_stmt_scanstatus_v2(pStmt, iScan, iScanStatusOp, 0, pOut); +} + /* ** Zero all counters associated with the sqlite3_stmt_scanstatus() data. */ SQLITE_API void sqlite3_stmt_scanstatus_reset(sqlite3_stmt *pStmt){ Vdbe *p = (Vdbe*)pStmt; - memset(p->anExec, 0, p->nOp * sizeof(i64)); + int ii; + for(ii=0; p!=0 && ii<p->nOp; ii++){ + Op *pOp = &p->aOp[ii]; + pOp->nExec = 0; + pOp->nCycle = 0; + } } #endif /* SQLITE_ENABLE_STMT_SCANSTATUS */ @@ -88492,8 +92331,12 @@ SQLITE_API int sqlite3_found_count = 0; ** sqlite3CantopenError(lineno) */ static void test_trace_breakpoint(int pc, Op *pOp, Vdbe *v){ - static int n = 0; + static u64 n = 0; + (void)pc; + (void)pOp; + (void)v; n++; + if( n==LARGEST_UINT64 ) abort(); /* So that n is used, preventing a warning */ } #endif @@ -88674,7 +92517,8 @@ static VdbeCursor *allocateCursor( ** return false.
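The rewritten scan-status code above drops the old Vdbe.anExec array in favor of per-opcode nExec/nCycle counters, and introduces sqlite3_stmt_scanstatus_v2() with the SQLITE_SCANSTAT_PARENTID and SQLITE_SCANSTAT_NCYCLE metrics (the old entry point becomes a thin wrapper). A minimal usage sketch, assuming a build with SQLITE_ENABLE_STMT_SCANSTATUS; the helper name is hypothetical:

    #include <stdio.h>
    #include "sqlite3.h"

    /* Walk every loop of a statement, including "complex" entries that
    ** have no EXPLAIN QUERY PLAN line, and print the new cycle metric. */
    static void dump_scanstats(sqlite3_stmt *pStmt){
      int i;
      for(i=0; ; i++){
        const char *zPlan = 0;
        sqlite3_int64 nLoop = -1, nCycle = -1;
        if( sqlite3_stmt_scanstatus_v2(pStmt, i, SQLITE_SCANSTAT_EXPLAIN,
                                       SQLITE_SCANSTAT_COMPLEX,
                                       (void*)&zPlan) ){
          break;  /* Non-zero return: no more loops to report */
        }
        sqlite3_stmt_scanstatus_v2(pStmt, i, SQLITE_SCANSTAT_NLOOP,
                                   SQLITE_SCANSTAT_COMPLEX, (void*)&nLoop);
        sqlite3_stmt_scanstatus_v2(pStmt, i, SQLITE_SCANSTAT_NCYCLE,
                                   SQLITE_SCANSTAT_COMPLEX, (void*)&nCycle);
        printf("loop %d: %s (nLoop=%lld, nCycle=%lld)\n",
               i, zPlan ? zPlan : "(no plan line)",
               (long long)nLoop, (long long)nCycle);
      }
      sqlite3_stmt_scanstatus_reset(pStmt);  /* Zero nExec/nCycle counters */
    }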
*/ static int alsoAnInt(Mem *pRec, double rValue, i64 *piValue){ - i64 iValue = (double)rValue; + i64 iValue; + iValue = sqlite3RealToI64(rValue); if( sqlite3RealSameAsInt(rValue,iValue) ){ *piValue = iValue; return 1; @@ -88730,6 +92574,10 @@ static void applyNumericAffinity(Mem *pRec, int bTryForInt){ ** always preferred, even if the affinity is REAL, because ** an integer representation is more space efficient on disk. ** +** SQLITE_AFF_FLEXNUM: +** If the value is text, then try to convert it into a number of +** some kind (integer or real) but do not make any other changes. +** ** SQLITE_AFF_TEXT: ** Convert pRec to a text representation. ** @@ -88744,11 +92592,11 @@ static void applyAffinity( ){ if( affinity>=SQLITE_AFF_NUMERIC ){ assert( affinity==SQLITE_AFF_INTEGER || affinity==SQLITE_AFF_REAL - || affinity==SQLITE_AFF_NUMERIC ); + || affinity==SQLITE_AFF_NUMERIC || affinity==SQLITE_AFF_FLEXNUM ); if( (pRec->flags & MEM_Int)==0 ){ /*OPTIMIZATION-IF-FALSE*/ - if( (pRec->flags & MEM_Real)==0 ){ + if( (pRec->flags & (MEM_Real|MEM_IntReal))==0 ){ if( pRec->flags & MEM_Str ) applyNumericAffinity(pRec,1); - }else{ + }else if( affinity<=SQLITE_AFF_REAL ){ sqlite3VdbeIntegerAffinity(pRec); } } @@ -88836,17 +92684,18 @@ static u16 SQLITE_NOINLINE computeNumericType(Mem *pMem){ ** But it does set pMem->u.r and pMem->u.i appropriately. */ static u16 numericType(Mem *pMem){ - if( pMem->flags & (MEM_Int|MEM_Real|MEM_IntReal) ){ + assert( (pMem->flags & MEM_Null)==0 + || pMem->db==0 || pMem->db->mallocFailed ); + if( pMem->flags & (MEM_Int|MEM_Real|MEM_IntReal|MEM_Null) ){ testcase( pMem->flags & MEM_Int ); testcase( pMem->flags & MEM_Real ); testcase( pMem->flags & MEM_IntReal ); - return pMem->flags & (MEM_Int|MEM_Real|MEM_IntReal); - } - if( pMem->flags & (MEM_Str|MEM_Blob) ){ - testcase( pMem->flags & MEM_Str ); - testcase( pMem->flags & MEM_Blob ); - return computeNumericType(pMem); + return pMem->flags & (MEM_Int|MEM_Real|MEM_IntReal|MEM_Null); } + assert( pMem->flags & (MEM_Str|MEM_Blob) ); + testcase( pMem->flags & MEM_Str ); + testcase( pMem->flags & MEM_Blob ); + return computeNumericType(pMem); return 0; } @@ -88907,6 +92756,9 @@ SQLITE_PRIVATE void sqlite3VdbeMemPrettyPrint(Mem *pMem, StrAccum *pStr){ sqlite3_str_appendchar(pStr, 1, (c>=0x20&&c<=0x7f) ? c : '.'); } sqlite3_str_appendf(pStr, "]%s", encnames[pMem->enc]); + if( f & MEM_Term ){ + sqlite3_str_appendf(pStr, "(0-term)"); + } } } #endif @@ -88975,17 +92827,6 @@ SQLITE_PRIVATE void sqlite3VdbeRegisterDump(Vdbe *v){ # define REGISTER_TRACE(R,M) #endif - -#ifdef VDBE_PROFILE - -/* -** hwtime.h contains inline assembler code for implementing -** high-performance timing routines. -*/ -/* #include "hwtime.h" */ - -#endif - #ifndef NDEBUG /* ** This function is only called from within an assert() expression. It @@ -89045,13 +92886,102 @@ static u64 filterHash(const Mem *aMem, const Op *pOp){ }else if( p->flags & MEM_Real ){ h += sqlite3VdbeIntValue(p); }else if( p->flags & (MEM_Str|MEM_Blob) ){ - h += p->n; - if( p->flags & MEM_Zero ) h += p->u.nZero; + /* All strings have the same hash and all blobs have the same hash, + ** though, at least, those hashes are different from each other and + ** from NULL. */ + h += 4093 + (p->flags & (MEM_Str|MEM_Blob)); } } return h; } + +/* +** For OP_Column, factor out the case where content is loaded from +** overflow pages, so that the code to implement this case is separate +** the common case where all content fits on the page. 
Factoring out +** the code reduces register pressure and helps the common case +** to run faster. +*/ +static SQLITE_NOINLINE int vdbeColumnFromOverflow( + VdbeCursor *pC, /* The BTree cursor from which we are reading */ + int iCol, /* The column to read */ + int t, /* The serial-type code for the column value */ + i64 iOffset, /* Offset to the start of the content value */ + u32 cacheStatus, /* Current Vdbe.cacheCtr value */ + u32 colCacheCtr, /* Current value of the column cache counter */ + Mem *pDest /* Store the value into this register. */ +){ + int rc; + sqlite3 *db = pDest->db; + int encoding = pDest->enc; + int len = sqlite3VdbeSerialTypeLen(t); + assert( pC->eCurType==CURTYPE_BTREE ); + if( len>db->aLimit[SQLITE_LIMIT_LENGTH] ) return SQLITE_TOOBIG; + if( len > 4000 && pC->pKeyInfo==0 ){ + /* Cache large column values that are on overflow pages using + ** an RCStr (reference counted string) so that if they are reloaded, + ** that do not have to be copied a second time. The overhead of + ** creating and managing the cache is such that this is only + ** profitable for larger TEXT and BLOB values. + ** + ** Only do this on table-btrees so that writes to index-btrees do not + ** need to clear the cache. This buys performance in the common case + ** in exchange for generality. + */ + VdbeTxtBlbCache *pCache; + char *pBuf; + if( pC->colCache==0 ){ + pC->pCache = sqlite3DbMallocZero(db, sizeof(VdbeTxtBlbCache) ); + if( pC->pCache==0 ) return SQLITE_NOMEM; + pC->colCache = 1; + } + pCache = pC->pCache; + if( pCache->pCValue==0 + || pCache->iCol!=iCol + || pCache->cacheStatus!=cacheStatus + || pCache->colCacheCtr!=colCacheCtr + || pCache->iOffset!=sqlite3BtreeOffset(pC->uc.pCursor) + ){ + if( pCache->pCValue ) sqlite3RCStrUnref(pCache->pCValue); + pBuf = pCache->pCValue = sqlite3RCStrNew( len+3 ); + if( pBuf==0 ) return SQLITE_NOMEM; + rc = sqlite3BtreePayload(pC->uc.pCursor, iOffset, len, pBuf); + if( rc ) return rc; + pBuf[len] = 0; + pBuf[len+1] = 0; + pBuf[len+2] = 0; + pCache->iCol = iCol; + pCache->cacheStatus = cacheStatus; + pCache->colCacheCtr = colCacheCtr; + pCache->iOffset = sqlite3BtreeOffset(pC->uc.pCursor); + }else{ + pBuf = pCache->pCValue; + } + assert( t>=12 ); + sqlite3RCStrRef(pBuf); + if( t&1 ){ + rc = sqlite3VdbeMemSetStr(pDest, pBuf, len, encoding, + sqlite3RCStrUnref); + pDest->flags |= MEM_Term; + }else{ + rc = sqlite3VdbeMemSetStr(pDest, pBuf, len, 0, + sqlite3RCStrUnref); + } + }else{ + rc = sqlite3VdbeMemFromBtree(pC->uc.pCursor, iOffset, len, pDest); + if( rc ) return rc; + sqlite3VdbeSerialGet((const u8*)pDest->z, t, pDest); + if( (t&1)!=0 && encoding==SQLITE_UTF8 ){ + pDest->z[len] = 0; + pDest->flags |= MEM_Term; + } + } + pDest->flags &= ~MEM_Ephem; + return rc; +} + + /* ** Return the symbolic name for the data type of a pMem */ @@ -89075,11 +93005,10 @@ SQLITE_PRIVATE int sqlite3VdbeExec( ){ Op *aOp = p->aOp; /* Copy of p->aOp */ Op *pOp = aOp; /* Current operation */ -#if defined(SQLITE_DEBUG) || defined(VDBE_PROFILE) - Op *pOrigOp; /* Value of pOp at the top of the loop */ -#endif #ifdef SQLITE_DEBUG + Op *pOrigOp; /* Value of pOp at the top of the loop */ int nExtraDelete = 0; /* Verifies FORDELETE and AUXDELETE flags */ + u8 iCompareIsInit = 0; /* iCompare is initialized */ #endif int rc = SQLITE_OK; /* Value to return */ sqlite3 *db = p->db; /* The database */ @@ -89095,13 +93024,17 @@ SQLITE_PRIVATE int sqlite3VdbeExec( Mem *pIn2 = 0; /* 2nd input operand */ Mem *pIn3 = 0; /* 3rd input operand */ Mem *pOut = 0; /* Output operand */ -#ifdef 
VDBE_PROFILE - u64 start; /* CPU clock count at start of opcode */ + u32 colCacheCtr = 0; /* Column cache counter */ +#if defined(SQLITE_ENABLE_STMT_SCANSTATUS) || defined(VDBE_PROFILE) + u64 *pnCycle = 0; + int bStmtScanStatus = IS_STMT_SCANSTATUS(db)!=0; #endif /*** INSERT STACK UNION HERE ***/ assert( p->eVdbeState==VDBE_RUN_STATE ); /* sqlite3_step() verifies this */ - sqlite3VdbeEnter(p); + if( DbMaskNonZero(p->lockMask) ){ + sqlite3VdbeEnter(p); + } #ifndef SQLITE_OMIT_PROGRESS_CALLBACK if( db->xProgress ){ u32 iPrior = p->aCounter[SQLITE_STMTSTATUS_VM_STEP]; @@ -89122,7 +93055,6 @@ SQLITE_PRIVATE int sqlite3VdbeExec( assert( p->bIsReader || p->readOnly!=0 ); p->iCurrentTime = 0; assert( p->explain==0 ); - p->pResultSet = 0; db->busyHandler.nBusy = 0; if( AtomicLoad(&db->u1.isInterrupted) ) goto abort_due_to_interrupt; sqlite3VdbeIOTraceSql(p); @@ -89159,12 +93091,18 @@ SQLITE_PRIVATE int sqlite3VdbeExec( assert( rc==SQLITE_OK ); assert( pOp>=aOp && pOp<&aOp[p->nOp]); -#ifdef VDBE_PROFILE - start = sqlite3NProfileCnt ? sqlite3NProfileCnt : sqlite3Hwtime(); -#endif nVmStep++; -#ifdef SQLITE_ENABLE_STMT_SCANSTATUS - if( p->anExec ) p->anExec[(int)(pOp-aOp)]++; + +#if defined(VDBE_PROFILE) + pOp->nExec++; + pnCycle = &pOp->nCycle; + if( sqlite3NProfileCnt==0 ) *pnCycle -= sqlite3Hwtime(); +#elif defined(SQLITE_ENABLE_STMT_SCANSTATUS) + if( bStmtScanStatus ){ + pOp->nExec++; + pnCycle = &pOp->nCycle; + *pnCycle -= sqlite3Hwtime(); + } #endif /* Only allow tracing if SQLITE_DEBUG is defined. @@ -89226,7 +93164,7 @@ SQLITE_PRIVATE int sqlite3VdbeExec( } } #endif -#if defined(SQLITE_DEBUG) || defined(VDBE_PROFILE) +#ifdef SQLITE_DEBUG pOrigOp = pOp; #endif @@ -89282,8 +93220,8 @@ SQLITE_PRIVATE int sqlite3VdbeExec( case OP_Goto: { /* jump */ #ifdef SQLITE_DEBUG - /* In debuggging mode, when the p5 flags is set on an OP_Goto, that - ** means we should really jump back to the preceeding OP_ReleaseReg + /* In debugging mode, when the p5 flags is set on an OP_Goto, that + ** means we should really jump back to the preceding OP_ReleaseReg ** instruction. */ if( pOp->p5 ){ assert( pOp->p2 < (int)(pOp - aOp) ); @@ -89491,7 +93429,7 @@ case OP_HaltIfNull: { /* in3 */ ** P5 is a value between 0 and 4, inclusive, that modifies the P4 string. ** ** 0: (no change) -** 1: NOT NULL contraint failed: P4 +** 1: NOT NULL constraint failed: P4 ** 2: UNIQUE constraint failed: P4 ** 3: CHECK constraint failed: P4 ** 4: FOREIGN KEY constraint failed: P4 @@ -89510,6 +93448,12 @@ case OP_Halt: { #ifdef SQLITE_DEBUG if( pOp->p2==OE_Abort ){ sqlite3VdbeAssertAbortable(p); } #endif + + /* A deliberately coded "OP_Halt SQLITE_INTERNAL * * * *" opcode indicates + ** something is wrong with the code generator. Raise an assertion in order + ** to bring this to the attention of fuzzers and other testing tools. */ + assert( pOp->p1!=SQLITE_INTERNAL ); + if( p->pFrame && pOp->p1==SQLITE_OK ){ /* Halt the sub-program. Return control to the parent frame. 
*/ pFrame = p->pFrame; @@ -89951,10 +93895,10 @@ case OP_ResultRow: { assert( pOp->p1+pOp->p2<=(p->nMem+1 - p->nCursor)+1 ); p->cacheCtr = (p->cacheCtr + 2)|1; - p->pResultSet = &aMem[pOp->p1]; + p->pResultRow = &aMem[pOp->p1]; #ifdef SQLITE_DEBUG { - Mem *pMem = p->pResultSet; + Mem *pMem = p->pResultRow; int i; for(i=0; ip2; i++){ assert( memIsValid(&pMem[i]) ); @@ -90091,7 +94035,6 @@ case OP_Subtract: /* same as TK_MINUS, in1, in2, out3 */ case OP_Multiply: /* same as TK_STAR, in1, in2, out3 */ case OP_Divide: /* same as TK_SLASH, in1, in2, out3 */ case OP_Remainder: { /* same as TK_REM, in1, in2, out3 */ - u16 flags; /* Combined MEM_* flags from both inputs */ u16 type1; /* Numeric type of left operand */ u16 type2; /* Numeric type of right operand */ i64 iA; /* Integer value of left operand */ @@ -90100,12 +94043,12 @@ case OP_Remainder: { /* same as TK_REM, in1, in2, out3 */ double rB; /* Real value of right operand */ pIn1 = &aMem[pOp->p1]; - type1 = numericType(pIn1); + type1 = pIn1->flags; pIn2 = &aMem[pOp->p2]; - type2 = numericType(pIn2); + type2 = pIn2->flags; pOut = &aMem[pOp->p3]; - flags = pIn1->flags | pIn2->flags; if( (type1 & type2 & MEM_Int)!=0 ){ +int_math: iA = pIn1->u.i; iB = pIn2->u.i; switch( pOp->opcode ){ @@ -90127,9 +94070,12 @@ case OP_Remainder: { /* same as TK_REM, in1, in2, out3 */ } pOut->u.i = iB; MemSetTypeFlag(pOut, MEM_Int); - }else if( (flags & MEM_Null)!=0 ){ + }else if( ((type1 | type2) & MEM_Null)!=0 ){ goto arithmetic_result_is_null; }else{ + type1 = numericType(pIn1); + type2 = numericType(pIn2); + if( (type1 & type2 & MEM_Int)!=0 ) goto int_math; fp_math: rA = sqlite3VdbeRealValue(pIn1); rB = sqlite3VdbeRealValue(pIn2); @@ -90482,7 +94428,6 @@ case OP_Ge: { /* same as TK_GE, jump, in1, in3 */ flags1 = pIn1->flags; flags3 = pIn3->flags; if( (flags1 & flags3 & MEM_Int)!=0 ){ - assert( (pOp->p5 & SQLITE_AFF_MASK)!=SQLITE_AFF_TEXT || CORRUPT_DB ); /* Common case of comparison of two integers */ if( pIn3->u.i > pIn1->u.i ){ if( sqlite3aGTb[pOp->opcode] ){ @@ -90490,18 +94435,21 @@ case OP_Ge: { /* same as TK_GE, jump, in1, in3 */ goto jump_to_p2; } iCompare = +1; + VVA_ONLY( iCompareIsInit = 1; ) }else if( pIn3->u.i < pIn1->u.i ){ if( sqlite3aLTb[pOp->opcode] ){ VdbeBranchTaken(1, (pOp->p5 & SQLITE_NULLEQ)?2:3); goto jump_to_p2; } iCompare = -1; + VVA_ONLY( iCompareIsInit = 1; ) }else{ if( sqlite3aEQb[pOp->opcode] ){ VdbeBranchTaken(1, (pOp->p5 & SQLITE_NULLEQ)?2:3); goto jump_to_p2; } iCompare = 0; + VVA_ONLY( iCompareIsInit = 1; ) } VdbeBranchTaken(0, (pOp->p5 & SQLITE_NULLEQ)?2:3); break; @@ -90533,6 +94481,7 @@ case OP_Ge: { /* same as TK_GE, jump, in1, in3 */ goto jump_to_p2; } iCompare = 1; /* Operands are not equal */ + VVA_ONLY( iCompareIsInit = 1; ) break; } }else{ @@ -90543,14 +94492,14 @@ case OP_Ge: { /* same as TK_GE, jump, in1, in3 */ if( (flags1 | flags3)&MEM_Str ){ if( (flags1 & (MEM_Int|MEM_IntReal|MEM_Real|MEM_Str))==MEM_Str ){ applyNumericAffinity(pIn1,0); - testcase( flags3==pIn3->flags ); + assert( flags3==pIn3->flags || CORRUPT_DB ); flags3 = pIn3->flags; } if( (flags3 & (MEM_Int|MEM_IntReal|MEM_Real|MEM_Str))==MEM_Str ){ applyNumericAffinity(pIn3,0); } } - }else if( affinity==SQLITE_AFF_TEXT ){ + }else if( affinity==SQLITE_AFF_TEXT && ((flags1 | flags3) & MEM_Str)!=0 ){ if( (flags1 & MEM_Str)==0 && (flags1&(MEM_Int|MEM_Real|MEM_IntReal))!=0 ){ testcase( pIn1->flags & MEM_Int ); testcase( pIn1->flags & MEM_Real ); @@ -90558,7 +94507,7 @@ case OP_Ge: { /* same as TK_GE, jump, in1, in3 */ sqlite3VdbeMemStringify(pIn1, encoding, 1); 
testcase( (flags1&MEM_Dyn) != (pIn1->flags&MEM_Dyn) ); flags1 = (pIn1->flags & ~MEM_TypeMask) | (flags1 & MEM_TypeMask); - if( pIn1==pIn3 ) flags3 = flags1 | MEM_Str; + if( NEVER(pIn1==pIn3) ) flags3 = flags1 | MEM_Str; } if( (flags3 & MEM_Str)==0 && (flags3&(MEM_Int|MEM_Real|MEM_IntReal))!=0 ){ testcase( pIn3->flags & MEM_Int ); @@ -90589,6 +94538,7 @@ case OP_Ge: { /* same as TK_GE, jump, in1, in3 */ res2 = sqlite3aGTb[pOp->opcode]; } iCompare = res; + VVA_ONLY( iCompareIsInit = 1; ) /* Undo any changes made by applyAffinity() to the input registers. */ assert( (pIn3->flags & MEM_Dyn) == (flags3 & MEM_Dyn) ); @@ -90610,10 +94560,10 @@ case OP_Ge: { /* same as TK_GE, jump, in1, in3 */ ** opcodes are allowed to occur between this instruction and the previous ** OP_Lt or OP_Gt. ** -** If result of an OP_Eq comparison on the same two operands as the -** prior OP_Lt or OP_Gt would have been true, then jump to P2. -** If the result of an OP_Eq comparison on the two previous -** operands would have been false or NULL, then fall through. +** If the result of an OP_Eq comparison on the same two operands as +** the prior OP_Lt or OP_Gt would have been true, then jump to P2. If +** the result of an OP_Eq comparison on the two previous operands +** would have been false or NULL, then fall through. */ case OP_ElseEq: { /* same as TK_ESCAPE, jump */ @@ -90627,6 +94577,7 @@ case OP_ElseEq: { /* same as TK_ESCAPE, jump */ break; } #endif /* SQLITE_DEBUG */ + assert( iCompareIsInit ); VdbeBranchTaken(iCompare==0, 2); if( iCompare==0 ) goto jump_to_p2; break; @@ -90721,6 +94672,7 @@ case OP_Compare: { pColl = pKeyInfo->aColl[i]; bRev = (pKeyInfo->aSortFlags[i] & KEYINFO_ORDER_DESC); iCompare = sqlite3MemCompare(&aMem[p1+idx], &aMem[p2+idx], pColl); + VVA_ONLY( iCompareIsInit = 1; ) if( iCompare ){ if( (pKeyInfo->aSortFlags[i] & KEYINFO_ORDER_BIGNULL) && ((aMem[p1+idx].flags & MEM_Null) || (aMem[p2+idx].flags & MEM_Null)) @@ -90738,13 +94690,14 @@ case OP_Compare: { /* Opcode: Jump P1 P2 P3 * * ** ** Jump to the instruction at address P1, P2, or P3 depending on whether -** in the most recent OP_Compare instruction the P1 vector was less than +** in the most recent OP_Compare instruction the P1 vector was less than, ** equal to, or greater than the P2 vector, respectively. ** ** This opcode must immediately follow an OP_Compare opcode. */ case OP_Jump: { /* jump */ assert( pOp>aOp && pOp[-1].opcode==OP_Compare ); + assert( iCompareIsInit ); if( iCompare<0 ){ VdbeBranchTaken(0,4); pOp = &aOp[pOp->p1 - 1]; }else if( iCompare==0 ){ @@ -90944,26 +94897,103 @@ case OP_IsNull: { /* same as TK_ISNULL, jump, in1 */ break; } -/* Opcode: IsNullOrType P1 P2 P3 * * -** Synopsis: if typeof(r[P1]) IN (P3,5) goto P2 +/* Opcode: IsType P1 P2 P3 P4 P5 +** Synopsis: if typeof(P1.P3) in P5 goto P2 +** +** Jump to P2 if the type of a column in a btree is one of the types specified +** by the P5 bitmask. +** +** P1 is normally a cursor on a btree for which the row decode cache is +** valid through at least column P3. In other words, there should have been +** a prior OP_Column for column P3 or greater. If the cursor is not valid, +** then this opcode might give spurious results. +** If the btree row has fewer than P3 columns, then use P4 as the +** datatype. +** +** If P1 is -1, then P3 is a register number and the datatype is taken +** from the value in that register. +** +** P5 is a bitmask of data types. SQLITE_INTEGER is the least significant +** (0x01) bit. SQLITE_FLOAT is the 0x02 bit. SQLITE_TEXT is 0x04.
+** SQLITE_BLOB is 0x08. SQLITE_NULL is 0x10. +** +** WARNING: This opcode does not reliably distinguish between NULL and REAL +** when P1>=0. If the database contains a NaN value, this opcode will think +** that the datatype is REAL when it should be NULL. When P1<0 and the value +** is already stored in register P3, then this opcode does reliably +** distinguish between NULL and REAL. The problem only arises when P1>=0. +** +** Take the jump to address P2 if and only if the datatype of the +** value determined by P1 and P3 corresponds to one of the bits in the +** P5 bitmask. ** -** Jump to P2 if the value in register P1 is NULL or has a datatype P3. -** P3 is an integer which should be one of SQLITE_INTEGER, SQLITE_FLOAT, -** SQLITE_BLOB, SQLITE_NULL, or SQLITE_TEXT. */ -case OP_IsNullOrType: { /* jump, in1 */ - int doTheJump; - pIn1 = &aMem[pOp->p1]; - doTheJump = (pIn1->flags & MEM_Null)!=0 || sqlite3_value_type(pIn1)==pOp->p3; - VdbeBranchTaken( doTheJump, 2); - if( doTheJump ) goto jump_to_p2; +case OP_IsType: { /* jump */ + VdbeCursor *pC; + u16 typeMask; + u32 serialType; + + assert( pOp->p1>=(-1) && pOp->p1<p->nCursor ); + assert( pOp->p1>=0 || (pOp->p3>=0 && pOp->p3<=(p->nMem+1 - p->nCursor)) ); + if( pOp->p1>=0 ){ + pC = p->apCsr[pOp->p1]; + assert( pC!=0 ); + assert( pOp->p3>=0 ); + if( pOp->p3<pC->nHdrParsed ){ + serialType = pC->aType[pOp->p3]; + if( serialType>=12 ){ + if( serialType&1 ){ + typeMask = 0x04; /* SQLITE_TEXT */ + }else{ + typeMask = 0x08; /* SQLITE_BLOB */ + } + }else{ + static const unsigned char aMask[] = { + 0x10, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x2, + 0x01, 0x01, 0x10, 0x10 + }; + testcase( serialType==0 ); + testcase( serialType==1 ); + testcase( serialType==2 ); + testcase( serialType==3 ); + testcase( serialType==4 ); + testcase( serialType==5 ); + testcase( serialType==6 ); + testcase( serialType==7 ); + testcase( serialType==8 ); + testcase( serialType==9 ); + testcase( serialType==10 ); + testcase( serialType==11 ); + typeMask = aMask[serialType]; + } + }else{ + typeMask = 1 << (pOp->p4.i - 1); + testcase( typeMask==0x01 ); + testcase( typeMask==0x02 ); + testcase( typeMask==0x04 ); + testcase( typeMask==0x08 ); + testcase( typeMask==0x10 ); + } + }else{ + assert( memIsValid(&aMem[pOp->p3]) ); + typeMask = 1 << (sqlite3_value_type((sqlite3_value*)&aMem[pOp->p3])-1); + testcase( typeMask==0x01 ); + testcase( typeMask==0x02 ); + testcase( typeMask==0x04 ); + testcase( typeMask==0x08 ); + testcase( typeMask==0x10 ); + } + VdbeBranchTaken( (typeMask & pOp->p5)!=0, 2); + if( typeMask & pOp->p5 ){ + goto jump_to_p2; + } break; } /* Opcode: ZeroOrNull P1 P2 P3 * * ** Synopsis: r[P2] = 0 OR NULL ** -** If all both registers P1 and P3 are NOT NULL, then store a zero in +** If both registers P1 and P3 are NOT NULL, then store a zero in ** register P2. If either registers P1 or P3 are NULL then put ** a NULL in register P2. */ @@ -90999,11 +95029,14 @@ case OP_NotNull: { /* same as TK_NOTNULL, jump, in1 */ ** If it is, then set register P3 to NULL and jump immediately to P2. ** If P1 is not on a NULL row, then fall through without making any ** changes. +** +** If P1 is not an open cursor, then this opcode is a no-op.
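The new OP_IsType opcode encodes the five fundamental SQLite datatypes as single bits, so membership in an arbitrary set of types reduces to one AND. A small sketch of the same mapping at the API level, mirroring the register branch of the opcode above (the helper function is hypothetical):

    #include "sqlite3.h"

    /* The fundamental type codes are 1..5; shifting by (type-1) yields the
    ** bit positions documented above: SQLITE_INTEGER=0x01, SQLITE_FLOAT=0x02,
    ** SQLITE_TEXT=0x04, SQLITE_BLOB=0x08, SQLITE_NULL=0x10. */
    static int value_type_in_mask(sqlite3_value *pVal, unsigned int mask){
      unsigned int typeMask = 1u << (sqlite3_value_type(pVal) - 1);
      return (typeMask & mask)!=0;
    }

    /* Example: passing mask 0x01|0x02 tests for INTEGER-or-REAL. */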
*/ case OP_IfNullRow: { /* jump */ + VdbeCursor *pC; assert( pOp->p1>=0 && pOp->p1nCursor ); - assert( p->apCsr[pOp->p1]!=0 ); - if( p->apCsr[pOp->p1]->nullRow ){ + pC = p->apCsr[pOp->p1]; + if( pC && pC->nullRow ){ sqlite3VdbeMemSetNull(aMem + pOp->p3); goto jump_to_p2; } @@ -91054,7 +95087,7 @@ case OP_Offset: { /* out3 */ ** Interpret the data that cursor P1 points to as a structure built using ** the MakeRecord instruction. (See the MakeRecord opcode for additional ** information about the format of the data.) Extract the P2-th column -** from this record. If there are less that (P2+1) +** from this record. If there are less than (P2+1) ** values in the record, extract a NULL. ** ** The value extracted is stored in register P3. @@ -91063,12 +95096,14 @@ case OP_Offset: { /* out3 */ ** if the P4 argument is a P4_MEM use the value of the P4 argument as ** the result. ** -** If the OPFLAG_LENGTHARG and OPFLAG_TYPEOFARG bits are set on P5 then -** the result is guaranteed to only be used as the argument of a length() -** or typeof() function, respectively. The loading of large blobs can be -** skipped for length() and all content loading can be skipped for typeof(). +** If the OPFLAG_LENGTHARG bit is set in P5 then the result is guaranteed +** to only be used by the length() function or the equivalent. The content +** of large blobs is not loaded, thus saving CPU cycles. If the +** OPFLAG_TYPEOFARG bit is set then the result will only be used by the +** typeof() function or the IS NULL or IS NOT NULL operators or the +** equivalent. In this case, all content loading can be omitted. */ -case OP_Column: { +case OP_Column: { /* ncycle */ u32 p2; /* column number to retrieve */ VdbeCursor *pC; /* The VDBE cursor */ BtCursor *pCrsr; /* The B-Tree cursor corresponding to pC */ @@ -91312,11 +95347,16 @@ case OP_Column: { pDest->flags = aFlag[t&1]; } }else{ + u8 p5; pDest->enc = encoding; + assert( pDest->db==db ); /* This branch happens only when content is on overflow pages */ - if( ((pOp->p5 & (OPFLAG_LENGTHARG|OPFLAG_TYPEOFARG))!=0 - && ((t>=12 && (t&1)==0) || (pOp->p5 & OPFLAG_TYPEOFARG)!=0)) - || (len = sqlite3VdbeSerialTypeLen(t))==0 + if( ((p5 = (pOp->p5 & OPFLAG_BYTELENARG))!=0 + && (p5==OPFLAG_TYPEOFARG + || (t>=12 && ((t&1)==0 || p5==OPFLAG_BYTELENARG)) + ) + ) + || sqlite3VdbeSerialTypeLen(t)==0 ){ /* Content is irrelevant for ** 1. 
the typeof() function, @@ -91333,11 +95373,13 @@ case OP_Column: { */ sqlite3VdbeSerialGet((u8*)sqlite3CtypeMap, t, pDest); }else{ - if( len>db->aLimit[SQLITE_LIMIT_LENGTH] ) goto too_big; - rc = sqlite3VdbeMemFromBtree(pC->uc.pCursor, aOffset[p2], len, pDest); - if( rc!=SQLITE_OK ) goto abort_due_to_error; - sqlite3VdbeSerialGet((const u8*)pDest->z, t, pDest); - pDest->flags &= ~MEM_Ephem; + rc = vdbeColumnFromOverflow(pC, p2, t, aOffset[p2], + p->cacheCtr, colCacheCtr, pDest); + if( rc ){ + if( rc==SQLITE_NOMEM ) goto no_mem; + if( rc==SQLITE_TOOBIG ) goto too_big; + goto abort_due_to_error; + } } } @@ -91417,7 +95459,7 @@ case OP_TypeCheck: { } case COLTYPE_REAL: { testcase( (pIn1->flags & (MEM_Real|MEM_IntReal))==MEM_Real ); - testcase( (pIn1->flags & (MEM_Real|MEM_IntReal))==MEM_IntReal ); + assert( (pIn1->flags & MEM_IntReal)==0 ); if( pIn1->flags & MEM_Int ){ /* When applying REAL affinity, if the result is still an MEM_Int ** that will fit in 6 bytes, then change the type to MEM_IntReal @@ -91496,7 +95538,7 @@ case OP_Affinity: { }else{ pIn1->u.r = (double)pIn1->u.i; pIn1->flags |= MEM_Real; - pIn1->flags &= ~MEM_Int; + pIn1->flags &= ~(MEM_Int|MEM_Str); } } REGISTER_TRACE((int)(pIn1-aMem), pIn1); @@ -91799,7 +95841,6 @@ case OP_MakeRecord: { /* NULL value. No change in zPayload */ }else{ u64 v; - u32 i; if( serial_type==7 ){ assert( sizeof(v)==sizeof(pRec->u.r) ); memcpy(&v, &pRec->u.r, sizeof(v)); @@ -91807,12 +95848,17 @@ case OP_MakeRecord: { }else{ v = pRec->u.i; } - len = i = sqlite3SmallTypeSizes[serial_type]; - assert( i>0 ); - while( 1 /*exit-by-break*/ ){ - zPayload[--i] = (u8)(v&0xFF); - if( i==0 ) break; - v >>= 8; + len = sqlite3SmallTypeSizes[serial_type]; + assert( len>=1 && len<=8 && len!=5 && len!=7 ); + switch( len ){ + default: zPayload[7] = (u8)(v&0xff); v >>= 8; + zPayload[6] = (u8)(v&0xff); v >>= 8; + case 6: zPayload[5] = (u8)(v&0xff); v >>= 8; + zPayload[4] = (u8)(v&0xff); v >>= 8; + case 4: zPayload[3] = (u8)(v&0xff); v >>= 8; + case 3: zPayload[2] = (u8)(v&0xff); v >>= 8; + case 2: zPayload[1] = (u8)(v&0xff); v >>= 8; + case 1: zPayload[0] = (u8)(v&0xff); } zPayload += len; } @@ -92420,7 +96466,7 @@ case OP_SetCookie: { ** ** See also: OP_OpenRead, OP_ReopenIdx */ -case OP_ReopenIdx: { +case OP_ReopenIdx: { /* ncycle */ int nField; KeyInfo *pKeyInfo; u32 p2; @@ -92441,7 +96487,7 @@ case OP_ReopenIdx: { } /* If the cursor is not currently open or is open on a different ** index, then fall through into OP_OpenRead to force a reopen */ -case OP_OpenRead: +case OP_OpenRead: /* ncycle */ case OP_OpenWrite: assert( pOp->opcode==OP_OpenWrite || pOp->p5==0 || pOp->p5==OPFLAG_SEEKEQ ); @@ -92535,7 +96581,7 @@ case OP_OpenWrite: ** ** Duplicate ephemeral cursors are used for self-joins of materialized views. */ -case OP_OpenDup: { +case OP_OpenDup: { /* ncycle */ VdbeCursor *pOrig; /* The original cursor to be duplicated */ VdbeCursor *pCx; /* The new cursor */ @@ -92597,8 +96643,8 @@ case OP_OpenDup: { ** by this opcode will be used for automatically created transient ** indices in joins. 
*/ -case OP_OpenAutoindex: -case OP_OpenEphemeral: { +case OP_OpenAutoindex: /* ncycle */ +case OP_OpenEphemeral: { /* ncycle */ VdbeCursor *pCx; KeyInfo *pKeyInfo; @@ -92621,7 +96667,7 @@ case OP_OpenEphemeral: { } pCx = p->apCsr[pOp->p1]; if( pCx && !pCx->noReuse && ALWAYS(pOp->p2<=pCx->nField) ){ - /* If the ephermeral table is already open and has no duplicates from + /* If the ephemeral table is already open and has no duplicates from ** OP_OpenDup, then erase all existing content so that the table is ** empty again, rather than creating a new table. */ assert( pCx->isEphemeral ); @@ -92756,7 +96802,7 @@ case OP_OpenPseudo: { ** Close a cursor previously opened as P1. If P1 is not ** currently open, this instruction is a no-op. */ -case OP_Close: { +case OP_Close: { /* ncycle */ assert( pOp->p1>=0 && pOp->p1nCursor ); sqlite3VdbeFreeCursor(p, p->apCsr[pOp->p1]); p->apCsr[pOp->p1] = 0; @@ -92873,10 +96919,10 @@ case OP_ColumnsUsed: { ** ** See also: Found, NotFound, SeekGt, SeekGe, SeekLt */ -case OP_SeekLT: /* jump, in3, group */ -case OP_SeekLE: /* jump, in3, group */ -case OP_SeekGE: /* jump, in3, group */ -case OP_SeekGT: { /* jump, in3, group */ +case OP_SeekLT: /* jump, in3, group, ncycle */ +case OP_SeekLE: /* jump, in3, group, ncycle */ +case OP_SeekGE: /* jump, in3, group, ncycle */ +case OP_SeekGT: { /* jump, in3, group, ncycle */ int res; /* Comparison result */ int oc; /* Opcode */ VdbeCursor *pC; /* The cursor to seek */ @@ -93005,7 +97051,13 @@ case OP_SeekGT: { /* jump, in3, group */ r.aMem = &aMem[pOp->p3]; #ifdef SQLITE_DEBUG - { int i; for(i=0; i0 ) REGISTER_TRACE(pOp->p3+i, &r.aMem[i]); + } + } #endif r.eqSeen = 0; rc = sqlite3BtreeIndexMoveto(pC->uc.pCursor, &r, &res); @@ -93068,7 +97120,7 @@ case OP_SeekGT: { /* jump, in3, group */ } -/* Opcode: SeekScan P1 P2 * * * +/* Opcode: SeekScan P1 P2 * * P5 ** Synopsis: Scan-ahead up to P1 rows ** ** This opcode is a prefix opcode to OP_SeekGE. In other words, this @@ -93078,8 +97130,8 @@ case OP_SeekGT: { /* jump, in3, group */ ** This opcode uses the P1 through P4 operands of the subsequent ** OP_SeekGE. In the text that follows, the operands of the subsequent ** OP_SeekGE opcode are denoted as SeekOP.P1 through SeekOP.P4. Only -** the P1 and P2 operands of this opcode are also used, and are called -** This.P1 and This.P2. +** the P1, P2 and P5 operands of this opcode are also used, and are called +** This.P1, This.P2 and This.P5. ** ** This opcode helps to optimize IN operators on a multi-column index ** where the IN operator is on the later terms of the index by avoiding @@ -93089,32 +97141,54 @@ case OP_SeekGT: { /* jump, in3, group */ ** ** The SeekGE.P3 and SeekGE.P4 operands identify an unpacked key which ** is the desired entry that we want the cursor SeekGE.P1 to be pointing -** to. Call this SeekGE.P4/P5 row the "target". +** to. Call this SeekGE.P3/P4 row the "target". ** ** If the SeekGE.P1 cursor is not currently pointing to a valid row, ** then this opcode is a no-op and control passes through into the OP_SeekGE. ** ** If the SeekGE.P1 cursor is pointing to a valid row, then that row ** might be the target row, or it might be near and slightly before the -** target row. This opcode attempts to position the cursor on the target -** row by, perhaps by invoking sqlite3BtreeStep() on the cursor -** between 0 and This.P1 times. -** -** There are three possible outcomes from this opcode:
      -** -**
    1. If after This.P1 steps, the cursor is still pointing to a place that -** is earlier in the btree than the target row, then fall through -** into the subsquence OP_SeekGE opcode. -** -**
    2. If the cursor is successfully moved to the target row by 0 or more -** sqlite3BtreeNext() calls, then jump to This.P2, which will land just -** past the OP_IdxGT or OP_IdxGE opcode that follows the OP_SeekGE. -** -**
    3. If the cursor ends up past the target row (indicating the the target -** row does not exist in the btree) then jump to SeekOP.P2. +** target row, or it might be after the target row. If the cursor is +** currently before the target row, then this opcode attempts to position +** the cursor on or after the target row by invoking sqlite3BtreeStep() +** on the cursor between 1 and This.P1 times. +** +** The This.P5 parameter is a flag that indicates what to do if the +** cursor ends up pointing at a valid row that is past the target +** row. If This.P5 is false (0) then a jump is made to SeekGE.P2. If +** This.P5 is true (non-zero) then a jump is made to This.P2. The P5==0 +** case occurs when there are no inequality constraints to the right of +** the IN constraint. The jump to SeekGE.P2 ends the loop. The P5!=0 case +** occurs when there are inequality constraints to the right of the IN +** operator. In that case, the This.P2 will point either directly to or +** to setup code prior to the OP_IdxGT or OP_IdxGE opcode that checks for +** loop terminate. +** +** Possible outcomes from this opcode:
        +** +**
      1. If the cursor is initially not pointed to any valid row, then +** fall through into the subsequent OP_SeekGE opcode. +** +**
      2. If the cursor is left pointing to a row that is before the target +** row, even after making as many as This.P1 calls to +** sqlite3BtreeNext(), then also fall through into OP_SeekGE. +** +**
      3. If the cursor is left pointing at the target row, either because it +** was at the target row to begin with or because one or more +** sqlite3BtreeNext() calls moved the cursor to the target row, +** then jump to This.P2. +** +**
      4. If the cursor started out before the target row and a call +** to sqlite3BtreeNext() moved the cursor off the end of the index +** (indicating that the target row definitely does not exist in the +** btree) then jump to SeekGE.P2, ending the loop. +** +**
      5. If the cursor ends up on a valid row that is past the target row +** (indicating that the target row does not exist in the btree) then +** jump to SeekOP.P2 if This.P5==0 or to This.P2 if This.P5>0. **
      */ -case OP_SeekScan: { +case OP_SeekScan: { /* ncycle */ VdbeCursor *pC; int res; int nStep; @@ -93122,14 +97196,25 @@ case OP_SeekScan: { assert( pOp[1].opcode==OP_SeekGE ); - /* pOp->p2 points to the first instruction past the OP_IdxGT that - ** follows the OP_SeekGE. */ + /* If pOp->p5 is clear, then pOp->p2 points to the first instruction past the + ** OP_IdxGT that follows the OP_SeekGE. Otherwise, it points to the first + ** opcode past the OP_SeekGE itself. */ assert( pOp->p2>=(int)(pOp-aOp)+2 ); - assert( aOp[pOp->p2-1].opcode==OP_IdxGT || aOp[pOp->p2-1].opcode==OP_IdxGE ); - testcase( aOp[pOp->p2-1].opcode==OP_IdxGE ); - assert( pOp[1].p1==aOp[pOp->p2-1].p1 ); - assert( pOp[1].p2==aOp[pOp->p2-1].p2 ); - assert( pOp[1].p3==aOp[pOp->p2-1].p3 ); +#ifdef SQLITE_DEBUG + if( pOp->p5==0 ){ + /* There are no inequality constraints following the IN constraint. */ + assert( pOp[1].p1==aOp[pOp->p2-1].p1 ); + assert( pOp[1].p2==aOp[pOp->p2-1].p2 ); + assert( pOp[1].p3==aOp[pOp->p2-1].p3 ); + assert( aOp[pOp->p2-1].opcode==OP_IdxGT + || aOp[pOp->p2-1].opcode==OP_IdxGE ); + testcase( aOp[pOp->p2-1].opcode==OP_IdxGE ); + }else{ + /* There are inequality constraints. */ + assert( pOp->p2==(int)(pOp-aOp)+2 ); + assert( aOp[pOp->p2-1].opcode==OP_SeekGE ); + } +#endif assert( pOp->p1>0 ); pC = p->apCsr[pOp[1].p1]; @@ -93163,8 +97248,9 @@ case OP_SeekScan: { while(1){ rc = sqlite3VdbeIdxKeyCompare(db, pC, &r, &res); if( rc ) goto abort_due_to_error; - if( res>0 ){ + if( res>0 && pOp->p5==0 ){ seekscan_search_fail: + /* Jump to SeekGE.P2, ending the loop */ #ifdef SQLITE_DEBUG if( db->flags&SQLITE_VdbeTrace ){ printf("... %d steps and then skip\n", pOp->p1 - nStep); @@ -93174,7 +97260,8 @@ case OP_SeekScan: { pOp++; goto jump_to_p2; } - if( res==0 ){ + if( res>=0 ){ + /* Jump to This.P2, bypassing the OP_SeekGE opcode */ #ifdef SQLITE_DEBUG if( db->flags&SQLITE_VdbeTrace ){ printf("... %d steps and then success\n", pOp->p1 - nStep); @@ -93194,6 +97281,7 @@ case OP_SeekScan: { break; } nStep--; + pC->cacheStatus = CACHE_STALE; rc = sqlite3BtreeNext(pC->uc.pCursor, 0); if( rc ){ if( rc==SQLITE_DONE ){ @@ -93223,7 +97311,7 @@ case OP_SeekScan: { ** ** P1 must be a valid b-tree cursor. */ -case OP_SeekHit: { +case OP_SeekHit: { /* ncycle */ VdbeCursor *pC; assert( pOp->p1>=0 && pOp->p1nCursor ); pC = p->apCsr[pOp->p1]; @@ -93250,12 +97338,16 @@ case OP_SeekHit: { /* Opcode: IfNotOpen P1 P2 * * * ** Synopsis: if( !csr[P1] ) goto P2 ** -** If cursor P1 is not open, jump to instruction P2. Otherwise, fall through. +** If cursor P1 is not open or if P1 is set to a NULL row using the +** OP_NullRow opcode, then jump to instruction P2. Otherwise, fall through. */ case OP_IfNotOpen: { /* jump */ + VdbeCursor *pCur; + assert( pOp->p1>=0 && pOp->p1nCursor ); - VdbeBranchTaken(p->apCsr[pOp->p1]==0, 2); - if( !p->apCsr[pOp->p1] ){ + pCur = p->apCsr[pOp->p1]; + VdbeBranchTaken(pCur==0 || pCur->nullRow, 2); + if( pCur==0 || pCur->nullRow ){ goto jump_to_p2_and_check_for_interrupt; } break; @@ -93306,13 +97398,13 @@ case OP_IfNotOpen: { /* jump */ ** operands to OP_NotFound and OP_IdxGT. ** ** This opcode is an optimization attempt only. If this opcode always -** falls through, the correct answer is still obtained, but extra works +** falls through, the correct answer is still obtained, but extra work ** is performed. ** ** A value of N in the seekHit flag of cursor P1 means that there exists ** a key P3:N that will match some record in the index. 
We want to know ** if it is possible for a record P3:P4 to match some record in the -** index. If it is not possible, we can skips some work. So if seekHit +** index. If it is not possible, we can skip some work. So if seekHit ** is less than P4, attempt to find out if a match is possible by running ** OP_NotFound. ** @@ -93351,7 +97443,7 @@ case OP_IfNotOpen: { /* jump */ ** ** See also: NotFound, Found, NotExists */ -case OP_IfNoHope: { /* jump, in3 */ +case OP_IfNoHope: { /* jump, in3, ncycle */ VdbeCursor *pC; assert( pOp->p1>=0 && pOp->p1nCursor ); pC = p->apCsr[pOp->p1]; @@ -93365,9 +97457,9 @@ case OP_IfNoHope: { /* jump, in3 */ /* Fall through into OP_NotFound */ /* no break */ deliberate_fall_through } -case OP_NoConflict: /* jump, in3 */ -case OP_NotFound: /* jump, in3 */ -case OP_Found: { /* jump, in3 */ +case OP_NoConflict: /* jump, in3, ncycle */ +case OP_NotFound: /* jump, in3, ncycle */ +case OP_Found: { /* jump, in3, ncycle */ int alreadyExists; int ii; VdbeCursor *pC; @@ -93497,7 +97589,7 @@ case OP_Found: { /* jump, in3 */ ** ** See also: Found, NotFound, NoConflict, SeekRowid */ -case OP_SeekRowid: { /* jump, in3 */ +case OP_SeekRowid: { /* jump, in3, ncycle */ VdbeCursor *pC; BtCursor *pCrsr; int res; @@ -93522,7 +97614,7 @@ case OP_SeekRowid: { /* jump, in3 */ } /* Fall through into OP_NotExists */ /* no break */ deliberate_fall_through -case OP_NotExists: /* jump, in3 */ +case OP_NotExists: /* jump, in3, ncycle */ pIn3 = &aMem[pOp->p3]; assert( (pIn3->flags & MEM_Int)!=0 || pOp->opcode==OP_SeekRowid ); assert( pOp->p1>=0 && pOp->p1nCursor ); @@ -93802,8 +97894,11 @@ case OP_Insert: { if( pOp->p5 & OPFLAG_ISNOOP ) break; #endif - if( pOp->p5 & OPFLAG_NCHANGE ) p->nChange++; - if( pOp->p5 & OPFLAG_LASTROWID ) db->lastRowid = x.nKey; + assert( (pOp->p5 & OPFLAG_LASTROWID)==0 || (pOp->p5 & OPFLAG_NCHANGE)!=0 ); + if( pOp->p5 & OPFLAG_NCHANGE ){ + p->nChange++; + if( pOp->p5 & OPFLAG_LASTROWID ) db->lastRowid = x.nKey; + } assert( (pData->flags & (MEM_Blob|MEM_Str))!=0 || pData->n==0 ); x.pData = pData->z; x.nData = pData->n; @@ -93814,12 +97909,14 @@ case OP_Insert: { x.nZero = 0; } x.pKey = 0; + assert( BTREE_PREFORMAT==OPFLAG_PREFORMAT ); rc = sqlite3BtreeInsert(pC->uc.pCursor, &x, (pOp->p5 & (OPFLAG_APPEND|OPFLAG_SAVEPOSITION|OPFLAG_PREFORMAT)), seekResult ); pC->deferredMoveto = 0; pC->cacheStatus = CACHE_STALE; + colCacheCtr++; /* Invoke the update-hook if required. */ if( rc ) goto abort_due_to_error; @@ -93873,13 +97970,18 @@ case OP_RowCell: { ** left in an undefined state. ** ** If the OPFLAG_AUXDELETE bit is set on P5, that indicates that this -** delete one of several associated with deleting a table row and all its -** associated index entries. Exactly one of those deletes is the "primary" -** delete. The others are all on OPFLAG_FORDELETE cursors or else are -** marked with the AUXDELETE flag. +** delete is one of several associated with deleting a table row and +** all its associated index entries. Exactly one of those deletes is +** the "primary" delete. The others are all on OPFLAG_FORDELETE +** cursors or else are marked with the AUXDELETE flag. +** +** If the OPFLAG_NCHANGE (0x01) flag of P2 (NB: P2 not P5) is set, then +** the row change count is incremented (otherwise not). ** -** If the OPFLAG_NCHANGE flag of P2 (NB: P2 not P5) is set, then the row -** change count is incremented (otherwise not). +** If the OPFLAG_ISNOOP (0x40) flag of P2 (not P5!) is set, then the +** pre-update-hook for deletes is run, but the btree is otherwise unchanged. 
+** This happens when the OP_Delete is to be shortly followed by an OP_Insert +** with the same key, causing the btree entry to be overwritten. ** ** P1 must not be pseudo-table. It has to be a real table with ** multiple rows. @@ -93980,6 +98082,7 @@ case OP_Delete: { rc = sqlite3BtreeDelete(pC->uc.pCursor, pOp->p5); pC->cacheStatus = CACHE_STALE; + colCacheCtr++; pC->seekResult = 0; if( rc ) goto abort_due_to_error; @@ -94047,13 +98150,13 @@ case OP_SorterCompare: { ** Write into register P2 the current sorter data for sorter cursor P1. ** Then clear the column header cache on cursor P3. ** -** This opcode is normally use to move a record out of the sorter and into +** This opcode is normally used to move a record out of the sorter and into ** a register that is the source for a pseudo-table cursor created using ** OpenPseudo. That pseudo-table cursor is the one that is identified by ** parameter P3. Clearing the P3 column cache as part of this opcode saves ** us from having to issue a separate NullRow instruction to clear that cache. */ -case OP_SorterData: { +case OP_SorterData: { /* ncycle */ VdbeCursor *pC; pOut = &aMem[pOp->p2]; @@ -94145,7 +98248,7 @@ case OP_RowData: { ** be a separate OP_VRowid opcode for use with virtual tables, but this ** one opcode now works for both table types. */ -case OP_Rowid: { /* out2 */ +case OP_Rowid: { /* out2, ncycle */ VdbeCursor *pC; i64 v; sqlite3_vtab *pVtab; @@ -94244,8 +98347,8 @@ case OP_NullRow: { ** from the end toward the beginning. In other words, the cursor is ** configured to use Prev, not Next. */ -case OP_SeekEnd: -case OP_Last: { /* jump */ +case OP_SeekEnd: /* ncycle */ +case OP_Last: { /* jump, ncycle */ VdbeCursor *pC; BtCursor *pCrsr; int res; @@ -94328,8 +98431,8 @@ case OP_IfSmaller: { /* jump */ ** regression tests can determine whether or not the optimizer is ** correctly optimizing out sorts. */ -case OP_SorterSort: /* jump */ -case OP_Sort: { /* jump */ +case OP_SorterSort: /* jump ncycle */ +case OP_Sort: { /* jump ncycle */ #ifdef SQLITE_TEST sqlite3_sort_count++; sqlite3_search_count--; @@ -94346,17 +98449,22 @@ case OP_Sort: { /* jump */ ** If the table or index is not empty, fall through to the following ** instruction. ** +** If P2 is zero, that is an assertion that the P1 table is never +** empty and hence the jump will never be taken. +** ** This opcode leaves the cursor configured to move in forward order, ** from the beginning toward the end. In other words, the cursor is ** configured to use Next, not Prev. 
*/ -case OP_Rewind: { /* jump */ +case OP_Rewind: { /* jump, ncycle */ VdbeCursor *pC; BtCursor *pCrsr; int res; assert( pOp->p1>=0 && pOp->p1nCursor ); assert( pOp->p5==0 ); + assert( pOp->p2>=0 && pOp->p2nOp ); + pC = p->apCsr[pOp->p1]; assert( pC!=0 ); assert( isSorter(pC)==(pOp->opcode==OP_SorterSort) ); @@ -94376,9 +98484,10 @@ case OP_Rewind: { /* jump */ } if( rc ) goto abort_due_to_error; pC->nullRow = (u8)res; - assert( pOp->p2>0 && pOp->p2nOp ); - VdbeBranchTaken(res!=0,2); - if( res ) goto jump_to_p2; + if( pOp->p2>0 ){ + VdbeBranchTaken(res!=0,2); + if( res ) goto jump_to_p2; + } break; } @@ -94444,9 +98553,11 @@ case OP_SorterNext: { /* jump */ rc = sqlite3VdbeSorterNext(db, pC); goto next_tail; -case OP_Prev: /* jump */ +case OP_Prev: /* jump, ncycle */ assert( pOp->p1>=0 && pOp->p1nCursor ); - assert( pOp->p5aCounter) ); + assert( pOp->p5==0 + || pOp->p5==SQLITE_STMTSTATUS_FULLSCAN_STEP + || pOp->p5==SQLITE_STMTSTATUS_AUTOINDEX); pC = p->apCsr[pOp->p1]; assert( pC!=0 ); assert( pC->deferredMoveto==0 ); @@ -94457,9 +98568,11 @@ case OP_Prev: /* jump */ rc = sqlite3BtreePrevious(pC->uc.pCursor, pOp->p3); goto next_tail; -case OP_Next: /* jump */ +case OP_Next: /* jump, ncycle */ assert( pOp->p1>=0 && pOp->p1nCursor ); - assert( pOp->p5aCounter) ); + assert( pOp->p5==0 + || pOp->p5==SQLITE_STMTSTATUS_FULLSCAN_STEP + || pOp->p5==SQLITE_STMTSTATUS_AUTOINDEX); pC = p->apCsr[pOp->p1]; assert( pC!=0 ); assert( pC->deferredMoveto==0 ); @@ -94647,8 +98760,8 @@ case OP_IdxDelete: { ** ** See also: Rowid, MakeRecord. */ -case OP_DeferredSeek: -case OP_IdxRowid: { /* out2 */ +case OP_DeferredSeek: /* ncycle */ +case OP_IdxRowid: { /* out2, ncycle */ VdbeCursor *pC; /* The P1 index cursor */ VdbeCursor *pTabCur; /* The P2 table cursor (OP_DeferredSeek only) */ i64 rowid; /* Rowid that P1 current points to */ @@ -94666,10 +98779,10 @@ case OP_IdxRowid: { /* out2 */ ** of sqlite3VdbeCursorRestore() and sqlite3VdbeIdxRowid(). */ rc = sqlite3VdbeCursorRestore(pC); - /* sqlite3VbeCursorRestore() can only fail if the record has been deleted - ** out from under the cursor. That will never happens for an IdxRowid - ** or Seek opcode */ - if( NEVER(rc!=SQLITE_OK) ) goto abort_due_to_error; + /* sqlite3VdbeCursorRestore() may fail if the cursor has been disturbed + ** since it was last positioned and an error (e.g. OOM or an IO error) + ** occurs while trying to reposition it. */ + if( rc!=SQLITE_OK ) goto abort_due_to_error; if( !pC->nullRow ){ rowid = 0; /* Not needed. Only used to silence a warning. */ @@ -94710,8 +98823,8 @@ case OP_IdxRowid: { /* out2 */ ** seek operation now, without further delay. If the cursor seek has ** already occurred, this instruction is a no-op. */ -case OP_FinishSeek: { - VdbeCursor *pC; /* The P1 index cursor */ +case OP_FinishSeek: { /* ncycle */ + VdbeCursor *pC; /* The P1 index cursor */ assert( pOp->p1>=0 && pOp->p1nCursor ); pC = p->apCsr[pOp->p1]; @@ -94766,10 +98879,10 @@ case OP_FinishSeek: { ** If the P1 index entry is less than or equal to the key value then jump ** to P2. Otherwise fall through to the next instruction. */ -case OP_IdxLE: /* jump */ -case OP_IdxGT: /* jump */ -case OP_IdxLT: /* jump */ -case OP_IdxGE: { /* jump */ +case OP_IdxLE: /* jump, ncycle */ +case OP_IdxGT: /* jump, ncycle */ +case OP_IdxLT: /* jump, ncycle */ +case OP_IdxGE: { /* jump, ncycle */ VdbeCursor *pC; int res; UnpackedRecord r; @@ -94846,7 +98959,7 @@ case OP_IdxGE: { /* jump */ ** file is given by P1. ** ** The table being destroyed is in the main database file if P3==0. 
If -** P3==1 then the table to be clear is in the auxiliary database file +** P3==1 then the table to be destroyed is in the auxiliary database file ** that is used to store tables create using CREATE TEMPORARY TABLE. ** ** If AUTOVACUUM is enabled then it is possible that another root page @@ -94906,8 +99019,8 @@ case OP_Destroy: { /* out2 */ ** in the database file is given by P1. But, unlike Destroy, do not ** remove the table or index from the database file. ** -** The table being clear is in the main database file if P2==0. If -** P2==1 then the table to be clear is in the auxiliary database file +** The table being cleared is in the main database file if P2==0. If +** P2==1 then the table to be cleared is in the auxiliary database file ** that is used to store tables create using CREATE TEMPORARY TABLE. ** ** If the P3 value is non-zero, then the row change count is incremented @@ -94993,13 +99106,41 @@ case OP_CreateBtree: { /* out2 */ /* Opcode: SqlExec * * * P4 * ** ** Run the SQL statement or statements specified in the P4 string. +** Disable Auth and Trace callbacks while those statements are running if +** P1 is true. */ case OP_SqlExec: { + char *zErr; +#ifndef SQLITE_OMIT_AUTHORIZATION + sqlite3_xauth xAuth; +#endif + u8 mTrace; + sqlite3VdbeIncrWriteCounter(p, 0); db->nSqlExec++; - rc = sqlite3_exec(db, pOp->p4.z, 0, 0, 0); + zErr = 0; +#ifndef SQLITE_OMIT_AUTHORIZATION + xAuth = db->xAuth; +#endif + mTrace = db->mTrace; + if( pOp->p1 ){ +#ifndef SQLITE_OMIT_AUTHORIZATION + db->xAuth = 0; +#endif + db->mTrace = 0; + } + rc = sqlite3_exec(db, pOp->p4.z, 0, 0, &zErr); db->nSqlExec--; - if( rc ) goto abort_due_to_error; +#ifndef SQLITE_OMIT_AUTHORIZATION + db->xAuth = xAuth; +#endif + db->mTrace = mTrace; + if( zErr || rc ){ + sqlite3VdbeError(p, "%s", zErr); + sqlite3_free(zErr); + if( rc==SQLITE_NOMEM ) goto no_mem; + goto abort_due_to_error; + } break; } @@ -95180,13 +99321,14 @@ case OP_IntegrityCk: { pIn1 = &aMem[pOp->p1]; assert( pOp->p5nDb ); assert( DbMaskTest(p->btreeMask, pOp->p5) ); - z = sqlite3BtreeIntegrityCheck(db, db->aDb[pOp->p5].pBt, &aRoot[1], nRoot, - (int)pnErr->u.i+1, &nErr); + rc = sqlite3BtreeIntegrityCheck(db, db->aDb[pOp->p5].pBt, &aRoot[1], nRoot, + (int)pnErr->u.i+1, &nErr, &z); sqlite3VdbeMemSetNull(pIn1); if( nErr==0 ){ assert( z==0 ); - }else if( z==0 ){ - goto no_mem; + }else if( rc ){ + sqlite3_free(z); + goto abort_due_to_error; }else{ pnErr->u.i -= nErr-1; sqlite3VdbeMemSetStr(pIn1, z, -1, SQLITE_UTF8, sqlite3_free); @@ -95390,9 +99532,6 @@ case OP_Program: { /* jump */ pFrame->aOp = p->aOp; pFrame->nOp = p->nOp; pFrame->token = pProgram->token; -#ifdef SQLITE_ENABLE_STMT_SCANSTATUS - pFrame->anExec = p->anExec; -#endif #ifdef SQLITE_DEBUG pFrame->iFrameMagic = SQLITE_FRAME_MAGIC; #endif @@ -95429,9 +99568,6 @@ case OP_Program: { /* jump */ memset(pFrame->aOnce, 0, (pProgram->nOp + 7)/8); p->aOp = aOp = pProgram->aOp; p->nOp = pProgram->nOp; -#ifdef SQLITE_ENABLE_STMT_SCANSTATUS - p->anExec = 0; -#endif #ifdef SQLITE_DEBUG /* Verify that second and subsequent executions of the same trigger do not ** try to reuse register values from the first use. */ @@ -95571,7 +99707,7 @@ case OP_IfPos: { /* jump, in1 */ ** Synopsis: if r[P1]>0 then r[P2]=r[P1]+max(0,r[P3]) else r[P2]=(-1) ** ** This opcode performs a commonly used computation associated with -** LIMIT and OFFSET process. r[P1] holds the limit counter. r[P3] +** LIMIT and OFFSET processing. r[P1] holds the limit counter. r[P3] ** holds the offset counter. 
The opcode computes the combined value ** of the LIMIT and OFFSET and stores that value in r[P2]. The r[P2] ** value computed is the total number of rows that will need to be @@ -95738,7 +99874,7 @@ case OP_AggStep1: { /* If this function is inside of a trigger, the register array in aMem[] ** might change from one evaluation to the next. The next block of code ** checks to see if the register array has changed, and if so it - ** reinitializes the relavant parts of the sqlite3_context object */ + ** reinitializes the relevant parts of the sqlite3_context object */ if( pCtx->pMem != pMem ){ pCtx->pMem = pMem; for(i=pCtx->argc-1; i>=0; i--) pCtx->argv[i] = &aMem[pOp->p2+i]; @@ -95833,6 +99969,7 @@ case OP_AggFinal: { } sqlite3VdbeChangeEncoding(pMem, encoding); UPDATE_MAX_BLOBSIZE(pMem); + REGISTER_TRACE((int)(pMem-aMem), pMem); break; } @@ -96188,7 +100325,7 @@ case OP_VDestroy: { ** P1 is a cursor number. This opcode opens a cursor to the virtual ** table and stores that cursor in P1. */ -case OP_VOpen: { +case OP_VOpen: { /* ncycle */ VdbeCursor *pCur; sqlite3_vtab_cursor *pVCur; sqlite3_vtab *pVtab; @@ -96224,6 +100361,53 @@ case OP_VOpen: { } #endif /* SQLITE_OMIT_VIRTUALTABLE */ +#ifndef SQLITE_OMIT_VIRTUALTABLE +/* Opcode: VCheck P1 P2 P3 P4 * +** +** P4 is a pointer to a Table object that is a virtual table in schema P1 +** that supports the xIntegrity() method. This opcode runs the xIntegrity() +** method for that virtual table, using P3 as the integer argument. If +** an error is reported back, the table name is prepended to the error +** message and that message is stored in P2. If no errors are seen, +** register P2 is set to NULL. +*/ +case OP_VCheck: { /* out2 */ + Table *pTab; + sqlite3_vtab *pVtab; + const sqlite3_module *pModule; + char *zErr = 0; + + pOut = &aMem[pOp->p2]; + sqlite3VdbeMemSetNull(pOut); /* Innocent until proven guilty */ + assert( pOp->p4type==P4_TABLE ); + pTab = pOp->p4.pTab; + assert( pTab!=0 ); + assert( IsVirtual(pTab) ); + if( pTab->u.vtab.p==0 ) break; + pVtab = pTab->u.vtab.p->pVtab; + assert( pVtab!=0 ); + pModule = pVtab->pModule; + assert( pModule!=0 ); + assert( pModule->iVersion>=4 ); + assert( pModule->xIntegrity!=0 ); + pTab->nTabRef++; + sqlite3VtabLock(pTab->u.vtab.p); + assert( pOp->p1>=0 && pOp->p1nDb ); + rc = pModule->xIntegrity(pVtab, db->aDb[pOp->p1].zDbSName, pTab->zName, + pOp->p3, &zErr); + sqlite3VtabUnlock(pTab->u.vtab.p); + sqlite3DeleteTable(db, pTab); + if( rc ){ + sqlite3_free(zErr); + goto abort_due_to_error; + } + if( zErr ){ + sqlite3VdbeMemSetStr(pOut, zErr, -1, SQLITE_UTF8, sqlite3_free); + } + break; +} +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + #ifndef SQLITE_OMIT_VIRTUALTABLE /* Opcode: VInitIn P1 P2 P3 * * ** Synopsis: r[P2]=ValueList(P1,P3) @@ -96235,7 +100419,7 @@ case OP_VOpen: { ** cursor. Register P3 is used to hold the values returned by ** sqlite3_vtab_in_first() and sqlite3_vtab_in_next(). 
*/ -case OP_VInitIn: { /* out2 */ +case OP_VInitIn: { /* out2, ncycle */ VdbeCursor *pC; /* The cursor containing the RHS values */ ValueList *pRhs; /* New ValueList object to put in reg[P2] */ @@ -96246,7 +100430,7 @@ case OP_VInitIn: { /* out2 */ pRhs->pOut = &aMem[pOp->p3]; pOut = out2Prerelease(p, pOp); pOut->flags = MEM_Null; - sqlite3VdbeMemSetPointer(pOut, pRhs, "ValueList", sqlite3_free); + sqlite3VdbeMemSetPointer(pOut, pRhs, "ValueList", sqlite3VdbeValueListFree); break; } #endif /* SQLITE_OMIT_VIRTUALTABLE */ @@ -96272,7 +100456,7 @@ case OP_VInitIn: { /* out2 */ ** ** A jump is made to P2 if the result set after filtering would be empty. */ -case OP_VFilter: { /* jump */ +case OP_VFilter: { /* jump, ncycle */ int nArg; int iQuery; const sqlite3_module *pModule; @@ -96332,7 +100516,7 @@ case OP_VFilter: { /* jump */ ** bits (OPFLAG_LENGTHARG or OPFLAG_TYPEOFARG) but those bits are ** unused by OP_VColumn. */ -case OP_VColumn: { +case OP_VColumn: { /* ncycle */ sqlite3_vtab *pVtab; const sqlite3_module *pModule; Mem *pDest; @@ -96384,7 +100568,7 @@ case OP_VColumn: { ** jump to instruction P2. Or, if the virtual table has reached ** the end of its result set, then fall through to the next instruction. */ -case OP_VNext: { /* jump */ +case OP_VNext: { /* jump, ncycle */ sqlite3_vtab *pVtab; const sqlite3_module *pModule; int res; @@ -96615,7 +100799,7 @@ case OP_MaxPgcnt: { /* out2 */ ** This opcode works exactly like OP_Function. The only difference is in ** its name. This opcode is used in places where the function must be ** purely non-deterministic. Some built-in date/time functions can be -** either determinitic of non-deterministic, depending on their arguments. +** either deterministic of non-deterministic, depending on their arguments. ** When those function are used in a non-deterministic way, they will check ** to see if they were called using OP_PureFunc instead of OP_Function, and ** if they were, they throw an error. @@ -96633,7 +100817,7 @@ case OP_Function: { /* group */ /* If this function is inside of a trigger, the register array in aMem[] ** might change from one evaluation to the next. The next block of code ** checks to see if the register array has changed, and if so it - ** reinitializes the relavant parts of the sqlite3_context object */ + ** reinitializes the relevant parts of the sqlite3_context object */ pOut = &aMem[pOp->p3]; if( pCtx->pOut != pOut ){ pCtx->pVdbe = p; @@ -96709,7 +100893,7 @@ case OP_FilterAdd: { printf("hash: %llu modulo %d -> %u\n", h, pIn1->n, (int)(h%pIn1->n)); } #endif - h %= pIn1->n; + h %= (pIn1->n*8); pIn1->z[h/8] |= 1<<(h&7); break; } @@ -96745,7 +100929,7 @@ case OP_Filter: { /* jump */ printf("hash: %llu modulo %d -> %u\n", h, pIn1->n, (int)(h%pIn1->n)); } #endif - h %= pIn1->n; + h %= (pIn1->n*8); if( (pIn1->z[h/8] & (1<<(h&7)))==0 ){ VdbeBranchTaken(1, 2); p->aCounter[SQLITE_STMTSTATUS_FILTER_HIT]++; @@ -96967,11 +101151,13 @@ default: { /* This is really OP_Noop, OP_Explain */ *****************************************************************************/ } -#ifdef VDBE_PROFILE - { - u64 endTime = sqlite3NProfileCnt ? sqlite3NProfileCnt : sqlite3Hwtime(); - if( endTime>start ) pOrigOp->cycles += endTime - start; - pOrigOp->cnt++; +#if defined(VDBE_PROFILE) + *pnCycle += sqlite3NProfileCnt ? 
sqlite3NProfileCnt : sqlite3Hwtime(); + pnCycle = 0; +#elif defined(SQLITE_ENABLE_STMT_SCANSTATUS) + if( pnCycle ){ + *pnCycle += sqlite3Hwtime(); + pnCycle = 0; } #endif @@ -96995,7 +101181,7 @@ default: { /* This is really OP_Noop, OP_Explain */ } if( opProperty==0xff ){ /* Never happens. This code exists to avoid a harmless linkage - ** warning aboud sqlite3VdbeRegisterDump() being defined but not + ** warning about sqlite3VdbeRegisterDump() being defined but not ** used. */ sqlite3VdbeRegisterDump(p); } @@ -97048,6 +101234,18 @@ default: { /* This is really OP_Noop, OP_Explain */ ** release the mutexes on btrees that were acquired at the ** top. */ vdbe_return: +#if defined(VDBE_PROFILE) + if( pnCycle ){ + *pnCycle += sqlite3NProfileCnt ? sqlite3NProfileCnt : sqlite3Hwtime(); + pnCycle = 0; + } +#elif defined(SQLITE_ENABLE_STMT_SCANSTATUS) + if( pnCycle ){ + *pnCycle += sqlite3Hwtime(); + pnCycle = 0; + } +#endif + #ifndef SQLITE_OMIT_PROGRESS_CALLBACK while( nVmStep>=nProgressLimit && db->xProgress!=0 ){ nProgressLimit += db->nProgressOps; @@ -97059,7 +101257,9 @@ default: { /* This is really OP_Noop, OP_Explain */ } #endif p->aCounter[SQLITE_STMTSTATUS_VM_STEP] += (int)nVmStep; - sqlite3VdbeLeave(p); + if( DbMaskNonZero(p->lockMask) ){ + sqlite3VdbeLeave(p); + } assert( rc!=SQLITE_OK || nExtraDelete==0 || sqlite3_strlike("DELETE%",p->zSql,0)!=0 ); @@ -97154,8 +101354,7 @@ static int blobSeekToRow(Incrblob *p, sqlite3_int64 iRow, char **pzErr){ /* Set the value of register r[1] in the SQL statement to integer iRow. ** This is done directly as a performance optimization */ - v->aMem[1].flags = MEM_Int; - v->aMem[1].u.i = iRow; + sqlite3VdbeMemSetInt64(&v->aMem[1], iRow); /* If the statement has been run before (and is paused at the OP_ResultRow) ** then back it up to the point where it does the OP_NotExists. This could @@ -97238,7 +101437,7 @@ SQLITE_API int sqlite3_blob_open( #endif *ppBlob = 0; #ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) || zTable==0 ){ + if( !sqlite3SafetyCheckOk(db) || zTable==0 || zColumn==0 ){ return SQLITE_MISUSE_BKPT; } #endif @@ -97437,7 +101636,7 @@ SQLITE_API int sqlite3_blob_open( if( pBlob && pBlob->pStmt ) sqlite3VdbeFinalize((Vdbe *)pBlob->pStmt); sqlite3DbFree(db, pBlob); } - sqlite3ErrorWithMsg(db, rc, (zErr ? "%s" : 0), zErr); + sqlite3ErrorWithMsg(db, rc, (zErr ? "%s" : (char*)0), zErr); sqlite3DbFree(db, zErr); sqlite3ParseObjectReset(&sParse); rc = sqlite3ApiExit(db, rc); @@ -97596,7 +101795,7 @@ SQLITE_API int sqlite3_blob_reopen(sqlite3_blob *pBlob, sqlite3_int64 iRow){ ((Vdbe*)p->pStmt)->rc = SQLITE_OK; rc = blobSeekToRow(p, iRow, &zErr); if( rc!=SQLITE_OK ){ - sqlite3ErrorWithMsg(db, rc, (zErr ? "%s" : 0), zErr); + sqlite3ErrorWithMsg(db, rc, (zErr ? "%s" : (char*)0), zErr); sqlite3DbFree(db, zErr); } assert( rc!=SQLITE_SCHEMA ); @@ -97699,7 +101898,7 @@ SQLITE_API int sqlite3_blob_reopen(sqlite3_blob *pBlob, sqlite3_int64 iRow){ ** The threshold for the amount of main memory to use before flushing ** records to a PMA is roughly the same as the limit configured for the ** page-cache of the main database. Specifically, the threshold is set to -** the value returned by "PRAGMA main.page_size" multipled by +** the value returned by "PRAGMA main.page_size" multiplied by ** that returned by "PRAGMA main.cache_size", in bytes. 
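 **
 ** As a worked example (illustrative numbers only, not part of this diff):
 ** with "PRAGMA main.page_size" returning 4096 and "PRAGMA main.cache_size"
 ** returning 2000, the in-memory accumulation threshold is roughly
 ** 4096*2000 = 8,192,000 bytes, after which records are flushed to a PMA.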
 **
 ** If the sorter is running in single-threaded mode, then all PMAs generated
@@ -97722,7 +101921,7 @@ SQLITE_API int sqlite3_blob_reopen(sqlite3_blob *pBlob, sqlite3_int64 iRow){
 **
 ** If there are fewer than SORTER_MAX_MERGE_COUNT PMAs in total and the
 ** sorter is running in single-threaded mode, then these PMAs are merged
-** incrementally as keys are retreived from the sorter by the VDBE.  The
+** incrementally as keys are retrieved from the sorter by the VDBE.  The
 ** MergeEngine object, described in further detail below, performs this
 ** merge.
 **
@@ -97800,7 +101999,7 @@ struct SorterFile {
 struct SorterList {
   SorterRecord *pList;            /* Linked list of records */
   u8 *aMemory;                    /* If non-NULL, bulk memory to hold pList */
-  int szPMA;                      /* Size of pList as PMA in bytes */
+  i64 szPMA;                      /* Size of pList as PMA in bytes */
 };
 
 /*
@@ -97885,7 +102084,7 @@ struct MergeEngine {
 **
 ** Essentially, this structure contains all those fields of the VdbeSorter
 ** structure for which each thread requires a separate instance. For example,
-** each thread requries its own UnpackedRecord object to unpack records in
+** each thread requires its own UnpackedRecord object to unpack records in
 ** as part of comparison operations.
 **
 ** Before a background thread is launched, variable bDone is set to 0. Then,
@@ -97909,10 +102108,10 @@ typedef int (*SorterCompare)(SortSubtask*,int*,const void*,int,const void*,int);
 struct SortSubtask {
   SQLiteThread *pThread;          /* Background thread, if any */
   int bDone;                      /* Set if thread is finished but not joined */
+  int nPMA;                       /* Number of PMAs currently in file */
   VdbeSorter *pSorter;            /* Sorter that owns this sub-task */
   UnpackedRecord *pUnpacked;      /* Space to unpack a record */
   SorterList list;                /* List for thread to write to a PMA */
-  int nPMA;                       /* Number of PMAs currently in file */
   SorterCompare xCompare;         /* Compare function to use */
   SorterFile file;                /* Temp file for level-0 PMAs */
   SorterFile file2;               /* Space for other PMAs */
@@ -97957,7 +102156,7 @@ struct VdbeSorter {
 ** PMA, in sorted order.  The next key to be read is cached in nKey/aKey.
 ** aKey might point into aMap or into aBuffer.  If neither of those locations
 ** contain a contiguous representation of the key, then aAlloc is allocated
-** and the key is copied into aAlloc and aKey is made to poitn to aAlloc.
+** and the key is copied into aAlloc and aKey is made to point to aAlloc.
 **
 ** pFd==0 at EOF.
 */
@@ -99328,7 +103527,7 @@ static int vdbeSorterFlushPMA(VdbeSorter *pSorter){
     ** the background thread from a sub-tasks previous turn is still running,
     ** skip it. If the first (pSorter->nTask-1) sub-tasks are all still busy,
     ** fall back to using the final sub-task. The first (pSorter->nTask-1)
-    ** sub-tasks are prefered as they use background threads - the final
+    ** sub-tasks are preferred as they use background threads - the final
     ** sub-task uses the main thread.
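    **
    ** A worked example of the probe order (illustrative, not part of this
    ** diff): with nWorker==3 and pSorter->iPrev==1, the loop below tests
    ** sub-tasks 2, 0, 1 in that order, handing the new PMA to the first
    ** sub-task whose previous background thread has already finished.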
    */
    for(i=0; i<nWorker; i++){
      int iTest = (pSorter->iPrev + i + 1) % nWorker;
@@ -99386,8 +103585,8 @@ SQLITE_PRIVATE int sqlite3VdbeSorterWrite(
   int rc = SQLITE_OK;             /* Return Code */
   SorterRecord *pNew;             /* New list element */
   int bFlush;                     /* True to flush contents of memory to PMA */
-  int nReq;                       /* Bytes of memory required */
-  int nPMA;                       /* Bytes of PMA space required */
+  i64 nReq;                       /* Bytes of memory required */
+  i64 nPMA;                       /* Bytes of PMA space required */
   int t;                          /* serial type of first record field */
 
   assert( pCsr->eCurType==CURTYPE_SORTER );
@@ -99812,7 +104011,7 @@ static int vdbePmaReaderIncrMergeInit(PmaReader *pReadr, int eMode){
 
   rc = vdbeMergeEngineInit(pTask, pIncr->pMerger, eMode);
 
-  /* Set up the required files for pIncr. A multi-theaded IncrMerge object
+  /* Set up the required files for pIncr. A multi-threaded IncrMerge object
   ** requires two temp files to itself, whereas a single-threaded object
   ** only requires a region of pTask->file2. */
   if( rc==SQLITE_OK ){
@@ -100452,6 +104651,8 @@ static int bytecodevtabConnect(
       "p5 INT,"
       "comment TEXT,"
       "subprog TEXT,"
+      "nexec INT,"
+      "ncycle INT,"
      "stmt HIDDEN"
    ");",
@@ -100466,6 +104667,9 @@
    ");"
  };
 
+  (void)argc;
+  (void)argv;
+  (void)pzErr;
   rc = sqlite3_declare_vtab(db, azSchema[isTabUsed]);
   if( rc==SQLITE_OK ){
     pNew = sqlite3_malloc( sizeof(*pNew) );
@@ -100611,7 +104815,7 @@ static int bytecodevtabColumn(
       }
     }
   }
-   i += 10;
+   i += 20;
   }
 }
 switch( i ){
@@ -100661,16 +104865,31 @@ static int bytecodevtabColumn(
       }
       break;
     }
-    case 10:  /* tables_used.type */
+
+#ifdef SQLITE_ENABLE_STMT_SCANSTATUS
+    case 9:  /* nexec */
+      sqlite3_result_int(ctx, pOp->nExec);
+      break;
+    case 10:  /* ncycle */
+      sqlite3_result_int(ctx, pOp->nCycle);
+      break;
+#else
+    case 9:  /* nexec */
+    case 10:  /* ncycle */
+      sqlite3_result_int(ctx, 0);
+      break;
+#endif
+
+    case 20:  /* tables_used.type */
       sqlite3_result_text(ctx, pCur->zType, -1, SQLITE_STATIC);
       break;
-    case 11:  /* tables_used.schema */
+    case 21:  /* tables_used.schema */
       sqlite3_result_text(ctx, pCur->zSchema, -1, SQLITE_STATIC);
       break;
-    case 12:  /* tables_used.name */
+    case 22:  /* tables_used.name */
       sqlite3_result_text(ctx, pCur->zName, -1, SQLITE_STATIC);
       break;
-    case 13:  /* tables_used.wr */
+    case 23:  /* tables_used.wr */
      sqlite3_result_int(ctx, pOp->opcode==OP_OpenWrite);
      break;
  }
@@ -100701,6 +104920,7 @@ static int bytecodevtabFilter(
   bytecodevtab_cursor *pCur = (bytecodevtab_cursor *)pVtabCursor;
   bytecodevtab *pVTab = (bytecodevtab *)pVtabCursor->pVtab;
   int rc = SQLITE_OK;
+  (void)idxStr;
 
   bytecodevtabCursorClear(pCur);
   pCur->iRowid = 0;
@@ -100743,7 +104963,7 @@ static int bytecodevtabBestIndex(
   int rc = SQLITE_CONSTRAINT;
   struct sqlite3_index_constraint *p;
   bytecodevtab *pVTab = (bytecodevtab*)tab;
-  int iBaseCol = pVTab->bTablesUsed ? 4 : 8;
+  int iBaseCol = pVTab->bTablesUsed ? 4 : 10;
   pIdxInfo->estimatedCost = (double)100;
   pIdxInfo->estimatedRows = 100;
   pIdxInfo->idxNum = 0;
@@ -100790,7 +105010,8 @@ static sqlite3_module bytecodevtabModule = {
   /* xSavepoint  */ 0,
   /* xRelease    */ 0,
   /* xRollbackTo */ 0,
-  /* xShadowName */ 0
+  /* xShadowName */ 0,
+  /* xIntegrity  */ 0
 };
 
@@ -101169,6 +105390,8 @@ SQLITE_PRIVATE int sqlite3JournalOpen(
 ){
   MemJournal *p = (MemJournal*)pJfd;
 
+  assert( zName || nSpill<0 || (flags & SQLITE_OPEN_EXCLUSIVE) );
+
   /* Zero the file-handle object. If nSpill was passed zero, initialize
   ** it using the sqlite3OsOpen() function of the underlying VFS.  In this
   ** case none of the code in this module is executed as a result of calls
@@ -101312,7 +105535,7 @@ static int walkWindowList(Walker *pWalker, Window *pList, int bOneOnly){
 ** The return value from this routine is WRC_Abort to abandon the tree walk
 ** and WRC_Continue to continue.
 */
-static SQLITE_NOINLINE int walkExpr(Walker *pWalker, Expr *pExpr){
+SQLITE_PRIVATE SQLITE_NOINLINE int sqlite3WalkExprNN(Walker *pWalker, Expr *pExpr){
   int rc;
   testcase( ExprHasProperty(pExpr, EP_TokenOnly) );
   testcase( ExprHasProperty(pExpr, EP_Reduced) );
@@ -101321,7 +105544,9 @@
   if( rc ) return rc & WRC_Abort;
   if( !ExprHasProperty(pExpr,(EP_TokenOnly|EP_Leaf)) ){
     assert( pExpr->x.pList==0 || pExpr->pRight==0 );
-    if( pExpr->pLeft && walkExpr(pWalker, pExpr->pLeft) ) return WRC_Abort;
+    if( pExpr->pLeft && sqlite3WalkExprNN(pWalker, pExpr->pLeft) ){
+      return WRC_Abort;
+    }
     if( pExpr->pRight ){
       assert( !ExprHasProperty(pExpr, EP_WinFunc) );
       pExpr = pExpr->pRight;
@@ -101345,7 +105570,7 @@
   return WRC_Continue;
 }
 SQLITE_PRIVATE int sqlite3WalkExpr(Walker *pWalker, Expr *pExpr){
-  return pExpr ? walkExpr(pWalker,pExpr) : WRC_Continue;
+  return pExpr ? sqlite3WalkExprNN(pWalker,pExpr) : WRC_Continue;
 }
 
 /*
@@ -101471,7 +105696,7 @@ SQLITE_PRIVATE int sqlite3WalkSelect(Walker *pWalker, Select *p){
 }
 
 /* Increase the walkerDepth when entering a subquery, and
-** descrease when leaving the subquery.
+** decrease when leaving the subquery.
 */
 SQLITE_PRIVATE int sqlite3WalkerDepthIncrease(Walker *pWalker, Select *pSelect){
   UNUSED_PARAMETER(pSelect);
@@ -101610,28 +105835,41 @@ static void resolveAlias(
       pExpr->y.pWin->pOwner = pExpr;
     }
   }
-  sqlite3ParserAddCleanup(pParse,
-      (void(*)(sqlite3*,void*))sqlite3ExprDelete,
-      pDup);
+  sqlite3ExprDeferredDelete(pParse, pDup);
  }
 }
 
 /*
-** Subqueries stores the original database, table and column names for their
-** result sets in ExprList.a[].zSpan, in the form "DATABASE.TABLE.COLUMN".
-** Check to see if the zSpan given to this routine matches the zDb, zTab,
-** and zCol.  If any of zDb, zTab, and zCol are NULL then those fields will
-** match anything.
+** Subqueries store the original database, table and column names for their
+** result sets in ExprList.a[].zSpan, in the form "DATABASE.TABLE.COLUMN",
+** and mark the expression-list item by setting ExprList.a[].fg.eEName
+** to ENAME_TAB.
+**
+** Check to see if the zSpan/eEName of the expression-list item passed to this
+** routine matches the zDb, zTab, and zCol.  If any of zDb, zTab, and zCol are
+** NULL then those fields will match anything.  Return true if there is a match,
+** or false otherwise.
+**
+** SF_NestedFrom subqueries also store an entry for the implicit rowid (or
+** _rowid_, or oid) column by setting ExprList.a[].fg.eEName to ENAME_ROWID,
+** and setting zSpan to "DATABASE.TABLE.<fake-rowid-name>".  This type of pItem
+** argument matches if zCol is a rowid alias.  If it is not NULL, (*pbRowid)
+** is set to 1 if there is this kind of match.
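+**
+** A hedged illustration (assumed schema, not part of this diff): given
+**
+**       SELECT t.rowid FROM (SELECT a, b FROM t1) AS t;
+**
+** the SF_NestedFrom subquery's expression list carries an ENAME_ROWID
+** entry whose zSpan begins with "main.t1.", so the lookup of "t.rowid"
+** matches that entry and sets (*pbRowid) to 1.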
*/ SQLITE_PRIVATE int sqlite3MatchEName( const struct ExprList_item *pItem, const char *zCol, const char *zTab, - const char *zDb + const char *zDb, + int *pbRowid ){ int n; const char *zSpan; - if( pItem->fg.eEName!=ENAME_TAB ) return 0; + int eEName = pItem->fg.eEName; + if( eEName!=ENAME_TAB && (eEName!=ENAME_ROWID || NEVER(pbRowid==0)) ){ + return 0; + } + assert( pbRowid==0 || *pbRowid==0 ); zSpan = pItem->zEName; for(n=0; ALWAYS(zSpan[n]) && zSpan[n]!='.'; n++){} if( zDb && (sqlite3StrNICmp(zSpan, zDb, n)!=0 || zDb[n]!=0) ){ @@ -101643,9 +105881,11 @@ SQLITE_PRIVATE int sqlite3MatchEName( return 0; } zSpan += n+1; - if( zCol && sqlite3StrICmp(zSpan, zCol)!=0 ){ - return 0; + if( zCol ){ + if( eEName==ENAME_TAB && sqlite3StrICmp(zSpan, zCol)!=0 ) return 0; + if( eEName==ENAME_ROWID && sqlite3IsRowid(zCol)==0 ) return 0; } + if( eEName==ENAME_ROWID ) *pbRowid = 1; return 1; } @@ -101715,6 +105955,32 @@ static void extendFJMatch( } } +/* +** Return TRUE (non-zero) if zTab is a valid name for the schema table pTab. +*/ +static SQLITE_NOINLINE int isValidSchemaTableName( + const char *zTab, /* Name as it appears in the SQL */ + Table *pTab, /* The schema table we are trying to match */ + Schema *pSchema /* non-NULL if a database qualifier is present */ +){ + const char *zLegacy; + assert( pTab!=0 ); + assert( pTab->tnum==1 ); + if( sqlite3StrNICmp(zTab, "sqlite_", 7)!=0 ) return 0; + zLegacy = pTab->zName; + if( strcmp(zLegacy+7, &LEGACY_TEMP_SCHEMA_TABLE[7])==0 ){ + if( sqlite3StrICmp(zTab+7, &PREFERRED_TEMP_SCHEMA_TABLE[7])==0 ){ + return 1; + } + if( pSchema==0 ) return 0; + if( sqlite3StrICmp(zTab+7, &LEGACY_SCHEMA_TABLE[7])==0 ) return 1; + if( sqlite3StrICmp(zTab+7, &PREFERRED_SCHEMA_TABLE[7])==0 ) return 1; + }else{ + if( sqlite3StrICmp(zTab+7, &PREFERRED_SCHEMA_TABLE[7])==0 ) return 1; + } + return 0; +} + /* ** Given the name of a column of the form X.Y.Z or Y.Z or just Z, look up ** that name in the set of source tables in pSrcList and make the pExpr @@ -101752,7 +106018,7 @@ static int lookupName( ){ int i, j; /* Loop counters */ int cnt = 0; /* Number of matching column names */ - int cntTab = 0; /* Number of matching table names */ + int cntTab = 0; /* Number of potential "rowid" matches */ int nSubquery = 0; /* How many levels of subquery */ sqlite3 *db = pParse->db; /* The database connection */ SrcItem *pItem; /* Use for looping over pSrcList items */ @@ -101829,54 +106095,66 @@ static int lookupName( assert( pEList!=0 ); assert( pEList->nExpr==pTab->nCol ); for(j=0; jnExpr; j++){ - if( !sqlite3MatchEName(&pEList->a[j], zCol, zTab, zDb) ){ + int bRowid = 0; /* True if possible rowid match */ + if( !sqlite3MatchEName(&pEList->a[j], zCol, zTab, zDb, &bRowid) ){ continue; } - if( cnt>0 ){ - if( pItem->fg.isUsing==0 - || sqlite3IdListIndex(pItem->u3.pUsing, zCol)<0 - ){ - /* Two or more tables have the same column name which is - ** not joined by USING. This is an error. Signal as much - ** by clearing pFJMatch and letting cnt go above 1. */ - sqlite3ExprListDelete(db, pFJMatch); - pFJMatch = 0; - }else - if( (pItem->fg.jointype & JT_RIGHT)==0 ){ - /* An INNER or LEFT JOIN. Use the left-most table */ - continue; - }else - if( (pItem->fg.jointype & JT_LEFT)==0 ){ - /* A RIGHT JOIN. 
Use the right-most table */ - cnt = 0; - sqlite3ExprListDelete(db, pFJMatch); - pFJMatch = 0; - }else{ - /* For a FULL JOIN, we must construct a coalesce() func */ - extendFJMatch(pParse, &pFJMatch, pMatch, pExpr->iColumn); + if( bRowid==0 ){ + if( cnt>0 ){ + if( pItem->fg.isUsing==0 + || sqlite3IdListIndex(pItem->u3.pUsing, zCol)<0 + ){ + /* Two or more tables have the same column name which is + ** not joined by USING. This is an error. Signal as much + ** by clearing pFJMatch and letting cnt go above 1. */ + sqlite3ExprListDelete(db, pFJMatch); + pFJMatch = 0; + }else + if( (pItem->fg.jointype & JT_RIGHT)==0 ){ + /* An INNER or LEFT JOIN. Use the left-most table */ + continue; + }else + if( (pItem->fg.jointype & JT_LEFT)==0 ){ + /* A RIGHT JOIN. Use the right-most table */ + cnt = 0; + sqlite3ExprListDelete(db, pFJMatch); + pFJMatch = 0; + }else{ + /* For a FULL JOIN, we must construct a coalesce() func */ + extendFJMatch(pParse, &pFJMatch, pMatch, pExpr->iColumn); + } } + cnt++; + hit = 1; + }else if( cnt>0 ){ + /* This is a potential rowid match, but there has already been + ** a real match found. So this can be ignored. */ + continue; } - cnt++; - cntTab = 2; + cntTab++; pMatch = pItem; pExpr->iColumn = j; pEList->a[j].fg.bUsed = 1; - hit = 1; + + /* rowid cannot be part of a USING clause - assert() this. */ + assert( bRowid==0 || pEList->a[j].fg.bUsingTerm==0 ); if( pEList->a[j].fg.bUsingTerm ) break; } if( hit || zTab==0 ) continue; } assert( zDb==0 || zTab!=0 ); if( zTab ){ - const char *zTabName; if( zDb ){ if( pTab->pSchema!=pSchema ) continue; if( pSchema==0 && strcmp(zDb,"*")!=0 ) continue; } - zTabName = pItem->zAlias ? pItem->zAlias : pTab->zName; - assert( zTabName!=0 ); - if( sqlite3StrICmp(zTabName, zTab)!=0 ){ - continue; + if( pItem->zAlias!=0 ){ + if( sqlite3StrICmp(zTab, pItem->zAlias)!=0 ){ + continue; + } + }else if( sqlite3StrICmp(zTab, pTab->zName)!=0 ){ + if( pTab->tnum!=1 ) continue; + if( !isValidSchemaTableName(zTab, pTab, pSchema) ) continue; } assert( ExprUseYTab(pExpr) ); if( IN_RENAME_OBJECT && pItem->zAlias ){ @@ -101952,7 +106230,8 @@ static int lookupName( assert( op==TK_DELETE || op==TK_UPDATE || op==TK_INSERT ); if( pParse->bReturning ){ if( (pNC->ncFlags & NC_UBaseReg)!=0 - && (zTab==0 || sqlite3StrICmp(zTab,pParse->pTriggerTab->zName)==0) + && ALWAYS(zTab==0 + || sqlite3StrICmp(zTab,pParse->pTriggerTab->zName)==0) ){ pExpr->iTable = op!=TK_DELETE; pTab = pParse->pTriggerTab; @@ -102019,6 +106298,7 @@ static int lookupName( if( pParse->bReturning ){ eNewExprOp = TK_REGISTER; pExpr->op2 = TK_COLUMN; + pExpr->iColumn = iCol; pExpr->iTable = pNC->uNC.iBaseReg + (pTab->nCol+1)*pExpr->iTable + sqlite3TableColumnToStorage(pTab, iCol) + 1; }else{ @@ -102052,10 +106332,10 @@ static int lookupName( && pMatch && (pNC->ncFlags & (NC_IdxExpr|NC_GenCol))==0 && sqlite3IsRowid(zCol) - && ALWAYS(VisibleRowid(pMatch->pTab)) + && ALWAYS(VisibleRowid(pMatch->pTab) || pMatch->fg.isNestedFrom) ){ cnt = 1; - pExpr->iColumn = -1; + if( pMatch->fg.isNestedFrom==0 ) pExpr->iColumn = -1; pExpr->affExpr = SQLITE_AFF_INTEGER; } @@ -102431,14 +106711,10 @@ static int resolveExprStep(Walker *pWalker, Expr *pExpr){ if( 0==sqlite3ExprCanBeNull(pExpr->pLeft) && !IN_RENAME_OBJECT ){ testcase( ExprHasProperty(pExpr, EP_OuterON) ); assert( !ExprHasProperty(pExpr, EP_IntValue) ); - if( pExpr->op==TK_NOTNULL ){ - pExpr->u.zToken = "true"; - ExprSetProperty(pExpr, EP_IsTrue); - }else{ - pExpr->u.zToken = "false"; - ExprSetProperty(pExpr, EP_IsFalse); - } - pExpr->op = TK_TRUEFALSE; + 
      pExpr->u.iValue = (pExpr->op==TK_NOTNULL);
+      pExpr->flags |= EP_IntValue;
+      pExpr->op = TK_INTEGER;
+
       for(i=0, p=pNC; p && i<ArraySize(anRef); p=p->pNext, i++){
         p->nRef = anRef[i];
       }
@@ -102512,6 +106788,7 @@ static int resolveExprStep(Walker *pWalker, Expr *pExpr){
       Window *pWin = (IsWindowFunc(pExpr) ? pExpr->y.pWin : 0);
 #endif
       assert( !ExprHasProperty(pExpr, EP_xIsSelect|EP_IntValue) );
+      assert( pExpr->pLeft==0 || pExpr->pLeft->op==TK_ORDER );
       zId = pExpr->u.zToken;
       pDef = sqlite3FindFunction(pParse->db, zId, n, enc, 0);
       if( pDef==0 ){
@@ -102653,6 +106930,10 @@
         pNC->nNcErr++;
       }
 #endif
+      else if( is_agg==0 && pExpr->pLeft ){
+        sqlite3ExprOrderByAggregateError(pParse, pExpr);
+        pNC->nNcErr++;
+      }
       if( is_agg ){
         /* Window functions may not be arguments of aggregate functions.
         ** Or arguments of other window functions. But aggregate functions
@@ -102671,6 +106952,11 @@
 #endif
       sqlite3WalkExprList(pWalker, pList);
       if( is_agg ){
+        if( pExpr->pLeft ){
+          assert( pExpr->pLeft->op==TK_ORDER );
+          assert( ExprUseXList(pExpr->pLeft) );
+          sqlite3WalkExprList(pWalker, pExpr->pLeft->x.pList);
+        }
 #ifndef SQLITE_OMIT_WINDOWFUNC
         if( pWin ){
           Select *pSel = pNC->pWinSelect;
@@ -102740,8 +107026,8 @@
       assert( pNC->nRef>=nRef );
       if( nRef!=pNC->nRef ){
         ExprSetProperty(pExpr, EP_VarSelect);
-        pNC->ncFlags |= NC_VarSelect;
       }
+      pNC->ncFlags |= NC_Subquery;
       break;
     }
@@ -103181,7 +107467,7 @@ static int resolveOrderGroupBy(
   }
   for(j=0; j<pSelect->pEList->nExpr; j++){
     if( sqlite3ExprCompare(0, pE, pSelect->pEList->a[j].pExpr, -1)==0 ){
-      /* Since this expresion is being changed into a reference
+      /* Since this expression is being changed into a reference
       ** to an identical expression in the result set, remove all Window
       ** objects belonging to the expression from the Select.pWin list. */
       windowRemoveExprFromSelect(pSelect, pE);
@@ -103234,10 +107520,8 @@ static int resolveSelectStep(Walker *pWalker, Select *p){
   while( p ){
     assert( (p->selFlags & SF_Expanded)!=0 );
     assert( (p->selFlags & SF_Resolved)==0 );
-    assert( db->suppressErr==0 ); /* SF_Resolved not set if errors suppressed */
     p->selFlags |= SF_Resolved;
-
     /* Resolve the expressions in the LIMIT and OFFSET clauses. These
     ** are not allowed to refer to any names, so pass an empty NameContext.
     */
@@ -103504,7 +107788,8 @@ SQLITE_PRIVATE int sqlite3ResolveExprNames(
     return SQLITE_ERROR;
   }
 #endif
-  sqlite3WalkExpr(&w, pExpr);
+  assert( pExpr!=0 );
+  sqlite3WalkExprNN(&w, pExpr);
 #if SQLITE_MAX_EXPR_DEPTH>0
   w.pParse->nHeight -= pExpr->nHeight;
 #endif
@@ -103546,7 +107831,7 @@ SQLITE_PRIVATE int sqlite3ResolveExprListNames(
       return WRC_Abort;
     }
 #endif
-    sqlite3WalkExpr(&w, pExpr);
+    sqlite3WalkExprNN(&w, pExpr);
 #if SQLITE_MAX_EXPR_DEPTH>0
     w.pParse->nHeight -= pExpr->nHeight;
 #endif
@@ -103568,7 +107853,7 @@ SQLITE_PRIVATE int sqlite3ResolveExprListNames(
 
 /*
 ** Resolve all names in all expressions of a SELECT and in all
-** decendents of the SELECT, including compounds off of p->pPrior,
+** descendants of the SELECT, including compounds off of p->pPrior,
 ** subqueries in expressions, and subqueries used as FROM clause
 ** terms.
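 **
 ** For example (a hedged sketch with assumed table names): resolving
 **
 **       SELECT (SELECT max(b) FROM t5 WHERE t5.a=t4.a) FROM t4;
 **
 ** must descend into the scalar subquery so that the reference to t4.a
 ** is bound to the outer FROM clause term.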
** @@ -103695,49 +107980,122 @@ SQLITE_PRIVATE char sqlite3TableColumnAffinity(const Table *pTab, int iCol){ */ SQLITE_PRIVATE char sqlite3ExprAffinity(const Expr *pExpr){ int op; - while( ExprHasProperty(pExpr, EP_Skip|EP_IfNullRow) ){ - assert( pExpr->op==TK_COLLATE - || pExpr->op==TK_IF_NULL_ROW - || (pExpr->op==TK_REGISTER && pExpr->op2==TK_IF_NULL_ROW) ); - pExpr = pExpr->pLeft; - assert( pExpr!=0 ); - } op = pExpr->op; - if( op==TK_REGISTER ) op = pExpr->op2; - if( op==TK_COLUMN || op==TK_AGG_COLUMN ){ - assert( ExprUseYTab(pExpr) ); - if( pExpr->y.pTab ){ + while( 1 /* exit-by-break */ ){ + if( op==TK_COLUMN || (op==TK_AGG_COLUMN && pExpr->y.pTab!=0) ){ + assert( ExprUseYTab(pExpr) ); + assert( pExpr->y.pTab!=0 ); return sqlite3TableColumnAffinity(pExpr->y.pTab, pExpr->iColumn); } - } - if( op==TK_SELECT ){ - assert( ExprUseXSelect(pExpr) ); - assert( pExpr->x.pSelect!=0 ); - assert( pExpr->x.pSelect->pEList!=0 ); - assert( pExpr->x.pSelect->pEList->a[0].pExpr!=0 ); - return sqlite3ExprAffinity(pExpr->x.pSelect->pEList->a[0].pExpr); - } + if( op==TK_SELECT ){ + assert( ExprUseXSelect(pExpr) ); + assert( pExpr->x.pSelect!=0 ); + assert( pExpr->x.pSelect->pEList!=0 ); + assert( pExpr->x.pSelect->pEList->a[0].pExpr!=0 ); + return sqlite3ExprAffinity(pExpr->x.pSelect->pEList->a[0].pExpr); + } #ifndef SQLITE_OMIT_CAST - if( op==TK_CAST ){ - assert( !ExprHasProperty(pExpr, EP_IntValue) ); - return sqlite3AffinityType(pExpr->u.zToken, 0); - } + if( op==TK_CAST ){ + assert( !ExprHasProperty(pExpr, EP_IntValue) ); + return sqlite3AffinityType(pExpr->u.zToken, 0); + } #endif - if( op==TK_SELECT_COLUMN ){ - assert( pExpr->pLeft!=0 && ExprUseXSelect(pExpr->pLeft) ); - assert( pExpr->iColumn < pExpr->iTable ); - assert( pExpr->iTable==pExpr->pLeft->x.pSelect->pEList->nExpr ); - return sqlite3ExprAffinity( - pExpr->pLeft->x.pSelect->pEList->a[pExpr->iColumn].pExpr - ); - } - if( op==TK_VECTOR ){ - assert( ExprUseXList(pExpr) ); - return sqlite3ExprAffinity(pExpr->x.pList->a[0].pExpr); + if( op==TK_SELECT_COLUMN ){ + assert( pExpr->pLeft!=0 && ExprUseXSelect(pExpr->pLeft) ); + assert( pExpr->iColumn < pExpr->iTable ); + assert( pExpr->iColumn >= 0 ); + assert( pExpr->iTable==pExpr->pLeft->x.pSelect->pEList->nExpr ); + return sqlite3ExprAffinity( + pExpr->pLeft->x.pSelect->pEList->a[pExpr->iColumn].pExpr + ); + } + if( op==TK_VECTOR ){ + assert( ExprUseXList(pExpr) ); + return sqlite3ExprAffinity(pExpr->x.pList->a[0].pExpr); + } + if( ExprHasProperty(pExpr, EP_Skip|EP_IfNullRow) ){ + assert( pExpr->op==TK_COLLATE + || pExpr->op==TK_IF_NULL_ROW + || (pExpr->op==TK_REGISTER && pExpr->op2==TK_IF_NULL_ROW) ); + pExpr = pExpr->pLeft; + op = pExpr->op; + continue; + } + if( op!=TK_REGISTER || (op = pExpr->op2)==TK_REGISTER ) break; } return pExpr->affExpr; } +/* +** Make a guess at all the possible datatypes of the result that could +** be returned by an expression. Return a bitmask indicating the answer: +** +** 0x01 Numeric +** 0x02 Text +** 0x04 Blob +** +** If the expression must return NULL, then 0x00 is returned. 
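+**
+** A few quick illustrations (hedged, not part of this diff): a string
+** literal reports 0x02; "a || b" (TK_CONCAT) reports 0x06, since
+** concatenation yields either text or a blob; and an integer literal
+** falls through to the default case and reports 0x01.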
+*/
+SQLITE_PRIVATE int sqlite3ExprDataType(const Expr *pExpr){
+  while( pExpr ){
+    switch( pExpr->op ){
+      case TK_COLLATE:
+      case TK_IF_NULL_ROW:
+      case TK_UPLUS: {
+        pExpr = pExpr->pLeft;
+        break;
+      }
+      case TK_NULL: {
+        pExpr = 0;
+        break;
+      }
+      case TK_STRING: {
+        return 0x02;
+      }
+      case TK_BLOB: {
+        return 0x04;
+      }
+      case TK_CONCAT: {
+        return 0x06;
+      }
+      case TK_VARIABLE:
+      case TK_AGG_FUNCTION:
+      case TK_FUNCTION: {
+        return 0x07;
+      }
+      case TK_COLUMN:
+      case TK_AGG_COLUMN:
+      case TK_SELECT:
+      case TK_CAST:
+      case TK_SELECT_COLUMN:
+      case TK_VECTOR: {
+        int aff = sqlite3ExprAffinity(pExpr);
+        if( aff>=SQLITE_AFF_NUMERIC ) return 0x05;
+        if( aff==SQLITE_AFF_TEXT ) return 0x06;
+        return 0x07;
+      }
+      case TK_CASE: {
+        int res = 0;
+        int ii;
+        ExprList *pList = pExpr->x.pList;
+        assert( ExprUseXList(pExpr) && pList!=0 );
+        assert( pList->nExpr > 0);
+        for(ii=1; ii<pList->nExpr; ii+=2){
+          res |= sqlite3ExprDataType(pList->a[ii].pExpr);
+        }
+        if( pList->nExpr % 2 ){
+          res |= sqlite3ExprDataType(pList->a[pList->nExpr-1].pExpr);
+        }
+        return res;
+      }
+      default: {
+        return 0x01;
+      }
+    } /* End of switch(op) */
+  } /* End of while(pExpr) */
+  return 0x00;
+}
+
 /*
 ** Set the collating sequence for expression pExpr to be the collating
 ** sequence named by pToken.   Return a pointer to a new Expr node that
@@ -103825,18 +108183,17 @@ SQLITE_PRIVATE CollSeq *sqlite3ExprCollSeq(Parse *pParse, const Expr *pExpr){
   while( p ){
     int op = p->op;
     if( op==TK_REGISTER ) op = p->op2;
-    if( op==TK_AGG_COLUMN || op==TK_COLUMN || op==TK_TRIGGER ){
+    if( (op==TK_AGG_COLUMN && p->y.pTab!=0)
+     || op==TK_COLUMN || op==TK_TRIGGER
+    ){
+      int j;
       assert( ExprUseYTab(p) );
-      if( p->y.pTab!=0 ){
-        /* op==TK_REGISTER && p->y.pTab!=0 happens when pExpr was originally
-        ** a TK_COLUMN but was previously evaluated and cached in a register */
-        int j = p->iColumn;
-        if( j>=0 ){
-          const char *zColl = sqlite3ColumnColl(&p->y.pTab->aCol[j]);
-          pColl = sqlite3FindCollSeq(db, ENC(db), zColl, 0);
-        }
-        break;
+      assert( p->y.pTab!=0 );
+      if( (j = p->iColumn)>=0 ){
+        const char *zColl = sqlite3ColumnColl(&p->y.pTab->aCol[j]);
+        pColl = sqlite3FindCollSeq(db, ENC(db), zColl, 0);
       }
+      break;
     }
     if( op==TK_CAST || op==TK_UPLUS ){
       p = p->pLeft;
@@ -103858,11 +108215,10 @@
     }else{
       Expr *pNext  = p->pRight;
       /* The Expr.x union is never used at the same time as Expr.pRight */
-      assert( ExprUseXList(p) );
-      assert( p->x.pList==0 || p->pRight==0 );
-      if( p->x.pList!=0 && !db->mallocFailed ){
+      assert( !ExprUseXList(p) || p->x.pList==0 || p->pRight==0 );
+      if( ExprUseXList(p) && p->x.pList!=0 && !db->mallocFailed ){
         int i;
-        for(i=0; ALWAYS(i<p->x.pList->nExpr); i++){
+        for(i=0; i<p->x.pList->nExpr; i++){
          if( ExprHasProperty(p->x.pList->a[i].pExpr, EP_Collate) ){
            pNext = p->x.pList->a[i].pExpr;
            break;
@@ -103884,7 +108240,7 @@
 /*
 ** Return the collation sequence for the expression pExpr. If
 ** there is no defined collating sequence, return a pointer to the
-** defautl collation sequence.
+** default collation sequence.
 **
 ** See also: sqlite3ExprCollSeq()
 **
@@ -104014,7 +108370,7 @@ SQLITE_PRIVATE CollSeq *sqlite3BinaryCompareCollSeq(
   return pColl;
 }
 
-/* Expresssion p is a comparison operator.  Return a collation sequence
+/* Expression p is a comparison operator.  Return a collation sequence
 ** appropriate for the comparison operator.
 **
 ** This is normally just a wrapper around sqlite3BinaryCompareCollSeq().
@@ -104171,6 +108527,7 @@ SQLITE_PRIVATE Expr *sqlite3ExprForVectorField( */ pRet = sqlite3PExpr(pParse, TK_SELECT_COLUMN, 0, 0); if( pRet ){ + ExprSetProperty(pRet, EP_FullSize); pRet->iTable = nField; pRet->iColumn = iField; pRet->pLeft = pVector; @@ -104421,7 +108778,9 @@ static void heightOfSelect(const Select *pSelect, int *pnHeight){ */ static void exprSetHeight(Expr *p){ int nHeight = p->pLeft ? p->pLeft->nHeight : 0; - if( p->pRight && p->pRight->nHeight>nHeight ) nHeight = p->pRight->nHeight; + if( NEVER(p->pRight) && p->pRight->nHeight>nHeight ){ + nHeight = p->pRight->nHeight; + } if( ExprUseXSelect(p) ){ heightOfSelect(p->x.pSelect, &nHeight); }else if( p->x.pList ){ @@ -104468,6 +108827,15 @@ SQLITE_PRIVATE void sqlite3ExprSetHeightAndFlags(Parse *pParse, Expr *p){ #define exprSetHeight(y) #endif /* SQLITE_MAX_EXPR_DEPTH>0 */ +/* +** Set the error offset for an Expr node, if possible. +*/ +SQLITE_PRIVATE void sqlite3ExprSetErrorOffset(Expr *pExpr, int iOfst){ + if( pExpr==0 ) return; + if( NEVER(ExprUseWJoin(pExpr)) ) return; + pExpr->w.iOfst = iOfst; +} + /* ** This routine is the core allocator for Expr nodes. ** @@ -104564,15 +108932,26 @@ SQLITE_PRIVATE void sqlite3ExprAttachSubtrees( sqlite3ExprDelete(db, pLeft); sqlite3ExprDelete(db, pRight); }else{ + assert( ExprUseXList(pRoot) ); + assert( pRoot->x.pSelect==0 ); if( pRight ){ pRoot->pRight = pRight; pRoot->flags |= EP_Propagate & pRight->flags; +#if SQLITE_MAX_EXPR_DEPTH>0 + pRoot->nHeight = pRight->nHeight+1; + }else{ + pRoot->nHeight = 1; +#endif } if( pLeft ){ pRoot->pLeft = pLeft; pRoot->flags |= EP_Propagate & pLeft->flags; +#if SQLITE_MAX_EXPR_DEPTH>0 + if( pLeft->nHeight>=pRoot->nHeight ){ + pRoot->nHeight = pLeft->nHeight+1; + } +#endif } - exprSetHeight(pRoot); } } @@ -104681,9 +109060,9 @@ SQLITE_PRIVATE Select *sqlite3ExprListToValues(Parse *pParse, int nElem, ExprLis ** Join two expressions using an AND operator. If either expression is ** NULL, then just return the other expression. ** -** If one side or the other of the AND is known to be false, then instead -** of returning an AND expression, just return a constant expression with -** a value of false. +** If one side or the other of the AND is known to be false, and neither side +** is part of an ON clause, then instead of returning an AND expression, +** just return a constant expression with a value of false. */ SQLITE_PRIVATE Expr *sqlite3ExprAnd(Parse *pParse, Expr *pLeft, Expr *pRight){ sqlite3 *db = pParse->db; @@ -104691,14 +109070,17 @@ SQLITE_PRIVATE Expr *sqlite3ExprAnd(Parse *pParse, Expr *pLeft, Expr *pRight){ return pRight; }else if( pRight==0 ){ return pLeft; - }else if( (ExprAlwaysFalse(pLeft) || ExprAlwaysFalse(pRight)) - && !IN_RENAME_OBJECT - ){ - sqlite3ExprDeferredDelete(pParse, pLeft); - sqlite3ExprDeferredDelete(pParse, pRight); - return sqlite3Expr(db, TK_INTEGER, "0"); }else{ - return sqlite3PExpr(pParse, TK_AND, pLeft, pRight); + u32 f = pLeft->flags | pRight->flags; + if( (f&(EP_OuterON|EP_InnerON|EP_IsFalse))==EP_IsFalse + && !IN_RENAME_OBJECT + ){ + sqlite3ExprDeferredDelete(pParse, pLeft); + sqlite3ExprDeferredDelete(pParse, pRight); + return sqlite3Expr(db, TK_INTEGER, "0"); + }else{ + return sqlite3PExpr(pParse, TK_AND, pLeft, pRight); + } } } @@ -104736,6 +109118,69 @@ SQLITE_PRIVATE Expr *sqlite3ExprFunction( return pNew; } +/* +** Report an error when attempting to use an ORDER BY clause within +** the arguments of a non-aggregate function. 
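+**
+** For instance (hedged example with an assumed table t1), the 3.44
+** aggregate ORDER BY syntax is accepted while the same clause on a
+** non-aggregate raises this error:
+**
+**       SELECT group_concat(x ORDER BY y) FROM t1;   -- allowed
+**       SELECT abs(x ORDER BY y) FROM t1;            -- error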
+*/ +SQLITE_PRIVATE void sqlite3ExprOrderByAggregateError(Parse *pParse, Expr *p){ + sqlite3ErrorMsg(pParse, + "ORDER BY may not be used with non-aggregate %#T()", p + ); +} + +/* +** Attach an ORDER BY clause to a function call. +** +** functionname( arguments ORDER BY sortlist ) +** \_____________________/ \______/ +** pExpr pOrderBy +** +** The ORDER BY clause is inserted into a new Expr node of type TK_ORDER +** and added to the Expr.pLeft field of the parent TK_FUNCTION node. +*/ +SQLITE_PRIVATE void sqlite3ExprAddFunctionOrderBy( + Parse *pParse, /* Parsing context */ + Expr *pExpr, /* The function call to which ORDER BY is to be added */ + ExprList *pOrderBy /* The ORDER BY clause to add */ +){ + Expr *pOB; + sqlite3 *db = pParse->db; + if( NEVER(pOrderBy==0) ){ + assert( db->mallocFailed ); + return; + } + if( pExpr==0 ){ + assert( db->mallocFailed ); + sqlite3ExprListDelete(db, pOrderBy); + return; + } + assert( pExpr->op==TK_FUNCTION ); + assert( pExpr->pLeft==0 ); + assert( ExprUseXList(pExpr) ); + if( pExpr->x.pList==0 || NEVER(pExpr->x.pList->nExpr==0) ){ + /* Ignore ORDER BY on zero-argument aggregates */ + sqlite3ParserAddCleanup(pParse, + (void(*)(sqlite3*,void*))sqlite3ExprListDelete, + pOrderBy); + return; + } + if( IsWindowFunc(pExpr) ){ + sqlite3ExprOrderByAggregateError(pParse, pExpr); + sqlite3ExprListDelete(db, pOrderBy); + return; + } + + pOB = sqlite3ExprAlloc(db, TK_ORDER, 0, 0); + if( pOB==0 ){ + sqlite3ExprListDelete(db, pOrderBy); + return; + } + pOB->x.pList = pOrderBy; + assert( ExprUseXList(pOB) ); + pExpr->pLeft = pOB; + ExprSetProperty(pOB, EP_FullSize); +} + /* ** Check to see if a function is usable according to current access ** rules: @@ -104858,6 +109303,7 @@ SQLITE_PRIVATE void sqlite3ExprAssignVarNumber(Parse *pParse, Expr *pExpr, u32 n */ static SQLITE_NOINLINE void sqlite3ExprDeleteNN(sqlite3 *db, Expr *p){ assert( p!=0 ); + assert( db!=0 ); assert( !ExprUseUValue(p) || p->u.iValue>=0 ); assert( !ExprUseYWin(p) || !ExprUseYSub(p) ); assert( !ExprUseYWin(p) || p->y.pWin!=0 || db->mallocFailed ); @@ -104889,12 +109335,8 @@ static SQLITE_NOINLINE void sqlite3ExprDeleteNN(sqlite3 *db, Expr *p){ #endif } } - if( ExprHasProperty(p, EP_MemToken) ){ - assert( !ExprHasProperty(p, EP_IntValue) ); - sqlite3DbFree(db, p->u.zToken); - } if( !ExprHasProperty(p, EP_Static) ){ - sqlite3DbFreeNN(db, p); + sqlite3DbNNFreeNN(db, p); } } SQLITE_PRIVATE void sqlite3ExprDelete(sqlite3 *db, Expr *p){ @@ -104917,7 +109359,7 @@ SQLITE_PRIVATE void sqlite3ClearOnOrUsing(sqlite3 *db, OnOrUsing *p){ /* ** Arrange to cause pExpr to be deleted when the pParse is deleted. ** This is similar to sqlite3ExprDelete() except that the delete is -** deferred untilthe pParse is deleted. +** deferred until the pParse is deleted. ** ** The pExpr might be deleted immediately on an OOM error. ** @@ -104925,8 +109367,9 @@ SQLITE_PRIVATE void sqlite3ClearOnOrUsing(sqlite3 *db, OnOrUsing *p){ ** pExpr to the pParse->pConstExpr list with a register number of 0. 
*/ SQLITE_PRIVATE void sqlite3ExprDeferredDelete(Parse *pParse, Expr *pExpr){ - pParse->pConstExpr = - sqlite3ExprListAppend(pParse, pParse->pConstExpr, pExpr); + sqlite3ParserAddCleanup(pParse, + (void(*)(sqlite3*,void*))sqlite3ExprDelete, + pExpr); } /* Invoke sqlite3RenameExprUnmap() and sqlite3ExprDelete() on the @@ -104991,16 +109434,11 @@ static int dupedExprStructSize(const Expr *p, int flags){ assert( flags==EXPRDUP_REDUCE || flags==0 ); /* Only one flag value allowed */ assert( EXPR_FULLSIZE<=0xfff ); assert( (0xfff & (EP_Reduced|EP_TokenOnly))==0 ); - if( 0==flags || p->op==TK_SELECT_COLUMN -#ifndef SQLITE_OMIT_WINDOWFUNC - || ExprHasProperty(p, EP_WinFunc) -#endif - ){ + if( 0==flags || ExprHasProperty(p, EP_FullSize) ){ nSize = EXPR_FULLSIZE; }else{ assert( !ExprHasProperty(p, EP_TokenOnly|EP_Reduced) ); assert( !ExprHasProperty(p, EP_OuterON) ); - assert( !ExprHasProperty(p, EP_MemToken) ); assert( !ExprHasVVAProperty(p, EP_NoReduce) ); if( p->pLeft || p->x.pList ){ nSize = EXPR_REDUCEDSIZE | EP_Reduced; @@ -105027,56 +109465,93 @@ static int dupedExprNodeSize(const Expr *p, int flags){ /* ** Return the number of bytes required to create a duplicate of the -** expression passed as the first argument. The second argument is a -** mask containing EXPRDUP_XXX flags. +** expression passed as the first argument. ** ** The value returned includes space to create a copy of the Expr struct ** itself and the buffer referred to by Expr.u.zToken, if any. ** -** If the EXPRDUP_REDUCE flag is set, then the return value includes -** space to duplicate all Expr nodes in the tree formed by Expr.pLeft -** and Expr.pRight variables (but not for any structures pointed to or -** descended from the Expr.x.pList or Expr.x.pSelect variables). +** The return value includes space to duplicate all Expr nodes in the +** tree formed by Expr.pLeft and Expr.pRight, but not any other +** substructure such as Expr.x.pList, Expr.x.pSelect, and Expr.y.pWin. */ -static int dupedExprSize(const Expr *p, int flags){ - int nByte = 0; - if( p ){ - nByte = dupedExprNodeSize(p, flags); - if( flags&EXPRDUP_REDUCE ){ - nByte += dupedExprSize(p->pLeft, flags) + dupedExprSize(p->pRight, flags); - } - } +static int dupedExprSize(const Expr *p){ + int nByte; + assert( p!=0 ); + nByte = dupedExprNodeSize(p, EXPRDUP_REDUCE); + if( p->pLeft ) nByte += dupedExprSize(p->pLeft); + if( p->pRight ) nByte += dupedExprSize(p->pRight); + assert( nByte==ROUND8(nByte) ); return nByte; } /* -** This function is similar to sqlite3ExprDup(), except that if pzBuffer -** is not NULL then *pzBuffer is assumed to point to a buffer large enough -** to store the copy of expression p, the copies of p->u.zToken -** (if applicable), and the copies of the p->pLeft and p->pRight expressions, -** if any. Before returning, *pzBuffer is set to the first byte past the -** portion of the buffer copied into by this function. +** An EdupBuf is a memory allocation used to stored multiple Expr objects +** together with their Expr.zToken content. This is used to help implement +** compression while doing sqlite3ExprDup(). The top-level Expr does the +** allocation for itself and many of its decendents, then passes an instance +** of the structure down into exprDup() so that they decendents can have +** access to that memory. 
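+**
+** A layout sketch (illustrative, not a normative description): duplicating
+** the tree for "a+b" with EXPRDUP_REDUCE yields a single allocation that
+** holds the "+" node, then the two leaf nodes, then their zToken text,
+** with zAlloc stepped past each ROUND8-aligned slice as exprDup() recurses.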
*/ -static Expr *exprDup(sqlite3 *db, const Expr *p, int dupFlags, u8 **pzBuffer){ +typedef struct EdupBuf EdupBuf; +struct EdupBuf { + u8 *zAlloc; /* Memory space available for storage */ +#ifdef SQLITE_DEBUG + u8 *zEnd; /* First byte past the end of memory */ +#endif +}; + +/* +** This function is similar to sqlite3ExprDup(), except that if pEdupBuf +** is not NULL then it points to memory that can be used to store a copy +** of the input Expr p together with its p->u.zToken (if any). pEdupBuf +** is updated with the new buffer tail prior to returning. +*/ +static Expr *exprDup( + sqlite3 *db, /* Database connection (for memory allocation) */ + const Expr *p, /* Expr tree to be duplicated */ + int dupFlags, /* EXPRDUP_REDUCE for compression. 0 if not */ + EdupBuf *pEdupBuf /* Preallocated storage space, or NULL */ +){ Expr *pNew; /* Value to return */ - u8 *zAlloc; /* Memory space from which to build Expr object */ + EdupBuf sEdupBuf; /* Memory space from which to build Expr object */ u32 staticFlag; /* EP_Static if space not obtained from malloc */ + int nToken = -1; /* Space needed for p->u.zToken. -1 means unknown */ assert( db!=0 ); assert( p ); assert( dupFlags==0 || dupFlags==EXPRDUP_REDUCE ); - assert( pzBuffer==0 || dupFlags==EXPRDUP_REDUCE ); + assert( pEdupBuf==0 || dupFlags==EXPRDUP_REDUCE ); /* Figure out where to write the new Expr structure. */ - if( pzBuffer ){ - zAlloc = *pzBuffer; + if( pEdupBuf ){ + sEdupBuf.zAlloc = pEdupBuf->zAlloc; +#ifdef SQLITE_DEBUG + sEdupBuf.zEnd = pEdupBuf->zEnd; +#endif staticFlag = EP_Static; - assert( zAlloc!=0 ); + assert( sEdupBuf.zAlloc!=0 ); + assert( dupFlags==EXPRDUP_REDUCE ); }else{ - zAlloc = sqlite3DbMallocRawNN(db, dupedExprSize(p, dupFlags)); + int nAlloc; + if( dupFlags ){ + nAlloc = dupedExprSize(p); + }else if( !ExprHasProperty(p, EP_IntValue) && p->u.zToken ){ + nToken = sqlite3Strlen30NN(p->u.zToken)+1; + nAlloc = ROUND8(EXPR_FULLSIZE + nToken); + }else{ + nToken = 0; + nAlloc = ROUND8(EXPR_FULLSIZE); + } + assert( nAlloc==ROUND8(nAlloc) ); + sEdupBuf.zAlloc = sqlite3DbMallocRawNN(db, nAlloc); +#ifdef SQLITE_DEBUG + sEdupBuf.zEnd = sEdupBuf.zAlloc ? sEdupBuf.zAlloc+nAlloc : 0; +#endif + staticFlag = 0; } - pNew = (Expr *)zAlloc; + pNew = (Expr *)sEdupBuf.zAlloc; + assert( EIGHT_BYTE_ALIGNMENT(pNew) ); if( pNew ){ /* Set nNewSize to the size allocated for the structure pointed to @@ -105085,26 +109560,31 @@ static Expr *exprDup(sqlite3 *db, const Expr *p, int dupFlags, u8 **pzBuffer){ ** by the copy of the p->u.zToken string (if any). 
    */
    const unsigned nStructSize = dupedExprStructSize(p, dupFlags);
-    const int nNewSize = nStructSize & 0xfff;
-    int nToken;
-    if( !ExprHasProperty(p, EP_IntValue) && p->u.zToken ){
-      nToken = sqlite3Strlen30(p->u.zToken) + 1;
-    }else{
-      nToken = 0;
+    int nNewSize = nStructSize & 0xfff;
+    if( nToken<0 ){
+      if( !ExprHasProperty(p, EP_IntValue) && p->u.zToken ){
+        nToken = sqlite3Strlen30(p->u.zToken) + 1;
+      }else{
+        nToken = 0;
+      }
     }
     if( dupFlags ){
+      assert( (int)(sEdupBuf.zEnd - sEdupBuf.zAlloc) >= nNewSize+nToken );
       assert( ExprHasProperty(p, EP_Reduced)==0 );
-      memcpy(zAlloc, p, nNewSize);
+      memcpy(sEdupBuf.zAlloc, p, nNewSize);
     }else{
       u32 nSize = (u32)exprStructSize(p);
+      assert( (int)(sEdupBuf.zEnd - sEdupBuf.zAlloc) >=
+                                                 (int)EXPR_FULLSIZE+nToken );
-      memcpy(zAlloc, p, nSize);
+      memcpy(sEdupBuf.zAlloc, p, nSize);
      if( nSize<EXPR_FULLSIZE ){
-        memset(&zAlloc[nSize], 0, EXPR_FULLSIZE-nSize);
+        memset(&sEdupBuf.zAlloc[nSize], 0, EXPR_FULLSIZE-nSize);
      }
-    pNew->flags &= ~(EP_Reduced|EP_TokenOnly|EP_Static|EP_MemToken);
+    pNew->flags &= ~(EP_Reduced|EP_TokenOnly|EP_Static);
     pNew->flags |= nStructSize & (EP_Reduced|EP_TokenOnly);
     pNew->flags |= staticFlag;
     ExprClearVVAProperties(pNew);
@@ -105113,44 +109593,50 @@
     }
 
     /* Copy the p->u.zToken string, if any. */
-    if( nToken ){
-      char *zToken = pNew->u.zToken = (char*)&zAlloc[nNewSize];
+    assert( nToken>=0 );
+    if( nToken>0 ){
+      char *zToken = pNew->u.zToken = (char*)&sEdupBuf.zAlloc[nNewSize];
       memcpy(zToken, p->u.zToken, nToken);
+      nNewSize += nToken;
     }
+    sEdupBuf.zAlloc += ROUND8(nNewSize);
+
+    if( ((p->flags|pNew->flags)&(EP_TokenOnly|EP_Leaf))==0 ){
-    if( 0==((p->flags|pNew->flags) & (EP_TokenOnly|EP_Leaf)) ){
       /* Fill in the pNew->x.pSelect or pNew->x.pList member. */
       if( ExprUseXSelect(p) ){
         pNew->x.pSelect = sqlite3SelectDup(db, p->x.pSelect, dupFlags);
       }else{
-        pNew->x.pList = sqlite3ExprListDup(db, p->x.pList, dupFlags);
+        pNew->x.pList = sqlite3ExprListDup(db, p->x.pList,
+                                           p->op!=TK_ORDER ? dupFlags : 0);
      }
-    }
-    /* Fill in pNew->pLeft and pNew->pRight. */
-    if( ExprHasProperty(pNew, EP_Reduced|EP_TokenOnly|EP_WinFunc) ){
-      zAlloc += dupedExprNodeSize(p, dupFlags);
-      if( !ExprHasProperty(pNew, EP_TokenOnly|EP_Leaf) ){
-        pNew->pLeft = p->pLeft ?
-                      exprDup(db, p->pLeft, EXPRDUP_REDUCE, &zAlloc) : 0;
-        pNew->pRight = p->pRight ?
-                       exprDup(db, p->pRight, EXPRDUP_REDUCE, &zAlloc) : 0;
-      }
 #ifndef SQLITE_OMIT_WINDOWFUNC
       if( ExprHasProperty(p, EP_WinFunc) ){
         pNew->y.pWin = sqlite3WindowDup(db, pNew, p->y.pWin);
         assert( ExprHasProperty(pNew, EP_WinFunc) );
      }
 #endif /* SQLITE_OMIT_WINDOWFUNC */
-      if( pzBuffer ){
-        *pzBuffer = zAlloc;
-      }
-    }else{
-      if( !ExprHasProperty(p, EP_TokenOnly|EP_Leaf) ){
-        if( pNew->op==TK_SELECT_COLUMN ){
+
+      /* Fill in pNew->pLeft and pNew->pRight. */
+      if( dupFlags ){
+        if( p->op==TK_SELECT_COLUMN ){
          pNew->pLeft = p->pLeft;
-          assert( p->pRight==0 || p->pRight==p->pLeft
-                  || ExprHasProperty(p->pLeft, EP_Subquery) );
+          assert( p->pRight==0
+               || p->pRight==p->pLeft
+               || ExprHasProperty(p->pLeft, EP_Subquery) );
+        }else{
+          pNew->pLeft = p->pLeft ?
+                        exprDup(db, p->pLeft, EXPRDUP_REDUCE, &sEdupBuf) : 0;
+        }
+        pNew->pRight = p->pRight ?
+ exprDup(db, p->pRight, EXPRDUP_REDUCE, &sEdupBuf) : 0; + }else{ + if( p->op==TK_SELECT_COLUMN ){ + pNew->pLeft = p->pLeft; + assert( p->pRight==0 + || p->pRight==p->pLeft + || ExprHasProperty(p->pLeft, EP_Subquery) ); }else{ pNew->pLeft = sqlite3ExprDup(db, p->pLeft, 0); } @@ -105158,6 +109644,8 @@ static Expr *exprDup(sqlite3 *db, const Expr *p, int dupFlags, u8 **pzBuffer){ } } } + if( pEdupBuf ) memcpy(pEdupBuf, &sEdupBuf, sizeof(sEdupBuf)); + assert( sEdupBuf.zAlloc <= sEdupBuf.zEnd ); return pNew; } @@ -105422,11 +109910,7 @@ SQLITE_PRIVATE Select *sqlite3SelectDup(sqlite3 *db, const Select *p, int flags) ** initially NULL, then create a new expression list. ** ** The pList argument must be either NULL or a pointer to an ExprList -** obtained from a prior call to sqlite3ExprListAppend(). This routine -** may not be used with an ExprList obtained from sqlite3ExprListDup(). -** Reason: This routine assumes that the number of slots in pList->a[] -** is a power of two. That is true for sqlite3ExprListAppend() returns -** but is not necessarily true from the return value of sqlite3ExprListDup(). +** obtained from a prior call to sqlite3ExprListAppend(). ** ** If a memory allocation error occurs, the entire list is freed and ** NULL is returned. If non-NULL is returned, then it is guaranteed @@ -105680,12 +110164,13 @@ static SQLITE_NOINLINE void exprListDeleteNN(sqlite3 *db, ExprList *pList){ int i = pList->nExpr; struct ExprList_item *pItem = pList->a; assert( pList->nExpr>0 ); + assert( db!=0 ); do{ sqlite3ExprDelete(db, pItem->pExpr); - sqlite3DbFree(db, pItem->zEName); + if( pItem->zEName ) sqlite3DbNNFreeNN(db, pItem->zEName); pItem++; }while( --i>0 ); - sqlite3DbFreeNN(db, pList); + sqlite3DbNNFreeNN(db, pList); } SQLITE_PRIVATE void sqlite3ExprListDelete(sqlite3 *db, ExprList *pList){ if( pList ) exprListDeleteNN(db, pList); @@ -105758,7 +110243,7 @@ SQLITE_PRIVATE int sqlite3ExprIdToTrueFalse(Expr *pExpr){ ** and 0 if it is FALSE. */ SQLITE_PRIVATE int sqlite3ExprTruthValue(const Expr *pExpr){ - pExpr = sqlite3ExprSkipCollate((Expr*)pExpr); + pExpr = sqlite3ExprSkipCollateAndLikely((Expr*)pExpr); assert( pExpr->op==TK_TRUEFALSE ); assert( !ExprHasProperty(pExpr, EP_IntValue) ); assert( sqlite3StrICmp(pExpr->u.zToken,"true")==0 @@ -105945,12 +110430,17 @@ SQLITE_PRIVATE int sqlite3ExprIsTableConstant(Expr *p, int iCur){ } /* -** Check pExpr to see if it is an invariant constraint on data source pSrc. +** Check pExpr to see if it is an constraint on the single data source +** pSrc = &pSrcList->a[iSrc]. In other words, check to see if pExpr +** constrains pSrc but does not depend on any other tables or data +** sources anywhere else in the query. Return true (non-zero) if pExpr +** is a constraint on pSrc only. +** ** This is an optimization. False negatives will perhaps cause slower ** queries, but false positives will yield incorrect answers. So when in ** doubt, return 0. ** -** To be an invariant constraint, the following must be true: +** To be an single-source constraint, the following must be true: ** ** (1) pExpr cannot refer to any table other than pSrc->iCursor. ** @@ -105961,13 +110451,31 @@ SQLITE_PRIVATE int sqlite3ExprIsTableConstant(Expr *p, int iCur){ ** ** (4) If pSrc is the right operand of a LEFT JOIN, then... ** (4a) pExpr must come from an ON clause.. - (4b) and specifically the ON clause associated with the LEFT JOIN. +** (4b) and specifically the ON clause associated with the LEFT JOIN. 
 **
 ** (5) If pSrc is not the right operand of a LEFT JOIN or the left
 ** operand of a RIGHT JOIN, then pExpr must be from the WHERE
 ** clause, not an ON clause.
+**
+** (6) Either:
+**
+** (6a) pExpr does not originate in an ON or USING clause, or
+**
+** (6b) The ON or USING clause from which pExpr is derived is
+** not to the left of a RIGHT JOIN (or FULL JOIN).
+**
+** Without this restriction, accepting pExpr as a single-table
+** constraint might move the the ON/USING filter expression
+** from the left side of a RIGHT JOIN over to the right side,
+** which leads to incorrect answers. See also restriction (9)
+** on push-down.
 */
-SQLITE_PRIVATE int sqlite3ExprIsTableConstraint(Expr *pExpr, const SrcItem *pSrc){
+SQLITE_PRIVATE int sqlite3ExprIsSingleTableConstraint(
+  Expr *pExpr,                 /* The constraint */
+  const SrcList *pSrcList,     /* Complete FROM clause */
+  int iSrc                     /* Which element of pSrcList to use */
+){
+  const SrcItem *pSrc = &pSrcList->a[iSrc];
   if( pSrc->fg.jointype & JT_LTORJ ){
     return 0;  /* rule (3) */
   }
@@ -105977,6 +110485,19 @@ SQLITE_PRIVATE int sqlite3ExprIsTableConstraint(Expr *pExpr, const SrcItem *pSrc
   }else{
     if( ExprHasProperty(pExpr, EP_OuterON) ) return 0; /* rule (5) */
   }
+  if( ExprHasProperty(pExpr, EP_OuterON|EP_InnerON)  /* (6a) */
+   && (pSrcList->a[0].fg.jointype & JT_LTORJ)!=0     /* Fast pre-test of (6b) */
+  ){
+    int jj;
+    for(jj=0; jj<iSrc; jj++){
+      if( pExpr->w.iJoin==pSrcList->a[jj].iCursor ){
+        if( (pSrcList->a[jj].fg.jointype & JT_LTORJ)!=0 ){
+          return 0;  /* restriction (6) */
+        }
+        break;
+      }
+    }
+  }
   return sqlite3ExprIsTableConstant(pExpr, pSrc->iCursor); /* rules (1), (2) */
 }
 
@@ -106215,11 +110736,32 @@ SQLITE_PRIVATE int sqlite3IsRowid(const char *z){
   return 0;
 }
 
+/*
+** Return a pointer to a buffer containing a usable rowid alias for table
+** pTab. An alias is usable if there is not an explicit user-defined column
+** of the same name.
+*/
+SQLITE_PRIVATE const char *sqlite3RowidAlias(Table *pTab){
+  const char *azOpt[] = {"_ROWID_", "ROWID", "OID"};
+  int ii;
+  assert( VisibleRowid(pTab) );
+  for(ii=0; ii<ArraySize(azOpt); ii++){
+    int iCol;
+    for(iCol=0; iCol<pTab->nCol; iCol++){
+      if( sqlite3_stricmp(azOpt[ii], pTab->aCol[iCol].zCnName)==0 ) break;
+    }
+    if( iCol==pTab->nCol ){
+      return azOpt[ii];
+    }
+  }
+  return 0;
+}
+
 /*
 ** pX is the RHS of an IN operator. If pX is a SELECT statement
 ** that can be simplified to a direct table access, then return
 ** a pointer to the SELECT statement. If pX is not a SELECT statement,
-** or if the SELECT statement needs to be manifested into a transient
+** or if the SELECT statement needs to be materialized into a transient
 ** table, then return NULL.
 */
 #ifndef SQLITE_OMIT_SUBQUERY
@@ -106315,7 +110857,7 @@ static int sqlite3InRhsIsConstant(Expr *pIn){
 **   IN_INDEX_INDEX_ASC  - The cursor was opened on an ascending index.
 **   IN_INDEX_INDEX_DESC - The cursor was opened on a descending index.
 **   IN_INDEX_EPH        - The cursor was opened on a specially created and
-**                         populated epheremal table.
+**                         populated ephemeral table.
 **   IN_INDEX_NOOP       - No cursor was allocated. The IN operator must be
 **                         implemented as a sequence of comparisons.
 **
@@ -106328,7 +110870,7 @@ static int sqlite3InRhsIsConstant(Expr *pIn){
 ** an ephemeral table might need to be generated from the RHS and then
 ** pX->iTable made to point to the ephemeral table instead of an
 ** existing table.  In this case, the creation and initialization of the
-** ephmeral table might be put inside of a subroutine, the EP_Subrtn flag
+** ephemeral table might be put inside of a subroutine, the EP_Subrtn flag
 ** will be set on pX and the pX->y.sub fields will be set to show where
 ** the subroutine is coded.
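 **
 ** A hedged example (assumed schema, not part of this diff): for
 **
 **       SELECT * FROM t1 WHERE x IN (SELECT y FROM t2);
 **
 ** an existing UNIQUE index on t2.y lets the subquery be answered with
 ** IN_INDEX_INDEX_ASC directly; without one, the subquery results are
 ** first copied into an ephemeral table (IN_INDEX_EPH).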
 **
@@ -106340,12 +110882,12 @@ static int sqlite3InRhsIsConstant(Expr *pIn){
 **
 ** When IN_INDEX_LOOP is used (and the b-tree will be used to iterate
 ** through the set members) then the b-tree must not contain duplicates.
-** An epheremal table will be created unless the selected columns are guaranteed
+** An ephemeral table will be created unless the selected columns are guaranteed
 ** to be unique - either because it is an INTEGER PRIMARY KEY or due to
 ** a UNIQUE constraint or index.
 **
 ** When IN_INDEX_MEMBERSHIP is used (and the b-tree will be used
-** for fast set membership tests) then an epheremal table must
+** for fast set membership tests) then an ephemeral table must
 ** be used unless <columns> is a single INTEGER PRIMARY KEY column or an
 ** index can be found with the specified <columns> as its left-most.
 **
@@ -106505,7 +111047,6 @@ SQLITE_PRIVATE int sqlite3FindInIndex(
         CollSeq *pReq = sqlite3BinaryCompareCollSeq(pParse, pLhs, pRhs);
         int j;
 
-        assert( pReq!=0 || pRhs->iColumn==XN_ROWID || pParse->nErr );
         for(j=0; j<nExpr; j++){
           if( pIdx->aiColumn[j]!=pRhs->iColumn ) continue;
          assert( pIdx->azColl[j] );
@@ -106679,7 +111220,7 @@ SQLITE_PRIVATE void sqlite3VectorErrorMsg(Parse *pParse, Expr *pExpr){
 **     x IN (SELECT a FROM b)     -- IN operator with subquery on the right
 **
 ** The pExpr parameter is the IN operator.  The cursor number for the
-** constructed ephermeral table is returned.  The first time the ephemeral
+** constructed ephemeral table is returned.  The first time the ephemeral
 ** table is computed, the cursor number is also stored in pExpr->iTable,
 ** however the cursor number returned might not be the same, as it might
 ** have been duplicated using OP_OpenDup.
@@ -106863,6 +111404,7 @@ SQLITE_PRIVATE void sqlite3CodeRhsOfIN(
       sqlite3VdbeChangeP4(v, addr, (void *)pKeyInfo, P4_KEYINFO);
     }
     if( addrOnce ){
+      sqlite3VdbeAddOp1(v, OP_NullRow, iTab);
       sqlite3VdbeJumpHere(v, addrOnce);
 
       /* Subroutine return */
       assert( ExprUseYSub(pExpr) );
@@ -106898,6 +111440,9 @@ SQLITE_PRIVATE int sqlite3CodeSubselect(Parse *pParse, Expr *pExpr){
   SelectDest dest;            /* How to deal with SELECT result */
   int nReg;                   /* Registers to allocate */
   Expr *pLimit;               /* New limit expression */
+#ifdef SQLITE_ENABLE_STMT_SCANSTATUS
+  int addrExplain;            /* Address of OP_Explain instruction */
+#endif
 
   Vdbe *v = pParse->pVdbe;
   assert( v!=0 );
@@ -106950,8 +111495,9 @@
     ** In both cases, the query is augmented with "LIMIT 1".  Any
     ** preexisting limit is discarded in place of the new LIMIT 1.
     */
-    ExplainQueryPlan((pParse, 1, "%sSCALAR SUBQUERY %d",
+    ExplainQueryPlan2(addrExplain, (pParse, 1, "%sSCALAR SUBQUERY %d",
           addrOnce?"":"CORRELATED ", pSel->selId));
+    sqlite3VdbeScanStatusCounters(v, addrExplain, addrExplain, -1);
    nReg = pExpr->op==TK_SELECT ?
pSel->pEList->nExpr : 1; sqlite3SelectDestInit(&dest, 0, pParse->nMem+1); pParse->nMem += nReg; @@ -106976,7 +111522,7 @@ SQLITE_PRIVATE int sqlite3CodeSubselect(Parse *pParse, Expr *pExpr){ pLimit = sqlite3PExpr(pParse, TK_NE, sqlite3ExprDup(db, pSel->pLimit->pLeft, 0), pLimit); } - sqlite3ExprDelete(db, pSel->pLimit->pLeft); + sqlite3ExprDeferredDelete(pParse, pSel->pLimit->pLeft); pSel->pLimit->pLeft = pLimit; }else{ /* If there is no pre-existing limit add a limit of 1 */ @@ -106994,6 +111540,7 @@ SQLITE_PRIVATE int sqlite3CodeSubselect(Parse *pParse, Expr *pExpr){ if( addrOnce ){ sqlite3VdbeJumpHere(v, addrOnce); } + sqlite3VdbeScanStatusRange(v, addrExplain, addrExplain, -1); /* Subroutine return */ assert( ExprUseYSub(pExpr) ); @@ -107402,6 +111949,7 @@ SQLITE_PRIVATE void sqlite3ExprCodeGeneratedColumn( ){ int iAddr; Vdbe *v = pParse->pVdbe; + int nErr = pParse->nErr; assert( v!=0 ); assert( pParse->iSelfTab!=0 ); if( pParse->iSelfTab>0 ){ @@ -107414,6 +111962,7 @@ SQLITE_PRIVATE void sqlite3ExprCodeGeneratedColumn( sqlite3VdbeAddOp4(v, OP_Affinity, regOut, 1, 0, &pCol->affinity, 1); } if( iAddr ) sqlite3VdbeJumpHere(v, iAddr); + if( pParse->nErr>nErr ) pParse->db->errByteOffset = -1; } #endif /* SQLITE_OMIT_GENERATED_COLUMNS */ @@ -107429,10 +111978,8 @@ SQLITE_PRIVATE void sqlite3ExprCodeGetColumnOfTable( ){ Column *pCol; assert( v!=0 ); - if( pTab==0 ){ - sqlite3VdbeAddOp3(v, OP_Column, iTabCur, iCol, regOut); - return; - } + assert( pTab!=0 ); + assert( iCol!=XN_EXPR ); if( iCol<0 || iCol==pTab->iPKey ){ sqlite3VdbeAddOp2(v, OP_Rowid, iTabCur, regOut); VdbeComment((v, "%s.rowid", pTab->zName)); @@ -107488,10 +112035,13 @@ SQLITE_PRIVATE int sqlite3ExprCodeGetColumn( u8 p5 /* P5 value for OP_Column + FLAGS */ ){ assert( pParse->pVdbe!=0 ); + assert( (p5 & (OPFLAG_NOCHNG|OPFLAG_TYPEOFARG|OPFLAG_LENGTHARG))==p5 ); + assert( IsVirtual(pTab) || (p5 & OPFLAG_NOCHNG)==0 ); sqlite3ExprCodeGetColumnOfTable(pParse->pVdbe, pTab, iTable, iColumn, iReg); if( p5 ){ - VdbeOp *pOp = sqlite3VdbeGetOp(pParse->pVdbe,-1); + VdbeOp *pOp = sqlite3VdbeGetLastOp(pParse->pVdbe); if( pOp->opcode==OP_Column ) pOp->p5 = p5; + if( pOp->opcode==OP_VColumn ) pOp->p5 = (p5 & OPFLAG_NOCHNG); } return iReg; } @@ -107520,7 +112070,7 @@ static void exprToRegister(Expr *pExpr, int iReg){ /* ** Evaluate an expression (either a vector or a scalar expression) and store -** the result in continguous temporary registers. Return the index of +** the result in contiguous temporary registers. Return the index of ** the first register used to store the result. ** ** If the returned result register is a temporary scalar, then also write @@ -107559,8 +112109,8 @@ static int exprCodeVector(Parse *pParse, Expr *p, int *piFreeable){ ** so that a subsequent copy will not be merged into this one. 
*/ static void setDoNotMergeFlagOnCopy(Vdbe *v){ - if( sqlite3VdbeGetOp(v, -1)->opcode==OP_Copy ){ - sqlite3VdbeChangeP5(v, 1); /* Tag trailing OP_Copy as not mergable */ + if( sqlite3VdbeGetLastOp(v)->opcode==OP_Copy ){ + sqlite3VdbeChangeP5(v, 1); /* Tag trailing OP_Copy as not mergeable */ } } @@ -107650,13 +112200,13 @@ static int exprCodeInlineFunction( } case INLINEFUNC_implies_nonnull_row: { - /* REsult of sqlite3ExprImpliesNonNullRow() */ + /* Result of sqlite3ExprImpliesNonNullRow() */ Expr *pA1; assert( nFarg==2 ); pA1 = pFarg->a[1].pExpr; if( pA1->op==TK_COLUMN ){ sqlite3VdbeAddOp2(v, OP_Integer, - sqlite3ExprImpliesNonNullRow(pFarg->a[0].pExpr,pA1->iTable), + sqlite3ExprImpliesNonNullRow(pFarg->a[0].pExpr,pA1->iTable,1), target); }else{ sqlite3VdbeAddOp2(v, OP_Null, 0, target); @@ -107669,10 +112219,13 @@ static int exprCodeInlineFunction( ** the type affinity of the argument. This is used for testing of ** the SQLite type logic. */ - const char *azAff[] = { "blob", "text", "numeric", "integer", "real" }; + const char *azAff[] = { "blob", "text", "numeric", "integer", + "real", "flexnum" }; char aff; assert( nFarg==1 ); aff = sqlite3ExprAffinity(pFarg->a[0].pExpr); + assert( aff<=SQLITE_AFF_NONE + || (aff>=SQLITE_AFF_BLOB && aff<=SQLITE_AFF_FLEXNUM) ); sqlite3VdbeLoadString(v, target, (aff<=SQLITE_AFF_NONE) ? "none" : azAff[aff-SQLITE_AFF_BLOB]); break; @@ -107682,6 +112235,99 @@ static int exprCodeInlineFunction( return target; } +/* +** Check to see if pExpr is one of the indexed expressions on pParse->pIdxEpr. +** If it is, then resolve the expression by reading from the index and +** return the register into which the value has been read. If pExpr is +** not an indexed expression, then return negative. +*/ +static SQLITE_NOINLINE int sqlite3IndexedExprLookup( + Parse *pParse, /* The parsing context */ + Expr *pExpr, /* The expression to potentially bypass */ + int target /* Where to store the result of the expression */ +){ + IndexedExpr *p; + Vdbe *v; + for(p=pParse->pIdxEpr; p; p=p->pIENext){ + u8 exprAff; + int iDataCur = p->iDataCur; + if( iDataCur<0 ) continue; + if( pParse->iSelfTab ){ + if( p->iDataCur!=pParse->iSelfTab-1 ) continue; + iDataCur = -1; + } + if( sqlite3ExprCompare(0, pExpr, p->pExpr, iDataCur)!=0 ) continue; + assert( p->aff>=SQLITE_AFF_BLOB && p->aff<=SQLITE_AFF_NUMERIC ); + exprAff = sqlite3ExprAffinity(pExpr); + if( (exprAff<=SQLITE_AFF_BLOB && p->aff!=SQLITE_AFF_BLOB) + || (exprAff==SQLITE_AFF_TEXT && p->aff!=SQLITE_AFF_TEXT) + || (exprAff>=SQLITE_AFF_NUMERIC && p->aff!=SQLITE_AFF_NUMERIC) + ){ + /* Affinity mismatch on a generated column */ + continue; + } + + v = pParse->pVdbe; + assert( v!=0 ); + if( p->bMaybeNullRow ){ + /* If the index is on a NULL row due to an outer join, then we + ** cannot extract the value from the index. The value must be + ** computed using the original expression. 
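sqlite3IndexedExprLookup(), added above, lets generated code read the value of an indexed expression straight from the index cursor instead of recomputing it. A minimal SQL sketch of the situation it targets (schema is hypothetical):

  CREATE TABLE t1(a INT, b INT);
  CREATE INDEX t1ab ON t1(a+b);
  -- While scanning t1 through t1ab, occurrences of a+b below can be
  -- satisfied with an OP_Column on the index cursor; the bMaybeNullRow
  -- path covers an index cursor sitting on a NULL row produced by an
  -- outer join:
  SELECT a+b FROM t1 WHERE a+b > 100;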
*/ + int addr = sqlite3VdbeCurrentAddr(v); + sqlite3VdbeAddOp3(v, OP_IfNullRow, p->iIdxCur, addr+3, target); + VdbeCoverage(v); + sqlite3VdbeAddOp3(v, OP_Column, p->iIdxCur, p->iIdxCol, target); + VdbeComment((v, "%s expr-column %d", p->zIdxName, p->iIdxCol)); + sqlite3VdbeGoto(v, 0); + p = pParse->pIdxEpr; + pParse->pIdxEpr = 0; + sqlite3ExprCode(pParse, pExpr, target); + pParse->pIdxEpr = p; + sqlite3VdbeJumpHere(v, addr+2); + }else{ + sqlite3VdbeAddOp3(v, OP_Column, p->iIdxCur, p->iIdxCol, target); + VdbeComment((v, "%s expr-column %d", p->zIdxName, p->iIdxCol)); + } + return target; + } + return -1; /* Not found */ +} + + +/* +** Expresion pExpr is guaranteed to be a TK_COLUMN or equivalent. This +** function checks the Parse.pIdxPartExpr list to see if this column +** can be replaced with a constant value. If so, it generates code to +** put the constant value in a register (ideally, but not necessarily, +** register iTarget) and returns the register number. +** +** Or, if the TK_COLUMN cannot be replaced by a constant, zero is +** returned. +*/ +static int exprPartidxExprLookup(Parse *pParse, Expr *pExpr, int iTarget){ + IndexedExpr *p; + for(p=pParse->pIdxPartExpr; p; p=p->pIENext){ + if( pExpr->iColumn==p->iIdxCol && pExpr->iTable==p->iDataCur ){ + Vdbe *v = pParse->pVdbe; + int addr = 0; + int ret; + + if( p->bMaybeNullRow ){ + addr = sqlite3VdbeAddOp1(v, OP_IfNullRow, p->iIdxCur); + } + ret = sqlite3ExprCodeTarget(pParse, p->pExpr, iTarget); + sqlite3VdbeAddOp4(pParse->pVdbe, OP_Affinity, ret, 1, 0, + (const char*)&p->aff, 1); + if( addr ){ + sqlite3VdbeJumpHere(v, addr); + sqlite3VdbeChangeP3(v, addr, ret); + } + return ret; + } + } + return 0; +} + /* ** Generate code into the current Vdbe to evaluate the given @@ -107710,25 +112356,44 @@ SQLITE_PRIVATE int sqlite3ExprCodeTarget(Parse *pParse, Expr *pExpr, int target) expr_code_doover: if( pExpr==0 ){ op = TK_NULL; + }else if( pParse->pIdxEpr!=0 + && !ExprHasProperty(pExpr, EP_Leaf) + && (r1 = sqlite3IndexedExprLookup(pParse, pExpr, target))>=0 + ){ + return r1; }else{ assert( !ExprHasVVAProperty(pExpr,EP_Immutable) ); op = pExpr->op; } + assert( op!=TK_ORDER ); switch( op ){ case TK_AGG_COLUMN: { AggInfo *pAggInfo = pExpr->pAggInfo; struct AggInfo_col *pCol; assert( pAggInfo!=0 ); - assert( pExpr->iAgg>=0 && pExpr->iAgg<pAggInfo->nColumn ); + assert( pExpr->iAgg>=0 ); + if( pExpr->iAgg>=pAggInfo->nColumn ){ + /* Happens when the left table of a RIGHT JOIN is null and + ** is using an expression index */ + sqlite3VdbeAddOp2(v, OP_Null, 0, target); +#ifdef SQLITE_VDBE_COVERAGE + /* Verify that the OP_Null above is exercised by tests + ** tag-20230325-2 */ + sqlite3VdbeAddOp3(v, OP_NotNull, target, 1, 20230325); + VdbeCoverageNeverTaken(v); +#endif + break; + } pCol = &pAggInfo->aCol[pExpr->iAgg]; if( !pAggInfo->directMode ){ - assert( pCol->iMem>0 ); - return pCol->iMem; + return AggInfoColumnReg(pAggInfo, pExpr->iAgg); }else if( pAggInfo->useSortingIdx ){ Table *pTab = pCol->pTab; sqlite3VdbeAddOp3(v, OP_Column, pAggInfo->sortingIdxPTab, pCol->iSorterColumn, target); - if( pCol->iColumn<0 ){ + if( pTab==0 ){ + /* No comment added */ + }else if( pCol->iColumn<0 ){ VdbeComment((v,"%s.rowid",pTab->zName)); }else{ VdbeComment((v,"%s.%s", @@ -107738,6 +112403,11 @@ SQLITE_PRIVATE int sqlite3ExprCodeTarget(Parse *pParse, Expr *pExpr, int target) } } return target; + }else if( pExpr->y.pTab==0 ){ + /* This case happens when the argument to an aggregate function + ** is rewritten by aggregateConvertIndexedExprRefToColumn() */ +
sqlite3VdbeAddOp3(v, OP_Column, pExpr->iTable, pExpr->iColumn, target); + return target; } /* Otherwise, fall thru into the TK_COLUMN case */ /* no break */ deliberate_fall_through @@ -107748,20 +112418,17 @@ SQLITE_PRIVATE int sqlite3ExprCodeTarget(Parse *pParse, Expr *pExpr, int target) if( ExprHasProperty(pExpr, EP_FixedCol) ){ /* This COLUMN expression is really a constant due to WHERE clause ** constraints, and that constant is coded by the pExpr->pLeft - ** expresssion. However, make sure the constant has the correct + ** expression. However, make sure the constant has the correct ** datatype by applying the Affinity of the table column to the ** constant. */ int aff; iReg = sqlite3ExprCodeTarget(pParse, pExpr->pLeft,target); assert( ExprUseYTab(pExpr) ); - if( pExpr->y.pTab ){ - aff = sqlite3TableColumnAffinity(pExpr->y.pTab, pExpr->iColumn); - }else{ - aff = pExpr->affExpr; - } + assert( pExpr->y.pTab!=0 ); + aff = sqlite3TableColumnAffinity(pExpr->y.pTab, pExpr->iColumn); if( aff>SQLITE_AFF_BLOB ){ - static const char zAff[] = "B\000C\000D\000E"; + static const char zAff[] = "B\000C\000D\000E\000F"; assert( SQLITE_AFF_BLOB=='A' ); assert( SQLITE_AFF_TEXT=='B' ); sqlite3VdbeAddOp4(v, OP_Affinity, iReg, 1, 0, @@ -107820,13 +112487,16 @@ SQLITE_PRIVATE int sqlite3ExprCodeTarget(Parse *pParse, Expr *pExpr, int target) iTab = pParse->iSelfTab - 1; } } + else if( pParse->pIdxPartExpr + && 0!=(r1 = exprPartidxExprLookup(pParse, pExpr, target)) + ){ + return r1; + } assert( ExprUseYTab(pExpr) ); + assert( pExpr->y.pTab!=0 ); iReg = sqlite3ExprCodeGetColumn(pParse, pExpr->y.pTab, pExpr->iColumn, iTab, target, pExpr->op2); - if( pExpr->y.pTab==0 && pExpr->affExpr==SQLITE_AFF_REAL ){ - sqlite3VdbeAddOp1(v, OP_RealAffinity, iReg); - } return iReg; } case TK_INTEGER: { @@ -107893,11 +112563,8 @@ SQLITE_PRIVATE int sqlite3ExprCodeTarget(Parse *pParse, Expr *pExpr, int target) #ifndef SQLITE_OMIT_CAST case TK_CAST: { /* Expressions of the form: CAST(pLeft AS token) */ - inReg = sqlite3ExprCodeTarget(pParse, pExpr->pLeft, target); - if( inReg!=target ){ - sqlite3VdbeAddOp2(v, OP_SCopy, inReg, target); - inReg = target; - } + sqlite3ExprCode(pParse, pExpr->pLeft, target); + assert( inReg==target ); assert( !ExprHasProperty(pExpr, EP_IntValue) ); sqlite3VdbeAddOp2(v, OP_Cast, target, sqlite3AffinityType(pExpr->u.zToken, 0)); @@ -108040,7 +112707,7 @@ SQLITE_PRIVATE int sqlite3ExprCodeTarget(Parse *pParse, Expr *pExpr, int target) assert( !ExprHasProperty(pExpr, EP_IntValue) ); sqlite3ErrorMsg(pParse, "misuse of aggregate: %#T()", pExpr); }else{ - return pInfo->aFunc[pExpr->iAgg].iMem; + return AggInfoFuncReg(pInfo, pExpr->iAgg); } break; } @@ -108082,7 +112749,7 @@ SQLITE_PRIVATE int sqlite3ExprCodeTarget(Parse *pParse, Expr *pExpr, int target) sqlite3ErrorMsg(pParse, "unknown function: %#T()", pExpr); break; } - if( pDef->funcFlags & SQLITE_FUNC_INLINE ){ + if( (pDef->funcFlags & SQLITE_FUNC_INLINE)!=0 && ALWAYS(pFarg!=0) ){ assert( (pDef->funcFlags & SQLITE_FUNC_UNSAFE)==0 ); assert( (pDef->funcFlags & SQLITE_FUNC_DIRECT)==0 ); return exprCodeInlineFunction(pParse, pFarg, @@ -108108,10 +112775,10 @@ SQLITE_PRIVATE int sqlite3ExprCodeTarget(Parse *pParse, Expr *pExpr, int target) r1 = sqlite3GetTempRange(pParse, nFarg); } - /* For length() and typeof() functions with a column argument, + /* For length() and typeof() and octet_length() functions, ** set the P5 parameter to the OP_Column opcode to OPFLAG_LENGTHARG - ** or OPFLAG_TYPEOFARG respectively, to avoid unnecessary data - ** loading. 
+ ** or OPFLAG_TYPEOFARG or OPFLAG_BYTELENARG respectively, to avoid + ** unnecessary data loading. */ if( (pDef->funcFlags & (SQLITE_FUNC_LENGTH|SQLITE_FUNC_TYPEOF))!=0 ){ u8 exprOp; @@ -108121,14 +112788,16 @@ SQLITE_PRIVATE int sqlite3ExprCodeTarget(Parse *pParse, Expr *pExpr, int target) if( exprOp==TK_COLUMN || exprOp==TK_AGG_COLUMN ){ assert( SQLITE_FUNC_LENGTH==OPFLAG_LENGTHARG ); assert( SQLITE_FUNC_TYPEOF==OPFLAG_TYPEOFARG ); - testcase( pDef->funcFlags & OPFLAG_LENGTHARG ); - pFarg->a[0].pExpr->op2 = - pDef->funcFlags & (OPFLAG_LENGTHARG|OPFLAG_TYPEOFARG); + assert( SQLITE_FUNC_BYTELEN==OPFLAG_BYTELENARG ); + assert( (OPFLAG_LENGTHARG|OPFLAG_TYPEOFARG)==OPFLAG_BYTELENARG ); + testcase( (pDef->funcFlags & OPFLAG_BYTELENARG)==OPFLAG_LENGTHARG ); + testcase( (pDef->funcFlags & OPFLAG_BYTELENARG)==OPFLAG_TYPEOFARG ); + testcase( (pDef->funcFlags & OPFLAG_BYTELENARG)==OPFLAG_BYTELENARG); + pFarg->a[0].pExpr->op2 = pDef->funcFlags & OPFLAG_BYTELENARG; } } - sqlite3ExprCodeExprList(pParse, pFarg, r1, 0, - SQLITE_ECEL_DUP|SQLITE_ECEL_FACTOR); + sqlite3ExprCodeExprList(pParse, pFarg, r1, 0, SQLITE_ECEL_FACTOR); }else{ r1 = 0; } @@ -108229,17 +112898,16 @@ SQLITE_PRIVATE int sqlite3ExprCodeTarget(Parse *pParse, Expr *pExpr, int target) return target; } case TK_COLLATE: { - if( !ExprHasProperty(pExpr, EP_Collate) - && ALWAYS(pExpr->pLeft) - && pExpr->pLeft->op==TK_FUNCTION - ){ - inReg = sqlite3ExprCodeTarget(pParse, pExpr->pLeft, target); - if( inReg!=target ){ - sqlite3VdbeAddOp2(v, OP_SCopy, inReg, target); - inReg = target; - } - sqlite3VdbeAddOp1(v, OP_ClrSubtype, inReg); - return inReg; + if( !ExprHasProperty(pExpr, EP_Collate) ){ + /* A TK_COLLATE Expr node without the EP_Collate tag is a so-called + ** "SOFT-COLLATE" that is added to constraints that are pushed down + ** from outer queries into sub-queries by the push-down optimization. + ** Clear subtypes as subtypes may not cross a subquery boundary. + */ + assert( pExpr->pLeft ); + sqlite3ExprCode(pParse, pExpr->pLeft, target); + sqlite3VdbeAddOp1(v, OP_ClrSubtype, target); + return target; }else{ pExpr = pExpr->pLeft; goto expr_code_doover; /* 2018-04-28: Prevent deep recursion. */ @@ -108325,16 +112993,34 @@ SQLITE_PRIVATE int sqlite3ExprCodeTarget(Parse *pParse, Expr *pExpr, int target) case TK_IF_NULL_ROW: { int addrINR; u8 okConstFactor = pParse->okConstFactor; - addrINR = sqlite3VdbeAddOp1(v, OP_IfNullRow, pExpr->iTable); - /* Temporarily disable factoring of constant expressions, since - ** even though expressions may appear to be constant, they are not - ** really constant because they originate from the right-hand side - ** of a LEFT JOIN. */ - pParse->okConstFactor = 0; - inReg = sqlite3ExprCodeTarget(pParse, pExpr->pLeft, target); + AggInfo *pAggInfo = pExpr->pAggInfo; + if( pAggInfo ){ + assert( pExpr->iAgg>=0 && pExpr->iAgg<pAggInfo->nColumn ); + if( !pAggInfo->directMode ){ + inReg = AggInfoColumnReg(pAggInfo, pExpr->iAgg); + break; + } + if( pExpr->pAggInfo->useSortingIdx ){ + sqlite3VdbeAddOp3(v, OP_Column, pAggInfo->sortingIdxPTab, + pAggInfo->aCol[pExpr->iAgg].iSorterColumn, + target); + inReg = target; + break; + } + } + addrINR = sqlite3VdbeAddOp3(v, OP_IfNullRow, pExpr->iTable, 0, target); + /* The OP_IfNullRow opcode above can overwrite the result register with + ** NULL. So we have to ensure that the result register is not a value + ** that is suppose to be a constant.
Two defenses are needed: + ** (1) Temporarily disable factoring of constant expressions + ** (2) Make sure the computed value really is stored in register + ** "target" and not someplace else. + */ + pParse->okConstFactor = 0; /* note (1) above */ + sqlite3ExprCode(pParse, pExpr->pLeft, target); + assert( target==inReg ); pParse->okConstFactor = okConstFactor; sqlite3VdbeJumpHere(v, addrINR); - sqlite3VdbeChangeP3(v, addrINR, inReg); break; } @@ -108466,9 +113152,9 @@ SQLITE_PRIVATE int sqlite3ExprCodeTarget(Parse *pParse, Expr *pExpr, int target) ** once. If no functions are involved, then factor the code out and put it at ** the end of the prepared statement in the initialization section. ** -** If regDest>=0 then the result is always stored in that register and the +** If regDest>0 then the result is always stored in that register and the ** result is not reusable. If regDest<0 then this routine is free to -** store the value whereever it wants. The register where the expression +** store the value wherever it wants. The register where the expression ** is stored is returned. When regDest<0, two identical expressions might ** code to the same register, if they do not contain function calls and hence ** are factored out into the initialization section at the end of the @@ -108481,6 +113167,7 @@ SQLITE_PRIVATE int sqlite3ExprCodeRunJustOnce( ){ ExprList *p; assert( ConstFactorOk(pParse) ); + assert( regDest!=0 ); p = pParse->pConstExpr; if( regDest<0 && p ){ struct ExprList_item *pItem; @@ -108571,7 +113258,9 @@ SQLITE_PRIVATE void sqlite3ExprCode(Parse *pParse, Expr *pExpr, int target){ inReg = sqlite3ExprCodeTarget(pParse, pExpr, target); if( inReg!=target ){ u8 op; - if( ALWAYS(pExpr) && ExprHasProperty(pExpr,EP_Subquery) ){ + if( ALWAYS(pExpr) + && (ExprHasProperty(pExpr,EP_Subquery) || pExpr->op==TK_REGISTER) + ){ op = OP_Copy; }else{ op = OP_SCopy; @@ -108666,7 +113355,7 @@ SQLITE_PRIVATE int sqlite3ExprCodeExprList( if( inReg!=target+i ){ VdbeOp *pOp; if( copyOp==OP_Copy - && (pOp=sqlite3VdbeGetOp(v, -1))->opcode==OP_Copy + && (pOp=sqlite3VdbeGetLastOp(v))->opcode==OP_Copy && pOp->p1+pOp->p3+1==inReg && pOp->p2+pOp->p3+1==target+i && pOp->p5==0 /* The do-not-merge flag must be clear */ @@ -108865,6 +113554,7 @@ SQLITE_PRIVATE void sqlite3ExprIfTrue(Parse *pParse, Expr *pExpr, int dest, int assert( TK_ISNULL==OP_IsNull ); testcase( op==TK_ISNULL ); assert( TK_NOTNULL==OP_NotNull ); testcase( op==TK_NOTNULL ); r1 = sqlite3ExprCodeTemp(pParse, pExpr->pLeft, ®Free1); + sqlite3VdbeTypeofColumn(v, r1); sqlite3VdbeAddOp2(v, op, r1, dest); VdbeCoverageIf(v, op==TK_ISNULL); VdbeCoverageIf(v, op==TK_NOTNULL); @@ -109039,6 +113729,7 @@ SQLITE_PRIVATE void sqlite3ExprIfFalse(Parse *pParse, Expr *pExpr, int dest, int case TK_ISNULL: case TK_NOTNULL: { r1 = sqlite3ExprCodeTemp(pParse, pExpr->pLeft, ®Free1); + sqlite3VdbeTypeofColumn(v, r1); sqlite3VdbeAddOp2(v, op, r1, dest); testcase( op==TK_ISNULL ); VdbeCoverageIf(v, op==TK_ISNULL); testcase( op==TK_NOTNULL ); VdbeCoverageIf(v, op==TK_NOTNULL); @@ -109192,7 +113883,13 @@ SQLITE_PRIVATE int sqlite3ExprCompare( if( pB->op==TK_COLLATE && sqlite3ExprCompare(pParse, pA,pB->pLeft,iTab)<2 ){ return 1; } - return 2; + if( pA->op==TK_AGG_COLUMN && pB->op==TK_COLUMN + && pB->iTable<0 && pA->iTable==iTab + ){ + /* fall through */ + }else{ + return 2; + } } assert( !ExprHasProperty(pA, EP_IntValue) ); assert( !ExprHasProperty(pB, EP_IntValue) ); @@ -109282,8 +113979,8 @@ SQLITE_PRIVATE int sqlite3ExprListCompare(const ExprList *pA, const ExprList *pB */ 
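The TK_COLLATE rewrite above handles the "soft" collation wrapper that the push-down optimization places around constraints it copies into subqueries; OP_ClrSubtype enforces the rule that subtypes may not cross a subquery boundary. A hedged SQL sketch of such a situation (names hypothetical):

  -- The outer WHERE term may be pushed down into the subquery:
  SELECT j FROM (SELECT json(y) AS j FROM t1) WHERE j <> '{}';
  -- json() attaches a subtype to its result; the soft-COLLATE node
  -- wrapped around the pushed-down copy of the constraint clears it.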
SQLITE_PRIVATE int sqlite3ExprCompareSkip(Expr *pA,Expr *pB, int iTab){ return sqlite3ExprCompare(0, - sqlite3ExprSkipCollateAndLikely(pA), - sqlite3ExprSkipCollateAndLikely(pB), + sqlite3ExprSkipCollate(pA), + sqlite3ExprSkipCollate(pB), iTab); } @@ -109376,7 +114073,7 @@ static int exprImpliesNotNull( ** pE1: x!=123 pE2: x IS NOT NULL Result: true ** pE1: x!=?1 pE2: x IS NOT NULL Result: true ** pE1: x IS NULL pE2: x IS NOT NULL Result: false -** pE1: x IS ?2 pE2: x IS NOT NULL Reuslt: false +** pE1: x IS ?2 pE2: x IS NOT NULL Result: false ** ** When comparing TK_COLUMN nodes between pE1 and pE2, if pE2 has ** Expr.iTable<0 then assume a table number given by iTab. @@ -109413,11 +114110,29 @@ SQLITE_PRIVATE int sqlite3ExprImpliesExpr( return 0; } +/* This is a helper function to impliesNotNullRow(). In this routine, +** set pWalker->eCode to one only if *both* of the input expressions +** separately have the implies-not-null-row property. +*/ +static void bothImplyNotNullRow(Walker *pWalker, Expr *pE1, Expr *pE2){ + if( pWalker->eCode==0 ){ + sqlite3WalkExpr(pWalker, pE1); + if( pWalker->eCode ){ + pWalker->eCode = 0; + sqlite3WalkExpr(pWalker, pE2); + } + } +} + /* ** This is the Expr node callback for sqlite3ExprImpliesNonNullRow(). ** If the expression node requires that the table at pWalker->iCur ** have one or more non-NULL column, then set pWalker->eCode to 1 and abort. ** +** pWalker->mWFlags is non-zero if this inquiry is being undertaking on +** behalf of a RIGHT JOIN (or FULL JOIN). That makes a difference when +** evaluating terms in the ON clause of an inner join. +** ** This routine controls an optimization. False positives (setting ** pWalker->eCode to 1 when it should not be) are deadly, but false-negatives ** (never setting pWalker->eCode) is a harmless missed optimization. @@ -109426,28 +114141,33 @@ static int impliesNotNullRow(Walker *pWalker, Expr *pExpr){ testcase( pExpr->op==TK_AGG_COLUMN ); testcase( pExpr->op==TK_AGG_FUNCTION ); if( ExprHasProperty(pExpr, EP_OuterON) ) return WRC_Prune; + if( ExprHasProperty(pExpr, EP_InnerON) && pWalker->mWFlags ){ + /* If iCur is used in an inner-join ON clause to the left of a + ** RIGHT JOIN, that does *not* mean that the table must be non-null. + ** But it is difficult to check for that condition precisely. + ** To keep things simple, any use of iCur from any inner-join is + ** ignored while attempting to simplify a RIGHT JOIN. */ + return WRC_Prune; + } switch( pExpr->op ){ case TK_ISNOT: case TK_ISNULL: case TK_NOTNULL: case TK_IS: - case TK_OR: case TK_VECTOR: - case TK_CASE: - case TK_IN: case TK_FUNCTION: case TK_TRUTH: + case TK_CASE: testcase( pExpr->op==TK_ISNOT ); testcase( pExpr->op==TK_ISNULL ); testcase( pExpr->op==TK_NOTNULL ); testcase( pExpr->op==TK_IS ); - testcase( pExpr->op==TK_OR ); testcase( pExpr->op==TK_VECTOR ); - testcase( pExpr->op==TK_CASE ); - testcase( pExpr->op==TK_IN ); testcase( pExpr->op==TK_FUNCTION ); testcase( pExpr->op==TK_TRUTH ); + testcase( pExpr->op==TK_CASE ); return WRC_Prune; + case TK_COLUMN: if( pWalker->u.iCur==pExpr->iTable ){ pWalker->eCode = 1; @@ -109455,21 +114175,38 @@ static int impliesNotNullRow(Walker *pWalker, Expr *pExpr){ } return WRC_Prune; + case TK_OR: case TK_AND: - if( pWalker->eCode==0 ){ + /* Both sides of an AND or OR must separately imply non-null-row. + ** Consider these cases: + ** 1. NOT (x AND y) + ** 2. x OR y + ** If only one of x or y is non-null-row, then the overall expression + ** can be true if the other arm is false (case 1) or true (case 2). 
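The both-arms rule that bothImplyNotNullRow() implements is what keeps LEFT JOIN strength reduction sound for AND/OR terms. A small SQL illustration with hypothetical tables:

  -- Both arms of the OR reference t2, so no all-NULL t2 row can satisfy
  -- the WHERE clause and the LEFT JOIN may become an ordinary join:
  SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.a WHERE t2.b=1 OR t2.c=2;
  -- Only one arm references t2 here, so the simplification must not fire:
  SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.a WHERE t2.b=1 OR t1.b=2;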
+ */ + testcase( pExpr->op==TK_OR ); + testcase( pExpr->op==TK_AND ); + bothImplyNotNullRow(pWalker, pExpr->pLeft, pExpr->pRight); + return WRC_Prune; + + case TK_IN: + /* Beware of "x NOT IN ()" and "x NOT IN (SELECT 1 WHERE false)", + ** both of which can be true. But apart from these cases, if + ** the left-hand side of the IN is NULL then the IN itself will be + ** NULL. */ + if( ExprUseXList(pExpr) && ALWAYS(pExpr->x.pList->nExpr>0) ){ sqlite3WalkExpr(pWalker, pExpr->pLeft); - if( pWalker->eCode ){ - pWalker->eCode = 0; - sqlite3WalkExpr(pWalker, pExpr->pRight); - } } return WRC_Prune; case TK_BETWEEN: - if( sqlite3WalkExpr(pWalker, pExpr->pLeft)==WRC_Abort ){ - assert( pWalker->eCode ); - return WRC_Abort; - } + /* In "x NOT BETWEEN y AND z" either x must be non-null-row or else + ** both y and z must be non-null row */ + assert( ExprUseXList(pExpr) ); + assert( pExpr->x.pList->nExpr==2 ); + sqlite3WalkExpr(pWalker, pExpr->pLeft); + bothImplyNotNullRow(pWalker, pExpr->x.pList->a[0].pExpr, + pExpr->x.pList->a[1].pExpr); return WRC_Prune; /* Virtual tables are allowed to use constraints like x=NULL. So @@ -109494,10 +114231,10 @@ static int impliesNotNullRow(Walker *pWalker, Expr *pExpr){ assert( pLeft->op!=TK_COLUMN || ExprUseYTab(pLeft) ); assert( pRight->op!=TK_COLUMN || ExprUseYTab(pRight) ); if( (pLeft->op==TK_COLUMN - && pLeft->y.pTab!=0 + && ALWAYS(pLeft->y.pTab!=0) && IsVirtual(pLeft->y.pTab)) || (pRight->op==TK_COLUMN - && pRight->y.pTab!=0 + && ALWAYS(pRight->y.pTab!=0) && IsVirtual(pRight->y.pTab)) ){ return WRC_Prune; @@ -109531,7 +114268,7 @@ static int impliesNotNullRow(Walker *pWalker, Expr *pExpr){ ** be non-NULL, then the LEFT JOIN can be safely converted into an ** ordinary join. */ -SQLITE_PRIVATE int sqlite3ExprImpliesNonNullRow(Expr *p, int iTab){ +SQLITE_PRIVATE int sqlite3ExprImpliesNonNullRow(Expr *p, int iTab, int isRJ){ Walker w; p = sqlite3ExprSkipCollateAndLikely(p); if( p==0 ) return 0; @@ -109539,7 +114276,7 @@ SQLITE_PRIVATE int sqlite3ExprImpliesNonNullRow(Expr *p, int iTab){ p = p->pLeft; }else{ while( p->op==TK_AND ){ - if( sqlite3ExprImpliesNonNullRow(p->pLeft, iTab) ) return 1; + if( sqlite3ExprImpliesNonNullRow(p->pLeft, iTab, isRJ) ) return 1; p = p->pRight; } } @@ -109547,6 +114284,7 @@ SQLITE_PRIVATE int sqlite3ExprImpliesNonNullRow(Expr *p, int iTab){ w.xSelectCallback = 0; w.xSelectCallback2 = 0; w.eCode = 0; + w.mWFlags = isRJ!=0; w.u.iCur = iTab; sqlite3WalkExpr(&w, p); return w.eCode; @@ -109607,7 +114345,7 @@ SQLITE_PRIVATE int sqlite3ExprCoveredByIndex( } -/* Structure used to pass information throught the Walker in order to +/* Structure used to pass information throughout the Walker in order to ** implement sqlite3ReferencesSrcList(). 
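The TK_IN guard added above can be checked at the SQL level; these are the edge cases the comment names (an empty RHS list is a SQLite extension):

  SELECT 5 NOT IN ();                  -- 1 (true) with nothing to test against
  SELECT 5 NOT IN (SELECT 1 WHERE 0);  -- 1 (true): empty subquery, same behavior
  SELECT NULL IN (1, 2);               -- NULL: for a non-empty RHS, a NULL LHS
                                       -- makes the whole IN NULL, which is why
                                       -- such terms imply a non-NULL LHS row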
*/ struct RefSrcList { @@ -109702,6 +114440,7 @@ static int exprRefToSrcList(Walker *pWalker, Expr *pExpr){ SQLITE_PRIVATE int sqlite3ReferencesSrcList(Parse *pParse, Expr *pExpr, SrcList *pSrcList){ Walker w; struct RefSrcList x; + assert( pParse->db!=0 ); memset(&w, 0, sizeof(w)); memset(&x, 0, sizeof(x)); w.xExprCallback = exprRefToSrcList; @@ -109713,12 +114452,18 @@ SQLITE_PRIVATE int sqlite3ReferencesSrcList(Parse *pParse, Expr *pExpr, SrcList assert( pExpr->op==TK_AGG_FUNCTION ); assert( ExprUseXList(pExpr) ); sqlite3WalkExprList(&w, pExpr->x.pList); + if( pExpr->pLeft ){ + assert( pExpr->pLeft->op==TK_ORDER ); + assert( ExprUseXList(pExpr->pLeft) ); + assert( pExpr->pLeft->x.pList!=0 ); + sqlite3WalkExprList(&w, pExpr->pLeft->x.pList); + } #ifndef SQLITE_OMIT_WINDOWFUNC if( ExprHasProperty(pExpr, EP_WinFunc) ){ sqlite3WalkExpr(&w, pExpr->y.pWin->pFilter); } #endif - sqlite3DbFree(pParse->db, x.aiExclude); + if( x.aiExclude ) sqlite3DbNNFreeNN(pParse->db, x.aiExclude); if( w.eCode & 0x01 ){ return 1; }else if( w.eCode ){ @@ -109736,10 +114481,8 @@ SQLITE_PRIVATE int sqlite3ReferencesSrcList(Parse *pParse, Expr *pExpr, SrcList ** it does, make a copy. This is done because the pExpr argument is ** subject to change. ** -** The copy is stored on pParse->pConstExpr with a register number of 0. -** This will cause the expression to be deleted automatically when the -** Parse object is destroyed, but the zero register number means that it -** will not generate any code in the preamble. +** The copy is scheduled for deletion using the sqlite3ExprDeferredDelete() +** which builds on the sqlite3ParserAddCleanup() mechanism. */ static int agginfoPersistExprCb(Walker *pWalker, Expr *pExpr){ if( ALWAYS(!ExprHasProperty(pExpr, EP_TokenOnly|EP_Reduced)) @@ -109749,10 +114492,11 @@ static int agginfoPersistExprCb(Walker *pWalker, Expr *pExpr){ int iAgg = pExpr->iAgg; Parse *pParse = pWalker->pParse; sqlite3 *db = pParse->db; - assert( pExpr->op==TK_AGG_COLUMN || pExpr->op==TK_AGG_FUNCTION ); - if( pExpr->op==TK_AGG_COLUMN ){ - assert( iAgg>=0 && iAgg<pAggInfo->nColumn ); - if( pAggInfo->aCol[iAgg].pCExpr==pExpr ){ + assert( iAgg>=0 ); + if( pExpr->op!=TK_AGG_FUNCTION ){ + if( iAgg<pAggInfo->nColumn + && pAggInfo->aCol[iAgg].pCExpr==pExpr + ){ pExpr = sqlite3ExprDup(db, pExpr, 0); if( pExpr ){ pAggInfo->aCol[iAgg].pCExpr = pExpr; } } }else{ - assert( iAgg>=0 && iAgg<pAggInfo->nFunc ); - if( pAggInfo->aFunc[iAgg].pFExpr==pExpr ){ + assert( pExpr->op==TK_AGG_FUNCTION ); + if( ALWAYS(iAgg<pAggInfo->nFunc) + && pAggInfo->aFunc[iAgg].pFExpr==pExpr + ){ pExpr = sqlite3ExprDup(db, pExpr, 0); if( pExpr ){ pAggInfo->aFunc[iAgg].pFExpr = pExpr; @@ -109760,8 +114504,10 @@ static int agginfoPersistExprCb(Walker *pWalker, Expr *pExpr){ @@ -109816,6 +114562,74 @@ static int addAggInfoFunc(sqlite3 *db, AggInfo *pInfo){ return i; } +/* +** Search the AggInfo object for an aCol[] entry that has iTable and iColumn. +** Return the index in aCol[] of the entry that describes that column. +** +** If no prior entry is found, create a new one and return -1. The +** new column will have an index of pAggInfo->nColumn-1.
+*/ +static void findOrCreateAggInfoColumn( + Parse *pParse, /* Parsing context */ + AggInfo *pAggInfo, /* The AggInfo object to search and/or modify */ + Expr *pExpr /* Expr describing the column to find or insert */ +){ + struct AggInfo_col *pCol; + int k; + + assert( pAggInfo->iFirstReg==0 ); + pCol = pAggInfo->aCol; + for(k=0; k<pAggInfo->nColumn; k++, pCol++){ + if( pCol->pCExpr==pExpr ) return; + if( pCol->iTable==pExpr->iTable + && pCol->iColumn==pExpr->iColumn + && pExpr->op!=TK_IF_NULL_ROW + ){ + goto fix_up_expr; + } + } + k = addAggInfoColumn(pParse->db, pAggInfo); + if( k<0 ){ + /* OOM on resize */ + assert( pParse->db->mallocFailed ); + return; + } + pCol = &pAggInfo->aCol[k]; + assert( ExprUseYTab(pExpr) ); + pCol->pTab = pExpr->y.pTab; + pCol->iTable = pExpr->iTable; + pCol->iColumn = pExpr->iColumn; + pCol->iSorterColumn = -1; + pCol->pCExpr = pExpr; + if( pAggInfo->pGroupBy && pExpr->op!=TK_IF_NULL_ROW ){ + int j, n; + ExprList *pGB = pAggInfo->pGroupBy; + struct ExprList_item *pTerm = pGB->a; + n = pGB->nExpr; + for(j=0; j<n; j++, pTerm++){ + Expr *pE = pTerm->pExpr; + if( pE->op==TK_COLUMN + && pE->iTable==pExpr->iTable + && pE->iColumn==pExpr->iColumn + ){ + pCol->iSorterColumn = j; + break; + } + } + } + if( pCol->iSorterColumn<0 ){ + pCol->iSorterColumn = pAggInfo->nSortingColumn++; + } +fix_up_expr: + ExprSetVVAProperty(pExpr, EP_NoReduce); + assert( pExpr->pAggInfo==0 || pExpr->pAggInfo==pAggInfo ); + pExpr->pAggInfo = pAggInfo; + if( pExpr->op==TK_COLUMN ){ + pExpr->op = TK_AGG_COLUMN; + } + pExpr->iAgg = (i16)k; +} + /* ** This is the xExprCallback for a tree walker. It is used to ** implement sqlite3ExprAnalyzeAggregates(). See sqlite3ExprAnalyzeAggregates @@ -109829,76 +114643,64 @@ static int analyzeAggregate(Walker *pWalker, Expr *pExpr){ AggInfo *pAggInfo = pNC->uNC.pAggInfo; assert( pNC->ncFlags & NC_UAggInfo ); + assert( pAggInfo->iFirstReg==0 ); switch( pExpr->op ){ + default: { + IndexedExpr *pIEpr; + Expr tmp; + assert( pParse->iSelfTab==0 ); + if( (pNC->ncFlags & NC_InAggFunc)==0 ) break; + if( pParse->pIdxEpr==0 ) break; + for(pIEpr=pParse->pIdxEpr; pIEpr; pIEpr=pIEpr->pIENext){ + int iDataCur = pIEpr->iDataCur; + if( iDataCur<0 ) continue; + if( sqlite3ExprCompare(0, pExpr, pIEpr->pExpr, iDataCur)==0 ) break; + } + if( pIEpr==0 ) break; + if( NEVER(!ExprUseYTab(pExpr)) ) break; + for(i=0; i<pSrcList->nSrc; i++){ + if( pSrcList->a[0].iCursor==pIEpr->iDataCur ) break; + } + if( i>=pSrcList->nSrc ) break; + if( NEVER(pExpr->pAggInfo!=0) ) break; /* Resolved by outer context */ + if( pParse->nErr ){ return WRC_Abort; } + + /* If we reach this point, it means that expression pExpr can be + ** translated into a reference to an index column as described by + ** pIEpr.
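The new default: case in analyzeAggregate() above is the consumer of this machinery: an aggregate argument that matches an indexed expression is remapped onto the index column instead of being recomputed per row. A sketch in SQL (hypothetical schema):

  CREATE TABLE t1(a INT, b INT);
  CREATE INDEX t1ab ON t1(a+b);
  -- Inside the aggregate, a+b can become a TK_AGG_COLUMN that reads the
  -- precomputed value from the t1ab cursor:
  SELECT sum(a+b) FROM t1;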
+ */ + memset(&tmp, 0, sizeof(tmp)); + tmp.op = TK_AGG_COLUMN; + tmp.iTable = pIEpr->iIdxCur; + tmp.iColumn = pIEpr->iIdxCol; + findOrCreateAggInfoColumn(pParse, pAggInfo, &tmp); + if( pParse->nErr ){ return WRC_Abort; } + assert( pAggInfo->aCol!=0 ); + assert( tmp.iAgg<pAggInfo->nColumn ); + pAggInfo->aCol[tmp.iAgg].pCExpr = pExpr; + pExpr->pAggInfo = pAggInfo; + pExpr->iAgg = tmp.iAgg; + return WRC_Prune; + } + case TK_IF_NULL_ROW: case TK_AGG_COLUMN: case TK_COLUMN: { testcase( pExpr->op==TK_AGG_COLUMN ); testcase( pExpr->op==TK_COLUMN ); + testcase( pExpr->op==TK_IF_NULL_ROW ); /* Check to see if the column is in one of the tables in the FROM ** clause of the aggregate query */ if( ALWAYS(pSrcList!=0) ){ SrcItem *pItem = pSrcList->a; for(i=0; i<pSrcList->nSrc; i++, pItem++){ - struct AggInfo_col *pCol; assert( !ExprHasProperty(pExpr, EP_TokenOnly|EP_Reduced) ); if( pExpr->iTable==pItem->iCursor ){ - /* If we reach this point, it means that pExpr refers to a table - ** that is in the FROM clause of the aggregate query. - ** - ** Make an entry for the column in pAggInfo->aCol[] if there - ** is not an entry there already. - */ - int k; - pCol = pAggInfo->aCol; - for(k=0; k<pAggInfo->nColumn; k++, pCol++){ - if( pCol->iTable==pExpr->iTable && - pCol->iColumn==pExpr->iColumn ){ - break; - } - } - if( (k>=pAggInfo->nColumn) - && (k = addAggInfoColumn(pParse->db, pAggInfo))>=0 - ){ - pCol = &pAggInfo->aCol[k]; - assert( ExprUseYTab(pExpr) ); - pCol->pTab = pExpr->y.pTab; - pCol->iTable = pExpr->iTable; - pCol->iColumn = pExpr->iColumn; - pCol->iMem = ++pParse->nMem; - pCol->iSorterColumn = -1; - pCol->pCExpr = pExpr; - if( pAggInfo->pGroupBy ){ - int j, n; - ExprList *pGB = pAggInfo->pGroupBy; - struct ExprList_item *pTerm = pGB->a; - n = pGB->nExpr; - for(j=0; j<n; j++, pTerm++){ - Expr *pE = pTerm->pExpr; - if( pE->op==TK_COLUMN && pE->iTable==pExpr->iTable && - pE->iColumn==pExpr->iColumn ){ - pCol->iSorterColumn = j; - break; - } - } - } - if( pCol->iSorterColumn<0 ){ - pCol->iSorterColumn = pAggInfo->nSortingColumn++; - } - } - /* There is now an entry for pExpr in pAggInfo->aCol[] (either - ** because it was there before or because we just created it). - ** Convert the pExpr to be a TK_AGG_COLUMN referring to that - ** pAggInfo->aCol[] entry. - */ - ExprSetVVAProperty(pExpr, EP_NoReduce); - pExpr->pAggInfo = pAggInfo; - pExpr->op = TK_AGG_COLUMN; - pExpr->iAgg = (i16)k; + findOrCreateAggInfoColumn(pParse, pAggInfo, pExpr); break; } /* endif pExpr->iTable==pItem->iCursor */ } /* end loop over pSrcList */ } - return WRC_Prune; + return WRC_Continue; } case TK_AGG_FUNCTION: { if( (pNC->ncFlags & NC_InAggFunc)==0 @@ -109920,15 +114722,42 @@ static int analyzeAggregate(Walker *pWalker, Expr *pExpr){ u8 enc = ENC(pParse->db); i = addAggInfoFunc(pParse->db, pAggInfo); if( i>=0 ){ + int nArg; assert( !ExprHasProperty(pExpr, EP_xIsSelect) ); pItem = &pAggInfo->aFunc[i]; pItem->pFExpr = pExpr; - pItem->iMem = ++pParse->nMem; assert( ExprUseUToken(pExpr) ); + nArg = pExpr->x.pList ? pExpr->x.pList->nExpr : 0; pItem->pFunc = sqlite3FindFunction(pParse->db, - pExpr->u.zToken, - pExpr->x.pList ? pExpr->x.pList->nExpr : 0, enc, 0); + pExpr->u.zToken, nArg, enc, 0); + assert( pItem->bOBUnique==0 ); + if( pExpr->pLeft + && (pItem->pFunc->funcFlags & SQLITE_FUNC_NEEDCOLL)==0 + ){ + /* The NEEDCOLL test above causes any ORDER BY clause on + ** aggregate min() or max() to be ignored.
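The iOBTab/bOBPayload/bOBUnique bookkeeping above backs ORDER BY inside aggregate arguments, new in this SQLite release. A short SQL illustration, including the min()/max() carve-out the NEEDCOLL test creates:

  SELECT group_concat(name ORDER BY id DESC) FROM t1;  -- inputs arrive ordered
  -- When the lone ORDER BY term is the argument itself, no separate
  -- sorter payload is needed (bOBPayload==0):
  SELECT group_concat(name ORDER BY name) FROM t1;
  -- On min()/max() the ORDER BY is accepted but ignored:
  SELECT max(name ORDER BY id) FROM t1;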
*/ + ExprList *pOBList; + assert( nArg>0 ); + assert( pExpr->pLeft->op==TK_ORDER ); + assert( ExprUseXList(pExpr->pLeft) ); + pItem->iOBTab = pParse->nTab++; + pOBList = pExpr->pLeft->x.pList; + assert( pOBList->nExpr>0 ); + assert( pItem->bOBUnique==0 ); + if( pOBList->nExpr==1 + && nArg==1 + && sqlite3ExprCompare(0,pOBList->a[0].pExpr, + pExpr->x.pList->a[0].pExpr,0)==0 + ){ + pItem->bOBPayload = 0; + pItem->bOBUnique = ExprHasProperty(pExpr, EP_Distinct); + }else{ + pItem->bOBPayload = 1; + } + }else{ + pItem->iOBTab = -1; + } + if( ExprHasProperty(pExpr, EP_Distinct) && !pItem->bOBUnique ){ pItem->iDistinct = pParse->nTab++; }else{ pItem->iDistinct = -1; @@ -110052,6 +114881,37 @@ SQLITE_PRIVATE void sqlite3ClearTempRegCache(Parse *pParse){ pParse->nRangeReg = 0; } +/* +** Make sure sufficient registers have been allocated so that +** iReg is a valid register number. +*/ +SQLITE_PRIVATE void sqlite3TouchRegister(Parse *pParse, int iReg){ + if( pParse->nMem<iReg ) pParse->nMem = iReg; +} + +#if defined(SQLITE_ENABLE_STAT4) || defined(SQLITE_DEBUG) +/* +** Return the latest reusable register in the set of all registers. +** The value returned is no less than iMin. If any register iMin or +** greater is in permanent use, then return one more than that last +** permanent register. +*/ +SQLITE_PRIVATE int sqlite3FirstAvailableRegister(Parse *pParse, int iMin){ + const ExprList *pList = pParse->pConstExpr; + if( pList ){ + int i; + for(i=0; i<pList->nExpr; i++){ + if( pList->a[i].u.iConstExprReg>=iMin ){ + iMin = pList->a[i].u.iConstExprReg + 1; + } + } + } + pParse->nTempReg = 0; + pParse->nRangeReg = 0; + return iMin; +} +#endif /* SQLITE_ENABLE_STAT4 || SQLITE_DEBUG */ + /* ** Validate that no temporary register falls within the range of ** iFirst..iLast, inclusive. This routine is only call from within assert() @@ -110071,6 +114931,14 @@ SQLITE_PRIVATE int sqlite3NoTempsInRange(Parse *pParse, int iFirst, int iLast){ return 0; } } + if( pParse->pConstExpr ){ + ExprList *pList = pParse->pConstExpr; + for(i=0; i<pList->nExpr; i++){ + int iReg = pList->a[i].u.iConstExprReg; + if( iReg==0 ) continue; + if( iReg>=iFirst && iReg<=iLast ) return 0; + } + } return 1; } #endif /* SQLITE_DEBUG */ @@ -110525,14 +115393,19 @@ SQLITE_PRIVATE void sqlite3AlterFinishAddColumn(Parse *pParse, Token *pColDef){ /* Verify that constraints are still satisfied */ if( pNew->pCheck!=0 || (pCol->notNull && (pCol->colFlags & COLFLAG_GENERATED)!=0) + || (pTab->tabFlags & TF_Strict)!=0 ){ sqlite3NestedParse(pParse, "SELECT CASE WHEN quick_check GLOB 'CHECK*'" " THEN raise(ABORT,'CHECK constraint failed')" + " WHEN quick_check GLOB 'non-* value in*'" + " THEN raise(ABORT,'type mismatch on DEFAULT')" " ELSE raise(ABORT,'NOT NULL constraint failed')" " END" " FROM pragma_quick_check(%Q,%Q)" - " WHERE quick_check GLOB 'CHECK*' OR quick_check GLOB 'NULL*'", + " WHERE quick_check GLOB 'CHECK*'" + " OR quick_check GLOB 'NULL*'" + " OR quick_check GLOB 'non-* value in*'", zTab, zDb ); } @@ -110621,7 +115494,7 @@ SQLITE_PRIVATE void sqlite3AlterBeginAddColumn(Parse *pParse, SrcList *pSrc){ pNew->u.tab.pDfltList = sqlite3ExprListDup(db, pTab->u.tab.pDfltList, 0); pNew->pSchema = db->aDb[iDb].pSchema; pNew->u.tab.addColOffset = pTab->u.tab.addColOffset; - pNew->nTabRef = 1; + assert( pNew->nTabRef==1 ); exit_begin_add_column: sqlite3SrcListDelete(db, pSrc); @@ -110820,13 +115693,14 @@ static void renameTokenCheckAll(Parse *pParse, const void *pPtr){ assert( pParse->db->mallocFailed==0 || pParse->nErr!=0 ); if( pParse->nErr==0 ){ const RenameToken *p; - u8 i
= 0; + u32 i = 1; for(p=pParse->pRename; p; p=p->pNext){ if( p->p ){ assert( p->p!=pPtr ); - i += *(u8*)(p->p); + i += *(u8*)(p->p) | 1; } } + assert( i>0 ); } } #else @@ -111125,7 +115999,7 @@ static RenameToken *renameColumnTokenNext(RenameCtx *pCtx){ } /* -** An error occured while parsing or otherwise processing a database +** An error occurred while parsing or otherwise processing a database ** object (either pParse->pNewTable, pNewIndex or pNewTrigger) as part of an ** ALTER TABLE RENAME COLUMN program. The error message emitted by the ** sub-routine is currently stored in pParse->zErrMsg. This function @@ -111357,6 +116231,19 @@ static int renameEditSql( return rc; } +/* +** Set all pEList->a[].fg.eEName fields in the expression-list to val. +*/ +static void renameSetENames(ExprList *pEList, int val){ + if( pEList ){ + int i; + for(i=0; i<pEList->nExpr; i++){ + assert( val==ENAME_NAME || pEList->a[i].fg.eEName==ENAME_NAME ); + pEList->a[i].fg.eEName = val; + } + } +} + /* ** Resolve all symbols in the trigger at pParse->pNewTrigger, assuming ** it was read from the schema of database zDb. Return SQLITE_OK if @@ -111404,7 +116291,17 @@ static int renameResolveTrigger(Parse *pParse){ pSrc = 0; rc = SQLITE_NOMEM; }else{ + /* pStep->pExprList contains an expression-list used for an UPDATE + ** statement. So the a[].zEName values are the RHS of the + ** "<col> = <expr>" clauses of the UPDATE statement. So, before + ** running SelectPrep(), change all the eEName values in + ** pStep->pExprList to ENAME_SPAN (from their current value of + ** ENAME_NAME). This is to prevent any ids in ON() clauses that are + ** part of pSrc from being incorrectly resolved against the + ** a[].zEName values as if they were column aliases. */ + renameSetENames(pStep->pExprList, ENAME_SPAN); sqlite3SelectPrep(pParse, pSel, 0); + renameSetENames(pStep->pExprList, ENAME_NAME); rc = pParse->nErr ? SQLITE_ERROR : SQLITE_OK; assert( pStep->pExprList==0 || pStep->pExprList==pSel->pEList ); assert( pSrc==pSel->pSrc ); @@ -113312,6 +118209,7 @@ static void analyzeVdbeCommentIndexWithColumnName( if( NEVER(i==XN_ROWID) ){ VdbeComment((v,"%s.rowid",pIdx->zName)); }else if( i==XN_EXPR ){ + assert( pIdx->bHasExpr ); VdbeComment((v,"%s.expr(%d)",pIdx->zName, k)); }else{ VdbeComment((v,"%s.%s", pIdx->zName, pIdx->pTable->aCol[i].zCnName)); @@ -113352,11 +118250,15 @@ static void analyzeOneTable( int regIdxname = iMem++; /* Register containing index name */ int regStat1 = iMem++; /* Value for the stat column of sqlite_stat1 */ int regPrev = iMem; /* MUST BE LAST (see below) */ +#ifdef SQLITE_ENABLE_STAT4 + int doOnce = 1; /* Flag for a one-time computation */ +#endif #ifdef SQLITE_ENABLE_PREUPDATE_HOOK Table *pStat1 = 0; #endif - pParse->nMem = MAX(pParse->nMem, iMem); + sqlite3TouchRegister(pParse, iMem); + assert( sqlite3NoTempsInRange(pParse, regNewRowid, iMem) ); v = sqlite3GetVdbe(pParse); if( v==0 || NEVER(pTab==0) ){ return; } @@ -113462,7 +118364,7 @@ static void analyzeOneTable( ** the regPrev array and a trailing rowid (the rowid slot is required ** when building a record to insert into the sample column of ** the sqlite_stat4 table. */ - pParse->nMem = MAX(pParse->nMem, regPrev+nColTest); + sqlite3TouchRegister(pParse, regPrev+nColTest); /* Open a read-only cursor on the index being analyzed. */ assert( iDb==sqlite3SchemaToIndex(db, pIdx->pSchema) ); @@ -113634,7 +118536,35 @@ static void analyzeOneTable( int addrIsNull; u8 seekOp = HasRowid(pTab) ?
OP_NotExists : OP_NotFound; - pParse->nMem = MAX(pParse->nMem, regCol+nCol); + if( doOnce ){ + int mxCol = nCol; + Index *pX; + + /* Compute the maximum number of columns in any index */ + for(pX=pTab->pIndex; pX; pX=pX->pNext){ + int nColX; /* Number of columns in pX */ + if( !HasRowid(pTab) && IsPrimaryKeyIndex(pX) ){ + nColX = pX->nKeyCol; + }else{ + nColX = pX->nColumn; + } + if( nColX>mxCol ) mxCol = nColX; + } + + /* Allocate space to compute results for the largest index */ + sqlite3TouchRegister(pParse, regCol+mxCol); + doOnce = 0; +#ifdef SQLITE_DEBUG + /* Verify that the call to sqlite3ClearTempRegCache() below + ** really is needed. + ** https://sqlite.org/forum/forumpost/83cb4a95a0 (2023-03-25) + */ + testcase( !sqlite3NoTempsInRange(pParse, regEq, regCol+mxCol) ); +#endif + sqlite3ClearTempRegCache(pParse); /* tag-20230325-1 */ + assert( sqlite3NoTempsInRange(pParse, regEq, regCol+mxCol) ); + } + assert( sqlite3NoTempsInRange(pParse, regEq, regCol+nCol) ); addrNext = sqlite3VdbeCurrentAddr(v); callStatGet(pParse, regStat, STAT_GET_ROWID, regSampleRowid); @@ -113715,6 +118645,11 @@ static void analyzeDatabase(Parse *pParse, int iDb){ for(k=sqliteHashFirst(&pSchema->tblHash); k; k=sqliteHashNext(k)){ Table *pTab = (Table*)sqliteHashData(k); analyzeOneTable(pParse, pTab, 0, iStatCur, iMem, iTab); +#ifdef SQLITE_ENABLE_STAT4 + iMem = sqlite3FirstAvailableRegister(pParse, iMem); +#else + assert( iMem==sqlite3FirstAvailableRegister(pParse,iMem) ); +#endif } loadAnalysis(pParse, iDb); } @@ -113955,6 +118890,8 @@ static int analysisLoader(void *pData, int argc, char **argv, char **NotUsed){ ** and its contents. */ SQLITE_PRIVATE void sqlite3DeleteIndexSamples(sqlite3 *db, Index *pIdx){ + assert( db!=0 ); + assert( pIdx!=0 ); #ifdef SQLITE_ENABLE_STAT4 if( pIdx->aSample ){ int j; @@ -113964,7 +118901,7 @@ SQLITE_PRIVATE void sqlite3DeleteIndexSamples(sqlite3 *db, Index *pIdx){ } sqlite3DbFree(db, pIdx->aSample); } - if( db && db->pnBytesFreed==0 ){ + if( db->pnBytesFreed==0 ){ pIdx->nSample = 0; pIdx->aSample = 0; } @@ -114100,6 +119037,10 @@ static int loadStatTbl( pIdx = findIndexOrPrimaryKey(db, zIndex, zDb); assert( pIdx==0 || pIdx->nSample==0 ); if( pIdx==0 ) continue; + if( pIdx->aSample!=0 ){ + /* The same index appears in sqlite_stat4 under multiple names */ + continue; + } assert( !HasRowid(pIdx->pTable) || pIdx->nColumn==pIdx->nKeyCol+1 ); if( !HasRowid(pIdx->pTable) && IsPrimaryKeyIndex(pIdx) ){ nIdxCol = pIdx->nKeyCol; @@ -114107,6 +119048,7 @@ static int loadStatTbl( nIdxCol = pIdx->nColumn; } pIdx->nSampleCol = nIdxCol; + pIdx->mxSample = nSample; nByte = sizeof(IndexSample) * nSample; nByte += sizeof(tRowcnt) * nIdxCol * 3 * nSample; nByte += nIdxCol * sizeof(tRowcnt); /* Space for Index.aAvgEq[] */ @@ -114146,6 +119088,11 @@ static int loadStatTbl( if( zIndex==0 ) continue; pIdx = findIndexOrPrimaryKey(db, zIndex, zDb); if( pIdx==0 ) continue; + if( pIdx->nSample>=pIdx->mxSample ){ + /* Too many slots used because the same index appears in + ** sqlite_stat4 using multiple names */ + continue; + } /* This next condition is true if data has already been loaded from ** the sqlite_stat4 table. */ nCol = pIdx->nSampleCol; @@ -114158,14 +119105,15 @@ static int loadStatTbl( decodeIntArray((char*)sqlite3_column_text(pStmt,2),nCol,pSample->anLt,0,0); decodeIntArray((char*)sqlite3_column_text(pStmt,3),nCol,pSample->anDLt,0,0); - /* Take a copy of the sample. Add two 0x00 bytes the end of the buffer. + /* Take a copy of the sample. Add 8 extra 0x00 bytes the end of the buffer. 
** This is in case the sample record is corrupted. In that case, the ** sqlite3VdbeRecordCompare() may read up to two varints past the ** end of the allocated buffer before it realizes it is dealing with - ** a corrupt record. Adding the two 0x00 bytes prevents this from causing + ** a corrupt record. Or it might try to read a large integer from the + ** buffer. In any case, eight 0x00 bytes prevents this from causing ** a buffer overread. */ pSample->n = sqlite3_column_bytes(pStmt, 4); - pSample->p = sqlite3DbMallocZero(db, pSample->n + 2); + pSample->p = sqlite3DbMallocZero(db, pSample->n + 8); if( pSample->p==0 ){ sqlite3_finalize(pStmt); return SQLITE_NOMEM_BKPT; @@ -114189,11 +119137,12 @@ static int loadStat4(sqlite3 *db, const char *zDb){ const Table *pStat4; assert( db->lookaside.bDisable ); - if( (pStat4 = sqlite3FindTable(db, "sqlite_stat4", zDb))!=0 + if( OptimizationEnabled(db, SQLITE_Stat4) + && (pStat4 = sqlite3FindTable(db, "sqlite_stat4", zDb))!=0 && IsOrdinaryTable(pStat4) ){ rc = loadStatTbl(db, - "SELECT idx,count(*) FROM %Q.sqlite_stat4 GROUP BY idx", + "SELECT idx,count(*) FROM %Q.sqlite_stat4 GROUP BY idx COLLATE nocase", "SELECT idx,neq,nlt,ndlt,sample FROM %Q.sqlite_stat4", zDb ); @@ -114383,7 +119332,7 @@ static void attachFunc( char *zErr = 0; unsigned int flags; Db *aNew; /* New array of Db pointers */ - Db *pNew; /* Db object for the newly attached database */ + Db *pNew = 0; /* Db object for the newly attached database */ char *zErrDyn = 0; sqlite3_vfs *pVfs; @@ -114403,13 +119352,26 @@ static void attachFunc( /* This is not a real ATTACH. Instead, this routine is being called ** from sqlite3_deserialize() to close database db->init.iDb and ** reopen it as a MemDB */ + Btree *pNewBt = 0; pVfs = sqlite3_vfs_find("memdb"); if( pVfs==0 ) return; - pNew = &db->aDb[db->init.iDb]; - if( pNew->pBt ) sqlite3BtreeClose(pNew->pBt); - pNew->pBt = 0; - pNew->pSchema = 0; - rc = sqlite3BtreeOpen(pVfs, "x\0", db, &pNew->pBt, 0, SQLITE_OPEN_MAIN_DB); + rc = sqlite3BtreeOpen(pVfs, "x\0", db, &pNewBt, 0, SQLITE_OPEN_MAIN_DB); + if( rc==SQLITE_OK ){ + Schema *pNewSchema = sqlite3SchemaGet(db, pNewBt); + if( pNewSchema ){ + /* Both the Btree and the new Schema were allocated successfully. + ** Close the old db and update the aDb[] slot with the new memdb + ** values. */ + pNew = &db->aDb[db->init.iDb]; + if( ALWAYS(pNew->pBt) ) sqlite3BtreeClose(pNew->pBt); + pNew->pBt = pNewBt; + pNew->pSchema = pNewSchema; + }else{ + sqlite3BtreeClose(pNewBt); + rc = SQLITE_NOMEM; + } + } + if( rc ) goto attach_error; }else{ /* This is a real ATTACH ** @@ -114522,7 +119484,7 @@ static void attachFunc( } #endif if( rc ){ - if( !REOPEN_AS_MEMDB(db) ){ + if( ALWAYS(!REOPEN_AS_MEMDB(db)) ){ int iDb = db->nDb - 1; assert( iDb>=2 ); if( db->aDb[iDb].pBt ){ @@ -114639,6 +119601,8 @@ static void codeAttach( sqlite3* db = pParse->db; int regArgs; + if( SQLITE_OK!=sqlite3ReadSchema(pParse) ) goto attach_end; + if( pParse->nErr ) goto attach_end; memset(&sName, 0, sizeof(NameContext)); sName.pParse = pParse; @@ -115107,7 +120071,7 @@ SQLITE_PRIVATE int sqlite3AuthCheck( sqlite3 *db = pParse->db; int rc; - /* Don't do any authorization checks if the database is initialising + /* Don't do any authorization checks if the database is initializing ** or if the parser is being invoked from within sqlite3_declare_vtab. 
*/ assert( !IN_RENAME_OBJECT || db->xAuth==0 ); @@ -115314,6 +120278,7 @@ SQLITE_PRIVATE int sqlite3DbMaskAllZero(yDbMask m){ SQLITE_PRIVATE void sqlite3FinishCoding(Parse *pParse){ sqlite3 *db; Vdbe *v; + int iDb, i; assert( pParse->pToplevel==0 ); db = pParse->db; @@ -115343,7 +120308,6 @@ SQLITE_PRIVATE void sqlite3FinishCoding(Parse *pParse){ if( pParse->bReturning ){ Returning *pReturning = pParse->u1.pReturning; int addrRewind; - int i; int reg; if( pReturning->nRetCol ){ @@ -115380,76 +120344,66 @@ SQLITE_PRIVATE void sqlite3FinishCoding(Parse *pParse){ ** transaction on each used database and to verify the schema cookie ** on each used database. */ - if( db->mallocFailed==0 - && (DbMaskNonZero(pParse->cookieMask) || pParse->pConstExpr) - ){ - int iDb, i; - assert( sqlite3VdbeGetOp(v, 0)->opcode==OP_Init ); - sqlite3VdbeJumpHere(v, 0); - assert( db->nDb>0 ); - iDb = 0; - do{ - Schema *pSchema; - if( DbMaskTest(pParse->cookieMask, iDb)==0 ) continue; - sqlite3VdbeUsesBtree(v, iDb); - pSchema = db->aDb[iDb].pSchema; - sqlite3VdbeAddOp4Int(v, - OP_Transaction, /* Opcode */ - iDb, /* P1 */ - DbMaskTest(pParse->writeMask,iDb), /* P2 */ - pSchema->schema_cookie, /* P3 */ - pSchema->iGeneration /* P4 */ - ); - if( db->init.busy==0 ) sqlite3VdbeChangeP5(v, 1); - VdbeComment((v, - "usesStmtJournal=%d", pParse->mayAbort && pParse->isMultiWrite)); - }while( ++iDb<db->nDb ); + assert( pParse->nErr>0 || sqlite3VdbeGetOp(v, 0)->opcode==OP_Init ); + sqlite3VdbeJumpHere(v, 0); + assert( db->nDb>0 ); + iDb = 0; + do{ + Schema *pSchema; + if( DbMaskTest(pParse->cookieMask, iDb)==0 ) continue; + sqlite3VdbeUsesBtree(v, iDb); + pSchema = db->aDb[iDb].pSchema; + sqlite3VdbeAddOp4Int(v, + OP_Transaction, /* Opcode */ + iDb, /* P1 */ + DbMaskTest(pParse->writeMask,iDb), /* P2 */ + pSchema->schema_cookie, /* P3 */ + pSchema->iGeneration /* P4 */ + ); + if( db->init.busy==0 ) sqlite3VdbeChangeP5(v, 1); + VdbeComment((v, + "usesStmtJournal=%d", pParse->mayAbort && pParse->isMultiWrite)); + }while( ++iDb<db->nDb ); #ifndef SQLITE_OMIT_VIRTUALTABLE - for(i=0; i<pParse->nVtabLock; i++){ - char *vtab = (char *)sqlite3GetVTable(db, pParse->apVtabLock[i]); - sqlite3VdbeAddOp4(v, OP_VBegin, 0, 0, 0, vtab, P4_VTAB); - } - pParse->nVtabLock = 0; + for(i=0; i<pParse->nVtabLock; i++){ + char *vtab = (char *)sqlite3GetVTable(db, pParse->apVtabLock[i]); + sqlite3VdbeAddOp4(v, OP_VBegin, 0, 0, 0, vtab, P4_VTAB); + } + pParse->nVtabLock = 0; #endif - /* Once all the cookies have been verified and transactions opened, - ** obtain the required table-locks. This is a no-op unless the - ** shared-cache feature is enabled. - */ - codeTableLocks(pParse); +#ifndef SQLITE_OMIT_SHARED_CACHE + /* Once all the cookies have been verified and transactions opened, + ** obtain the required table-locks. This is a no-op unless the + ** shared-cache feature is enabled. + */ + if( pParse->nTableLock ) codeTableLocks(pParse); +#endif - /* Initialize any AUTOINCREMENT data structures required. - */ - sqlite3AutoincrementBegin(pParse); + /* Initialize any AUTOINCREMENT data structures required. + */ + if( pParse->pAinc ) sqlite3AutoincrementBegin(pParse); - /* Code constant expressions that where factored out of inner loops. - ** - ** The pConstExpr list might also contain expressions that we simply - ** want to keep around until the Parse object is deleted. Such - ** expressions have iConstExprReg==0. Do not generate code for - ** those expressions, of course.
- */ - if( pParse->pConstExpr ){ - ExprList *pEL = pParse->pConstExpr; - pParse->okConstFactor = 0; - for(i=0; i<pEL->nExpr; i++){ - int iReg = pEL->a[i].u.iConstExprReg; - if( iReg>0 ){ - sqlite3ExprCode(pParse, pEL->a[i].pExpr, iReg); - } - } + /* Code constant expressions that were factored out of inner loops. + */ + if( pParse->pConstExpr ){ + ExprList *pEL = pParse->pConstExpr; + pParse->okConstFactor = 0; + for(i=0; i<pEL->nExpr; i++){ + assert( pEL->a[i].u.iConstExprReg>0 ); + sqlite3ExprCode(pParse, pEL->a[i].pExpr, pEL->a[i].u.iConstExprReg); } + } - if( pParse->bReturning ){ - Returning *pRet = pParse->u1.pReturning; - if( pRet->nRetCol ){ - sqlite3VdbeAddOp2(v, OP_OpenEphemeral, pRet->iRetCur, pRet->nRetCol); - } + if( pParse->bReturning ){ + Returning *pRet = pParse->u1.pReturning; + if( pRet->nRetCol ){ + sqlite3VdbeAddOp2(v, OP_OpenEphemeral, pRet->iRetCur, pRet->nRetCol); } - - /* Finally, jump back to the beginning of the executable code. */ - sqlite3VdbeGoto(v, 1); } + + /* Finally, jump back to the beginning of the executable code. */ + sqlite3VdbeGoto(v, 1); } /* Get the VDBE program ready for execution @@ -115488,6 +120442,7 @@ SQLITE_PRIVATE void sqlite3NestedParse(Parse *pParse, const char *zFormat, ...){ char saveBuf[PARSE_TAIL_SZ]; if( pParse->nErr ) return; + if( pParse->eParseMode ) return; assert( pParse->nested<10 ); /* Nesting should only be of limited depth */ va_start(ap, zFormat); zSql = sqlite3VMPrintf(db, zFormat, ap); @@ -115634,7 +120589,7 @@ SQLITE_PRIVATE Table *sqlite3LocateTable( /* If zName is the not the name of a table in the schema created using ** CREATE, then check to see if it is the name of an virtual table that ** can be an eponymous virtual table. */ - if( pParse->disableVtab==0 && db->init.busy==0 ){ + if( (pParse->prepFlags & SQLITE_PREPARE_NO_VTAB)==0 && db->init.busy==0 ){ Module *pMod = (Module*)sqlite3HashFind(&db->aModule, zName); if( pMod==0 && sqlite3_strnicmp(zName, "pragma_", 7)==0 ){ pMod = sqlite3PragmaVtabRegister(db, zName); } if( pMod && sqlite3VtabEponymousTableInit(pParse, pMod) ){ return pMod->pEpoTab; } #endif if( flags & LOCATE_NOERR ) return 0; pParse->checkSchema = 1; - }else if( IsVirtual(p) && pParse->disableVtab ){ + }else if( IsVirtual(p) && (pParse->prepFlags & SQLITE_PREPARE_NO_VTAB)!=0 ){ p = 0; } @@ -115935,7 +120890,7 @@ SQLITE_PRIVATE void sqlite3ColumnSetColl( } /* -** Return the collating squence name for a column +** Return the collating sequence name for a column */ SQLITE_PRIVATE const char *sqlite3ColumnColl(Column *pCol){ const char *z; @@ -115956,16 +120911,17 @@ SQLITE_PRIVATE void sqlite3DeleteColumnNames(sqlite3 *db, Table *pTable){ int i; Column *pCol; assert( pTable!=0 ); + assert( db!=0 ); if( (pCol = pTable->aCol)!=0 ){ for(i=0; i<pTable->nCol; i++, pCol++){ assert( pCol->zCnName==0 || pCol->hName==sqlite3StrIHash(pCol->zCnName) ); sqlite3DbFree(db, pCol->zCnName); } - sqlite3DbFree(db, pTable->aCol); + sqlite3DbNNFreeNN(db, pTable->aCol); if( IsOrdinaryTable(pTable) ){ sqlite3ExprListDelete(db, pTable->u.tab.pDfltList); } - if( db==0 || db->pnBytesFreed==0 ){ + if( db->pnBytesFreed==0 ){ pTable->aCol = 0; pTable->nCol = 0; if( IsOrdinaryTable(pTable) ){ @@ -116002,7 +120958,8 @@ static void SQLITE_NOINLINE deleteTable(sqlite3 *db, Table *pTable){ ** a Table object that was going to be marked ephemeral. So do not check ** that no lookaside memory is used in this case either.
*/ int nLookaside = 0; - if( db && !db->mallocFailed && (pTable->tabFlags & TF_Ephemeral)==0 ){ + assert( db!=0 ); + if( !db->mallocFailed && (pTable->tabFlags & TF_Ephemeral)==0 ){ nLookaside = sqlite3LookasideUsed(db, 0); } #endif @@ -116012,7 +120969,7 @@ static void SQLITE_NOINLINE deleteTable(sqlite3 *db, Table *pTable){ pNext = pIndex->pNext; assert( pIndex->pSchema==pTable->pSchema || (IsVirtual(pTable) && pIndex->idxType!=SQLITE_IDXTYPE_APPDEF) ); - if( (db==0 || db->pnBytesFreed==0) && !IsVirtual(pTable) ){ + if( db->pnBytesFreed==0 && !IsVirtual(pTable) ){ char *zName = pIndex->zName; TESTONLY ( Index *pOld = ) sqlite3HashInsert( &pIndex->pSchema->idxHash, zName, 0 @@ -116026,7 +120983,7 @@ static void SQLITE_NOINLINE deleteTable(sqlite3 *db, Table *pTable){ if( IsOrdinaryTable(pTable) ){ sqlite3FkDelete(db, pTable); } -#ifndef SQLITE_OMIT_VIRTUAL_TABLE +#ifndef SQLITE_OMIT_VIRTUALTABLE else if( IsVirtual(pTable) ){ sqlite3VtabClear(db, pTable); } @@ -116049,8 +121006,9 @@ static void SQLITE_NOINLINE deleteTable(sqlite3 *db, Table *pTable){ } SQLITE_PRIVATE void sqlite3DeleteTable(sqlite3 *db, Table *pTable){ /* Do not delete the table until the reference count reaches zero. */ + assert( db!=0 ); if( !pTable ) return; - if( ((!db || db->pnBytesFreed==0) && (--pTable->nTabRef)>0) ) return; + if( db->pnBytesFreed==0 && (--pTable->nTabRef)>0 ) return; deleteTable(db, pTable); } @@ -116587,20 +121545,13 @@ SQLITE_PRIVATE void sqlite3ColumnPropertiesFromName(Table *pTab, Column *pCol){ } #endif -/* -** Name of the special TEMP trigger used to implement RETURNING. The -** name begins with "sqlite_" so that it is guaranteed not to collide -** with any application-generated triggers. -*/ -#define RETURNING_TRIGGER_NAME "sqlite_returning" - /* ** Clean up the data structures associated with the RETURNING clause. 
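The fixed RETURNING_TRIGGER_NAME deleted above is replaced, in the hunks that follow, by a per-parse name of the form "sqlite_returning_%p". The machinery serves ordinary RETURNING statements such as:

  CREATE TABLE t1(a INTEGER PRIMARY KEY, b TEXT);
  INSERT INTO t1(b) VALUES ('x') RETURNING a, b;
  -- Each RETURNING clause is implemented with a temporary TK_RETURNING
  -- trigger; a unique per-parse name keeps nested parses from colliding
  -- in the temp schema's trigHash table.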
*/ static void sqlite3DeleteReturning(sqlite3 *db, Returning *pRet){ Hash *pHash; pHash = &(db->aDb[1].pSchema->trigHash); - sqlite3HashInsert(pHash, RETURNING_TRIGGER_NAME, 0); + sqlite3HashInsert(pHash, pRet->zName, 0); sqlite3ExprListDelete(db, pRet->pReturnEL); sqlite3DbFree(db, pRet); } @@ -116628,7 +121579,7 @@ SQLITE_PRIVATE void sqlite3AddReturning(Parse *pParse, ExprList *pList){ if( pParse->pNewTrigger ){ sqlite3ErrorMsg(pParse, "cannot use RETURNING in a trigger"); }else{ - assert( pParse->bReturning==0 ); + assert( pParse->bReturning==0 || pParse->ifNotExists ); } pParse->bReturning = 1; pRet = sqlite3DbMallocZero(db, sizeof(*pRet)); @@ -116643,7 +121594,9 @@ SQLITE_PRIVATE void sqlite3AddReturning(Parse *pParse, ExprList *pList){ (void(*)(sqlite3*,void*))sqlite3DeleteReturning, pRet); testcase( pParse->earlyCleanup ); if( db->mallocFailed ) return; - pRet->retTrig.zName = RETURNING_TRIGGER_NAME; + sqlite3_snprintf(sizeof(pRet->zName), pRet->zName, + "sqlite_returning_%p", pParse); + pRet->retTrig.zName = pRet->zName; pRet->retTrig.op = TK_RETURNING; pRet->retTrig.tr_tm = TRIGGER_AFTER; pRet->retTrig.bReturning = 1; @@ -116654,8 +121607,9 @@ SQLITE_PRIVATE void sqlite3AddReturning(Parse *pParse, ExprList *pList){ pRet->retTStep.pTrig = &pRet->retTrig; pRet->retTStep.pExprList = pList; pHash = &(db->aDb[1].pSchema->trigHash); - assert( sqlite3HashFind(pHash, RETURNING_TRIGGER_NAME)==0 || pParse->nErr ); - if( sqlite3HashInsert(pHash, RETURNING_TRIGGER_NAME, &pRet->retTrig) + assert( sqlite3HashFind(pHash, pRet->zName)==0 + || pParse->nErr || pParse->ifNotExists ); + if( sqlite3HashInsert(pHash, pRet->zName, &pRet->retTrig) ==&pRet->retTrig ){ sqlite3OomFault(db); } @@ -116689,7 +121643,7 @@ SQLITE_PRIVATE void sqlite3AddColumn(Parse *pParse, Token sName, Token sType){ } if( !IN_RENAME_OBJECT ) sqlite3DequoteToken(&sName); - /* Because keywords GENERATE ALWAYS can be converted into indentifiers + /* Because keywords GENERATE ALWAYS can be converted into identifiers ** by the parser, we can sometimes end up with a typename that ends ** with "generated always". Check for this case and omit the surplus ** text. */ @@ -116910,7 +121864,7 @@ SQLITE_PRIVATE void sqlite3AddDefaultValue( Parse *pParse, /* Parsing context */ Expr *pExpr, /* The parsed expression of the default value */ const char *zStart, /* Start of the default value text */ - const char *zEnd /* First character past end of defaut value text */ + const char *zEnd /* First character past end of default value text */ ){ Table *p; Column *pCol; @@ -117182,6 +122136,14 @@ SQLITE_PRIVATE void sqlite3AddGenerated(Parse *pParse, Expr *pExpr, Token *pType if( pCol->colFlags & COLFLAG_PRIMKEY ){ makeColumnPartOfPrimaryKey(pParse, pCol); /* For the error message */ } + if( ALWAYS(pExpr) && pExpr->op==TK_ID ){ + /* The value of a generated column needs to be a real expression, not + ** just a reference to another column, in order for covering index + ** optimizations to work correctly. So if the value is not an expression, + ** turn it into one by adding a unary "+" operator. */ + pExpr = sqlite3PExpr(pParse, TK_UPLUS, pExpr, 0); + } + if( pExpr && pExpr->op!=TK_RAISE ) pExpr->affExpr = pCol->affinity; sqlite3ColumnSetExpr(pParse, pTab, pCol, pExpr); pExpr = 0; goto generated_done; @@ -117250,7 +122212,7 @@ static int identLength(const char *z){ ** to the specified offset in the buffer and updates *pIdx to refer ** to the first byte after the last byte written before returning. 
** -** If the string zSignedIdent consists entirely of alpha-numeric +** If the string zSignedIdent consists entirely of alphanumeric ** characters, does not begin with a digit and is not an SQL keyword, ** then it is copied to the output buffer exactly as it is. Otherwise, ** it is quoted using double-quotes. @@ -117318,7 +122280,8 @@ static char *createTableStmt(sqlite3 *db, Table *p){ /* SQLITE_AFF_TEXT */ " TEXT", /* SQLITE_AFF_NUMERIC */ " NUM", /* SQLITE_AFF_INTEGER */ " INT", - /* SQLITE_AFF_REAL */ " REAL" + /* SQLITE_AFF_REAL */ " REAL", + /* SQLITE_AFF_FLEXNUM */ " NUM", }; int len; const char *zType; @@ -117334,10 +122297,12 @@ static char *createTableStmt(sqlite3 *db, Table *p){ testcase( pCol->affinity==SQLITE_AFF_NUMERIC ); testcase( pCol->affinity==SQLITE_AFF_INTEGER ); testcase( pCol->affinity==SQLITE_AFF_REAL ); + testcase( pCol->affinity==SQLITE_AFF_FLEXNUM ); zType = azType[pCol->affinity - SQLITE_AFF_BLOB]; len = sqlite3Strlen30(zType); assert( pCol->affinity==SQLITE_AFF_BLOB + || pCol->affinity==SQLITE_AFF_FLEXNUM || pCol->affinity==sqlite3AffinityType(zType, 0) ); memcpy(&zStmt[k], zType, len); k += len; @@ -117399,7 +122364,7 @@ static void estimateIndexWidth(Index *pIdx){ for(i=0; i<pIdx->nColumn; i++){ i16 x = pIdx->aiColumn[i]; assert( x<pIdx->pTable->nCol ); - wIndex += x<0 ? 1 : aCol[pIdx->aiColumn[i]].szEst; + wIndex += x<0 ? 1 : aCol[x].szEst; } pIdx->szIdxRow = sqlite3LogEst(wIndex*4); } @@ -117454,7 +122419,8 @@ static int isDupColumn(Index *pIdx, int nKey, Index *pPk, int iCol){ /* Recompute the colNotIdxed field of the Index. ** ** colNotIdxed is a bitmask that has a 0 bit representing each indexed -** columns that are within the first 63 columns of the table. The +** columns that are within the first 63 columns of the table and a 1 for +** all other bits (all columns that are not in the index). The ** high-order bit of colNotIdxed is always 1. All unindexed columns ** of the table have a 1. ** @@ -117482,7 +122448,7 @@ static void recomputeColumnsNotIndexed(Index *pIdx){ } } pIdx->colNotIdxed = ~m; - assert( (pIdx->colNotIdxed>>63)==1 ); + assert( (pIdx->colNotIdxed>>63)==1 ); /* See note-20221022-a */ } /* @@ -117751,6 +122717,7 @@ SQLITE_PRIVATE int sqlite3ShadowTableName(sqlite3 *db, const char *zName){ ** not pass them into code generator routines by mistake. */ static int markImmutableExprStep(Walker *pWalker, Expr *pExpr){ + (void)pWalker; ExprSetVVAProperty(pExpr, EP_Immutable); return WRC_Continue; } @@ -118086,6 +123053,17 @@ SQLITE_PRIVATE void sqlite3EndTable( /* Reparse everything to update our internal data structures */ sqlite3VdbeAddParseSchemaOp(v, iDb, sqlite3MPrintf(db, "tbl_name='%q' AND type!='trigger'", p->zName),0); + + /* Test for cycles in generated columns and illegal expressions + ** in CHECK constraints and in DEFAULT clauses. */ + if( p->tabFlags & TF_HasGenerated ){ + sqlite3VdbeAddOp4(v, OP_SqlExec, 1, 0, 0, + sqlite3MPrintf(db, "SELECT*FROM\"%w\".\"%w\"", + db->aDb[iDb].zDbSName, p->zName), P4_DYNAMIC); + } + sqlite3VdbeAddOp4(v, OP_SqlExec, 1, 0, 0, + sqlite3MPrintf(db, "PRAGMA \"%w\".integrity_check(%Q)", + db->aDb[iDb].zDbSName, p->zName), P4_DYNAMIC); } /* Add the table to the in-memory representation of the database. @@ -118223,7 +123201,7 @@ SQLITE_PRIVATE void sqlite3CreateView( ** the columns of the view in the pTable structure. Return the number ** of errors. If an error is seen leave an error message in pParse->zErrMsg.
*/ -SQLITE_PRIVATE int sqlite3ViewGetColumnNames(Parse *pParse, Table *pTable){ +static SQLITE_NOINLINE int viewGetColumnNames(Parse *pParse, Table *pTable){ Table *pSelTab; /* A fake table from which we get the result set */ Select *pSel; /* Copy of the SELECT that implements the view */ int nErr = 0; /* Number of errors encountered */ @@ -118248,9 +123226,10 @@ SQLITE_PRIVATE int sqlite3ViewGetColumnNames(Parse *pParse, Table *pTable){ #ifndef SQLITE_OMIT_VIEW /* A positive nCol means the columns names for this view are - ** already known. + ** already known. This routine is not called unless either the + ** table is virtual or nCol is zero. */ - if( pTable->nCol>0 ) return 0; + assert( pTable->nCol<=0 ); /* A negative nCol is a special marker meaning that we are currently ** trying to compute the column names. If we enter this routine with @@ -118316,8 +123295,7 @@ SQLITE_PRIVATE int sqlite3ViewGetColumnNames(Parse *pParse, Table *pTable){ && pTable->nCol==pSel->pEList->nExpr ){ assert( db->mallocFailed==0 ); - sqlite3SelectAddColumnTypeAndCollation(pParse, pTable, pSel, - SQLITE_AFF_NONE); + sqlite3SubqueryColumnTypes(pParse, pTable, pSel, SQLITE_AFF_NONE); } }else{ /* CREATE VIEW name AS... without an argument list. Construct @@ -118346,6 +123324,11 @@ SQLITE_PRIVATE int sqlite3ViewGetColumnNames(Parse *pParse, Table *pTable){ #endif /* SQLITE_OMIT_VIEW */ return nErr; } +SQLITE_PRIVATE int sqlite3ViewGetColumnNames(Parse *pParse, Table *pTable){ + assert( pTable!=0 ); + if( !IsVirtual(pTable) && pTable->nCol>0 ) return 0; + return viewGetColumnNames(pParse, pTable); +} #endif /* !defined(SQLITE_OMIT_VIEW) || !defined(SQLITE_OMIT_VIRTUALTABLE) */ #ifndef SQLITE_OMIT_VIEW @@ -119130,7 +124113,7 @@ SQLITE_PRIVATE void sqlite3CreateIndex( #ifndef SQLITE_OMIT_TEMPDB /* If the index name was unqualified, check if the table ** is a temp table. If so, set the database to 1. Do not do this - ** if initialising a database schema. + ** if initializing a database schema. 
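The refactor above turns sqlite3ViewGetColumnNames() into a thin guard around a SQLITE_NOINLINE worker, so the common already-computed case costs only a pointer check and a flag test at every call site. A generic sketch of that guard/cold-path pattern, with illustrative names rather than SQLite's own:

    #include <stdio.h>

    #if defined(__GNUC__)
    # define COLD_NOINLINE __attribute__((noinline))
    #else
    # define COLD_NOINLINE
    #endif

    typedef struct View { int nCol; } View;

    /* Expensive path, forced out of line so it does not bloat callers. */
    static COLD_NOINLINE int computeColumnNames(View *p){
      /* ...recompute the column list from the view's SELECT here... */
      p->nCol = 1;
      return 0;
    }

    /* Cheap guard that the compiler can inline everywhere. */
    static int viewColumnNames(View *p){
      if( p->nCol>0 ) return 0;       /* fast path: names already known */
      return computeColumnNames(p);   /* cold path */
    }

    int main(void){
      View v = {0};
      viewColumnNames(&v);                  /* takes the cold path once */
      printf("%d\n", viewColumnNames(&v));  /* fast path: prints 0 */
      return 0;
    }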
*/ if( !db->init.busy ){ pTab = sqlite3SrcListLookup(pParse, pTblName); @@ -119211,7 +124194,7 @@ SQLITE_PRIVATE void sqlite3CreateIndex( } if( !IN_RENAME_OBJECT ){ if( !db->init.busy ){ - if( sqlite3FindTable(db, zName, 0)!=0 ){ + if( sqlite3FindTable(db, zName, pDb->zDbSName)!=0 ){ sqlite3ErrorMsg(pParse, "there is already a table named %s", zName); goto exit_create_index; } @@ -119364,6 +124347,7 @@ SQLITE_PRIVATE void sqlite3CreateIndex( j = XN_EXPR; pIndex->aiColumn[i] = XN_EXPR; pIndex->uniqNotNull = 0; + pIndex->bHasExpr = 1; }else{ j = pCExpr->iColumn; assert( j<=0x7fff ); @@ -119375,6 +124359,7 @@ SQLITE_PRIVATE void sqlite3CreateIndex( } if( pTab->aCol[j].colFlags & COLFLAG_VIRTUAL ){ pIndex->bHasVCol = 1; + pIndex->bHasExpr = 1; } } pIndex->aiColumn[i] = (i16)j; @@ -119864,12 +124849,13 @@ SQLITE_PRIVATE IdList *sqlite3IdListAppend(Parse *pParse, IdList *pList, Token * */ SQLITE_PRIVATE void sqlite3IdListDelete(sqlite3 *db, IdList *pList){ int i; + assert( db!=0 ); if( pList==0 ) return; assert( pList->eU4!=EU4_EXPR ); /* EU4_EXPR mode is not currently used */ for(i=0; inId; i++){ sqlite3DbFree(db, pList->a[i].zName); } - sqlite3DbFreeNN(db, pList); + sqlite3DbNNFreeNN(db, pList); } /* @@ -120072,11 +125058,12 @@ SQLITE_PRIVATE void sqlite3SrcListAssignCursors(Parse *pParse, SrcList *pList){ SQLITE_PRIVATE void sqlite3SrcListDelete(sqlite3 *db, SrcList *pList){ int i; SrcItem *pItem; + assert( db!=0 ); if( pList==0 ) return; for(pItem=pList->a, i=0; inSrc; i++, pItem++){ - if( pItem->zDatabase ) sqlite3DbFreeNN(db, pItem->zDatabase); - sqlite3DbFree(db, pItem->zName); - if( pItem->zAlias ) sqlite3DbFreeNN(db, pItem->zAlias); + if( pItem->zDatabase ) sqlite3DbNNFreeNN(db, pItem->zDatabase); + if( pItem->zName ) sqlite3DbNNFreeNN(db, pItem->zName); + if( pItem->zAlias ) sqlite3DbNNFreeNN(db, pItem->zAlias); if( pItem->fg.isIndexedBy ) sqlite3DbFree(db, pItem->u1.zIndexedBy); if( pItem->fg.isTabFunc ) sqlite3ExprListDelete(db, pItem->u1.pFuncArg); sqlite3DeleteTable(db, pItem->pTab); @@ -120087,7 +125074,7 @@ SQLITE_PRIVATE void sqlite3SrcListDelete(sqlite3 *db, SrcList *pList){ sqlite3ExprDelete(db, pItem->u3.pOn); } } - sqlite3DbFreeNN(db, pList); + sqlite3DbNNFreeNN(db, pList); } /* @@ -120783,7 +125770,7 @@ SQLITE_PRIVATE void sqlite3CteDelete(sqlite3 *db, Cte *pCte){ /* ** This routine is invoked once per CTE by the parser while parsing a -** WITH clause. The CTE described by teh third argument is added to +** WITH clause. The CTE described by the third argument is added to ** the WITH clause of the second argument. If the second argument is ** NULL, then a new WITH argument is created. */ @@ -121034,6 +126021,7 @@ SQLITE_PRIVATE void sqlite3SetTextEncoding(sqlite3 *db, u8 enc){ ** strings is BINARY. 
*/ db->pDfltColl = sqlite3FindCollSeq(db, enc, sqlite3StrBINARY, 0); + sqlite3ExpirePreparedStatements(db, 1); } /* @@ -121339,19 +126327,21 @@ SQLITE_PRIVATE void sqlite3SchemaClear(void *p){ Hash temp2; HashElem *pElem; Schema *pSchema = (Schema *)p; + sqlite3 xdb; + memset(&xdb, 0, sizeof(xdb)); temp1 = pSchema->tblHash; temp2 = pSchema->trigHash; sqlite3HashInit(&pSchema->trigHash); sqlite3HashClear(&pSchema->idxHash); for(pElem=sqliteHashFirst(&temp2); pElem; pElem=sqliteHashNext(pElem)){ - sqlite3DeleteTrigger(0, (Trigger*)sqliteHashData(pElem)); + sqlite3DeleteTrigger(&xdb, (Trigger*)sqliteHashData(pElem)); } sqlite3HashClear(&temp2); sqlite3HashInit(&pSchema->tblHash); for(pElem=sqliteHashFirst(&temp1); pElem; pElem=sqliteHashNext(pElem)){ Table *pTab = sqliteHashData(pElem); - sqlite3DeleteTable(0, pTab); + sqlite3DeleteTable(&xdb, pTab); } sqlite3HashClear(&temp1); sqlite3HashClear(&pSchema->fkeyHash); @@ -121422,8 +126412,9 @@ SQLITE_PRIVATE Table *sqlite3SrcListLookup(Parse *pParse, SrcList *pSrc){ Table *pTab; assert( pItem && pSrc->nSrc>=1 ); pTab = sqlite3LocateTableItem(pParse, 0, pItem); - sqlite3DeleteTable(pParse->db, pItem->pTab); + if( pItem->pTab ) sqlite3DeleteTable(pParse->db, pItem->pTab); pItem->pTab = pTab; + pItem->fg.notCte = 1; if( pTab ){ pTab->nTabRef++; if( pItem->fg.isIndexedBy && sqlite3IndexedByLookup(pParse, pItem) ){ @@ -121450,18 +126441,42 @@ SQLITE_PRIVATE void sqlite3CodeChangeCount(Vdbe *v, int regCounter, const char * ** 1) It is a virtual table and no implementation of the xUpdate method ** has been provided ** -** 2) It is a system table (i.e. sqlite_schema), this call is not +** 2) A trigger is currently being coded and the table is a virtual table +** that is SQLITE_VTAB_DIRECTONLY or if PRAGMA trusted_schema=OFF and +** the table is not SQLITE_VTAB_INNOCUOUS. +** +** 3) It is a system table (i.e. sqlite_schema), this call is not ** part of a nested parse and writable_schema pragma has not ** been specified ** -** 3) The table is a shadow table, the database connection is in +** 4) The table is a shadow table, the database connection is in ** defensive mode, and the current sqlite3_prepare() ** is for a top-level SQL statement. */ +static int vtabIsReadOnly(Parse *pParse, Table *pTab){ + if( sqlite3GetVTable(pParse->db, pTab)->pMod->pModule->xUpdate==0 ){ + return 1; + } + + /* Within triggers: + ** * Do not allow DELETE, INSERT, or UPDATE of SQLITE_VTAB_DIRECTONLY + ** virtual tables + ** * Only allow DELETE, INSERT, or UPDATE of non-SQLITE_VTAB_INNOCUOUS + ** virtual tables if PRAGMA trusted_schema=ON. + */ + if( pParse->pToplevel!=0 + && pTab->u.vtab.p->eVtabRisk > + ((pParse->db->flags & SQLITE_TrustedSchema)!=0) + ){ + sqlite3ErrorMsg(pParse, "unsafe use of virtual table \"%s\"", + pTab->zName); + } + return 0; +} static int tabIsReadOnly(Parse *pParse, Table *pTab){ sqlite3 *db; if( IsVirtual(pTab) ){ - return sqlite3GetVTable(pParse->db, pTab)->pMod->pModule->xUpdate==0; + return vtabIsReadOnly(pParse, pTab); } if( (pTab->tabFlags & (TF_Readonly|TF_Shadow))==0 ) return 0; db = pParse->db; @@ -121473,17 +126488,21 @@ static int tabIsReadOnly(Parse *pParse, Table *pTab){ } /* -** Check to make sure the given table is writable. If it is not -** writable, generate an error message and return 1. If it is -** writable return 0; +** Check to make sure the given table is writable. +** +** If pTab is not writable -> generate an error message and return 1. +** If pTab is writable but other errors have occurred -> return 1. 
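The new vtabIsReadOnly() above reduces the trigger-time trust rule to a single comparison: a write from trigger code is legal only when the virtual table's risk level does not exceed the allowance that PRAGMA trusted_schema grants. A sketch of just that predicate; the 0/1/2 ordering for INNOCUOUS/ordinary/DIRECTONLY is assumed here for illustration, mirroring the eVtabRisk comparison in the hunk:

    #include <assert.h>

    /* 0 = SQLITE_VTAB_INNOCUOUS, 1 = ordinary, 2 = SQLITE_VTAB_DIRECTONLY
    ** (assumed ordering). trusted_schema=ON raises the allowance from 0
    ** to 1, so ordinary virtual tables become writable from triggers;
    ** DIRECTONLY never passes; INNOCUOUS always does. */
    static int vtabWriteAllowedInTrigger(int eVtabRisk, int trustedSchema){
      return eVtabRisk <= (trustedSchema!=0);
    }

    int main(void){
      assert(  vtabWriteAllowedInTrigger(0, 0) );  /* INNOCUOUS: always */
      assert( !vtabWriteAllowedInTrigger(1, 0) );  /* ordinary: needs trust */
      assert(  vtabWriteAllowedInTrigger(1, 1) );
      assert( !vtabWriteAllowedInTrigger(2, 1) );  /* DIRECTONLY: never */
      return 0;
    }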
+** If pTab is writable and no prior errors -> return 0; */ -SQLITE_PRIVATE int sqlite3IsReadOnly(Parse *pParse, Table *pTab, int viewOk){ +SQLITE_PRIVATE int sqlite3IsReadOnly(Parse *pParse, Table *pTab, Trigger *pTrigger){ if( tabIsReadOnly(pParse, pTab) ){ sqlite3ErrorMsg(pParse, "table %s may not be modified", pTab->zName); return 1; } #ifndef SQLITE_OMIT_VIEW - if( !viewOk && IsView(pTab) ){ + if( IsView(pTab) + && (pTrigger==0 || (pTrigger->bReturning && pTrigger->pNext==0)) + ){ sqlite3ErrorMsg(pParse,"cannot modify %s because it is a view",pTab->zName); return 1; } @@ -121548,7 +126567,7 @@ SQLITE_PRIVATE Expr *sqlite3LimitWhere( sqlite3 *db = pParse->db; Expr *pLhs = NULL; /* LHS of IN(SELECT...) operator */ Expr *pInClause = NULL; /* WHERE rowid IN ( select ) */ - ExprList *pEList = NULL; /* Expression list contaning only pSelectRowid */ + ExprList *pEList = NULL; /* Expression list containing only pSelectRowid*/ SrcList *pSelectSrc = NULL; /* SELECT rowid FROM x ... (dup of pSrc) */ Select *pSelect = NULL; /* Complete SELECT tree */ Table *pTab; @@ -121586,14 +126605,20 @@ SQLITE_PRIVATE Expr *sqlite3LimitWhere( ); }else{ Index *pPk = sqlite3PrimaryKeyIndex(pTab); + assert( pPk!=0 ); + assert( pPk->nKeyCol>=1 ); if( pPk->nKeyCol==1 ){ - const char *zName = pTab->aCol[pPk->aiColumn[0]].zCnName; + const char *zName; + assert( pPk->aiColumn[0]>=0 && pPk->aiColumn[0]nCol ); + zName = pTab->aCol[pPk->aiColumn[0]].zCnName; pLhs = sqlite3Expr(db, TK_ID, zName); pEList = sqlite3ExprListAppend(pParse, 0, sqlite3Expr(db, TK_ID, zName)); }else{ int i; for(i=0; inKeyCol; i++){ - Expr *p = sqlite3Expr(db, TK_ID, pTab->aCol[pPk->aiColumn[i]].zCnName); + Expr *p; + assert( pPk->aiColumn[i]>=0 && pPk->aiColumn[i]nCol ); + p = sqlite3Expr(db, TK_ID, pTab->aCol[pPk->aiColumn[i]].zCnName); pEList = sqlite3ExprListAppend(pParse, pEList, p); } pLhs = sqlite3PExpr(pParse, TK_VECTOR, 0, 0); @@ -121622,7 +126647,7 @@ SQLITE_PRIVATE Expr *sqlite3LimitWhere( pOrderBy,0,pLimit ); - /* now generate the new WHERE rowid IN clause for the DELETE/UDPATE */ + /* now generate the new WHERE rowid IN clause for the DELETE/UPDATE */ pInClause = sqlite3PExpr(pParse, TK_IN, pLhs, 0); sqlite3PExprAddSelect(pParse, pInClause, pSelect); return pInClause; @@ -121737,7 +126762,7 @@ SQLITE_PRIVATE void sqlite3DeleteFrom( goto delete_from_cleanup; } - if( sqlite3IsReadOnly(pParse, pTab, (pTrigger?1:0)) ){ + if( sqlite3IsReadOnly(pParse, pTab, pTrigger) ){ goto delete_from_cleanup; } iDb = sqlite3SchemaToIndex(db, pTab->pSchema); @@ -121836,21 +126861,22 @@ SQLITE_PRIVATE void sqlite3DeleteFrom( } for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ assert( pIdx->pSchema==pTab->pSchema ); - sqlite3VdbeAddOp2(v, OP_Clear, pIdx->tnum, iDb); if( IsPrimaryKeyIndex(pIdx) && !HasRowid(pTab) ){ - sqlite3VdbeChangeP3(v, -1, memCnt ? memCnt : -1); + sqlite3VdbeAddOp3(v, OP_Clear, pIdx->tnum, iDb, memCnt ? memCnt : -1); + }else{ + sqlite3VdbeAddOp2(v, OP_Clear, pIdx->tnum, iDb); } } }else #endif /* SQLITE_OMIT_TRUNCATE_OPTIMIZATION */ { u16 wcf = WHERE_ONEPASS_DESIRED|WHERE_DUPLICATES_OK; - if( sNC.ncFlags & NC_VarSelect ) bComplex = 1; + if( sNC.ncFlags & NC_Subquery ) bComplex = 1; wcf |= (bComplex ? 
0 : WHERE_ONEPASS_MULTIROW); if( HasRowid(pTab) ){ /* For a rowid table, initialize the RowSet to an empty set */ pPk = 0; - nPk = 1; + assert( nPk==1 ); iRowSet = ++pParse->nMem; sqlite3VdbeAddOp2(v, OP_Null, 0, iRowSet); }else{ @@ -121878,7 +126904,8 @@ SQLITE_PRIVATE void sqlite3DeleteFrom( if( pWInfo==0 ) goto delete_from_cleanup; eOnePass = sqlite3WhereOkOnePass(pWInfo, aiCurOnePass); assert( IsVirtual(pTab)==0 || eOnePass!=ONEPASS_MULTI ); - assert( IsVirtual(pTab) || bComplex || eOnePass!=ONEPASS_OFF ); + assert( IsVirtual(pTab) || bComplex || eOnePass!=ONEPASS_OFF + || OptimizationDisabled(db, SQLITE_OnePass) ); if( eOnePass!=ONEPASS_SINGLE ) sqlite3MultiWrite(pParse); if( sqlite3WhereUsesDeferredSeek(pWInfo) ){ sqlite3VdbeAddOp1(v, OP_FinishSeek, iTabCur); @@ -122038,7 +127065,7 @@ SQLITE_PRIVATE void sqlite3DeleteFrom( sqlite3ExprListDelete(db, pOrderBy); sqlite3ExprDelete(db, pLimit); #endif - sqlite3DbFree(db, aToOpen); + if( aToOpen ) sqlite3DbNNFreeNN(db, aToOpen); return; } /* Make sure "isView" and other macros defined above are undefined. Otherwise @@ -122215,9 +127242,11 @@ SQLITE_PRIVATE void sqlite3GenerateRowDelete( sqlite3FkActions(pParse, pTab, 0, iOld, 0, 0); /* Invoke AFTER DELETE trigger programs. */ - sqlite3CodeRowTrigger(pParse, pTrigger, - TK_DELETE, 0, TRIGGER_AFTER, pTab, iOld, onconf, iLabel - ); + if( pTrigger ){ + sqlite3CodeRowTrigger(pParse, pTrigger, + TK_DELETE, 0, TRIGGER_AFTER, pTab, iOld, onconf, iLabel + ); + } /* Jump here if the row had already been deleted before any BEFORE ** trigger programs were invoked. Or if a trigger program throws a @@ -122530,6 +127559,42 @@ static void lengthFunc( } } +/* +** Implementation of the octet_length() function +*/ +static void bytelengthFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + assert( argc==1 ); + UNUSED_PARAMETER(argc); + switch( sqlite3_value_type(argv[0]) ){ + case SQLITE_BLOB: { + sqlite3_result_int(context, sqlite3_value_bytes(argv[0])); + break; + } + case SQLITE_INTEGER: + case SQLITE_FLOAT: { + i64 m = sqlite3_context_db_handle(context)->enc<=SQLITE_UTF8 ? 1 : 2; + sqlite3_result_int64(context, sqlite3_value_bytes(argv[0])*m); + break; + } + case SQLITE_TEXT: { + if( sqlite3_value_encoding(argv[0])<=SQLITE_UTF8 ){ + sqlite3_result_int(context, sqlite3_value_bytes(argv[0])); + }else{ + sqlite3_result_int(context, sqlite3_value_bytes16(argv[0])); + } + break; + } + default: { + sqlite3_result_null(context); + break; + } + } +} + /* ** Implementation of the abs() function. ** @@ -122806,7 +127871,7 @@ static void roundFunc(sqlite3_context *context, int argc, sqlite3_value **argv){ }else if( n==0 ){ r = (double)((sqlite_int64)(r+(r<0?-0.5:+0.5))); }else{ - zBuf = sqlite3_mprintf("%.*f",n,r); + zBuf = sqlite3_mprintf("%!.*f",n,r); if( zBuf==0 ){ sqlite3_result_error_nomem(context); return; @@ -123006,7 +128071,7 @@ struct compareInfo { /* ** For LIKE and GLOB matching on EBCDIC machines, assume that every -** character is exactly one byte in size. Also, provde the Utf8Read() +** character is exactly one byte in size. Also, provide the Utf8Read() ** macro for fast reading of the next character in the common case where ** the next character is ASCII. */ @@ -123121,7 +128186,7 @@ static int patternCompare( ** c but in the other case and search the input string for either ** c or cx. */ - if( c<=0x80 ){ + if( c<0x80 ){ char zStop[3]; int bMatch; if( noCase ){ @@ -123204,7 +128269,13 @@ static int patternCompare( ** non-zero if there is no match. 
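The octet_length() function added above counts bytes rather than characters: BLOBs report their size, TEXT reports the byte length in the value's current encoding, numbers are measured as if rendered to text (doubled on UTF-16 databases), and NULL stays NULL. A quick check through the public API, assuming a library built from this amalgamation:

    #include <stdio.h>
    #include "sqlite3.h"

    int main(void){
      sqlite3 *db;
      sqlite3_stmt *pStmt;
      if( sqlite3_open(":memory:", &db)!=SQLITE_OK ) return 1;
      sqlite3_prepare_v2(db,
        "SELECT octet_length('abc'), octet_length(x'0102'),"
        "       octet_length(NULL)",
        -1, &pStmt, 0);
      if( sqlite3_step(pStmt)==SQLITE_ROW ){
        printf("%d %d %s\n",
          sqlite3_column_int(pStmt, 0),    /* 3: three UTF-8 bytes */
          sqlite3_column_int(pStmt, 1),    /* 2: two blob bytes */
          sqlite3_column_type(pStmt, 2)==SQLITE_NULL ? "NULL" : "?");
      }
      sqlite3_finalize(pStmt);
      sqlite3_close(db);
      return 0;
    }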
*/ SQLITE_API int sqlite3_strglob(const char *zGlobPattern, const char *zString){ - return patternCompare((u8*)zGlobPattern, (u8*)zString, &globInfo, '['); + if( zString==0 ){ + return zGlobPattern!=0; + }else if( zGlobPattern==0 ){ + return 1; + }else { + return patternCompare((u8*)zGlobPattern, (u8*)zString, &globInfo, '['); + } } /* @@ -123212,7 +128283,13 @@ SQLITE_API int sqlite3_strglob(const char *zGlobPattern, const char *zString){ ** a miss - like strcmp(). */ SQLITE_API int sqlite3_strlike(const char *zPattern, const char *zStr, unsigned int esc){ - return patternCompare((u8*)zPattern, (u8*)zStr, &likeInfoNorm, esc); + if( zStr==0 ){ + return zPattern!=0; + }else if( zPattern==0 ){ + return 1; + }else{ + return patternCompare((u8*)zPattern, (u8*)zStr, &likeInfoNorm, esc); + } } /* @@ -123227,7 +128304,7 @@ SQLITE_API int sqlite3_like_count = 0; /* ** Implementation of the like() SQL function. This function implements -** the build-in LIKE operator. The first argument to the function is the +** the built-in LIKE operator. The first argument to the function is the ** pattern and the second argument is the string. So, the SQL statements: ** ** A LIKE B @@ -123451,7 +128528,7 @@ SQLITE_PRIVATE void sqlite3QuoteValue(StrAccum *pStr, sqlite3_value *pValue){ } case SQLITE_BLOB: { char const *zBlob = sqlite3_value_blob(pValue); - int nBlob = sqlite3_value_bytes(pValue); + i64 nBlob = sqlite3_value_bytes(pValue); assert( zBlob==sqlite3_value_blob(pValue) ); /* No encoding change */ sqlite3StrAccumEnlarge(pStr, nBlob*2 + 4); if( pStr->accError==0 ){ @@ -123560,6 +128637,7 @@ static void charFunc( *zOut++ = 0x80 + (u8)(c & 0x3F); } \ } + *zOut = 0; sqlite3_result_text64(context, (char*)z, zOut-z, sqlite3_free, SQLITE_UTF8); } @@ -123588,10 +128666,101 @@ static void hexFunc( *(z++) = hexdigits[c&0xf]; } *z = 0; - sqlite3_result_text(context, zHex, n*2, sqlite3_free); + sqlite3_result_text64(context, zHex, (u64)(z-zHex), + sqlite3_free, SQLITE_UTF8); + } +} + +/* +** Buffer zStr contains nStr bytes of utf-8 encoded text. Return 1 if zStr +** contains character ch, or 0 if it does not. +*/ +static int strContainsChar(const u8 *zStr, int nStr, u32 ch){ + const u8 *zEnd = &zStr[nStr]; + const u8 *z = zStr; + while( z0 ){ + const char *v = (const char*)sqlite3_value_text(argv[i]); + if( v!=0 ){ + if( j>0 && nSep>0 ){ + memcpy(&z[j], zSep, nSep); + j += nSep; + } + memcpy(&z[j], v, k); + j += k; + } + } + } + z[j] = 0; + assert( j<=n ); + sqlite3_result_text64(context, z, j, sqlite3_free, SQLITE_UTF8); +} + +/* +** The CONCAT(...) function. Generate a string result that is the +** concatentation of all non-null arguments. +*/ +static void concatFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + concatFuncCore(context, argc, argv, 0, ""); +} + +/* +** The CONCAT_WS(separator, ...) function. +** +** Generate a string that is the concatenation of 2nd through the Nth +** argument. Use the first argument (which must be non-NULL) as the +** separator. 
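The sqlite3_strglob() and sqlite3_strlike() hunks near the start of this block make both public entry points NULL-safe: a NULL string or pattern now reports a miss, and two NULLs count as a match, instead of the old behavior of dereferencing a null pointer. The resulting contract, as a runnable check:

    #include <assert.h>
    #include "sqlite3.h"

    static void checkNullSafety(void){
      assert( sqlite3_strglob("a*", "abc")==0 );  /* match */
      assert( sqlite3_strglob("a*", 0)!=0 );      /* NULL string: miss */
      assert( sqlite3_strglob(0, "abc")!=0 );     /* NULL pattern: miss */
      assert( sqlite3_strglob(0, 0)==0 );         /* both NULL: match */
      assert( sqlite3_strlike("a%", 0, 0)!=0 );   /* same rules for LIKE */
    }

    int main(void){
      checkNullSafety();
      return 0;
    }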
+*/ +static void concatwsFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + int nSep = sqlite3_value_bytes(argv[0]); + const char *zSep = (const char*)sqlite3_value_text(argv[0]); + if( zSep==0 ) return; + concatFuncCore(context, argc-1, argv+1, nSep, zSep); +} + #ifdef SQLITE_ENABLE_UNKNOWN_SQL_FUNCTION /* ** The "unknown" function is automatically substituted in place of ** any unrecognized function name when doing an EXPLAIN or EXPLAIN QUERY PLAN -** when the SQLITE_ENABLE_UNKNOWN_FUNCTION compile-time option is used. +** when the SQLITE_ENABLE_UNKNOWN_SQL_FUNCTION compile-time option is used. ** When the "sqlite3" command-line shell is built using this functionality, ** that allows an EXPLAIN or EXPLAIN QUERY PLAN for complex queries ** involving application-defined functions to be examined in a generic @@ -123809,6 +129053,9 @@ static void unknownFunc( sqlite3_value **argv ){ /* no-op */ + (void)context; + (void)argc; + (void)argv; } #endif /*SQLITE_ENABLE_UNKNOWN_SQL_FUNCTION*/ @@ -123910,13 +129157,68 @@ static void loadExt(sqlite3_context *context, int argc, sqlite3_value **argv){ */ typedef struct SumCtx SumCtx; struct SumCtx { - double rSum; /* Floating point sum */ - i64 iSum; /* Integer sum */ + double rSum; /* Running sum as as a double */ + double rErr; /* Error term for Kahan-Babushka-Neumaier summation */ + i64 iSum; /* Running sum as a signed integer */ i64 cnt; /* Number of elements summed */ - u8 overflow; /* True if integer overflow seen */ - u8 approx; /* True if non-integer value was input to the sum */ + u8 approx; /* True if any non-integer value was input to the sum */ + u8 ovrfl; /* Integer overflow seen */ }; +/* +** Do one step of the Kahan-Babushka-Neumaier summation. +** +** https://en.wikipedia.org/wiki/Kahan_summation_algorithm +** +** Variables are marked "volatile" to defeat c89 x86 floating point +** optimizations can mess up this algorithm. +*/ +static void kahanBabuskaNeumaierStep( + volatile SumCtx *pSum, + volatile double r +){ + volatile double s = pSum->rSum; + volatile double t = s + r; + if( fabs(s) > fabs(r) ){ + pSum->rErr += (s - t) + r; + }else{ + pSum->rErr += (r - t) + s; + } + pSum->rSum = t; +} + +/* +** Add a (possibly large) integer to the running sum. +*/ +static void kahanBabuskaNeumaierStepInt64(volatile SumCtx *pSum, i64 iVal){ + if( iVal<=-4503599627370496LL || iVal>=+4503599627370496LL ){ + i64 iBig, iSm; + iSm = iVal % 16384; + iBig = iVal - iSm; + kahanBabuskaNeumaierStep(pSum, iBig); + kahanBabuskaNeumaierStep(pSum, iSm); + }else{ + kahanBabuskaNeumaierStep(pSum, (double)iVal); + } +} + +/* +** Initialize the Kahan-Babaska-Neumaier sum from a 64-bit integer +*/ +static void kahanBabuskaNeumaierInit( + volatile SumCtx *p, + i64 iVal +){ + if( iVal<=-4503599627370496LL || iVal>=+4503599627370496LL ){ + i64 iSm = iVal % 16384; + p->rSum = (double)(iVal - iSm); + p->rErr = (double)iSm; + }else{ + p->rSum = (double)iVal; + p->rErr = 0.0; + } +} + /* ** Routines used to compute the sum, average, and total. 
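The kahanBabuskaNeumaierStep() routine above is the core of the reworked SUM()/AVG()/TOTAL(): compensated summation that banks the rounding error of every addition in a side term (rErr) and folds it back in at finalize time. A standalone illustration of the payoff, using the textbook algorithm with stand-in names rather than SQLite's build:

    #include <math.h>
    #include <stdio.h>

    typedef struct KbnSum { double rSum, rErr; } KbnSum;

    static void kbnStep(KbnSum *p, double r){
      double s = p->rSum;
      double t = s + r;                 /* rounded sum */
      if( fabs(s) > fabs(r) ){
        p->rErr += (s - t) + r;         /* bits of r lost in s+r */
      }else{
        p->rErr += (r - t) + s;         /* bits of s lost in s+r */
      }
      p->rSum = t;
    }

    int main(void){
      KbnSum k = {0.0, 0.0};
      double naive = 0.0;
      int i;
      /* Each 1.0 is below one ULP of 1e16, so the naive double sum never
      ** moves, while the compensated sum recovers all 1000 of them. */
      kbnStep(&k, 1e16);  naive += 1e16;
      for(i=0; i<1000; i++){ kbnStep(&k, 1.0); naive += 1.0; }
      printf("naive: %.1f\n", naive);           /* 10000000000000000.0 */
      printf("kbn:   %.1f\n", k.rSum + k.rErr); /* 10000000000001000.0 */
      return 0;
    }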
** @@ -123936,15 +129238,29 @@ static void sumStep(sqlite3_context *context, int argc, sqlite3_value **argv){ type = sqlite3_value_numeric_type(argv[0]); if( p && type!=SQLITE_NULL ){ p->cnt++; - if( type==SQLITE_INTEGER ){ - i64 v = sqlite3_value_int64(argv[0]); - p->rSum += v; - if( (p->approx|p->overflow)==0 && sqlite3AddInt64(&p->iSum, v) ){ - p->approx = p->overflow = 1; + if( p->approx==0 ){ + if( type!=SQLITE_INTEGER ){ + kahanBabuskaNeumaierInit(p, p->iSum); + p->approx = 1; + kahanBabuskaNeumaierStep(p, sqlite3_value_double(argv[0])); + }else{ + i64 x = p->iSum; + if( sqlite3AddInt64(&x, sqlite3_value_int64(argv[0]))==0 ){ + p->iSum = x; + }else{ + p->ovrfl = 1; + kahanBabuskaNeumaierInit(p, p->iSum); + p->approx = 1; + kahanBabuskaNeumaierStepInt64(p, sqlite3_value_int64(argv[0])); + } } }else{ - p->rSum += sqlite3_value_double(argv[0]); - p->approx = 1; + if( type==SQLITE_INTEGER ){ + kahanBabuskaNeumaierStepInt64(p, sqlite3_value_int64(argv[0])); + }else{ + p->ovrfl = 0; + kahanBabuskaNeumaierStep(p, sqlite3_value_double(argv[0])); + } } } } @@ -123961,13 +129277,18 @@ static void sumInverse(sqlite3_context *context, int argc, sqlite3_value**argv){ if( ALWAYS(p) && type!=SQLITE_NULL ){ assert( p->cnt>0 ); p->cnt--; - assert( type==SQLITE_INTEGER || p->approx ); - if( type==SQLITE_INTEGER && p->approx==0 ){ - i64 v = sqlite3_value_int64(argv[0]); - p->rSum -= v; - p->iSum -= v; + if( !p->approx ){ + p->iSum -= sqlite3_value_int64(argv[0]); + }else if( type==SQLITE_INTEGER ){ + i64 iVal = sqlite3_value_int64(argv[0]); + if( iVal!=SMALLEST_INT64 ){ + kahanBabuskaNeumaierStepInt64(p, -iVal); + }else{ + kahanBabuskaNeumaierStepInt64(p, LARGEST_INT64); + kahanBabuskaNeumaierStepInt64(p, 1); + } }else{ - p->rSum -= sqlite3_value_double(argv[0]); + kahanBabuskaNeumaierStep(p, -sqlite3_value_double(argv[0])); } } } @@ -123978,10 +129299,14 @@ static void sumFinalize(sqlite3_context *context){ SumCtx *p; p = sqlite3_aggregate_context(context, 0); if( p && p->cnt>0 ){ - if( p->overflow ){ - sqlite3_result_error(context,"integer overflow",-1); - }else if( p->approx ){ - sqlite3_result_double(context, p->rSum); + if( p->approx ){ + if( p->ovrfl ){ + sqlite3_result_error(context,"integer overflow",-1); + }else if( !sqlite3IsNaN(p->rErr) ){ + sqlite3_result_double(context, p->rSum+p->rErr); + }else{ + sqlite3_result_double(context, p->rSum); + } }else{ sqlite3_result_int64(context, p->iSum); } @@ -123991,14 +129316,29 @@ static void avgFinalize(sqlite3_context *context){ SumCtx *p; p = sqlite3_aggregate_context(context, 0); if( p && p->cnt>0 ){ - sqlite3_result_double(context, p->rSum/(double)p->cnt); + double r; + if( p->approx ){ + r = p->rSum; + if( !sqlite3IsNaN(p->rErr) ) r += p->rErr; + }else{ + r = (double)(p->iSum); + } + sqlite3_result_double(context, r/(double)p->cnt); } } static void totalFinalize(sqlite3_context *context){ SumCtx *p; + double r = 0.0; p = sqlite3_aggregate_context(context, 0); - /* (double)0 In case of SQLITE_OMIT_FLOATING_POINT... */ - sqlite3_result_double(context, p ? p->rSum : (double)0); + if( p ){ + if( p->approx ){ + r = p->rSum; + if( !sqlite3IsNaN(p->rErr) ) r += p->rErr; + }else{ + r = (double)(p->iSum); + } + } + sqlite3_result_double(context, r); } /* @@ -124117,6 +129457,7 @@ static void minMaxFinalize(sqlite3_context *context){ /* ** group_concat(EXPR, ?SEPARATOR?) +** string_agg(EXPR, SEPARATOR) ** ** The SEPARATOR goes before the EXPR string. This is tragic. 
The ** groupConcatInverse() implementation would have been easier if the @@ -124220,7 +129561,7 @@ static void groupConcatInverse( if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return; pGCC = (GroupConcatCtx*)sqlite3_aggregate_context(context, sizeof(*pGCC)); /* pGCC is always non-NULL since groupConcatStep() will have always - ** run frist to initialize it */ + ** run first to initialize it */ if( ALWAYS(pGCC) ){ int nVS; /* Must call sqlite3_value_text() to convert the argument into text prior @@ -124304,8 +129645,10 @@ SQLITE_PRIVATE void sqlite3RegisterPerConnectionBuiltinFunctions(sqlite3 *db){ ** sensitive. */ SQLITE_PRIVATE void sqlite3RegisterLikeFunctions(sqlite3 *db, int caseSensitive){ + FuncDef *pDef; struct compareInfo *pInfo; int flags; + int nArg; if( caseSensitive ){ pInfo = (struct compareInfo*)&likeInfoAlt; flags = SQLITE_FUNC_LIKE | SQLITE_FUNC_CASE; @@ -124313,10 +129656,13 @@ SQLITE_PRIVATE void sqlite3RegisterLikeFunctions(sqlite3 *db, int caseSensitive) pInfo = (struct compareInfo*)&likeInfoNorm; flags = SQLITE_FUNC_LIKE; } - sqlite3CreateFunc(db, "like", 2, SQLITE_UTF8, pInfo, likeFunc, 0, 0, 0, 0, 0); - sqlite3CreateFunc(db, "like", 3, SQLITE_UTF8, pInfo, likeFunc, 0, 0, 0, 0, 0); - sqlite3FindFunction(db, "like", 2, SQLITE_UTF8, 0)->funcFlags |= flags; - sqlite3FindFunction(db, "like", 3, SQLITE_UTF8, 0)->funcFlags |= flags; + for(nArg=2; nArg<=3; nArg++){ + sqlite3CreateFunc(db, "like", nArg, SQLITE_UTF8, pInfo, likeFunc, + 0, 0, 0, 0, 0); + pDef = sqlite3FindFunction(db, "like", nArg, SQLITE_UTF8, 0); + pDef->funcFlags |= flags; + pDef->funcFlags &= ~SQLITE_FUNC_UNSAFE; + } } /* @@ -124437,6 +129783,18 @@ static void ceilingFunc( static double xCeil(double x){ return ceil(x); } static double xFloor(double x){ return floor(x); } +/* +** Some systems do not have log2() and log10() in their standard math +** libraries. +*/ +#if defined(HAVE_LOG10) && HAVE_LOG10==0 +# define log10(X) (0.4342944819032517867*log(X)) +#endif +#if defined(HAVE_LOG2) && HAVE_LOG2==0 +# define log2(X) (1.442695040888963456*log(X)) +#endif + + /* ** Implementation of SQL functions: ** @@ -124475,17 +129833,15 @@ static void logFunc( } ans = log(x)/b; }else{ - ans = log(x); switch( SQLITE_PTR_TO_INT(sqlite3_user_data(context)) ){ case 1: - /* Convert from natural logarithm to log base 10 */ - ans /= M_LN10; + ans = log10(x); break; case 2: - /* Convert from natural logarithm to log base 2 */ - ans /= M_LN2; + ans = log2(x); break; default: + ans = log(x); break; } } @@ -124554,6 +129910,7 @@ static void piFunc( sqlite3_value **argv ){ assert( argc==0 ); + (void)argv; sqlite3_result_double(context, M_PI); } @@ -124577,6 +129934,37 @@ static void signFunc( sqlite3_result_int(context, x<0.0 ? -1 : x>0.0 ? +1 : 0); } +#ifdef SQLITE_DEBUG +/* +** Implementation of fpdecode(x,y,z) function. +** +** x is a real number that is to be decoded. y is the precision. +** z is the maximum real precision. 
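The HAVE_LOG10/HAVE_LOG2 fallback macros a little earlier synthesize the missing math-library functions through the change-of-base identity log_b(x) = ln(x)/ln(b), with 1/ln(10) and 1/ln(2) pre-folded into the constants you see in the hunk. A two-line sanity check of the identity:

    #include <math.h>
    #include <stdio.h>

    int main(void){
      printf("%.15f\n", 1.442695040888963456*log(8.0));     /* ~3 = log2(8) */
      printf("%.15f\n", 0.4342944819032517867*log(100.0));  /* ~2 = log10(100) */
      return 0;
    }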
+*/ +static void fpdecodeFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + FpDecode s; + double x; + int y, z; + char zBuf[100]; + UNUSED_PARAMETER(argc); + assert( argc==3 ); + x = sqlite3_value_double(argv[0]); + y = sqlite3_value_int(argv[1]); + z = sqlite3_value_int(argv[2]); + sqlite3FpDecode(&s, x, y, z); + if( s.isSpecial==2 ){ + sqlite3_snprintf(sizeof(zBuf), zBuf, "NaN"); + }else{ + sqlite3_snprintf(sizeof(zBuf), zBuf, "%c%.*s/%d", s.sign, s.n, s.z, s.iDP); + } + sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT); +} +#endif /* SQLITE_DEBUG */ + /* ** All of the FuncDef structures in the aBuiltinFunc[] array above ** to the global function hash table. This occurs at start-time (as @@ -124641,12 +130029,16 @@ SQLITE_PRIVATE void sqlite3RegisterBuiltinFunctions(void){ FUNCTION2(typeof, 1, 0, 0, typeofFunc, SQLITE_FUNC_TYPEOF), FUNCTION2(subtype, 1, 0, 0, subtypeFunc, SQLITE_FUNC_TYPEOF), FUNCTION2(length, 1, 0, 0, lengthFunc, SQLITE_FUNC_LENGTH), + FUNCTION2(octet_length, 1, 0, 0, bytelengthFunc,SQLITE_FUNC_BYTELEN), FUNCTION(instr, 2, 0, 0, instrFunc ), FUNCTION(printf, -1, 0, 0, printfFunc ), FUNCTION(format, -1, 0, 0, printfFunc ), FUNCTION(unicode, 1, 0, 0, unicodeFunc ), FUNCTION(char, -1, 0, 0, charFunc ), FUNCTION(abs, 1, 0, 0, absFunc ), +#ifdef SQLITE_DEBUG + FUNCTION(fpdecode, 3, 0, 0, fpdecodeFunc ), +#endif #ifndef SQLITE_OMIT_FLOATING_POINT FUNCTION(round, 1, 0, 0, roundFunc ), FUNCTION(round, 2, 0, 0, roundFunc ), @@ -124654,6 +130046,13 @@ SQLITE_PRIVATE void sqlite3RegisterBuiltinFunctions(void){ FUNCTION(upper, 1, 0, 0, upperFunc ), FUNCTION(lower, 1, 0, 0, lowerFunc ), FUNCTION(hex, 1, 0, 0, hexFunc ), + FUNCTION(unhex, 1, 0, 0, unhexFunc ), + FUNCTION(unhex, 2, 0, 0, unhexFunc ), + FUNCTION(concat, -1, 0, 0, concatFunc ), + FUNCTION(concat, 0, 0, 0, 0 ), + FUNCTION(concat_ws, -1, 0, 0, concatwsFunc ), + FUNCTION(concat_ws, 0, 0, 0, 0 ), + FUNCTION(concat_ws, 1, 0, 0, 0 ), INLINE_FUNC(ifnull, 2, INLINEFUNC_coalesce, 0 ), VFUNCTION(random, 0, 0, 0, randomFunc ), VFUNCTION(randomblob, 1, 0, 0, randomBlob ), @@ -124683,6 +130082,8 @@ SQLITE_PRIVATE void sqlite3RegisterBuiltinFunctions(void){ groupConcatFinalize, groupConcatValue, groupConcatInverse, 0), WAGGREGATE(group_concat, 2, 0, 0, groupConcatStep, groupConcatFinalize, groupConcatValue, groupConcatInverse, 0), + WAGGREGATE(string_agg, 2, 0, 0, groupConcatStep, + groupConcatFinalize, groupConcatValue, groupConcatInverse, 0), LIKEFUNC(glob, 2, &globInfo, SQLITE_FUNC_LIKE|SQLITE_FUNC_CASE), #ifdef SQLITE_CASE_SENSITIVE_LIKE @@ -125625,6 +131026,7 @@ static int isSetNullAction(Parse *pParse, FKey *pFKey){ if( (p==pFKey->apTrigger[0] && pFKey->aAction[0]==OE_SetNull) || (p==pFKey->apTrigger[1] && pFKey->aAction[1]==OE_SetNull) ){ + assert( (pTop->db->flags & SQLITE_FkNoAction)==0 ); return 1; } } @@ -125819,6 +131221,8 @@ SQLITE_PRIVATE void sqlite3FkCheck( } if( regOld!=0 ){ int eAction = pFKey->aAction[aChange!=0]; + if( (db->flags & SQLITE_FkNoAction) ) eAction = OE_None; + fkScanChildren(pParse, pSrc, pTab, pIdx, pFKey, aiCol, regOld, 1); /* If this is a deferred FK constraint, or a CASCADE or SET NULL ** action applies, then any foreign key violations caused by @@ -125934,7 +131338,11 @@ SQLITE_PRIVATE int sqlite3FkRequired( /* Check if any parent key columns are being modified. 
*/ for(p=sqlite3FkReferences(pTab); p; p=p->pNextTo){ if( fkParentIsModified(pTab, p, aChange, chngRowid) ){ - if( p->aAction[1]!=OE_None ) return 2; + if( (pParse->db->flags & SQLITE_FkNoAction)==0 + && p->aAction[1]!=OE_None + ){ + return 2; + } bHaveFK = 1; } } @@ -125984,6 +131392,7 @@ static Trigger *fkActionTrigger( int iAction = (pChanges!=0); /* 1 for UPDATE, 0 for DELETE */ action = pFKey->aAction[iAction]; + if( (db->flags & SQLITE_FkNoAction) ) action = OE_None; if( action==OE_Restrict && (db->flags & SQLITE_DeferFKs) ){ return 0; } @@ -126084,22 +131493,22 @@ static Trigger *fkActionTrigger( if( action==OE_Restrict ){ int iDb = sqlite3SchemaToIndex(db, pTab->pSchema); - Token tFrom; - Token tDb; + SrcList *pSrc; Expr *pRaise; - tFrom.z = zFrom; - tFrom.n = nFrom; - tDb.z = db->aDb[iDb].zDbSName; - tDb.n = sqlite3Strlen30(tDb.z); - pRaise = sqlite3Expr(db, TK_RAISE, "FOREIGN KEY constraint failed"); if( pRaise ){ pRaise->affExpr = OE_Abort; } + pSrc = sqlite3SrcListAppend(pParse, 0, 0, 0); + if( pSrc ){ + assert( pSrc->nSrc==1 ); + pSrc->a[0].zName = sqlite3DbStrDup(db, zFrom); + pSrc->a[0].zDatabase = sqlite3DbStrDup(db, db->aDb[iDb].zDbSName); + } pSelect = sqlite3SelectNew(pParse, sqlite3ExprListAppend(pParse, 0, pRaise), - sqlite3SrcListAppend(pParse, 0, &tDb, &tFrom), + pSrc, pWhere, 0, 0, 0, 0, 0 ); @@ -126206,17 +131615,17 @@ SQLITE_PRIVATE void sqlite3FkDelete(sqlite3 *db, Table *pTab){ FKey *pNext; /* Copy of pFKey->pNextFrom */ assert( IsOrdinaryTable(pTab) ); + assert( db!=0 ); for(pFKey=pTab->u.tab.pFKey; pFKey; pFKey=pNext){ assert( db==0 || sqlite3SchemaMutexHeld(db, 0, pTab->pSchema) ); /* Remove the FK from the fkeyHash hash table. */ - if( !db || db->pnBytesFreed==0 ){ + if( db->pnBytesFreed==0 ){ if( pFKey->pPrevTo ){ pFKey->pPrevTo->pNextTo = pFKey->pNextTo; }else{ - void *p = (void *)pFKey->pNextTo; - const char *z = (p ? pFKey->pNextTo->zTo : pFKey->zTo); - sqlite3HashInsert(&pTab->pSchema->fkeyHash, z, p); + const char *z = (pFKey->pNextTo ? pFKey->pNextTo->zTo : pFKey->zTo); + sqlite3HashInsert(&pTab->pSchema->fkeyHash, z, pFKey->pNextTo); } if( pFKey->pNextTo ){ pFKey->pNextTo->pPrevTo = pFKey->pPrevTo; @@ -126279,8 +131688,10 @@ SQLITE_PRIVATE void sqlite3OpenTable( assert( pParse->pVdbe!=0 ); v = pParse->pVdbe; assert( opcode==OP_OpenWrite || opcode==OP_OpenRead ); - sqlite3TableLock(pParse, iDb, pTab->tnum, - (opcode==OP_OpenWrite)?1:0, pTab->zName); + if( !pParse->db->noSharedCache ){ + sqlite3TableLock(pParse, iDb, pTab->tnum, + (opcode==OP_OpenWrite)?1:0, pTab->zName); + } if( HasRowid(pTab) ){ sqlite3VdbeAddOp4Int(v, opcode, iCur, pTab->tnum, iDb, pTab->nNVCol); VdbeComment((v, "%s", pTab->zName)); @@ -126314,43 +131725,68 @@ SQLITE_PRIVATE void sqlite3OpenTable( ** is managed along with the rest of the Index structure. It will be ** released when sqlite3DeleteIndex() is called. */ -SQLITE_PRIVATE const char *sqlite3IndexAffinityStr(sqlite3 *db, Index *pIdx){ +static SQLITE_NOINLINE const char *computeIndexAffStr(sqlite3 *db, Index *pIdx){ + /* The first time a column affinity string for a particular index is + ** required, it is allocated and populated here. It is then stored as + ** a member of the Index structure for subsequent use. + ** + ** The column affinity string will eventually be deleted by + ** sqliteDeleteIndex() when the Index structure itself is cleaned + ** up. 
+ */ + int n; + Table *pTab = pIdx->pTable; + pIdx->zColAff = (char *)sqlite3DbMallocRaw(0, pIdx->nColumn+1); if( !pIdx->zColAff ){ - /* The first time a column affinity string for a particular index is - ** required, it is allocated and populated here. It is then stored as - ** a member of the Index structure for subsequent use. - ** - ** The column affinity string will eventually be deleted by - ** sqliteDeleteIndex() when the Index structure itself is cleaned - ** up. - */ - int n; - Table *pTab = pIdx->pTable; - pIdx->zColAff = (char *)sqlite3DbMallocRaw(0, pIdx->nColumn+1); - if( !pIdx->zColAff ){ - sqlite3OomFault(db); - return 0; + sqlite3OomFault(db); + return 0; + } + for(n=0; nnColumn; n++){ + i16 x = pIdx->aiColumn[n]; + char aff; + if( x>=0 ){ + aff = pTab->aCol[x].affinity; + }else if( x==XN_ROWID ){ + aff = SQLITE_AFF_INTEGER; + }else{ + assert( x==XN_EXPR ); + assert( pIdx->bHasExpr ); + assert( pIdx->aColExpr!=0 ); + aff = sqlite3ExprAffinity(pIdx->aColExpr->a[n].pExpr); } - for(n=0; nnColumn; n++){ - i16 x = pIdx->aiColumn[n]; - char aff; - if( x>=0 ){ - aff = pTab->aCol[x].affinity; - }else if( x==XN_ROWID ){ - aff = SQLITE_AFF_INTEGER; - }else{ - assert( x==XN_EXPR ); - assert( pIdx->aColExpr!=0 ); - aff = sqlite3ExprAffinity(pIdx->aColExpr->a[n].pExpr); + if( affSQLITE_AFF_NUMERIC) aff = SQLITE_AFF_NUMERIC; + pIdx->zColAff[n] = aff; + } + pIdx->zColAff[n] = 0; + return pIdx->zColAff; +} +SQLITE_PRIVATE const char *sqlite3IndexAffinityStr(sqlite3 *db, Index *pIdx){ + if( !pIdx->zColAff ) return computeIndexAffStr(db, pIdx); + return pIdx->zColAff; +} + + +/* +** Compute an affinity string for a table. Space is obtained +** from sqlite3DbMalloc(). The caller is responsible for freeing +** the space when done. +*/ +SQLITE_PRIVATE char *sqlite3TableAffinityStr(sqlite3 *db, const Table *pTab){ + char *zColAff; + zColAff = (char *)sqlite3DbMallocRaw(db, pTab->nCol+1); + if( zColAff ){ + int i, j; + for(i=j=0; inCol; i++){ + if( (pTab->aCol[i].colFlags & COLFLAG_VIRTUAL)==0 ){ + zColAff[j++] = pTab->aCol[i].affinity; } - if( affSQLITE_AFF_NUMERIC) aff = SQLITE_AFF_NUMERIC; - pIdx->zColAff[n] = aff; } - pIdx->zColAff[n] = 0; + do{ + zColAff[j--] = 0; + }while( j>=0 && zColAff[j]<=SQLITE_AFF_BLOB ); } - - return pIdx->zColAff; + return zColAff; } /* @@ -126384,7 +131820,7 @@ SQLITE_PRIVATE const char *sqlite3IndexAffinityStr(sqlite3 *db, Index *pIdx){ ** For STRICT tables: ** ------------------ ** -** Generate an appropropriate OP_TypeCheck opcode that will verify the +** Generate an appropriate OP_TypeCheck opcode that will verify the ** datatypes against the column definitions in pTab. If iReg==0, that ** means an OP_MakeRecord opcode has already been generated and should be ** the last opcode generated. The new OP_TypeCheck needs to be inserted @@ -126394,7 +131830,7 @@ SQLITE_PRIVATE const char *sqlite3IndexAffinityStr(sqlite3 *db, Index *pIdx){ ** Apply the type checking to that array of registers. 
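The new sqlite3TableAffinityStr() above ends by truncating the affinity string back past any trailing no-op entries: OP_Affinity does nothing for BLOB-or-lower affinities, so a shorter P4 string means less work per row. A sketch of just that trim, assuming the usual single-letter affinity codes ('A' = SQLITE_AFF_BLOB, 'B' = TEXT, 'C' = NUMERIC in recent sqlite3.c; treated as an assumption here):

    #include <stdio.h>

    #define AFF_BLOB 'A'   /* assumed value of SQLITE_AFF_BLOB */

    /* Zero-terminate at nCol, then keep pulling the terminator left over
    ** any trailing affinities that OP_Affinity would ignore anyway. */
    static void trimAffinity(char *zColAff, int nCol){
      int j = nCol;
      do{
        zColAff[j--] = 0;
      }while( j>=0 && zColAff[j]<=AFF_BLOB );
    }

    int main(void){
      char aff[] = "BCAA";   /* TEXT, NUMERIC, BLOB, BLOB (illustrative) */
      trimAffinity(aff, 4);
      printf("\"%s\"\n", aff);  /* "BC": trailing no-op affinities dropped */
      return 0;
    }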
*/ SQLITE_PRIVATE void sqlite3TableAffinity(Vdbe *v, Table *pTab, int iReg){ - int i, j; + int i; char *zColAff; if( pTab->tabFlags & TF_Strict ){ if( iReg==0 ){ @@ -126403,7 +131839,7 @@ SQLITE_PRIVATE void sqlite3TableAffinity(Vdbe *v, Table *pTab, int iReg){ ** OP_MakeRecord is found */ VdbeOp *pPrev; sqlite3VdbeAppendP4(v, pTab, P4_TABLE); - pPrev = sqlite3VdbeGetOp(v, -1); + pPrev = sqlite3VdbeGetLastOp(v); assert( pPrev!=0 ); assert( pPrev->opcode==OP_MakeRecord || sqlite3VdbeDb(v)->mallocFailed ); pPrev->opcode = OP_TypeCheck; @@ -126417,22 +131853,11 @@ SQLITE_PRIVATE void sqlite3TableAffinity(Vdbe *v, Table *pTab, int iReg){ } zColAff = pTab->zColAff; if( zColAff==0 ){ - sqlite3 *db = sqlite3VdbeDb(v); - zColAff = (char *)sqlite3DbMallocRaw(0, pTab->nCol+1); + zColAff = sqlite3TableAffinityStr(0, pTab); if( !zColAff ){ - sqlite3OomFault(db); + sqlite3OomFault(sqlite3VdbeDb(v)); return; } - - for(i=j=0; inCol; i++){ - assert( pTab->aCol[i].affinity!=0 || sqlite3VdbeParser(v)->nErr>0 ); - if( (pTab->aCol[i].colFlags & COLFLAG_VIRTUAL)==0 ){ - zColAff[j++] = pTab->aCol[i].affinity; - } - } - do{ - zColAff[j--] = 0; - }while( j>=0 && zColAff[j]<=SQLITE_AFF_BLOB ); pTab->zColAff = zColAff; } assert( zColAff!=0 ); @@ -126441,7 +131866,7 @@ SQLITE_PRIVATE void sqlite3TableAffinity(Vdbe *v, Table *pTab, int iReg){ if( iReg ){ sqlite3VdbeAddOp4(v, OP_Affinity, iReg, i, 0, zColAff, i); }else{ - assert( sqlite3VdbeGetOp(v, -1)->opcode==OP_MakeRecord + assert( sqlite3VdbeGetLastOp(v)->opcode==OP_MakeRecord || sqlite3VdbeDb(v)->mallocFailed ); sqlite3VdbeChangeP4(v, -1, zColAff, i); } @@ -126527,7 +131952,7 @@ SQLITE_PRIVATE void sqlite3ComputeGeneratedColumns( */ sqlite3TableAffinity(pParse->pVdbe, pTab, iRegStore); if( (pTab->tabFlags & TF_HasStored)!=0 ){ - pOp = sqlite3VdbeGetOp(pParse->pVdbe,-1); + pOp = sqlite3VdbeGetLastOp(pParse->pVdbe); if( pOp->opcode==OP_Affinity ){ /* Change the OP_Affinity argument to '@' (NONE) for all stored ** columns. '@' is the no-op affinity and those columns have not @@ -127026,7 +132451,7 @@ SQLITE_PRIVATE void sqlite3Insert( /* Cannot insert into a read-only table. */ - if( sqlite3IsReadOnly(pParse, pTab, tmask) ){ + if( sqlite3IsReadOnly(pParse, pTab, pTrigger) ){ goto insert_cleanup; } @@ -127433,7 +132858,12 @@ SQLITE_PRIVATE void sqlite3Insert( sqlite3VdbeAddOp2(v, OP_SCopy, regFromSelect+k, iRegStore); } }else{ - sqlite3ExprCode(pParse, pList->a[k].pExpr, iRegStore); + Expr *pX = pList->a[k].pExpr; + int y = sqlite3ExprCodeTarget(pParse, pX, iRegStore); + if( y!=iRegStore ){ + sqlite3VdbeAddOp2(v, + ExprHasProperty(pX, EP_Subquery) ? OP_Copy : OP_SCopy, y, iRegStore); + } } } @@ -127468,7 +132898,7 @@ SQLITE_PRIVATE void sqlite3Insert( } /* Copy the new data already generated. 
*/ - assert( pTab->nNVCol>0 ); + assert( pTab->nNVCol>0 || pParse->nErr>0 ); sqlite3VdbeAddOp3(v, OP_Copy, regRowid+1, regCols+1, pTab->nNVCol-1); #ifndef SQLITE_OMIT_GENERATED_COLUMNS @@ -127570,7 +133000,9 @@ SQLITE_PRIVATE void sqlite3Insert( sqlite3GenerateConstraintChecks(pParse, pTab, aRegIdx, iDataCur, iIdxCur, regIns, 0, ipkColumn>=0, onError, endOfLoop, &isReplace, 0, pUpsert ); - sqlite3FkCheck(pParse, pTab, 0, regIns, 0, 0); + if( db->flags & SQLITE_ForeignKeys ){ + sqlite3FkCheck(pParse, pTab, 0, regIns, 0, 0); + } /* Set the OPFLAG_USESEEKRESULT flag if either (a) there are no REPLACE ** constraints or (b) there are no triggers and this table is not a @@ -127654,7 +133086,7 @@ SQLITE_PRIVATE void sqlite3Insert( sqlite3UpsertDelete(db, pUpsert); sqlite3SelectDelete(db, pSelect); sqlite3IdListDelete(db, pColumn); - sqlite3DbFree(db, aRegIdx); + if( aRegIdx ) sqlite3DbNNFreeNN(db, aRegIdx); } /* Make sure "isView" and other macros defined above are undefined. Otherwise @@ -127680,7 +133112,7 @@ SQLITE_PRIVATE void sqlite3Insert( /* This is the Walker callback from sqlite3ExprReferencesUpdatedColumn(). * Set bit 0x01 of pWalker->eCode if pWalker->eCode to 0 and if this ** expression node references any of the -** columns that are being modifed by an UPDATE statement. +** columns that are being modified by an UPDATE statement. */ static int checkConstraintExprNode(Walker *pWalker, Expr *pExpr){ if( pExpr->op==TK_COLUMN ){ @@ -127903,7 +133335,7 @@ SQLITE_PRIVATE void sqlite3GenerateConstraintChecks( int *aiChng, /* column i is unchanged if aiChng[i]<0 */ Upsert *pUpsert /* ON CONFLICT clauses, if any. NULL otherwise */ ){ - Vdbe *v; /* VDBE under constrution */ + Vdbe *v; /* VDBE under construction */ Index *pIdx; /* Pointer to one of the indices */ Index *pPk = 0; /* The PRIMARY KEY index for WITHOUT ROWID tables */ sqlite3 *db; /* Database connection */ @@ -128018,6 +133450,7 @@ SQLITE_PRIVATE void sqlite3GenerateConstraintChecks( case OE_Fail: { char *zMsg = sqlite3MPrintf(db, "%s.%s", pTab->zName, pCol->zCnName); + testcase( zMsg==0 && db->mallocFailed==0 ); sqlite3VdbeAddOp3(v, OP_HaltIfNull, SQLITE_CONSTRAINT_NOTNULL, onError, iReg); sqlite3VdbeAppendP4(v, zMsg, P4_DYNAMIC); @@ -128385,7 +133818,7 @@ SQLITE_PRIVATE void sqlite3GenerateConstraintChecks( pIdx; pIdx = indexIteratorNext(&sIdxIter, &ix) ){ - int regIdx; /* Range of registers hold conent for pIdx */ + int regIdx; /* Range of registers holding content for pIdx */ int regR; /* Range of registers holding conflicting PK */ int iThisCur; /* Cursor for this UNIQUE index */ int addrUniqueOk; /* Jump here if the UNIQUE constraint is satisfied */ @@ -128880,6 +134313,8 @@ SQLITE_PRIVATE int sqlite3OpenTableAndIndices( assert( op==OP_OpenRead || op==OP_OpenWrite ); assert( op==OP_OpenWrite || p5==0 ); + assert( piDataCur!=0 ); + assert( piIdxCur!=0 ); if( IsVirtual(pTab) ){ /* This routine is a no-op for virtual tables. 
Leave the output ** variables *piDataCur and *piIdxCur set to illegal cursor numbers @@ -128892,18 +134327,18 @@ SQLITE_PRIVATE int sqlite3OpenTableAndIndices( assert( v!=0 ); if( iBase<0 ) iBase = pParse->nTab; iDataCur = iBase++; - if( piDataCur ) *piDataCur = iDataCur; + *piDataCur = iDataCur; if( HasRowid(pTab) && (aToOpen==0 || aToOpen[0]) ){ sqlite3OpenTable(pParse, iDataCur, iDb, pTab, op); - }else{ + }else if( pParse->db->noSharedCache==0 ){ sqlite3TableLock(pParse, iDb, pTab->tnum, op==OP_OpenWrite, pTab->zName); } - if( piIdxCur ) *piIdxCur = iBase; + *piIdxCur = iBase; for(i=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, i++){ int iIdxCur = iBase++; assert( pIdx->pSchema==pTab->pSchema ); if( IsPrimaryKeyIndex(pIdx) && !HasRowid(pTab) ){ - if( piDataCur ) *piDataCur = iIdxCur; + *piDataCur = iIdxCur; p5 = 0; } if( aToOpen==0 || aToOpen[i+1] ){ @@ -129201,7 +134636,7 @@ static int xferOptimization( } #endif #ifndef SQLITE_OMIT_FOREIGN_KEY - /* Disallow the transfer optimization if the destination table constains + /* Disallow the transfer optimization if the destination table contains ** any foreign key constraints. This is more restrictive than necessary. ** But the main beneficiary of the transfer optimization is the VACUUM ** command, and the VACUUM command disables foreign key constraints. So @@ -129881,9 +135316,9 @@ struct sqlite3_api_routines { const char *(*filename_journal)(const char*); const char *(*filename_wal)(const char*); /* Version 3.32.0 and later */ - char *(*create_filename)(const char*,const char*,const char*, + const char *(*create_filename)(const char*,const char*,const char*, int,const char**); - void (*free_filename)(char*); + void (*free_filename)(const char*); sqlite3_file *(*database_file_object)(const char*); /* Version 3.34.0 and later */ int (*txn_state)(sqlite3*,const char*); @@ -129907,6 +135342,15 @@ struct sqlite3_api_routines { unsigned char *(*serialize)(sqlite3*,const char *,sqlite3_int64*, unsigned int); const char *(*db_name)(sqlite3*,int); + /* Version 3.40.0 and later */ + int (*value_encoding)(sqlite3_value*); + /* Version 3.41.0 and later */ + int (*is_interrupted)(sqlite3*); + /* Version 3.43.0 and later */ + int (*stmt_explain)(sqlite3_stmt*,int); + /* Version 3.44.0 and later */ + void *(*get_clientdata)(sqlite3*,const char*); + int (*set_clientdata)(sqlite3*, const char*, void*, void(*)(void*)); }; /* @@ -130231,6 +135675,15 @@ typedef int (*sqlite3_loadext_entry)( #define sqlite3_serialize sqlite3_api->serialize #endif #define sqlite3_db_name sqlite3_api->db_name +/* Version 3.40.0 and later */ +#define sqlite3_value_encoding sqlite3_api->value_encoding +/* Version 3.41.0 and later */ +#define sqlite3_is_interrupted sqlite3_api->is_interrupted +/* Version 3.43.0 and later */ +#define sqlite3_stmt_explain sqlite3_api->stmt_explain +/* Version 3.44.0 and later */ +#define sqlite3_get_clientdata sqlite3_api->get_clientdata +#define sqlite3_set_clientdata sqlite3_api->set_clientdata #endif /* !defined(SQLITE_CORE) && !defined(SQLITE_OMIT_LOAD_EXTENSION) */ #if !defined(SQLITE_CORE) && !defined(SQLITE_OMIT_LOAD_EXTENSION) @@ -130743,7 +136196,16 @@ static const sqlite3_api_routines sqlite3Apis = { 0, 0, #endif - sqlite3_db_name + sqlite3_db_name, + /* Version 3.40.0 and later */ + sqlite3_value_encoding, + /* Version 3.41.0 and later */ + sqlite3_is_interrupted, + /* Version 3.43.0 and later */ + sqlite3_stmt_explain, + /* Version 3.44.0 and later */ + sqlite3_get_clientdata, + sqlite3_set_clientdata }; /* True if x is the directory 
separator character @@ -130816,15 +136278,25 @@ static int sqlite3LoadExtension( /* tag-20210611-1. Some dlopen() implementations will segfault if given ** an oversize filename. Most filesystems have a pathname limit of 4K, ** so limit the extension filename length to about twice that. - ** https://sqlite.org/forum/forumpost/08a0d6d9bf */ + ** https://sqlite.org/forum/forumpost/08a0d6d9bf + ** + ** Later (2023-03-25): Save an extra 6 bytes for the filename suffix. + ** See https://sqlite.org/forum/forumpost/24083b579d. + */ if( nMsg>SQLITE_MAX_PATHLEN ) goto extension_not_found; + /* Do not allow sqlite3_load_extension() to link to a copy of the + ** running application, by passing in an empty filename. */ + if( nMsg==0 ) goto extension_not_found; + handle = sqlite3OsDlOpen(pVfs, zFile); #if SQLITE_OS_UNIX || SQLITE_OS_WIN for(ii=0; iimutex); if( onoff ){ db->flags |= SQLITE_LoadExtension|SQLITE_LoadExtFunc; @@ -130998,6 +136473,9 @@ SQLITE_API int sqlite3_auto_extension( void (*xInit)(void) ){ int rc = SQLITE_OK; +#ifdef SQLITE_ENABLE_API_ARMOR + if( xInit==0 ) return SQLITE_MISUSE_BKPT; +#endif #ifndef SQLITE_OMIT_AUTOINIT rc = sqlite3_initialize(); if( rc ){ @@ -131050,6 +136528,9 @@ SQLITE_API int sqlite3_cancel_auto_extension( int i; int n = 0; wsdAutoextInit; +#ifdef SQLITE_ENABLE_API_ARMOR + if( xInit==0 ) return 0; +#endif sqlite3_mutex_enter(mutex); for(i=(int)wsdAutoext.nExt-1; i>=0; i--){ if( wsdAutoext.aExt[i]==xInit ){ @@ -132649,7 +138130,7 @@ SQLITE_PRIVATE void sqlite3Pragma( ** ** The first form reports the current local setting for the ** page cache spill size. The second form turns cache spill on - ** or off. When turnning cache spill on, the size is set to the + ** or off. When turning cache spill on, the size is set to the ** current cache_size. The third form sets a spill size that ** may be different form the cache size. ** If N is positive then that is the @@ -132919,7 +138400,11 @@ SQLITE_PRIVATE void sqlite3Pragma( #endif if( sqlite3GetBoolean(zRight, 0) ){ - db->flags |= mask; + if( (mask & SQLITE_WriteSchema)==0 + || (db->flags & SQLITE_Defensive)==0 + ){ + db->flags |= mask; + } }else{ db->flags &= ~mask; if( mask==SQLITE_DeferFKs ) db->nDeferredImmCons = 0; @@ -133319,7 +138804,7 @@ SQLITE_PRIVATE void sqlite3Pragma( zDb = db->aDb[iDb].zDbSName; sqlite3CodeVerifySchema(pParse, iDb); sqlite3TableLock(pParse, iDb, pTab->tnum, 0, pTab->zName); - if( pTab->nCol+regRow>pParse->nMem ) pParse->nMem = pTab->nCol + regRow; + sqlite3TouchRegister(pParse, pTab->nCol+regRow); sqlite3OpenTable(pParse, 0, iDb, pTab, OP_OpenRead); sqlite3VdbeLoadString(v, regResult, pTab->zName); assert( IsOrdinaryTable(pTab) ); @@ -133360,7 +138845,7 @@ SQLITE_PRIVATE void sqlite3Pragma( ** regRow..regRow+n. If any of the child key values are NULL, this ** row cannot cause an FK violation. Jump directly to addrOk in ** this case. */ - if( regRow+pFK->nCol>pParse->nMem ) pParse->nMem = regRow+pFK->nCol; + sqlite3TouchRegister(pParse, regRow + pFK->nCol); for(j=0; jnCol; j++){ int iCol = aiCols ? aiCols[j] : pFK->aCol[j].iFrom; sqlite3ExprCodeGetColumnOfTable(v, pTab, 0, iCol, regRow+j); @@ -133427,9 +138912,9 @@ SQLITE_PRIVATE void sqlite3Pragma( ** The "quick_check" is reduced version of ** integrity_check designed to detect most database corruption ** without the overhead of cross-checking indexes. Quick_check - ** is linear time wherease integrity_check is O(NlogN). + ** is linear time whereas integrity_check is O(NlogN). ** - ** The maximum nubmer of errors is 100 by default. 
A different default + ** The maximum number of errors is 100 by default. A different default ** can be specified using a numeric parameter N. ** ** Or, the parameter N can be the name of a table. In that case, only @@ -133489,6 +138974,7 @@ SQLITE_PRIVATE void sqlite3Pragma( if( iDb>=0 && i!=iDb ) continue; sqlite3CodeVerifySchema(pParse, i); + pParse->okConstFactor = 0; /* tag-20230327-1 */ /* Do an integrity check of the B-Tree ** @@ -133524,7 +139010,7 @@ SQLITE_PRIVATE void sqlite3Pragma( aRoot[0] = cnt; /* Make sure sufficient number of registers have been allocated */ - pParse->nMem = MAX( pParse->nMem, 8+mxIdx ); + sqlite3TouchRegister(pParse, 8+mxIdx); sqlite3ClearTempRegCache(pParse); /* Do the b-tree integrity checks */ @@ -133543,15 +139029,47 @@ SQLITE_PRIVATE void sqlite3Pragma( for(x=sqliteHashFirst(pTbls); x; x=sqliteHashNext(x)){ Table *pTab = sqliteHashData(x); Index *pIdx, *pPk; - Index *pPrior = 0; + Index *pPrior = 0; /* Previous index */ int loopTop; int iDataCur, iIdxCur; int r1 = -1; - int bStrict; + int bStrict; /* True for a STRICT table */ + int r2; /* Previous key for WITHOUT ROWID tables */ + int mxCol; /* Maximum non-virtual column number */ - if( !IsOrdinaryTable(pTab) ) continue; if( pObjTab && pObjTab!=pTab ) continue; - pPk = HasRowid(pTab) ? 0 : sqlite3PrimaryKeyIndex(pTab); + if( !IsOrdinaryTable(pTab) ){ +#ifndef SQLITE_OMIT_VIRTUALTABLE + sqlite3_vtab *pVTab; + int a1; + if( !IsVirtual(pTab) ) continue; + if( pTab->nCol<=0 ){ + const char *zMod = pTab->u.vtab.azArg[0]; + if( sqlite3HashFind(&db->aModule, zMod)==0 ) continue; + } + sqlite3ViewGetColumnNames(pParse, pTab); + if( pTab->u.vtab.p==0 ) continue; + pVTab = pTab->u.vtab.p->pVtab; + if( NEVER(pVTab==0) ) continue; + if( NEVER(pVTab->pModule==0) ) continue; + if( pVTab->pModule->iVersion<4 ) continue; + if( pVTab->pModule->xIntegrity==0 ) continue; + sqlite3VdbeAddOp3(v, OP_VCheck, i, 3, isQuick); + sqlite3VdbeAppendP4(v, pTab, P4_TABLE); + a1 = sqlite3VdbeAddOp1(v, OP_IsNull, 3); VdbeCoverage(v); + integrityCheckResultRow(v); + sqlite3VdbeJumpHere(v, a1); +#endif + continue; + } + if( isQuick || HasRowid(pTab) ){ + pPk = 0; + r2 = 0; + }else{ + pPk = sqlite3PrimaryKeyIndex(pTab); + r2 = sqlite3GetTempRange(pParse, pPk->nKeyCol); + sqlite3VdbeAddOp3(v, OP_Null, 1, r2, r2+pPk->nKeyCol-1); + } sqlite3OpenTableAndIndices(pParse, pTab, OP_OpenRead, 0, 1, 0, &iDataCur, &iIdxCur); /* reg[7] counts the number of entries in the table. @@ -133565,52 +139083,180 @@ SQLITE_PRIVATE void sqlite3Pragma( assert( sqlite3NoTempsInRange(pParse,1,7+j) ); sqlite3VdbeAddOp2(v, OP_Rewind, iDataCur, 0); VdbeCoverage(v); loopTop = sqlite3VdbeAddOp2(v, OP_AddImm, 7, 1); + + /* Fetch the right-most column from the table. This will cause + ** the entire record header to be parsed and sanity checked. It + ** will also prepopulate the cursor column cache that is used + ** by the OP_IsType code, so it is a required step. + */ + assert( !IsVirtual(pTab) ); + if( HasRowid(pTab) ){ + mxCol = -1; + for(j=0; jnCol; j++){ + if( (pTab->aCol[j].colFlags & COLFLAG_VIRTUAL)==0 ) mxCol++; + } + if( mxCol==pTab->iPKey ) mxCol--; + }else{ + /* COLFLAG_VIRTUAL columns are not included in the WITHOUT ROWID + ** PK index column-count, so there is no need to account for them + ** in this case. 
*/ + mxCol = sqlite3PrimaryKeyIndex(pTab)->nColumn-1; + } + if( mxCol>=0 ){ + sqlite3VdbeAddOp3(v, OP_Column, iDataCur, mxCol, 3); + sqlite3VdbeTypeofColumn(v, 3); + } + if( !isQuick ){ - /* Sanity check on record header decoding */ - sqlite3VdbeAddOp3(v, OP_Column, iDataCur, pTab->nNVCol-1,3); - sqlite3VdbeChangeP5(v, OPFLAG_TYPEOFARG); - VdbeComment((v, "(right-most column)")); + if( pPk ){ + /* Verify WITHOUT ROWID keys are in ascending order */ + int a1; + char *zErr; + a1 = sqlite3VdbeAddOp4Int(v, OP_IdxGT, iDataCur, 0,r2,pPk->nKeyCol); + VdbeCoverage(v); + sqlite3VdbeAddOp1(v, OP_IsNull, r2); VdbeCoverage(v); + zErr = sqlite3MPrintf(db, + "row not in PRIMARY KEY order for %s", + pTab->zName); + sqlite3VdbeAddOp4(v, OP_String8, 0, 3, 0, zErr, P4_DYNAMIC); + integrityCheckResultRow(v); + sqlite3VdbeJumpHere(v, a1); + sqlite3VdbeJumpHere(v, a1+1); + for(j=0; jnKeyCol; j++){ + sqlite3ExprCodeLoadIndexColumn(pParse, pPk, iDataCur, j, r2+j); + } + } } - /* Verify that all NOT NULL columns really are NOT NULL. At the - ** same time verify the type of the content of STRICT tables */ + /* Verify datatypes for all columns: + ** + ** (1) NOT NULL columns may not contain a NULL + ** (2) Datatype must be exact for non-ANY columns in STRICT tables + ** (3) Datatype for TEXT columns in non-STRICT tables must be + ** NULL, TEXT, or BLOB. + ** (4) Datatype for numeric columns in non-STRICT tables must not + ** be a TEXT value that can be losslessly converted to numeric. + */ bStrict = (pTab->tabFlags & TF_Strict)!=0; for(j=0; jnCol; j++){ char *zErr; - Column *pCol = pTab->aCol + j; - int doError, jmp2; + Column *pCol = pTab->aCol + j; /* The column to be checked */ + int labelError; /* Jump here to report an error */ + int labelOk; /* Jump here if all looks ok */ + int p1, p3, p4; /* Operands to the OP_IsType opcode */ + int doTypeCheck; /* Check datatypes (besides NOT NULL) */ + if( j==pTab->iPKey ) continue; - if( pCol->notNull==0 && !bStrict ) continue; - doError = bStrict ? 
sqlite3VdbeMakeLabel(pParse) : 0; - sqlite3ExprCodeGetColumnOfTable(v, pTab, iDataCur, j, 3); - if( sqlite3VdbeGetOp(v,-1)->opcode==OP_Column ){ - sqlite3VdbeChangeP5(v, OPFLAG_TYPEOFARG); + if( bStrict ){ + doTypeCheck = pCol->eCType>COLTYPE_ANY; + }else{ + doTypeCheck = pCol->affinity>SQLITE_AFF_BLOB; } + if( pCol->notNull==0 && !doTypeCheck ) continue; + + /* Compute the operands that will be needed for OP_IsType */ + p4 = SQLITE_NULL; + if( pCol->colFlags & COLFLAG_VIRTUAL ){ + sqlite3ExprCodeGetColumnOfTable(v, pTab, iDataCur, j, 3); + p1 = -1; + p3 = 3; + }else{ + if( pCol->iDflt ){ + sqlite3_value *pDfltValue = 0; + sqlite3ValueFromExpr(db, sqlite3ColumnExpr(pTab,pCol), ENC(db), + pCol->affinity, &pDfltValue); + if( pDfltValue ){ + p4 = sqlite3_value_type(pDfltValue); + sqlite3ValueFree(pDfltValue); + } + } + p1 = iDataCur; + if( !HasRowid(pTab) ){ + testcase( j!=sqlite3TableColumnToStorage(pTab, j) ); + p3 = sqlite3TableColumnToIndex(sqlite3PrimaryKeyIndex(pTab), j); + }else{ + p3 = sqlite3TableColumnToStorage(pTab,j); + testcase( p3!=j); + } + } + + labelError = sqlite3VdbeMakeLabel(pParse); + labelOk = sqlite3VdbeMakeLabel(pParse); if( pCol->notNull ){ - jmp2 = sqlite3VdbeAddOp1(v, OP_NotNull, 3); VdbeCoverage(v); + /* (1) NOT NULL columns may not contain a NULL */ + int jmp3; + int jmp2 = sqlite3VdbeAddOp4Int(v, OP_IsType, p1, labelOk, p3, p4); + VdbeCoverage(v); + if( p1<0 ){ + sqlite3VdbeChangeP5(v, 0x0f); /* INT, REAL, TEXT, or BLOB */ + jmp3 = jmp2; + }else{ + sqlite3VdbeChangeP5(v, 0x0d); /* INT, TEXT, or BLOB */ + /* OP_IsType does not detect NaN values in the database file + ** which should be treated as a NULL. So if the header type + ** is REAL, we have to load the actual data using OP_Column + ** to reliably determine if the value is a NULL. */ + sqlite3VdbeAddOp3(v, OP_Column, p1, p3, 3); + jmp3 = sqlite3VdbeAddOp2(v, OP_NotNull, 3, labelOk); + VdbeCoverage(v); + } zErr = sqlite3MPrintf(db, "NULL value in %s.%s", pTab->zName, pCol->zCnName); sqlite3VdbeAddOp4(v, OP_String8, 0, 3, 0, zErr, P4_DYNAMIC); - if( bStrict && pCol->eCType!=COLTYPE_ANY ){ - sqlite3VdbeGoto(v, doError); + if( doTypeCheck ){ + sqlite3VdbeGoto(v, labelError); + sqlite3VdbeJumpHere(v, jmp2); + sqlite3VdbeJumpHere(v, jmp3); }else{ - integrityCheckResultRow(v); + /* VDBE byte code will fall thru */ } - sqlite3VdbeJumpHere(v, jmp2); } - if( (pTab->tabFlags & TF_Strict)!=0 - && pCol->eCType!=COLTYPE_ANY - ){ - jmp2 = sqlite3VdbeAddOp3(v, OP_IsNullOrType, 3, 0, - sqlite3StdTypeMap[pCol->eCType-1]); + if( bStrict && doTypeCheck ){ + /* (2) Datatype must be exact for non-ANY columns in STRICT tables*/ + static unsigned char aStdTypeMask[] = { + 0x1f, /* ANY */ + 0x18, /* BLOB */ + 0x11, /* INT */ + 0x11, /* INTEGER */ + 0x13, /* REAL */ + 0x14 /* TEXT */ + }; + sqlite3VdbeAddOp4Int(v, OP_IsType, p1, labelOk, p3, p4); + assert( pCol->eCType>=1 && pCol->eCType<=sizeof(aStdTypeMask) ); + sqlite3VdbeChangeP5(v, aStdTypeMask[pCol->eCType-1]); VdbeCoverage(v); zErr = sqlite3MPrintf(db, "non-%s value in %s.%s", sqlite3StdType[pCol->eCType-1], pTab->zName, pTab->aCol[j].zCnName); sqlite3VdbeAddOp4(v, OP_String8, 0, 3, 0, zErr, P4_DYNAMIC); - sqlite3VdbeResolveLabel(v, doError); - integrityCheckResultRow(v); - sqlite3VdbeJumpHere(v, jmp2); + }else if( !bStrict && pCol->affinity==SQLITE_AFF_TEXT ){ + /* (3) Datatype for TEXT columns in non-STRICT tables must be + ** NULL, TEXT, or BLOB. 
*/ + sqlite3VdbeAddOp4Int(v, OP_IsType, p1, labelOk, p3, p4); + sqlite3VdbeChangeP5(v, 0x1c); /* NULL, TEXT, or BLOB */ + VdbeCoverage(v); + zErr = sqlite3MPrintf(db, "NUMERIC value in %s.%s", + pTab->zName, pTab->aCol[j].zCnName); + sqlite3VdbeAddOp4(v, OP_String8, 0, 3, 0, zErr, P4_DYNAMIC); + }else if( !bStrict && pCol->affinity>=SQLITE_AFF_NUMERIC ){ + /* (4) Datatype for numeric columns in non-STRICT tables must not + ** be a TEXT value that can be converted to numeric. */ + sqlite3VdbeAddOp4Int(v, OP_IsType, p1, labelOk, p3, p4); + sqlite3VdbeChangeP5(v, 0x1b); /* NULL, INT, FLOAT, or BLOB */ + VdbeCoverage(v); + if( p1>=0 ){ + sqlite3ExprCodeGetColumnOfTable(v, pTab, iDataCur, j, 3); + } + sqlite3VdbeAddOp4(v, OP_Affinity, 3, 1, 0, "C", P4_STATIC); + sqlite3VdbeAddOp4Int(v, OP_IsType, -1, labelOk, 3, p4); + sqlite3VdbeChangeP5(v, 0x1c); /* NULL, TEXT, or BLOB */ + VdbeCoverage(v); + zErr = sqlite3MPrintf(db, "TEXT value in %s.%s", + pTab->zName, pTab->aCol[j].zCnName); + sqlite3VdbeAddOp4(v, OP_String8, 0, 3, 0, zErr, P4_DYNAMIC); } + sqlite3VdbeResolveLabel(v, labelError); + integrityCheckResultRow(v); + sqlite3VdbeResolveLabel(v, labelOk); } /* Verify CHECK constraints */ if( pTab->pCheck && (db->flags & SQLITE_IgnoreChecks)==0 ){ @@ -133639,7 +139285,8 @@ SQLITE_PRIVATE void sqlite3Pragma( if( !isQuick ){ /* Omit the remaining tests for quick_check */ /* Validate index entries for the current row */ for(j=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, j++){ - int jmp2, jmp3, jmp4, jmp5; + int jmp2, jmp3, jmp4, jmp5, label6; + int kk; int ckUniq = sqlite3VdbeMakeLabel(pParse); if( pPk==pIdx ) continue; r1 = sqlite3GenerateIndexKey(pParse, pIdx, iDataCur, 0, 0, &jmp3, @@ -133657,13 +139304,49 @@ SQLITE_PRIVATE void sqlite3Pragma( sqlite3VdbeAddOp3(v, OP_Concat, 4, 3, 3); jmp4 = integrityCheckResultRow(v); sqlite3VdbeJumpHere(v, jmp2); + + /* The OP_IdxRowid opcode is an optimized version of OP_Column + ** that extracts the rowid off the end of the index record. + ** But it only works correctly if index record does not have + ** any extra bytes at the end. Verify that this is the case. */ + if( HasRowid(pTab) ){ + int jmp7; + sqlite3VdbeAddOp2(v, OP_IdxRowid, iIdxCur+j, 3); + jmp7 = sqlite3VdbeAddOp3(v, OP_Eq, 3, 0, r1+pIdx->nColumn-1); + VdbeCoverageNeverNull(v); + sqlite3VdbeLoadString(v, 3, + "rowid not at end-of-record for row "); + sqlite3VdbeAddOp3(v, OP_Concat, 7, 3, 3); + sqlite3VdbeLoadString(v, 4, " of index "); + sqlite3VdbeGoto(v, jmp5-1); + sqlite3VdbeJumpHere(v, jmp7); + } + + /* Any indexed columns with non-BINARY collations must still hold + ** the exact same text value as the table. */ + label6 = 0; + for(kk=0; kknKeyCol; kk++){ + if( pIdx->azColl[kk]==sqlite3StrBINARY ) continue; + if( label6==0 ) label6 = sqlite3VdbeMakeLabel(pParse); + sqlite3VdbeAddOp3(v, OP_Column, iIdxCur+j, kk, 3); + sqlite3VdbeAddOp3(v, OP_Ne, 3, label6, r1+kk); VdbeCoverage(v); + } + if( label6 ){ + int jmp6 = sqlite3VdbeAddOp0(v, OP_Goto); + sqlite3VdbeResolveLabel(v, label6); + sqlite3VdbeLoadString(v, 3, "row "); + sqlite3VdbeAddOp3(v, OP_Concat, 7, 3, 3); + sqlite3VdbeLoadString(v, 4, " values differ from index "); + sqlite3VdbeGoto(v, jmp5-1); + sqlite3VdbeJumpHere(v, jmp6); + } + /* For UNIQUE indexes, verify that only one entry exists with the ** current key. 
The entry is unique if (1) any column is NULL ** or (2) the next entry has a different key */ if( IsUniqueIndex(pIdx) ){ int uniqOk = sqlite3VdbeMakeLabel(pParse); int jmp6; - int kk; for(kk=0; kknKeyCol; kk++){ int iCol = pIdx->aiColumn[kk]; assert( iCol!=XN_ROWID && iColnCol ); @@ -133698,6 +139381,9 @@ SQLITE_PRIVATE void sqlite3Pragma( integrityCheckResultRow(v); sqlite3VdbeJumpHere(v, addr); } + if( pPk ){ + sqlite3ReleaseTempRange(pParse, r2, pPk->nKeyCol); + } } } } @@ -133848,6 +139534,11 @@ SQLITE_PRIVATE void sqlite3Pragma( aOp[1].p2 = iCookie; aOp[1].p3 = sqlite3Atoi(zRight); aOp[1].p5 = 1; + if( iCookie==BTREE_SCHEMA_VERSION && (db->flags & SQLITE_Defensive)!=0 ){ + /* Do not allow the use of PRAGMA schema_version=VALUE in defensive + ** mode. Change the OP_SetCookie opcode into a no-op. */ + aOp[1].opcode = OP_Noop; + } }else{ /* Read the specified cookie value */ static const VdbeOpList readCookie[] = { @@ -134004,7 +139695,7 @@ SQLITE_PRIVATE void sqlite3Pragma( Schema *pSchema; /* The current schema */ Table *pTab; /* A table in the schema */ Index *pIdx; /* An index of the table */ - LogEst szThreshold; /* Size threshold above which reanalysis is needd */ + LogEst szThreshold; /* Size threshold above which reanalysis needed */ char *zSubSql; /* SQL statement for the OP_SqlExec opcode */ u32 opMask; /* Mask of operations to perform */ @@ -134496,7 +140187,8 @@ static const sqlite3_module pragmaVtabModule = { 0, /* xSavepoint */ 0, /* xRelease */ 0, /* xRollbackTo */ - 0 /* xShadowName */ + 0, /* xShadowName */ + 0 /* xIntegrity */ }; /* @@ -134828,7 +140520,14 @@ SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFl #else encoding = SQLITE_UTF8; #endif - sqlite3SetTextEncoding(db, encoding); + if( db->nVdbeActive>0 && encoding!=ENC(db) + && (db->mDbFlags & DBFLAG_Vacuum)==0 + ){ + rc = SQLITE_LOCKED; + goto initone_error_out; + }else{ + sqlite3SetTextEncoding(db, encoding); + } }else{ /* If opening an attached database, the encoding much match ENC(db) */ if( (meta[BTREE_TEXT_ENCODING-1] & 3)!=ENC(db) ){ @@ -135042,8 +140741,8 @@ static void schemaIsValid(Parse *pParse){ sqlite3BtreeGetMeta(pBt, BTREE_SCHEMA_VERSION, (u32 *)&cookie); assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); if( cookie!=db->aDb[iDb].pSchema->schema_cookie ){ + if( DbHasProperty(db, iDb, DB_SchemaLoaded) ) pParse->rc = SQLITE_SCHEMA; sqlite3ResetOneSchema(db, iDb); - pParse->rc = SQLITE_SCHEMA; } /* Close the transaction, if one was opened. */ @@ -135096,15 +140795,15 @@ SQLITE_PRIVATE void sqlite3ParseObjectReset(Parse *pParse){ assert( db->pParse==pParse ); assert( pParse->nested==0 ); #ifndef SQLITE_OMIT_SHARED_CACHE - sqlite3DbFree(db, pParse->aTableLock); + if( pParse->aTableLock ) sqlite3DbNNFreeNN(db, pParse->aTableLock); #endif while( pParse->pCleanup ){ ParseCleanup *pCleanup = pParse->pCleanup; pParse->pCleanup = pCleanup->pNext; pCleanup->xCleanup(db, pCleanup->pPtr); - sqlite3DbFreeNN(db, pCleanup); + sqlite3DbNNFreeNN(db, pCleanup); } - sqlite3DbFree(db, pParse->aLabel); + if( pParse->aLabel ) sqlite3DbNNFreeNN(db, pParse->aLabel); if( pParse->pConstExpr ){ sqlite3ExprListDelete(db, pParse->pConstExpr); } @@ -135113,8 +140812,6 @@ SQLITE_PRIVATE void sqlite3ParseObjectReset(Parse *pParse){ db->lookaside.sz = db->lookaside.bDisable ? 
0 : db->lookaside.szTrue; assert( pParse->db->pParse==pParse ); db->pParse = pParse->pOuterParse; - pParse->db = 0; - pParse->disableLookaside = 0; } /* @@ -135123,7 +140820,7 @@ SQLITE_PRIVATE void sqlite3ParseObjectReset(Parse *pParse){ ** immediately. ** ** Use this mechanism for uncommon cleanups. There is a higher setup -** cost for this mechansim (an extra malloc), so it should not be used +** cost for this mechanism (an extra malloc), so it should not be used ** for common cleanups that happen on most calls. But for less ** common cleanups, we save a single NULL-pointer comparison in ** sqlite3ParseObjectReset(), which reduces the total CPU cycle count. @@ -135215,9 +140912,18 @@ static int sqlite3Prepare( sParse.pOuterParse = db->pParse; db->pParse = &sParse; sParse.db = db; - sParse.pReprepare = pReprepare; + if( pReprepare ){ + sParse.pReprepare = pReprepare; + sParse.explain = sqlite3_stmt_isexplain((sqlite3_stmt*)pReprepare); + }else{ + assert( sParse.pReprepare==0 ); + } assert( ppStmt && *ppStmt==0 ); - if( db->mallocFailed ) sqlite3ErrorMsg(&sParse, "out of memory"); + if( db->mallocFailed ){ + sqlite3ErrorMsg(&sParse, "out of memory"); + db->errCode = rc = SQLITE_NOMEM; + goto end_prepare; + } assert( sqlite3_mutex_held(db->mutex) ); /* For a long-term use prepared statement avoid the use of @@ -135227,7 +140933,7 @@ static int sqlite3Prepare( sParse.disableLookaside++; DisableLookaside; } - sParse.disableVtab = (prepFlags & SQLITE_PREPARE_NO_VTAB)!=0; + sParse.prepFlags = prepFlags & 0xff; /* Check to verify that it is possible to get a read lock on all ** database schemas. The inability to get a read lock indicates that @@ -135268,7 +140974,9 @@ static int sqlite3Prepare( } } - sqlite3VtabUnlockList(db); +#ifndef SQLITE_OMIT_VIRTUALTABLE + if( db->pDisconnect ) sqlite3VtabUnlockList(db); +#endif if( nBytes>=0 && (nBytes==0 || zSql[nBytes-1]!=0) ){ char *zSqlCopy; @@ -135652,6 +141360,10 @@ struct SortCtx { } aDefer[4]; #endif struct RowLoadInfo *pDeferredRowLoad; /* Deferred row loading info or NULL */ +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS + int addrPush; /* First instruction to push data into sorter */ + int addrPushEnd; /* Last instruction that pushes data into sorter */ +#endif }; #define SORTFLAG_UseSorter 0x01 /* Use SorterOpen instead of OpenEphemeral */ @@ -135663,6 +141375,7 @@ struct SortCtx { ** If bFree==0, Leave the first Select object unfreed */ static void clearSelect(sqlite3 *db, Select *p, int bFree){ + assert( db!=0 ); while( p ){ Select *pPrior = p->pPrior; sqlite3ExprListDelete(db, p->pEList); @@ -135682,7 +141395,7 @@ static void clearSelect(sqlite3 *db, Select *p, int bFree){ sqlite3WindowUnlinkFromSelect(p->pWin); } #endif - if( bFree ) sqlite3DbFreeNN(db, p); + if( bFree ) sqlite3DbNNFreeNN(db, p); p = pPrior; bFree = 1; } @@ -135814,7 +141527,7 @@ static Select *findRightmost(Select *p){ ** NATURAL FULL OUTER JT_NATRUAL|JT_LEFT|JT_RIGHT ** ** To preserve historical compatibly, SQLite also accepts a variety -** of other non-standard and in many cases non-sensical join types. +** of other non-standard and in many cases nonsensical join types. ** This routine makes as much sense at it can from the nonsense join ** type and returns a result. 
Examples of accepted nonsense join types ** include but are not limited to: @@ -136036,6 +141749,7 @@ static void unsetJoinExpr(Expr *p, int iTable, int nullable){ } if( p->op==TK_FUNCTION ){ assert( ExprUseXList(p) ); + assert( p->pLeft==0 ); if( p->x.pList ){ int i; for(i=0; ix.pList->nExpr; i++){ @@ -136085,7 +141799,7 @@ static int sqlite3ProcessJoin(Parse *pParse, Select *p){ if( NEVER(pLeft->pTab==0 || pRightTab==0) ) continue; joinType = (pRight->fg.jointype & JT_OUTER)!=0 ? EP_OuterON : EP_InnerON; - /* If this is a NATURAL join, synthesize an approprate USING clause + /* If this is a NATURAL join, synthesize an appropriate USING clause ** to specify which columns should be joined. */ if( pRight->fg.jointype & JT_NATURAL ){ @@ -136299,14 +142013,18 @@ static void pushOntoSorter( ** (2) All output columns are included in the sort record. In that ** case regData==regOrigData. ** (3) Some output columns are omitted from the sort record due to - ** the SQLITE_ENABLE_SORTER_REFERENCE optimization, or due to the + ** the SQLITE_ENABLE_SORTER_REFERENCES optimization, or due to the ** SQLITE_ECEL_OMITREF optimization, or due to the - ** SortCtx.pDeferredRowLoad optimiation. In any of these cases + ** SortCtx.pDeferredRowLoad optimization. In any of these cases ** regOrigData is 0 to prevent this routine from trying to copy ** values that might not yet exist. */ assert( nData==1 || regData==regOrigData || regOrigData==0 ); +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS + pSort->addrPush = sqlite3VdbeCurrentAddr(v); +#endif + if( nPrefixReg ){ assert( nPrefixReg==nExpr+bSeq ); regBase = regData - nPrefixReg; @@ -136353,7 +142071,7 @@ static void pushOntoSorter( testcase( pKI->nAllField > pKI->nKeyField+2 ); pOp->p4.pKeyInfo = sqlite3KeyInfoFromExprList(pParse,pSort->pOrderBy,nOBSat, pKI->nAllField-pKI->nKeyField-1); - pOp = 0; /* Ensure pOp not used after sqltie3VdbeAddOp3() */ + pOp = 0; /* Ensure pOp not used after sqlite3VdbeAddOp3() */ addrJmp = sqlite3VdbeCurrentAddr(v); sqlite3VdbeAddOp3(v, OP_Jump, addrJmp+1, 0, addrJmp+1); VdbeCoverage(v); pSort->labelBkOut = sqlite3VdbeMakeLabel(pParse); @@ -136407,6 +142125,9 @@ static void pushOntoSorter( sqlite3VdbeChangeP2(v, iSkip, pSort->labelOBLopt ? pSort->labelOBLopt : sqlite3VdbeCurrentAddr(v)); } +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS + pSort->addrPushEnd = sqlite3VdbeCurrentAddr(v)-1; +#endif } /* @@ -136444,7 +142165,7 @@ static void codeOffset( ** The returned value in this case is a copy of parameter iTab. ** ** WHERE_DISTINCT_ORDERED: -** In this case rows are being delivered sorted order. The ephermal +** In this case rows are being delivered sorted order. The ephemeral ** table is not required. Instead, the current set of values ** is compared against previous row. If they match, the new row ** is not distinct and control jumps to VM address addrRepeat. Otherwise, @@ -136873,6 +142594,16 @@ static void selectInnerLoop( testcase( eDest==SRT_Fifo ); testcase( eDest==SRT_DistFifo ); sqlite3VdbeAddOp3(v, OP_MakeRecord, regResult, nResultCol, r1+nPrefixReg); +#if !defined(SQLITE_ENABLE_NULL_TRIM) && defined(SQLITE_DEBUG) + /* A destination of SRT_Table and a non-zero iSDParm2 parameter means + ** that this is an "UPDATE ... FROM" on a virtual table or view. In this + ** case set the p5 parameter of the OP_MakeRecord to OPFLAG_NOCHNG_MAGIC. + ** This does not affect operation in any way - it just allows MakeRecord + ** to process OPFLAG_NOCHANGE values without an assert() failing. 
*/ + if( eDest==SRT_Table && pDest->iSDParm2 ){ + sqlite3VdbeChangeP5(v, OPFLAG_NOCHNG_MAGIC); + } +#endif #ifndef SQLITE_OMIT_CTE if( eDest==SRT_DistFifo ){ /* If the destination is DistFifo, then cursor (iParm+1) is open @@ -137088,9 +142819,10 @@ SQLITE_PRIVATE KeyInfo *sqlite3KeyInfoAlloc(sqlite3 *db, int N, int X){ */ SQLITE_PRIVATE void sqlite3KeyInfoUnref(KeyInfo *p){ if( p ){ + assert( p->db!=0 ); assert( p->nRef>0 ); p->nRef--; - if( p->nRef==0 ) sqlite3DbFreeNN(p->db, p); + if( p->nRef==0 ) sqlite3DbNNFreeNN(p->db, p); } } @@ -137229,6 +142961,16 @@ static void generateSortTail( int bSeq; /* True if sorter record includes seq. no. */ int nRefKey = 0; struct ExprList_item *aOutEx = p->pEList->a; +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS + int addrExplain; /* Address of OP_Explain instruction */ +#endif + + ExplainQueryPlan2(addrExplain, (pParse, 0, + "USE TEMP B-TREE FOR %sORDER BY", pSort->nOBSat>0?"RIGHT PART OF ":"") + ); + sqlite3VdbeScanStatusRange(v, addrExplain,pSort->addrPush,pSort->addrPushEnd); + sqlite3VdbeScanStatusCounters(v, addrExplain, addrExplain, pSort->addrPush); + assert( addrBreak<0 ); if( pSort->labelBkOut ){ @@ -137341,6 +143083,7 @@ static void generateSortTail( VdbeComment((v, "%s", aOutEx[i].zEName)); } } + sqlite3VdbeScanStatusRange(v, addrExplain, addrExplain, -1); switch( eDest ){ case SRT_Table: case SRT_EphemTab: { @@ -137402,6 +143145,7 @@ static void generateSortTail( }else{ sqlite3VdbeAddOp2(v, OP_Next, iTab, addr); VdbeCoverage(v); } + sqlite3VdbeScanStatusRange(v, addrExplain, sqlite3VdbeCurrentAddr(v)-1, -1); if( pSort->regReturn ) sqlite3VdbeAddOp1(v, OP_Return, pSort->regReturn); sqlite3VdbeResolveLabel(v, addrBreak); } @@ -137410,9 +143154,6 @@ static void generateSortTail( ** Return a pointer to a string containing the 'declaration type' of the ** expression pExpr. The string may be treated as static by the caller. ** -** Also try to estimate the size of the returned value and return that -** result in *pEstWidth. -** ** The declaration type is the exact datatype definition extracted from the ** original CREATE TABLE statement if the expression is a column. The ** declaration type for a ROWID field is INTEGER. 
Exactly when an expression @@ -137666,17 +143407,10 @@ SQLITE_PRIVATE void sqlite3GenerateColumnNames( int fullName; /* TABLE.COLUMN if no AS clause and is a direct table ref */ int srcName; /* COLUMN or TABLE.COLUMN if no AS clause and is direct */ -#ifndef SQLITE_OMIT_EXPLAIN - /* If this is an EXPLAIN, skip this step */ - if( pParse->explain ){ - return; - } -#endif - if( pParse->colNamesSet ) return; /* Column names are determined by the left-most term of a compound select */ while( pSelect->pPrior ) pSelect = pSelect->pPrior; - SELECTTRACE(1,pParse,pSelect,("generating column names\n")); + TREETRACE(0x80,pParse,pSelect,("generating column names\n")); pTabList = pSelect->pSrc; pEList = pSelect->pEList; assert( v!=0 ); @@ -137776,7 +143510,7 @@ SQLITE_PRIVATE int sqlite3ColumnsFromExprList( *pnCol = nCol; *paCol = aCol; - for(i=0, pCol=aCol; imallocFailed; i++, pCol++){ + for(i=0, pCol=aCol; inErr; i++, pCol++){ struct ExprList_item *pX = &pEList->a[i]; struct ExprList_item *pCollide; /* Get an appropriate name for the column @@ -137826,7 +143560,10 @@ SQLITE_PRIVATE int sqlite3ColumnsFromExprList( if( zName[j]==':' ) nName = j; } zName = sqlite3MPrintf(db, "%.*z:%u", nName, zName, ++cnt); - if( cnt>3 ) sqlite3_randomness(sizeof(cnt), &cnt); + sqlite3ProgressCheck(pParse); + if( cnt>3 ){ + sqlite3_randomness(sizeof(cnt), &cnt); + } } pCol->zCnName = zName; pCol->hName = sqlite3StrIHash(zName); @@ -137839,71 +143576,105 @@ SQLITE_PRIVATE int sqlite3ColumnsFromExprList( } } sqlite3HashClear(&ht); - if( db->mallocFailed ){ + if( pParse->nErr ){ for(j=0; jrc; } return SQLITE_OK; } /* -** Add type and collation information to a column list based on -** a SELECT statement. +** pTab is a transient Table object that represents a subquery of some +** kind (maybe a parenthesized subquery in the FROM clause of a larger +** query, or a VIEW, or a CTE). This routine computes type information +** for that Table object based on the Select object that implements the +** subquery. For the purposes of this routine, "type information" means: ** -** The column list presumably came from selectColumnNamesFromExprList(). -** The column list has only names, not types or collations. This -** routine goes through and adds the types and collations. -** -** This routine requires that all identifiers in the SELECT -** statement be resolved. +** * The datatype name, as it might appear in a CREATE TABLE statement +** * Which collating sequence to use for the column +** * The affinity of the column */ -SQLITE_PRIVATE void sqlite3SelectAddColumnTypeAndCollation( - Parse *pParse, /* Parsing contexts */ - Table *pTab, /* Add column type information to this table */ - Select *pSelect, /* SELECT used to determine types and collations */ - char aff /* Default affinity for columns */ +SQLITE_PRIVATE void sqlite3SubqueryColumnTypes( + Parse *pParse, /* Parsing contexts */ + Table *pTab, /* Add column type information to this table */ + Select *pSelect, /* SELECT used to determine types and collations */ + char aff /* Default affinity. 
*/ ){ sqlite3 *db = pParse->db; - NameContext sNC; Column *pCol; CollSeq *pColl; - int i; + int i,j; Expr *p; struct ExprList_item *a; + NameContext sNC; assert( pSelect!=0 ); - assert( (pSelect->selFlags & SF_Resolved)!=0 ); - assert( pTab->nCol==pSelect->pEList->nExpr || db->mallocFailed ); - if( db->mallocFailed ) return; + testcase( (pSelect->selFlags & SF_Resolved)==0 ); + assert( (pSelect->selFlags & SF_Resolved)!=0 || IN_RENAME_OBJECT ); + assert( pTab->nCol==pSelect->pEList->nExpr || pParse->nErr>0 ); + assert( aff==SQLITE_AFF_NONE || aff==SQLITE_AFF_BLOB ); + if( db->mallocFailed || IN_RENAME_OBJECT ) return; + while( pSelect->pPrior ) pSelect = pSelect->pPrior; + a = pSelect->pEList->a; memset(&sNC, 0, sizeof(sNC)); sNC.pSrcList = pSelect->pSrc; - a = pSelect->pEList->a; for(i=0, pCol=pTab->aCol; inCol; i++, pCol++){ const char *zType; - i64 n, m; + i64 n; pTab->tabFlags |= (pCol->colFlags & COLFLAG_NOINSERT); p = a[i].pExpr; - zType = columnType(&sNC, p, 0, 0, 0); /* pCol->szEst = ... // Column size est for SELECT tables never used */ pCol->affinity = sqlite3ExprAffinity(p); + if( pCol->affinity<=SQLITE_AFF_NONE ){ + pCol->affinity = aff; + } + if( pCol->affinity>=SQLITE_AFF_TEXT && pSelect->pNext ){ + int m = 0; + Select *pS2; + for(m=0, pS2=pSelect->pNext; pS2; pS2=pS2->pNext){ + m |= sqlite3ExprDataType(pS2->pEList->a[i].pExpr); + } + if( pCol->affinity==SQLITE_AFF_TEXT && (m&0x01)!=0 ){ + pCol->affinity = SQLITE_AFF_BLOB; + }else + if( pCol->affinity>=SQLITE_AFF_NUMERIC && (m&0x02)!=0 ){ + pCol->affinity = SQLITE_AFF_BLOB; + } + if( pCol->affinity>=SQLITE_AFF_NUMERIC && p->op==TK_CAST ){ + pCol->affinity = SQLITE_AFF_FLEXNUM; + } + } + zType = columnType(&sNC, p, 0, 0, 0); + if( zType==0 || pCol->affinity!=sqlite3AffinityType(zType, 0) ){ + if( pCol->affinity==SQLITE_AFF_NUMERIC + || pCol->affinity==SQLITE_AFF_FLEXNUM + ){ + zType = "NUM"; + }else{ + zType = 0; + for(j=1; jaffinity ){ + zType = sqlite3StdType[j]; + break; + } + } + } + } if( zType ){ - m = sqlite3Strlen30(zType); + i64 m = sqlite3Strlen30(zType); n = sqlite3Strlen30(pCol->zCnName); pCol->zCnName = sqlite3DbReallocOrFree(db, pCol->zCnName, n+m+2); + pCol->colFlags &= ~(COLFLAG_HASTYPE|COLFLAG_HASCOLL); if( pCol->zCnName ){ memcpy(&pCol->zCnName[n+1], zType, m+1); pCol->colFlags |= COLFLAG_HASTYPE; - }else{ - testcase( pCol->colFlags & COLFLAG_HASTYPE ); - pCol->colFlags &= ~(COLFLAG_HASTYPE|COLFLAG_HASCOLL); } } - if( pCol->affinity<=SQLITE_AFF_NONE ) pCol->affinity = aff; pColl = sqlite3ExprCollSeq(pParse, p); if( pColl ){ assert( pTab->pIndex==0 ); @@ -137937,7 +143708,7 @@ SQLITE_PRIVATE Table *sqlite3ResultSetOfSelect(Parse *pParse, Select *pSelect, c pTab->zName = 0; pTab->nRowLogEst = 200; assert( 200==sqlite3LogEst(1048576) ); sqlite3ColumnsFromExprList(pParse, pSelect->pEList, &pTab->nCol, &pTab->aCol); - sqlite3SelectAddColumnTypeAndCollation(pParse, pTab, pSelect, aff); + sqlite3SubqueryColumnTypes(pParse, pTab, pSelect, aff); pTab->iPKey = -1; if( db->mallocFailed ){ sqlite3DeleteTable(db, pTab); @@ -138152,7 +143923,7 @@ static void generateWithRecursiveQuery( int iQueue; /* The Queue table */ int iDistinct = 0; /* To ensure unique results if UNION */ int eDest = SRT_Fifo; /* How to write to Queue */ - SelectDest destQueue; /* SelectDest targetting the Queue table */ + SelectDest destQueue; /* SelectDest targeting the Queue table */ int i; /* Loop counter */ int rc; /* Result code */ ExprList *pOrderBy; /* The ORDER BY clause */ @@ -138462,7 +144233,7 @@ static int multiSelect( pPrior->iLimit = 
p->iLimit; pPrior->iOffset = p->iOffset; pPrior->pLimit = p->pLimit; - SELECTTRACE(1, pParse, p, ("multiSelect UNION ALL left...\n")); + TREETRACE(0x200, pParse, p, ("multiSelect UNION ALL left...\n")); rc = sqlite3Select(pParse, pPrior, &dest); pPrior->pLimit = 0; if( rc ){ @@ -138480,7 +144251,7 @@ static int multiSelect( } } ExplainQueryPlan((pParse, 1, "UNION ALL")); - SELECTTRACE(1, pParse, p, ("multiSelect UNION ALL right...\n")); + TREETRACE(0x200, pParse, p, ("multiSelect UNION ALL right...\n")); rc = sqlite3Select(pParse, p, &dest); testcase( rc!=SQLITE_OK ); pDelete = p->pPrior; @@ -138533,7 +144304,7 @@ static int multiSelect( */ assert( !pPrior->pOrderBy ); sqlite3SelectDestInit(&uniondest, priorOp, unionTab); - SELECTTRACE(1, pParse, p, ("multiSelect EXCEPT/UNION left...\n")); + TREETRACE(0x200, pParse, p, ("multiSelect EXCEPT/UNION left...\n")); rc = sqlite3Select(pParse, pPrior, &uniondest); if( rc ){ goto multi_select_end; @@ -138553,7 +144324,7 @@ static int multiSelect( uniondest.eDest = op; ExplainQueryPlan((pParse, 1, "%s USING TEMP B-TREE", sqlite3SelectOpName(p->op))); - SELECTTRACE(1, pParse, p, ("multiSelect EXCEPT/UNION right...\n")); + TREETRACE(0x200, pParse, p, ("multiSelect EXCEPT/UNION right...\n")); rc = sqlite3Select(pParse, p, &uniondest); testcase( rc!=SQLITE_OK ); assert( p->pOrderBy==0 ); @@ -138614,7 +144385,7 @@ static int multiSelect( /* Code the SELECTs to our left into temporary table "tab1". */ sqlite3SelectDestInit(&intersectdest, SRT_Union, tab1); - SELECTTRACE(1, pParse, p, ("multiSelect INTERSECT left...\n")); + TREETRACE(0x400, pParse, p, ("multiSelect INTERSECT left...\n")); rc = sqlite3Select(pParse, pPrior, &intersectdest); if( rc ){ goto multi_select_end; @@ -138631,7 +144402,7 @@ static int multiSelect( intersectdest.iSDParm = tab2; ExplainQueryPlan((pParse, 1, "%s USING TEMP B-TREE", sqlite3SelectOpName(p->op))); - SELECTTRACE(1, pParse, p, ("multiSelect INTERSECT right...\n")); + TREETRACE(0x400, pParse, p, ("multiSelect INTERSECT right...\n")); rc = sqlite3Select(pParse, p, &intersectdest); testcase( rc!=SQLITE_OK ); pDelete = p->pPrior; @@ -138752,7 +144523,7 @@ SQLITE_PRIVATE void sqlite3SelectWrongNumTermsError(Parse *pParse, Select *p){ /* ** Code an output subroutine for a coroutine implementation of a -** SELECT statment. +** SELECT statement. ** ** The data to be output is contained in pIn->iSdst. There are ** pIn->nSdst columns to be output. pDest is where the output should @@ -138974,7 +144745,7 @@ static int generateOutputSubroutine( ** ** We call AltB, AeqB, AgtB, EofA, and EofB "subroutines" but they are not ** actually called using Gosub and they do not Return. EofA and EofB loop -** until all data is exhausted then jump to the "end" labe. AltB, AeqB, +** until all data is exhausted then jump to the "end" label. AltB, AeqB, ** and AgtB jump to either L2 or to one of EofA or EofB. 
*/ #ifndef SQLITE_OMIT_COMPOUND_SELECT @@ -139011,7 +144782,7 @@ static int multiSelectOrderBy( int savedOffset; /* Saved value of p->iOffset */ int labelCmpr; /* Label for the start of the merge algorithm */ int labelEnd; /* Label for the end of the overall SELECT stmt */ - int addr1; /* Jump instructions that get retargetted */ + int addr1; /* Jump instructions that get retargeted */ int op; /* One of TK_ALL, TK_UNION, TK_EXCEPT, TK_INTERSECT */ KeyInfo *pKeyDup = 0; /* Comparison information for duplicate removal */ KeyInfo *pKeyMerge; /* Comparison information for merging rows */ @@ -139278,8 +145049,8 @@ static int multiSelectOrderBy( */ sqlite3VdbeResolveLabel(v, labelEnd); - /* Reassemble the compound query so that it will be freed correctly - ** by the calling function */ + /* Make arrangements to free the 2nd and subsequent arms of the compound + ** after the parse has finished */ if( pSplit->pPrior ){ sqlite3ParserAddCleanup(pParse, (void(*)(sqlite3*,void*))sqlite3SelectDelete, pSplit->pPrior); @@ -139312,7 +145083,7 @@ static int multiSelectOrderBy( ** the left operands of a RIGHT JOIN. In either case, we need to potentially ** bypass the substituted expression with OP_IfNullRow. ** -** Suppose the original expression integer constant. Even though the table +** Suppose the original expression is an integer constant. Even though the table ** has the nullRow flag set, because the expression is an integer constant, ** it will not be NULLed out. So instead, we insert an OP_IfNullRow opcode ** that checks to see if the nullRow flag is set on the table. If the nullRow @@ -139338,6 +145109,7 @@ typedef struct SubstContext { int iNewTable; /* New table number */ int isOuterJoin; /* Add TK_IF_NULL_ROW opcodes on each replacement */ ExprList *pEList; /* Replacement expressions */ + ExprList *pCList; /* Collation sequences for replacement expr */ } SubstContext; /* Forward Declarations */ @@ -139379,19 +145151,26 @@ static Expr *substExpr( #endif { Expr *pNew; - Expr *pCopy = pSubst->pEList->a[pExpr->iColumn].pExpr; + int iColumn; + Expr *pCopy; Expr ifNullRow; - assert( pSubst->pEList!=0 && pExpr->iColumnpEList->nExpr ); + iColumn = pExpr->iColumn; + assert( iColumn>=0 ); + assert( pSubst->pEList!=0 && iColumnpEList->nExpr ); assert( pExpr->pRight==0 ); + pCopy = pSubst->pEList->a[iColumn].pExpr; if( sqlite3ExprIsVector(pCopy) ){ sqlite3VectorErrorMsg(pSubst->pParse, pCopy); }else{ sqlite3 *db = pSubst->pParse->db; - if( pSubst->isOuterJoin && pCopy->op!=TK_COLUMN ){ + if( pSubst->isOuterJoin + && (pCopy->op!=TK_COLUMN || pCopy->iTable!=pSubst->iNewTable) + ){ memset(&ifNullRow, 0, sizeof(ifNullRow)); ifNullRow.op = TK_IF_NULL_ROW; ifNullRow.pLeft = pCopy; ifNullRow.iTable = pSubst->iNewTable; + ifNullRow.iColumn = -99; ifNullRow.flags = EP_IfNullRow; pCopy = &ifNullRow; } @@ -139418,11 +145197,16 @@ static Expr *substExpr( /* Ensure that the expression now has an implicit collation sequence, ** just as it did when it was a column of a view or sub-query. */ - if( pExpr->op!=TK_COLUMN && pExpr->op!=TK_COLLATE ){ - CollSeq *pColl = sqlite3ExprCollSeq(pSubst->pParse, pExpr); - pExpr = sqlite3ExprAddCollateString(pSubst->pParse, pExpr, - (pColl ? pColl->zName : "BINARY") + { + CollSeq *pNat = sqlite3ExprCollSeq(pSubst->pParse, pExpr); + CollSeq *pColl = sqlite3ExprCollSeq(pSubst->pParse, + pSubst->pCList->a[iColumn].pExpr ); + if( pNat!=pColl || (pExpr->op!=TK_COLUMN && pExpr->op!=TK_COLLATE) ){ + pExpr = sqlite3ExprAddCollateString(pSubst->pParse, pExpr, + (pColl ? 
pColl->zName : "BINARY") + ); + } } ExprClearProperty(pExpr, EP_Collate); } @@ -139615,6 +145399,46 @@ static void renumberCursors( } #endif /* !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) */ +/* +** If pSel is not part of a compound SELECT, return a pointer to its +** expression list. Otherwise, return a pointer to the expression list +** of the leftmost SELECT in the compound. +*/ +static ExprList *findLeftmostExprlist(Select *pSel){ + while( pSel->pPrior ){ + pSel = pSel->pPrior; + } + return pSel->pEList; +} + +/* +** Return true if any of the result-set columns in the compound query +** have incompatible affinities on one or more arms of the compound. +*/ +static int compoundHasDifferentAffinities(Select *p){ + int ii; + ExprList *pList; + assert( p!=0 ); + assert( p->pEList!=0 ); + assert( p->pPrior!=0 ); + pList = p->pEList; + for(ii=0; iinExpr; ii++){ + char aff; + Select *pSub1; + assert( pList->a[ii].pExpr!=0 ); + aff = sqlite3ExprAffinity(pList->a[ii].pExpr); + for(pSub1=p->pPrior; pSub1; pSub1=pSub1->pPrior){ + assert( pSub1->pEList!=0 ); + assert( pSub1->pEList->nExpr>ii ); + assert( pSub1->pEList->a[ii].pExpr!=0 ); + if( sqlite3ExprAffinity(pSub1->pEList->a[ii].pExpr)!=aff ){ + return 1; + } + } + } + return 0; +} + #if !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) /* ** This routine attempts to flatten subqueries as a performance optimization. @@ -139659,7 +145483,8 @@ static void renumberCursors( ** (3a) the subquery may not be a join and ** (3b) the FROM clause of the subquery may not contain a virtual ** table and -** (3c) the outer query may not be an aggregate. +** (**) Was: "The outer query may not have a GROUP BY." This case +** is now managed correctly ** (3d) the outer query may not be DISTINCT. ** See also (26) for restrictions on RIGHT JOIN. ** @@ -139682,7 +145507,7 @@ static void renumberCursors( ** (9) If the subquery uses LIMIT then the outer query may not be aggregate. ** ** (**) Restriction (10) was removed from the code on 2005-02-05 but we -** accidently carried the comment forward until 2014-09-15. Original +** accidentally carried the comment forward until 2014-09-15. Original ** constraint: "If the subquery is aggregate then the outer query ** may not use LIMIT." ** @@ -139716,6 +145541,8 @@ static void renumberCursors( ** (17g) either the subquery is the first element of the outer ** query or there are no RIGHT or FULL JOINs in any arm ** of the subquery. (This is a duplicate of condition (27b).) +** (17h) The corresponding result set expressions in all arms of the +** compound must have the same affinity. ** ** The parent and sub-query may contain WHERE clauses. Subject to ** rules (11), (13) and (14), they may also contain ORDER BY, @@ -139767,18 +145594,13 @@ static void renumberCursors( ** See also (3) for restrictions on LEFT JOIN. ** ** (27) The subquery may not contain a FULL or RIGHT JOIN unless it -** is the first element of the parent query. This must be the -** the case if: -** (27a) the subquery is not compound query, and +** is the first element of the parent query. Two subcases: +** (27a) the subquery is not a compound query. ** (27b) the subquery is a compound query and the RIGHT JOIN occurs ** in any arm of the compound query. (See also (17g).) ** -** (28) The subquery is not a MATERIALIZED CTE. 
-** -** (29) Either the subquery is not the right-hand operand of a join with an -** ON or USING clause nor the right-hand operand of a NATURAL JOIN, or -** the right-most table within the FROM clause of the subquery -** is not part of an outer join. +** (28) The subquery is not a MATERIALIZED CTE. (This is handled +** in the caller before ever reaching this routine.) ** ** ** In this routine, the "p" parameter is a pointer to the outer query. @@ -139871,16 +145693,10 @@ static int flattenSubquery( ** ** which is not at all the same thing. ** - ** If the subquery is the right operand of a LEFT JOIN, then the outer - ** query cannot be an aggregate. (3c) This is an artifact of the way - ** aggregates are processed - there is no mechanism to determine if - ** the LEFT JOIN table should be all-NULL. - ** ** See also tickets #306, #350, and #3300. */ if( (pSubitem->fg.jointype & (JT_OUTER|JT_LTORJ))!=0 ){ if( pSubSrc->nSrc>1 /* (3a) */ - || isAgg /* (3c) */ || IsVirtual(pSubSrc->a[0].pTab) /* (3b) */ || (p->selFlags & SF_Distinct)!=0 /* (3d) */ || (pSubitem->fg.jointype & JT_RIGHT)!=0 /* (26) */ @@ -139889,52 +145705,14 @@ static int flattenSubquery( } isOuterJoin = 1; } -#ifdef SQLITE_EXTRA_IFNULLROW - else if( iFrom>0 && !isAgg ){ - /* Setting isOuterJoin to -1 causes OP_IfNullRow opcodes to be generated for - ** every reference to any result column from subquery in a join, even - ** though they are not necessary. This will stress-test the OP_IfNullRow - ** opcode. */ - isOuterJoin = -1; - } -#endif assert( pSubSrc->nSrc>0 ); /* True by restriction (7) */ if( iFrom>0 && (pSubSrc->a[0].fg.jointype & JT_LTORJ)!=0 ){ return 0; /* Restriction (27a) */ } - if( pSubitem->fg.isCte && pSubitem->u2.pCteUse->eM10d==M10d_Yes ){ - return 0; /* (28) */ - } - /* Restriction (29): - ** - ** We do not want two constraints on the same term of the flattened - ** query where one constraint has EP_InnerON and the other is EP_OuterON. - ** To prevent this, one or the other of the following conditions must be - ** false: - ** - ** (29a) The right-most entry in the FROM clause of the subquery - ** must not be part of an outer join. - ** - ** (29b) The subquery itself must not be the right operand of a - ** NATURAL join or a join that as an ON or USING clause. - ** - ** These conditions are sufficient to keep an EP_OuterON from being - ** flattened into an EP_InnerON. Restrictions (3a) and (27a) prevent - ** an EP_InnerON from being flattened into an EP_OuterON. - */ - if( pSubSrc->nSrc>=2 - && (pSubSrc->a[pSubSrc->nSrc-1].fg.jointype & JT_OUTER)!=0 - ){ - if( (pSubitem->fg.jointype & JT_NATURAL)!=0 - || pSubitem->fg.isUsing - || NEVER(pSubitem->u3.pOn!=0) /* ON clause already shifted into WHERE */ - || pSubitem->fg.isOn - ){ - return 0; - } - } + /* Condition (28) is blocked by the caller */ + assert( !pSubitem->fg.isCte || pSubitem->u2.pCteUse->eM10d!=M10d_Yes ); /* Restriction (17): If the sub-query is a compound SELECT, then it must ** use only the UNION ALL operator. And none of the simple select queries @@ -139942,6 +145720,7 @@ static int flattenSubquery( ** queries. */ if( pSub->pPrior ){ + int ii; if( pSub->pOrderBy ){ return 0; /* Restriction (20) */ } @@ -139974,7 +145753,6 @@ static int flattenSubquery( /* Restriction (18). 
*/ if( p->pOrderBy ){ - int ii; for(ii=0; iipOrderBy->nExpr; ii++){ if( p->pOrderBy->a[ii].u.x.iOrderByCol==0 ) return 0; } @@ -139983,6 +145761,9 @@ static int flattenSubquery( /* Restriction (23) */ if( (p->selFlags & SF_Recursive) ) return 0; + /* Restriction (17h) */ + if( compoundHasDifferentAffinities(pSub) ) return 0; + if( pSrc->nSrc>1 ){ if( pParse->nSelect>500 ) return 0; if( OptimizationDisabled(db, SQLITE_FlttnUnionAll) ) return 0; @@ -139992,7 +145773,7 @@ static int flattenSubquery( } /***** If we reach this point, flattening is permitted. *****/ - SELECTTRACE(1,pParse,p,("flatten %u.%p from term %d\n", + TREETRACE(0x4,pParse,p,("flatten %u.%p from term %d\n", pSub->selId, pSub, iFrom)); /* Authorize the subquery */ @@ -140001,7 +145782,7 @@ static int flattenSubquery( testcase( i==SQLITE_DENY ); pParse->zAuthContext = zSavedAuthContext; - /* Delete the transient structures associated with thesubquery */ + /* Delete the transient structures associated with the subquery */ pSub1 = pSubitem->pSelect; sqlite3DbFree(db, pSubitem->zDatabase); sqlite3DbFree(db, pSubitem->zName); @@ -140071,7 +145852,7 @@ static int flattenSubquery( if( pPrior ) pPrior->pNext = pNew; pNew->pNext = p; p->pPrior = pNew; - SELECTTRACE(2,pParse,p,("compound-subquery flattener" + TREETRACE(0x4,pParse,p,("compound-subquery flattener" " creates %u as peer\n",pNew->selId)); } assert( pSubitem->pSelect==0 ); @@ -140183,7 +145964,7 @@ static int flattenSubquery( ** ORDER BY column expression is identical to the iOrderByCol'th ** expression returned by SELECT statement pSub. Since these values ** do not necessarily correspond to columns in SELECT statement pParent, - ** zero them before transfering the ORDER BY clause. + ** zero them before transferring the ORDER BY clause. ** ** Not doing this may cause an error if a subsequent call to this ** function attempts to flatten a compound sub-query into pParent @@ -140216,6 +145997,7 @@ static int flattenSubquery( x.iNewTable = iNewParent; x.isOuterJoin = isOuterJoin; x.pEList = pSub->pEList; + x.pCList = findLeftmostExprlist(pSub); substSelect(&x, pParent, 0); } @@ -140235,23 +146017,22 @@ static int flattenSubquery( pSub->pLimit = 0; } - /* Recompute the SrcList_item.colUsed masks for the flattened + /* Recompute the SrcItem.colUsed masks for the flattened ** tables. */ for(i=0; ia[i+iFrom]); } } - /* Finially, delete what is left of the subquery and return - ** success. + /* Finally, delete what is left of the subquery and return success. */ sqlite3AggInfoPersistWalkerInit(&w, pParse); sqlite3WalkSelect(&w,pSub1); sqlite3SelectDelete(db, pSub1); #if TREETRACE_ENABLED - if( sqlite3TreeTrace & 0x100 ){ - SELECTTRACE(0x100,pParse,p,("After flattening:\n")); + if( sqlite3TreeTrace & 0x4 ){ + TREETRACE(0x4,pParse,p,("After flattening:\n")); sqlite3TreeViewSelect(0, p, 0); } #endif @@ -140278,7 +146059,7 @@ struct WhereConst { /* ** Add a new entry to the pConst object. Except, do not add duplicate -** pColumn entires. Also, do not add if doing so would not be appropriate. +** pColumn entries. Also, do not add if doing so would not be appropriate. ** ** The caller guarantees the pColumn is a column and pValue is a constant. ** This routine has to do some additional checks before completing the @@ -140464,7 +146245,7 @@ static int propagateConstantExprRewrite(Walker *pWalker, Expr *pExpr){ ** SELECT * FROM t1 WHERE a=123 AND b=123; ** ** The two SELECT statements above should return different answers. 
b=a -** is alway true because the comparison uses numeric affinity, but b=123 +** is always true because the comparison uses numeric affinity, but b=123 ** is false because it uses text affinity and '0123' is not the same as '123'. ** To work around this, the expression tree is not actually changed from ** "b=a" to "b=123" but rather the "a" in "b=a" is tagged with EP_FixedCol @@ -140548,7 +146329,7 @@ static int propagateConstants( ** At the time this function is called it is guaranteed that ** ** * the sub-query uses only one distinct window frame, and -** * that the window frame has a PARTITION BY clase. +** * that the window frame has a PARTITION BY clause. */ static int pushDownWindowCheck(Parse *pParse, Select *pSubq, Expr *pExpr){ assert( pSubq->pWin->pPartition ); @@ -140625,6 +146406,29 @@ static int pushDownWindowCheck(Parse *pParse, Select *pSubq, Expr *pExpr){ ** be materialized. (This restriction is implemented in the calling ** routine.) ** +** (8) If the subquery is a compound that uses UNION, INTERSECT, +** or EXCEPT, then all of the result set columns for all arms of +** the compound must use the BINARY collating sequence. +** +** (9) All three of the following are true: +** +** (9a) The WHERE clause expression originates in the ON or USING clause +** of a join (either an INNER or an OUTER join), and +** +** (9b) The subquery is to the right of the ON/USING clause +** +** (9c) There is a RIGHT JOIN (or FULL JOIN) in between the ON/USING +** clause and the subquery. +** +** Without this restriction, the push-down optimization might move +** the ON/USING filter expression from the left side of a RIGHT JOIN +** over to the right side, which leads to incorrect answers. See +** also restriction (6) in sqlite3ExprIsSingleTableConstraint(). +** +** (10) The inner query is not the right-hand table of a RIGHT JOIN. +** +** (11) The subquery is not a VALUES clause +** ** Return 0 if no changes are made and non-zero if one or more WHERE clause ** terms are duplicated into the subquery. */ @@ -140632,24 +146436,56 @@ static int pushDownWhereTerms( Parse *pParse, /* Parse context (for malloc() and error reporting) */ Select *pSubq, /* The subquery whose WHERE clause is to be augmented */ Expr *pWhere, /* The WHERE clause of the outer query */ - SrcItem *pSrc /* The subquery term of the outer FROM clause */ + SrcList *pSrcList, /* The complete from clause of the outer query */ + int iSrc /* Which FROM clause term to try to push into */ ){ Expr *pNew; + SrcItem *pSrc; /* The subquery FROM term into which WHERE is pushed */ int nChng = 0; + pSrc = &pSrcList->a[iSrc]; if( pWhere==0 ) return 0; - if( pSubq->selFlags & (SF_Recursive|SF_MultiPart) ) return 0; - if( pSrc->fg.jointype & (JT_LTORJ|JT_RIGHT) ) return 0; + if( pSubq->selFlags & (SF_Recursive|SF_MultiPart) ){ + return 0; /* restrictions (2) and (11) */ + } + if( pSrc->fg.jointype & (JT_LTORJ|JT_RIGHT) ){ + return 0; /* restrictions (10) */ + } -#ifndef SQLITE_OMIT_WINDOWFUNC if( pSubq->pPrior ){ Select *pSel; + int notUnionAll = 0; for(pSel=pSubq; pSel; pSel=pSel->pPrior){ + u8 op = pSel->op; + assert( op==TK_ALL || op==TK_SELECT + || op==TK_UNION || op==TK_INTERSECT || op==TK_EXCEPT ); + if( op!=TK_ALL && op!=TK_SELECT ){ + notUnionAll = 1; + } +#ifndef SQLITE_OMIT_WINDOWFUNC if( pSel->pWin ) return 0; /* restriction (6b) */ +#endif + } + if( notUnionAll ){ + /* If any of the compound arms are connected using UNION, INTERSECT, + ** or EXCEPT, then we must ensure that none of the columns use a + ** non-BINARY collating sequence. 
*/ + for(pSel=pSubq; pSel; pSel=pSel->pPrior){ + int ii; + const ExprList *pList = pSel->pEList; + assert( pList!=0 ); + for(ii=0; iinExpr; ii++){ + CollSeq *pColl = sqlite3ExprCollSeq(pParse, pList->a[ii].pExpr); + if( !sqlite3IsBinary(pColl) ){ + return 0; /* Restriction (8) */ + } + } + } } }else{ +#ifndef SQLITE_OMIT_WINDOWFUNC if( pSubq->pWin && pSubq->pWin->pPartition==0 ) return 0; - } #endif + } #ifdef SQLITE_DEBUG /* Only the first term of a compound can have a WITH clause. But make @@ -140668,11 +146504,28 @@ static int pushDownWhereTerms( return 0; /* restriction (3) */ } while( pWhere->op==TK_AND ){ - nChng += pushDownWhereTerms(pParse, pSubq, pWhere->pRight, pSrc); + nChng += pushDownWhereTerms(pParse, pSubq, pWhere->pRight, pSrcList, iSrc); pWhere = pWhere->pLeft; } -#if 0 /* Legacy code. Checks now done by sqlite3ExprIsTableConstraint() */ +#if 0 /* These checks now done by sqlite3ExprIsSingleTableConstraint() */ + if( ExprHasProperty(pWhere, EP_OuterON|EP_InnerON) /* (9a) */ + && (pSrcList->a[0].fg.jointype & JT_LTORJ)!=0 /* Fast pre-test of (9c) */ + ){ + int jj; + for(jj=0; jjw.iJoin==pSrcList->a[jj].iCursor ){ + /* If we reach this point, both (9a) and (9b) are satisfied. + ** The following loop checks (9c): + */ + for(jj++; jja[jj].fg.jointype & JT_RIGHT)!=0 ){ + return 0; /* restriction (9) */ + } + } + } + } + } if( isLeftJoin && (ExprHasProperty(pWhere,EP_OuterON)==0 || pWhere->w.iJoin!=iCursor) @@ -140686,7 +146539,7 @@ static int pushDownWhereTerms( } #endif - if( sqlite3ExprIsTableConstraint(pWhere, pSrc) ){ + if( sqlite3ExprIsSingleTableConstraint(pWhere, pSrcList, iSrc) ){ nChng++; pSubq->selFlags |= SF_PushDown; while( pSubq ){ @@ -140698,6 +146551,7 @@ static int pushDownWhereTerms( x.iNewTable = pSrc->iCursor; x.isOuterJoin = 0; x.pEList = pSubq->pEList; + x.pCList = findLeftmostExprlist(pSubq); pNew = substExpr(&x, pNew); #ifndef SQLITE_OMIT_WINDOWFUNC if( pSubq->pWin && 0==pushDownWindowCheck(pParse, pSubq, pNew) ){ @@ -140719,6 +146573,78 @@ static int pushDownWhereTerms( } #endif /* !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) */ +/* +** Check to see if a subquery contains result-set columns that are +** never used. If it does, change the value of those result-set columns +** to NULL so that they do not cause unnecessary work to compute. +** +** Return the number of column that were changed to NULL. +*/ +static int disableUnusedSubqueryResultColumns(SrcItem *pItem){ + int nCol; + Select *pSub; /* The subquery to be simplified */ + Select *pX; /* For looping over compound elements of pSub */ + Table *pTab; /* The table that describes the subquery */ + int j; /* Column number */ + int nChng = 0; /* Number of columns converted to NULL */ + Bitmask colUsed; /* Columns that may not be NULLed out */ + + assert( pItem!=0 ); + if( pItem->fg.isCorrelated || pItem->fg.isCte ){ + return 0; + } + assert( pItem->pTab!=0 ); + pTab = pItem->pTab; + assert( pItem->pSelect!=0 ); + pSub = pItem->pSelect; + assert( pSub->pEList->nExpr==pTab->nCol ); + for(pX=pSub; pX; pX=pX->pPrior){ + if( (pX->selFlags & (SF_Distinct|SF_Aggregate))!=0 ){ + testcase( pX->selFlags & SF_Distinct ); + testcase( pX->selFlags & SF_Aggregate ); + return 0; + } + if( pX->pPrior && pX->op!=TK_ALL ){ + /* This optimization does not work for compound subqueries that + ** use UNION, INTERSECT, or EXCEPT. Only UNION ALL is allowed. */ + return 0; + } +#ifndef SQLITE_OMIT_WINDOWFUNC + if( pX->pWin ){ + /* This optimization does not work for subqueries that use window + ** functions. 
*/ + return 0; + } +#endif + } + colUsed = pItem->colUsed; + if( pSub->pOrderBy ){ + ExprList *pList = pSub->pOrderBy; + for(j=0; jnExpr; j++){ + u16 iCol = pList->a[j].u.x.iOrderByCol; + if( iCol>0 ){ + iCol--; + colUsed |= ((Bitmask)1)<<(iCol>=BMS ? BMS-1 : iCol); + } + } + } + nCol = pTab->nCol; + for(j=0; jpPrior) { + Expr *pY = pX->pEList->a[j].pExpr; + if( pY->op==TK_NULL ) continue; + pY->op = TK_NULL; + ExprClearProperty(pY, EP_Skip|EP_Unlikely); + pX->selFlags |= SF_PushDown; + nChng++; + } + } + return nChng; +} + + /* ** The pFunc is the only aggregate function in the query. Check to see ** if the query is a candidate for the min/max optimization. @@ -141110,9 +147036,6 @@ static int resolveFromTermToCte( pFrom->fg.isCte = 1; pFrom->u2.pCteUse = pCteUse; pCteUse->nUse++; - if( pCteUse->nUse>=2 && pCteUse->eM10d==M10d_Any ){ - pCteUse->eM10d = M10d_Yes; - } /* Check if this is a recursive CTE. */ pRecTerm = pSel = pFrom->pSelect; @@ -141222,9 +147145,9 @@ SQLITE_PRIVATE void sqlite3SelectPopWith(Walker *pWalker, Select *p){ #endif /* -** The SrcList_item structure passed as the second argument represents a +** The SrcItem structure passed as the second argument represents a ** sub-query in the FROM clause of a SELECT statement. This function -** allocates and populates the SrcList_item.pTab object. If successful, +** allocates and populates the SrcItem.pTab object. If successful, ** SQLITE_OK is returned. Otherwise, if an OOM error is encountered, ** SQLITE_NOMEM. */ @@ -141486,12 +147409,20 @@ static int selectExpander(Walker *pWalker, Select *p){ ** expanded. */ int tableSeen = 0; /* Set to 1 when TABLE matches */ char *zTName = 0; /* text of name of TABLE */ + int iErrOfst; if( pE->op==TK_DOT ){ + assert( (selFlags & SF_NestedFrom)==0 ); assert( pE->pLeft!=0 ); assert( !ExprHasProperty(pE->pLeft, EP_IntValue) ); zTName = pE->pLeft->u.zToken; + assert( ExprUseWOfst(pE->pLeft) ); + iErrOfst = pE->pRight->w.iOfst; + }else{ + assert( ExprUseWOfst(pE) ); + iErrOfst = pE->w.iOfst; } for(i=0, pFrom=pTabList->a; inSrc; i++, pFrom++){ + int nAdd; /* Number of cols including rowid */ Table *pTab = pFrom->pTab; /* Table for this data source */ ExprList *pNestedFrom; /* Result-set of a nested FROM clause */ char *zTabName; /* AS name for this data source */ @@ -141509,6 +147440,7 @@ static int selectExpander(Walker *pWalker, Select *p){ pNestedFrom = pFrom->pSelect->pEList; assert( pNestedFrom!=0 ); assert( pNestedFrom->nExpr==pTab->nCol ); + assert( VisibleRowid(pTab)==0 ); }else{ if( zTName && sqlite3StrICmp(zTName, zTabName)!=0 ){ continue; @@ -141526,6 +147458,7 @@ static int selectExpander(Walker *pWalker, Select *p){ for(ii=0; iinId; ii++){ const char *zUName = pUsing->a[ii].zName; pRight = sqlite3Expr(db, TK_ID, zUName); + sqlite3ExprSetErrorOffset(pRight, iErrOfst); pNew = sqlite3ExprListAppend(pParse, pNew, pRight); if( pNew ){ struct ExprList_item *pX = &pNew->a[pNew->nExpr-1]; @@ -141538,33 +147471,48 @@ static int selectExpander(Walker *pWalker, Select *p){ }else{ pUsing = 0; } - for(j=0; jnCol; j++){ - char *zName = pTab->aCol[j].zCnName; + + nAdd = pTab->nCol + (VisibleRowid(pTab) && (selFlags&SF_NestedFrom)); + for(j=0; ja[j], 0, zTName, 0)==0 - ){ - continue; - } + if( j==pTab->nCol ){ + zName = sqlite3RowidAlias(pTab); + if( zName==0 ) continue; + }else{ + zName = pTab->aCol[j].zCnName; - /* If a column is marked as 'hidden', omit it from the expanded - ** result-set list unless the SELECT has the SF_IncludeHidden - ** bit set. 
- */ - if( (p->selFlags & SF_IncludeHidden)==0 - && IsHiddenColumn(&pTab->aCol[j]) - ){ - continue; - } - if( (pTab->aCol[j].colFlags & COLFLAG_NOEXPAND)!=0 - && zTName==0 - && (selFlags & (SF_NestedFrom))==0 - ){ - continue; + /* If pTab is actually an SF_NestedFrom sub-select, do not + ** expand any ENAME_ROWID columns. */ + if( pNestedFrom && pNestedFrom->a[j].fg.eEName==ENAME_ROWID ){ + continue; + } + + if( zTName + && pNestedFrom + && sqlite3MatchEName(&pNestedFrom->a[j], 0, zTName, 0, 0)==0 + ){ + continue; + } + + /* If a column is marked as 'hidden', omit it from the expanded + ** result-set list unless the SELECT has the SF_IncludeHidden + ** bit set. + */ + if( (p->selFlags & SF_IncludeHidden)==0 + && IsHiddenColumn(&pTab->aCol[j]) + ){ + continue; + } + if( (pTab->aCol[j].colFlags & COLFLAG_NOEXPAND)!=0 + && zTName==0 + && (selFlags & (SF_NestedFrom))==0 + ){ + continue; + } } + assert( zName ); tableSeen = 1; if( i>0 && zTName==0 && (selFlags & SF_NestedFrom)==0 ){ @@ -141598,6 +147546,7 @@ static int selectExpander(Walker *pWalker, Select *p){ }else{ pExpr = pRight; } + sqlite3ExprSetErrorOffset(pExpr, iErrOfst); pNew = sqlite3ExprListAppend(pParse, pNew, pExpr); if( pNew==0 ){ break; /* OOM */ @@ -141613,11 +147562,11 @@ static int selectExpander(Walker *pWalker, Select *p){ zSchemaName, zTabName, zName); testcase( pX->zEName==0 ); } - pX->fg.eEName = ENAME_TAB; + pX->fg.eEName = (j==pTab->nCol ? ENAME_ROWID : ENAME_TAB); if( (pFrom->fg.isUsing && sqlite3IdListIndex(pFrom->u3.pUsing, zName)>=0) || (pUsing && sqlite3IdListIndex(pUsing, zName)>=0) - || (pTab->aCol[j].colFlags & COLFLAG_NOEXPAND)!=0 + || (jnCol && (pTab->aCol[j].colFlags & COLFLAG_NOEXPAND)) ){ pX->fg.bNoExpand = 1; } @@ -141652,8 +147601,8 @@ static int selectExpander(Walker *pWalker, Select *p){ } } #if TREETRACE_ENABLED - if( sqlite3TreeTrace & 0x100 ){ - SELECTTRACE(0x100,pParse,p,("After result-set wildcard expansion:\n")); + if( sqlite3TreeTrace & 0x8 ){ + TREETRACE(0x8,pParse,p,("After result-set wildcard expansion:\n")); sqlite3TreeViewSelect(0, p, 0); } #endif @@ -141704,14 +147653,14 @@ static void sqlite3SelectExpand(Parse *pParse, Select *pSelect){ ** This is a Walker.xSelectCallback callback for the sqlite3SelectTypeInfo() ** interface. ** -** For each FROM-clause subquery, add Column.zType and Column.zColl -** information to the Table structure that represents the result set -** of that subquery. +** For each FROM-clause subquery, add Column.zType, Column.zColl, and +** Column.affinity information to the Table structure that represents +** the result set of that subquery. ** ** The Table structure that represents the result set was constructed -** by selectExpander() but the type and collation information was omitted -** at that point because identifiers had not yet been resolved. This -** routine is called after identifier resolution. +** by selectExpander() but the type and collation and affinity information +** was omitted at that point because identifiers had not yet been resolved. +** This routine is called after identifier resolution. 
*/ static void selectAddSubqueryTypeInfo(Walker *pWalker, Select *p){ Parse *pParse; @@ -141719,10 +147668,11 @@ static void selectAddSubqueryTypeInfo(Walker *pWalker, Select *p){ SrcList *pTabList; SrcItem *pFrom; - assert( p->selFlags & SF_Resolved ); if( p->selFlags & SF_HasTypeInfo ) return; p->selFlags |= SF_HasTypeInfo; pParse = pWalker->pParse; + testcase( (p->selFlags & SF_Resolved)==0 ); + assert( (p->selFlags & SF_Resolved) || IN_RENAME_OBJECT ); pTabList = p->pSrc; for(i=0, pFrom=pTabList->a; inSrc; i++, pFrom++){ Table *pTab = pFrom->pTab; @@ -141731,9 +147681,7 @@ static void selectAddSubqueryTypeInfo(Walker *pWalker, Select *p){ /* A sub-query in the FROM clause of a SELECT */ Select *pSel = pFrom->pSelect; if( pSel ){ - while( pSel->pPrior ) pSel = pSel->pPrior; - sqlite3SelectAddColumnTypeAndCollation(pParse, pTab, pSel, - SQLITE_AFF_NONE); + sqlite3SubqueryColumnTypes(pParse, pTab, pSel, SQLITE_AFF_NONE); } } } @@ -141788,6 +147736,184 @@ SQLITE_PRIVATE void sqlite3SelectPrep( sqlite3SelectAddTypeInfo(pParse, p); } +#if TREETRACE_ENABLED +/* +** Display all information about an AggInfo object +*/ +static void printAggInfo(AggInfo *pAggInfo){ + int ii; + for(ii=0; iinColumn; ii++){ + struct AggInfo_col *pCol = &pAggInfo->aCol[ii]; + sqlite3DebugPrintf( + "agg-column[%d] pTab=%s iTable=%d iColumn=%d iMem=%d" + " iSorterColumn=%d %s\n", + ii, pCol->pTab ? pCol->pTab->zName : "NULL", + pCol->iTable, pCol->iColumn, pAggInfo->iFirstReg+ii, + pCol->iSorterColumn, + ii>=pAggInfo->nAccumulator ? "" : " Accumulator"); + sqlite3TreeViewExpr(0, pAggInfo->aCol[ii].pCExpr, 0); + } + for(ii=0; iinFunc; ii++){ + sqlite3DebugPrintf("agg-func[%d]: iMem=%d\n", + ii, pAggInfo->iFirstReg+pAggInfo->nColumn+ii); + sqlite3TreeViewExpr(0, pAggInfo->aFunc[ii].pFExpr, 0); + } +} +#endif /* TREETRACE_ENABLED */ + +/* +** Analyze the arguments to aggregate functions. Create new pAggInfo->aCol[] +** entries for columns that are arguments to aggregate functions but which +** are not otherwise used. +** +** The aCol[] entries in AggInfo prior to nAccumulator are columns that +** are referenced outside of aggregate functions. These might be columns +** that are part of the GROUP by clause, for example. Other database engines +** would throw an error if there is a column reference that is not in the +** GROUP BY clause and that is not part of an aggregate function argument. +** But SQLite allows this. +** +** The aCol[] entries beginning with the aCol[nAccumulator] and following +** are column references that are used exclusively as arguments to +** aggregate functions. This routine is responsible for computing +** (or recomputing) those aCol[] entries. 
+*/
+static void analyzeAggFuncArgs(
+  AggInfo *pAggInfo,
+  NameContext *pNC
+){
+  int i;
+  assert( pAggInfo!=0 );
+  assert( pAggInfo->iFirstReg==0 );
+  pNC->ncFlags |= NC_InAggFunc;
+  for(i=0; i<pAggInfo->nFunc; i++){
+    Expr *pExpr = pAggInfo->aFunc[i].pFExpr;
+    assert( pExpr->op==TK_FUNCTION || pExpr->op==TK_AGG_FUNCTION );
+    assert( ExprUseXList(pExpr) );
+    sqlite3ExprAnalyzeAggList(pNC, pExpr->x.pList);
+    if( pExpr->pLeft ){
+      assert( pExpr->pLeft->op==TK_ORDER );
+      assert( ExprUseXList(pExpr->pLeft) );
+      sqlite3ExprAnalyzeAggList(pNC, pExpr->pLeft->x.pList);
+    }
+#ifndef SQLITE_OMIT_WINDOWFUNC
+    assert( !IsWindowFunc(pExpr) );
+    if( ExprHasProperty(pExpr, EP_WinFunc) ){
+      sqlite3ExprAnalyzeAggregates(pNC, pExpr->y.pWin->pFilter);
+    }
+#endif
+  }
+  pNC->ncFlags &= ~NC_InAggFunc;
+}
+
+/*
+** An index on expressions is being used in the inner loop of an
+** aggregate query with a GROUP BY clause.  This routine attempts
+** to adjust the AggInfo object to take advantage of that index, and
+** perhaps to use the index as a covering index.
+*/
+static void optimizeAggregateUseOfIndexedExpr(
+  Parse *pParse,          /* Parsing context */
+  Select *pSelect,        /* The SELECT statement being processed */
+  AggInfo *pAggInfo,      /* The aggregate info */
+  NameContext *pNC        /* Name context used to resolve agg-func args */
+){
+  assert( pAggInfo->iFirstReg==0 );
+  assert( pSelect!=0 );
+  assert( pSelect->pGroupBy!=0 );
+  pAggInfo->nColumn = pAggInfo->nAccumulator;
+  if( ALWAYS(pAggInfo->nSortingColumn>0) ){
+    int mx = pSelect->pGroupBy->nExpr - 1;
+    int j, k;
+    for(j=0; j<pAggInfo->nColumn; j++){
+      k = pAggInfo->aCol[j].iSorterColumn;
+      if( k>mx ) mx = k;
+    }
+    pAggInfo->nSortingColumn = mx+1;
+  }
+  analyzeAggFuncArgs(pAggInfo, pNC);
+#if TREETRACE_ENABLED
+  if( sqlite3TreeTrace & 0x20 ){
+    IndexedExpr *pIEpr;
+    TREETRACE(0x20, pParse, pSelect,
+        ("AggInfo (possibly) adjusted for Indexed Exprs\n"));
+    sqlite3TreeViewSelect(0, pSelect, 0);
+    for(pIEpr=pParse->pIdxEpr; pIEpr; pIEpr=pIEpr->pIENext){
+      printf("data-cursor=%d index={%d,%d}\n",
+          pIEpr->iDataCur, pIEpr->iIdxCur, pIEpr->iIdxCol);
+      sqlite3TreeViewExpr(0, pIEpr->pExpr, 0);
+    }
+    printAggInfo(pAggInfo);
+  }
+#else
+  UNUSED_PARAMETER(pSelect);
+  UNUSED_PARAMETER(pParse);
+#endif
+}
+
+/*
+** Walker callback for aggregateConvertIndexedExprRefToColumn().
+*/
+static int aggregateIdxEprRefToColCallback(Walker *pWalker, Expr *pExpr){
+  AggInfo *pAggInfo;
+  struct AggInfo_col *pCol;
+  UNUSED_PARAMETER(pWalker);
+  if( pExpr->pAggInfo==0 ) return WRC_Continue;
+  if( pExpr->op==TK_AGG_COLUMN ) return WRC_Continue;
+  if( pExpr->op==TK_AGG_FUNCTION ) return WRC_Continue;
+  if( pExpr->op==TK_IF_NULL_ROW ) return WRC_Continue;
+  pAggInfo = pExpr->pAggInfo;
+  if( NEVER(pExpr->iAgg>=pAggInfo->nColumn) ) return WRC_Continue;
+  assert( pExpr->iAgg>=0 );
+  pCol = &pAggInfo->aCol[pExpr->iAgg];
+  pExpr->op = TK_AGG_COLUMN;
+  pExpr->iTable = pCol->iTable;
+  pExpr->iColumn = pCol->iColumn;
+  ExprClearProperty(pExpr, EP_Skip|EP_Collate|EP_Unlikely);
+  return WRC_Prune;
+}
+
+/*
+** Convert every pAggInfo->aFunc[].pFExpr such that any node within
+** those expressions that has pAggInfo set is changed into a TK_AGG_COLUMN
+** opcode.
+*/ +static void aggregateConvertIndexedExprRefToColumn(AggInfo *pAggInfo){ + int i; + Walker w; + memset(&w, 0, sizeof(w)); + w.xExprCallback = aggregateIdxEprRefToColCallback; + for(i=0; inFunc; i++){ + sqlite3WalkExpr(&w, pAggInfo->aFunc[i].pFExpr); + } +} + + +/* +** Allocate a block of registers so that there is one register for each +** pAggInfo->aCol[] and pAggInfo->aFunc[] entry in pAggInfo. The first +** register in this block is stored in pAggInfo->iFirstReg. +** +** This routine may only be called once for each AggInfo object. Prior +** to calling this routine: +** +** * The aCol[] and aFunc[] arrays may be modified +** * The AggInfoColumnReg() and AggInfoFuncReg() macros may not be used +** +** After calling this routine: +** +** * The aCol[] and aFunc[] arrays are fixed +** * The AggInfoColumnReg() and AggInfoFuncReg() macros may be used +** +*/ +static void assignAggregateRegisters(Parse *pParse, AggInfo *pAggInfo){ + assert( pAggInfo!=0 ); + assert( pAggInfo->iFirstReg==0 ); + pAggInfo->iFirstReg = pParse->nMem + 1; + pParse->nMem += pAggInfo->nColumn + pAggInfo->nFunc; +} + /* ** Reset the aggregate accumulator. ** @@ -141801,24 +147927,13 @@ static void resetAccumulator(Parse *pParse, AggInfo *pAggInfo){ int i; struct AggInfo_func *pFunc; int nReg = pAggInfo->nFunc + pAggInfo->nColumn; + assert( pAggInfo->iFirstReg>0 ); assert( pParse->db->pParse==pParse ); assert( pParse->db->mallocFailed==0 || pParse->nErr!=0 ); if( nReg==0 ) return; if( pParse->nErr ) return; -#ifdef SQLITE_DEBUG - /* Verify that all AggInfo registers are within the range specified by - ** AggInfo.mnReg..AggInfo.mxReg */ - assert( nReg==pAggInfo->mxReg-pAggInfo->mnReg+1 ); - for(i=0; inColumn; i++){ - assert( pAggInfo->aCol[i].iMem>=pAggInfo->mnReg - && pAggInfo->aCol[i].iMem<=pAggInfo->mxReg ); - } - for(i=0; inFunc; i++){ - assert( pAggInfo->aFunc[i].iMem>=pAggInfo->mnReg - && pAggInfo->aFunc[i].iMem<=pAggInfo->mxReg ); - } -#endif - sqlite3VdbeAddOp3(v, OP_Null, 0, pAggInfo->mnReg, pAggInfo->mxReg); + sqlite3VdbeAddOp3(v, OP_Null, 0, pAggInfo->iFirstReg, + pAggInfo->iFirstReg+nReg-1); for(pFunc=pAggInfo->aFunc, i=0; inFunc; i++, pFunc++){ if( pFunc->iDistinct>=0 ){ Expr *pE = pFunc->pFExpr; @@ -141835,6 +147950,32 @@ static void resetAccumulator(Parse *pParse, AggInfo *pAggInfo){ pFunc->pFunc->zName)); } } + if( pFunc->iOBTab>=0 ){ + ExprList *pOBList; + KeyInfo *pKeyInfo; + int nExtra = 0; + assert( pFunc->pFExpr->pLeft!=0 ); + assert( pFunc->pFExpr->pLeft->op==TK_ORDER ); + assert( ExprUseXList(pFunc->pFExpr->pLeft) ); + pOBList = pFunc->pFExpr->pLeft->x.pList; + if( !pFunc->bOBUnique ){ + nExtra++; /* One extra column for the OP_Sequence */ + } + if( pFunc->bOBPayload ){ + /* extra columns for the function arguments */ + assert( ExprUseXList(pFunc->pFExpr) ); + nExtra += pFunc->pFExpr->x.pList->nExpr; + } + pKeyInfo = sqlite3KeyInfoFromExprList(pParse, pOBList, 0, nExtra); + if( !pFunc->bOBUnique && pParse->nErr==0 ){ + pKeyInfo->nKeyField++; + } + sqlite3VdbeAddOp4(v, OP_OpenEphemeral, + pFunc->iOBTab, pOBList->nExpr+nExtra, 0, + (char*)pKeyInfo, P4_KEYINFO); + ExplainQueryPlan((pParse, 0, "USE TEMP B-TREE FOR %s(ORDER BY)", + pFunc->pFunc->zName)); + } } } @@ -141850,20 +147991,61 @@ static void finalizeAggFunctions(Parse *pParse, AggInfo *pAggInfo){ ExprList *pList; assert( ExprUseXList(pF->pFExpr) ); pList = pF->pFExpr->x.pList; - sqlite3VdbeAddOp2(v, OP_AggFinal, pF->iMem, pList ? 
pList->nExpr : 0);
+    if( pF->iOBTab>=0 ){
+      /* For an ORDER BY aggregate, calls to OP_AggStep were deferred and
+      ** all content was stored in the ephemeral table pF->iOBTab.  Extract
+      ** that content now (in ORDER BY order) and make all calls to
+      ** OP_AggStep before doing the OP_AggFinal call. */
+      int iTop;        /* Start of loop for extracting columns */
+      int nArg;        /* Number of columns to extract */
+      int nKey;        /* Key columns to be skipped */
+      int regAgg;      /* Extract into this array */
+      int j;           /* Loop counter */
+
+      nArg = pList->nExpr;
+      regAgg = sqlite3GetTempRange(pParse, nArg);
+
+      if( pF->bOBPayload==0 ){
+        nKey = 0;
+      }else{
+        assert( pF->pFExpr->pLeft!=0 );
+        assert( ExprUseXList(pF->pFExpr->pLeft) );
+        assert( pF->pFExpr->pLeft->x.pList!=0 );
+        nKey = pF->pFExpr->pLeft->x.pList->nExpr;
+        if( ALWAYS(!pF->bOBUnique) ) nKey++;
+      }
+      iTop = sqlite3VdbeAddOp1(v, OP_Rewind, pF->iOBTab); VdbeCoverage(v);
+      for(j=nArg-1; j>=0; j--){
+        sqlite3VdbeAddOp3(v, OP_Column, pF->iOBTab, nKey+j, regAgg+j);
+      }
+      sqlite3VdbeAddOp3(v, OP_AggStep, 0, regAgg, AggInfoFuncReg(pAggInfo,i));
+      sqlite3VdbeAppendP4(v, pF->pFunc, P4_FUNCDEF);
+      sqlite3VdbeChangeP5(v, (u8)nArg);
+      sqlite3VdbeAddOp2(v, OP_Next, pF->iOBTab, iTop+1); VdbeCoverage(v);
+      sqlite3VdbeJumpHere(v, iTop);
+      sqlite3ReleaseTempRange(pParse, regAgg, nArg);
+    }
+    sqlite3VdbeAddOp2(v, OP_AggFinal, AggInfoFuncReg(pAggInfo,i),
+                      pList ? pList->nExpr : 0);
     sqlite3VdbeAppendP4(v, pF->pFunc, P4_FUNCDEF);
   }
 }
-
 /*
-** Update the accumulator memory cells for an aggregate based on
-** the current cursor position.
+** Generate code that will update the accumulator memory cells for an
+** aggregate based on the current cursor position.
 **
 ** If regAcc is non-zero and there are no min() or max() aggregates
 ** in pAggInfo, then only populate the pAggInfo->nAccumulator accumulator
 ** registers if register regAcc contains 0.  The caller will take care
 ** of setting and clearing regAcc.
+**
+** For an ORDER BY aggregate, the actual accumulator memory cell update
+** is deferred until after all input rows have been received, so that they
+** can be run in the requested order.  In that case, instead of invoking
+** OP_AggStep to update the accumulator, just add the arguments that would
+** have been passed into OP_AggStep into the sorting ephemeral table
+** (along with the appropriate sort key).
 */
 static void updateAccumulator(
   Parse *pParse,
@@ -141878,11 +148060,15 @@ static void updateAccumulator(
   struct AggInfo_func *pF;
   struct AggInfo_col *pC;
 
+  assert( pAggInfo->iFirstReg>0 );
+  if( pParse->nErr ) return;
   pAggInfo->directMode = 1;
   for(i=0, pF=pAggInfo->aFunc; i<pAggInfo->nFunc; i++, pF++){
     int nArg;
     int addrNext = 0;
     int regAgg;
+    int regAggSz = 0;
+    int regDistinct = 0;
     ExprList *pList;
     assert( ExprUseXList(pF->pFExpr) );
     assert( !IsWindowFunc(pF->pFExpr) );
@@ -141909,9 +148095,44 @@ static void updateAccumulator(
       addrNext = sqlite3VdbeMakeLabel(pParse);
       sqlite3ExprIfFalse(pParse, pFilter, addrNext, SQLITE_JUMPIFNULL);
     }
-    if( pList ){
+    if( pF->iOBTab>=0 ){
+      /* Instead of invoking AggStep, we must push the arguments that would
+      ** have been passed to AggStep onto the sorting table.
*/ + int jj; /* Registered used so far in building the record */ + ExprList *pOBList; /* The ORDER BY clause */ + assert( pList!=0 ); + nArg = pList->nExpr; + assert( nArg>0 ); + assert( pF->pFExpr->pLeft!=0 ); + assert( pF->pFExpr->pLeft->op==TK_ORDER ); + assert( ExprUseXList(pF->pFExpr->pLeft) ); + pOBList = pF->pFExpr->pLeft->x.pList; + assert( pOBList!=0 ); + assert( pOBList->nExpr>0 ); + regAggSz = pOBList->nExpr; + if( !pF->bOBUnique ){ + regAggSz++; /* One register for OP_Sequence */ + } + if( pF->bOBPayload ){ + regAggSz += nArg; + } + regAggSz++; /* One extra register to hold result of MakeRecord */ + regAgg = sqlite3GetTempRange(pParse, regAggSz); + regDistinct = regAgg; + sqlite3ExprCodeExprList(pParse, pOBList, regAgg, 0, SQLITE_ECEL_DUP); + jj = pOBList->nExpr; + if( !pF->bOBUnique ){ + sqlite3VdbeAddOp2(v, OP_Sequence, pF->iOBTab, regAgg+jj); + jj++; + } + if( pF->bOBPayload ){ + regDistinct = regAgg+jj; + sqlite3ExprCodeExprList(pParse, pList, regDistinct, 0, SQLITE_ECEL_DUP); + } + }else if( pList ){ nArg = pList->nExpr; regAgg = sqlite3GetTempRange(pParse, nArg); + regDistinct = regAgg; sqlite3ExprCodeExprList(pParse, pList, regAgg, 0, SQLITE_ECEL_DUP); }else{ nArg = 0; @@ -141922,26 +148143,37 @@ static void updateAccumulator( addrNext = sqlite3VdbeMakeLabel(pParse); } pF->iDistinct = codeDistinct(pParse, eDistinctType, - pF->iDistinct, addrNext, pList, regAgg); - } - if( pF->pFunc->funcFlags & SQLITE_FUNC_NEEDCOLL ){ - CollSeq *pColl = 0; - struct ExprList_item *pItem; - int j; - assert( pList!=0 ); /* pList!=0 if pF->pFunc has NEEDCOLL */ - for(j=0, pItem=pList->a; !pColl && jpExpr); - } - if( !pColl ){ - pColl = pParse->db->pDfltColl; + pF->iDistinct, addrNext, pList, regDistinct); + } + if( pF->iOBTab>=0 ){ + /* Insert a new record into the ORDER BY table */ + sqlite3VdbeAddOp3(v, OP_MakeRecord, regAgg, regAggSz-1, + regAgg+regAggSz-1); + sqlite3VdbeAddOp4Int(v, OP_IdxInsert, pF->iOBTab, regAgg+regAggSz-1, + regAgg, regAggSz-1); + sqlite3ReleaseTempRange(pParse, regAgg, regAggSz); + }else{ + /* Invoke the AggStep function */ + if( pF->pFunc->funcFlags & SQLITE_FUNC_NEEDCOLL ){ + CollSeq *pColl = 0; + struct ExprList_item *pItem; + int j; + assert( pList!=0 ); /* pList!=0 if pF->pFunc has NEEDCOLL */ + for(j=0, pItem=pList->a; !pColl && jpExpr); + } + if( !pColl ){ + pColl = pParse->db->pDfltColl; + } + if( regHit==0 && pAggInfo->nAccumulator ) regHit = ++pParse->nMem; + sqlite3VdbeAddOp4(v, OP_CollSeq, regHit, 0, 0, + (char *)pColl, P4_COLLSEQ); } - if( regHit==0 && pAggInfo->nAccumulator ) regHit = ++pParse->nMem; - sqlite3VdbeAddOp4(v, OP_CollSeq, regHit, 0, 0, (char *)pColl, P4_COLLSEQ); + sqlite3VdbeAddOp3(v, OP_AggStep, 0, regAgg, AggInfoFuncReg(pAggInfo,i)); + sqlite3VdbeAppendP4(v, pF->pFunc, P4_FUNCDEF); + sqlite3VdbeChangeP5(v, (u8)nArg); + sqlite3ReleaseTempRange(pParse, regAgg, nArg); } - sqlite3VdbeAddOp3(v, OP_AggStep, 0, regAgg, pF->iMem); - sqlite3VdbeAppendP4(v, pF->pFunc, P4_FUNCDEF); - sqlite3VdbeChangeP5(v, (u8)nArg); - sqlite3ReleaseTempRange(pParse, regAgg, nArg); if( addrNext ){ sqlite3VdbeResolveLabel(v, addrNext); } @@ -141953,7 +148185,7 @@ static void updateAccumulator( addrHitTest = sqlite3VdbeAddOp1(v, OP_If, regHit); VdbeCoverage(v); } for(i=0, pC=pAggInfo->aCol; inAccumulator; i++, pC++){ - sqlite3ExprCode(pParse, pC->pCExpr, pC->iMem); + sqlite3ExprCode(pParse, pC->pCExpr, AggInfoColumnReg(pAggInfo,i)); } pAggInfo->directMode = 0; @@ -142049,26 +148281,31 @@ static void havingToWhere(Parse *pParse, Select *p){ sqlite3WalkExpr(&sWalker, 
p->pHaving);
 #if TREETRACE_ENABLED
   if( sWalker.eCode && (sqlite3TreeTrace & 0x100)!=0 ){
-    SELECTTRACE(0x100,pParse,p,("Move HAVING terms into WHERE:\n"));
+    TREETRACE(0x100,pParse,p,("Move HAVING terms into WHERE:\n"));
     sqlite3TreeViewSelect(0, p, 0);
   }
 #endif
 }
 
 /*
-** Check to see if the pThis entry of pTabList is a self-join of a prior view.
-** If it is, then return the SrcList_item for the prior view.  If it is not,
-** then return 0.
+** Check to see if the pThis entry of pTabList is a self-join of another view.
+** Search FROM-clause entries in the range of iFirst..iEnd, including iFirst
+** but stopping before iEnd.
+**
+** If pThis is a self-join, then return the SrcItem for the first other
+** instance of that view found.  If pThis is not a self-join then return 0.
 */
 static SrcItem *isSelfJoinView(
   SrcList *pTabList,           /* Search for self-joins in this FROM clause */
-  SrcItem *pThis               /* Search for prior reference to this subquery */
+  SrcItem *pThis,              /* Search for prior reference to this subquery */
+  int iFirst, int iEnd         /* Range of FROM-clause entries to search. */
 ){
   SrcItem *pItem;
   assert( pThis->pSelect!=0 );
   if( pThis->pSelect->selFlags & SF_PushDown ) return 0;
-  for(pItem = pTabList->a; pItem<pThis; pItem++){
+  while( iFirst<iEnd ){
+    pItem = &pTabList->a[iFirst++];
     if( pItem->pSelect==0 ) continue;
     if( pItem->fg.viaCoroutine ) continue;
     if( pItem->zName==0 ) continue;
@@ -142101,7 +148338,6 @@ static void agginfoFree(sqlite3 *db, AggInfo *p){
   sqlite3DbFreeNN(db, p);
 }
 
-#ifdef SQLITE_COUNTOFVIEW_OPTIMIZATION
 /*
 ** Attempt to transform a query of the form
 **
@@ -142129,7 +148365,9 @@ static int countOfViewOptimization(Parse *pParse, Select *p){
   if( (p->selFlags & SF_Aggregate)==0 ) return 0;   /* This is an aggregate */
   if( p->pEList->nExpr!=1 ) return 0;               /* Single result column */
   if( p->pWhere ) return 0;
+  if( p->pHaving ) return 0;
   if( p->pGroupBy ) return 0;
+  if( p->pOrderBy ) return 0;
   pExpr = p->pEList->a[0].pExpr;
   if( pExpr->op!=TK_AGG_FUNCTION ) return 0;        /* Result is an aggregate */
   assert( ExprUseUToken(pExpr) );
@@ -142137,15 +148375,18 @@ static int countOfViewOptimization(Parse *pParse, Select *p){
   assert( ExprUseXList(pExpr) );
   if( pExpr->x.pList!=0 ) return 0;                 /* Must be count(*) */
   if( p->pSrc->nSrc!=1 ) return 0;                  /* One table in FROM */
+  if( ExprHasProperty(pExpr, EP_WinFunc) ) return 0;/* Not a window function */
   pSub = p->pSrc->a[0].pSelect;
   if( pSub==0 ) return 0;                           /* The FROM is a subquery */
-  if( pSub->pPrior==0 ) return 0;                   /* Must be a compound ry */
+  if( pSub->pPrior==0 ) return 0;                   /* Must be a compound */
+  if( pSub->selFlags & SF_CopyCte ) return 0;       /* Not a CTE */
   do{
     if( pSub->op!=TK_ALL && pSub->pPrior ) return 0;  /* Must be UNION ALL */
     if( pSub->pWhere ) return 0;                      /* No WHERE clause */
     if( pSub->pLimit ) return 0;                      /* No LIMIT clause */
     if( pSub->selFlags & SF_Aggregate ) return 0;     /* Not an aggregate */
-    pSub = pSub->pPrior;                              /* Repeat over compound */
+    assert( pSub->pHaving==0 );                       /* Due to the previous */
+    pSub = pSub->pPrior;                              /* Repeat over compound */
   }while( pSub );
 
   /* If we reach this point then it is OK to perform the transformation */
@@ -142181,14 +148422,13 @@ static int countOfViewOptimization(Parse *pParse, Select *p){
   p->selFlags &= ~SF_Aggregate;
 
 #if TREETRACE_ENABLED
-  if( sqlite3TreeTrace & 0x400 ){
-    SELECTTRACE(0x400,pParse,p,("After count-of-view optimization:\n"));
+  if( sqlite3TreeTrace & 0x200 ){
+    TREETRACE(0x200,pParse,p,("After count-of-view optimization:\n"));
     sqlite3TreeViewSelect(0, p, 0);
   }
 #endif
   return 1;
 }
-#endif /* SQLITE_COUNTOFVIEW_OPTIMIZATION */
 
 /*
 ** If any term of
pSrc, or any SF_NestedFrom sub-query, is not the same @@ -142213,6 +148453,68 @@ static int sameSrcAlias(SrcItem *p0, SrcList *pSrc){ return 0; } +/* +** Return TRUE (non-zero) if the i-th entry in the pTabList SrcList can +** be implemented as a co-routine. The i-th entry is guaranteed to be +** a subquery. +** +** The subquery is implemented as a co-routine if all of the following are +** true: +** +** (1) The subquery will likely be implemented in the outer loop of +** the query. This will be the case if any one of the following +** conditions hold: +** (a) The subquery is the only term in the FROM clause +** (b) The subquery is the left-most term and a CROSS JOIN or similar +** requires it to be the outer loop +** (c) All of the following are true: +** (i) The subquery is the left-most subquery in the FROM clause +** (ii) There is nothing that would prevent the subquery from +** being used as the outer loop if the sqlite3WhereBegin() +** routine nominates it to that position. +** (iii) The query is not a UPDATE ... FROM +** (2) The subquery is not a CTE that should be materialized because +** (a) the AS MATERIALIZED keyword is used, or +** (b) the CTE is used multiple times and does not have the +** NOT MATERIALIZED keyword +** (3) The subquery is not part of a left operand for a RIGHT JOIN +** (4) The SQLITE_Coroutine optimization disable flag is not set +** (5) The subquery is not self-joined +*/ +static int fromClauseTermCanBeCoroutine( + Parse *pParse, /* Parsing context */ + SrcList *pTabList, /* FROM clause */ + int i, /* Which term of the FROM clause holds the subquery */ + int selFlags /* Flags on the SELECT statement */ +){ + SrcItem *pItem = &pTabList->a[i]; + if( pItem->fg.isCte ){ + const CteUse *pCteUse = pItem->u2.pCteUse; + if( pCteUse->eM10d==M10d_Yes ) return 0; /* (2a) */ + if( pCteUse->nUse>=2 && pCteUse->eM10d!=M10d_No ) return 0; /* (2b) */ + } + if( pTabList->a[0].fg.jointype & JT_LTORJ ) return 0; /* (3) */ + if( OptimizationDisabled(pParse->db, SQLITE_Coroutines) ) return 0; /* (4) */ + if( isSelfJoinView(pTabList, pItem, i+1, pTabList->nSrc)!=0 ){ + return 0; /* (5) */ + } + if( i==0 ){ + if( pTabList->nSrc==1 ) return 1; /* (1a) */ + if( pTabList->a[1].fg.jointype & JT_CROSS ) return 1; /* (1b) */ + if( selFlags & SF_UpdateFrom ) return 0; /* (1c-iii) */ + return 1; + } + if( selFlags & SF_UpdateFrom ) return 0; /* (1c-iii) */ + while( 1 /*exit-by-break*/ ){ + if( pItem->fg.jointype & (JT_OUTER|JT_CROSS) ) return 0; /* (1c-ii) */ + if( i==0 ) break; + i--; + pItem--; + if( pItem->pSelect!=0 ) return 0; /* (1c-i) */ + } + return 1; +} + /* ** Generate code for the SELECT statement given in the p argument. 
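/* [Editorial aside -- illustrative fragment, not part of the diff; assumes
** <stdio.h>, an open handle "db", and a table t1.  Whether a FROM-clause
** subquery was planned as a co-routine (per the fromClauseTermCanBeCoroutine()
** conditions above) or was materialized is visible via EXPLAIN QUERY PLAN: ] */
sqlite3_stmt *pStmt;
if( sqlite3_prepare_v2(db,
      "EXPLAIN QUERY PLAN SELECT * FROM (SELECT a FROM t1 ORDER BY a)",
      -1, &pStmt, 0)==SQLITE_OK ){
  while( sqlite3_step(pStmt)==SQLITE_ROW ){
    /* Column 3 of the EQP output is the detail text: expect a row reading
    ** "CO-ROUTINE ..." here, versus "MATERIALIZE ..." when the conditions
    ** above are not satisfied. */
    printf("%s\n", (const char*)sqlite3_column_text(pStmt, 3));
  }
  sqlite3_finalize(pStmt);
}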
** @@ -142258,8 +148560,8 @@ SQLITE_PRIVATE int sqlite3Select( assert( db->mallocFailed==0 ); if( sqlite3AuthCheck(pParse, SQLITE_SELECT, 0, 0, 0) ) return 1; #if TREETRACE_ENABLED - SELECTTRACE(1,pParse,p, ("begin processing:\n", pParse->addrExplain)); - if( sqlite3TreeTrace & 0x10100 ){ + TREETRACE(0x1,pParse,p, ("begin processing:\n", pParse->addrExplain)); + if( sqlite3TreeTrace & 0x10000 ){ if( (sqlite3TreeTrace & 0x10001)==0x10000 ){ sqlite3TreeViewLine(0, "In sqlite3Select() at %s:%d", __FILE__, __LINE__); @@ -142279,8 +148581,8 @@ SQLITE_PRIVATE int sqlite3Select( /* All of these destinations are also able to ignore the ORDER BY clause */ if( p->pOrderBy ){ #if TREETRACE_ENABLED - SELECTTRACE(1,pParse,p, ("dropping superfluous ORDER BY:\n")); - if( sqlite3TreeTrace & 0x100 ){ + TREETRACE(0x800,pParse,p, ("dropping superfluous ORDER BY:\n")); + if( sqlite3TreeTrace & 0x800 ){ sqlite3TreeViewExprList(0, p->pOrderBy, 0, "ORDERBY"); } #endif @@ -142300,8 +148602,8 @@ SQLITE_PRIVATE int sqlite3Select( assert( db->mallocFailed==0 ); assert( p->pEList!=0 ); #if TREETRACE_ENABLED - if( sqlite3TreeTrace & 0x104 ){ - SELECTTRACE(0x104,pParse,p, ("after name resolution:\n")); + if( sqlite3TreeTrace & 0x10 ){ + TREETRACE(0x10,pParse,p, ("after name resolution:\n")); sqlite3TreeViewSelect(0, p, 0); } #endif @@ -142342,8 +148644,8 @@ SQLITE_PRIVATE int sqlite3Select( goto select_end; } #if TREETRACE_ENABLED - if( p->pWin && (sqlite3TreeTrace & 0x108)!=0 ){ - SELECTTRACE(0x104,pParse,p, ("after window rewrite:\n")); + if( p->pWin && (sqlite3TreeTrace & 0x40)!=0 ){ + TREETRACE(0x40,pParse,p, ("after window rewrite:\n")); sqlite3TreeViewSelect(0, p, 0); } #endif @@ -142367,22 +148669,58 @@ SQLITE_PRIVATE int sqlite3Select( ** to a real table */ assert( pTab!=0 ); - /* Convert LEFT JOIN into JOIN if there are terms of the right table - ** of the LEFT JOIN used in the WHERE clause. + /* Try to simplify joins: + ** + ** LEFT JOIN -> JOIN + ** RIGHT JOIN -> JOIN + ** FULL JOIN -> RIGHT JOIN + ** + ** If terms of the i-th table are used in the WHERE clause in such a + ** way that the i-th table cannot be the NULL row of a join, then + ** perform the appropriate simplification. This is called + ** "OUTER JOIN strength reduction" in the SQLite documentation. 
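/* [Editorial aside -- a worked SQL example of the strength reduction
** described above; not part of the diff, table names invented.  Because
** "t2.x=5" can never be true for the all-NULL row that an unmatched LEFT
** JOIN would supply, the outer join degrades to a plain join:
**
**    SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.a WHERE t2.x=5;
**    -- planned the same as:
**    SELECT * FROM t1 JOIN t2 ON t1.a=t2.a WHERE t2.x=5;
**
** By the same argument a FULL JOIN whose left-hand rows are constrained
** to be non-NULL weakens to a RIGHT JOIN, as coded in the hunk below. ] */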
*/ - if( (pItem->fg.jointype & (JT_LEFT|JT_RIGHT))==JT_LEFT - && sqlite3ExprImpliesNonNullRow(p->pWhere, pItem->iCursor) + if( (pItem->fg.jointype & (JT_LEFT|JT_LTORJ))!=0 + && sqlite3ExprImpliesNonNullRow(p->pWhere, pItem->iCursor, + pItem->fg.jointype & JT_LTORJ) && OptimizationEnabled(db, SQLITE_SimplifyJoin) ){ - SELECTTRACE(0x100,pParse,p, - ("LEFT-JOIN simplifies to JOIN on term %d\n",i)); - pItem->fg.jointype &= ~(JT_LEFT|JT_OUTER); - assert( pItem->iCursor>=0 ); - unsetJoinExpr(p->pWhere, pItem->iCursor, - pTabList->a[0].fg.jointype & JT_LTORJ); + if( pItem->fg.jointype & JT_LEFT ){ + if( pItem->fg.jointype & JT_RIGHT ){ + TREETRACE(0x1000,pParse,p, + ("FULL-JOIN simplifies to RIGHT-JOIN on term %d\n",i)); + pItem->fg.jointype &= ~JT_LEFT; + }else{ + TREETRACE(0x1000,pParse,p, + ("LEFT-JOIN simplifies to JOIN on term %d\n",i)); + pItem->fg.jointype &= ~(JT_LEFT|JT_OUTER); + unsetJoinExpr(p->pWhere, pItem->iCursor, 0); + } + } + if( pItem->fg.jointype & JT_LTORJ ){ + for(j=i+1; jnSrc; j++){ + SrcItem *pI2 = &pTabList->a[j]; + if( pI2->fg.jointype & JT_RIGHT ){ + if( pI2->fg.jointype & JT_LEFT ){ + TREETRACE(0x1000,pParse,p, + ("FULL-JOIN simplifies to LEFT-JOIN on term %d\n",j)); + pI2->fg.jointype &= ~JT_RIGHT; + }else{ + TREETRACE(0x1000,pParse,p, + ("RIGHT-JOIN simplifies to JOIN on term %d\n",j)); + pI2->fg.jointype &= ~(JT_RIGHT|JT_OUTER); + unsetJoinExpr(p->pWhere, pI2->iCursor, 1); + } + } + } + for(j=pTabList->nSrc-1; j>=0; j--){ + pTabList->a[j].fg.jointype &= ~JT_LTORJ; + if( pTabList->a[j].fg.jointype & JT_RIGHT ) break; + } + } } - /* No futher action if this term of the FROM clause is no a subquery */ + /* No further action if this term of the FROM clause is not a subquery */ if( pSub==0 ) continue; /* Catch mismatch in the declared columns of a view and the number of @@ -142393,6 +148731,14 @@ SQLITE_PRIVATE int sqlite3Select( goto select_end; } + /* Do not attempt the usual optimizations (flattening and ORDER BY + ** elimination) on a MATERIALIZED common table expression because + ** a MATERIALIZED common table expression is an optimization fence. + */ + if( pItem->fg.isCte && pItem->u2.pCteUse->eM10d==M10d_Yes ){ + continue; + } + /* Do not try to flatten an aggregate subquery. ** ** Flattening an aggregate subquery is only possible if the outer query @@ -142422,6 +148768,8 @@ SQLITE_PRIVATE int sqlite3Select( ** (a) The outer query has a different ORDER BY clause ** (b) The subquery is part of a join ** See forum post 062d576715d277c8 + ** + ** Also retain the ORDER BY if the OmitOrderBy optimization is disabled. 
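/* [Editorial aside -- SQL illustrations of the rules above; not part of the
** diff, table names invented.
**
**    SELECT count(*) FROM (SELECT a FROM t1 ORDER BY a);
**    -- inner ORDER BY is superfluous and is dropped
**
**    SELECT x FROM (SELECT a AS x FROM t1 ORDER BY a LIMIT 10);
**    -- inner ORDER BY is retained: it determines WHICH ten rows the
**    -- LIMIT keeps, not merely their order
** ] */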
*/ if( pSub->pOrderBy!=0 && (p->pOrderBy!=0 || pTabList->nSrc>1) /* Condition (5) */ @@ -142430,7 +148778,7 @@ SQLITE_PRIVATE int sqlite3Select( && (p->selFlags & SF_OrderByReqd)==0 /* Condition (3) and (4) */ && OptimizationEnabled(db, SQLITE_OmitOrderBy) ){ - SELECTTRACE(0x100,pParse,p, + TREETRACE(0x800,pParse,p, ("omit superfluous ORDER BY on %r FROM-clause subquery\n",i+1)); sqlite3ParserAddCleanup(pParse, (void(*)(sqlite3*,void*))sqlite3ExprListDelete, @@ -142485,8 +148833,8 @@ SQLITE_PRIVATE int sqlite3Select( if( p->pPrior ){ rc = multiSelect(pParse, p, pDest); #if TREETRACE_ENABLED - SELECTTRACE(0x1,pParse,p,("end compound-select processing\n")); - if( (sqlite3TreeTrace & 0x2000)!=0 && ExplainQueryPlanParent(pParse)==0 ){ + TREETRACE(0x400,pParse,p,("end compound-select processing\n")); + if( (sqlite3TreeTrace & 0x400)!=0 && ExplainQueryPlanParent(pParse)==0 ){ sqlite3TreeViewSelect(0, p, 0); } #endif @@ -142506,24 +148854,21 @@ SQLITE_PRIVATE int sqlite3Select( && propagateConstants(pParse, p) ){ #if TREETRACE_ENABLED - if( sqlite3TreeTrace & 0x100 ){ - SELECTTRACE(0x100,pParse,p,("After constant propagation:\n")); + if( sqlite3TreeTrace & 0x2000 ){ + TREETRACE(0x2000,pParse,p,("After constant propagation:\n")); sqlite3TreeViewSelect(0, p, 0); } #endif }else{ - SELECTTRACE(0x100,pParse,p,("Constant propagation not helpful\n")); + TREETRACE(0x2000,pParse,p,("Constant propagation not helpful\n")); } -#ifdef SQLITE_COUNTOFVIEW_OPTIMIZATION if( OptimizationEnabled(db, SQLITE_QueryFlattener|SQLITE_CountOfView) && countOfViewOptimization(pParse, p) ){ if( db->mallocFailed ) goto select_end; - pEList = p->pEList; pTabList = p->pSrc; } -#endif /* For each term in the FROM clause, do two things: ** (1) Authorized unreferenced tables @@ -142582,39 +148927,42 @@ SQLITE_PRIVATE int sqlite3Select( if( OptimizationEnabled(db, SQLITE_PushDown) && (pItem->fg.isCte==0 || (pItem->u2.pCteUse->eM10d!=M10d_Yes && pItem->u2.pCteUse->nUse<2)) - && pushDownWhereTerms(pParse, pSub, p->pWhere, pItem) + && pushDownWhereTerms(pParse, pSub, p->pWhere, pTabList, i) ){ #if TREETRACE_ENABLED - if( sqlite3TreeTrace & 0x100 ){ - SELECTTRACE(0x100,pParse,p, + if( sqlite3TreeTrace & 0x4000 ){ + TREETRACE(0x4000,pParse,p, ("After WHERE-clause push-down into subquery %d:\n", pSub->selId)); sqlite3TreeViewSelect(0, p, 0); } #endif assert( pItem->pSelect && (pItem->pSelect->selFlags & SF_PushDown)!=0 ); }else{ - SELECTTRACE(0x100,pParse,p,("Push-down not possible\n")); + TREETRACE(0x4000,pParse,p,("Push-down not possible\n")); + } + + /* Convert unused result columns of the subquery into simple NULL + ** expressions, to avoid unneeded searching and computation. 
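/* [Editorial aside -- not part of the diff; a hedged sketch of the effect.
** For a query such as
**
**    SELECT a FROM (SELECT a, b, c FROM t1);
**
** columns "b" and "c" of the inner result set are never read, so the
** subquery is treated roughly as "SELECT a, NULL, NULL FROM t1", sparing
** the b-tree lookups and expression evaluation for the unused columns. ] */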
+ */ + if( OptimizationEnabled(db, SQLITE_NullUnusedCols) + && disableUnusedSubqueryResultColumns(pItem) + ){ +#if TREETRACE_ENABLED + if( sqlite3TreeTrace & 0x4000 ){ + TREETRACE(0x4000,pParse,p, + ("Change unused result columns to NULL for subquery %d:\n", + pSub->selId)); + sqlite3TreeViewSelect(0, p, 0); + } +#endif } zSavedAuthContext = pParse->zAuthContext; pParse->zAuthContext = pItem->zName; /* Generate code to implement the subquery - ** - ** The subquery is implemented as a co-routine if all of the following are - ** true: - ** - ** (1) the subquery is guaranteed to be the outer loop (so that - ** it does not need to be computed more than once), and - ** (2) the subquery is not a CTE that should be materialized - ** (3) the subquery is not part of a left operand for a RIGHT JOIN */ - if( i==0 - && (pTabList->nSrc==1 - || (pTabList->a[1].fg.jointype&(JT_OUTER|JT_CROSS))!=0) /* (1) */ - && (pItem->fg.isCte==0 || pItem->u2.pCteUse->eM10d!=M10d_Yes) /* (2) */ - && (pTabList->a[0].fg.jointype & JT_LTORJ)==0 /* (3) */ - ){ + if( fromClauseTermCanBeCoroutine(pParse, pTabList, i, p->selFlags) ){ /* Implement a co-routine that will return a single row of the result ** set on each invocation. */ @@ -142636,7 +148984,7 @@ SQLITE_PRIVATE int sqlite3Select( }else if( pItem->fg.isCte && pItem->u2.pCteUse->addrM9e>0 ){ /* This is a CTE for which materialization code has already been ** generated. Invoke the subroutine to compute the materialization, - ** the make the pItem->iCursor be a copy of the ephemerial table that + ** the make the pItem->iCursor be a copy of the ephemeral table that ** holds the result of the materialization. */ CteUse *pCteUse = pItem->u2.pCteUse; sqlite3VdbeAddOp2(v, OP_Gosub, pCteUse->regRtn, pCteUse->addrM9e); @@ -142645,7 +148993,7 @@ SQLITE_PRIVATE int sqlite3Select( VdbeComment((v, "%!S", pItem)); } pSub->nSelectRow = pCteUse->nRowEst; - }else if( (pPrior = isSelfJoinView(pTabList, pItem))!=0 ){ + }else if( (pPrior = isSelfJoinView(pTabList, pItem, 0, i))!=0 ){ /* This view has already been materialized by a prior entry in ** this same FROM clause. Reuse it. */ if( pPrior->addrFillSub ){ @@ -142659,6 +149007,9 @@ SQLITE_PRIVATE int sqlite3Select( ** the same view can reuse the materialization. 
*/ int topAddr; int onceAddr = 0; +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS + int addrExplain; +#endif pItem->regReturn = ++pParse->nMem; topAddr = sqlite3VdbeAddOp0(v, OP_Goto); @@ -142674,12 +149025,14 @@ SQLITE_PRIVATE int sqlite3Select( VdbeNoopComment((v, "materialize %!S", pItem)); } sqlite3SelectDestInit(&dest, SRT_EphemTab, pItem->iCursor); - ExplainQueryPlan((pParse, 1, "MATERIALIZE %!S", pItem)); + + ExplainQueryPlan2(addrExplain, (pParse, 1, "MATERIALIZE %!S", pItem)); sqlite3Select(pParse, pSub, &dest); pItem->pTab->nRowLogEst = pSub->nSelectRow; if( onceAddr ) sqlite3VdbeJumpHere(v, onceAddr); sqlite3VdbeAddOp2(v, OP_Return, pItem->regReturn, topAddr+1); VdbeComment((v, "end %!S", pItem)); + sqlite3VdbeScanStatusRange(v, addrExplain, addrExplain, -1); sqlite3VdbeJumpHere(v, topAddr); sqlite3ClearTempRegCache(pParse); if( pItem->fg.isCte && pItem->fg.isCorrelated==0 ){ @@ -142705,8 +149058,8 @@ SQLITE_PRIVATE int sqlite3Select( sDistinct.isTnct = (p->selFlags & SF_Distinct)!=0; #if TREETRACE_ENABLED - if( sqlite3TreeTrace & 0x400 ){ - SELECTTRACE(0x400,pParse,p,("After all FROM-clause analysis:\n")); + if( sqlite3TreeTrace & 0x8000 ){ + TREETRACE(0x8000,pParse,p,("After all FROM-clause analysis:\n")); sqlite3TreeViewSelect(0, p, 0); } #endif @@ -142742,8 +149095,8 @@ SQLITE_PRIVATE int sqlite3Select( sDistinct.isTnct = 2; #if TREETRACE_ENABLED - if( sqlite3TreeTrace & 0x400 ){ - SELECTTRACE(0x400,pParse,p,("Transform DISTINCT into GROUP BY:\n")); + if( sqlite3TreeTrace & 0x20000 ){ + TREETRACE(0x20000,pParse,p,("Transform DISTINCT into GROUP BY:\n")); sqlite3TreeViewSelect(0, p, 0); } #endif @@ -142795,7 +149148,7 @@ SQLITE_PRIVATE int sqlite3Select( if( (p->selFlags & SF_FixedLimit)==0 ){ p->nSelectRow = 320; /* 4 billion rows */ } - computeLimitRegisters(pParse, p, iEnd); + if( p->pLimit ) computeLimitRegisters(pParse, p, iEnd); if( p->iLimit==0 && sSort.addrSortIndex>=0 ){ sqlite3VdbeChangeOpcode(v, sSort.addrSortIndex, OP_SorterOpen); sSort.sortFlags |= SORTFLAG_UseSorter; @@ -142829,7 +149182,7 @@ SQLITE_PRIVATE int sqlite3Select( /* Begin the database scan. */ - SELECTTRACE(1,pParse,p,("WhereBegin\n")); + TREETRACE(0x2,pParse,p,("WhereBegin\n")); pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, sSort.pOrderBy, p->pEList, p, wctrlFlags, p->nSelectRow); if( pWInfo==0 ) goto select_end; @@ -142846,7 +149199,7 @@ SQLITE_PRIVATE int sqlite3Select( sSort.pOrderBy = 0; } } - SELECTTRACE(1,pParse,p,("WhereBegin returns\n")); + TREETRACE(0x2,pParse,p,("WhereBegin returns\n")); /* If sorting index that was created by a prior OP_OpenEphemeral ** instruction ended up not being needed, then change the OP_OpenEphemeral @@ -142885,7 +149238,7 @@ SQLITE_PRIVATE int sqlite3Select( /* End the database scan loop. */ - SELECTTRACE(1,pParse,p,("WhereEnd\n")); + TREETRACE(0x2,pParse,p,("WhereEnd\n")); sqlite3WhereEnd(pWInfo); } }else{ @@ -142966,12 +149319,14 @@ SQLITE_PRIVATE int sqlite3Select( goto select_end; } pAggInfo->selId = p->selId; +#ifdef SQLITE_DEBUG + pAggInfo->pSelect = p; +#endif memset(&sNC, 0, sizeof(sNC)); sNC.pParse = pParse; sNC.pSrcList = pTabList; sNC.uNC.pAggInfo = pAggInfo; VVA_ONLY( sNC.ncFlags = NC_UAggInfo; ) - pAggInfo->mnReg = pParse->nMem+1; pAggInfo->nSortingColumn = pGroupBy ? 
pGroupBy->nExpr : 0; pAggInfo->pGroupBy = pGroupBy; sqlite3ExprAnalyzeAggList(&sNC, pEList); @@ -142992,40 +149347,17 @@ SQLITE_PRIVATE int sqlite3Select( }else{ minMaxFlag = WHERE_ORDERBY_NORMAL; } - for(i=0; inFunc; i++){ - Expr *pExpr = pAggInfo->aFunc[i].pFExpr; - assert( ExprUseXList(pExpr) ); - sNC.ncFlags |= NC_InAggFunc; - sqlite3ExprAnalyzeAggList(&sNC, pExpr->x.pList); -#ifndef SQLITE_OMIT_WINDOWFUNC - assert( !IsWindowFunc(pExpr) ); - if( ExprHasProperty(pExpr, EP_WinFunc) ){ - sqlite3ExprAnalyzeAggregates(&sNC, pExpr->y.pWin->pFilter); - } -#endif - sNC.ncFlags &= ~NC_InAggFunc; - } - pAggInfo->mxReg = pParse->nMem; + analyzeAggFuncArgs(pAggInfo, &sNC); if( db->mallocFailed ) goto select_end; #if TREETRACE_ENABLED - if( sqlite3TreeTrace & 0x400 ){ - int ii; - SELECTTRACE(0x400,pParse,p,("After aggregate analysis %p:\n", pAggInfo)); + if( sqlite3TreeTrace & 0x20 ){ + TREETRACE(0x20,pParse,p,("After aggregate analysis %p:\n", pAggInfo)); sqlite3TreeViewSelect(0, p, 0); if( minMaxFlag ){ sqlite3DebugPrintf("MIN/MAX Optimization (0x%02x) adds:\n", minMaxFlag); sqlite3TreeViewExprList(0, pMinMaxOrderBy, 0, "ORDERBY"); } - for(ii=0; iinColumn; ii++){ - sqlite3DebugPrintf("agg-column[%d] iMem=%d\n", - ii, pAggInfo->aCol[ii].iMem); - sqlite3TreeViewExpr(0, pAggInfo->aCol[ii].pCExpr, 0); - } - for(ii=0; iinFunc; ii++){ - sqlite3DebugPrintf("agg-func[%d]: iMem=%d\n", - ii, pAggInfo->aFunc[ii].iMem); - sqlite3TreeViewExpr(0, pAggInfo->aFunc[ii].pFExpr, 0); - } + printAggInfo(pAggInfo); } #endif @@ -143035,7 +149367,7 @@ SQLITE_PRIVATE int sqlite3Select( */ if( pGroupBy ){ KeyInfo *pKeyInfo; /* Keying information for the group by clause */ - int addr1; /* A-vs-B comparision jump */ + int addr1; /* A-vs-B comparison jump */ int addrOutputRow; /* Start of subroutine that outputs a result row */ int regOutputRow; /* Return address register for output subroutine */ int addrSetAbort; /* Set the abort flag and return */ @@ -143094,17 +149426,21 @@ SQLITE_PRIVATE int sqlite3Select( ** in the right order to begin with. */ sqlite3VdbeAddOp2(v, OP_Gosub, regReset, addrReset); - SELECTTRACE(1,pParse,p,("WhereBegin\n")); + TREETRACE(0x2,pParse,p,("WhereBegin\n")); pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, pGroupBy, pDistinct, - 0, (sDistinct.isTnct==2 ? WHERE_DISTINCTBY : WHERE_GROUPBY) + p, (sDistinct.isTnct==2 ? WHERE_DISTINCTBY : WHERE_GROUPBY) | (orderByGrp ? WHERE_SORTBYGROUP : 0) | distFlag, 0 ); if( pWInfo==0 ){ sqlite3ExprListDelete(db, pDistinct); goto select_end; } + if( pParse->pIdxEpr ){ + optimizeAggregateUseOfIndexedExpr(pParse, p, pAggInfo, &sNC); + } + assignAggregateRegisters(pParse, pAggInfo); eDist = sqlite3WhereIsDistinct(pWInfo); - SELECTTRACE(1,pParse,p,("WhereBegin returns\n")); + TREETRACE(0x2,pParse,p,("WhereBegin returns\n")); if( sqlite3WhereIsOrdered(pWInfo)==pGroupBy->nExpr ){ /* The optimizer is able to deliver rows in group by order so ** we do not have to sort. The OP_OpenEphemeral table will be @@ -143122,9 +149458,13 @@ SQLITE_PRIVATE int sqlite3Select( int nCol; int nGroupBy; - explainTempTable(pParse, +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS + int addrExp; /* Address of OP_Explain instruction */ +#endif + ExplainQueryPlan2(addrExp, (pParse, 0, "USE TEMP B-TREE FOR %s", (sDistinct.isTnct && (p->selFlags&SF_Distinct)==0) ? 
- "DISTINCT" : "GROUP BY"); + "DISTINCT" : "GROUP BY" + )); groupBySort = 1; nGroupBy = pGroupBy->nExpr; @@ -143139,28 +149479,50 @@ SQLITE_PRIVATE int sqlite3Select( regBase = sqlite3GetTempRange(pParse, nCol); sqlite3ExprCodeExprList(pParse, pGroupBy, regBase, 0, 0); j = nGroupBy; + pAggInfo->directMode = 1; for(i=0; inColumn; i++){ struct AggInfo_col *pCol = &pAggInfo->aCol[i]; if( pCol->iSorterColumn>=j ){ - int r1 = j + regBase; - sqlite3ExprCodeGetColumnOfTable(v, - pCol->pTab, pCol->iTable, pCol->iColumn, r1); + sqlite3ExprCode(pParse, pCol->pCExpr, j + regBase); j++; } } + pAggInfo->directMode = 0; regRecord = sqlite3GetTempReg(pParse); + sqlite3VdbeScanStatusCounters(v, addrExp, 0, sqlite3VdbeCurrentAddr(v)); sqlite3VdbeAddOp3(v, OP_MakeRecord, regBase, nCol, regRecord); sqlite3VdbeAddOp2(v, OP_SorterInsert, pAggInfo->sortingIdx, regRecord); + sqlite3VdbeScanStatusRange(v, addrExp, sqlite3VdbeCurrentAddr(v)-2, -1); sqlite3ReleaseTempReg(pParse, regRecord); sqlite3ReleaseTempRange(pParse, regBase, nCol); - SELECTTRACE(1,pParse,p,("WhereEnd\n")); + TREETRACE(0x2,pParse,p,("WhereEnd\n")); sqlite3WhereEnd(pWInfo); pAggInfo->sortingIdxPTab = sortPTab = pParse->nTab++; sortOut = sqlite3GetTempReg(pParse); + sqlite3VdbeScanStatusCounters(v, addrExp, sqlite3VdbeCurrentAddr(v), 0); sqlite3VdbeAddOp3(v, OP_OpenPseudo, sortPTab, sortOut, nCol); sqlite3VdbeAddOp2(v, OP_SorterSort, pAggInfo->sortingIdx, addrEnd); VdbeComment((v, "GROUP BY sort")); VdbeCoverage(v); pAggInfo->useSortingIdx = 1; + sqlite3VdbeScanStatusRange(v, addrExp, -1, sortPTab); + sqlite3VdbeScanStatusRange(v, addrExp, -1, pAggInfo->sortingIdx); + } + + /* If there are entries in pAgggInfo->aFunc[] that contain subexpressions + ** that are indexed (and that were previously identified and tagged + ** in optimizeAggregateUseOfIndexedExpr()) then those subexpressions + ** must now be converted into a TK_AGG_COLUMN node so that the value + ** is correctly pulled from the index rather than being recomputed. */ + if( pParse->pIdxEpr ){ + aggregateConvertIndexedExprRefToColumn(pAggInfo); +#if TREETRACE_ENABLED + if( sqlite3TreeTrace & 0x20 ){ + TREETRACE(0x20, pParse, p, + ("AggInfo function expressions converted to reference index\n")); + sqlite3TreeViewSelect(0, p, 0); + printAggInfo(pAggInfo); + } +#endif } /* If the index or temporary table used by the GROUP BY sort @@ -143231,7 +149593,7 @@ SQLITE_PRIVATE int sqlite3Select( sqlite3VdbeAddOp2(v, OP_SorterNext, pAggInfo->sortingIdx,addrTopOfLoop); VdbeCoverage(v); }else{ - SELECTTRACE(1,pParse,p,("WhereEnd\n")); + TREETRACE(0x2,pParse,p,("WhereEnd\n")); sqlite3WhereEnd(pWInfo); sqlite3VdbeChangeToNoop(v, addrSortingIdx); } @@ -143341,7 +149703,8 @@ SQLITE_PRIVATE int sqlite3Select( if( pKeyInfo ){ sqlite3VdbeChangeP4(v, -1, (char *)pKeyInfo, P4_KEYINFO); } - sqlite3VdbeAddOp2(v, OP_Count, iCsr, pAggInfo->aFunc[0].iMem); + assignAggregateRegisters(pParse, pAggInfo); + sqlite3VdbeAddOp2(v, OP_Count, iCsr, AggInfoFuncReg(pAggInfo,0)); sqlite3VdbeAddOp1(v, OP_Close, iCsr); explainSimpleCount(pParse, pTab, pBest); }else{ @@ -143377,6 +149740,7 @@ SQLITE_PRIVATE int sqlite3Select( pDistinct = pAggInfo->aFunc[0].pFExpr->x.pList; distFlag = pDistinct ? (WHERE_WANT_DISTINCT|WHERE_AGG_DISTINCT) : 0; } + assignAggregateRegisters(pParse, pAggInfo); /* This case runs if the aggregate has no GROUP BY clause. 
The ** processing is much simpler since there is only a single row @@ -143393,13 +149757,13 @@ SQLITE_PRIVATE int sqlite3Select( assert( minMaxFlag==WHERE_ORDERBY_NORMAL || pMinMaxOrderBy!=0 ); assert( pMinMaxOrderBy==0 || pMinMaxOrderBy->nExpr==1 ); - SELECTTRACE(1,pParse,p,("WhereBegin\n")); + TREETRACE(0x2,pParse,p,("WhereBegin\n")); pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, pMinMaxOrderBy, - pDistinct, 0, minMaxFlag|distFlag, 0); + pDistinct, p, minMaxFlag|distFlag, 0); if( pWInfo==0 ){ goto select_end; } - SELECTTRACE(1,pParse,p,("WhereBegin returns\n")); + TREETRACE(0x2,pParse,p,("WhereBegin returns\n")); eDist = sqlite3WhereIsDistinct(pWInfo); updateAccumulator(pParse, regAcc, pAggInfo, eDist); if( eDist!=WHERE_DISTINCT_NOOP ){ @@ -143413,7 +149777,7 @@ SQLITE_PRIVATE int sqlite3Select( if( minMaxFlag ){ sqlite3WhereMinMaxOptEarlyOut(v, pWInfo); } - SELECTTRACE(1,pParse,p,("WhereEnd\n")); + TREETRACE(0x2,pParse,p,("WhereEnd\n")); sqlite3WhereEnd(pWInfo); finalizeAggFunctions(pParse, pAggInfo); } @@ -143435,8 +149799,6 @@ SQLITE_PRIVATE int sqlite3Select( ** and send them to the callback one by one. */ if( sSort.pOrderBy ){ - explainTempTable(pParse, - sSort.nOBSat>0 ? "RIGHT PART OF ORDER BY":"ORDER BY"); assert( p->pEList==pEList ); generateSortTail(pParse, p, &sSort, pEList->nExpr, pDest); } @@ -143460,7 +149822,7 @@ SQLITE_PRIVATE int sqlite3Select( if( pAggInfo && !db->mallocFailed ){ for(i=0; inColumn; i++){ Expr *pExpr = pAggInfo->aCol[i].pCExpr; - assert( pExpr!=0 ); + if( pExpr==0 ) continue; assert( pExpr->pAggInfo==pAggInfo ); assert( pExpr->iAgg==i ); } @@ -143474,8 +149836,8 @@ SQLITE_PRIVATE int sqlite3Select( #endif #if TREETRACE_ENABLED - SELECTTRACE(0x1,pParse,p,("end processing\n")); - if( (sqlite3TreeTrace & 0x2000)!=0 && ExplainQueryPlanParent(pParse)==0 ){ + TREETRACE(0x1,pParse,p,("end processing\n")); + if( (sqlite3TreeTrace & 0x40000)!=0 && ExplainQueryPlanParent(pParse)==0 ){ sqlite3TreeViewSelect(0, p, 0); } #endif @@ -143749,7 +150111,7 @@ SQLITE_PRIVATE Trigger *sqlite3TriggerList(Parse *pParse, Table *pTab){ if( pTrig->pTabSchema==pTab->pSchema && pTrig->table && 0==sqlite3StrICmp(pTrig->table, pTab->zName) - && pTrig->pTabSchema!=pTmpSchema + && (pTrig->pTabSchema!=pTmpSchema || pTrig->bReturning) ){ pTrig->pNext = pList; pList = pTrig; @@ -143871,6 +150233,10 @@ SQLITE_PRIVATE void sqlite3BeginTrigger( sqlite3ErrorMsg(pParse, "cannot create triggers on virtual tables"); goto trigger_orphan_error; } + if( (pTab->tabFlags & TF_Shadow)!=0 && sqlite3ReadOnlyShadowTables(db) ){ + sqlite3ErrorMsg(pParse, "cannot create triggers on shadow tables"); + goto trigger_orphan_error; + } /* Check that the trigger name is not reserved and that no trigger of the ** specified name exists */ @@ -143890,6 +150256,7 @@ SQLITE_PRIVATE void sqlite3BeginTrigger( }else{ assert( !db->init.busy ); sqlite3CodeVerifySchema(pParse, iDb); + VVA_ONLY( pParse->ifNotExists = 1; ) } goto trigger_cleanup; } @@ -144039,6 +150406,23 @@ SQLITE_PRIVATE void sqlite3FinishTrigger( Vdbe *v; char *z; + /* If this is a new CREATE TABLE statement, and if shadow tables + ** are read-only, and the trigger makes a change to a shadow table, + ** then raise an error - do not allow the trigger to be created. 
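/* [Editorial aside -- hypothetical example, not from the diff.  With FTS5,
** "ft_content" is a shadow table of the virtual table "ft"; when shadow
** tables are read-only (e.g. under SQLITE_DBCONFIG_DEFENSIVE), the new
** check above rejects the trigger at CREATE time rather than at run time:
**
**    CREATE VIRTUAL TABLE ft USING fts5(x);
**    CREATE TRIGGER tr1 AFTER INSERT ON t1 BEGIN
**      INSERT INTO ft_content VALUES(new.rowid, new.x);
**    END;
**    -- error: trigger "tr1" may not write to shadow table "ft_content"
** ] */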
*/ + if( sqlite3ReadOnlyShadowTables(db) ){ + TriggerStep *pStep; + for(pStep=pTrig->step_list; pStep; pStep=pStep->pNext){ + if( pStep->zTarget!=0 + && sqlite3ShadowTableName(db, pStep->zTarget) + ){ + sqlite3ErrorMsg(pParse, + "trigger \"%s\" may not write to shadow table \"%s\"", + pTrig->zName, pStep->zTarget); + goto triggerfinish_cleanup; + } + } + } + /* Make an entry in the sqlite_schema table */ v = sqlite3GetVdbe(pParse); if( v==0 ) goto triggerfinish_cleanup; @@ -144636,10 +151020,17 @@ static void codeReturningTrigger( SrcList sFrom; assert( v!=0 ); - assert( pParse->bReturning ); + if( !pParse->bReturning ){ + /* This RETURNING trigger must be for a different statement as + ** this statement lacks a RETURNING clause. */ + return; + } assert( db->pParse==pParse ); pReturning = pParse->u1.pReturning; - assert( pTrigger == &(pReturning->retTrig) ); + if( pTrigger != &(pReturning->retTrig) ){ + /* This RETURNING trigger is for a different statement */ + return; + } memset(&sSelect, 0, sizeof(sSelect)); memset(&sFrom, 0, sizeof(sFrom)); sSelect.pEList = sqlite3ExprListDup(db, pReturning->pReturnEL, 0); @@ -144654,7 +151045,7 @@ static void codeReturningTrigger( } sqlite3ExprListDelete(db, sSelect.pEList); pNew = sqlite3ExpandReturning(pParse, pReturning->pReturnEL, pTab); - if( !db->mallocFailed ){ + if( pParse->nErr==0 ){ NameContext sNC; memset(&sNC, 0, sizeof(sNC)); if( pReturning->nRetCol==0 ){ @@ -144862,7 +151253,7 @@ static TriggerPrg *codeRowTrigger( sSubParse.zAuthContext = pTrigger->zName; sSubParse.eTriggerOp = pTrigger->op; sSubParse.nQueryLoop = pParse->nQueryLoop; - sSubParse.disableVtab = pParse->disableVtab; + sSubParse.prepFlags = pParse->prepFlags; v = sqlite3GetVdbe(&sSubParse); if( v ){ @@ -145123,6 +151514,9 @@ SQLITE_PRIVATE u32 sqlite3TriggerColmask( Trigger *p; assert( isNew==1 || isNew==0 ); + if( IsView(pTab) ){ + return 0xffffffff; + } for(p=pTrigger; p; p=p->pNext){ if( p->op==op && (tr_tm&p->tr_tm) @@ -145208,11 +151602,14 @@ static void updateVirtualTable( ** it has been converted into REAL. 
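/* [Editorial aside -- public-API fragment, not part of the diff; assumes an
** open handle "db".  sqlite3ColumnDefault() above is what supplies the
** declared default for rows whose stored records predate the column: ] */
sqlite3_exec(db,
    "CREATE TABLE t1(a);"
    "INSERT INTO t1 VALUES(1);"
    "ALTER TABLE t1 ADD COLUMN b DEFAULT 'xyz';", 0, 0, 0);
/* "SELECT a, b FROM t1" now reports 1|xyz even though the stored record
** for that row is too short to contain a value for column "b". */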
*/ SQLITE_PRIVATE void sqlite3ColumnDefault(Vdbe *v, Table *pTab, int i, int iReg){ + Column *pCol; assert( pTab!=0 ); - if( !IsView(pTab) ){ + assert( pTab->nCol>i ); + pCol = &pTab->aCol[i]; + if( pCol->iDflt ){ sqlite3_value *pValue = 0; u8 enc = ENC(sqlite3VdbeDb(v)); - Column *pCol = &pTab->aCol[i]; + assert( !IsView(pTab) ); VdbeComment((v, "%s.%s", pTab->zName, pCol->zCnName)); assert( inCol ); sqlite3ValueFromExpr(sqlite3VdbeDb(v), @@ -145223,7 +151620,7 @@ SQLITE_PRIVATE void sqlite3ColumnDefault(Vdbe *v, Table *pTab, int i, int iReg){ } } #ifndef SQLITE_OMIT_FLOATING_POINT - if( pTab->aCol[i].affinity==SQLITE_AFF_REAL && !IsVirtual(pTab) ){ + if( pCol->affinity==SQLITE_AFF_REAL && !IsVirtual(pTab) ){ sqlite3VdbeAddOp1(v, OP_RealAffinity, iReg); } #endif @@ -145370,7 +151767,7 @@ static void updateFromSelect( assert( pTabList->nSrc>1 ); if( pSrc ){ - pSrc->a[0].fg.notCte = 1; + assert( pSrc->a[0].fg.notCte ); pSrc->a[0].iCursor = -1; pSrc->a[0].pTab->nTabRef--; pSrc->a[0].pTab = 0; @@ -145409,7 +151806,8 @@ static void updateFromSelect( } } pSelect = sqlite3SelectNew(pParse, pList, - pSrc, pWhere2, pGrp, 0, pOrderBy2, SF_UFSrcCheck|SF_IncludeHidden, pLimit2 + pSrc, pWhere2, pGrp, 0, pOrderBy2, + SF_UFSrcCheck|SF_IncludeHidden|SF_UpdateFrom, pLimit2 ); if( pSelect ) pSelect->selFlags |= SF_OrderByReqd; sqlite3SelectDestInit(&dest, eDest, iEph); @@ -145553,7 +151951,7 @@ SQLITE_PRIVATE void sqlite3Update( if( sqlite3ViewGetColumnNames(pParse, pTab) ){ goto update_cleanup; } - if( sqlite3IsReadOnly(pParse, pTab, tmask) ){ + if( sqlite3IsReadOnly(pParse, pTab, pTrigger) ){ goto update_cleanup; } @@ -145872,12 +152270,22 @@ SQLITE_PRIVATE void sqlite3Update( /* Begin the database scan. ** ** Do not consider a single-pass strategy for a multi-row update if - ** there are any triggers or foreign keys to process, or rows may - ** be deleted as a result of REPLACE conflict handling. Any of these - ** things might disturb a cursor being used to scan through the table - ** or index, causing a single-pass approach to malfunction. */ + ** there is anything that might disrupt the cursor being used to do + ** the UPDATE: + ** (1) This is a nested UPDATE + ** (2) There are triggers + ** (3) There are FOREIGN KEY constraints + ** (4) There are REPLACE conflict handlers + ** (5) There are subqueries in the WHERE clause + */ flags = WHERE_ONEPASS_DESIRED; - if( !pParse->nested && !pTrigger && !hasFK && !chngKey && !bReplace ){ + if( !pParse->nested + && !pTrigger + && !hasFK + && !chngKey + && !bReplace + && (pWhere==0 || !ExprHasProperty(pWhere, EP_Subquery)) + ){ flags |= WHERE_ONEPASS_MULTIROW; } pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere,0,0,0,flags,iIdxCur); @@ -145948,6 +152356,8 @@ SQLITE_PRIVATE void sqlite3Update( if( !isView ){ int addrOnce = 0; + int iNotUsed1 = 0; + int iNotUsed2 = 0; /* Open every index that needs updating. 
*/ if( eOnePass!=ONEPASS_OFF ){ @@ -145959,7 +152369,7 @@ SQLITE_PRIVATE void sqlite3Update( addrOnce = sqlite3VdbeAddOp0(v, OP_Once); VdbeCoverage(v); } sqlite3OpenTableAndIndices(pParse, pTab, OP_OpenWrite, 0, iBaseCur, - aToOpen, 0, 0); + aToOpen, &iNotUsed1, &iNotUsed2); if( addrOnce ){ sqlite3VdbeJumpHereOrPopInst(v, addrOnce); } @@ -146250,8 +152660,10 @@ SQLITE_PRIVATE void sqlite3Update( sqlite3VdbeAddOp2(v, OP_AddImm, regRowCount, 1); } - sqlite3CodeRowTrigger(pParse, pTrigger, TK_UPDATE, pChanges, - TRIGGER_AFTER, pTab, regOldRowid, onError, labelContinue); + if( pTrigger ){ + sqlite3CodeRowTrigger(pParse, pTrigger, TK_UPDATE, pChanges, + TRIGGER_AFTER, pTab, regOldRowid, onError, labelContinue); + } /* Repeat the above with the next record to be updated, until ** all record selected by the WHERE clause have been updated. @@ -146346,7 +152758,7 @@ static void updateVirtualTable( int nArg = 2 + pTab->nCol; /* Number of arguments to VUpdate */ int regArg; /* First register in VUpdate arg array */ int regRec; /* Register in which to assemble record */ - int regRowid; /* Register for ephem table rowid */ + int regRowid; /* Register for ephemeral table rowid */ int iCsr = pSrc->a[0].iCursor; /* Cursor used for virtual table scan */ int aDummy[2]; /* Unused arg for sqlite3WhereOkOnePass() */ int eOnePass; /* True to use onepass strategy */ @@ -146390,7 +152802,9 @@ static void updateVirtualTable( sqlite3ExprDup(db, pChanges->a[aXRef[i]].pExpr, 0) ); }else{ - pList = sqlite3ExprListAppend(pParse, pList, exprRowColumn(pParse, i)); + Expr *pRowExpr = exprRowColumn(pParse, i); + if( pRowExpr ) pRowExpr->op2 = OPFLAG_NOCHNG; + pList = sqlite3ExprListAppend(pParse, pList, pRowExpr); } } @@ -146467,7 +152881,7 @@ static void updateVirtualTable( sqlite3WhereEnd(pWInfo); } - /* Begin scannning through the ephemeral table. */ + /* Begin scanning through the ephemeral table. */ addr = sqlite3VdbeAddOp1(v, OP_Rewind, ephemTab); VdbeCoverage(v); /* Extract arguments from the current row of the ephemeral table and @@ -146663,6 +153077,7 @@ SQLITE_PRIVATE int sqlite3UpsertAnalyzeTarget( if( pIdx->aiColumn[ii]==XN_EXPR ){ assert( pIdx->aColExpr!=0 ); assert( pIdx->aColExpr->nExpr>ii ); + assert( pIdx->bHasExpr ); pExpr = pIdx->aColExpr->a[ii].pExpr; if( pExpr->op!=TK_COLLATE ){ sCol[0].pLeft = pExpr; @@ -146674,7 +153089,7 @@ SQLITE_PRIVATE int sqlite3UpsertAnalyzeTarget( pExpr = &sCol[0]; } for(jj=0; jja[jj].pExpr,pExpr,iCursor)<2 ){ + if( sqlite3ExprCompare(0,pTarget->a[jj].pExpr,pExpr,iCursor)<2 ){ break; /* Column ii of the index matches column jj of target */ } } @@ -146976,6 +153391,7 @@ SQLITE_PRIVATE SQLITE_NOINLINE int sqlite3RunVacuum( int nDb; /* Number of attached databases */ const char *zDbMain; /* Schema name of database to vacuum */ const char *zOut; /* Name of output file */ + u32 pgflags = PAGER_SYNCHRONOUS_OFF; /* sync flags for output db */ if( !db->autoCommit ){ sqlite3SetString(pzErrMsg, db, "cannot VACUUM from within a transaction"); @@ -147022,7 +153438,7 @@ SQLITE_PRIVATE SQLITE_NOINLINE int sqlite3RunVacuum( ** (possibly synchronous) transaction opened on the main database before ** sqlite3BtreeCopyFile() is called. ** - ** An optimisation would be to use a non-journaled pager. + ** An optimization would be to use a non-journaled pager. ** (Later:) I tried setting "PRAGMA vacuum_db.journal_mode=OFF" but ** that actually made the VACUUM run slower. 
Very little journalling ** actually occurs when doing a vacuum since the vacuum_db is initially @@ -147047,12 +153463,17 @@ SQLITE_PRIVATE SQLITE_NOINLINE int sqlite3RunVacuum( goto end_of_vacuum; } db->mDbFlags |= DBFLAG_VacuumInto; + + /* For a VACUUM INTO, the pager-flags are set to the same values as + ** they are for the database being vacuumed, except that PAGER_CACHESPILL + ** is always set. */ + pgflags = db->aDb[iDb].safety_level | (db->flags & PAGER_FLAGS_MASK); } nRes = sqlite3BtreeGetRequestedReserve(pMain); sqlite3BtreeSetCacheSize(pTemp, db->aDb[iDb].pSchema->cache_size); sqlite3BtreeSetSpillSize(pTemp, sqlite3BtreeSetSpillSize(pMain,0)); - sqlite3BtreeSetPagerFlags(pTemp, PAGER_SYNCHRONOUS_OFF|PAGER_CACHESPILL); + sqlite3BtreeSetPagerFlags(pTemp, pgflags|PAGER_CACHESPILL); /* Begin a transaction and take an exclusive lock on the main database ** file. This is done before the sqlite3BtreeGetPageSize(pMain) call below, @@ -147436,10 +153857,10 @@ SQLITE_PRIVATE void sqlite3VtabUnlock(VTable *pVTab){ pVTab->nRef--; if( pVTab->nRef==0 ){ sqlite3_vtab *p = pVTab->pVtab; - sqlite3VtabModuleUnref(pVTab->db, pVTab->pMod); if( p ){ p->pModule->xDisconnect(p); } + sqlite3VtabModuleUnref(pVTab->db, pVTab->pMod); sqlite3DbFree(db, pVTab); } } @@ -147565,7 +153986,8 @@ SQLITE_PRIVATE void sqlite3VtabUnlockList(sqlite3 *db){ */ SQLITE_PRIVATE void sqlite3VtabClear(sqlite3 *db, Table *p){ assert( IsVirtual(p) ); - if( !db || db->pnBytesFreed==0 ) vtabDisconnectAll(0, p); + assert( db!=0 ); + if( db->pnBytesFreed==0 ) vtabDisconnectAll(0, p); if( p->u.vtab.azArg ){ int i; for(i=0; iu.vtab.nArg; i++){ @@ -147705,7 +154127,7 @@ SQLITE_PRIVATE void sqlite3VtabFinishParse(Parse *pParse, Token *pEnd){ ** the information we've collected. ** ** The VM register number pParse->regRowid holds the rowid of an - ** entry in the sqlite_schema table tht was created for this vtab + ** entry in the sqlite_schema table that was created for this vtab ** by sqlite3StartTable(). 
*/ iDb = sqlite3SchemaToIndex(db, pTab->pSchema); @@ -147834,7 +154256,9 @@ static int vtabCallConstructor( sCtx.pPrior = db->pVtabCtx; sCtx.bDeclared = 0; db->pVtabCtx = &sCtx; + pTab->nTabRef++; rc = xConstruct(db, pMod->pAux, nArg, azArg, &pVTable->pVtab, &zErr); + sqlite3DeleteTable(db, pTab); db->pVtabCtx = sCtx.pPrior; if( rc==SQLITE_NOMEM ) sqlite3OomFault(db); assert( sCtx.pTab==pTab ); @@ -148043,7 +154467,7 @@ SQLITE_API int sqlite3_declare_vtab(sqlite3 *db, const char *zCreateTable){ sqlite3_mutex_enter(db->mutex); pCtx = db->pVtabCtx; if( !pCtx || pCtx->bDeclared ){ - sqlite3Error(db, SQLITE_MISUSE); + sqlite3Error(db, SQLITE_MISUSE_BKPT); sqlite3_mutex_leave(db->mutex); return SQLITE_MISUSE_BKPT; } @@ -148324,7 +154748,10 @@ SQLITE_PRIVATE int sqlite3VtabSavepoint(sqlite3 *db, int op, int iSavepoint){ break; } if( xMethod && pVTab->iSavepoint>iSavepoint ){ + u64 savedFlags = (db->flags & SQLITE_Defensive); + db->flags &= ~(u64)SQLITE_Defensive; rc = xMethod(pVTab->pVtab, iSavepoint); + db->flags |= savedFlags; } sqlite3VtabUnlock(pVTab); } @@ -148365,7 +154792,7 @@ SQLITE_PRIVATE FuncDef *sqlite3VtabOverloadFunction( if( pExpr->op!=TK_COLUMN ) return pDef; assert( ExprUseYTab(pExpr) ); pTab = pExpr->y.pTab; - if( pTab==0 ) return pDef; + if( NEVER(pTab==0) ) return pDef; if( !IsVirtual(pTab) ) return pDef; pVtab = sqlite3GetVTable(db, pTab)->pVtab; assert( pVtab!=0 ); @@ -148444,7 +154871,7 @@ SQLITE_PRIVATE void sqlite3VtabMakeWritable(Parse *pParse, Table *pTab){ ** ** An eponymous virtual table instance is one that is named after its ** module, and more importantly, does not require a CREATE VIRTUAL TABLE -** statement in order to come into existance. Eponymous virtual table +** statement in order to come into existence. Eponymous virtual table ** instances always exist. They cannot be DROP-ed. ** ** Any virtual table module for which xConnect and xCreate are the same @@ -148553,6 +154980,10 @@ SQLITE_API int sqlite3_vtab_config(sqlite3 *db, int op, ...){ p->pVTable->eVtabRisk = SQLITE_VTABRISK_High; break; } + case SQLITE_VTAB_USES_ALL_SCHEMAS: { + p->pVTable->bAllSchemas = 1; + break; + } default: { rc = SQLITE_MISUSE_BKPT; break; @@ -148631,7 +155062,7 @@ typedef struct WhereRightJoin WhereRightJoin; /* ** This object is a header on a block of allocated memory that will be -** automatically freed when its WInfo oject is destructed. +** automatically freed when its WInfo object is destructed. */ struct WhereMemBlock { WhereMemBlock *pNext; /* Next block in the chain */ @@ -148692,7 +155123,7 @@ struct WhereLevel { int iCur; /* The VDBE cursor used by this IN operator */ int addrInTop; /* Top of the IN loop */ int iBase; /* Base register of multi-key index record */ - int nPrefix; /* Number of prior entires in the key */ + int nPrefix; /* Number of prior entries in the key */ u8 eEndLoopOp; /* IN Loop terminator. OP_Next or OP_Prev */ } *aInLoop; /* Information about each nested IN operator */ } in; /* Used when pWLoop->wsFlags&WHERE_IN_ABLE */ @@ -148942,7 +155373,7 @@ struct WhereClause { int nTerm; /* Number of terms */ int nSlot; /* Number of entries in a[] */ int nBase; /* Number of terms through the last non-Virtual */ - WhereTerm *a; /* Each a[] describes a term of the WHERE cluase */ + WhereTerm *a; /* Each a[] describes a term of the WHERE clause */ #if defined(SQLITE_SMALL_STACK) WhereTerm aStatic[1]; /* Initial static space for a[] */ #else @@ -148972,7 +155403,7 @@ struct WhereAndInfo { ** between VDBE cursor numbers and bits of the bitmasks in WhereTerm. 
** ** The VDBE cursor numbers are small integers contained in -** SrcList_item.iCursor and Expr.iTable fields. For any given WHERE +** SrcItem.iCursor and Expr.iTable fields. For any given WHERE ** clause, the cursor numbers might not begin with 0 and they might ** contain gaps in the numbering sequence. But we want to make maximum ** use of the bits in our bitmasks. This structure provides a mapping @@ -149043,20 +155474,6 @@ struct WhereLoopBuilder { # define SQLITE_QUERY_PLANNER_LIMIT_INCR 1000 #endif -/* -** Each instance of this object records a change to a single node -** in an expression tree to cause that node to point to a column -** of an index rather than an expression or a virtual column. All -** such transformations need to be undone at the end of WHERE clause -** processing. -*/ -typedef struct WhereExprMod WhereExprMod; -struct WhereExprMod { - WhereExprMod *pNext; /* Next translation on a list of them all */ - Expr *pExpr; /* The Expr node that was transformed */ - Expr orig; /* Original value of the Expr node */ -}; - /* ** The WHERE clause processing routine has two halves. The ** first part does the start of the WHERE loop and the second @@ -149072,10 +155489,10 @@ struct WhereInfo { SrcList *pTabList; /* List of tables in the join */ ExprList *pOrderBy; /* The ORDER BY clause or NULL */ ExprList *pResultSet; /* Result set of the query */ +#if WHERETRACE_ENABLED Expr *pWhere; /* The complete WHERE clause */ -#ifndef SQLITE_OMIT_VIRTUALTABLE - Select *pLimit; /* Used to access LIMIT expr/registers for vtabs */ #endif + Select *pSelect; /* The entire SELECT statement containing WHERE */ int aiCurOnePass[2]; /* OP_OpenWrite cursors for the ONEPASS opt */ int iContinue; /* Jump here to continue with next record */ int iBreak; /* Jump here to break out of the loop */ @@ -149094,7 +155511,6 @@ struct WhereInfo { int iTop; /* The very beginning of the WHERE loop */ int iEndWhere; /* End of the WHERE clause itself */ WhereLoop *pLoops; /* List of all WhereLoop objects */ - WhereExprMod *pExprMods; /* Expression modifications */ WhereMemBlock *pMemToFree;/* Memory to free when this object destroyed */ Bitmask revMask; /* Mask of ORDER BY terms that need reversing */ WhereClause sWC; /* Decomposition of the WHERE clause */ @@ -149242,6 +155658,8 @@ SQLITE_PRIVATE void sqlite3WhereTabFuncArgs(Parse*, SrcItem*, WhereClause*); #define WHERE_BLOOMFILTER 0x00400000 /* Consider using a Bloom-filter */ #define WHERE_SELFCULL 0x00800000 /* nOut reduced by extra WHERE terms */ #define WHERE_OMIT_OFFSET 0x01000000 /* Set offset counter to zero */ + /* 0x02000000 -- available for reuse */ +#define WHERE_EXPRIDX 0x04000000 /* Uses an index-on-expressions */ #endif /* !defined(SQLITE_WHEREINT_H) */ @@ -149339,9 +155757,9 @@ static void explainIndexRange(StrAccum *pStr, WhereLoop *pLoop){ /* ** This function is a no-op unless currently processing an EXPLAIN QUERY PLAN -** command, or if either SQLITE_DEBUG or SQLITE_ENABLE_STMT_SCANSTATUS was -** defined at compile-time. If it is not a no-op, a single OP_Explain opcode -** is added to the output to describe the table scan strategy in pLevel. +** command, or if stmt_scanstatus_v2() stats are enabled, or if SQLITE_DEBUG +** was defined at compile-time. If it is not a no-op, a single OP_Explain +** opcode is added to the output to describe the table scan strategy in pLevel. ** ** If an OP_Explain opcode is added to the VM, its address is returned. ** Otherwise, if no OP_Explain is coded, zero is returned. 
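/* [Editorial aside -- a toy model of the cursor-number-to-bitmask mapping
** described a few hunks above; the real WhereMaskSet splits bit creation
** and lookup into separate routines, so this condensed form is
** illustrative only.  All "Toy" names are invented. ] */
typedef unsigned long long Bitmask64;
typedef struct ToyMaskSet { int n; int ix[64]; } ToyMaskSet;
static Bitmask64 toyGetMask(ToyMaskSet *pSet, int iCursor){
  int i;
  for(i=0; i<pSet->n; i++){
    if( pSet->ix[i]==iCursor ) return ((Bitmask64)1)<<i;  /* already mapped */
  }
  if( pSet->n>=64 ) return 0;       /* out of bits */
  pSet->ix[pSet->n] = iCursor;      /* assign the next free bit */
  return ((Bitmask64)1)<<(pSet->n++);
}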
@@ -149353,8 +155771,8 @@ SQLITE_PRIVATE int sqlite3WhereExplainOneScan( u16 wctrlFlags /* Flags passed to sqlite3WhereBegin() */ ){ int ret = 0; -#if !defined(SQLITE_DEBUG) && !defined(SQLITE_ENABLE_STMT_SCANSTATUS) - if( sqlite3ParseToplevel(pParse)->explain==2 ) +#if !defined(SQLITE_DEBUG) + if( sqlite3ParseToplevel(pParse)->explain==2 || IS_STMT_SCANSTATUS(pParse->db) ) #endif { SrcItem *pItem = &pTabList->a[pLevel->iFrom]; @@ -149498,6 +155916,8 @@ SQLITE_PRIVATE int sqlite3WhereExplainBloomFilter( zMsg = sqlite3StrAccumFinish(&str); ret = sqlite3VdbeAddOp4(v, OP_Explain, sqlite3VdbeCurrentAddr(v), pParse->addrExplain, 0, zMsg,P4_DYNAMIC); + + sqlite3VdbeScanStatus(v, sqlite3VdbeCurrentAddr(v)-1, 0, 0, 0, 0); return ret; } #endif /* SQLITE_OMIT_EXPLAIN */ @@ -149518,16 +155938,37 @@ SQLITE_PRIVATE void sqlite3WhereAddScanStatus( WhereLevel *pLvl, /* Level to add scanstatus() entry for */ int addrExplain /* Address of OP_Explain (or 0) */ ){ - const char *zObj = 0; - WhereLoop *pLoop = pLvl->pWLoop; - if( (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 && pLoop->u.btree.pIndex!=0 ){ - zObj = pLoop->u.btree.pIndex->zName; - }else{ - zObj = pSrclist->a[pLvl->iFrom].zName; + if( IS_STMT_SCANSTATUS( sqlite3VdbeDb(v) ) ){ + const char *zObj = 0; + WhereLoop *pLoop = pLvl->pWLoop; + int wsFlags = pLoop->wsFlags; + int viaCoroutine = 0; + + if( (wsFlags & WHERE_VIRTUALTABLE)==0 && pLoop->u.btree.pIndex!=0 ){ + zObj = pLoop->u.btree.pIndex->zName; + }else{ + zObj = pSrclist->a[pLvl->iFrom].zName; + viaCoroutine = pSrclist->a[pLvl->iFrom].fg.viaCoroutine; + } + sqlite3VdbeScanStatus( + v, addrExplain, pLvl->addrBody, pLvl->addrVisit, pLoop->nOut, zObj + ); + + if( viaCoroutine==0 ){ + if( (wsFlags & (WHERE_MULTI_OR|WHERE_AUTO_INDEX))==0 ){ + sqlite3VdbeScanStatusRange(v, addrExplain, -1, pLvl->iTabCur); + } + if( wsFlags & WHERE_INDEXED ){ + sqlite3VdbeScanStatusRange(v, addrExplain, -1, pLvl->iIdxCur); + } + }else{ + int addr = pSrclist->a[pLvl->iFrom].addrFillSub; + VdbeOp *pOp = sqlite3VdbeGetOp(v, addr-1); + assert( sqlite3VdbeDb(v)->mallocFailed || pOp->opcode==OP_InitCoroutine ); + assert( sqlite3VdbeDb(v)->mallocFailed || pOp->p2>addr ); + sqlite3VdbeScanStatusRange(v, addrExplain, addr, pOp->p2-1); + } } - sqlite3VdbeScanStatus( - v, addrExplain, pLvl->addrBody, pLvl->addrVisit, pLoop->nOut, zObj - ); } #endif @@ -149587,7 +156028,7 @@ static void disableTerm(WhereLevel *pLevel, WhereTerm *pTerm){ pTerm->wtFlags |= TERM_CODED; } #ifdef WHERETRACE_ENABLED - if( sqlite3WhereTrace & 0x20000 ){ + if( (sqlite3WhereTrace & 0x4001)==0x4001 ){ sqlite3DebugPrintf("DISABLE-"); sqlite3WhereTermPrint(pTerm, (int)(pTerm - (pTerm->pWC->a))); } @@ -149702,68 +156143,75 @@ static Expr *removeUnindexableInClauseTerms( Expr *pX /* The IN expression to be reduced */ ){ sqlite3 *db = pParse->db; + Select *pSelect; /* Pointer to the SELECT on the RHS */ Expr *pNew; pNew = sqlite3ExprDup(db, pX, 0); if( db->mallocFailed==0 ){ - ExprList *pOrigRhs; /* Original unmodified RHS */ - ExprList *pOrigLhs; /* Original unmodified LHS */ - ExprList *pRhs = 0; /* New RHS after modifications */ - ExprList *pLhs = 0; /* New LHS after mods */ - int i; /* Loop counter */ - Select *pSelect; /* Pointer to the SELECT on the RHS */ - - assert( ExprUseXSelect(pNew) ); - pOrigRhs = pNew->x.pSelect->pEList; - assert( pNew->pLeft!=0 ); - assert( ExprUseXList(pNew->pLeft) ); - pOrigLhs = pNew->pLeft->x.pList; - for(i=iEq; inLTerm; i++){ - if( pLoop->aLTerm[i]->pExpr==pX ){ - int iField; - assert( (pLoop->aLTerm[i]->eOperator & 
(WO_OR|WO_AND))==0 );
-        iField = pLoop->aLTerm[i]->u.x.iField - 1;
-        if( pOrigRhs->a[iField].pExpr==0 ) continue; /* Duplicate PK column */
-        pRhs = sqlite3ExprListAppend(pParse, pRhs, pOrigRhs->a[iField].pExpr);
-        pOrigRhs->a[iField].pExpr = 0;
-        assert( pOrigLhs->a[iField].pExpr!=0 );
-        pLhs = sqlite3ExprListAppend(pParse, pLhs, pOrigLhs->a[iField].pExpr);
-        pOrigLhs->a[iField].pExpr = 0;
-      }
-    }
-    sqlite3ExprListDelete(db, pOrigRhs);
-    sqlite3ExprListDelete(db, pOrigLhs);
-    pNew->pLeft->x.pList = pLhs;
-    pNew->x.pSelect->pEList = pRhs;
-    if( pLhs && pLhs->nExpr==1 ){
-      /* Take care here not to generate a TK_VECTOR containing only a
-      ** single value. Since the parser never creates such a vector, some
-      ** of the subroutines do not handle this case. */
-      Expr *p = pLhs->a[0].pExpr;
-      pLhs->a[0].pExpr = 0;
-      sqlite3ExprDelete(db, pNew->pLeft);
-      pNew->pLeft = p;
-    }
-    pSelect = pNew->x.pSelect;
-    if( pSelect->pOrderBy ){
-      /* If the SELECT statement has an ORDER BY clause, zero the
-      ** iOrderByCol variables. These are set to non-zero when an
-      ** ORDER BY term exactly matches one of the terms of the
-      ** result-set. Since the result-set of the SELECT statement may
-      ** have been modified or reordered, these variables are no longer
-      ** set correctly. Since setting them is just an optimization,
-      ** it's easiest just to zero them here. */
-      ExprList *pOrderBy = pSelect->pOrderBy;
-      for(i=0; i<pOrderBy->nExpr; i++){
-        pOrderBy->a[i].u.x.iOrderByCol = 0;
+    for(pSelect=pNew->x.pSelect; pSelect; pSelect=pSelect->pPrior){
+      ExprList *pOrigRhs;         /* Original unmodified RHS */
+      ExprList *pOrigLhs = 0;     /* Original unmodified LHS */
+      ExprList *pRhs = 0;         /* New RHS after modifications */
+      ExprList *pLhs = 0;         /* New LHS after mods */
+      int i;                      /* Loop counter */
+
+      assert( ExprUseXSelect(pNew) );
+      pOrigRhs = pSelect->pEList;
+      assert( pNew->pLeft!=0 );
+      assert( ExprUseXList(pNew->pLeft) );
+      if( pSelect==pNew->x.pSelect ){
+        pOrigLhs = pNew->pLeft->x.pList;
+      }
+      for(i=iEq; i<pLoop->nLTerm; i++){
+        if( pLoop->aLTerm[i]->pExpr==pX ){
+          int iField;
+          assert( (pLoop->aLTerm[i]->eOperator & (WO_OR|WO_AND))==0 );
+          iField = pLoop->aLTerm[i]->u.x.iField - 1;
+          if( pOrigRhs->a[iField].pExpr==0 ) continue; /* Duplicate PK column */
+          pRhs = sqlite3ExprListAppend(pParse, pRhs, pOrigRhs->a[iField].pExpr);
+          pOrigRhs->a[iField].pExpr = 0;
+          if( pOrigLhs ){
+            assert( pOrigLhs->a[iField].pExpr!=0 );
+            pLhs = sqlite3ExprListAppend(pParse,pLhs,pOrigLhs->a[iField].pExpr);
+            pOrigLhs->a[iField].pExpr = 0;
+          }
+        }
+      }
+      sqlite3ExprListDelete(db, pOrigRhs);
+      if( pOrigLhs ){
+        sqlite3ExprListDelete(db, pOrigLhs);
+        pNew->pLeft->x.pList = pLhs;
+      }
+      pSelect->pEList = pRhs;
+      if( pLhs && pLhs->nExpr==1 ){
+        /* Take care here not to generate a TK_VECTOR containing only a
+        ** single value. Since the parser never creates such a vector, some
+        ** of the subroutines do not handle this case. */
+        Expr *p = pLhs->a[0].pExpr;
+        pLhs->a[0].pExpr = 0;
+        sqlite3ExprDelete(db, pNew->pLeft);
+        pNew->pLeft = p;
+      }
+      if( pSelect->pOrderBy ){
+        /* If the SELECT statement has an ORDER BY clause, zero the
+        ** iOrderByCol variables. These are set to non-zero when an
+        ** ORDER BY term exactly matches one of the terms of the
+        ** result-set. Since the result-set of the SELECT statement may
+        ** have been modified or reordered, these variables are no longer
+        ** set correctly. Since setting them is just an optimization,
+        ** it's easiest just to zero them here. */
+        ExprList *pOrderBy = pSelect->pOrderBy;
+        for(i=0; i<pOrderBy->nExpr; i++){
+          pOrderBy->a[i].u.x.iOrderByCol = 0;
+        }
       }
 #if 0
-  printf("For indexing, change the IN expr:\n");
-  sqlite3TreeViewExpr(0, pX, 0);
-  printf("Into:\n");
-  sqlite3TreeViewExpr(0, pNew, 0);
+      printf("For indexing, change the IN expr:\n");
+      sqlite3TreeViewExpr(0, pX, 0);
+      printf("Into:\n");
+      sqlite3TreeViewExpr(0, pNew, 0);
 #endif
+    }
   }
   return pNew;
 }
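
The rewritten loop above walks every arm of a compound SELECT on the
right-hand side of an IN operator, where the old code edited only the first
arm; the companion exprAnalyze() change further down admits compound
right-hand sides whose arms are VALUES rows (SF_Values). The affected class
of statement can be seen with a throwaway schema — table and index names
below are invented for this sketch:

#include <stdio.h>
#include "sqlite3.h"

static int show(void *p, int n, char **az, char **azCol){
  int i;
  (void)p; (void)azCol;
  for(i=0; i<n; i++) printf("%s ", az[i] ? az[i] : "NULL");
  printf("\n");
  return 0;
}

static void rowvalue_in_demo(sqlite3 *db){
  sqlite3_exec(db,
     "CREATE TABLE t1(a,b,c);"
     "CREATE INDEX t1ab ON t1(a,b);", 0, 0, 0);
  /* A multi-row VALUES list compiles to a compound SELECT linked through
  ** pSelect->pPrior, which is exactly what the new loop iterates over. */
  sqlite3_exec(db,
     "EXPLAIN QUERY PLAN "
     "SELECT c FROM t1 WHERE (a,b) IN (VALUES(1,2),(3,4));",
     show, 0, 0);
}
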
@@ -150016,7 +156464,7 @@ static int codeAllEqualityTerms(
   /* Figure out how many memory cells we will need then allocate them.
   */
   regBase = pParse->nMem + 1;
-  nReg = pLoop->u.btree.nEq + nExtraReg;
+  nReg = nEq + nExtraReg;
   pParse->nMem += nReg;
 
   zAff = sqlite3DbStrDup(pParse->db,sqlite3IndexAffinityStr(pParse->db,pIdx));
@@ -150063,9 +156511,6 @@ static int codeAllEqualityTerms(
         sqlite3VdbeAddOp2(v, OP_Copy, r1, regBase+j);
       }
     }
-  }
-  for(j=nSkip; j<nEq; j++){
-    pTerm = pLoop->aLTerm[j];
     if( pTerm->eOperator & WO_IN ){
       if( pTerm->pExpr->flags & EP_xIsSelect ){
         /* No affinity ever needs to be (or should be) applied to a value
@@ -150121,7 +156566,7 @@ static void whereLikeOptimizationStringFixup(
   if( pTerm->wtFlags & TERM_LIKEOPT ){
     VdbeOp *pOp;
     assert( pLevel->iLikeRepCntr>0 );
-    pOp = sqlite3VdbeGetOp(v, -1);
+    pOp = sqlite3VdbeGetLastOp(v);
     assert( pOp!=0 );
     assert( pOp->opcode==OP_String8
             || pTerm->pWC->pWInfo->pParse->db->mallocFailed );
@@ -150208,18 +156653,19 @@ static int codeCursorHintIsOrFunction(Walker *pWalker, Expr *pExpr){
 ** 2) transform the expression node to a TK_REGISTER node that reads
 ** from the newly populated register.
 **
-** Also, if the node is a TK_COLUMN that does access the table idenified
+** Also, if the node is a TK_COLUMN that does access the table identified
 ** by pCCurHint.iTabCur, and an index is being used (which we will
 ** know because CCurHint.pIdx!=0) then transform the TK_COLUMN into
 ** an access of the index rather than the original table.
 */
 static int codeCursorHintFixExpr(Walker *pWalker, Expr *pExpr){
   int rc = WRC_Continue;
+  int reg;
   struct CCurHint *pHint = pWalker->u.pCCurHint;
   if( pExpr->op==TK_COLUMN ){
     if( pExpr->iTable!=pHint->iTabCur ){
-      int reg = ++pWalker->pParse->nMem;  /* Register for column value */
-      sqlite3ExprCode(pWalker->pParse, pExpr, reg);
+      reg = ++pWalker->pParse->nMem;  /* Register for column value */
+      reg = sqlite3ExprCodeTarget(pWalker->pParse, pExpr, reg);
       pExpr->op = TK_REGISTER;
       pExpr->iTable = reg;
     }else if( pHint->pIdx!=0 ){
@@ -150227,15 +156673,15 @@ static int codeCursorHintFixExpr(Walker *pWalker, Expr *pExpr){
       pExpr->iColumn = sqlite3TableColumnToIndex(pHint->pIdx, pExpr->iColumn);
       assert( pExpr->iColumn>=0 );
     }
-  }else if( pExpr->op==TK_AGG_FUNCTION ){
-    /* An aggregate function in the WHERE clause of a query means this must
-    ** be a correlated sub-query, and expression pExpr is an aggregate from
-    ** the parent context. Do not walk the function arguments in this case.
-    **
-    ** todo: It should be possible to replace this node with a TK_REGISTER
-    ** expression, as the result of the expression must be stored in a
-    ** register at this point. The same holds for TK_AGG_COLUMN nodes. */
+  }else if( pExpr->pAggInfo ){
     rc = WRC_Prune;
+    reg = ++pWalker->pParse->nMem;  /* Register for column value */
+    reg = sqlite3ExprCodeTarget(pWalker->pParse, pExpr, reg);
+    pExpr->op = TK_REGISTER;
+    pExpr->iTable = reg;
+  }else if( pExpr->op==TK_TRUEFALSE ){
+    /* Do not walk disabled expressions. 
tag-20230504-1 */ + return WRC_Prune; } return rc; } @@ -150337,7 +156783,7 @@ static void codeCursorHint( } if( pExpr!=0 ){ sWalker.xExprCallback = codeCursorHintFixExpr; - sqlite3WalkExpr(&sWalker, pExpr); + if( pParse->nErr==0 ) sqlite3WalkExpr(&sWalker, pExpr); sqlite3VdbeAddOp4(v, OP_CursorHint, (sHint.pIdx ? sHint.iIdxCur : sHint.iTabCur), 0, 0, (const char*)pExpr, P4_EXPR); @@ -150445,143 +156891,6 @@ static void codeExprOrVector(Parse *pParse, Expr *p, int iReg, int nReg){ } } -/* An instance of the IdxExprTrans object carries information about a -** mapping from an expression on table columns into a column in an index -** down through the Walker. -*/ -typedef struct IdxExprTrans { - Expr *pIdxExpr; /* The index expression */ - int iTabCur; /* The cursor of the corresponding table */ - int iIdxCur; /* The cursor for the index */ - int iIdxCol; /* The column for the index */ - int iTabCol; /* The column for the table */ - WhereInfo *pWInfo; /* Complete WHERE clause information */ - sqlite3 *db; /* Database connection (for malloc()) */ -} IdxExprTrans; - -/* -** Preserve pExpr on the WhereETrans list of the WhereInfo. -*/ -static void preserveExpr(IdxExprTrans *pTrans, Expr *pExpr){ - WhereExprMod *pNew; - pNew = sqlite3DbMallocRaw(pTrans->db, sizeof(*pNew)); - if( pNew==0 ) return; - pNew->pNext = pTrans->pWInfo->pExprMods; - pTrans->pWInfo->pExprMods = pNew; - pNew->pExpr = pExpr; - memcpy(&pNew->orig, pExpr, sizeof(*pExpr)); -} - -/* The walker node callback used to transform matching expressions into -** a reference to an index column for an index on an expression. -** -** If pExpr matches, then transform it into a reference to the index column -** that contains the value of pExpr. -*/ -static int whereIndexExprTransNode(Walker *p, Expr *pExpr){ - IdxExprTrans *pX = p->u.pIdxTrans; - if( sqlite3ExprCompare(0, pExpr, pX->pIdxExpr, pX->iTabCur)==0 ){ - pExpr = sqlite3ExprSkipCollate(pExpr); - preserveExpr(pX, pExpr); - pExpr->affExpr = sqlite3ExprAffinity(pExpr); - pExpr->op = TK_COLUMN; - pExpr->iTable = pX->iIdxCur; - pExpr->iColumn = pX->iIdxCol; - testcase( ExprHasProperty(pExpr, EP_Unlikely) ); - ExprClearProperty(pExpr, EP_Skip|EP_Unlikely|EP_WinFunc|EP_Subrtn); - pExpr->y.pTab = 0; - return WRC_Prune; - }else{ - return WRC_Continue; - } -} - -#ifndef SQLITE_OMIT_GENERATED_COLUMNS -/* A walker node callback that translates a column reference to a table -** into a corresponding column reference of an index. -*/ -static int whereIndexExprTransColumn(Walker *p, Expr *pExpr){ - if( pExpr->op==TK_COLUMN ){ - IdxExprTrans *pX = p->u.pIdxTrans; - if( pExpr->iTable==pX->iTabCur && pExpr->iColumn==pX->iTabCol ){ - assert( ExprUseYTab(pExpr) && pExpr->y.pTab!=0 ); - preserveExpr(pX, pExpr); - pExpr->affExpr = sqlite3TableColumnAffinity(pExpr->y.pTab,pExpr->iColumn); - pExpr->iTable = pX->iIdxCur; - pExpr->iColumn = pX->iIdxCol; - pExpr->y.pTab = 0; - } - } - return WRC_Continue; -} -#endif /* SQLITE_OMIT_GENERATED_COLUMNS */ - -/* -** For an indexes on expression X, locate every instance of expression X -** in pExpr and change that subexpression into a reference to the appropriate -** column of the index. -** -** 2019-10-24: Updated to also translate references to a VIRTUAL column in -** the table into references to the corresponding (stored) column of the -** index. 
-*/
-static void whereIndexExprTrans(
-  Index *pIdx,      /* The Index */
-  int iTabCur,      /* Cursor of the table that is being indexed */
-  int iIdxCur,      /* Cursor of the index itself */
-  WhereInfo *pWInfo /* Transform expressions in this WHERE clause */
-){
-  int iIdxCol;               /* Column number of the index */
-  ExprList *aColExpr;        /* Expressions that are indexed */
-  Table *pTab;
-  Walker w;
-  IdxExprTrans x;
-  aColExpr = pIdx->aColExpr;
-  if( aColExpr==0 && !pIdx->bHasVCol ){
-    /* The index does not reference any expressions or virtual columns
-    ** so no translations are needed. */
-    return;
-  }
-  pTab = pIdx->pTable;
-  memset(&w, 0, sizeof(w));
-  w.u.pIdxTrans = &x;
-  x.iTabCur = iTabCur;
-  x.iIdxCur = iIdxCur;
-  x.pWInfo = pWInfo;
-  x.db = pWInfo->pParse->db;
-  for(iIdxCol=0; iIdxCol<pIdx->nColumn; iIdxCol++){
-    i16 iRef = pIdx->aiColumn[iIdxCol];
-    if( iRef==XN_EXPR ){
-      assert( aColExpr!=0 && aColExpr->a[iIdxCol].pExpr!=0 );
-      x.pIdxExpr = aColExpr->a[iIdxCol].pExpr;
-      if( sqlite3ExprIsConstant(x.pIdxExpr) ) continue;
-      w.xExprCallback = whereIndexExprTransNode;
-#ifndef SQLITE_OMIT_GENERATED_COLUMNS
-    }else if( iRef>=0
-       && (pTab->aCol[iRef].colFlags & COLFLAG_VIRTUAL)!=0
-       && ((pTab->aCol[iRef].colFlags & COLFLAG_HASCOLL)==0
-         || sqlite3StrICmp(sqlite3ColumnColl(&pTab->aCol[iRef]),
-                           sqlite3StrBINARY)==0)
-    ){
-      /* Check to see if there are direct references to generated columns
-      ** that are contained in the index. Pulling the generated column
-      ** out of the index is an optimization only - the main table is always
-      ** available if the index cannot be used. To avoid unnecessary
-      ** complication, omit this optimization if the collating sequence for
-      ** the column is non-standard */
-      x.iTabCol = iRef;
-      w.xExprCallback = whereIndexExprTransColumn;
-#endif /* SQLITE_OMIT_GENERATED_COLUMNS */
-    }else{
-      continue;
-    }
-    x.iIdxCol = iIdxCol;
-    sqlite3WalkExpr(&w, pWInfo->pWhere);
-    sqlite3WalkExprList(&w, pWInfo->pOrderBy);
-    sqlite3WalkExprList(&w, pWInfo->pResultSet);
-  }
-}
-
 /*
 ** The pTruth expression is always true because it is the WHERE clause
 ** a partial index that is driving a query loop. 
Look through all of the @@ -150650,6 +156959,8 @@ static SQLITE_NOINLINE void filterPullDown( testcase( pTerm->wtFlags & TERM_VIRTUAL ); regRowid = sqlite3GetTempReg(pParse); regRowid = codeEqualityTerm(pParse, pTerm, pLevel, 0, 0, regRowid); + sqlite3VdbeAddOp2(pParse->pVdbe, OP_MustBeInt, regRowid, addrNxt); + VdbeCoverage(pParse->pVdbe); sqlite3VdbeAddOp4Int(pParse->pVdbe, OP_Filter, pLevel->regFilter, addrNxt, regRowid, 1); VdbeCoverage(pParse->pVdbe); @@ -150709,13 +157020,15 @@ SQLITE_PRIVATE Bitmask sqlite3WhereCodeOneLoopStart( pLevel->notReady = notReady & ~sqlite3WhereGetMask(&pWInfo->sMaskSet, iCur); bRev = (pWInfo->revMask>>iLevel)&1; VdbeModuleComment((v, "Begin WHERE-loop%d: %s",iLevel,pTabItem->pTab->zName)); -#if WHERETRACE_ENABLED /* 0x20800 */ - if( sqlite3WhereTrace & 0x800 ){ +#if WHERETRACE_ENABLED /* 0x4001 */ + if( sqlite3WhereTrace & 0x1 ){ sqlite3DebugPrintf("Coding level %d of %d: notReady=%llx iFrom=%d\n", iLevel, pWInfo->nLevel, (u64)notReady, pLevel->iFrom); - sqlite3WhereLoopPrint(pLoop, pWC); + if( sqlite3WhereTrace & 0x1000 ){ + sqlite3WhereLoopPrint(pLoop, pWC); + } } - if( sqlite3WhereTrace & 0x20000 ){ + if( (sqlite3WhereTrace & 0x4001)==0x4001 ){ if( iLevel==0 ){ sqlite3DebugPrintf("WHERE clause being coded:\n"); sqlite3TreeViewExpr(0, pWInfo->pWhere, 0); @@ -150801,9 +157114,9 @@ SQLITE_PRIVATE Bitmask sqlite3WhereCodeOneLoopStart( && pLoop->u.vtab.bOmitOffset ){ assert( pTerm->eOperator==WO_AUX ); - assert( pWInfo->pLimit!=0 ); - assert( pWInfo->pLimit->iOffset>0 ); - sqlite3VdbeAddOp2(v, OP_Integer, 0, pWInfo->pLimit->iOffset); + assert( pWInfo->pSelect!=0 ); + assert( pWInfo->pSelect->iOffset>0 ); + sqlite3VdbeAddOp2(v, OP_Integer, 0, pWInfo->pSelect->iOffset); VdbeComment((v,"Zero OFFSET counter")); } } @@ -150911,6 +157224,8 @@ SQLITE_PRIVATE Bitmask sqlite3WhereCodeOneLoopStart( if( iRowidReg!=iReleaseReg ) sqlite3ReleaseTempReg(pParse, iReleaseReg); addrNxt = pLevel->addrNxt; if( pLevel->regFilter ){ + sqlite3VdbeAddOp2(v, OP_MustBeInt, iRowidReg, addrNxt); + VdbeCoverage(v); sqlite3VdbeAddOp4Int(v, OP_Filter, pLevel->regFilter, addrNxt, iRowidReg, 1); VdbeCoverage(v); @@ -150956,7 +157271,7 @@ SQLITE_PRIVATE Bitmask sqlite3WhereCodeOneLoopStart( }; assert( TK_LE==TK_GT+1 ); /* Make sure the ordering.. */ assert( TK_LT==TK_GT+2 ); /* ... of the TK_xx values... */ - assert( TK_GE==TK_GT+3 ); /* ... is correcct. */ + assert( TK_GE==TK_GT+3 ); /* ... is correct. */ assert( (pStart->wtFlags & TERM_VNULL)==0 ); testcase( pStart->wtFlags & TERM_VIRTUAL ); @@ -151262,6 +157577,11 @@ SQLITE_PRIVATE Bitmask sqlite3WhereCodeOneLoopStart( ** guess. */ addrSeekScan = sqlite3VdbeAddOp1(v, OP_SeekScan, (pIdx->aiRowLogEst[0]+9)/10); + if( pRangeStart || pRangeEnd ){ + sqlite3VdbeChangeP5(v, 1); + sqlite3VdbeChangeP2(v, addrSeekScan, sqlite3VdbeCurrentAddr(v)+1); + addrSeekScan = 0; + } VdbeCoverage(v); } sqlite3VdbeAddOp4Int(v, op, iIdxCur, addrNxt, regBase, nConstraint); @@ -151298,16 +157618,7 @@ SQLITE_PRIVATE Bitmask sqlite3WhereCodeOneLoopStart( assert( pLevel->p2==0 ); if( pRangeEnd ){ Expr *pRight = pRangeEnd->pExpr->pRight; - if( addrSeekScan ){ - /* For a seek-scan that has a range on the lowest term of the index, - ** we have to make the top of the loop be code that sets the end - ** condition of the range. Otherwise, the OP_SeekScan might jump - ** over that initialization, leaving the range-end value set to the - ** range-start value, resulting in a wrong answer. - ** See ticket 5981a8c041a3c2f3 (2021-11-02). 
- */ - pLevel->p2 = sqlite3VdbeCurrentAddr(v); - } + assert( addrSeekScan==0 ); codeExprOrVector(pParse, pRight, regBase+nEq, nTop); whereLikeOptimizationStringFixup(v, pLevel, pRangeEnd); if( (pRangeEnd->wtFlags & TERM_VNULL)==0 @@ -151337,11 +157648,11 @@ SQLITE_PRIVATE Bitmask sqlite3WhereCodeOneLoopStart( } nConstraint++; } - sqlite3DbFree(db, zStartAff); - sqlite3DbFree(db, zEndAff); + if( zStartAff ) sqlite3DbNNFreeNN(db, zStartAff); + if( zEndAff ) sqlite3DbNNFreeNN(db, zEndAff); /* Top of the loop body */ - if( pLevel->p2==0 ) pLevel->p2 = sqlite3VdbeCurrentAddr(v); + pLevel->p2 = sqlite3VdbeCurrentAddr(v); /* Check if the index cursor is past the end of the range. */ if( nConstraint ){ @@ -151400,27 +157711,6 @@ SQLITE_PRIVATE Bitmask sqlite3WhereCodeOneLoopStart( } if( pLevel->iLeftJoin==0 ){ - /* If pIdx is an index on one or more expressions, then look through - ** all the expressions in pWInfo and try to transform matching expressions - ** into reference to index columns. Also attempt to translate references - ** to virtual columns in the table into references to (stored) columns - ** of the index. - ** - ** Do not do this for the RHS of a LEFT JOIN. This is because the - ** expression may be evaluated after OP_NullRow has been executed on - ** the cursor. In this case it is important to do the full evaluation, - ** as the result of the expression may not be NULL, even if all table - ** column values are. https://www.sqlite.org/src/info/7fa8049685b50b5a - ** - ** Also, do not do this when processing one index an a multi-index - ** OR clause, since the transformation will become invalid once we - ** move forward to the next index. - ** https://sqlite.org/src/info/4e8e4857d32d401f - */ - if( (pWInfo->wctrlFlags & (WHERE_OR_SUBCLAUSE|WHERE_RIGHT_JOIN))==0 ){ - whereIndexExprTrans(pIdx, iCur, iIdxCur, pWInfo); - } - /* If a partial index is driving the loop, try to eliminate WHERE clause ** terms from the query that must be true due to the WHERE clause of ** the partial index. @@ -151533,7 +157823,7 @@ SQLITE_PRIVATE Bitmask sqlite3WhereCodeOneLoopStart( int nNotReady; /* The number of notReady tables */ SrcItem *origSrc; /* Original list of tables */ nNotReady = pWInfo->nLevel - iLevel - 1; - pOrTab = sqlite3StackAllocRaw(db, + pOrTab = sqlite3DbMallocRawNN(db, sizeof(*pOrTab)+ nNotReady*sizeof(pOrTab->a[0])); if( pOrTab==0 ) return notReady; pOrTab->nAlloc = (u8)(nNotReady + 1); @@ -151653,7 +157943,7 @@ SQLITE_PRIVATE Bitmask sqlite3WhereCodeOneLoopStart( } /* Loop through table entries that match term pOrTerm. 
*/ ExplainQueryPlan((pParse, 1, "INDEX %d", ii+1)); - WHERETRACE(0xffff, ("Subplan for OR-clause:\n")); + WHERETRACE(0xffffffff, ("Subplan for OR-clause:\n")); pSubWInfo = sqlite3WhereBegin(pParse, pOrTab, pOrExpr, 0, 0, 0, WHERE_OR_SUBCLAUSE, iCovCur); assert( pSubWInfo || pParse->nErr ); @@ -151786,7 +158076,7 @@ SQLITE_PRIVATE Bitmask sqlite3WhereCodeOneLoopStart( assert( pLevel->op==OP_Return ); pLevel->p2 = sqlite3VdbeCurrentAddr(v); - if( pWInfo->nLevel>1 ){ sqlite3StackFree(db, pOrTab); } + if( pWInfo->nLevel>1 ){ sqlite3DbFreeNN(db, pOrTab); } if( !untestedTerms ) disableTerm(pLevel, pTerm); }else #endif /* SQLITE_OMIT_OR_OPTIMIZATION */ @@ -151890,12 +158180,12 @@ SQLITE_PRIVATE Bitmask sqlite3WhereCodeOneLoopStart( } #endif } -#ifdef WHERETRACE_ENABLED /* 0xffff */ +#ifdef WHERETRACE_ENABLED /* 0xffffffff */ if( sqlite3WhereTrace ){ VdbeNoopComment((v, "WhereTerm[%d] (%p) priority=%d", pWC->nTerm-j, pTerm, iLoop)); } - if( sqlite3WhereTrace & 0x800 ){ + if( sqlite3WhereTrace & 0x4000 ){ sqlite3DebugPrintf("Coding auxiliary constraint:\n"); sqlite3WhereTermPrint(pTerm, pWC->nTerm-j); } @@ -151924,8 +158214,8 @@ SQLITE_PRIVATE Bitmask sqlite3WhereCodeOneLoopStart( if( pTerm->leftCursor!=iCur ) continue; if( pTabItem->fg.jointype & (JT_LEFT|JT_LTORJ|JT_RIGHT) ) continue; pE = pTerm->pExpr; -#ifdef WHERETRACE_ENABLED /* 0x800 */ - if( sqlite3WhereTrace & 0x800 ){ +#ifdef WHERETRACE_ENABLED /* 0x4001 */ + if( (sqlite3WhereTrace & 0x4001)==0x4001 ){ sqlite3DebugPrintf("Coding transitive constraint:\n"); sqlite3WhereTermPrint(pTerm, pWC->nTerm-j); } @@ -152040,13 +158330,13 @@ SQLITE_PRIVATE Bitmask sqlite3WhereCodeOneLoopStart( } } -#if WHERETRACE_ENABLED /* 0x20800 */ - if( sqlite3WhereTrace & 0x20000 ){ +#if WHERETRACE_ENABLED /* 0x4001 */ + if( sqlite3WhereTrace & 0x4000 ){ sqlite3DebugPrintf("All WHERE-clause terms after coding level %d:\n", iLevel); sqlite3WhereClausePrint(pWC); } - if( sqlite3WhereTrace & 0x800 ){ + if( sqlite3WhereTrace & 0x1 ){ sqlite3DebugPrintf("End Coding level %d: notReady=%llx\n", iLevel, (u64)pLevel->notReady); } @@ -152161,7 +158451,7 @@ SQLITE_PRIVATE SQLITE_NOINLINE void sqlite3WhereRightJoinLoop( ** the WHERE clause of SQL statements. ** ** This file was originally part of where.c but was split out to improve -** readability and editabiliity. This file contains utility routines for +** readability and editability. This file contains utility routines for ** analyzing Expr objects in the WHERE clause. */ /* #include "sqliteInt.h" */ @@ -152377,7 +158667,7 @@ static int isLikeOrGlob( ** range search. The third is because the caller assumes that the pattern ** consists of at least one character after all escapes have been ** removed. 
*/ - if( cnt!=0 && 255!=(u8)z[cnt-1] && (cnt>1 || z[0]!=wc[3]) ){ + if( (cnt>1 || (cnt>0 && z[0]!=wc[3])) && 255!=(u8)z[cnt-1] ){ Expr *pPrefix; /* A "complete" match if the pattern ends with "*" or "%" */ @@ -152414,7 +158704,7 @@ static int isLikeOrGlob( if( pLeft->op!=TK_COLUMN || sqlite3ExprAffinity(pLeft)!=SQLITE_AFF_TEXT || (ALWAYS( ExprUseYTab(pLeft) ) - && pLeft->y.pTab + && ALWAYS(pLeft->y.pTab) && IsVirtual(pLeft->y.pTab)) /* Might be numeric */ ){ int isNum; @@ -152531,8 +158821,7 @@ static int isAuxiliaryVtabOperator( ** MATCH(expression,vtab_column) */ pCol = pList->a[1].pExpr; - assert( pCol->op!=TK_COLUMN || ExprUseYTab(pCol) ); - testcase( pCol->op==TK_COLUMN && pCol->y.pTab==0 ); + assert( pCol->op!=TK_COLUMN || (ExprUseYTab(pCol) && pCol->y.pTab!=0) ); if( ExprIsVtab(pCol) ){ for(i=0; ia[0].pExpr; assert( pCol->op!=TK_COLUMN || ExprUseYTab(pCol) ); - testcase( pCol->op==TK_COLUMN && pCol->y.pTab==0 ); + assert( pCol->op!=TK_COLUMN || (ExprUseYTab(pCol) && pCol->y.pTab!=0) ); if( ExprIsVtab(pCol) ){ sqlite3_vtab *pVtab; sqlite3_module *pMod; @@ -152582,13 +158871,12 @@ static int isAuxiliaryVtabOperator( int res = 0; Expr *pLeft = pExpr->pLeft; Expr *pRight = pExpr->pRight; - assert( pLeft->op!=TK_COLUMN || ExprUseYTab(pLeft) ); - testcase( pLeft->op==TK_COLUMN && pLeft->y.pTab==0 ); + assert( pLeft->op!=TK_COLUMN || (ExprUseYTab(pLeft) && pLeft->y.pTab!=0) ); if( ExprIsVtab(pLeft) ){ res++; } - assert( pRight==0 || pRight->op!=TK_COLUMN || ExprUseYTab(pRight) ); - testcase( pRight && pRight->op==TK_COLUMN && pRight->y.pTab==0 ); + assert( pRight==0 || pRight->op!=TK_COLUMN + || (ExprUseYTab(pRight) && pRight->y.pTab!=0) ); if( pRight && ExprIsVtab(pRight) ){ res++; SWAP(Expr*, pLeft, pRight); @@ -152952,7 +159240,7 @@ static void exprAnalyzeOrTerm( pOrTerm->leftCursor))==0 ){ /* This term must be of the form t1.a==t2.b where t2 is in the ** chngToIN set but t1 is not. This term will be either preceded - ** or follwed by an inverted copy (t2.b==t1.a). Skip this term + ** or followed by an inverted copy (t2.b==t1.a). Skip this term ** and use its inversion. 
*/
       testcase( pOrTerm->wtFlags & TERM_COPIED );
       testcase( pOrTerm->wtFlags & TERM_VIRTUAL );
@@ -153124,35 +159412,40 @@ static Bitmask exprSelectUsage(WhereMaskSet *pMaskSet, Select *pS){
 */
 static SQLITE_NOINLINE int exprMightBeIndexed2(
   SrcList *pFrom,        /* The FROM clause */
-  Bitmask mPrereq,       /* Bitmask of FROM clause terms referenced by pExpr */
   int *aiCurCol,         /* Write the referenced table cursor and column here */
-  Expr *pExpr            /* An operand of a comparison operator */
+  Expr *pExpr,           /* An operand of a comparison operator */
+  int j                  /* Start looking with the j-th pFrom entry */
 ){
   Index *pIdx;
   int i;
   int iCur;
-  for(i=0; mPrereq>1; i++, mPrereq>>=1){}
-  iCur = pFrom->a[i].iCursor;
-  for(pIdx=pFrom->a[i].pTab->pIndex; pIdx; pIdx=pIdx->pNext){
-    if( pIdx->aColExpr==0 ) continue;
-    for(i=0; i<pIdx->nKeyCol; i++){
-      if( pIdx->aiColumn[i]!=XN_EXPR ) continue;
-      if( sqlite3ExprCompareSkip(pExpr, pIdx->aColExpr->a[i].pExpr, iCur)==0 ){
-        aiCurCol[0] = iCur;
-        aiCurCol[1] = XN_EXPR;
-        return 1;
+  do{
+    iCur = pFrom->a[j].iCursor;
+    for(pIdx=pFrom->a[j].pTab->pIndex; pIdx; pIdx=pIdx->pNext){
+      if( pIdx->aColExpr==0 ) continue;
+      for(i=0; i<pIdx->nKeyCol; i++){
+        if( pIdx->aiColumn[i]!=XN_EXPR ) continue;
+        assert( pIdx->bHasExpr );
+        if( sqlite3ExprCompareSkip(pExpr,pIdx->aColExpr->a[i].pExpr,iCur)==0
+         && pExpr->op!=TK_STRING
+        ){
+          aiCurCol[0] = iCur;
+          aiCurCol[1] = XN_EXPR;
+          return 1;
+        }
       }
     }
-  }
+  }while( ++j < pFrom->nSrc );
   return 0;
 }
 
 static int exprMightBeIndexed(
   SrcList *pFrom,        /* The FROM clause */
-  Bitmask mPrereq,       /* Bitmask of FROM clause terms referenced by pExpr */
   int *aiCurCol,         /* Write the referenced table cursor & column here */
   Expr *pExpr,           /* An operand of a comparison operator */
   int op                 /* The specific comparison operator */
 ){
+  int i;
+
   /* If this expression is a vector to the left or right of a
   ** inequality constraint (>, <, >= or <=), perform the processing
   ** on the first element of the vector. */
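
The do-loop above now considers indexes on expressions from any entry in the
FROM clause, rather than deriving a single candidate table from a
prerequisite bitmask. At the SQL level, the case being probed for is a
constraint that matches an indexed expression — a sketch, with table and
index names invented:

#include <stdio.h>
#include "sqlite3.h"

static int show2(void *p, int n, char **az, char **azCol){
  int i;
  (void)p; (void)azCol;
  for(i=0; i<n; i++) printf("%s ", az[i] ? az[i] : "NULL");
  printf("\n");
  return 0;
}

static void expr_index_demo(sqlite3 *db){
  sqlite3_exec(db,
     "CREATE TABLE t(x INTEGER, y INTEGER);"
     "CREATE INDEX t_sum ON t(x+y);", 0, 0, 0);
  /* exprMightBeIndexed2() is what lets the planner match the x+y term
  ** below against index t_sum instead of requiring a bare column; the
  ** query plan should report a search of t_sum rather than a full scan. */
  sqlite3_exec(db, "EXPLAIN QUERY PLAN SELECT * FROM t WHERE x+y=10;",
               show2, 0, 0);
}
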
@@ -153162,7 +159455,6 @@ static int exprMightBeIndexed(
   if( pExpr->op==TK_VECTOR && (op>=TK_GT && ALWAYS(op<=TK_GE)) ){
     assert( ExprUseXList(pExpr) );
     pExpr = pExpr->x.pList->a[0].pExpr;
-  }
 
   if( pExpr->op==TK_COLUMN ){
@@ -153170,9 +159462,16 @@ static int exprMightBeIndexed(
     aiCurCol[1] = pExpr->iColumn;
     return 1;
   }
-  if( mPrereq==0 ) return 0;                 /* No table references */
-  if( (mPrereq&(mPrereq-1))!=0 ) return 0;   /* Refs more than one table */
-  return exprMightBeIndexed2(pFrom,mPrereq,aiCurCol,pExpr);
+
+  for(i=0; i<pFrom->nSrc; i++){
+    Index *pIdx;
+    for(pIdx=pFrom->a[i].pTab->pIndex; pIdx; pIdx=pIdx->pNext){
+      if( pIdx->aColExpr ){
+        return exprMightBeIndexed2(pFrom,aiCurCol,pExpr,i);
+      }
+    }
+  }
+  return 0;
 }
 
@@ -153203,8 +159502,8 @@ static void exprAnalyze(
   WhereTerm *pTerm;                /* The term to be analyzed */
   WhereMaskSet *pMaskSet;          /* Set of table index masks */
   Expr *pExpr;                     /* The expression to be analyzed */
-  Bitmask prereqLeft;              /* Prerequisites of the pExpr->pLeft */
-  Bitmask prereqAll;               /* Prerequisites of pExpr */
+  Bitmask prereqLeft;              /* Prerequisites of the pExpr->pLeft */
+  Bitmask prereqAll;               /* Prerequisites of pExpr */
   Bitmask extraRight = 0;          /* Extra dependencies on LEFT JOIN */
   Expr *pStr1 = 0;                 /* RHS of LIKE/GLOB operator */
   int isComplete = 0;              /* RHS of LIKE/GLOB ends with wildcard */
@@ -153298,7 +159597,7 @@ static void exprAnalyze(
       pLeft = pLeft->x.pList->a[pTerm->u.x.iField-1].pExpr;
     }
 
-    if( exprMightBeIndexed(pSrc, prereqLeft, aiCurCol, pLeft, op) ){
+    if( exprMightBeIndexed(pSrc, aiCurCol, pLeft, op) ){
       pTerm->leftCursor = aiCurCol[0];
       assert( (pTerm->eOperator & (WO_OR|WO_AND))==0 );
       pTerm->u.x.leftColumn = aiCurCol[1];
     }
     if( op==TK_IS ) pTerm->wtFlags |= TERM_IS;
     if( pRight
-     && exprMightBeIndexed(pSrc, pTerm->prereqRight, aiCurCol, pRight, op)
+     && exprMightBeIndexed(pSrc, aiCurCol, pRight, op)
      && !ExprHasProperty(pRight, EP_FixedCol)
     ){
       WhereTerm *pNew;
@@ -153350,7 +159649,7 @@
      && 0==sqlite3ExprCanBeNull(pLeft)
     ){
       assert( !ExprHasProperty(pExpr, EP_IntValue) );
-      pExpr->op = TK_TRUEFALSE;
+      pExpr->op = TK_TRUEFALSE;  /* See tag-20230504-1 */
       pExpr->u.zToken = "false";
       ExprSetProperty(pExpr, EP_IsFalse);
       pTerm->prereqAll = 0;
@@ -153517,7 +159816,6 @@
       transferJoinMarkings(pNewExpr1, pExpr);
       idxNew1 = whereClauseInsert(pWC, pNewExpr1, wtFlags);
       testcase( idxNew1==0 );
-      exprAnalyze(pSrc, pWC, idxNew1);
       pNewExpr2 = sqlite3ExprDup(db, pLeft, 0);
       pNewExpr2 = sqlite3PExpr(pParse, TK_LT,
              sqlite3ExprAddCollateString(pParse,pNewExpr2,zCollSeqName),
              pStr2);
       transferJoinMarkings(pNewExpr2, pExpr);
       idxNew2 = whereClauseInsert(pWC, pNewExpr2, wtFlags);
       testcase( idxNew2==0 );
+      exprAnalyze(pSrc, pWC, idxNew1);
       exprAnalyze(pSrc, pWC, idxNew2);
       pTerm = &pWC->a[idxTerm];
       if( isComplete ){
@@ -153581,7 +159880,7 @@
    && pTerm->u.x.iField==0
    && pExpr->pLeft->op==TK_VECTOR
    && ALWAYS( ExprUseXSelect(pExpr) )
-   && pExpr->x.pSelect->pPrior==0
+   && (pExpr->x.pSelect->pPrior==0 || (pExpr->x.pSelect->selFlags & SF_Values))
 #ifndef SQLITE_OMIT_WINDOWFUNC
    && pExpr->x.pSelect->pWin==0
 #endif
@@ -153750,9 +160049,9 @@ static void whereAddLimitExpr(
 ** exist only so that they may be passed to the xBestIndex method of the
 ** single virtual table in the FROM clause of the SELECT. 
*/ -SQLITE_PRIVATE void sqlite3WhereAddLimit(WhereClause *pWC, Select *p){ - assert( p==0 || (p->pGroupBy==0 && (p->selFlags & SF_Aggregate)==0) ); - if( (p && p->pLimit) /* 1 */ +SQLITE_PRIVATE void SQLITE_NOINLINE sqlite3WhereAddLimit(WhereClause *pWC, Select *p){ + assert( p!=0 && p->pLimit!=0 ); /* 1 -- checked by caller */ + if( p->pGroupBy==0 && (p->selFlags & (SF_Distinct|SF_Aggregate))==0 /* 2 */ && (p->pSrc->nSrc==1 && IsVirtual(p->pSrc->a[0].pTab)) /* 3 */ ){ @@ -153769,6 +160068,13 @@ SQLITE_PRIVATE void sqlite3WhereAddLimit(WhereClause *pWC, Select *p){ assert( pWC->a[ii].eOperator==WO_ROWVAL ); continue; } + if( pWC->a[ii].nChild ){ + /* If this term has child terms, then they are also part of the + ** pWC->a[] array. So this term can be ignored, as a LIMIT clause + ** will only be added if each of the child terms passes the + ** (leftCursor==iCsr) test below. */ + continue; + } if( pWC->a[ii].leftCursor!=iCsr ) return; } @@ -153988,9 +160294,12 @@ SQLITE_PRIVATE void sqlite3WhereTabFuncArgs( pRhs = sqlite3PExpr(pParse, TK_UPLUS, sqlite3ExprDup(pParse->db, pArgs->a[j].pExpr, 0), 0); pTerm = sqlite3PExpr(pParse, TK_EQ, pColRef, pRhs); - if( pItem->fg.jointype & (JT_LEFT|JT_LTORJ) ){ + if( pItem->fg.jointype & (JT_LEFT|JT_RIGHT) ){ + testcase( pItem->fg.jointype & JT_LEFT ); /* testtag-20230227a */ + testcase( pItem->fg.jointype & JT_RIGHT ); /* testtag-20230227b */ joinType = EP_OuterON; }else{ + testcase( pItem->fg.jointype & JT_LTORJ ); /* testtag-20230227c */ joinType = EP_InnerON; } sqlite3SetJoinExpr(pTerm, pItem->iCursor, joinType); @@ -154069,7 +160378,7 @@ SQLITE_PRIVATE int sqlite3WhereIsDistinct(WhereInfo *pWInfo){ ** block sorting is required. */ SQLITE_PRIVATE int sqlite3WhereIsOrdered(WhereInfo *pWInfo){ - return pWInfo->nOBSat; + return pWInfo->nOBSat<0 ? 0 : pWInfo->nOBSat; } /* @@ -154707,7 +161016,7 @@ static void translateColumnToCopy( #if !defined(SQLITE_OMIT_VIRTUALTABLE) && defined(WHERETRACE_ENABLED) static void whereTraceIndexInfoInputs(sqlite3_index_info *p){ int i; - if( !sqlite3WhereTrace ) return; + if( (sqlite3WhereTrace & 0x10)==0 ) return; for(i=0; inConstraint; i++){ sqlite3DebugPrintf( " constraint[%d]: col=%d termid=%d op=%d usabled=%d collseq=%s\n", @@ -154727,7 +161036,7 @@ static void whereTraceIndexInfoInputs(sqlite3_index_info *p){ } static void whereTraceIndexInfoOutputs(sqlite3_index_info *p){ int i; - if( !sqlite3WhereTrace ) return; + if( (sqlite3WhereTrace & 0x10)==0 ) return; for(i=0; inConstraint; i++){ sqlite3DebugPrintf(" usage[%d]: argvIdx=%d omit=%d\n", i, @@ -154745,6 +161054,43 @@ static void whereTraceIndexInfoOutputs(sqlite3_index_info *p){ #define whereTraceIndexInfoOutputs(A) #endif +/* +** We know that pSrc is an operand of an outer join. Return true if +** pTerm is a constraint that is compatible with that join. +** +** pTerm must be EP_OuterON if pSrc is the right operand of an +** outer join. pTerm can be either EP_OuterON or EP_InnerON if pSrc +** is the left operand of a RIGHT join. +** +** See https://sqlite.org/forum/forumpost/206d99a16dd9212f +** for an example of a WHERE clause constraints that may not be used on +** the right table of a RIGHT JOIN because the constraint implies a +** not-NULL condition on the left table of the RIGHT JOIN. 
+*/ +static int constraintCompatibleWithOuterJoin( + const WhereTerm *pTerm, /* WHERE clause term to check */ + const SrcItem *pSrc /* Table we are trying to access */ +){ + assert( (pSrc->fg.jointype&(JT_LEFT|JT_LTORJ|JT_RIGHT))!=0 ); /* By caller */ + testcase( (pSrc->fg.jointype & (JT_LEFT|JT_LTORJ|JT_RIGHT))==JT_LEFT ); + testcase( (pSrc->fg.jointype & (JT_LEFT|JT_LTORJ|JT_RIGHT))==JT_LTORJ ); + testcase( ExprHasProperty(pTerm->pExpr, EP_OuterON) ) + testcase( ExprHasProperty(pTerm->pExpr, EP_InnerON) ); + if( !ExprHasProperty(pTerm->pExpr, EP_OuterON|EP_InnerON) + || pTerm->pExpr->w.iJoin != pSrc->iCursor + ){ + return 0; + } + if( (pSrc->fg.jointype & (JT_LEFT|JT_RIGHT))!=0 + && ExprHasProperty(pTerm->pExpr, EP_InnerON) + ){ + return 0; + } + return 1; +} + + + #ifndef SQLITE_OMIT_AUTOMATIC_INDEX /* ** Return TRUE if the WHERE clause term pTerm is of a form where it @@ -154760,16 +161106,10 @@ static int termCanDriveIndex( if( pTerm->leftCursor!=pSrc->iCursor ) return 0; if( (pTerm->eOperator & (WO_EQ|WO_IS))==0 ) return 0; assert( (pSrc->fg.jointype & JT_RIGHT)==0 ); - if( (pSrc->fg.jointype & (JT_LEFT|JT_LTORJ|JT_RIGHT))!=0 ){ - testcase( (pSrc->fg.jointype & (JT_LEFT|JT_LTORJ|JT_RIGHT))==JT_LEFT ); - testcase( (pSrc->fg.jointype & (JT_LEFT|JT_LTORJ|JT_RIGHT))==JT_LTORJ ); - testcase( ExprHasProperty(pTerm->pExpr, EP_OuterON) ) - testcase( ExprHasProperty(pTerm->pExpr, EP_InnerON) ); - if( !ExprHasProperty(pTerm->pExpr, EP_OuterON|EP_InnerON) - || pTerm->pExpr->w.iJoin != pSrc->iCursor - ){ - return 0; /* See tag-20191211-001 */ - } + if( (pSrc->fg.jointype & (JT_LEFT|JT_LTORJ|JT_RIGHT))!=0 + && !constraintCompatibleWithOuterJoin(pTerm,pSrc) + ){ + return 0; /* See https://sqlite.org/forum/forumpost/51e6959f61 */ } if( (pTerm->prereqRight & notReady)!=0 ) return 0; assert( (pTerm->eOperator & (WO_OR|WO_AND))==0 ); @@ -154783,6 +161123,57 @@ static int termCanDriveIndex( #ifndef SQLITE_OMIT_AUTOMATIC_INDEX + +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS +/* +** Argument pIdx represents an automatic index that the current statement +** will create and populate. Add an OP_Explain with text of the form: +** +** CREATE AUTOMATIC INDEX ON () [WHERE ] +** +** This is only required if sqlite3_stmt_scanstatus() is enabled, to +** associate an SQLITE_SCANSTAT_NCYCLE and SQLITE_SCANSTAT_NLOOP +** values with. In order to avoid breaking legacy code and test cases, +** the OP_Explain is not added if this is an EXPLAIN QUERY PLAN command. +*/ +static void explainAutomaticIndex( + Parse *pParse, + Index *pIdx, /* Automatic index to explain */ + int bPartial, /* True if pIdx is a partial index */ + int *pAddrExplain /* OUT: Address of OP_Explain */ +){ + if( IS_STMT_SCANSTATUS(pParse->db) && pParse->explain!=2 ){ + Table *pTab = pIdx->pTable; + const char *zSep = ""; + char *zText = 0; + int ii = 0; + sqlite3_str *pStr = sqlite3_str_new(pParse->db); + sqlite3_str_appendf(pStr,"CREATE AUTOMATIC INDEX ON %s(", pTab->zName); + assert( pIdx->nColumn>1 ); + assert( pIdx->aiColumn[pIdx->nColumn-1]==XN_ROWID ); + for(ii=0; ii<(pIdx->nColumn-1); ii++){ + const char *zName = 0; + int iCol = pIdx->aiColumn[ii]; + + zName = pTab->aCol[iCol].zCnName; + sqlite3_str_appendf(pStr, "%s%s", zSep, zName); + zSep = ", "; + } + zText = sqlite3_str_finish(pStr); + if( zText==0 ){ + sqlite3OomFault(pParse->db); + }else{ + *pAddrExplain = sqlite3VdbeExplain( + pParse, 0, "%s)%s", zText, (bPartial ? 
" WHERE " : "") + ); + sqlite3_free(zText); + } + } +} +#else +# define explainAutomaticIndex(a,b,c,d) +#endif + /* ** Generate code to construct the Index object for an automatic index ** and to set up the WhereLevel object pLevel so that the code generator @@ -154790,8 +161181,7 @@ static int termCanDriveIndex( */ static SQLITE_NOINLINE void constructAutomaticIndex( Parse *pParse, /* The parsing context */ - const WhereClause *pWC, /* The WHERE clause */ - const SrcItem *pSrc, /* The FROM clause term to get the next index */ + WhereClause *pWC, /* The WHERE clause */ const Bitmask notReady, /* Mask of cursors that are not available */ WhereLevel *pLevel /* Write new index here */ ){ @@ -154812,12 +161202,17 @@ static SQLITE_NOINLINE void constructAutomaticIndex( char *zNotUsed; /* Extra space on the end of pIdx */ Bitmask idxCols; /* Bitmap of columns used for indexing */ Bitmask extraCols; /* Bitmap of additional columns */ - u8 sentWarning = 0; /* True if a warnning has been issued */ + u8 sentWarning = 0; /* True if a warning has been issued */ + u8 useBloomFilter = 0; /* True to also add a Bloom filter */ Expr *pPartial = 0; /* Partial Index Expression */ int iContinue = 0; /* Jump here to skip excluded rows */ - SrcItem *pTabItem; /* FROM clause term being indexed */ + SrcList *pTabList; /* The complete FROM clause */ + SrcItem *pSrc; /* The FROM clause term to get the next index */ int addrCounter = 0; /* Address where integer counter is initialized */ int regBase; /* Array of registers where record is assembled */ +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS + int addrExp = 0; /* Address of OP_Explain */ +#endif /* Generate code to skip over the creation and initialization of the ** transient index on 2nd and subsequent iterations of the loop. */ @@ -154828,6 +161223,8 @@ static SQLITE_NOINLINE void constructAutomaticIndex( /* Count the number of columns that will be added to the index ** and used to match WHERE clause constraints */ nKeyCol = 0; + pTabList = pWC->pWInfo->pTabList; + pSrc = &pTabList->a[pLevel->iFrom]; pTable = pSrc->pTab; pWCEnd = &pWC->a[pWC->nTerm]; pLoop = pLevel->pWLoop; @@ -154838,7 +161235,7 @@ static SQLITE_NOINLINE void constructAutomaticIndex( ** WHERE clause (or the ON clause of a LEFT join) that constrain which ** rows of the target table (pSrc) that can be used. */ if( (pTerm->wtFlags & TERM_VIRTUAL)==0 - && sqlite3ExprIsTableConstraint(pExpr, pSrc) + && sqlite3ExprIsSingleTableConstraint(pExpr, pTabList, pLevel->iFrom) ){ pPartial = sqlite3ExprAnd(pParse, pPartial, sqlite3ExprDup(pParse->db, pExpr, 0)); @@ -154879,7 +161276,11 @@ static SQLITE_NOINLINE void constructAutomaticIndex( ** original table changes and the index and table cannot both be used ** if they go out of sync. */ - extraCols = pSrc->colUsed & (~idxCols | MASKBIT(BMS-1)); + if( IsView(pTable) ){ + extraCols = ALLBITS; + }else{ + extraCols = pSrc->colUsed & (~idxCols | MASKBIT(BMS-1)); + } mxBitCol = MIN(BMS-1,pTable->nCol); testcase( pTable->nCol==BMS-1 ); testcase( pTable->nCol==BMS-2 ); @@ -154915,6 +161316,16 @@ static SQLITE_NOINLINE void constructAutomaticIndex( assert( pColl!=0 || pParse->nErr>0 ); /* TH3 collate01.800 */ pIdx->azColl[n] = pColl ? 
pColl->zName : sqlite3StrBINARY; n++; + if( ALWAYS(pX->pLeft!=0) + && sqlite3ExprAffinity(pX->pLeft)!=SQLITE_AFF_TEXT + ){ + /* TUNING: only use a Bloom filter on an automatic index + ** if one or more key columns has the ability to hold numeric + ** values, since strings all have the same hash in the Bloom + ** filter implementation and hence a Bloom filter on a text column + ** is not usually helpful. */ + useBloomFilter = 1; + } } } } @@ -154941,25 +161352,27 @@ static SQLITE_NOINLINE void constructAutomaticIndex( pIdx->azColl[n] = sqlite3StrBINARY; /* Create the automatic index */ + explainAutomaticIndex(pParse, pIdx, pPartial!=0, &addrExp); assert( pLevel->iIdxCur>=0 ); pLevel->iIdxCur = pParse->nTab++; sqlite3VdbeAddOp2(v, OP_OpenAutoindex, pLevel->iIdxCur, nKeyCol+1); sqlite3VdbeSetP4KeyInfo(pParse, pIdx); VdbeComment((v, "for %s", pTable->zName)); - if( OptimizationEnabled(pParse->db, SQLITE_BloomFilter) ){ + if( OptimizationEnabled(pParse->db, SQLITE_BloomFilter) && useBloomFilter ){ + sqlite3WhereExplainBloomFilter(pParse, pWC->pWInfo, pLevel); pLevel->regFilter = ++pParse->nMem; sqlite3VdbeAddOp2(v, OP_Blob, 10000, pLevel->regFilter); } /* Fill the automatic index with content */ - pTabItem = &pWC->pWInfo->pTabList->a[pLevel->iFrom]; - if( pTabItem->fg.viaCoroutine ){ - int regYield = pTabItem->regReturn; + assert( pSrc == &pWC->pWInfo->pTabList->a[pLevel->iFrom] ); + if( pSrc->fg.viaCoroutine ){ + int regYield = pSrc->regReturn; addrCounter = sqlite3VdbeAddOp2(v, OP_Integer, 0, 0); - sqlite3VdbeAddOp3(v, OP_InitCoroutine, regYield, 0, pTabItem->addrFillSub); + sqlite3VdbeAddOp3(v, OP_InitCoroutine, regYield, 0, pSrc->addrFillSub); addrTop = sqlite3VdbeAddOp1(v, OP_Yield, regYield); VdbeCoverage(v); - VdbeComment((v, "next row of %s", pTabItem->pTab->zName)); + VdbeComment((v, "next row of %s", pSrc->pTab->zName)); }else{ addrTop = sqlite3VdbeAddOp1(v, OP_Rewind, pLevel->iTabCur); VdbeCoverage(v); } @@ -154976,17 +161389,18 @@ static SQLITE_NOINLINE void constructAutomaticIndex( sqlite3VdbeAddOp4Int(v, OP_FilterAdd, pLevel->regFilter, 0, regBase, pLoop->u.btree.nEq); } + sqlite3VdbeScanStatusCounters(v, addrExp, addrExp, sqlite3VdbeCurrentAddr(v)); sqlite3VdbeAddOp2(v, OP_IdxInsert, pLevel->iIdxCur, regRecord); sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT); if( pPartial ) sqlite3VdbeResolveLabel(v, iContinue); - if( pTabItem->fg.viaCoroutine ){ + if( pSrc->fg.viaCoroutine ){ sqlite3VdbeChangeP2(v, addrCounter, regBase+n); testcase( pParse->db->mallocFailed ); assert( pLevel->iIdxCur>0 ); translateColumnToCopy(pParse, addrTop, pLevel->iTabCur, - pTabItem->regResult, pLevel->iIdxCur); + pSrc->regResult, pLevel->iIdxCur); sqlite3VdbeGoto(v, addrTop); - pTabItem->fg.viaCoroutine = 0; + pSrc->fg.viaCoroutine = 0; }else{ sqlite3VdbeAddOp2(v, OP_Next, pLevel->iTabCur, addrTop+1); VdbeCoverage(v); sqlite3VdbeChangeP5(v, SQLITE_STMTSTATUS_AUTOINDEX); @@ -154996,6 +161410,7 @@ static SQLITE_NOINLINE void constructAutomaticIndex( /* Jump here when skipping the initialization */ sqlite3VdbeJumpHere(v, addrInit); + sqlite3VdbeScanStatusRange(v, addrExp, addrExp, -1); end_auto_index_create: sqlite3ExprDelete(pParse->db, pPartial); @@ -155037,16 +161452,26 @@ static SQLITE_NOINLINE void sqlite3ConstructBloomFilter( Vdbe *v = pParse->pVdbe; /* VDBE under construction */ WhereLoop *pLoop = pLevel->pWLoop; /* The loop being coded */ int iCur; /* Cursor for table getting the filter */ + IndexedExpr *saved_pIdxEpr; /* saved copy of Parse.pIdxEpr */ + IndexedExpr *saved_pIdxPartExpr; /* saved copy of 
Parse.pIdxPartExpr */
+
+  saved_pIdxEpr = pParse->pIdxEpr;
+  saved_pIdxPartExpr = pParse->pIdxPartExpr;
+  pParse->pIdxEpr = 0;
+  pParse->pIdxPartExpr = 0;
   assert( pLoop!=0 );
   assert( v!=0 );
   assert( pLoop->wsFlags & WHERE_BLOOMFILTER );
+  assert( (pLoop->wsFlags & WHERE_IDX_ONLY)==0 );
   addrOnce = sqlite3VdbeAddOp0(v, OP_Once); VdbeCoverage(v);
   do{
+    const SrcList *pTabList;
     const SrcItem *pItem;
     const Table *pTab;
     u64 sz;
+    int iSrc;
     sqlite3WhereExplainBloomFilter(pParse, pWInfo, pLevel);
     addrCont = sqlite3VdbeMakeLabel(pParse);
     iCur = pLevel->iTabCur;
@@ -155060,7 +161485,9 @@ static SQLITE_NOINLINE void sqlite3ConstructBloomFilter(
     ** testing complicated. By basing the blob size on the value in the
     ** sqlite_stat1 table, testing is much easier.
     */
-    pItem = &pWInfo->pTabList->a[pLevel->iFrom];
+    pTabList = pWInfo->pTabList;
+    iSrc = pLevel->iFrom;
+    pItem = &pTabList->a[iSrc];
     assert( pItem!=0 );
     pTab = pItem->pTab;
     assert( pTab!=0 );
@@ -155077,7 +161504,7 @@ static SQLITE_NOINLINE void sqlite3ConstructBloomFilter(
     for(pTerm=pWInfo->sWC.a; pTerm<pWCEnd; pTerm++){
      Expr *pExpr = pTerm->pExpr;
      if( (pTerm->wtFlags & TERM_VIRTUAL)==0
-       && sqlite3ExprIsTableConstraint(pExpr, pItem)
+       && sqlite3ExprIsSingleTableConstraint(pExpr, pTabList, iSrc)
       ){
         sqlite3ExprIfFalse(pParse, pTerm->pExpr, addrCont, SQLITE_JUMPIFNULL);
       }
@@ -155093,9 +161520,8 @@ static SQLITE_NOINLINE void sqlite3ConstructBloomFilter(
       int r1 = sqlite3GetTempRange(pParse, n);
       int jj;
       for(jj=0; jj<n; jj++){
-        int iCol = pIdx->aiColumn[jj];
         assert( pIdx->pTable==pItem->pTab );
-        sqlite3ExprCodeGetColumnOfTable(v, pIdx->pTable, iCur, iCol,r1+jj);
+        sqlite3ExprCodeLoadIndexColumn(pParse, pIdx, iCur, jj, r1+jj);
       }
       sqlite3VdbeAddOp4Int(v, OP_FilterAdd, pLevel->regFilter, 0, r1, n);
       sqlite3ReleaseTempRange(pParse, r1, n);
@@ -155126,6 +161552,8 @@
     }
   }while( iLevel < pWInfo->nLevel );
   sqlite3VdbeJumpHere(v, addrOnce);
+  pParse->pIdxEpr = saved_pIdxEpr;
+  pParse->pIdxPartExpr = saved_pIdxPartExpr;
 }
 
 
@@ -155181,22 +161609,10 @@ static sqlite3_index_info *allocateIndexInfo(
       assert( (pTerm->eOperator & (WO_OR|WO_AND))==0 );
       assert( pTerm->u.x.leftColumn>=XN_ROWID );
       assert( pTerm->u.x.leftColumn<pSrc->pTab->nCol );
-
-      /* tag-20191211-002: WHERE-clause constraints are not useful to the
-      ** right-hand table of a LEFT JOIN nor to the either table of a
-      ** RIGHT JOIN. See tag-20191211-001 for the
-      ** equivalent restriction for ordinary tables. 
*/ - if( (pSrc->fg.jointype & (JT_LEFT|JT_LTORJ|JT_RIGHT))!=0 ){ - testcase( (pSrc->fg.jointype & (JT_LEFT|JT_LTORJ|JT_RIGHT))==JT_LEFT ); - testcase( (pSrc->fg.jointype & (JT_LEFT|JT_LTORJ|JT_RIGHT))==JT_RIGHT ); - testcase( (pSrc->fg.jointype & (JT_LEFT|JT_LTORJ|JT_RIGHT))==JT_LTORJ ); - testcase( ExprHasProperty(pTerm->pExpr, EP_OuterON) ); - testcase( ExprHasProperty(pTerm->pExpr, EP_InnerON) ); - if( !ExprHasProperty(pTerm->pExpr, EP_OuterON|EP_InnerON) - || pTerm->pExpr->w.iJoin != pSrc->iCursor - ){ - continue; - } + if( (pSrc->fg.jointype & (JT_LEFT|JT_LTORJ|JT_RIGHT))!=0 + && !constraintCompatibleWithOuterJoin(pTerm,pSrc) + ){ + continue; } nTerm++; pTerm->wtFlags |= TERM_OK; @@ -155393,6 +161809,9 @@ static int vtabBestIndex(Parse *pParse, Table *pTab, sqlite3_index_info *p){ sqlite3ErrorMsg(pParse, "%s", pVtab->zErrMsg); } } + if( pTab->u.vtab.p->bAllSchemas ){ + sqlite3VtabUsesAllSchemas(pParse); + } sqlite3_free(pVtab->zErrMsg); pVtab->zErrMsg = 0; return rc; @@ -155437,6 +161856,7 @@ static int whereKeyStats( assert( pIdx->nSample>0 ); assert( pRec->nField>0 ); + /* Do a binary search to find the first sample greater than or equal ** to pRec. If pRec contains a single field, the set of samples to search ** is simply the aSample[] array. If the samples in aSample[] contain more @@ -155481,7 +161901,12 @@ static int whereKeyStats( ** it is extended to two fields. The duplicates that this creates do not ** cause any problems. */ - nField = MIN(pRec->nField, pIdx->nSample); + if( !HasRowid(pIdx->pTable) && IsPrimaryKeyIndex(pIdx) ){ + nField = pIdx->nKeyCol; + }else{ + nField = pIdx->nColumn; + } + nField = MIN(pRec->nField, nField); iCol = 0; iSample = pIdx->nSample * nField; do{ @@ -155547,12 +161972,12 @@ static int whereKeyStats( if( iCol>0 ){ pRec->nField = iCol; assert( sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)<=0 - || pParse->db->mallocFailed ); + || pParse->db->mallocFailed || CORRUPT_DB ); } if( i>0 ){ pRec->nField = nField; assert( sqlite3VdbeRecordCompare(aSample[i-1].n, aSample[i-1].p, pRec)<0 - || pParse->db->mallocFailed ); + || pParse->db->mallocFailed || CORRUPT_DB ); } } } @@ -155569,7 +161994,7 @@ static int whereKeyStats( ** is larger than all samples in the array. */ tRowcnt iUpper, iGap; if( i>=pIdx->nSample ){ - iUpper = sqlite3LogEstToInt(pIdx->aiRowLogEst[0]); + iUpper = pIdx->nRowEst0; }else{ iUpper = aSample[i].anLt[iCol]; } @@ -155644,7 +162069,7 @@ SQLITE_PRIVATE char sqlite3IndexColumnAffinity(sqlite3 *db, Index *pIdx, int iCo ** Value pLoop->nOut is currently set to the estimated number of rows ** visited for scanning (a=? AND b=?). This function reduces that estimate ** by some factor to account for the (c BETWEEN ? AND ?) expression based -** on the stat4 data for the index. this scan will be peformed multiple +** on the stat4 data for the index. this scan will be performed multiple ** times (once for each (a,b) combination that matches a=?) is dealt with ** by the caller. 
** @@ -155725,7 +162150,7 @@ static int whereRangeSkipScanEst( int nAdjust = (sqlite3LogEst(p->nSample) - sqlite3LogEst(nDiff)); pLoop->nOut -= nAdjust; *pbDone = 1; - WHERETRACE(0x10, ("range skip-scan regions: %u..%u adjust=%d est=%d\n", + WHERETRACE(0x20, ("range skip-scan regions: %u..%u adjust=%d est=%d\n", nLower, nUpper, nAdjust*-1, pLoop->nOut)); } @@ -155903,7 +162328,7 @@ static int whereRangeScanEst( if( nNewwtFlags & TERM_VNULL)==0 ); + assert( pUpper==0 || (pUpper->wtFlags & TERM_VNULL)==0 || pParse->nErr>0 ); nNew = whereRangeAdjust(pLower, nOut); nNew = whereRangeAdjust(pUpper, nNew); @@ -155936,7 +162361,7 @@ static int whereRangeScanEst( if( nNewnOut>nOut ){ - WHERETRACE(0x10,("Range scan lowers nOut from %d to %d\n", + WHERETRACE(0x20,("Range scan lowers nOut from %d to %d\n", pLoop->nOut, nOut)); } #endif @@ -156001,7 +162426,7 @@ static int whereEqualScanEst( pBuilder->nRecValid = nEq; whereKeyStats(pParse, p, pRec, 0, a); - WHERETRACE(0x10,("equality scan regions %s(%d): %d\n", + WHERETRACE(0x20,("equality scan regions %s(%d): %d\n", p->zName, nEq-1, (int)a[1])); *pnRow = a[1]; @@ -156049,9 +162474,9 @@ static int whereInScanEst( } if( rc==SQLITE_OK ){ - if( nRowEst > nRow0 ) nRowEst = nRow0; + if( nRowEst > (tRowcnt)nRow0 ) nRowEst = nRow0; *pnRow = nRowEst; - WHERETRACE(0x10,("IN row estimate: est=%d\n", nRowEst)); + WHERETRACE(0x20,("IN row estimate: est=%d\n", nRowEst)); } assert( pBuilder->nRecValid==nRecValid ); return rc; @@ -156160,7 +162585,7 @@ SQLITE_PRIVATE void sqlite3WhereLoopPrint(WhereLoop *p, WhereClause *pWC){ sqlite3DebugPrintf(" f %06x N %d", p->wsFlags, p->nLTerm); } sqlite3DebugPrintf(" cost %d,%d,%d\n", p->rSetup, p->rRun, p->nOut); - if( p->nLTerm && (sqlite3WhereTrace & 0x100)!=0 ){ + if( p->nLTerm && (sqlite3WhereTrace & 0x4000)!=0 ){ int i; for(i=0; inLTerm; i++){ sqlite3WhereTermPrint(p->aLTerm[i], i); @@ -156198,12 +162623,18 @@ static void whereLoopClearUnion(sqlite3 *db, WhereLoop *p){ } /* -** Deallocate internal memory used by a WhereLoop object +** Deallocate internal memory used by a WhereLoop object. Leave the +** object in an initialized state, as if it had been newly allocated. 
*/ static void whereLoopClear(sqlite3 *db, WhereLoop *p){ - if( p->aLTerm!=p->aLTermSpace ) sqlite3DbFreeNN(db, p->aLTerm); + if( p->aLTerm!=p->aLTermSpace ){ + sqlite3DbFreeNN(db, p->aLTerm); + p->aLTerm = p->aLTermSpace; + p->nLSlot = ArraySize(p->aLTermSpace); + } whereLoopClearUnion(db, p); - whereLoopInit(p); + p->nLTerm = 0; + p->wsFlags = 0; } /* @@ -156227,7 +162658,9 @@ static int whereLoopResize(sqlite3 *db, WhereLoop *p, int n){ */ static int whereLoopXfer(sqlite3 *db, WhereLoop *pTo, WhereLoop *pFrom){ whereLoopClearUnion(db, pTo); - if( whereLoopResize(db, pTo, pFrom->nLTerm) ){ + if( pFrom->nLTerm > pTo->nLSlot + && whereLoopResize(db, pTo, pFrom->nLTerm) + ){ memset(pTo, 0, WHERE_LOOP_XFER_SZ); return SQLITE_NOMEM_BKPT; } @@ -156245,8 +162678,9 @@ static int whereLoopXfer(sqlite3 *db, WhereLoop *pTo, WhereLoop *pFrom){ ** Delete a WhereLoop object */ static void whereLoopDelete(sqlite3 *db, WhereLoop *p){ + assert( db!=0 ); whereLoopClear(db, p); - sqlite3DbFreeNN(db, p); + sqlite3DbNNFreeNN(db, p); } /* @@ -156254,30 +162688,19 @@ static void whereLoopDelete(sqlite3 *db, WhereLoop *p){ */ static void whereInfoFree(sqlite3 *db, WhereInfo *pWInfo){ assert( pWInfo!=0 ); + assert( db!=0 ); sqlite3WhereClauseClear(&pWInfo->sWC); while( pWInfo->pLoops ){ WhereLoop *p = pWInfo->pLoops; pWInfo->pLoops = p->pNextLoop; whereLoopDelete(db, p); } - assert( pWInfo->pExprMods==0 ); while( pWInfo->pMemToFree ){ WhereMemBlock *pNext = pWInfo->pMemToFree->pNext; - sqlite3DbFreeNN(db, pWInfo->pMemToFree); + sqlite3DbNNFreeNN(db, pWInfo->pMemToFree); pWInfo->pMemToFree = pNext; } - sqlite3DbFreeNN(db, pWInfo); -} - -/* Undo all Expr node modifications -*/ -static void whereUndoExprMods(WhereInfo *pWInfo){ - while( pWInfo->pExprMods ){ - WhereExprMod *p = pWInfo->pExprMods; - pWInfo->pExprMods = p->pNext; - memcpy(p->pExpr, &p->orig, sizeof(p->orig)); - sqlite3DbFree(pWInfo->pParse->db, p); - } + sqlite3DbNNFreeNN(db, pWInfo); } /* @@ -156401,7 +162824,7 @@ static WhereLoop **whereLoopFindLesser( ** rSetup. Call this SETUP-INVARIANT */ assert( p->rSetup>=pTemplate->rSetup ); - /* Any loop using an appliation-defined index (or PRIMARY KEY or + /* Any loop using an application-defined index (or PRIMARY KEY or ** UNIQUE constraint) with one or more == constraints is better ** than an automatic index. Unless it is a skip-scan. */ if( (p->wsFlags & WHERE_AUTO_INDEX)!=0 @@ -156428,7 +162851,7 @@ static WhereLoop **whereLoopFindLesser( /* If pTemplate is always better than p, then cause p to be overwritten ** with pTemplate. pTemplate is better than p if: - ** (1) pTemplate has no more dependences than p, and + ** (1) pTemplate has no more dependencies than p, and ** (2) pTemplate has an equal or lower cost than p. */ if( (p->prereq & pTemplate->prereq)==pTemplate->prereq /* (1) */ @@ -156546,7 +162969,7 @@ static int whereLoopInsert(WhereLoopBuilder *pBuilder, WhereLoop *pTemplate){ }else{ /* We will be overwriting WhereLoop p[]. 
But before we do, first ** go through the rest of the list and delete any other entries besides - ** p[] that are also supplated by pTemplate */ + ** p[] that are also supplanted by pTemplate */ WhereLoop **ppTail = &p->pNextLoop; WhereLoop *pToDel; while( *ppTail ){ @@ -156626,6 +163049,7 @@ static void whereLoopOutputAdjust( if( pX->iParent>=0 && (&pWC->a[pX->iParent])==pTerm ) break; } if( j<0 ){ + sqlite3ProgressCheck(pWC->pWInfo->pParse); if( pLoop->maskSelf==pTerm->prereqAll ){ /* If there are extra terms in the WHERE clause not used by an index ** that depend only on the table being scanned, and that will tend to @@ -156745,7 +163169,7 @@ static int whereRangeVectorLen( } /* -** Adjust the cost C by the costMult facter T. This only occurs if +** Adjust the cost C by the costMult factor T. This only occurs if ** compiled with -DSQLITE_ENABLE_COSTMULT */ #ifdef SQLITE_ENABLE_COSTMULT @@ -156772,7 +163196,7 @@ static int whereLoopAddBtreeIndex( Index *pProbe, /* An index on pSrc */ LogEst nInMul /* log(Number of iterations due to IN) */ ){ - WhereInfo *pWInfo = pBuilder->pWInfo; /* WHERE analyse context */ + WhereInfo *pWInfo = pBuilder->pWInfo; /* WHERE analyze context */ Parse *pParse = pWInfo->pParse; /* Parsing context */ sqlite3 *db = pParse->db; /* Database connection malloc context */ WhereLoop *pNew; /* Template WhereLoop under construction */ @@ -156793,7 +163217,10 @@ static int whereLoopAddBtreeIndex( WhereTerm *pTop = 0, *pBtm = 0; /* Top and bottom range constraints */ pNew = pBuilder->pNew; - if( db->mallocFailed ) return SQLITE_NOMEM_BKPT; + assert( db->mallocFailed==0 || pParse->nErr>0 ); + if( pParse->nErr ){ + return pParse->rc; + } WHERETRACE(0x800, ("BEGIN %s.addBtreeIdx(%s), nEq=%d, nSkip=%d, rRun=%d\n", pProbe->pTable->zName,pProbe->zName, pNew->u.btree.nEq, pNew->nSkip, pNew->rRun)); @@ -156844,32 +163271,11 @@ static int whereLoopAddBtreeIndex( ** to mix with a lower range bound from some other source */ if( pTerm->wtFlags & TERM_LIKEOPT && pTerm->eOperator==WO_LT ) continue; - /* tag-20191211-001: Do not allow constraints from the WHERE clause to - ** be used by the right table of a LEFT JOIN nor by the left table of a - ** RIGHT JOIN. Only constraints in the ON clause are allowed. - ** See tag-20191211-002 for the vtab equivalent. - ** - ** 2022-06-06: See https://sqlite.org/forum/forumpost/206d99a16dd9212f - ** for an example of a WHERE clause constraints that may not be used on - ** the right table of a RIGHT JOIN because the constraint implies a - ** not-NULL condition on the left table of the RIGHT JOIN. - ** - ** 2022-06-10: The same condition applies to termCanDriveIndex() above. 
- **    https://sqlite.org/forum/forumpost/51e6959f61
- */
- if( (pSrc->fg.jointype & (JT_LEFT|JT_LTORJ|JT_RIGHT))!=0 ){
-   testcase( (pSrc->fg.jointype & (JT_LEFT|JT_LTORJ|JT_RIGHT))==JT_LEFT );
-   testcase( (pSrc->fg.jointype & (JT_LEFT|JT_LTORJ|JT_RIGHT))==JT_RIGHT );
-   testcase( (pSrc->fg.jointype & (JT_LEFT|JT_LTORJ|JT_RIGHT))==JT_LTORJ );
-   testcase( ExprHasProperty(pTerm->pExpr, EP_OuterON) )
-   testcase( ExprHasProperty(pTerm->pExpr, EP_InnerON) );
-   if( !ExprHasProperty(pTerm->pExpr, EP_OuterON|EP_InnerON)
-    || pTerm->pExpr->w.iJoin != pSrc->iCursor
-   ){
-     continue;
-   }
+ if( (pSrc->fg.jointype & (JT_LEFT|JT_LTORJ|JT_RIGHT))!=0
+  && !constraintCompatibleWithOuterJoin(pTerm,pSrc)
+ ){
+   continue;
  }
-
  if( IsUniqueIndex(pProbe) && saved_nEq==pProbe->nKeyCol-1 ){
    pBuilder->bldFlags1 |= SQLITE_BLDF1_UNIQUE;
  }else{
@@ -156880,7 +163286,11 @@ static int whereLoopAddBtreeIndex(
    pNew->u.btree.nBtm = saved_nBtm;
    pNew->u.btree.nTop = saved_nTop;
    pNew->nLTerm = saved_nLTerm;
-   if( whereLoopResize(db, pNew, pNew->nLTerm+1) ) break; /* OOM */
+   if( pNew->nLTerm>=pNew->nLSlot
+    && whereLoopResize(db, pNew, pNew->nLTerm+1)
+   ){
+     break; /* OOM while trying to enlarge the pNew->aLTerm array */
+   }
    pNew->aLTerm[pNew->nLTerm++] = pTerm;
    pNew->prereq = (saved_prereq | pTerm->prereqRight) & ~pNew->maskSelf;

@@ -156973,38 +163383,39 @@ static int whereLoopAddBtreeIndex(
      if( scan.iEquiv>1 ) pNew->wsFlags |= WHERE_TRANSCONS;
    }else if( eOp & WO_ISNULL ){
      pNew->wsFlags |= WHERE_COLUMN_NULL;
-   }else if( eOp & (WO_GT|WO_GE) ){
-     testcase( eOp & WO_GT );
-     testcase( eOp & WO_GE );
-     pNew->wsFlags |= WHERE_COLUMN_RANGE|WHERE_BTM_LIMIT;
-     pNew->u.btree.nBtm = whereRangeVectorLen(
-       pParse, pSrc->iCursor, pProbe, saved_nEq, pTerm
-     );
-     pBtm = pTerm;
-     pTop = 0;
-     if( pTerm->wtFlags & TERM_LIKEOPT ){
-       /* Range constraints that come from the LIKE optimization are
-       ** always used in pairs. */
-       pTop = &pTerm[1];
-       assert( (pTop-(pTerm->pWC->a))<pTerm->pWC->nTerm );
-       assert( pTop->wtFlags & TERM_LIKEOPT );
-       assert( pTop->eOperator==WO_LT );
-       if( whereLoopResize(db, pNew, pNew->nLTerm+1) ) break; /* OOM */
-       pNew->aLTerm[pNew->nLTerm++] = pTop;
-       pNew->wsFlags |= WHERE_TOP_LIMIT;
-       pNew->u.btree.nTop = 1;
-     }
-   }else{
-     assert( eOp & (WO_LT|WO_LE) );
-     testcase( eOp & WO_LT );
-     testcase( eOp & WO_LE );
-     pNew->wsFlags |= WHERE_COLUMN_RANGE|WHERE_TOP_LIMIT;
-     pNew->u.btree.nTop = whereRangeVectorLen(
+   }else{
+     int nVecLen = whereRangeVectorLen(
        pParse, pSrc->iCursor, pProbe, saved_nEq, pTerm
      );
-     pTop = pTerm;
-     pBtm = (pNew->wsFlags & WHERE_BTM_LIMIT)!=0 ?
-       pNew->aLTerm[pNew->nLTerm-2] : 0;
+     if( eOp & (WO_GT|WO_GE) ){
+       testcase( eOp & WO_GT );
+       testcase( eOp & WO_GE );
+       pNew->wsFlags |= WHERE_COLUMN_RANGE|WHERE_BTM_LIMIT;
+       pNew->u.btree.nBtm = nVecLen;
+       pBtm = pTerm;
+       pTop = 0;
+       if( pTerm->wtFlags & TERM_LIKEOPT ){
+         /* Range constraints that come from the LIKE optimization are
+         ** always used in pairs. */
+         pTop = &pTerm[1];
+         assert( (pTop-(pTerm->pWC->a))<pTerm->pWC->nTerm );
+         assert( pTop->wtFlags & TERM_LIKEOPT );
+         assert( pTop->eOperator==WO_LT );
+         if( whereLoopResize(db, pNew, pNew->nLTerm+1) ) break; /* OOM */
+         pNew->aLTerm[pNew->nLTerm++] = pTop;
+         pNew->wsFlags |= WHERE_TOP_LIMIT;
+         pNew->u.btree.nTop = 1;
+       }
+     }else{
+       assert( eOp & (WO_LT|WO_LE) );
+       testcase( eOp & WO_LT );
+       testcase( eOp & WO_LE );
+       pNew->wsFlags |= WHERE_COLUMN_RANGE|WHERE_TOP_LIMIT;
+       pNew->u.btree.nTop = nVecLen;
+       pTop = pTerm;
+       pBtm = (pNew->wsFlags & WHERE_BTM_LIMIT)!=0 ?
+         pNew->aLTerm[pNew->nLTerm-2] : 0;
+     }
    }

    /* At this point pNew->nOut is set to the number of rows expected to
@@ -157056,7 +163467,7 @@ static int whereLoopAddBtreeIndex(
       && pNew->nOut+10 > pProbe->aiRowLogEst[0]
      ){
#if WHERETRACE_ENABLED /* 0x01 */
-       if( sqlite3WhereTrace & 0x01 ){
+       if( sqlite3WhereTrace & 0x20 ){
          sqlite3DebugPrintf(
            "STAT4 determines term has low selectivity:\n");
          sqlite3WhereTermPrint(pTerm, 999);
@@ -157093,9 +163504,17 @@ static int whereLoopAddBtreeIndex(
    ** seek only. Then, if this is a non-covering index, add the cost of
    ** visiting the rows in the main table. */
    assert( pSrc->pTab->szTabRow>0 );
-   rCostIdx = pNew->nOut + 1 + (15*pProbe->szIdxRow)/pSrc->pTab->szTabRow;
+   if( pProbe->idxType==SQLITE_IDXTYPE_IPK ){
+     /* The pProbe->szIdxRow is low for an IPK table since the interior
+     ** pages are small. Thus szIdxRow gives a good estimate of seek cost.
+     ** But the leaf pages are full-size, so pProbe->szIdxRow would badly
+     ** under-estimate the scanning cost. */
+     rCostIdx = pNew->nOut + 16;
+   }else{
+     rCostIdx = pNew->nOut + 1 + (15*pProbe->szIdxRow)/pSrc->pTab->szTabRow;
+   }
    pNew->rRun = sqlite3LogEstAdd(rLogSize, rCostIdx);
-   if( (pNew->wsFlags & (WHERE_IDX_ONLY|WHERE_IPK))==0 ){
+   if( (pNew->wsFlags & (WHERE_IDX_ONLY|WHERE_IPK|WHERE_EXPRIDX))==0 ){
      pNew->rRun = sqlite3LogEstAdd(pNew->rRun, pNew->nOut + 16);
    }
    ApplyCostMultiplier(pNew->rRun, pProbe->pTable->costMult);
@@ -157117,6 +163536,9 @@ static int whereLoopAddBtreeIndex(
     && (pNew->u.btree.nEq<pProbe->nKeyCol
          || pProbe->idxType!=SQLITE_IDXTYPE_PRIMARYKEY)
    ){
+     if( pNew->u.btree.nEq>3 ){
+       sqlite3ProgressCheck(pParse);
+     }
      whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nInMul+nIn);
    }
    pNew->nOut = saved_nOut;
@@ -157248,6 +163670,243 @@ static int whereUsablePartialIndex(
  return 0;
}

+/*
+** pIdx is an index containing expressions. Check it see if any of the
+** expressions in the index match the pExpr expression.
+*/
+static int exprIsCoveredByIndex(
+  const Expr *pExpr,
+  const Index *pIdx,
+  int iTabCur
+){
+  int i;
+  for(i=0; i<pIdx->nColumn; i++){
+    if( pIdx->aiColumn[i]==XN_EXPR
+     && sqlite3ExprCompare(0, pExpr, pIdx->aColExpr->a[i].pExpr, iTabCur)==0
+    ){
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/*
+** Structure passed to the whereIsCoveringIndex Walker callback.
+*/
+typedef struct CoveringIndexCheck CoveringIndexCheck;
+struct CoveringIndexCheck {
+  Index *pIdx;       /* The index */
+  int iTabCur;       /* Cursor number for the corresponding table */
+  u8 bExpr;          /* Uses an indexed expression */
+  u8 bUnidx;         /* Uses an unindexed column not within an indexed expr */
+};
+
+/*
+** Information passed in is pWalk->u.pCovIdxCk. Call it pCk.
+**
+** If the Expr node references the table with cursor pCk->iTabCur, then
+** make sure that column is covered by the index pCk->pIdx. We know that
+** all columns less than 63 (really BMS-1) are covered, so we don't need
+** to check them. But we do need to check any column at 63 or greater.
+**
+** If the index does not cover the column, then set pWalk->eCode to
+** non-zero and return WRC_Abort to stop the search.
+**
+** If this node does not disprove that the index can be a covering index,
+** then just return WRC_Continue, to continue the search.
+**
+** If pCk->pIdx contains indexed expressions and one of those expressions
+** matches pExpr, then prune the search.
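The covering-index machinery that begins above and continues below (whereIsCoveringIndexWalkCallback(), whereIsCoveringIndex(), and then wherePartIdxExpr() for partial indexes) can be observed from the public API. A minimal sketch, assuming a build of this amalgamation; the exact EXPLAIN QUERY PLAN wording ("USING COVERING INDEX ...") varies between SQLite releases, so treat the output as illustrative:

#include <stdio.h>
#include "sqlite3.h"

/* Print one row of EXPLAIN QUERY PLAN output */
static int show(void *pArg, int nCol, char **azVal, char **azCol){
  (void)pArg; (void)azCol;
  for(int i=0; i<nCol; i++) printf("%s ", azVal[i] ? azVal[i] : "NULL");
  printf("\n");
  return 0;
}

int main(void){
  sqlite3 *db;
  if( sqlite3_open(":memory:", &db)!=SQLITE_OK ) return 1;

  /* Index with an expression term: if every reference to t1 is either
  ** column a or the exact expression b+c, the Walker above finds nothing
  ** uncovered and i1 may be scored WHERE_EXPRIDX / WHERE_IDX_ONLY. */
  sqlite3_exec(db,
      "CREATE TABLE t1(a INT, b INT, c INT);"
      "CREATE INDEX i1 ON t1(a, b+c);", 0, 0, 0);
  sqlite3_exec(db,
      "EXPLAIN QUERY PLAN SELECT a, b+c FROM t1 WHERE a=5;", show, 0, 0);

  /* Partial index: inside any scan of i2 the planner knows a==5 holds,
  ** so wherePartIdxExpr() (defined further below) can treat column a as
  ** a constant and score i2 as covering for this query. */
  sqlite3_exec(db,
      "CREATE INDEX i2 ON t1(b, c) WHERE a=5;", 0, 0, 0);
  sqlite3_exec(db,
      "EXPLAIN QUERY PLAN SELECT a, b, c FROM t1 WHERE a=5 AND b=10;",
      show, 0, 0);

  sqlite3_close(db);
  return 0;
}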
+*/
+static int whereIsCoveringIndexWalkCallback(Walker *pWalk, Expr *pExpr){
+  int i;                    /* Loop counter */
+  const Index *pIdx;        /* The index of interest */
+  const i16 *aiColumn;      /* Columns contained in the index */
+  u16 nColumn;              /* Number of columns in the index */
+  CoveringIndexCheck *pCk;  /* Info about this search */
+
+  pCk = pWalk->u.pCovIdxCk;
+  pIdx = pCk->pIdx;
+  if( (pExpr->op==TK_COLUMN || pExpr->op==TK_AGG_COLUMN) ){
+    /* if( pExpr->iColumn<(BMS-1) && pIdx->bHasExpr==0 ) return WRC_Continue;*/
+    if( pExpr->iTable!=pCk->iTabCur ) return WRC_Continue;
+    pIdx = pWalk->u.pCovIdxCk->pIdx;
+    aiColumn = pIdx->aiColumn;
+    nColumn = pIdx->nColumn;
+    for(i=0; i<nColumn; i++){
+      if( aiColumn[i]==pExpr->iColumn ) return WRC_Continue;
+    }
+    pCk->bUnidx = 1;
+    return WRC_Abort;
+  }else if( pIdx->bHasExpr
+         && exprIsCoveredByIndex(pExpr, pIdx, pWalk->u.pCovIdxCk->iTabCur) ){
+    pCk->bExpr = 1;
+    return WRC_Prune;
+  }
+  return WRC_Continue;
+}
+
+
+/*
+** pIdx is an index that covers all of the low-number columns used by
+** pWInfo->pSelect (columns from 0 through 62) or an index that has
+** expressions terms. Hence, we cannot determine whether or not it is
+** a covering index by using the colUsed bitmasks. We have to do a search
+** to see if the index is covering. This routine does that search.
+**
+** The return value is one of these:
+**
+**      0                The index is definitely not a covering index
+**
+**      WHERE_IDX_ONLY   The index is definitely a covering index
+**
+**      WHERE_EXPRIDX    The index is likely a covering index, but it is
+**                       difficult to determine precisely because of the
+**                       expressions that are indexed. Score it as a
+**                       covering index, but still keep the main table open
+**                       just in case we need it.
+**
+** This routine is an optimization. It is always safe to return zero.
+** But returning one of the other two values when zero should have been
+** returned can lead to incorrect bytecode and assertion faults.
+*/
+static SQLITE_NOINLINE u32 whereIsCoveringIndex(
+  WhereInfo *pWInfo,     /* The WHERE clause context */
+  Index *pIdx,           /* Index that is being tested */
+  int iTabCur            /* Cursor for the table being indexed */
+){
+  int i, rc;
+  struct CoveringIndexCheck ck;
+  Walker w;
+  if( pWInfo->pSelect==0 ){
+    /* We don't have access to the full query, so we cannot check to see
+    ** if pIdx is covering. Assume it is not. */
+    return 0;
+  }
+  if( pIdx->bHasExpr==0 ){
+    for(i=0; i<pIdx->nColumn; i++){
+      if( pIdx->aiColumn[i]>=BMS-1 ) break;
+    }
+    if( i>=pIdx->nColumn ){
+      /* pIdx does not index any columns greater than 62, but we know from
+      ** colMask that columns greater than 62 are used, so this is not a
+      ** covering index */
+      return 0;
+    }
+  }
+  ck.pIdx = pIdx;
+  ck.iTabCur = iTabCur;
+  ck.bExpr = 0;
+  ck.bUnidx = 0;
+  memset(&w, 0, sizeof(w));
+  w.xExprCallback = whereIsCoveringIndexWalkCallback;
+  w.xSelectCallback = sqlite3SelectWalkNoop;
+  w.u.pCovIdxCk = &ck;
+  sqlite3WalkSelect(&w, pWInfo->pSelect);
+  if( ck.bUnidx ){
+    rc = 0;
+  }else if( ck.bExpr ){
+    rc = WHERE_EXPRIDX;
+  }else{
+    rc = WHERE_IDX_ONLY;
+  }
+  return rc;
+}
+
+/*
+** This is an sqlite3ParserAddCleanup() callback that is invoked to
+** free the Parse->pIdxEpr list when the Parse object is destroyed.
+*/
+static void whereIndexedExprCleanup(sqlite3 *db, void *pObject){
+  IndexedExpr **pp = (IndexedExpr**)pObject;
+  while( *pp!=0 ){
+    IndexedExpr *p = *pp;
+    *pp = p->pIENext;
+    sqlite3ExprDelete(db, p->pExpr);
+    sqlite3DbFreeNN(db, p);
+  }
+}
+
+/*
+** This function is called for a partial index - one with a WHERE clause - in
+** two scenarios.
In both cases, it determines whether or not the WHERE
+** clause on the index implies that a column of the table may be safely
+** replaced by a constant expression. For example, in the following
+** SELECT:
+**
+**     CREATE INDEX i1 ON t1(b, c) WHERE a=<expr>;
+**     SELECT a, b, c FROM t1 WHERE a=<expr> AND b=?;
+**
+** The "a" in the select-list may be replaced by <expr>, iff:
+**
+**    (a) <expr> is a constant expression, and
+**    (b) The (a=<expr>) comparison uses the BINARY collation sequence, and
+**    (c) Column "a" has an affinity other than NONE or BLOB.
+**
+** If argument pItem is NULL, then pMask must not be NULL. In this case this
+** function is being called as part of determining whether or not pIdx
+** is a covering index. This function clears any bits in (*pMask)
+** corresponding to columns that may be replaced by constants as described
+** above.
+**
+** Otherwise, if pItem is not NULL, then this function is being called
+** as part of coding a loop that uses index pIdx. In this case, add entries
+** to the Parse.pIdxPartExpr list for each column that can be replaced
+** by a constant.
+*/
+static void wherePartIdxExpr(
+  Parse *pParse,     /* Parse context */
+  Index *pIdx,       /* Partial index being processed */
+  Expr *pPart,       /* WHERE clause being processed */
+  Bitmask *pMask,    /* Mask to clear bits in */
+  int iIdxCur,       /* Cursor number for index */
+  SrcItem *pItem     /* The FROM clause entry for the table */
+){
+  assert( pItem==0 || (pItem->fg.jointype & JT_RIGHT)==0 );
+  assert( (pItem==0 || pMask==0) && (pMask!=0 || pItem!=0) );
+
+  if( pPart->op==TK_AND ){
+    wherePartIdxExpr(pParse, pIdx, pPart->pRight, pMask, iIdxCur, pItem);
+    pPart = pPart->pLeft;
+  }
+
+  if( (pPart->op==TK_EQ || pPart->op==TK_IS) ){
+    Expr *pLeft = pPart->pLeft;
+    Expr *pRight = pPart->pRight;
+    u8 aff;
+
+    if( pLeft->op!=TK_COLUMN ) return;
+    if( !sqlite3ExprIsConstant(pRight) ) return;
+    if( !sqlite3IsBinary(sqlite3ExprCompareCollSeq(pParse, pPart)) ) return;
+    if( pLeft->iColumn<0 ) return;
+    aff = pIdx->pTable->aCol[pLeft->iColumn].affinity;
+    if( aff>=SQLITE_AFF_TEXT ){
+      if( pItem ){
+        sqlite3 *db = pParse->db;
+        IndexedExpr *p = (IndexedExpr*)sqlite3DbMallocRaw(db, sizeof(*p));
+        if( p ){
+          int bNullRow = (pItem->fg.jointype&(JT_LEFT|JT_LTORJ))!=0;
+          p->pExpr = sqlite3ExprDup(db, pRight, 0);
+          p->iDataCur = pItem->iCursor;
+          p->iIdxCur = iIdxCur;
+          p->iIdxCol = pLeft->iColumn;
+          p->bMaybeNullRow = bNullRow;
+          p->pIENext = pParse->pIdxPartExpr;
+          p->aff = aff;
+          pParse->pIdxPartExpr = p;
+          if( p->pIENext==0 ){
+            void *pArg = (void*)&pParse->pIdxPartExpr;
+            sqlite3ParserAddCleanup(pParse, whereIndexedExprCleanup, pArg);
+          }
+        }
+      }else if( pLeft->iColumn<(BMS-1) ){
+        *pMask &= ~((Bitmask)1 << pLeft->iColumn);
+      }
+    }
+  }
}


/*
** Add all WhereLoop objects for a single table of the join where the table
** is identified by pBuilder->pNew->iTab.
That table is guaranteed to be @@ -157286,7 +163945,7 @@ static int whereUsablePartialIndex( */ static int whereLoopAddBtree( WhereLoopBuilder *pBuilder, /* WHERE clause information */ - Bitmask mPrereq /* Extra prerequesites for using this table */ + Bitmask mPrereq /* Extra prerequisites for using this table */ ){ WhereInfo *pWInfo; /* WHERE analysis context */ Index *pProbe; /* An index we are evaluating */ @@ -157330,7 +163989,7 @@ static int whereLoopAddBtree( sPk.aiRowLogEst = aiRowEstPk; sPk.onError = OE_Replace; sPk.pTable = pTab; - sPk.szIdxRow = pTab->szTabRow; + sPk.szIdxRow = 3; /* TUNING: Interior rows of IPK table are very small */ sPk.idxType = SQLITE_IDXTYPE_IPK; aiRowEstPk[0] = pTab->nRowLogEst; aiRowEstPk[1] = 0; @@ -157381,7 +164040,8 @@ static int whereLoopAddBtree( if( !IsView(pTab) && (pTab->tabFlags & TF_Ephemeral)==0 ){ pNew->rSetup += 28; }else{ - pNew->rSetup -= 10; + pNew->rSetup -= 25; /* Greatly reduced setup cost for auto indexes + ** on ephemeral materializations of views */ } ApplyCostMultiplier(pNew->rSetup, pTab->costMult); if( pNew->rSetup<0 ) pNew->rSetup = 0; @@ -157458,11 +164118,43 @@ static int whereLoopAddBtree( }else{ Bitmask m; if( pProbe->isCovering ){ - pNew->wsFlags = WHERE_IDX_ONLY | WHERE_INDEXED; m = 0; + pNew->wsFlags = WHERE_IDX_ONLY | WHERE_INDEXED; }else{ m = pSrc->colUsed & pProbe->colNotIdxed; - pNew->wsFlags = (m==0) ? (WHERE_IDX_ONLY|WHERE_INDEXED) : WHERE_INDEXED; + if( pProbe->pPartIdxWhere ){ + wherePartIdxExpr( + pWInfo->pParse, pProbe, pProbe->pPartIdxWhere, &m, 0, 0 + ); + } + pNew->wsFlags = WHERE_INDEXED; + if( m==TOPBIT || (pProbe->bHasExpr && !pProbe->bHasVCol && m!=0) ){ + u32 isCov = whereIsCoveringIndex(pWInfo, pProbe, pSrc->iCursor); + if( isCov==0 ){ + WHERETRACE(0x200, + ("-> %s is not a covering index" + " according to whereIsCoveringIndex()\n", pProbe->zName)); + assert( m!=0 ); + }else{ + m = 0; + pNew->wsFlags |= isCov; + if( isCov & WHERE_IDX_ONLY ){ + WHERETRACE(0x200, + ("-> %s is a covering expression index" + " according to whereIsCoveringIndex()\n", pProbe->zName)); + }else{ + assert( isCov==WHERE_EXPRIDX ); + WHERETRACE(0x200, + ("-> %s might be a covering expression index" + " according to whereIsCoveringIndex()\n", pProbe->zName)); + } + } + }else if( m==0 ){ + WHERETRACE(0x200, + ("-> %s a covering index according to bitmasks\n", + pProbe->zName, m==0 ? "is" : "is not")); + pNew->wsFlags = WHERE_IDX_ONLY | WHERE_INDEXED; + } } /* Full scan via index */ @@ -157635,7 +164327,7 @@ static int whereLoopAddVirtualOne( ** that the particular combination of parameters provided is unusable. ** Make no entries in the loop table. */ - WHERETRACE(0xffff, (" ^^^^--- non-viable plan rejected!\n")); + WHERETRACE(0xffffffff, (" ^^^^--- non-viable plan rejected!\n")); return SQLITE_OK; } return rc; @@ -157746,7 +164438,7 @@ static int whereLoopAddVirtualOne( sqlite3_free(pNew->u.vtab.idxStr); pNew->u.vtab.needFree = 0; } - WHERETRACE(0xffff, (" bIn=%d prereqIn=%04llx prereqOut=%04llx\n", + WHERETRACE(0xffffffff, (" bIn=%d prereqIn=%04llx prereqOut=%04llx\n", *pbIn, (sqlite3_uint64)mPrereq, (sqlite3_uint64)(pNew->prereq & ~mPrereq))); @@ -157762,7 +164454,7 @@ static int whereLoopAddVirtualOne( ** ** Return a pointer to the collation name: ** -** 1. If there is an explicit COLLATE operator on the constaint, return it. +** 1. If there is an explicit COLLATE operator on the constraint, return it. ** ** 2. Else, if the column has an alternative collation, return that. 
** @@ -157817,7 +164509,7 @@ SQLITE_API int sqlite3_vtab_rhs_value( sqlite3_value *pVal = 0; int rc = SQLITE_OK; if( iCons<0 || iCons>=pIdxInfo->nConstraint ){ - rc = SQLITE_MISUSE; /* EV: R-30545-25046 */ + rc = SQLITE_MISUSE_BKPT; /* EV: R-30545-25046 */ }else{ if( pH->aRhs[iCons]==0 ){ WhereTerm *pTerm = &pH->pWC->a[pIdxInfo->aConstraint[iCons].iTermOffset]; @@ -157847,32 +164539,27 @@ SQLITE_API int sqlite3_vtab_distinct(sqlite3_index_info *pIdxInfo){ return pHidden->eDistinct; } -#if (defined(SQLITE_ENABLE_DBPAGE_VTAB) || defined(SQLITE_TEST)) \ - && !defined(SQLITE_OMIT_VIRTUALTABLE) /* ** Cause the prepared statement that is associated with a call to -** xBestIndex to potentiall use all schemas. If the statement being +** xBestIndex to potentially use all schemas. If the statement being ** prepared is read-only, then just start read transactions on all ** schemas. But if this is a write operation, start writes on all ** schemas. ** ** This is used by the (built-in) sqlite_dbpage virtual table. */ -SQLITE_PRIVATE void sqlite3VtabUsesAllSchemas(sqlite3_index_info *pIdxInfo){ - HiddenIndexInfo *pHidden = (HiddenIndexInfo*)&pIdxInfo[1]; - Parse *pParse = pHidden->pParse; +SQLITE_PRIVATE void sqlite3VtabUsesAllSchemas(Parse *pParse){ int nDb = pParse->db->nDb; int i; for(i=0; iwriteMask ){ + if( DbMaskNonZero(pParse->writeMask) ){ for(i=0; ipTab->zName)); - WHERETRACE(0x40, (" VirtualOne: all usable\n")); + WHERETRACE(0x800, (" VirtualOne: all usable\n")); rc = whereLoopAddVirtualOne( pBuilder, mPrereq, ALLBITS, 0, p, mNoOmit, &bIn, &bRetry ); @@ -157963,7 +164650,7 @@ static int whereLoopAddVirtual( /* If the plan produced by the earlier call uses an IN(...) term, call ** xBestIndex again, this time with IN(...) terms disabled. */ if( bIn ){ - WHERETRACE(0x40, (" VirtualOne: all usable w/o IN\n")); + WHERETRACE(0x800, (" VirtualOne: all usable w/o IN\n")); rc = whereLoopAddVirtualOne( pBuilder, mPrereq, ALLBITS, WO_IN, p, mNoOmit, &bIn, 0); assert( bIn==0 ); @@ -157989,7 +164676,7 @@ static int whereLoopAddVirtual( mPrev = mNext; if( mNext==ALLBITS ) break; if( mNext==mBest || mNext==mBestNoIn ) continue; - WHERETRACE(0x40, (" VirtualOne: mPrev=%04llx mNext=%04llx\n", + WHERETRACE(0x800, (" VirtualOne: mPrev=%04llx mNext=%04llx\n", (sqlite3_uint64)mPrev, (sqlite3_uint64)mNext)); rc = whereLoopAddVirtualOne( pBuilder, mPrereq, mNext|mPrereq, 0, p, mNoOmit, &bIn, 0); @@ -158003,7 +164690,7 @@ static int whereLoopAddVirtual( ** that requires no source tables at all (i.e. one guaranteed to be ** usable), make a call here with all source tables disabled */ if( rc==SQLITE_OK && seenZero==0 ){ - WHERETRACE(0x40, (" VirtualOne: all disabled\n")); + WHERETRACE(0x800, (" VirtualOne: all disabled\n")); rc = whereLoopAddVirtualOne( pBuilder, mPrereq, mPrereq, 0, p, mNoOmit, &bIn, 0); if( bIn==0 ) seenZeroNoIN = 1; @@ -158013,7 +164700,7 @@ static int whereLoopAddVirtual( ** that requires no source tables at all and does not use an IN(...) ** operator, make a final call to obtain one here. 
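On the sqlite3_vtab_rhs_value() change above (out-of-range constraint indexes now report through SQLITE_MISUSE_BKPT): the interface is only meaningful inside xBestIndex. A hedged fragment from a hypothetical virtual table implementation; the cost logic is illustrative and not part of SQLite:

#include "sqlite3.h"

/* Fragment of a hypothetical virtual table's xBestIndex callback. */
static int vtabBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
  (void)pVTab;
  for(int i=0; i<pInfo->nConstraint; i++){
    sqlite3_value *pVal = 0;
    if( !pInfo->aConstraint[i].usable ) continue;
    /* SQLITE_OK: the RHS is known at plan time; SQLITE_NOTFOUND: it is
    ** not available.  An out-of-range constraint index is a misuse
    ** (see the SQLITE_MISUSE_BKPT change above). */
    if( sqlite3_vtab_rhs_value(pInfo, i, &pVal)==SQLITE_OK && pVal!=0 ){
      /* pVal is owned by SQLite and valid only for the duration of
      ** this xBestIndex call. */
      if( sqlite3_value_type(pVal)==SQLITE_INTEGER ){
        /* e.g. refine estimatedRows from sqlite3_value_int64(pVal) */
      }
    }
  }
  pInfo->estimatedCost = 1000.0;  /* illustrative */
  return SQLITE_OK;
}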
*/ if( rc==SQLITE_OK && seenZeroNoIN==0 ){ - WHERETRACE(0x40, (" VirtualOne: all disabled and w/o IN\n")); + WHERETRACE(0x800, (" VirtualOne: all disabled and w/o IN\n")); rc = whereLoopAddVirtualOne( pBuilder, mPrereq, mPrereq, WO_IN, p, mNoOmit, &bIn, 0); } @@ -158069,7 +164756,7 @@ static int whereLoopAddOr( sSubBuild = *pBuilder; sSubBuild.pOrSet = &sCur; - WHERETRACE(0x200, ("Begin processing OR-clause %p\n", pTerm)); + WHERETRACE(0x400, ("Begin processing OR-clause %p\n", pTerm)); for(pOrTerm=pOrWC->a; pOrTermeOperator & WO_AND)!=0 ){ sSubBuild.pWC = &pOrTerm->u.pAndInfo->wc; @@ -158086,9 +164773,9 @@ static int whereLoopAddOr( } sCur.n = 0; #ifdef WHERETRACE_ENABLED - WHERETRACE(0x200, ("OR-term %d of %p has %d subterms:\n", + WHERETRACE(0x400, ("OR-term %d of %p has %d subterms:\n", (int)(pOrTerm-pOrWC->a), pTerm, sSubBuild.pWC->nTerm)); - if( sqlite3WhereTrace & 0x400 ){ + if( sqlite3WhereTrace & 0x20000 ){ sqlite3WhereClausePrint(sSubBuild.pWC); } #endif @@ -158103,8 +164790,6 @@ static int whereLoopAddOr( if( rc==SQLITE_OK ){ rc = whereLoopAddOr(&sSubBuild, mPrereq, mUnusable); } - assert( rc==SQLITE_OK || rc==SQLITE_DONE || sCur.n==0 - || rc==SQLITE_NOMEM ); testcase( rc==SQLITE_NOMEM && sCur.n>0 ); testcase( rc==SQLITE_DONE ); if( sCur.n==0 ){ @@ -158150,7 +164835,7 @@ static int whereLoopAddOr( pNew->prereq = sSum.a[i].prereq; rc = whereLoopInsert(pBuilder, pNew); } - WHERETRACE(0x200, ("End processing OR-clause %p\n", pTerm)); + WHERETRACE(0x400, ("End processing OR-clause %p\n", pTerm)); } } return rc; @@ -158176,7 +164861,13 @@ static int whereLoopAddAll(WhereLoopBuilder *pBuilder){ /* Loop over the tables in the join, from left to right */ pNew = pBuilder->pNew; - whereLoopInit(pNew); + + /* Verify that pNew has already been initialized */ + assert( pNew->nLTerm==0 ); + assert( pNew->wsFlags==0 ); + assert( pNew->nLSlot>=ArraySize(pNew->aLTermSpace) ); + assert( pNew->aLTerm!=0 ); + pBuilder->iPlanLimit = SQLITE_QUERY_PLANNER_LIMIT; for(iTab=0, pItem=pTabList->a; pItemiTable!=iCur ) continue; if( pOBExpr->iColumn!=iColumn ) continue; }else{ - Expr *pIdxExpr = pIndex->aColExpr->a[j].pExpr; - if( sqlite3ExprCompareSkip(pOBExpr, pIdxExpr, iCur) ){ + Expr *pIxExpr = pIndex->aColExpr->a[j].pExpr; + if( sqlite3ExprCompareSkip(pOBExpr, pIxExpr, iCur) ){ continue; } } @@ -158625,37 +165316,56 @@ static const char *wherePathName(WherePath *pPath, int nLoop, WhereLoop *pLast){ ** order. */ static LogEst whereSortingCost( - WhereInfo *pWInfo, - LogEst nRow, - int nOrderBy, - int nSorted + WhereInfo *pWInfo, /* Query planning context */ + LogEst nRow, /* Estimated number of rows to sort */ + int nOrderBy, /* Number of ORDER BY clause terms */ + int nSorted /* Number of initial ORDER BY terms naturally in order */ ){ - /* TUNING: Estimated cost of a full external sort, where N is + /* Estimated cost of a full external sort, where N is ** the number of rows to sort is: ** - ** cost = (3.0 * N * log(N)). + ** cost = (K * N * log(N)). ** ** Or, if the order-by clause has X terms but only the last Y ** terms are out of order, then block-sorting will reduce the ** sorting cost to: ** - ** cost = (3.0 * N * log(N)) * (Y/X) + ** cost = (K * N * log(N)) * (Y/X) + ** + ** The constant K is at least 2.0 but will be larger if there are a + ** large number of columns to be sorted, as the sorting time is + ** proportional to the amount of content to be sorted. The algorithm + ** does not currently distinguish between fat columns (BLOBs and TEXTs) + ** and skinny columns (INTs). 
It just uses the number of columns as
+  ** an approximation for the row width.
   **
-  ** The (Y/X) term is implemented using stack variable rScale
-  ** below.
+  ** And extra factor of 2.0 or 3.0 is added to the sorting cost if the sort
+  ** is built using OP_IdxInsert and OP_Sort rather than with OP_SorterInsert.
   */
-  LogEst rScale, rSortCost;
-  assert( nOrderBy>0 && 66==sqlite3LogEst(100) );
-  rScale = sqlite3LogEst((nOrderBy-nSorted)*100/nOrderBy) - 66;
-  rSortCost = nRow + rScale + 16;
+  LogEst rSortCost, nCol;
+  assert( pWInfo->pSelect!=0 );
+  assert( pWInfo->pSelect->pEList!=0 );
+  /* TUNING: sorting cost proportional to the number of output columns: */
+  nCol = sqlite3LogEst((pWInfo->pSelect->pEList->nExpr+59)/30);
+  rSortCost = nRow + nCol;
+  if( nSorted>0 ){
+    /* Scale the result by (Y/X) */
+    rSortCost += sqlite3LogEst((nOrderBy-nSorted)*100/nOrderBy) - 66;
+  }

   /* Multiple by log(M) where M is the number of output rows.
   ** Use the LIMIT for M if it is smaller. Or if this sort is for
   ** a DISTINCT operator, M will be the number of distinct output
   ** rows, so fudge it downwards a bit.
   */
-  if( (pWInfo->wctrlFlags & WHERE_USE_LIMIT)!=0 && pWInfo->iLimit<rSortCost ){
-    rSortCost = pWInfo->iLimit;
+  if( (pWInfo->wctrlFlags & WHERE_USE_LIMIT)!=0 ){
+    rSortCost += 10;       /* TUNING: Extra 2.0x if using LIMIT */
+    if( nSorted!=0 ){
+      rSortCost += 6;      /* TUNING: Extra 1.5x if also using partial sort */
+    }
+    if( pWInfo->iLimit<rSortCost ){
+      rSortCost = pWInfo->iLimit;
+    }
   }else if( (pWInfo->wctrlFlags & WHERE_WANT_DISTINCT) ){
     /* TUNING: In the sort for a DISTINCT operator, assume that the DISTINCT
     ** reduces the number of output rows by a factor of 2 */
@@ -158681,7 +165391,6 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){
   int mxChoice;             /* Maximum number of simultaneous paths tracked */
   int nLoop;                /* Number of terms in the join */
   Parse *pParse;            /* Parsing context */
-  sqlite3 *db;              /* The database connection */
   int iLoop;                /* Loop counter over the terms of the join */
   int ii, jj;               /* Loop counters */
   int mxI = 0;              /* Index of next entry to replace */
@@ -158700,14 +165409,14 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){
   int nSpace;               /* Bytes of space allocated at pSpace */

   pParse = pWInfo->pParse;
-  db = pParse->db;
   nLoop = pWInfo->nLevel;
   /* TUNING: For simple queries, only the best path is tracked.
   ** For 2-way joins, the 5 best paths are followed.
   ** For joins of 3 or more tables, track the 10 best paths */
   mxChoice = (nLoop<=1) ? 1 : (nLoop==2 ? 5 : 10);
   assert( nLoop<=pWInfo->pTabList->nSrc );
-  WHERETRACE(0x002, ("---- begin solver.  (nRowEst=%d)\n", nRowEst));
+  WHERETRACE(0x002, ("---- begin solver.  (nRowEst=%d, nQueryLoop=%d)\n",
+             nRowEst, pParse->nQueryLoop));

   /* If nRowEst is zero and there is an ORDER BY clause, ignore it.
In this ** case the purpose of this call is to estimate the number of rows returned @@ -158723,7 +165432,7 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){ /* Allocate and initialize space for aTo, aFrom and aSortCost[] */ nSpace = (sizeof(WherePath)+sizeof(WhereLoop*)*nLoop)*mxChoice*2; nSpace += sizeof(LogEst) * nOrderBy; - pSpace = sqlite3DbMallocRawNN(db, nSpace); + pSpace = sqlite3StackAllocRawNN(pParse->db, nSpace); if( pSpace==0 ) return SQLITE_NOMEM_BKPT; aTo = (WherePath*)pSpace; aFrom = aTo+mxChoice; @@ -158773,9 +165482,9 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){ LogEst nOut; /* Rows visited by (pFrom+pWLoop) */ LogEst rCost; /* Cost of path (pFrom+pWLoop) */ LogEst rUnsorted; /* Unsorted cost of (pFrom+pWLoop) */ - i8 isOrdered = pFrom->isOrdered; /* isOrdered for (pFrom+pWLoop) */ + i8 isOrdered; /* isOrdered for (pFrom+pWLoop) */ Bitmask maskNew; /* Mask of src visited by (..) */ - Bitmask revMask = 0; /* Mask of rev-order loops for (..) */ + Bitmask revMask; /* Mask of rev-order loops for (..) */ if( (pWLoop->prereq & ~pFrom->maskLoop)!=0 ) continue; if( (pWLoop->maskSelf & pFrom->maskLoop)!=0 ) continue; @@ -158794,7 +165503,9 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){ rUnsorted = sqlite3LogEstAdd(rUnsorted, pFrom->rUnsorted); nOut = pFrom->nRow + pWLoop->nOut; maskNew = pFrom->maskLoop | pWLoop->maskSelf; + isOrdered = pFrom->isOrdered; if( isOrdered<0 ){ + revMask = 0; isOrdered = wherePathSatisfiesOrderBy(pWInfo, pWInfo->pOrderBy, pFrom, pWInfo->wctrlFlags, iLoop, pWLoop, &revMask); @@ -158807,11 +165518,11 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){ pWInfo, nRowEst, nOrderBy, isOrdered ); } - /* TUNING: Add a small extra penalty (5) to sorting as an - ** extra encouragment to the query planner to select a plan + /* TUNING: Add a small extra penalty (3) to sorting as an + ** extra encouragement to the query planner to select a plan ** where the rows emerge in the correct order without any sorting ** required. */ - rCost = sqlite3LogEstAdd(rUnsorted, aSortCost[isOrdered]) + 5; + rCost = sqlite3LogEstAdd(rUnsorted, aSortCost[isOrdered]) + 3; WHERETRACE(0x002, ("---- sort cost=%-3d (%d/%d) increases cost %3d to %-3d\n", @@ -158972,7 +165683,7 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){ if( nFrom==0 ){ sqlite3ErrorMsg(pParse, "no query solution"); - sqlite3DbFreeNN(db, pSpace); + sqlite3StackFreeNN(pParse->db, pSpace); return SQLITE_ERROR; } @@ -159008,6 +165719,10 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){ if( pFrom->isOrdered==pWInfo->pOrderBy->nExpr ){ pWInfo->eDistinct = WHERE_DISTINCT_ORDERED; } + if( pWInfo->pSelect->pOrderBy + && pWInfo->nOBSat > pWInfo->pSelect->pOrderBy->nExpr ){ + pWInfo->nOBSat = pWInfo->pSelect->pOrderBy->nExpr; + } }else{ pWInfo->revMask = pFrom->revLoop; if( pWInfo->nOBSat<=0 ){ @@ -159054,7 +165769,7 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){ pWInfo->nRowOut = pFrom->nRow; /* Free temporary memory and return success */ - sqlite3DbFreeNN(db, pSpace); + sqlite3StackFreeNN(pParse->db, pSpace); return SQLITE_OK; } @@ -159152,7 +165867,7 @@ static int whereShortCut(WhereLoopBuilder *pBuilder){ pLoop->cId = '0'; #endif #ifdef WHERETRACE_ENABLED - if( sqlite3WhereTrace ){ + if( sqlite3WhereTrace & 0x02 ){ sqlite3DebugPrintf("whereShortCut() used to compute solution\n"); } #endif @@ -159219,6 +165934,13 @@ static void showAllWhereLoops(WhereInfo *pWInfo, WhereClause *pWC){ ** at most a single row. 
**   4) The table must not be referenced by any part of the query apart
**      from its own USING or ON clause.
+**  5) The table must not have an inner-join ON or USING clause if there is
+**     a RIGHT JOIN anywhere in the query.  Otherwise the ON/USING clause
+**     might move from the right side to the left side of the RIGHT JOIN.
+**     Note: Due to (2), this condition can only arise if the table is
+**     the right-most table of a subquery that was flattened into the
+**     main query and that subquery was the right-hand operand of an
+**     inner join that held an ON or USING clause.
**
** For example, given:
**
@@ -159244,6 +165966,7 @@ static SQLITE_NOINLINE Bitmask whereOmitNoopJoin(
){
  int i;
  Bitmask tabUsed;
+ int hasRightJoin;

  /* Preconditions checked by the caller */
  assert( pWInfo->nLevel>=2 );
@@ -159258,6 +165981,7 @@ static SQLITE_NOINLINE Bitmask whereOmitNoopJoin(
  if( pWInfo->pOrderBy ){
    tabUsed |= sqlite3WhereExprListUsage(&pWInfo->sMaskSet, pWInfo->pOrderBy);
  }
+ hasRightJoin = (pWInfo->pTabList->a[0].fg.jointype & JT_LTORJ)!=0;
  for(i=pWInfo->nLevel-1; i>=1; i--){
    WhereTerm *pTerm, *pEnd;
    SrcItem *pItem;
@@ -159280,9 +166004,15 @@ static SQLITE_NOINLINE Bitmask whereOmitNoopJoin(
        break;
      }
    }
+   if( hasRightJoin
+    && ExprHasProperty(pTerm->pExpr, EP_InnerON)
+    && pTerm->pExpr->w.iJoin==pItem->iCursor
+   ){
+     break;  /* restriction (5) */
+   }
  }
  if( pTerm<pEnd ) continue;
- WHERETRACE(0xffff, ("-> drop loop %c not used\n", pLoop->cId));
+ WHERETRACE(0xffffffff, ("-> drop loop %c not used\n", pLoop->cId));
  notReady &= ~pLoop->maskSelf;
  for(pTerm=pWInfo->sWC.a; pTerm<pEnd; pTerm++){
    if( (pTerm->prereqAll & pLoop->maskSelf)!=0 ){
@@ -159321,28 +166051,27 @@ static SQLITE_NOINLINE void whereCheckIfBloomFilterIsUseful(
  const WhereInfo *pWInfo
){
  int i;
- LogEst nSearch;
+ LogEst nSearch = 0;

  assert( pWInfo->nLevel>=2 );
  assert( OptimizationEnabled(pWInfo->pParse->db, SQLITE_BloomFilter) );
- nSearch = pWInfo->a[0].pWLoop->nOut;
- for(i=1; i<pWInfo->nLevel; i++){
+ for(i=0; i<pWInfo->nLevel; i++){
    WhereLoop *pLoop = pWInfo->a[i].pWLoop;
    const unsigned int reqFlags = (WHERE_SELFCULL|WHERE_COLUMN_EQ);
-   if( (pLoop->wsFlags & reqFlags)==reqFlags
+   SrcItem *pItem = &pWInfo->pTabList->a[pLoop->iTab];
+   Table *pTab = pItem->pTab;
+   if( (pTab->tabFlags & TF_HasStat1)==0 ) break;
+   pTab->tabFlags |= TF_StatsUsed;
+   if( i>=1
+    && (pLoop->wsFlags & reqFlags)==reqFlags
     /* vvvvvv--- Always the case if WHERE_COLUMN_EQ is defined */
     && ALWAYS((pLoop->wsFlags & (WHERE_IPK|WHERE_INDEXED))!=0)
    ){
-     SrcItem *pItem = &pWInfo->pTabList->a[pLoop->iTab];
-     Table *pTab = pItem->pTab;
-     pTab->tabFlags |= TF_StatsUsed;
-     if( nSearch > pTab->nRowLogEst
-      && (pTab->tabFlags & TF_HasStat1)!=0
-     ){
+     if( nSearch > pTab->nRowLogEst ){
        testcase( pItem->fg.jointype & JT_LEFT );
        pLoop->wsFlags |= WHERE_BLOOMFILTER;
        pLoop->wsFlags &= ~WHERE_IDX_ONLY;
-       WHERETRACE(0xffff, (
+       WHERETRACE(0xffffffff, (
          "-> use Bloom-filter on loop %c because there are ~%.1e "
          "lookups into %s which has only ~%.1e rows\n",
          pLoop->cId, (double)sqlite3LogEstToInt(nSearch), pTab->zName,
@@ -159353,6 +166082,109 @@ static SQLITE_NOINLINE void whereCheckIfBloomFilterIsUseful(
  }
}

+/*
+** The index pIdx is used by a query and contains one or more expressions.
+** In other words pIdx is an index on an expression. iIdxCur is the cursor
+** number for the index and iDataCur is the cursor number for the corresponding
+** table.
+**
+** This routine adds IndexedExpr entries to the Parse->pIdxEpr field for
+** each of the expressions in the index so that the expression code generator
+** will know to replace occurrences of the indexed expression with
+** references to the corresponding column of the index.
+*/
+static SQLITE_NOINLINE void whereAddIndexedExpr(
+  Parse *pParse,     /* Add IndexedExpr entries to pParse->pIdxEpr */
+  Index *pIdx,       /* The index-on-expression that contains the expressions */
+  int iIdxCur,       /* Cursor number for pIdx */
+  SrcItem *pTabItem  /* The FROM clause entry for the table */
+){
+  int i;
+  IndexedExpr *p;
+  Table *pTab;
+  assert( pIdx->bHasExpr );
+  pTab = pIdx->pTable;
+  for(i=0; i<pIdx->nColumn; i++){
+    Expr *pExpr;
+    int j = pIdx->aiColumn[i];
+    int bMaybeNullRow;
+    if( j==XN_EXPR ){
+      pExpr = pIdx->aColExpr->a[i].pExpr;
+      testcase( pTabItem->fg.jointype & JT_LEFT );
+      testcase( pTabItem->fg.jointype & JT_RIGHT );
+      testcase( pTabItem->fg.jointype & JT_LTORJ );
+      bMaybeNullRow = (pTabItem->fg.jointype & (JT_LEFT|JT_LTORJ|JT_RIGHT))!=0;
+    }else if( j>=0 && (pTab->aCol[j].colFlags & COLFLAG_VIRTUAL)!=0 ){
+      pExpr = sqlite3ColumnExpr(pTab, &pTab->aCol[j]);
+      bMaybeNullRow = 0;
+    }else{
+      continue;
+    }
+    if( sqlite3ExprIsConstant(pExpr) ) continue;
+    if( pExpr->op==TK_FUNCTION ){
+      /* Functions that might set a subtype should not be replaced by the
+      ** value taken from an expression index since the index omits the
+      ** subtype.  https://sqlite.org/forum/forumpost/68d284c86b082c3e */
+      int n;
+      FuncDef *pDef;
+      sqlite3 *db = pParse->db;
+      assert( ExprUseXList(pExpr) );
+      n = pExpr->x.pList ? pExpr->x.pList->nExpr : 0;
+      pDef = sqlite3FindFunction(db, pExpr->u.zToken, n, ENC(db), 0);
+      if( pDef==0 || (pDef->funcFlags & SQLITE_RESULT_SUBTYPE)!=0 ){
+        continue;
+      }
+    }
+    p = sqlite3DbMallocRaw(pParse->db, sizeof(IndexedExpr));
+    if( p==0 ) break;
+    p->pIENext = pParse->pIdxEpr;
+#ifdef WHERETRACE_ENABLED
+    if( sqlite3WhereTrace & 0x200 ){
+      sqlite3DebugPrintf("New pParse->pIdxEpr term {%d,%d}\n", iIdxCur, i);
+      if( sqlite3WhereTrace & 0x5000 ) sqlite3ShowExpr(pExpr);
+    }
+#endif
+    p->pExpr = sqlite3ExprDup(pParse->db, pExpr, 0);
+    p->iDataCur = pTabItem->iCursor;
+    p->iIdxCur = iIdxCur;
+    p->iIdxCol = i;
+    p->bMaybeNullRow = bMaybeNullRow;
+    if( sqlite3IndexAffinityStr(pParse->db, pIdx) ){
+      p->aff = pIdx->zColAff[i];
+    }
+#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS
+    p->zIdxName = pIdx->zName;
+#endif
+    pParse->pIdxEpr = p;
+    if( p->pIENext==0 ){
+      void *pArg = (void*)&pParse->pIdxEpr;
+      sqlite3ParserAddCleanup(pParse, whereIndexedExprCleanup, pArg);
+    }
+  }
+}
+
+/*
+** Set the reverse-scan order mask to one for all tables in the query
+** with the exception of MATERIALIZED common table expressions that have
+** their own internal ORDER BY clauses.
+**
+** This implements the PRAGMA reverse_unordered_selects=ON setting.
+** (Also SQLITE_DBCONFIG_REVERSE_SCANORDER).
+*/
+static SQLITE_NOINLINE void whereReverseScanOrder(WhereInfo *pWInfo){
+  int ii;
+  for(ii=0; ii<pWInfo->pTabList->nSrc; ii++){
+    SrcItem *pItem = &pWInfo->pTabList->a[ii];
+    if( !pItem->fg.isCte
+     || pItem->u2.pCteUse->eM10d!=M10d_Yes
+     || NEVER(pItem->pSelect==0)
+     || pItem->pSelect->pOrderBy==0
+    ){
+      pWInfo->revMask |= MASKBIT(ii);
+    }
+  }
+}

/*
** Generate the beginning of the loop used for WHERE clause processing.
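A quick way to exercise whereReverseScanOrder() from the outside, assuming a build of this amalgamation. The same toggle is reachable programmatically through sqlite3_db_config() with SQLITE_DBCONFIG_REVERSE_SCANORDER; note that the reversed output order is likely but never guaranteed, which is exactly what the pragma is meant to flush out:

#include <stdio.h>
#include "sqlite3.h"

int main(void){
  sqlite3 *db;
  sqlite3_stmt *pStmt;
  if( sqlite3_open(":memory:", &db)!=SQLITE_OK ) return 1;
  sqlite3_exec(db,
      "CREATE TABLE t(x); INSERT INTO t VALUES(1),(2),(3);", 0, 0, 0);

  /* Run otherwise-unordered scans backwards.  Per the new function above,
  ** a MATERIALIZED CTE that carries its own internal ORDER BY is now
  ** exempted from the reversal. */
  sqlite3_exec(db, "PRAGMA reverse_unordered_selects=ON;", 0, 0, 0);

  sqlite3_prepare_v2(db, "SELECT x FROM t;", -1, &pStmt, 0);
  while( sqlite3_step(pStmt)==SQLITE_ROW ){
    printf("%d\n", sqlite3_column_int(pStmt, 0));  /* likely 3, 2, 1 */
  }
  sqlite3_finalize(pStmt);
  sqlite3_close(db);
  return 0;
}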
** The return value is a pointer to an opaque structure that contains @@ -159411,7 +166243,7 @@ static SQLITE_NOINLINE void whereCheckIfBloomFilterIsUseful( ** ** OUTER JOINS ** -** An outer join of tables t1 and t2 is conceptally coded as follows: +** An outer join of tables t1 and t2 is conceptually coded as follows: ** ** foreach row1 in t1 do ** flag = 0 @@ -159447,7 +166279,7 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( Expr *pWhere, /* The WHERE clause */ ExprList *pOrderBy, /* An ORDER BY (or GROUP BY) clause, or NULL */ ExprList *pResultSet, /* Query result set. Req'd for DISTINCT */ - Select *pLimit, /* Use this LIMIT/OFFSET clause, if any */ + Select *pSelect, /* The entire SELECT statement */ u16 wctrlFlags, /* The WHERE_* flags defined in sqliteInt.h */ int iAuxArg /* If WHERE_OR_SUBCLAUSE is set, index cursor number ** If WHERE_USE_LIMIT, then the limit amount */ @@ -159516,7 +166348,9 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( pWInfo->pParse = pParse; pWInfo->pTabList = pTabList; pWInfo->pOrderBy = pOrderBy; +#if WHERETRACE_ENABLED pWInfo->pWhere = pWhere; +#endif pWInfo->pResultSet = pResultSet; pWInfo->aiCurOnePass[0] = pWInfo->aiCurOnePass[1] = -1; pWInfo->nLevel = nTabList; @@ -159524,9 +166358,7 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( pWInfo->wctrlFlags = wctrlFlags; pWInfo->iLimit = iAuxArg; pWInfo->savedNQueryLoop = pParse->nQueryLoop; -#ifndef SQLITE_OMIT_VIRTUALTABLE - pWInfo->pLimit = pLimit; -#endif + pWInfo->pSelect = pSelect; memset(&pWInfo->nOBSat, 0, offsetof(WhereInfo,sWC) - offsetof(WhereInfo,nOBSat)); memset(&pWInfo->a[0], 0, sizeof(WhereLoop)+nTabList*sizeof(WhereLevel)); @@ -159566,7 +166398,7 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( ** ** The N-th term of the FROM clause is assigned a bitmask of 1<sWC); - sqlite3WhereAddLimit(&pWInfo->sWC, pLimit); + if( pSelect && pSelect->pLimit ){ + sqlite3WhereAddLimit(&pWInfo->sWC, pSelect); + } if( pParse->nErr ) goto whereBeginError; - /* Special case: WHERE terms that do not refer to any tables in the join - ** (constant expressions). Evaluate each such term, and jump over all the - ** generated code if the result is not true. + /* The False-WHERE-Term-Bypass optimization: + ** + ** If there are WHERE terms that are false, then no rows will be output, + ** so skip over all of the code generated here. ** - ** Do not do this if the expression contains non-deterministic functions - ** that are not within a sub-select. This is not strictly required, but - ** preserves SQLite's legacy behaviour in the following two cases: + ** Conditions: ** - ** FROM ... WHERE random()>0; -- eval random() once per row - ** FROM ... WHERE (SELECT random())>0; -- eval random() once overall + ** (1) The WHERE term must not refer to any tables in the join. + ** (2) The term must not come from an ON clause on the + ** right-hand side of a LEFT or FULL JOIN. + ** (3) The term must not come from an ON clause, or there must be + ** no RIGHT or FULL OUTER joins in pTabList. + ** (4) If the expression contains non-deterministic functions + ** that are not within a sub-select. This is not required + ** for correctness but rather to preserves SQLite's legacy + ** behaviour in the following two cases: + ** + ** WHERE random()>0; -- eval random() once per row + ** WHERE (SELECT random())>0; -- eval random() just once overall + ** + ** Note that the Where term need not be a constant in order for this + ** optimization to apply, though it does need to be constant relative to + ** the current subquery (condition 1). 
The term might include variables
+  ** from outer queries so that the value of the term changes from one
+  ** invocation of the current subquery to the next.
   */
   for(ii=0; ii<sWLB.pWC->nBase; ii++){
-    WhereTerm *pT = &sWLB.pWC->a[ii];
+    WhereTerm *pT = &sWLB.pWC->a[ii];  /* A term of the WHERE clause */
+    Expr *pX;                          /* The expression of pT */
     if( pT->wtFlags & TERM_VIRTUAL ) continue;
-    if( pT->prereqAll==0 && (nTabList==0 || exprIsDeterministic(pT->pExpr)) ){
-      sqlite3ExprIfFalse(pParse, pT->pExpr, pWInfo->iBreak, SQLITE_JUMPIFNULL);
+    pX = pT->pExpr;
+    assert( pX!=0 );
+    assert( pT->prereqAll!=0 || !ExprHasProperty(pX, EP_OuterON) );
+    if( pT->prereqAll==0                           /* Conditions (1) and (2) */
+     && (nTabList==0 || exprIsDeterministic(pX))   /* Condition (4) */
+     && !(ExprHasProperty(pX, EP_InnerON)          /* Condition (3) */
+          && (pTabList->a[0].fg.jointype & JT_LTORJ)!=0 )
+    ){
+      sqlite3ExprIfFalse(pParse, pX, pWInfo->iBreak, SQLITE_JUMPIFNULL);
       pT->wtFlags |= TERM_CODED;
     }
   }

@@ -159636,13 +166493,13 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin(

  /* Construct the WhereLoop objects */
#if defined(WHERETRACE_ENABLED)
- if( sqlite3WhereTrace & 0xffff ){
+ if( sqlite3WhereTrace & 0xffffffff ){
    sqlite3DebugPrintf("*** Optimizer Start *** (wctrlFlags: 0x%x",wctrlFlags);
    if( wctrlFlags & WHERE_USE_LIMIT ){
      sqlite3DebugPrintf(", limit: %d", iAuxArg);
    }
    sqlite3DebugPrintf(")\n");
-   if( sqlite3WhereTrace & 0x100 ){
+   if( sqlite3WhereTrace & 0x8000 ){
      Select sSelect;
      memset(&sSelect, 0, sizeof(sSelect));
      sSelect.selFlags = SF_WhereBegin;
@@ -159652,10 +166509,10 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin(
      sSelect.pEList = pResultSet;
      sqlite3TreeViewSelect(0, &sSelect, 0);
    }
- }
- if( sqlite3WhereTrace & 0x100 ){ /* Display all terms of the WHERE clause */
-   sqlite3DebugPrintf("---- WHERE clause at start of analysis:\n");
-   sqlite3WhereClausePrint(sWLB.pWC);
+   if( sqlite3WhereTrace & 0x4000 ){ /* Display all WHERE clause terms */
+     sqlite3DebugPrintf("---- WHERE clause at start of analysis:\n");
+     sqlite3WhereClausePrint(sWLB.pWC);
+   }
  }
#endif

@@ -159671,7 +166528,7 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin(
    ** loops will be built using the revised truthProb values. */
    if( sWLB.bldFlags2 & SQLITE_BLDF2_2NDPASS ){
      WHERETRACE_ALL_LOOPS(pWInfo, sWLB.pWC);
-     WHERETRACE(0xffff,
+     WHERETRACE(0xffffffff,
           ("**** Redo all loop computations due to"
            " TERM_HIGHTRUTH changes ****\n"));
      while( pWInfo->pLoops ){
@@ -159691,9 +166548,20 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin(
      wherePathSolver(pWInfo, pWInfo->nRowOut+1);
      if( db->mallocFailed ) goto whereBeginError;
    }
+
+   /* TUNING: Assume that a DISTINCT clause on a subquery reduces
+   ** the output size by a factor of 8 (LogEst -30).
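The LogEst units used throughout these hunks (the sorting-cost tuning in whereSortingCost() earlier, and the "factor of 8 (LogEst -30)" adjustment immediately above) are integers approximating 10*log2(N), so adding LogEst values multiplies the underlying estimates. A self-contained sketch of the encoding; SQLite's real sqlite3LogEst() uses a small lookup table instead of libm, so the rounding below is only approximate:

#include <math.h>
#include <stdio.h>

typedef short LogEst;

/* LogEst(x) ~= 10*log2(x); adding LogEsts multiplies the estimates. */
static LogEst logEst(double x){ return (LogEst)(0.5 + 10.0*log2(x)); }

int main(void){
  LogEst nRow = logEst(1000000.0);   /* 199: about one million rows */
  /* whereSortingCost() tuning: nCol = LogEst((nExpr+59)/30); for a
  ** single-column result this is LogEst(2) == 10, i.e. a 2x multiplier */
  LogEst nCol = logEst((1+59)/30);
  LogEst rSortCost = nRow + nCol;    /* 209: about two million units */
  LogEst withLimit = rSortCost + 10; /* extra 2.0x when LIMIT is in play */
  /* And the DISTINCT adjustment above: subtracting 30 divides by 8,
  ** since 10*log2(8) == 30. */
  LogEst afterDistinct = rSortCost - 30;
  printf("nRow=%d nCol=%d sort=%d limit=%d distinct=%d\n",
         nRow, nCol, rSortCost, withLimit, afterDistinct);
  return 0;
}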
+ */ + if( (pWInfo->wctrlFlags & WHERE_WANT_DISTINCT)!=0 ){ + WHERETRACE(0x0080,("nRowOut reduced from %d to %d due to DISTINCT\n", + pWInfo->nRowOut, pWInfo->nRowOut-30)); + pWInfo->nRowOut -= 30; + } + } + assert( pWInfo->pTabList!=0 ); if( pWInfo->pOrderBy==0 && (db->flags & SQLITE_ReverseOrder)!=0 ){ - pWInfo->revMask = ALLBITS; + whereReverseScanOrder(pWInfo); } if( pParse->nErr ){ goto whereBeginError; @@ -159757,11 +166625,11 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( } #if defined(WHERETRACE_ENABLED) - if( sqlite3WhereTrace & 0x100 ){ /* Display all terms of the WHERE clause */ + if( sqlite3WhereTrace & 0x4000 ){ /* Display all terms of the WHERE clause */ sqlite3DebugPrintf("---- WHERE clause at end of analysis:\n"); sqlite3WhereClausePrint(sWLB.pWC); } - WHERETRACE(0xffff,("*** Optimizer Finished ***\n")); + WHERETRACE(0xffffffff,("*** Optimizer Finished ***\n")); #endif pWInfo->pParse->nQueryLoop += pWInfo->nRowOut; @@ -159793,6 +166661,7 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( 0!=(wctrlFlags & WHERE_ONEPASS_MULTIROW) && !IsVirtual(pTabList->a[0].pTab) && (0==(wsFlags & WHERE_MULTI_OR) || (wctrlFlags & WHERE_DUPLICATES_OK)) + && OptimizationEnabled(db, SQLITE_OnePass) )){ pWInfo->eOnePass = bOnerow ? ONEPASS_SINGLE : ONEPASS_MULTI; if( HasRowid(pTabList->a[0].pTab) && (wsFlags & WHERE_IDX_ONLY) ){ @@ -159856,7 +166725,7 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( assert( n<=pTab->nCol ); } #ifdef SQLITE_ENABLE_CURSOR_HINTS - if( pLoop->u.btree.pIndex!=0 ){ + if( pLoop->u.btree.pIndex!=0 && (pTab->tabFlags & TF_WithoutRowid)==0 ){ sqlite3VdbeChangeP5(v, OPFLAG_SEEKEQ|bFordelete); }else #endif @@ -159898,6 +166767,14 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( op = OP_ReopenIdx; }else{ iIndexCur = pParse->nTab++; + if( pIx->bHasExpr && OptimizationEnabled(db, SQLITE_IndexedExpr) ){ + whereAddIndexedExpr(pParse, pIx, iIndexCur, pTabItem); + } + if( pIx->pPartIdxWhere && (pTabItem->fg.jointype & JT_RIGHT)==0 ){ + wherePartIdxExpr( + pParse, pIx, pIx->pPartIdxWhere, 0, iIndexCur, pTabItem + ); + } } pLevel->iIdxCur = iIndexCur; assert( pIx!=0 ); @@ -159990,11 +166867,11 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( sqlite3VdbeJumpHere(v, iOnce); } } + assert( pTabList == pWInfo->pTabList ); if( (wsFlags & (WHERE_AUTO_INDEX|WHERE_BLOOMFILTER))!=0 ){ if( (wsFlags & WHERE_AUTO_INDEX)!=0 ){ #ifndef SQLITE_OMIT_AUTOMATIC_INDEX - constructAutomaticIndex(pParse, &pWInfo->sWC, - &pTabList->a[pLevel->iFrom], notReady, pLevel); + constructAutomaticIndex(pParse, &pWInfo->sWC, notReady, pLevel); #endif }else{ sqlite3ConstructBloomFilter(pWInfo, ii, pLevel, notReady); @@ -160020,8 +166897,6 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( /* Jump here if malloc fails */ whereBeginError: if( pWInfo ){ - testcase( pWInfo->pExprMods!=0 ); - whereUndoExprMods(pWInfo); pParse->nQueryLoop = pWInfo->savedNQueryLoop; whereInfoFree(db, pWInfo); } @@ -160240,7 +167115,6 @@ SQLITE_PRIVATE void sqlite3WhereEnd(WhereInfo *pWInfo){ } assert( pWInfo->nLevel<=pTabList->nSrc ); - if( pWInfo->pExprMods ) whereUndoExprMods(pWInfo); for(i=0, pLevel=pWInfo->a; inLevel; i++, pLevel++){ int k, last; VdbeOp *pOp, *pLastOp; @@ -160294,10 +167168,28 @@ SQLITE_PRIVATE void sqlite3WhereEnd(WhereInfo *pWInfo){ }else{ last = pWInfo->iEndWhere; } + if( pIdx->bHasExpr ){ + IndexedExpr *p = pParse->pIdxEpr; + while( p ){ + if( p->iIdxCur==pLevel->iIdxCur ){ +#ifdef WHERETRACE_ENABLED + if( sqlite3WhereTrace & 0x200 ){ + sqlite3DebugPrintf("Disable pParse->pIdxEpr term {%d,%d}\n", + p->iIdxCur, p->iIdxCol); 
+ if( sqlite3WhereTrace & 0x5000 ) sqlite3ShowExpr(p->pExpr); + } +#endif + p->iDataCur = -1; + p->iIdxCur = -1; + } + p = p->pIENext; + } + } k = pLevel->addrBody + 1; #ifdef SQLITE_DEBUG if( db->flags & SQLITE_VdbeAddopTrace ){ - printf("TRANSLATE opcodes in range %d..%d\n", k, last-1); + printf("TRANSLATE cursor %d->%d in opcode range %d..%d\n", + pLevel->iTabCur, pLevel->iIdxCur, k, last-1); } /* Proof that the "+1" on the k value above is safe */ pOp = sqlite3VdbeGetOp(v, k - 1); @@ -160504,7 +167396,7 @@ SQLITE_PRIVATE void sqlite3WhereEnd(WhereInfo *pWInfo){ ** ** These are the same built-in window functions supported by Postgres. ** Although the behaviour of aggregate window functions (functions that -** can be used as either aggregates or window funtions) allows them to +** can be used as either aggregates or window functions) allows them to ** be implemented using an API, built-in window functions are much more ** esoteric. Additionally, some window functions (e.g. nth_value()) ** may only be implemented by caching the entire partition in memory. @@ -161034,7 +167926,7 @@ static Window *windowFind(Parse *pParse, Window *pList, const char *zName){ ** is the Window object representing the associated OVER clause. This ** function updates the contents of pWin as follows: ** -** * If the OVER clause refered to a named window (as in "max(x) OVER win"), +** * If the OVER clause referred to a named window (as in "max(x) OVER win"), ** search list pList for a matching WINDOW definition, and update pWin ** accordingly. If no such WINDOW clause can be found, leave an error ** in pParse. @@ -161172,6 +168064,7 @@ static int selectWindowRewriteExprCb(Walker *pWalker, Expr *pExpr){ } /* no break */ deliberate_fall_through + case TK_IF_NULL_ROW: case TK_AGG_FUNCTION: case TK_COLUMN: { int iCol = -1; @@ -161287,7 +168180,6 @@ static ExprList *exprListAppendList( for(i=0; inExpr; i++){ sqlite3 *db = pParse->db; Expr *pDup = sqlite3ExprDup(db, pAppend->a[i].pExpr, 0); - assert( pDup==0 || !ExprHasProperty(pDup, EP_MemToken) ); if( db->mallocFailed ){ sqlite3ExprDelete(db, pDup); break; @@ -161425,7 +168317,7 @@ SQLITE_PRIVATE int sqlite3WindowRewrite(Parse *pParse, Select *p){ assert( ExprUseXList(pWin->pOwner) ); assert( pWin->pWFunc!=0 ); pArgs = pWin->pOwner->x.pList; - if( pWin->pWFunc->funcFlags & SQLITE_FUNC_SUBTYPE ){ + if( pWin->pWFunc->funcFlags & SQLITE_SUBTYPE ){ selectWindowRewriteEList(pParse, pMWin, pSrc, pArgs, pTab, &pSublist); pWin->iArgCol = (pSublist ? pSublist->nExpr : 0); pWin->bExprArgs = 1; @@ -161457,7 +168349,7 @@ SQLITE_PRIVATE int sqlite3WindowRewrite(Parse *pParse, Select *p){ pSub = sqlite3SelectNew( pParse, pSublist, pSrc, pWhere, pGroupBy, pHaving, pSort, 0, 0 ); - SELECTTRACE(1,pParse,pSub, + TREETRACE(0x40,pParse,pSub, ("New window-function subquery in FROM clause of (%u/%p)\n", p->selId, p)); p->pSrc = sqlite3SrcListAppend(pParse, 0, 0, 0); @@ -161467,6 +168359,7 @@ SQLITE_PRIVATE int sqlite3WindowRewrite(Parse *pParse, Select *p){ if( p->pSrc ){ Table *pTab2; p->pSrc->a[0].pSelect = pSub; + p->pSrc->a[0].fg.isCorrelated = 1; sqlite3SrcListAssignCursors(pParse, p->pSrc); pSub->selFlags |= SF_Expanded|SF_OrderByReqd; pTab2 = sqlite3ResultSetOfSelect(pParse, pSub, SQLITE_AFF_NONE); @@ -161654,7 +168547,7 @@ SQLITE_PRIVATE Window *sqlite3WindowAssemble( } /* -** Window *pWin has just been created from a WINDOW clause. Tokne pBase +** Window *pWin has just been created from a WINDOW clause. Token pBase ** is the base window. 
Earlier windows from the same WINDOW clause are ** stored in the linked list starting at pWin->pNextWin. This function ** either updates *pWin according to the base specification, or else @@ -161698,8 +168591,9 @@ SQLITE_PRIVATE void sqlite3WindowAttach(Parse *pParse, Expr *p, Window *pWin){ if( p ){ assert( p->op==TK_FUNCTION ); assert( pWin ); + assert( ExprIsFullSize(p) ); p->y.pWin = pWin; - ExprSetProperty(p, EP_WinFunc); + ExprSetProperty(p, EP_WinFunc|EP_FullSize); pWin->pOwner = p; if( (p->flags & EP_Distinct) && pWin->eFrmType!=TK_FILTER ){ sqlite3ErrorMsg(pParse, @@ -161960,7 +168854,7 @@ struct WindowCsrAndReg { ** ** (ORDER BY a, b GROUPS BETWEEN 2 PRECEDING AND 2 FOLLOWING) ** -** The windows functions implmentation caches the input rows in a temp +** The windows functions implementation caches the input rows in a temp ** table, sorted by "a, b" (it actually populates the cache lazily, and ** aggressively removes rows once they are no longer required, but that's ** a mere detail). It keeps three cursors open on the temp table. One @@ -162558,10 +169452,9 @@ static void windowCodeRangeTest( /* This block runs if reg1 is not NULL, but reg2 is. */ sqlite3VdbeJumpHere(v, addr); - sqlite3VdbeAddOp2(v, OP_IsNull, reg2, lbl); VdbeCoverage(v); - if( op==OP_Gt || op==OP_Ge ){ - sqlite3VdbeChangeP2(v, -1, addrDone); - } + sqlite3VdbeAddOp2(v, OP_IsNull, reg2, + (op==OP_Gt || op==OP_Ge) ? addrDone : lbl); + VdbeCoverage(v); } /* Register reg1 currently contains csr1.peerVal (the peer-value from csr1). @@ -162970,7 +169863,7 @@ static int windowExprGtZero(Parse *pParse, Expr *pExpr){ ** ** For the most part, the patterns above are adapted to support UNBOUNDED by ** assuming that it is equivalent to "infinity PRECEDING/FOLLOWING" and -** CURRENT ROW by assuming that it is equivilent to "0 PRECEDING/FOLLOWING". +** CURRENT ROW by assuming that it is equivalent to "0 PRECEDING/FOLLOWING". ** This is optimized of course - branches that will never be taken and ** conditions that are always true are omitted from the VM code. The only ** exceptional case is: @@ -163249,7 +170142,7 @@ SQLITE_PRIVATE void sqlite3WindowCodeStep( } /* Allocate registers for the array of values from the sub-query, the - ** samve values in record form, and the rowid used to insert said record + ** same values in record form, and the rowid used to insert said record ** into the ephemeral table. 
*/ regNew = pParse->nMem+1; pParse->nMem += nInput; @@ -163333,8 +170226,7 @@ SQLITE_PRIVATE void sqlite3WindowCodeStep( VdbeCoverageNeverNullIf(v, op==OP_Ge); /* NeverNull because bound */ VdbeCoverageNeverNullIf(v, op==OP_Le); /* values previously checked */ windowAggFinal(&s, 0); - sqlite3VdbeAddOp2(v, OP_Rewind, s.current.csr, 1); - VdbeCoverageNeverTaken(v); + sqlite3VdbeAddOp1(v, OP_Rewind, s.current.csr); windowReturnOneRow(&s); sqlite3VdbeAddOp1(v, OP_ResetSorter, s.current.csr); sqlite3VdbeAddOp2(v, OP_Goto, 0, lblWhereEnd); @@ -163346,13 +170238,10 @@ SQLITE_PRIVATE void sqlite3WindowCodeStep( } if( pMWin->eStart!=TK_UNBOUNDED ){ - sqlite3VdbeAddOp2(v, OP_Rewind, s.start.csr, 1); - VdbeCoverageNeverTaken(v); + sqlite3VdbeAddOp1(v, OP_Rewind, s.start.csr); } - sqlite3VdbeAddOp2(v, OP_Rewind, s.current.csr, 1); - VdbeCoverageNeverTaken(v); - sqlite3VdbeAddOp2(v, OP_Rewind, s.end.csr, 1); - VdbeCoverageNeverTaken(v); + sqlite3VdbeAddOp1(v, OP_Rewind, s.current.csr); + sqlite3VdbeAddOp1(v, OP_Rewind, s.end.csr); if( regPeer && pOrderBy ){ sqlite3VdbeAddOp3(v, OP_Copy, regNewPeer, regPeer, pOrderBy->nExpr-1); sqlite3VdbeAddOp3(v, OP_Copy, regPeer, s.start.reg, pOrderBy->nExpr-1); @@ -163494,7 +170383,8 @@ SQLITE_PRIVATE void sqlite3WindowCodeStep( /************** End of window.c **********************************************/ /************** Begin file parse.c *******************************************/ /* This file is automatically generated by Lemon from input grammar -** source file "parse.y". */ +** source file "parse.y". +*/ /* ** 2001-09-15 ** @@ -163511,7 +170401,7 @@ SQLITE_PRIVATE void sqlite3WindowCodeStep( ** The canonical source code to this file ("parse.y") is a Lemon grammar ** file that specifies the input grammar and actions to take while parsing. ** That input file is processed by Lemon to generate a C-language -** implementation of a parser for the given grammer. You might be reading +** implementation of a parser for the given grammar. You might be reading ** this comment as part of the translated C-code. Edits should be made ** to the original parse.y sources. */ @@ -164005,18 +170895,18 @@ typedef union { #define sqlite3ParserCTX_FETCH Parse *pParse=yypParser->pParse; #define sqlite3ParserCTX_STORE yypParser->pParse=pParse; #define YYFALLBACK 1 -#define YYNSTATE 576 +#define YYNSTATE 579 #define YYNRULE 405 -#define YYNRULE_WITH_ACTION 342 +#define YYNRULE_WITH_ACTION 340 #define YYNTOKEN 185 -#define YY_MAX_SHIFT 575 -#define YY_MIN_SHIFTREDUCE 835 -#define YY_MAX_SHIFTREDUCE 1239 -#define YY_ERROR_ACTION 1240 -#define YY_ACCEPT_ACTION 1241 -#define YY_NO_ACTION 1242 -#define YY_MIN_REDUCE 1243 -#define YY_MAX_REDUCE 1647 +#define YY_MAX_SHIFT 578 +#define YY_MIN_SHIFTREDUCE 838 +#define YY_MAX_SHIFTREDUCE 1242 +#define YY_ERROR_ACTION 1243 +#define YY_ACCEPT_ACTION 1244 +#define YY_NO_ACTION 1245 +#define YY_MIN_REDUCE 1246 +#define YY_MAX_REDUCE 1650 /************* End control #defines *******************************************/ #define YY_NLOOKAHEAD ((int)(sizeof(yy_lookahead)/sizeof(yy_lookahead[0]))) @@ -164083,218 +170973,218 @@ typedef union { ** yy_default[] Default action for each state. 
** *********** Begin parsing tables **********************************************/ -#define YY_ACTTAB_COUNT (2098) +#define YY_ACTTAB_COUNT (2100) static const YYACTIONTYPE yy_action[] = { - /* 0 */ 568, 208, 568, 118, 115, 229, 568, 118, 115, 229, - /* 10 */ 568, 1314, 377, 1293, 408, 562, 562, 562, 568, 409, - /* 20 */ 378, 1314, 1276, 41, 41, 41, 41, 208, 1526, 71, - /* 30 */ 71, 971, 419, 41, 41, 491, 303, 279, 303, 972, - /* 40 */ 397, 71, 71, 125, 126, 80, 1217, 1217, 1050, 1053, - /* 50 */ 1040, 1040, 123, 123, 124, 124, 124, 124, 476, 409, - /* 60 */ 1241, 1, 1, 575, 2, 1245, 550, 118, 115, 229, - /* 70 */ 317, 480, 146, 480, 524, 118, 115, 229, 529, 1327, - /* 80 */ 417, 523, 142, 125, 126, 80, 1217, 1217, 1050, 1053, - /* 90 */ 1040, 1040, 123, 123, 124, 124, 124, 124, 118, 115, - /* 100 */ 229, 327, 122, 122, 122, 122, 121, 121, 120, 120, - /* 110 */ 120, 119, 116, 444, 284, 284, 284, 284, 442, 442, - /* 120 */ 442, 1567, 376, 1569, 1192, 375, 1163, 565, 1163, 565, - /* 130 */ 409, 1567, 537, 259, 226, 444, 101, 145, 449, 316, - /* 140 */ 559, 240, 122, 122, 122, 122, 121, 121, 120, 120, - /* 150 */ 120, 119, 116, 444, 125, 126, 80, 1217, 1217, 1050, - /* 160 */ 1053, 1040, 1040, 123, 123, 124, 124, 124, 124, 142, - /* 170 */ 294, 1192, 339, 448, 120, 120, 120, 119, 116, 444, - /* 180 */ 127, 1192, 1193, 1194, 148, 441, 440, 568, 119, 116, - /* 190 */ 444, 124, 124, 124, 124, 117, 122, 122, 122, 122, - /* 200 */ 121, 121, 120, 120, 120, 119, 116, 444, 454, 113, - /* 210 */ 13, 13, 546, 122, 122, 122, 122, 121, 121, 120, - /* 220 */ 120, 120, 119, 116, 444, 422, 316, 559, 1192, 1193, - /* 230 */ 1194, 149, 1224, 409, 1224, 124, 124, 124, 124, 122, - /* 240 */ 122, 122, 122, 121, 121, 120, 120, 120, 119, 116, - /* 250 */ 444, 465, 342, 1037, 1037, 1051, 1054, 125, 126, 80, - /* 260 */ 1217, 1217, 1050, 1053, 1040, 1040, 123, 123, 124, 124, - /* 270 */ 124, 124, 1279, 522, 222, 1192, 568, 409, 224, 514, - /* 280 */ 175, 82, 83, 122, 122, 122, 122, 121, 121, 120, - /* 290 */ 120, 120, 119, 116, 444, 1007, 16, 16, 1192, 133, - /* 300 */ 133, 125, 126, 80, 1217, 1217, 1050, 1053, 1040, 1040, - /* 310 */ 123, 123, 124, 124, 124, 124, 122, 122, 122, 122, - /* 320 */ 121, 121, 120, 120, 120, 119, 116, 444, 1041, 546, - /* 330 */ 1192, 373, 1192, 1193, 1194, 252, 1434, 399, 504, 501, - /* 340 */ 500, 111, 560, 566, 4, 926, 926, 433, 499, 340, - /* 350 */ 460, 328, 360, 394, 1237, 1192, 1193, 1194, 563, 568, - /* 360 */ 122, 122, 122, 122, 121, 121, 120, 120, 120, 119, - /* 370 */ 116, 444, 284, 284, 369, 1580, 1607, 441, 440, 154, - /* 380 */ 409, 445, 71, 71, 1286, 565, 1221, 1192, 1193, 1194, - /* 390 */ 85, 1223, 271, 557, 543, 515, 1561, 568, 98, 1222, - /* 400 */ 6, 1278, 472, 142, 125, 126, 80, 1217, 1217, 1050, - /* 410 */ 1053, 1040, 1040, 123, 123, 124, 124, 124, 124, 550, - /* 420 */ 13, 13, 1027, 507, 1224, 1192, 1224, 549, 109, 109, - /* 430 */ 222, 568, 1238, 175, 568, 427, 110, 197, 445, 570, - /* 440 */ 569, 430, 1552, 1017, 325, 551, 1192, 270, 287, 368, - /* 450 */ 510, 363, 509, 257, 71, 71, 543, 71, 71, 359, - /* 460 */ 316, 559, 1613, 122, 122, 122, 122, 121, 121, 120, - /* 470 */ 120, 120, 119, 116, 444, 1017, 1017, 1019, 1020, 27, - /* 480 */ 284, 284, 1192, 1193, 1194, 1158, 568, 1612, 409, 901, - /* 490 */ 190, 550, 356, 565, 550, 937, 533, 517, 1158, 516, - /* 500 */ 413, 1158, 552, 1192, 1193, 1194, 568, 544, 1554, 51, - /* 510 */ 51, 214, 125, 126, 80, 1217, 1217, 1050, 1053, 1040, - /* 520 */ 1040, 123, 123, 124, 124, 124, 124, 1192, 474, 135, - /* 530 */ 135, 
409, 284, 284, 1490, 505, 121, 121, 120, 120, - /* 540 */ 120, 119, 116, 444, 1007, 565, 518, 217, 541, 1561, - /* 550 */ 316, 559, 142, 6, 532, 125, 126, 80, 1217, 1217, - /* 560 */ 1050, 1053, 1040, 1040, 123, 123, 124, 124, 124, 124, - /* 570 */ 1555, 122, 122, 122, 122, 121, 121, 120, 120, 120, - /* 580 */ 119, 116, 444, 485, 1192, 1193, 1194, 482, 281, 1267, - /* 590 */ 957, 252, 1192, 373, 504, 501, 500, 1192, 340, 571, - /* 600 */ 1192, 571, 409, 292, 499, 957, 876, 191, 480, 316, - /* 610 */ 559, 384, 290, 380, 122, 122, 122, 122, 121, 121, - /* 620 */ 120, 120, 120, 119, 116, 444, 125, 126, 80, 1217, - /* 630 */ 1217, 1050, 1053, 1040, 1040, 123, 123, 124, 124, 124, - /* 640 */ 124, 409, 394, 1136, 1192, 869, 100, 284, 284, 1192, - /* 650 */ 1193, 1194, 373, 1093, 1192, 1193, 1194, 1192, 1193, 1194, - /* 660 */ 565, 455, 32, 373, 233, 125, 126, 80, 1217, 1217, - /* 670 */ 1050, 1053, 1040, 1040, 123, 123, 124, 124, 124, 124, - /* 680 */ 1433, 959, 568, 228, 958, 122, 122, 122, 122, 121, - /* 690 */ 121, 120, 120, 120, 119, 116, 444, 1158, 228, 1192, - /* 700 */ 157, 1192, 1193, 1194, 1553, 13, 13, 301, 957, 1232, - /* 710 */ 1158, 153, 409, 1158, 373, 1583, 1176, 5, 369, 1580, - /* 720 */ 429, 1238, 3, 957, 122, 122, 122, 122, 121, 121, - /* 730 */ 120, 120, 120, 119, 116, 444, 125, 126, 80, 1217, - /* 740 */ 1217, 1050, 1053, 1040, 1040, 123, 123, 124, 124, 124, - /* 750 */ 124, 409, 208, 567, 1192, 1028, 1192, 1193, 1194, 1192, - /* 760 */ 388, 852, 155, 1552, 286, 402, 1098, 1098, 488, 568, - /* 770 */ 465, 342, 1319, 1319, 1552, 125, 126, 80, 1217, 1217, - /* 780 */ 1050, 1053, 1040, 1040, 123, 123, 124, 124, 124, 124, - /* 790 */ 129, 568, 13, 13, 374, 122, 122, 122, 122, 121, - /* 800 */ 121, 120, 120, 120, 119, 116, 444, 302, 568, 453, - /* 810 */ 528, 1192, 1193, 1194, 13, 13, 1192, 1193, 1194, 1297, - /* 820 */ 463, 1267, 409, 1317, 1317, 1552, 1012, 453, 452, 200, - /* 830 */ 299, 71, 71, 1265, 122, 122, 122, 122, 121, 121, - /* 840 */ 120, 120, 120, 119, 116, 444, 125, 126, 80, 1217, - /* 850 */ 1217, 1050, 1053, 1040, 1040, 123, 123, 124, 124, 124, - /* 860 */ 124, 409, 227, 1073, 1158, 284, 284, 419, 312, 278, - /* 870 */ 278, 285, 285, 1419, 406, 405, 382, 1158, 565, 568, - /* 880 */ 1158, 1196, 565, 1600, 565, 125, 126, 80, 1217, 1217, - /* 890 */ 1050, 1053, 1040, 1040, 123, 123, 124, 124, 124, 124, - /* 900 */ 453, 1482, 13, 13, 1536, 122, 122, 122, 122, 121, - /* 910 */ 121, 120, 120, 120, 119, 116, 444, 201, 568, 354, - /* 920 */ 1586, 575, 2, 1245, 840, 841, 842, 1562, 317, 1212, - /* 930 */ 146, 6, 409, 255, 254, 253, 206, 1327, 9, 1196, - /* 940 */ 262, 71, 71, 424, 122, 122, 122, 122, 121, 121, - /* 950 */ 120, 120, 120, 119, 116, 444, 125, 126, 80, 1217, - /* 960 */ 1217, 1050, 1053, 1040, 1040, 123, 123, 124, 124, 124, - /* 970 */ 124, 568, 284, 284, 568, 1213, 409, 574, 313, 1245, - /* 980 */ 349, 1296, 352, 419, 317, 565, 146, 491, 525, 1643, - /* 990 */ 395, 371, 491, 1327, 70, 70, 1295, 71, 71, 240, - /* 1000 */ 1325, 104, 80, 1217, 1217, 1050, 1053, 1040, 1040, 123, - /* 1010 */ 123, 124, 124, 124, 124, 122, 122, 122, 122, 121, - /* 1020 */ 121, 120, 120, 120, 119, 116, 444, 1114, 284, 284, - /* 1030 */ 428, 448, 1525, 1213, 439, 284, 284, 1489, 1352, 311, - /* 1040 */ 474, 565, 1115, 971, 491, 491, 217, 1263, 565, 1538, - /* 1050 */ 568, 972, 207, 568, 1027, 240, 383, 1116, 519, 122, - /* 1060 */ 122, 122, 122, 121, 121, 120, 120, 120, 119, 116, - /* 1070 */ 444, 1018, 107, 71, 71, 1017, 13, 13, 912, 568, - /* 1080 */ 1495, 568, 284, 284, 97, 526, 
491, 448, 913, 1326, - /* 1090 */ 1322, 545, 409, 284, 284, 565, 151, 209, 1495, 1497, - /* 1100 */ 262, 450, 55, 55, 56, 56, 565, 1017, 1017, 1019, - /* 1110 */ 443, 332, 409, 527, 12, 295, 125, 126, 80, 1217, - /* 1120 */ 1217, 1050, 1053, 1040, 1040, 123, 123, 124, 124, 124, - /* 1130 */ 124, 347, 409, 864, 1534, 1213, 125, 126, 80, 1217, - /* 1140 */ 1217, 1050, 1053, 1040, 1040, 123, 123, 124, 124, 124, - /* 1150 */ 124, 1137, 1641, 474, 1641, 371, 125, 114, 80, 1217, - /* 1160 */ 1217, 1050, 1053, 1040, 1040, 123, 123, 124, 124, 124, - /* 1170 */ 124, 1495, 329, 474, 331, 122, 122, 122, 122, 121, - /* 1180 */ 121, 120, 120, 120, 119, 116, 444, 203, 1419, 568, - /* 1190 */ 1294, 864, 464, 1213, 436, 122, 122, 122, 122, 121, - /* 1200 */ 121, 120, 120, 120, 119, 116, 444, 553, 1137, 1642, - /* 1210 */ 539, 1642, 15, 15, 892, 122, 122, 122, 122, 121, - /* 1220 */ 121, 120, 120, 120, 119, 116, 444, 568, 298, 538, - /* 1230 */ 1135, 1419, 1559, 1560, 1331, 409, 6, 6, 1169, 1268, - /* 1240 */ 415, 320, 284, 284, 1419, 508, 565, 525, 300, 457, - /* 1250 */ 43, 43, 568, 893, 12, 565, 330, 478, 425, 407, - /* 1260 */ 126, 80, 1217, 1217, 1050, 1053, 1040, 1040, 123, 123, - /* 1270 */ 124, 124, 124, 124, 568, 57, 57, 288, 1192, 1419, - /* 1280 */ 496, 458, 392, 392, 391, 273, 389, 1135, 1558, 849, - /* 1290 */ 1169, 407, 6, 568, 321, 1158, 470, 44, 44, 1557, - /* 1300 */ 1114, 426, 234, 6, 323, 256, 540, 256, 1158, 431, - /* 1310 */ 568, 1158, 322, 17, 487, 1115, 58, 58, 122, 122, - /* 1320 */ 122, 122, 121, 121, 120, 120, 120, 119, 116, 444, - /* 1330 */ 1116, 216, 481, 59, 59, 1192, 1193, 1194, 111, 560, - /* 1340 */ 324, 4, 236, 456, 526, 568, 237, 456, 568, 437, - /* 1350 */ 168, 556, 420, 141, 479, 563, 568, 293, 568, 1095, - /* 1360 */ 568, 293, 568, 1095, 531, 568, 872, 8, 60, 60, - /* 1370 */ 235, 61, 61, 568, 414, 568, 414, 568, 445, 62, - /* 1380 */ 62, 45, 45, 46, 46, 47, 47, 199, 49, 49, - /* 1390 */ 557, 568, 359, 568, 100, 486, 50, 50, 63, 63, - /* 1400 */ 64, 64, 561, 415, 535, 410, 568, 1027, 568, 534, - /* 1410 */ 316, 559, 316, 559, 65, 65, 14, 14, 568, 1027, - /* 1420 */ 568, 512, 932, 872, 1018, 109, 109, 931, 1017, 66, - /* 1430 */ 66, 131, 131, 110, 451, 445, 570, 569, 416, 177, - /* 1440 */ 1017, 132, 132, 67, 67, 568, 467, 568, 932, 471, - /* 1450 */ 1364, 283, 226, 931, 315, 1363, 407, 568, 459, 407, - /* 1460 */ 1017, 1017, 1019, 239, 407, 86, 213, 1350, 52, 52, - /* 1470 */ 68, 68, 1017, 1017, 1019, 1020, 27, 1585, 1180, 447, - /* 1480 */ 69, 69, 288, 97, 108, 1541, 106, 392, 392, 391, - /* 1490 */ 273, 389, 568, 879, 849, 883, 568, 111, 560, 466, - /* 1500 */ 4, 568, 152, 30, 38, 568, 1132, 234, 396, 323, - /* 1510 */ 111, 560, 527, 4, 563, 53, 53, 322, 568, 163, - /* 1520 */ 163, 568, 337, 468, 164, 164, 333, 563, 76, 76, - /* 1530 */ 568, 289, 1514, 568, 31, 1513, 568, 445, 338, 483, - /* 1540 */ 100, 54, 54, 344, 72, 72, 296, 236, 1080, 557, - /* 1550 */ 445, 879, 1360, 134, 134, 168, 73, 73, 141, 161, - /* 1560 */ 161, 1574, 557, 535, 568, 319, 568, 348, 536, 1009, - /* 1570 */ 473, 261, 261, 891, 890, 235, 535, 568, 1027, 568, - /* 1580 */ 475, 534, 261, 367, 109, 109, 521, 136, 136, 130, - /* 1590 */ 130, 1027, 110, 366, 445, 570, 569, 109, 109, 1017, - /* 1600 */ 162, 162, 156, 156, 568, 110, 1080, 445, 570, 569, - /* 1610 */ 410, 351, 1017, 568, 353, 316, 559, 568, 343, 568, - /* 1620 */ 100, 497, 357, 258, 100, 898, 899, 140, 140, 355, - /* 1630 */ 1310, 1017, 1017, 1019, 1020, 27, 139, 139, 362, 451, - /* 1640 */ 137, 137, 138, 138, 1017, 1017, 1019, 1020, 
27, 1180, - /* 1650 */ 447, 568, 372, 288, 111, 560, 1021, 4, 392, 392, - /* 1660 */ 391, 273, 389, 568, 1141, 849, 568, 1076, 568, 258, - /* 1670 */ 492, 563, 568, 211, 75, 75, 555, 962, 234, 261, - /* 1680 */ 323, 111, 560, 929, 4, 113, 77, 77, 322, 74, - /* 1690 */ 74, 42, 42, 1373, 445, 48, 48, 1418, 563, 974, - /* 1700 */ 975, 1092, 1091, 1092, 1091, 862, 557, 150, 930, 1346, - /* 1710 */ 113, 1358, 554, 1424, 1021, 1275, 1266, 1254, 236, 1253, - /* 1720 */ 1255, 445, 1593, 1343, 308, 276, 168, 309, 11, 141, - /* 1730 */ 393, 310, 232, 557, 1405, 1027, 335, 291, 1400, 219, - /* 1740 */ 336, 109, 109, 936, 297, 1410, 235, 341, 477, 110, - /* 1750 */ 502, 445, 570, 569, 1393, 1409, 1017, 400, 1293, 365, - /* 1760 */ 223, 1486, 1027, 1485, 1355, 1356, 1354, 1353, 109, 109, - /* 1770 */ 204, 1596, 1232, 558, 265, 218, 110, 205, 445, 570, - /* 1780 */ 569, 410, 387, 1017, 1533, 179, 316, 559, 1017, 1017, - /* 1790 */ 1019, 1020, 27, 230, 1531, 1229, 79, 560, 85, 4, - /* 1800 */ 418, 215, 548, 81, 84, 188, 1406, 173, 181, 461, - /* 1810 */ 451, 35, 462, 563, 183, 1017, 1017, 1019, 1020, 27, - /* 1820 */ 184, 1491, 185, 186, 495, 242, 98, 398, 1412, 36, - /* 1830 */ 1411, 484, 91, 469, 401, 1414, 445, 192, 1480, 246, - /* 1840 */ 1502, 490, 346, 277, 248, 196, 493, 511, 557, 350, - /* 1850 */ 1256, 249, 250, 403, 1313, 1312, 111, 560, 432, 4, - /* 1860 */ 1311, 1304, 93, 1611, 883, 1610, 224, 404, 434, 520, - /* 1870 */ 263, 435, 1579, 563, 1283, 1282, 364, 1027, 306, 1281, - /* 1880 */ 264, 1609, 1565, 109, 109, 370, 1303, 307, 1564, 438, - /* 1890 */ 128, 110, 1378, 445, 570, 569, 445, 546, 1017, 10, - /* 1900 */ 1466, 105, 381, 1377, 34, 572, 99, 1336, 557, 314, - /* 1910 */ 1186, 530, 272, 274, 379, 210, 1335, 547, 385, 386, - /* 1920 */ 275, 573, 1251, 1246, 411, 412, 1518, 165, 178, 1519, - /* 1930 */ 1017, 1017, 1019, 1020, 27, 1517, 1516, 1027, 78, 147, - /* 1940 */ 166, 220, 221, 109, 109, 836, 304, 167, 446, 212, - /* 1950 */ 318, 110, 231, 445, 570, 569, 144, 1090, 1017, 1088, - /* 1960 */ 326, 180, 169, 1212, 182, 334, 238, 915, 241, 1104, - /* 1970 */ 187, 170, 171, 421, 87, 88, 423, 189, 89, 90, - /* 1980 */ 172, 1107, 243, 1103, 244, 158, 18, 245, 345, 247, - /* 1990 */ 1017, 1017, 1019, 1020, 27, 261, 1096, 193, 1226, 489, - /* 2000 */ 194, 37, 366, 851, 494, 251, 195, 506, 92, 19, - /* 2010 */ 498, 358, 20, 503, 881, 361, 94, 894, 305, 159, - /* 2020 */ 513, 39, 95, 1174, 160, 1056, 966, 1143, 96, 174, - /* 2030 */ 1142, 225, 280, 282, 198, 960, 113, 1164, 1160, 260, - /* 2040 */ 21, 22, 23, 1162, 1168, 1167, 1148, 24, 33, 25, - /* 2050 */ 202, 542, 26, 100, 1071, 102, 1057, 103, 7, 1055, - /* 2060 */ 1059, 1113, 1060, 1112, 266, 267, 28, 40, 390, 1022, - /* 2070 */ 863, 112, 29, 564, 1182, 1181, 268, 176, 143, 925, - /* 2080 */ 1242, 1242, 1242, 1242, 1242, 1242, 1242, 1242, 1242, 1242, - /* 2090 */ 1242, 1242, 1242, 1242, 269, 1602, 1242, 1601, + /* 0 */ 572, 210, 572, 119, 116, 231, 572, 119, 116, 231, + /* 10 */ 572, 1317, 379, 1296, 410, 566, 566, 566, 572, 411, + /* 20 */ 380, 1317, 1279, 42, 42, 42, 42, 210, 1529, 72, + /* 30 */ 72, 974, 421, 42, 42, 495, 305, 281, 305, 975, + /* 40 */ 399, 72, 72, 126, 127, 81, 1217, 1217, 1054, 1057, + /* 50 */ 1044, 1044, 124, 124, 125, 125, 125, 125, 480, 411, + /* 60 */ 1244, 1, 1, 578, 2, 1248, 554, 119, 116, 231, + /* 70 */ 319, 484, 147, 484, 528, 119, 116, 231, 533, 1330, + /* 80 */ 419, 527, 143, 126, 127, 81, 1217, 1217, 1054, 1057, + /* 90 */ 1044, 1044, 124, 124, 125, 125, 125, 125, 119, 116, + /* 100 */ 231, 329, 123, 
123, 123, 123, 122, 122, 121, 121, + /* 110 */ 121, 120, 117, 448, 286, 286, 286, 286, 446, 446, + /* 120 */ 446, 1568, 378, 1570, 1193, 377, 1164, 569, 1164, 569, + /* 130 */ 411, 1568, 541, 261, 228, 448, 102, 146, 453, 318, + /* 140 */ 563, 242, 123, 123, 123, 123, 122, 122, 121, 121, + /* 150 */ 121, 120, 117, 448, 126, 127, 81, 1217, 1217, 1054, + /* 160 */ 1057, 1044, 1044, 124, 124, 125, 125, 125, 125, 143, + /* 170 */ 296, 1193, 341, 452, 121, 121, 121, 120, 117, 448, + /* 180 */ 128, 1193, 1194, 1193, 149, 445, 444, 572, 120, 117, + /* 190 */ 448, 125, 125, 125, 125, 118, 123, 123, 123, 123, + /* 200 */ 122, 122, 121, 121, 121, 120, 117, 448, 458, 114, + /* 210 */ 13, 13, 550, 123, 123, 123, 123, 122, 122, 121, + /* 220 */ 121, 121, 120, 117, 448, 424, 318, 563, 1193, 1194, + /* 230 */ 1193, 150, 1225, 411, 1225, 125, 125, 125, 125, 123, + /* 240 */ 123, 123, 123, 122, 122, 121, 121, 121, 120, 117, + /* 250 */ 448, 469, 344, 1041, 1041, 1055, 1058, 126, 127, 81, + /* 260 */ 1217, 1217, 1054, 1057, 1044, 1044, 124, 124, 125, 125, + /* 270 */ 125, 125, 1282, 526, 224, 1193, 572, 411, 226, 519, + /* 280 */ 177, 83, 84, 123, 123, 123, 123, 122, 122, 121, + /* 290 */ 121, 121, 120, 117, 448, 1010, 16, 16, 1193, 134, + /* 300 */ 134, 126, 127, 81, 1217, 1217, 1054, 1057, 1044, 1044, + /* 310 */ 124, 124, 125, 125, 125, 125, 123, 123, 123, 123, + /* 320 */ 122, 122, 121, 121, 121, 120, 117, 448, 1045, 550, + /* 330 */ 1193, 375, 1193, 1194, 1193, 254, 1438, 401, 508, 505, + /* 340 */ 504, 112, 564, 570, 4, 929, 929, 435, 503, 342, + /* 350 */ 464, 330, 362, 396, 1238, 1193, 1194, 1193, 567, 572, + /* 360 */ 123, 123, 123, 123, 122, 122, 121, 121, 121, 120, + /* 370 */ 117, 448, 286, 286, 371, 1581, 1607, 445, 444, 155, + /* 380 */ 411, 449, 72, 72, 1289, 569, 1222, 1193, 1194, 1193, + /* 390 */ 86, 1224, 273, 561, 547, 520, 520, 572, 99, 1223, + /* 400 */ 6, 1281, 476, 143, 126, 127, 81, 1217, 1217, 1054, + /* 410 */ 1057, 1044, 1044, 124, 124, 125, 125, 125, 125, 554, + /* 420 */ 13, 13, 1031, 511, 1225, 1193, 1225, 553, 110, 110, + /* 430 */ 224, 572, 1239, 177, 572, 429, 111, 199, 449, 573, + /* 440 */ 449, 432, 1555, 1019, 327, 555, 1193, 272, 289, 370, + /* 450 */ 514, 365, 513, 259, 72, 72, 547, 72, 72, 361, + /* 460 */ 318, 563, 1613, 123, 123, 123, 123, 122, 122, 121, + /* 470 */ 121, 121, 120, 117, 448, 1019, 1019, 1021, 1022, 28, + /* 480 */ 286, 286, 1193, 1194, 1193, 1159, 572, 1612, 411, 904, + /* 490 */ 192, 554, 358, 569, 554, 940, 537, 521, 1159, 437, + /* 500 */ 415, 1159, 556, 1193, 1194, 1193, 572, 548, 548, 52, + /* 510 */ 52, 216, 126, 127, 81, 1217, 1217, 1054, 1057, 1044, + /* 520 */ 1044, 124, 124, 125, 125, 125, 125, 1193, 478, 136, + /* 530 */ 136, 411, 286, 286, 1493, 509, 122, 122, 121, 121, + /* 540 */ 121, 120, 117, 448, 1010, 569, 522, 219, 545, 545, + /* 550 */ 318, 563, 143, 6, 536, 126, 127, 81, 1217, 1217, + /* 560 */ 1054, 1057, 1044, 1044, 124, 124, 125, 125, 125, 125, + /* 570 */ 1557, 123, 123, 123, 123, 122, 122, 121, 121, 121, + /* 580 */ 120, 117, 448, 489, 1193, 1194, 1193, 486, 283, 1270, + /* 590 */ 960, 254, 1193, 375, 508, 505, 504, 1193, 342, 574, + /* 600 */ 1193, 574, 411, 294, 503, 960, 879, 193, 484, 318, + /* 610 */ 563, 386, 292, 382, 123, 123, 123, 123, 122, 122, + /* 620 */ 121, 121, 121, 120, 117, 448, 126, 127, 81, 1217, + /* 630 */ 1217, 1054, 1057, 1044, 1044, 124, 124, 125, 125, 125, + /* 640 */ 125, 411, 396, 1139, 1193, 872, 101, 286, 286, 1193, + /* 650 */ 1194, 1193, 375, 1096, 1193, 1194, 1193, 1193, 1194, 1193, + /* 660 
*/ 569, 459, 33, 375, 235, 126, 127, 81, 1217, 1217, + /* 670 */ 1054, 1057, 1044, 1044, 124, 124, 125, 125, 125, 125, + /* 680 */ 1437, 962, 572, 230, 961, 123, 123, 123, 123, 122, + /* 690 */ 122, 121, 121, 121, 120, 117, 448, 1159, 230, 1193, + /* 700 */ 158, 1193, 1194, 1193, 1556, 13, 13, 303, 960, 1233, + /* 710 */ 1159, 154, 411, 1159, 375, 1584, 1177, 5, 371, 1581, + /* 720 */ 431, 1239, 3, 960, 123, 123, 123, 123, 122, 122, + /* 730 */ 121, 121, 121, 120, 117, 448, 126, 127, 81, 1217, + /* 740 */ 1217, 1054, 1057, 1044, 1044, 124, 124, 125, 125, 125, + /* 750 */ 125, 411, 210, 571, 1193, 1032, 1193, 1194, 1193, 1193, + /* 760 */ 390, 855, 156, 1555, 376, 404, 1101, 1101, 492, 572, + /* 770 */ 469, 344, 1322, 1322, 1555, 126, 127, 81, 1217, 1217, + /* 780 */ 1054, 1057, 1044, 1044, 124, 124, 125, 125, 125, 125, + /* 790 */ 130, 572, 13, 13, 532, 123, 123, 123, 123, 122, + /* 800 */ 122, 121, 121, 121, 120, 117, 448, 304, 572, 457, + /* 810 */ 229, 1193, 1194, 1193, 13, 13, 1193, 1194, 1193, 1300, + /* 820 */ 467, 1270, 411, 1320, 1320, 1555, 1015, 457, 456, 436, + /* 830 */ 301, 72, 72, 1268, 123, 123, 123, 123, 122, 122, + /* 840 */ 121, 121, 121, 120, 117, 448, 126, 127, 81, 1217, + /* 850 */ 1217, 1054, 1057, 1044, 1044, 124, 124, 125, 125, 125, + /* 860 */ 125, 411, 384, 1076, 1159, 286, 286, 421, 314, 280, + /* 870 */ 280, 287, 287, 461, 408, 407, 1539, 1159, 569, 572, + /* 880 */ 1159, 1196, 569, 409, 569, 126, 127, 81, 1217, 1217, + /* 890 */ 1054, 1057, 1044, 1044, 124, 124, 125, 125, 125, 125, + /* 900 */ 457, 1485, 13, 13, 1541, 123, 123, 123, 123, 122, + /* 910 */ 122, 121, 121, 121, 120, 117, 448, 202, 572, 462, + /* 920 */ 1587, 578, 2, 1248, 843, 844, 845, 1563, 319, 409, + /* 930 */ 147, 6, 411, 257, 256, 255, 208, 1330, 9, 1196, + /* 940 */ 264, 72, 72, 1436, 123, 123, 123, 123, 122, 122, + /* 950 */ 121, 121, 121, 120, 117, 448, 126, 127, 81, 1217, + /* 960 */ 1217, 1054, 1057, 1044, 1044, 124, 124, 125, 125, 125, + /* 970 */ 125, 572, 286, 286, 572, 1213, 411, 577, 315, 1248, + /* 980 */ 421, 371, 1581, 356, 319, 569, 147, 495, 529, 1644, + /* 990 */ 397, 935, 495, 1330, 71, 71, 934, 72, 72, 242, + /* 1000 */ 1328, 105, 81, 1217, 1217, 1054, 1057, 1044, 1044, 124, + /* 1010 */ 124, 125, 125, 125, 125, 123, 123, 123, 123, 122, + /* 1020 */ 122, 121, 121, 121, 120, 117, 448, 1117, 286, 286, + /* 1030 */ 1422, 452, 1528, 1213, 443, 286, 286, 1492, 1355, 313, + /* 1040 */ 478, 569, 1118, 454, 351, 495, 354, 1266, 569, 209, + /* 1050 */ 572, 418, 179, 572, 1031, 242, 385, 1119, 523, 123, + /* 1060 */ 123, 123, 123, 122, 122, 121, 121, 121, 120, 117, + /* 1070 */ 448, 1020, 108, 72, 72, 1019, 13, 13, 915, 572, + /* 1080 */ 1498, 572, 286, 286, 98, 530, 1537, 452, 916, 1334, + /* 1090 */ 1329, 203, 411, 286, 286, 569, 152, 211, 1498, 1500, + /* 1100 */ 426, 569, 56, 56, 57, 57, 569, 1019, 1019, 1021, + /* 1110 */ 447, 572, 411, 531, 12, 297, 126, 127, 81, 1217, + /* 1120 */ 1217, 1054, 1057, 1044, 1044, 124, 124, 125, 125, 125, + /* 1130 */ 125, 572, 411, 867, 15, 15, 126, 127, 81, 1217, + /* 1140 */ 1217, 1054, 1057, 1044, 1044, 124, 124, 125, 125, 125, + /* 1150 */ 125, 373, 529, 264, 44, 44, 126, 115, 81, 1217, + /* 1160 */ 1217, 1054, 1057, 1044, 1044, 124, 124, 125, 125, 125, + /* 1170 */ 125, 1498, 478, 1271, 417, 123, 123, 123, 123, 122, + /* 1180 */ 122, 121, 121, 121, 120, 117, 448, 205, 1213, 495, + /* 1190 */ 430, 867, 468, 322, 495, 123, 123, 123, 123, 122, + /* 1200 */ 122, 121, 121, 121, 120, 117, 448, 572, 557, 1140, + /* 1210 */ 1642, 1422, 1642, 543, 
572, 123, 123, 123, 123, 122, + /* 1220 */ 122, 121, 121, 121, 120, 117, 448, 572, 1422, 572, + /* 1230 */ 13, 13, 542, 323, 1325, 411, 334, 58, 58, 349, + /* 1240 */ 1422, 1170, 326, 286, 286, 549, 1213, 300, 895, 530, + /* 1250 */ 45, 45, 59, 59, 1140, 1643, 569, 1643, 565, 417, + /* 1260 */ 127, 81, 1217, 1217, 1054, 1057, 1044, 1044, 124, 124, + /* 1270 */ 125, 125, 125, 125, 1367, 373, 500, 290, 1193, 512, + /* 1280 */ 1366, 427, 394, 394, 393, 275, 391, 896, 1138, 852, + /* 1290 */ 478, 258, 1422, 1170, 463, 1159, 12, 331, 428, 333, + /* 1300 */ 1117, 460, 236, 258, 325, 460, 544, 1544, 1159, 1098, + /* 1310 */ 491, 1159, 324, 1098, 440, 1118, 335, 516, 123, 123, + /* 1320 */ 123, 123, 122, 122, 121, 121, 121, 120, 117, 448, + /* 1330 */ 1119, 318, 563, 1138, 572, 1193, 1194, 1193, 112, 564, + /* 1340 */ 201, 4, 238, 433, 935, 490, 285, 228, 1517, 934, + /* 1350 */ 170, 560, 572, 142, 1516, 567, 572, 60, 60, 572, + /* 1360 */ 416, 572, 441, 572, 535, 302, 875, 8, 487, 572, + /* 1370 */ 237, 572, 416, 572, 485, 61, 61, 572, 449, 62, + /* 1380 */ 62, 332, 63, 63, 46, 46, 47, 47, 361, 572, + /* 1390 */ 561, 572, 48, 48, 50, 50, 51, 51, 572, 295, + /* 1400 */ 64, 64, 482, 295, 539, 412, 471, 1031, 572, 538, + /* 1410 */ 318, 563, 65, 65, 66, 66, 409, 475, 572, 1031, + /* 1420 */ 572, 14, 14, 875, 1020, 110, 110, 409, 1019, 572, + /* 1430 */ 474, 67, 67, 111, 455, 449, 573, 449, 98, 317, + /* 1440 */ 1019, 132, 132, 133, 133, 572, 1561, 572, 974, 409, + /* 1450 */ 6, 1562, 68, 68, 1560, 6, 975, 572, 6, 1559, + /* 1460 */ 1019, 1019, 1021, 6, 346, 218, 101, 531, 53, 53, + /* 1470 */ 69, 69, 1019, 1019, 1021, 1022, 28, 1586, 1181, 451, + /* 1480 */ 70, 70, 290, 87, 215, 31, 1363, 394, 394, 393, + /* 1490 */ 275, 391, 350, 109, 852, 107, 572, 112, 564, 483, + /* 1500 */ 4, 1212, 572, 239, 153, 572, 39, 236, 1299, 325, + /* 1510 */ 112, 564, 1298, 4, 567, 572, 32, 324, 572, 54, + /* 1520 */ 54, 572, 1135, 353, 398, 165, 165, 567, 166, 166, + /* 1530 */ 572, 291, 355, 572, 17, 357, 572, 449, 77, 77, + /* 1540 */ 1313, 55, 55, 1297, 73, 73, 572, 238, 470, 561, + /* 1550 */ 449, 472, 364, 135, 135, 170, 74, 74, 142, 163, + /* 1560 */ 163, 374, 561, 539, 572, 321, 572, 886, 540, 137, + /* 1570 */ 137, 339, 1353, 422, 298, 237, 539, 572, 1031, 572, + /* 1580 */ 340, 538, 101, 369, 110, 110, 162, 131, 131, 164, + /* 1590 */ 164, 1031, 111, 368, 449, 573, 449, 110, 110, 1019, + /* 1600 */ 157, 157, 141, 141, 572, 111, 572, 449, 573, 449, + /* 1610 */ 412, 288, 1019, 572, 882, 318, 563, 572, 219, 572, + /* 1620 */ 241, 1012, 477, 263, 263, 894, 893, 140, 140, 138, + /* 1630 */ 138, 1019, 1019, 1021, 1022, 28, 139, 139, 525, 455, + /* 1640 */ 76, 76, 78, 78, 1019, 1019, 1021, 1022, 28, 1181, + /* 1650 */ 451, 572, 1083, 290, 112, 564, 1575, 4, 394, 394, + /* 1660 */ 393, 275, 391, 572, 1023, 852, 572, 479, 345, 263, + /* 1670 */ 101, 567, 882, 1376, 75, 75, 1421, 501, 236, 260, + /* 1680 */ 325, 112, 564, 359, 4, 101, 43, 43, 324, 49, + /* 1690 */ 49, 901, 902, 161, 449, 101, 977, 978, 567, 1079, + /* 1700 */ 1349, 260, 965, 932, 263, 114, 561, 1095, 517, 1095, + /* 1710 */ 1083, 1094, 865, 1094, 151, 933, 1144, 114, 238, 1361, + /* 1720 */ 558, 449, 1023, 559, 1426, 1278, 170, 1269, 1257, 142, + /* 1730 */ 1601, 1256, 1258, 561, 1594, 1031, 496, 278, 213, 1346, + /* 1740 */ 310, 110, 110, 939, 311, 312, 237, 11, 234, 111, + /* 1750 */ 221, 449, 573, 449, 293, 395, 1019, 1408, 337, 1403, + /* 1760 */ 1396, 338, 1031, 299, 343, 1413, 1412, 481, 110, 110, + /* 1770 */ 506, 402, 225, 1296, 206, 367, 111, 
1358, 449, 573, + /* 1780 */ 449, 412, 1359, 1019, 1489, 1488, 318, 563, 1019, 1019, + /* 1790 */ 1021, 1022, 28, 562, 207, 220, 80, 564, 389, 4, + /* 1800 */ 1597, 1357, 552, 1356, 1233, 181, 267, 232, 1536, 1534, + /* 1810 */ 455, 1230, 420, 567, 82, 1019, 1019, 1021, 1022, 28, + /* 1820 */ 86, 217, 85, 1494, 190, 175, 183, 465, 185, 466, + /* 1830 */ 36, 1409, 186, 187, 188, 499, 449, 244, 37, 99, + /* 1840 */ 400, 1415, 1414, 488, 1417, 194, 473, 403, 561, 1483, + /* 1850 */ 248, 92, 1505, 494, 198, 279, 112, 564, 250, 4, + /* 1860 */ 348, 497, 405, 352, 1259, 251, 252, 515, 1316, 434, + /* 1870 */ 1315, 1314, 94, 567, 1307, 886, 1306, 1031, 226, 406, + /* 1880 */ 1611, 1610, 438, 110, 110, 1580, 1286, 524, 439, 308, + /* 1890 */ 266, 111, 1285, 449, 573, 449, 449, 309, 1019, 366, + /* 1900 */ 1284, 1609, 265, 1566, 1565, 442, 372, 1381, 561, 129, + /* 1910 */ 550, 1380, 10, 1470, 383, 106, 316, 551, 100, 35, + /* 1920 */ 534, 575, 212, 1339, 381, 387, 1187, 1338, 274, 276, + /* 1930 */ 1019, 1019, 1021, 1022, 28, 277, 413, 1031, 576, 1254, + /* 1940 */ 388, 1521, 1249, 110, 110, 167, 1522, 168, 148, 1520, + /* 1950 */ 1519, 111, 306, 449, 573, 449, 222, 223, 1019, 839, + /* 1960 */ 169, 79, 450, 214, 414, 233, 320, 145, 1093, 1091, + /* 1970 */ 328, 182, 171, 1212, 918, 184, 240, 336, 243, 1107, + /* 1980 */ 189, 172, 173, 423, 425, 88, 180, 191, 89, 90, + /* 1990 */ 1019, 1019, 1021, 1022, 28, 91, 174, 1110, 245, 1106, + /* 2000 */ 246, 159, 18, 247, 347, 1099, 263, 195, 1227, 493, + /* 2010 */ 249, 196, 38, 854, 498, 368, 253, 360, 897, 197, + /* 2020 */ 502, 93, 19, 20, 507, 884, 363, 510, 95, 307, + /* 2030 */ 160, 96, 518, 97, 1175, 1060, 1146, 40, 21, 227, + /* 2040 */ 176, 1145, 282, 284, 969, 200, 963, 114, 262, 1165, + /* 2050 */ 22, 23, 24, 1161, 1169, 25, 1163, 1150, 34, 26, + /* 2060 */ 1168, 546, 27, 204, 101, 103, 104, 1074, 7, 1061, + /* 2070 */ 1059, 1063, 1116, 1064, 1115, 268, 269, 29, 41, 270, + /* 2080 */ 1024, 866, 113, 30, 568, 392, 1183, 144, 178, 1182, + /* 2090 */ 271, 928, 1245, 1245, 1245, 1245, 1245, 1245, 1245, 1602, }; static const YYCODETYPE yy_lookahead[] = { /* 0 */ 193, 193, 193, 274, 275, 276, 193, 274, 275, 276, @@ -164373,7 +171263,7 @@ static const YYCODETYPE yy_lookahead[] = { /* 730 */ 108, 109, 110, 111, 112, 113, 43, 44, 45, 46, /* 740 */ 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, /* 750 */ 57, 19, 193, 193, 59, 23, 116, 117, 118, 59, - /* 760 */ 201, 21, 241, 304, 22, 206, 127, 128, 129, 193, + /* 760 */ 201, 21, 241, 304, 193, 206, 127, 128, 129, 193, /* 770 */ 128, 129, 235, 236, 304, 43, 44, 45, 46, 47, /* 780 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, /* 790 */ 22, 193, 216, 217, 193, 102, 103, 104, 105, 106, @@ -164384,129 +171274,129 @@ static const YYCODETYPE yy_lookahead[] = { /* 840 */ 108, 109, 110, 111, 112, 113, 43, 44, 45, 46, /* 850 */ 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, /* 860 */ 57, 19, 193, 123, 76, 239, 240, 193, 253, 239, - /* 870 */ 240, 239, 240, 193, 106, 107, 193, 89, 252, 193, - /* 880 */ 92, 59, 252, 141, 252, 43, 44, 45, 46, 47, + /* 870 */ 240, 239, 240, 244, 106, 107, 193, 89, 252, 193, + /* 880 */ 92, 59, 252, 254, 252, 43, 44, 45, 46, 47, /* 890 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, /* 900 */ 284, 161, 216, 217, 193, 102, 103, 104, 105, 106, - /* 910 */ 107, 108, 109, 110, 111, 112, 113, 231, 193, 16, - /* 920 */ 187, 188, 189, 190, 7, 8, 9, 309, 195, 25, + /* 910 */ 107, 108, 109, 110, 111, 112, 113, 231, 193, 244, + /* 920 */ 187, 188, 189, 190, 7, 8, 9, 309, 195, 254, /* 930 */ 197, 313, 19, 127, 128, 
129, 262, 204, 22, 117, - /* 940 */ 24, 216, 217, 263, 102, 103, 104, 105, 106, 107, + /* 940 */ 24, 216, 217, 273, 102, 103, 104, 105, 106, 107, /* 950 */ 108, 109, 110, 111, 112, 113, 43, 44, 45, 46, /* 960 */ 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, /* 970 */ 57, 193, 239, 240, 193, 59, 19, 188, 253, 190, - /* 980 */ 77, 226, 79, 193, 195, 252, 197, 193, 19, 301, - /* 990 */ 302, 193, 193, 204, 216, 217, 226, 216, 217, 266, + /* 980 */ 193, 311, 312, 16, 195, 252, 197, 193, 19, 301, + /* 990 */ 302, 135, 193, 204, 216, 217, 140, 216, 217, 266, /* 1000 */ 204, 159, 45, 46, 47, 48, 49, 50, 51, 52, /* 1010 */ 53, 54, 55, 56, 57, 102, 103, 104, 105, 106, /* 1020 */ 107, 108, 109, 110, 111, 112, 113, 12, 239, 240, - /* 1030 */ 232, 298, 238, 117, 253, 239, 240, 238, 259, 260, - /* 1040 */ 193, 252, 27, 31, 193, 193, 142, 204, 252, 193, - /* 1050 */ 193, 39, 262, 193, 100, 266, 278, 42, 204, 102, + /* 1030 */ 193, 298, 238, 117, 253, 239, 240, 238, 259, 260, + /* 1040 */ 193, 252, 27, 193, 77, 193, 79, 204, 252, 262, + /* 1050 */ 193, 299, 300, 193, 100, 266, 278, 42, 204, 102, /* 1060 */ 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, /* 1070 */ 113, 117, 159, 216, 217, 121, 216, 217, 63, 193, - /* 1080 */ 193, 193, 239, 240, 115, 116, 193, 298, 73, 238, + /* 1080 */ 193, 193, 239, 240, 115, 116, 193, 298, 73, 240, /* 1090 */ 238, 231, 19, 239, 240, 252, 22, 24, 211, 212, - /* 1100 */ 24, 193, 216, 217, 216, 217, 252, 153, 154, 155, - /* 1110 */ 253, 16, 19, 144, 213, 268, 43, 44, 45, 46, + /* 1100 */ 263, 252, 216, 217, 216, 217, 252, 153, 154, 155, + /* 1110 */ 253, 193, 19, 144, 213, 268, 43, 44, 45, 46, /* 1120 */ 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, - /* 1130 */ 57, 238, 19, 59, 193, 59, 43, 44, 45, 46, + /* 1130 */ 57, 193, 19, 59, 216, 217, 43, 44, 45, 46, /* 1140 */ 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, - /* 1150 */ 57, 22, 23, 193, 25, 193, 43, 44, 45, 46, + /* 1150 */ 57, 193, 19, 24, 216, 217, 43, 44, 45, 46, /* 1160 */ 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, - /* 1170 */ 57, 284, 77, 193, 79, 102, 103, 104, 105, 106, - /* 1180 */ 107, 108, 109, 110, 111, 112, 113, 286, 193, 193, - /* 1190 */ 193, 117, 291, 117, 232, 102, 103, 104, 105, 106, - /* 1200 */ 107, 108, 109, 110, 111, 112, 113, 204, 22, 23, - /* 1210 */ 66, 25, 216, 217, 35, 102, 103, 104, 105, 106, - /* 1220 */ 107, 108, 109, 110, 111, 112, 113, 193, 268, 85, - /* 1230 */ 101, 193, 309, 309, 240, 19, 313, 313, 94, 208, - /* 1240 */ 209, 193, 239, 240, 193, 66, 252, 19, 268, 244, - /* 1250 */ 216, 217, 193, 74, 213, 252, 161, 19, 263, 254, + /* 1170 */ 57, 284, 193, 208, 209, 102, 103, 104, 105, 106, + /* 1180 */ 107, 108, 109, 110, 111, 112, 113, 286, 59, 193, + /* 1190 */ 232, 117, 291, 193, 193, 102, 103, 104, 105, 106, + /* 1200 */ 107, 108, 109, 110, 111, 112, 113, 193, 204, 22, + /* 1210 */ 23, 193, 25, 66, 193, 102, 103, 104, 105, 106, + /* 1220 */ 107, 108, 109, 110, 111, 112, 113, 193, 193, 193, + /* 1230 */ 216, 217, 85, 193, 238, 19, 16, 216, 217, 238, + /* 1240 */ 193, 94, 193, 239, 240, 231, 117, 268, 35, 116, + /* 1250 */ 216, 217, 216, 217, 22, 23, 252, 25, 208, 209, /* 1260 */ 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, - /* 1270 */ 54, 55, 56, 57, 193, 216, 217, 5, 59, 193, - /* 1280 */ 19, 244, 10, 11, 12, 13, 14, 101, 309, 17, - /* 1290 */ 146, 254, 313, 193, 193, 76, 115, 216, 217, 309, - /* 1300 */ 12, 263, 30, 313, 32, 46, 87, 46, 89, 130, - /* 1310 */ 193, 92, 40, 22, 263, 27, 216, 217, 102, 103, + /* 1270 */ 54, 55, 56, 57, 193, 193, 19, 5, 59, 66, + /* 1280 */ 193, 263, 10, 11, 12, 13, 14, 74, 
101, 17, + /* 1290 */ 193, 46, 193, 146, 193, 76, 213, 77, 263, 79, + /* 1300 */ 12, 260, 30, 46, 32, 264, 87, 193, 89, 29, + /* 1310 */ 263, 92, 40, 33, 232, 27, 193, 108, 102, 103, /* 1320 */ 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, - /* 1330 */ 42, 150, 291, 216, 217, 116, 117, 118, 19, 20, - /* 1340 */ 193, 22, 70, 260, 116, 193, 24, 264, 193, 263, - /* 1350 */ 78, 63, 61, 81, 116, 36, 193, 260, 193, 29, - /* 1360 */ 193, 264, 193, 33, 145, 193, 59, 48, 216, 217, - /* 1370 */ 98, 216, 217, 193, 115, 193, 115, 193, 59, 216, - /* 1380 */ 217, 216, 217, 216, 217, 216, 217, 255, 216, 217, - /* 1390 */ 71, 193, 131, 193, 25, 65, 216, 217, 216, 217, - /* 1400 */ 216, 217, 208, 209, 85, 133, 193, 100, 193, 90, - /* 1410 */ 138, 139, 138, 139, 216, 217, 216, 217, 193, 100, - /* 1420 */ 193, 108, 135, 116, 117, 106, 107, 140, 121, 216, - /* 1430 */ 217, 216, 217, 114, 162, 116, 117, 118, 299, 300, - /* 1440 */ 121, 216, 217, 216, 217, 193, 244, 193, 135, 244, - /* 1450 */ 193, 256, 257, 140, 244, 193, 254, 193, 193, 254, - /* 1460 */ 153, 154, 155, 141, 254, 149, 150, 258, 216, 217, + /* 1330 */ 42, 138, 139, 101, 193, 116, 117, 118, 19, 20, + /* 1340 */ 255, 22, 70, 130, 135, 65, 256, 257, 193, 140, + /* 1350 */ 78, 63, 193, 81, 193, 36, 193, 216, 217, 193, + /* 1360 */ 115, 193, 263, 193, 145, 268, 59, 48, 193, 193, + /* 1370 */ 98, 193, 115, 193, 291, 216, 217, 193, 59, 216, + /* 1380 */ 217, 161, 216, 217, 216, 217, 216, 217, 131, 193, + /* 1390 */ 71, 193, 216, 217, 216, 217, 216, 217, 193, 260, + /* 1400 */ 216, 217, 19, 264, 85, 133, 244, 100, 193, 90, + /* 1410 */ 138, 139, 216, 217, 216, 217, 254, 244, 193, 100, + /* 1420 */ 193, 216, 217, 116, 117, 106, 107, 254, 121, 193, + /* 1430 */ 115, 216, 217, 114, 162, 116, 117, 118, 115, 244, + /* 1440 */ 121, 216, 217, 216, 217, 193, 309, 193, 31, 254, + /* 1450 */ 313, 309, 216, 217, 309, 313, 39, 193, 313, 309, + /* 1460 */ 153, 154, 155, 313, 193, 150, 25, 144, 216, 217, /* 1470 */ 216, 217, 153, 154, 155, 156, 157, 0, 1, 2, - /* 1480 */ 216, 217, 5, 115, 158, 193, 160, 10, 11, 12, - /* 1490 */ 13, 14, 193, 59, 17, 126, 193, 19, 20, 129, - /* 1500 */ 22, 193, 22, 22, 24, 193, 23, 30, 25, 32, - /* 1510 */ 19, 20, 144, 22, 36, 216, 217, 40, 193, 216, - /* 1520 */ 217, 193, 152, 129, 216, 217, 193, 36, 216, 217, - /* 1530 */ 193, 99, 193, 193, 53, 193, 193, 59, 23, 193, - /* 1540 */ 25, 216, 217, 193, 216, 217, 152, 70, 59, 71, - /* 1550 */ 59, 117, 193, 216, 217, 78, 216, 217, 81, 216, - /* 1560 */ 217, 318, 71, 85, 193, 133, 193, 193, 90, 23, - /* 1570 */ 23, 25, 25, 120, 121, 98, 85, 193, 100, 193, - /* 1580 */ 23, 90, 25, 121, 106, 107, 19, 216, 217, 216, + /* 1480 */ 216, 217, 5, 149, 150, 22, 193, 10, 11, 12, + /* 1490 */ 13, 14, 193, 158, 17, 160, 193, 19, 20, 116, + /* 1500 */ 22, 25, 193, 24, 22, 193, 24, 30, 226, 32, + /* 1510 */ 19, 20, 226, 22, 36, 193, 53, 40, 193, 216, + /* 1520 */ 217, 193, 23, 193, 25, 216, 217, 36, 216, 217, + /* 1530 */ 193, 99, 193, 193, 22, 193, 193, 59, 216, 217, + /* 1540 */ 193, 216, 217, 193, 216, 217, 193, 70, 129, 71, + /* 1550 */ 59, 129, 193, 216, 217, 78, 216, 217, 81, 216, + /* 1560 */ 217, 193, 71, 85, 193, 133, 193, 126, 90, 216, + /* 1570 */ 217, 152, 258, 61, 152, 98, 85, 193, 100, 193, + /* 1580 */ 23, 90, 25, 121, 106, 107, 23, 216, 217, 216, /* 1590 */ 217, 100, 114, 131, 116, 117, 118, 106, 107, 121, - /* 1600 */ 216, 217, 216, 217, 193, 114, 117, 116, 117, 118, - /* 1610 */ 133, 193, 121, 193, 193, 138, 139, 193, 23, 193, - /* 1620 */ 25, 23, 23, 25, 25, 7, 8, 216, 217, 193, - 
/* 1630 */ 193, 153, 154, 155, 156, 157, 216, 217, 193, 162, + /* 1600 */ 216, 217, 216, 217, 193, 114, 193, 116, 117, 118, + /* 1610 */ 133, 22, 121, 193, 59, 138, 139, 193, 142, 193, + /* 1620 */ 141, 23, 23, 25, 25, 120, 121, 216, 217, 216, + /* 1630 */ 217, 153, 154, 155, 156, 157, 216, 217, 19, 162, /* 1640 */ 216, 217, 216, 217, 153, 154, 155, 156, 157, 1, - /* 1650 */ 2, 193, 193, 5, 19, 20, 59, 22, 10, 11, - /* 1660 */ 12, 13, 14, 193, 97, 17, 193, 23, 193, 25, - /* 1670 */ 288, 36, 193, 242, 216, 217, 236, 23, 30, 25, + /* 1650 */ 2, 193, 59, 5, 19, 20, 318, 22, 10, 11, + /* 1660 */ 12, 13, 14, 193, 59, 17, 193, 23, 23, 25, + /* 1670 */ 25, 36, 117, 193, 216, 217, 193, 23, 30, 25, /* 1680 */ 32, 19, 20, 23, 22, 25, 216, 217, 40, 216, - /* 1690 */ 217, 216, 217, 193, 59, 216, 217, 193, 36, 83, - /* 1700 */ 84, 153, 153, 155, 155, 23, 71, 25, 23, 193, - /* 1710 */ 25, 193, 193, 193, 117, 193, 193, 193, 70, 193, - /* 1720 */ 193, 59, 193, 255, 255, 287, 78, 255, 243, 81, - /* 1730 */ 191, 255, 297, 71, 271, 100, 293, 245, 267, 214, - /* 1740 */ 246, 106, 107, 108, 246, 271, 98, 245, 293, 114, - /* 1750 */ 220, 116, 117, 118, 267, 271, 121, 271, 225, 219, - /* 1760 */ 229, 219, 100, 219, 259, 259, 259, 259, 106, 107, - /* 1770 */ 249, 196, 60, 280, 141, 243, 114, 249, 116, 117, - /* 1780 */ 118, 133, 245, 121, 200, 297, 138, 139, 153, 154, - /* 1790 */ 155, 156, 157, 297, 200, 38, 19, 20, 151, 22, - /* 1800 */ 200, 150, 140, 294, 294, 22, 272, 43, 234, 18, - /* 1810 */ 162, 270, 200, 36, 237, 153, 154, 155, 156, 157, - /* 1820 */ 237, 283, 237, 237, 18, 199, 149, 246, 272, 270, - /* 1830 */ 272, 200, 158, 246, 246, 234, 59, 234, 246, 199, - /* 1840 */ 290, 62, 289, 200, 199, 22, 221, 115, 71, 200, - /* 1850 */ 200, 199, 199, 221, 218, 218, 19, 20, 64, 22, - /* 1860 */ 218, 227, 22, 224, 126, 224, 165, 221, 24, 305, - /* 1870 */ 200, 113, 312, 36, 218, 220, 218, 100, 282, 218, - /* 1880 */ 91, 218, 317, 106, 107, 221, 227, 282, 317, 82, - /* 1890 */ 148, 114, 265, 116, 117, 118, 59, 145, 121, 22, - /* 1900 */ 277, 158, 200, 265, 25, 202, 147, 250, 71, 279, - /* 1910 */ 13, 146, 194, 194, 249, 248, 250, 140, 247, 246, - /* 1920 */ 6, 192, 192, 192, 303, 303, 213, 207, 300, 213, - /* 1930 */ 153, 154, 155, 156, 157, 213, 213, 100, 213, 222, - /* 1940 */ 207, 214, 214, 106, 107, 4, 222, 207, 3, 22, - /* 1950 */ 163, 114, 15, 116, 117, 118, 16, 23, 121, 23, - /* 1960 */ 139, 151, 130, 25, 142, 16, 24, 20, 144, 1, - /* 1970 */ 142, 130, 130, 61, 53, 53, 37, 151, 53, 53, - /* 1980 */ 130, 116, 34, 1, 141, 5, 22, 115, 161, 141, - /* 1990 */ 153, 154, 155, 156, 157, 25, 68, 68, 75, 41, - /* 2000 */ 115, 24, 131, 20, 19, 125, 22, 96, 22, 22, - /* 2010 */ 67, 23, 22, 67, 59, 24, 22, 28, 67, 23, - /* 2020 */ 22, 22, 149, 23, 23, 23, 116, 23, 25, 37, - /* 2030 */ 97, 141, 23, 23, 22, 143, 25, 75, 88, 34, - /* 2040 */ 34, 34, 34, 86, 75, 93, 23, 34, 22, 34, - /* 2050 */ 25, 24, 34, 25, 23, 142, 23, 142, 44, 23, - /* 2060 */ 23, 23, 11, 23, 25, 22, 22, 22, 15, 23, - /* 2070 */ 23, 22, 22, 25, 1, 1, 141, 25, 23, 135, - /* 2080 */ 319, 319, 319, 319, 319, 319, 319, 319, 319, 319, - /* 2090 */ 319, 319, 319, 319, 141, 141, 319, 141, 319, 319, + /* 1690 */ 217, 7, 8, 23, 59, 25, 83, 84, 36, 23, + /* 1700 */ 193, 25, 23, 23, 25, 25, 71, 153, 145, 155, + /* 1710 */ 117, 153, 23, 155, 25, 23, 97, 25, 70, 193, + /* 1720 */ 193, 59, 117, 236, 193, 193, 78, 193, 193, 81, + /* 1730 */ 141, 193, 193, 71, 193, 100, 288, 287, 242, 255, + /* 1740 */ 255, 106, 107, 108, 255, 255, 98, 243, 297, 114, + /* 1750 */ 
214, 116, 117, 118, 245, 191, 121, 271, 293, 267, + /* 1760 */ 267, 246, 100, 246, 245, 271, 271, 293, 106, 107, + /* 1770 */ 220, 271, 229, 225, 249, 219, 114, 259, 116, 117, + /* 1780 */ 118, 133, 259, 121, 219, 219, 138, 139, 153, 154, + /* 1790 */ 155, 156, 157, 280, 249, 243, 19, 20, 245, 22, + /* 1800 */ 196, 259, 140, 259, 60, 297, 141, 297, 200, 200, + /* 1810 */ 162, 38, 200, 36, 294, 153, 154, 155, 156, 157, + /* 1820 */ 151, 150, 294, 283, 22, 43, 234, 18, 237, 200, + /* 1830 */ 270, 272, 237, 237, 237, 18, 59, 199, 270, 149, + /* 1840 */ 246, 272, 272, 200, 234, 234, 246, 246, 71, 246, + /* 1850 */ 199, 158, 290, 62, 22, 200, 19, 20, 199, 22, + /* 1860 */ 289, 221, 221, 200, 200, 199, 199, 115, 218, 64, + /* 1870 */ 218, 218, 22, 36, 227, 126, 227, 100, 165, 221, + /* 1880 */ 224, 224, 24, 106, 107, 312, 218, 305, 113, 282, + /* 1890 */ 91, 114, 220, 116, 117, 118, 59, 282, 121, 218, + /* 1900 */ 218, 218, 200, 317, 317, 82, 221, 265, 71, 148, + /* 1910 */ 145, 265, 22, 277, 200, 158, 279, 140, 147, 25, + /* 1920 */ 146, 202, 248, 250, 249, 247, 13, 250, 194, 194, + /* 1930 */ 153, 154, 155, 156, 157, 6, 303, 100, 192, 192, + /* 1940 */ 246, 213, 192, 106, 107, 207, 213, 207, 222, 213, + /* 1950 */ 213, 114, 222, 116, 117, 118, 214, 214, 121, 4, + /* 1960 */ 207, 213, 3, 22, 303, 15, 163, 16, 23, 23, + /* 1970 */ 139, 151, 130, 25, 20, 142, 24, 16, 144, 1, + /* 1980 */ 142, 130, 130, 61, 37, 53, 300, 151, 53, 53, + /* 1990 */ 153, 154, 155, 156, 157, 53, 130, 116, 34, 1, + /* 2000 */ 141, 5, 22, 115, 161, 68, 25, 68, 75, 41, + /* 2010 */ 141, 115, 24, 20, 19, 131, 125, 23, 28, 22, + /* 2020 */ 67, 22, 22, 22, 67, 59, 24, 96, 22, 67, + /* 2030 */ 23, 149, 22, 25, 23, 23, 23, 22, 34, 141, + /* 2040 */ 37, 97, 23, 23, 116, 22, 143, 25, 34, 75, + /* 2050 */ 34, 34, 34, 88, 75, 34, 86, 23, 22, 34, + /* 2060 */ 93, 24, 34, 25, 25, 142, 142, 23, 44, 23, + /* 2070 */ 23, 23, 23, 11, 23, 25, 22, 22, 22, 141, + /* 2080 */ 23, 23, 22, 22, 25, 15, 1, 23, 25, 1, + /* 2090 */ 141, 135, 319, 319, 319, 319, 319, 319, 319, 141, /* 2100 */ 319, 319, 319, 319, 319, 319, 319, 319, 319, 319, /* 2110 */ 319, 319, 319, 319, 319, 319, 319, 319, 319, 319, /* 2120 */ 319, 319, 319, 319, 319, 319, 319, 319, 319, 319, @@ -164525,176 +171415,177 @@ static const YYCODETYPE yy_lookahead[] = { /* 2250 */ 319, 319, 319, 319, 319, 319, 319, 319, 319, 319, /* 2260 */ 319, 319, 319, 319, 319, 319, 319, 319, 319, 319, /* 2270 */ 319, 319, 319, 319, 319, 319, 319, 319, 319, 319, - /* 2280 */ 319, 319, 319, + /* 2280 */ 319, 319, 319, 319, 319, }; -#define YY_SHIFT_COUNT (575) +#define YY_SHIFT_COUNT (578) #define YY_SHIFT_MIN (0) -#define YY_SHIFT_MAX (2074) +#define YY_SHIFT_MAX (2088) static const unsigned short int yy_shift_ofst[] = { /* 0 */ 1648, 1477, 1272, 322, 322, 1, 1319, 1478, 1491, 1837, /* 10 */ 1837, 1837, 471, 0, 0, 214, 1093, 1837, 1837, 1837, /* 20 */ 1837, 1837, 1837, 1837, 1837, 1837, 1837, 1837, 1837, 1837, - /* 30 */ 271, 271, 1219, 1219, 216, 88, 1, 1, 1, 1, - /* 40 */ 1, 40, 111, 258, 361, 469, 512, 583, 622, 693, - /* 50 */ 732, 803, 842, 913, 1073, 1093, 1093, 1093, 1093, 1093, + /* 30 */ 1837, 271, 271, 1219, 1219, 216, 88, 1, 1, 1, + /* 40 */ 1, 1, 40, 111, 258, 361, 469, 512, 583, 622, + /* 50 */ 693, 732, 803, 842, 913, 1073, 1093, 1093, 1093, 1093, /* 60 */ 1093, 1093, 1093, 1093, 1093, 1093, 1093, 1093, 1093, 1093, - /* 70 */ 1093, 1093, 1093, 1113, 1093, 1216, 957, 957, 1635, 1662, - /* 80 */ 1777, 1837, 1837, 1837, 1837, 1837, 1837, 1837, 1837, 1837, + /* 70 */ 1093, 1093, 1093, 
1093, 1113, 1093, 1216, 957, 957, 1635, + /* 80 */ 1662, 1777, 1837, 1837, 1837, 1837, 1837, 1837, 1837, 1837, /* 90 */ 1837, 1837, 1837, 1837, 1837, 1837, 1837, 1837, 1837, 1837, /* 100 */ 1837, 1837, 1837, 1837, 1837, 1837, 1837, 1837, 1837, 1837, /* 110 */ 1837, 1837, 1837, 1837, 1837, 1837, 1837, 1837, 1837, 1837, /* 120 */ 1837, 1837, 1837, 1837, 1837, 1837, 1837, 1837, 1837, 1837, - /* 130 */ 137, 181, 181, 181, 181, 181, 181, 181, 94, 430, - /* 140 */ 66, 65, 112, 366, 533, 533, 740, 1261, 533, 533, - /* 150 */ 79, 79, 533, 412, 412, 412, 77, 412, 123, 113, - /* 160 */ 113, 22, 22, 2098, 2098, 328, 328, 328, 239, 468, - /* 170 */ 468, 468, 468, 1015, 1015, 409, 366, 1129, 1186, 533, - /* 180 */ 533, 533, 533, 533, 533, 533, 533, 533, 533, 533, - /* 190 */ 533, 533, 533, 533, 533, 533, 533, 533, 533, 969, - /* 200 */ 621, 621, 533, 642, 788, 788, 1228, 1228, 822, 822, - /* 210 */ 67, 1274, 2098, 2098, 2098, 2098, 2098, 2098, 2098, 1307, - /* 220 */ 954, 954, 585, 472, 640, 387, 695, 538, 541, 700, - /* 230 */ 533, 533, 533, 533, 533, 533, 533, 533, 533, 533, - /* 240 */ 222, 533, 533, 533, 533, 533, 533, 533, 533, 533, - /* 250 */ 533, 533, 533, 1179, 1179, 1179, 533, 533, 533, 565, - /* 260 */ 533, 533, 533, 916, 1144, 533, 533, 1288, 533, 533, - /* 270 */ 533, 533, 533, 533, 533, 533, 639, 1330, 209, 1076, - /* 280 */ 1076, 1076, 1076, 580, 209, 209, 1313, 768, 917, 649, - /* 290 */ 1181, 1316, 405, 1316, 1238, 249, 1181, 1181, 249, 1181, - /* 300 */ 405, 1238, 1369, 464, 1259, 1012, 1012, 1012, 1368, 1368, - /* 310 */ 1368, 1368, 184, 184, 1326, 904, 1287, 1480, 1712, 1712, - /* 320 */ 1633, 1633, 1757, 1757, 1633, 1647, 1651, 1783, 1764, 1791, - /* 330 */ 1791, 1791, 1791, 1633, 1806, 1677, 1651, 1651, 1677, 1783, - /* 340 */ 1764, 1677, 1764, 1677, 1633, 1806, 1674, 1779, 1633, 1806, - /* 350 */ 1823, 1633, 1806, 1633, 1806, 1823, 1732, 1732, 1732, 1794, - /* 360 */ 1840, 1840, 1823, 1732, 1738, 1732, 1794, 1732, 1732, 1701, - /* 370 */ 1844, 1758, 1758, 1823, 1633, 1789, 1789, 1807, 1807, 1742, - /* 380 */ 1752, 1877, 1633, 1743, 1742, 1759, 1765, 1677, 1879, 1897, - /* 390 */ 1897, 1914, 1914, 1914, 2098, 2098, 2098, 2098, 2098, 2098, - /* 400 */ 2098, 2098, 2098, 2098, 2098, 2098, 2098, 2098, 2098, 207, - /* 410 */ 1095, 331, 620, 903, 806, 1074, 1483, 1432, 1481, 1322, - /* 420 */ 1370, 1394, 1515, 1291, 1546, 1547, 1557, 1595, 1598, 1599, - /* 430 */ 1434, 1453, 1618, 1462, 1567, 1489, 1644, 1654, 1616, 1660, - /* 440 */ 1548, 1549, 1682, 1685, 1597, 742, 1941, 1945, 1927, 1787, - /* 450 */ 1937, 1940, 1934, 1936, 1821, 1810, 1832, 1938, 1938, 1942, - /* 460 */ 1822, 1947, 1824, 1949, 1968, 1828, 1841, 1938, 1842, 1912, - /* 470 */ 1939, 1938, 1826, 1921, 1922, 1925, 1926, 1850, 1865, 1948, - /* 480 */ 1843, 1982, 1980, 1964, 1872, 1827, 1928, 1970, 1929, 1923, - /* 490 */ 1958, 1848, 1885, 1977, 1983, 1985, 1871, 1880, 1984, 1943, - /* 500 */ 1986, 1987, 1988, 1990, 1946, 1955, 1991, 1911, 1989, 1994, - /* 510 */ 1951, 1992, 1996, 1873, 1998, 2000, 2001, 2002, 2003, 2004, - /* 520 */ 1999, 1933, 1890, 2009, 2010, 1910, 2005, 2012, 1892, 2011, - /* 530 */ 2006, 2007, 2008, 2013, 1950, 1962, 1957, 2014, 1969, 1952, - /* 540 */ 2015, 2023, 2026, 2027, 2025, 2028, 2018, 1913, 1915, 2031, - /* 550 */ 2011, 2033, 2036, 2037, 2038, 2039, 2040, 2043, 2051, 2044, - /* 560 */ 2045, 2046, 2047, 2049, 2050, 2048, 1944, 1935, 1953, 1954, - /* 570 */ 1956, 2052, 2055, 2053, 2073, 2074, + /* 130 */ 1837, 137, 181, 181, 181, 181, 181, 181, 181, 94, + /* 140 */ 430, 66, 65, 112, 366, 533, 
533, 740, 1257, 533, + /* 150 */ 533, 79, 79, 533, 412, 412, 412, 77, 412, 123, + /* 160 */ 113, 113, 113, 22, 22, 2100, 2100, 328, 328, 328, + /* 170 */ 239, 468, 468, 468, 468, 1015, 1015, 409, 366, 1187, + /* 180 */ 1232, 533, 533, 533, 533, 533, 533, 533, 533, 533, + /* 190 */ 533, 533, 533, 533, 533, 533, 533, 533, 533, 533, + /* 200 */ 533, 969, 621, 621, 533, 642, 788, 788, 1133, 1133, + /* 210 */ 822, 822, 67, 1193, 2100, 2100, 2100, 2100, 2100, 2100, + /* 220 */ 2100, 1307, 954, 954, 585, 472, 640, 387, 695, 538, + /* 230 */ 541, 700, 533, 533, 533, 533, 533, 533, 533, 533, + /* 240 */ 533, 533, 222, 533, 533, 533, 533, 533, 533, 533, + /* 250 */ 533, 533, 533, 533, 533, 1213, 1213, 1213, 533, 533, + /* 260 */ 533, 565, 533, 533, 533, 916, 1147, 533, 533, 1288, + /* 270 */ 533, 533, 533, 533, 533, 533, 533, 533, 639, 1280, + /* 280 */ 209, 1129, 1129, 1129, 1129, 580, 209, 209, 1209, 768, + /* 290 */ 917, 649, 1315, 1334, 405, 1334, 1383, 249, 1315, 1315, + /* 300 */ 249, 1315, 405, 1383, 1441, 464, 1245, 1417, 1417, 1417, + /* 310 */ 1323, 1323, 1323, 1323, 184, 184, 1335, 1476, 856, 1482, + /* 320 */ 1744, 1744, 1665, 1665, 1773, 1773, 1665, 1669, 1671, 1802, + /* 330 */ 1782, 1809, 1809, 1809, 1809, 1665, 1817, 1690, 1671, 1671, + /* 340 */ 1690, 1802, 1782, 1690, 1782, 1690, 1665, 1817, 1693, 1791, + /* 350 */ 1665, 1817, 1832, 1665, 1817, 1665, 1817, 1832, 1752, 1752, + /* 360 */ 1752, 1805, 1850, 1850, 1832, 1752, 1749, 1752, 1805, 1752, + /* 370 */ 1752, 1713, 1858, 1775, 1775, 1832, 1665, 1799, 1799, 1823, + /* 380 */ 1823, 1761, 1765, 1890, 1665, 1757, 1761, 1771, 1774, 1690, + /* 390 */ 1894, 1913, 1913, 1929, 1929, 1929, 2100, 2100, 2100, 2100, + /* 400 */ 2100, 2100, 2100, 2100, 2100, 2100, 2100, 2100, 2100, 2100, + /* 410 */ 2100, 207, 1220, 331, 620, 967, 806, 1074, 1499, 1432, + /* 420 */ 1463, 1479, 1419, 1422, 1557, 1512, 1598, 1599, 1644, 1645, + /* 430 */ 1654, 1660, 1555, 1505, 1684, 1462, 1670, 1563, 1619, 1593, + /* 440 */ 1676, 1679, 1613, 1680, 1554, 1558, 1689, 1692, 1605, 1589, + /* 450 */ 1955, 1959, 1941, 1803, 1950, 1951, 1945, 1946, 1831, 1820, + /* 460 */ 1842, 1948, 1948, 1952, 1833, 1954, 1834, 1961, 1978, 1838, + /* 470 */ 1851, 1948, 1852, 1922, 1947, 1948, 1836, 1932, 1935, 1936, + /* 480 */ 1942, 1866, 1881, 1964, 1859, 1998, 1996, 1980, 1888, 1843, + /* 490 */ 1937, 1981, 1939, 1933, 1968, 1869, 1896, 1988, 1993, 1995, + /* 500 */ 1884, 1891, 1997, 1953, 1999, 2000, 1994, 2001, 1957, 1966, + /* 510 */ 2002, 1931, 1990, 2006, 1962, 2003, 2007, 2004, 1882, 2010, + /* 520 */ 2011, 2012, 2008, 2013, 2015, 1944, 1898, 2019, 2020, 1928, + /* 530 */ 2014, 2023, 1903, 2022, 2016, 2017, 2018, 2021, 1965, 1974, + /* 540 */ 1970, 2024, 1979, 1967, 2025, 2034, 2036, 2037, 2038, 2039, + /* 550 */ 2028, 1923, 1924, 2044, 2022, 2046, 2047, 2048, 2049, 2050, + /* 560 */ 2051, 2054, 2062, 2055, 2056, 2057, 2058, 2060, 2061, 2059, + /* 570 */ 1956, 1938, 1949, 1958, 2063, 2064, 2070, 2085, 2088, }; -#define YY_REDUCE_COUNT (408) +#define YY_REDUCE_COUNT (410) #define YY_REDUCE_MIN (-271) -#define YY_REDUCE_MAX (1740) +#define YY_REDUCE_MAX (1753) static const short yy_reduce_ofst[] = { /* 0 */ -125, 733, 789, 241, 293, -123, -193, -191, -183, -187, /* 10 */ 166, 238, 133, -207, -199, -267, -176, -6, 204, 489, - /* 20 */ 576, -175, 598, 686, 615, 725, 860, 778, 781, 857, - /* 30 */ 616, 887, 87, 240, -192, 408, 626, 796, 843, 854, - /* 40 */ 1003, -271, -271, -271, -271, -271, -271, -271, -271, -271, + /* 20 */ 576, 598, -175, 686, 860, 615, 725, 1014, 778, 781, 
+ /* 30 */ 857, 616, 887, 87, 240, -192, 408, 626, 796, 843, + /* 40 */ 854, 1004, -271, -271, -271, -271, -271, -271, -271, -271, /* 50 */ -271, -271, -271, -271, -271, -271, -271, -271, -271, -271, /* 60 */ -271, -271, -271, -271, -271, -271, -271, -271, -271, -271, - /* 70 */ -271, -271, -271, -271, -271, -271, -271, -271, 80, 83, - /* 80 */ 313, 886, 888, 996, 1034, 1059, 1081, 1100, 1117, 1152, - /* 90 */ 1155, 1163, 1165, 1167, 1169, 1172, 1180, 1182, 1184, 1198, - /* 100 */ 1200, 1213, 1215, 1225, 1227, 1252, 1254, 1264, 1299, 1303, - /* 110 */ 1308, 1312, 1325, 1328, 1337, 1340, 1343, 1371, 1373, 1384, - /* 120 */ 1386, 1411, 1420, 1424, 1426, 1458, 1470, 1473, 1475, 1479, - /* 130 */ -271, -271, -271, -271, -271, -271, -271, -271, -271, -271, - /* 140 */ -271, 138, 459, 396, -158, 470, 302, -212, 521, 201, - /* 150 */ -195, -92, 559, 630, 632, 630, -271, 632, 901, 63, - /* 160 */ 407, -271, -271, -271, -271, 161, 161, 161, 251, 335, - /* 170 */ 847, 960, 980, 537, 588, 618, 628, 688, 688, -166, - /* 180 */ -161, 674, 790, 794, 799, 851, 852, -122, 680, -120, - /* 190 */ 995, 1038, 415, 1051, 893, 798, 962, 400, 1086, 779, - /* 200 */ 923, 924, 263, 1041, 979, 990, 1083, 1097, 1031, 1194, - /* 210 */ 362, 994, 1139, 1005, 1037, 1202, 1205, 1195, 1210, -194, - /* 220 */ 56, 185, -135, 232, 522, 560, 601, 617, 669, 683, - /* 230 */ 711, 856, 908, 941, 1048, 1101, 1147, 1257, 1262, 1265, - /* 240 */ 392, 1292, 1333, 1339, 1342, 1346, 1350, 1359, 1374, 1418, - /* 250 */ 1421, 1436, 1437, 593, 755, 770, 997, 1445, 1459, 1209, - /* 260 */ 1500, 1504, 1516, 1132, 1243, 1518, 1519, 1440, 1520, 560, - /* 270 */ 1522, 1523, 1524, 1526, 1527, 1529, 1382, 1438, 1431, 1468, - /* 280 */ 1469, 1472, 1476, 1209, 1431, 1431, 1485, 1525, 1539, 1435, - /* 290 */ 1463, 1471, 1492, 1487, 1443, 1494, 1474, 1484, 1498, 1486, - /* 300 */ 1502, 1455, 1530, 1531, 1533, 1540, 1542, 1544, 1505, 1506, - /* 310 */ 1507, 1508, 1521, 1528, 1493, 1537, 1532, 1575, 1488, 1496, - /* 320 */ 1584, 1594, 1509, 1510, 1600, 1538, 1534, 1541, 1574, 1577, - /* 330 */ 1583, 1585, 1586, 1612, 1626, 1581, 1556, 1558, 1587, 1559, - /* 340 */ 1601, 1588, 1603, 1592, 1631, 1640, 1550, 1553, 1643, 1645, - /* 350 */ 1625, 1649, 1652, 1650, 1653, 1632, 1636, 1637, 1642, 1634, - /* 360 */ 1639, 1641, 1646, 1656, 1655, 1658, 1659, 1661, 1663, 1560, - /* 370 */ 1564, 1596, 1605, 1664, 1670, 1565, 1571, 1627, 1638, 1657, - /* 380 */ 1665, 1623, 1702, 1630, 1666, 1667, 1671, 1673, 1703, 1718, - /* 390 */ 1719, 1729, 1730, 1731, 1621, 1622, 1628, 1720, 1713, 1716, - /* 400 */ 1722, 1723, 1733, 1717, 1724, 1727, 1728, 1725, 1740, + /* 70 */ -271, -271, -271, -271, -271, -271, -271, -271, -271, 80, + /* 80 */ 83, 313, 886, 888, 918, 938, 1021, 1034, 1036, 1141, + /* 90 */ 1159, 1163, 1166, 1168, 1170, 1176, 1178, 1180, 1184, 1196, + /* 100 */ 1198, 1205, 1215, 1225, 1227, 1236, 1252, 1254, 1264, 1303, + /* 110 */ 1309, 1312, 1322, 1325, 1328, 1337, 1340, 1343, 1353, 1371, + /* 120 */ 1373, 1384, 1386, 1411, 1413, 1420, 1424, 1426, 1458, 1470, + /* 130 */ 1473, -271, -271, -271, -271, -271, -271, -271, -271, -271, + /* 140 */ -271, -271, 138, 459, 396, -158, 470, 302, -212, 521, + /* 150 */ 201, -195, -92, 559, 630, 632, 630, -271, 632, 901, + /* 160 */ 63, 407, 670, -271, -271, -271, -271, 161, 161, 161, + /* 170 */ 251, 335, 847, 979, 1097, 537, 588, 618, 628, 688, + /* 180 */ 688, -166, -161, 674, 787, 794, 799, 852, 996, -122, + /* 190 */ 837, -120, 1018, 1035, 415, 1047, 1001, 958, 1082, 400, + /* 200 */ 1099, 779, 1137, 1142, 263, 
1083, 1145, 1150, 1041, 1139, + /* 210 */ 965, 1050, 362, 849, 752, 629, 675, 1162, 1173, 1090, + /* 220 */ 1195, -194, 56, 185, -135, 232, 522, 560, 571, 601, + /* 230 */ 617, 669, 683, 711, 850, 893, 1000, 1040, 1049, 1081, + /* 240 */ 1087, 1101, 392, 1114, 1123, 1155, 1161, 1175, 1271, 1293, + /* 250 */ 1299, 1330, 1339, 1342, 1347, 593, 1282, 1286, 1350, 1359, + /* 260 */ 1368, 1314, 1480, 1483, 1507, 1085, 1338, 1526, 1527, 1487, + /* 270 */ 1531, 560, 1532, 1534, 1535, 1538, 1539, 1541, 1448, 1450, + /* 280 */ 1496, 1484, 1485, 1489, 1490, 1314, 1496, 1496, 1504, 1536, + /* 290 */ 1564, 1451, 1486, 1492, 1509, 1493, 1465, 1515, 1494, 1495, + /* 300 */ 1517, 1500, 1519, 1474, 1550, 1543, 1548, 1556, 1565, 1566, + /* 310 */ 1518, 1523, 1542, 1544, 1525, 1545, 1513, 1553, 1552, 1604, + /* 320 */ 1508, 1510, 1608, 1609, 1520, 1528, 1612, 1540, 1559, 1560, + /* 330 */ 1592, 1591, 1595, 1596, 1597, 1629, 1638, 1594, 1569, 1570, + /* 340 */ 1600, 1568, 1610, 1601, 1611, 1603, 1643, 1651, 1562, 1571, + /* 350 */ 1655, 1659, 1640, 1663, 1666, 1664, 1667, 1641, 1650, 1652, + /* 360 */ 1653, 1647, 1656, 1657, 1658, 1668, 1672, 1681, 1649, 1682, + /* 370 */ 1683, 1573, 1582, 1607, 1615, 1685, 1702, 1586, 1587, 1642, + /* 380 */ 1646, 1673, 1675, 1636, 1714, 1637, 1677, 1674, 1678, 1694, + /* 390 */ 1719, 1734, 1735, 1746, 1747, 1750, 1633, 1661, 1686, 1738, + /* 400 */ 1728, 1733, 1736, 1737, 1740, 1726, 1730, 1742, 1743, 1748, + /* 410 */ 1753, }; static const YYACTIONTYPE yy_default[] = { - /* 0 */ 1647, 1647, 1647, 1475, 1240, 1351, 1240, 1240, 1240, 1475, - /* 10 */ 1475, 1475, 1240, 1381, 1381, 1528, 1273, 1240, 1240, 1240, - /* 20 */ 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1474, 1240, 1240, - /* 30 */ 1240, 1240, 1563, 1563, 1240, 1240, 1240, 1240, 1240, 1240, - /* 40 */ 1240, 1240, 1390, 1240, 1397, 1240, 1240, 1240, 1240, 1240, - /* 50 */ 1476, 1477, 1240, 1240, 1240, 1527, 1529, 1492, 1404, 1403, - /* 60 */ 1402, 1401, 1510, 1369, 1395, 1388, 1392, 1470, 1471, 1469, - /* 70 */ 1473, 1477, 1476, 1240, 1391, 1438, 1454, 1437, 1240, 1240, - /* 80 */ 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, - /* 90 */ 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, - /* 100 */ 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, - /* 110 */ 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, - /* 120 */ 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, - /* 130 */ 1446, 1453, 1452, 1451, 1460, 1450, 1447, 1440, 1439, 1441, - /* 140 */ 1442, 1240, 1240, 1264, 1240, 1240, 1261, 1315, 1240, 1240, - /* 150 */ 1240, 1240, 1240, 1547, 1546, 1240, 1443, 1240, 1273, 1432, - /* 160 */ 1431, 1457, 1444, 1456, 1455, 1535, 1599, 1598, 1493, 1240, - /* 170 */ 1240, 1240, 1240, 1240, 1240, 1563, 1240, 1240, 1240, 1240, - /* 180 */ 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, - /* 190 */ 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1371, - /* 200 */ 1563, 1563, 1240, 1273, 1563, 1563, 1372, 1372, 1269, 1269, - /* 210 */ 1375, 1240, 1542, 1342, 1342, 1342, 1342, 1351, 1342, 1240, - /* 220 */ 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, - /* 230 */ 1240, 1240, 1240, 1240, 1532, 1530, 1240, 1240, 1240, 1240, - /* 240 */ 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, - /* 250 */ 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, - /* 260 */ 1240, 1240, 1240, 1347, 1240, 1240, 1240, 1240, 1240, 1240, - /* 270 */ 1240, 1240, 1240, 1240, 1240, 1592, 1240, 1505, 1329, 1347, - /* 280 */ 1347, 1347, 1347, 1349, 
1330, 1328, 1341, 1274, 1247, 1639, - /* 290 */ 1407, 1396, 1348, 1396, 1636, 1394, 1407, 1407, 1394, 1407, - /* 300 */ 1348, 1636, 1290, 1615, 1285, 1381, 1381, 1381, 1371, 1371, - /* 310 */ 1371, 1371, 1375, 1375, 1472, 1348, 1341, 1240, 1639, 1639, - /* 320 */ 1357, 1357, 1638, 1638, 1357, 1493, 1623, 1416, 1318, 1324, - /* 330 */ 1324, 1324, 1324, 1357, 1258, 1394, 1623, 1623, 1394, 1416, - /* 340 */ 1318, 1394, 1318, 1394, 1357, 1258, 1509, 1633, 1357, 1258, - /* 350 */ 1483, 1357, 1258, 1357, 1258, 1483, 1316, 1316, 1316, 1305, - /* 360 */ 1240, 1240, 1483, 1316, 1290, 1316, 1305, 1316, 1316, 1581, - /* 370 */ 1240, 1487, 1487, 1483, 1357, 1573, 1573, 1384, 1384, 1389, - /* 380 */ 1375, 1478, 1357, 1240, 1389, 1387, 1385, 1394, 1308, 1595, - /* 390 */ 1595, 1591, 1591, 1591, 1644, 1644, 1542, 1608, 1273, 1273, - /* 400 */ 1273, 1273, 1608, 1292, 1292, 1274, 1274, 1273, 1608, 1240, - /* 410 */ 1240, 1240, 1240, 1240, 1240, 1603, 1240, 1537, 1494, 1361, - /* 420 */ 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, - /* 430 */ 1240, 1240, 1240, 1240, 1548, 1240, 1240, 1240, 1240, 1240, - /* 440 */ 1240, 1240, 1240, 1240, 1240, 1421, 1240, 1243, 1539, 1240, - /* 450 */ 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1398, 1399, 1362, - /* 460 */ 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1413, 1240, 1240, - /* 470 */ 1240, 1408, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, - /* 480 */ 1635, 1240, 1240, 1240, 1240, 1240, 1240, 1508, 1507, 1240, - /* 490 */ 1240, 1359, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, - /* 500 */ 1240, 1240, 1240, 1240, 1240, 1288, 1240, 1240, 1240, 1240, - /* 510 */ 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, - /* 520 */ 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1386, - /* 530 */ 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, - /* 540 */ 1240, 1240, 1240, 1240, 1578, 1376, 1240, 1240, 1240, 1240, - /* 550 */ 1626, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, 1240, - /* 560 */ 1240, 1240, 1240, 1240, 1240, 1619, 1332, 1423, 1240, 1422, - /* 570 */ 1426, 1262, 1240, 1252, 1240, 1240, + /* 0 */ 1648, 1648, 1648, 1478, 1243, 1354, 1243, 1243, 1243, 1478, + /* 10 */ 1478, 1478, 1243, 1384, 1384, 1531, 1276, 1243, 1243, 1243, + /* 20 */ 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1477, 1243, + /* 30 */ 1243, 1243, 1243, 1564, 1564, 1243, 1243, 1243, 1243, 1243, + /* 40 */ 1243, 1243, 1243, 1393, 1243, 1400, 1243, 1243, 1243, 1243, + /* 50 */ 1243, 1479, 1480, 1243, 1243, 1243, 1530, 1532, 1495, 1407, + /* 60 */ 1406, 1405, 1404, 1513, 1372, 1398, 1391, 1395, 1474, 1475, + /* 70 */ 1473, 1626, 1480, 1479, 1243, 1394, 1442, 1458, 1441, 1243, + /* 80 */ 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, + /* 90 */ 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, + /* 100 */ 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, + /* 110 */ 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, + /* 120 */ 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, + /* 130 */ 1243, 1450, 1457, 1456, 1455, 1464, 1454, 1451, 1444, 1443, + /* 140 */ 1445, 1446, 1243, 1243, 1267, 1243, 1243, 1264, 1318, 1243, + /* 150 */ 1243, 1243, 1243, 1243, 1550, 1549, 1243, 1447, 1243, 1276, + /* 160 */ 1435, 1434, 1433, 1461, 1448, 1460, 1459, 1538, 1600, 1599, + /* 170 */ 1496, 1243, 1243, 1243, 1243, 1243, 1243, 1564, 1243, 1243, + /* 180 */ 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, + /* 190 */ 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, + /* 200 */ 1243, 1374, 
1564, 1564, 1243, 1276, 1564, 1564, 1375, 1375, + /* 210 */ 1272, 1272, 1378, 1243, 1545, 1345, 1345, 1345, 1345, 1354, + /* 220 */ 1345, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, + /* 230 */ 1243, 1243, 1243, 1243, 1243, 1243, 1535, 1533, 1243, 1243, + /* 240 */ 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, + /* 250 */ 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, + /* 260 */ 1243, 1243, 1243, 1243, 1243, 1350, 1243, 1243, 1243, 1243, + /* 270 */ 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1593, 1243, 1508, + /* 280 */ 1332, 1350, 1350, 1350, 1350, 1352, 1333, 1331, 1344, 1277, + /* 290 */ 1250, 1640, 1410, 1399, 1351, 1399, 1637, 1397, 1410, 1410, + /* 300 */ 1397, 1410, 1351, 1637, 1293, 1615, 1288, 1384, 1384, 1384, + /* 310 */ 1374, 1374, 1374, 1374, 1378, 1378, 1476, 1351, 1344, 1243, + /* 320 */ 1640, 1640, 1360, 1360, 1639, 1639, 1360, 1496, 1623, 1419, + /* 330 */ 1321, 1327, 1327, 1327, 1327, 1360, 1261, 1397, 1623, 1623, + /* 340 */ 1397, 1419, 1321, 1397, 1321, 1397, 1360, 1261, 1512, 1634, + /* 350 */ 1360, 1261, 1486, 1360, 1261, 1360, 1261, 1486, 1319, 1319, + /* 360 */ 1319, 1308, 1243, 1243, 1486, 1319, 1293, 1319, 1308, 1319, + /* 370 */ 1319, 1582, 1243, 1490, 1490, 1486, 1360, 1574, 1574, 1387, + /* 380 */ 1387, 1392, 1378, 1481, 1360, 1243, 1392, 1390, 1388, 1397, + /* 390 */ 1311, 1596, 1596, 1592, 1592, 1592, 1645, 1645, 1545, 1608, + /* 400 */ 1276, 1276, 1276, 1276, 1608, 1295, 1295, 1277, 1277, 1276, + /* 410 */ 1608, 1243, 1243, 1243, 1243, 1243, 1243, 1603, 1243, 1540, + /* 420 */ 1497, 1364, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, + /* 430 */ 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1551, 1243, + /* 440 */ 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1424, + /* 450 */ 1243, 1246, 1542, 1243, 1243, 1243, 1243, 1243, 1243, 1243, + /* 460 */ 1243, 1401, 1402, 1365, 1243, 1243, 1243, 1243, 1243, 1243, + /* 470 */ 1243, 1416, 1243, 1243, 1243, 1411, 1243, 1243, 1243, 1243, + /* 480 */ 1243, 1243, 1243, 1243, 1636, 1243, 1243, 1243, 1243, 1243, + /* 490 */ 1243, 1511, 1510, 1243, 1243, 1362, 1243, 1243, 1243, 1243, + /* 500 */ 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1291, + /* 510 */ 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, + /* 520 */ 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, + /* 530 */ 1243, 1243, 1243, 1389, 1243, 1243, 1243, 1243, 1243, 1243, + /* 540 */ 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1579, 1379, + /* 550 */ 1243, 1243, 1243, 1243, 1627, 1243, 1243, 1243, 1243, 1243, + /* 560 */ 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1619, + /* 570 */ 1335, 1425, 1243, 1428, 1265, 1243, 1255, 1243, 1243, }; /********** End of lemon-generated parsing tables *****************************/ @@ -165491,59 +172382,59 @@ static const char *const yyRuleName[] = { /* 175 */ "idlist ::= idlist COMMA nm", /* 176 */ "idlist ::= nm", /* 177 */ "expr ::= LP expr RP", - /* 178 */ "expr ::= ID|INDEXED", - /* 179 */ "expr ::= JOIN_KW", - /* 180 */ "expr ::= nm DOT nm", - /* 181 */ "expr ::= nm DOT nm DOT nm", - /* 182 */ "term ::= NULL|FLOAT|BLOB", - /* 183 */ "term ::= STRING", - /* 184 */ "term ::= INTEGER", - /* 185 */ "expr ::= VARIABLE", - /* 186 */ "expr ::= expr COLLATE ID|STRING", - /* 187 */ "expr ::= CAST LP expr AS typetoken RP", - /* 188 */ "expr ::= ID|INDEXED LP distinct exprlist RP", - /* 189 */ "expr ::= ID|INDEXED LP STAR RP", - /* 190 */ "expr ::= ID|INDEXED LP distinct exprlist RP filter_over", - /* 191 */ "expr ::= ID|INDEXED LP STAR 
RP filter_over", - /* 192 */ "term ::= CTIME_KW", - /* 193 */ "expr ::= LP nexprlist COMMA expr RP", - /* 194 */ "expr ::= expr AND expr", - /* 195 */ "expr ::= expr OR expr", - /* 196 */ "expr ::= expr LT|GT|GE|LE expr", - /* 197 */ "expr ::= expr EQ|NE expr", - /* 198 */ "expr ::= expr BITAND|BITOR|LSHIFT|RSHIFT expr", - /* 199 */ "expr ::= expr PLUS|MINUS expr", - /* 200 */ "expr ::= expr STAR|SLASH|REM expr", - /* 201 */ "expr ::= expr CONCAT expr", - /* 202 */ "likeop ::= NOT LIKE_KW|MATCH", - /* 203 */ "expr ::= expr likeop expr", - /* 204 */ "expr ::= expr likeop expr ESCAPE expr", - /* 205 */ "expr ::= expr ISNULL|NOTNULL", - /* 206 */ "expr ::= expr NOT NULL", - /* 207 */ "expr ::= expr IS expr", - /* 208 */ "expr ::= expr IS NOT expr", - /* 209 */ "expr ::= expr IS NOT DISTINCT FROM expr", - /* 210 */ "expr ::= expr IS DISTINCT FROM expr", - /* 211 */ "expr ::= NOT expr", - /* 212 */ "expr ::= BITNOT expr", - /* 213 */ "expr ::= PLUS|MINUS expr", - /* 214 */ "expr ::= expr PTR expr", - /* 215 */ "between_op ::= BETWEEN", - /* 216 */ "between_op ::= NOT BETWEEN", - /* 217 */ "expr ::= expr between_op expr AND expr", - /* 218 */ "in_op ::= IN", - /* 219 */ "in_op ::= NOT IN", - /* 220 */ "expr ::= expr in_op LP exprlist RP", - /* 221 */ "expr ::= LP select RP", - /* 222 */ "expr ::= expr in_op LP select RP", - /* 223 */ "expr ::= expr in_op nm dbnm paren_exprlist", - /* 224 */ "expr ::= EXISTS LP select RP", - /* 225 */ "expr ::= CASE case_operand case_exprlist case_else END", - /* 226 */ "case_exprlist ::= case_exprlist WHEN expr THEN expr", - /* 227 */ "case_exprlist ::= WHEN expr THEN expr", - /* 228 */ "case_else ::= ELSE expr", - /* 229 */ "case_else ::=", - /* 230 */ "case_operand ::= expr", + /* 178 */ "expr ::= ID|INDEXED|JOIN_KW", + /* 179 */ "expr ::= nm DOT nm", + /* 180 */ "expr ::= nm DOT nm DOT nm", + /* 181 */ "term ::= NULL|FLOAT|BLOB", + /* 182 */ "term ::= STRING", + /* 183 */ "term ::= INTEGER", + /* 184 */ "expr ::= VARIABLE", + /* 185 */ "expr ::= expr COLLATE ID|STRING", + /* 186 */ "expr ::= CAST LP expr AS typetoken RP", + /* 187 */ "expr ::= ID|INDEXED|JOIN_KW LP distinct exprlist RP", + /* 188 */ "expr ::= ID|INDEXED|JOIN_KW LP distinct exprlist ORDER BY sortlist RP", + /* 189 */ "expr ::= ID|INDEXED|JOIN_KW LP STAR RP", + /* 190 */ "expr ::= ID|INDEXED|JOIN_KW LP distinct exprlist RP filter_over", + /* 191 */ "expr ::= ID|INDEXED|JOIN_KW LP distinct exprlist ORDER BY sortlist RP filter_over", + /* 192 */ "expr ::= ID|INDEXED|JOIN_KW LP STAR RP filter_over", + /* 193 */ "term ::= CTIME_KW", + /* 194 */ "expr ::= LP nexprlist COMMA expr RP", + /* 195 */ "expr ::= expr AND expr", + /* 196 */ "expr ::= expr OR expr", + /* 197 */ "expr ::= expr LT|GT|GE|LE expr", + /* 198 */ "expr ::= expr EQ|NE expr", + /* 199 */ "expr ::= expr BITAND|BITOR|LSHIFT|RSHIFT expr", + /* 200 */ "expr ::= expr PLUS|MINUS expr", + /* 201 */ "expr ::= expr STAR|SLASH|REM expr", + /* 202 */ "expr ::= expr CONCAT expr", + /* 203 */ "likeop ::= NOT LIKE_KW|MATCH", + /* 204 */ "expr ::= expr likeop expr", + /* 205 */ "expr ::= expr likeop expr ESCAPE expr", + /* 206 */ "expr ::= expr ISNULL|NOTNULL", + /* 207 */ "expr ::= expr NOT NULL", + /* 208 */ "expr ::= expr IS expr", + /* 209 */ "expr ::= expr IS NOT expr", + /* 210 */ "expr ::= expr IS NOT DISTINCT FROM expr", + /* 211 */ "expr ::= expr IS DISTINCT FROM expr", + /* 212 */ "expr ::= NOT expr", + /* 213 */ "expr ::= BITNOT expr", + /* 214 */ "expr ::= PLUS|MINUS expr", + /* 215 */ "expr ::= expr PTR expr", + /* 216 */ "between_op 
::= BETWEEN", + /* 217 */ "between_op ::= NOT BETWEEN", + /* 218 */ "expr ::= expr between_op expr AND expr", + /* 219 */ "in_op ::= IN", + /* 220 */ "in_op ::= NOT IN", + /* 221 */ "expr ::= expr in_op LP exprlist RP", + /* 222 */ "expr ::= LP select RP", + /* 223 */ "expr ::= expr in_op LP select RP", + /* 224 */ "expr ::= expr in_op nm dbnm paren_exprlist", + /* 225 */ "expr ::= EXISTS LP select RP", + /* 226 */ "expr ::= CASE case_operand case_exprlist case_else END", + /* 227 */ "case_exprlist ::= case_exprlist WHEN expr THEN expr", + /* 228 */ "case_exprlist ::= WHEN expr THEN expr", + /* 229 */ "case_else ::= ELSE expr", + /* 230 */ "case_else ::=", /* 231 */ "case_operand ::=", /* 232 */ "exprlist ::=", /* 233 */ "nexprlist ::= nexprlist COMMA expr", @@ -165624,100 +172515,100 @@ static const char *const yyRuleName[] = { /* 308 */ "wqitem ::= nm eidlist_opt wqas LP select RP", /* 309 */ "wqlist ::= wqitem", /* 310 */ "wqlist ::= wqlist COMMA wqitem", - /* 311 */ "windowdefn_list ::= windowdefn", - /* 312 */ "windowdefn_list ::= windowdefn_list COMMA windowdefn", - /* 313 */ "windowdefn ::= nm AS LP window RP", - /* 314 */ "window ::= PARTITION BY nexprlist orderby_opt frame_opt", - /* 315 */ "window ::= nm PARTITION BY nexprlist orderby_opt frame_opt", - /* 316 */ "window ::= ORDER BY sortlist frame_opt", - /* 317 */ "window ::= nm ORDER BY sortlist frame_opt", - /* 318 */ "window ::= frame_opt", - /* 319 */ "window ::= nm frame_opt", - /* 320 */ "frame_opt ::=", - /* 321 */ "frame_opt ::= range_or_rows frame_bound_s frame_exclude_opt", - /* 322 */ "frame_opt ::= range_or_rows BETWEEN frame_bound_s AND frame_bound_e frame_exclude_opt", - /* 323 */ "range_or_rows ::= RANGE|ROWS|GROUPS", - /* 324 */ "frame_bound_s ::= frame_bound", - /* 325 */ "frame_bound_s ::= UNBOUNDED PRECEDING", - /* 326 */ "frame_bound_e ::= frame_bound", - /* 327 */ "frame_bound_e ::= UNBOUNDED FOLLOWING", - /* 328 */ "frame_bound ::= expr PRECEDING|FOLLOWING", - /* 329 */ "frame_bound ::= CURRENT ROW", - /* 330 */ "frame_exclude_opt ::=", - /* 331 */ "frame_exclude_opt ::= EXCLUDE frame_exclude", - /* 332 */ "frame_exclude ::= NO OTHERS", - /* 333 */ "frame_exclude ::= CURRENT ROW", - /* 334 */ "frame_exclude ::= GROUP|TIES", - /* 335 */ "window_clause ::= WINDOW windowdefn_list", - /* 336 */ "filter_over ::= filter_clause over_clause", - /* 337 */ "filter_over ::= over_clause", - /* 338 */ "filter_over ::= filter_clause", - /* 339 */ "over_clause ::= OVER LP window RP", - /* 340 */ "over_clause ::= OVER nm", - /* 341 */ "filter_clause ::= FILTER LP WHERE expr RP", - /* 342 */ "input ::= cmdlist", - /* 343 */ "cmdlist ::= cmdlist ecmd", - /* 344 */ "cmdlist ::= ecmd", - /* 345 */ "ecmd ::= SEMI", - /* 346 */ "ecmd ::= cmdx SEMI", - /* 347 */ "ecmd ::= explain cmdx SEMI", - /* 348 */ "trans_opt ::=", - /* 349 */ "trans_opt ::= TRANSACTION", - /* 350 */ "trans_opt ::= TRANSACTION nm", - /* 351 */ "savepoint_opt ::= SAVEPOINT", - /* 352 */ "savepoint_opt ::=", - /* 353 */ "cmd ::= create_table create_table_args", - /* 354 */ "table_option_set ::= table_option", - /* 355 */ "columnlist ::= columnlist COMMA columnname carglist", - /* 356 */ "columnlist ::= columnname carglist", - /* 357 */ "nm ::= ID|INDEXED", - /* 358 */ "nm ::= STRING", - /* 359 */ "nm ::= JOIN_KW", - /* 360 */ "typetoken ::= typename", - /* 361 */ "typename ::= ID|STRING", - /* 362 */ "signed ::= plus_num", - /* 363 */ "signed ::= minus_num", - /* 364 */ "carglist ::= carglist ccons", - /* 365 */ "carglist ::=", - /* 366 */ "ccons ::= NULL 
onconf", - /* 367 */ "ccons ::= GENERATED ALWAYS AS generated", - /* 368 */ "ccons ::= AS generated", - /* 369 */ "conslist_opt ::= COMMA conslist", - /* 370 */ "conslist ::= conslist tconscomma tcons", - /* 371 */ "conslist ::= tcons", - /* 372 */ "tconscomma ::=", - /* 373 */ "defer_subclause_opt ::= defer_subclause", - /* 374 */ "resolvetype ::= raisetype", - /* 375 */ "selectnowith ::= oneselect", - /* 376 */ "oneselect ::= values", - /* 377 */ "sclp ::= selcollist COMMA", - /* 378 */ "as ::= ID|STRING", - /* 379 */ "indexed_opt ::= indexed_by", - /* 380 */ "returning ::=", - /* 381 */ "expr ::= term", - /* 382 */ "likeop ::= LIKE_KW|MATCH", - /* 383 */ "exprlist ::= nexprlist", - /* 384 */ "nmnum ::= plus_num", - /* 385 */ "nmnum ::= nm", - /* 386 */ "nmnum ::= ON", - /* 387 */ "nmnum ::= DELETE", - /* 388 */ "nmnum ::= DEFAULT", - /* 389 */ "plus_num ::= INTEGER|FLOAT", - /* 390 */ "foreach_clause ::=", - /* 391 */ "foreach_clause ::= FOR EACH ROW", - /* 392 */ "trnm ::= nm", - /* 393 */ "tridxby ::=", - /* 394 */ "database_kw_opt ::= DATABASE", - /* 395 */ "database_kw_opt ::=", - /* 396 */ "kwcolumn_opt ::=", - /* 397 */ "kwcolumn_opt ::= COLUMNKW", - /* 398 */ "vtabarglist ::= vtabarg", - /* 399 */ "vtabarglist ::= vtabarglist COMMA vtabarg", - /* 400 */ "vtabarg ::= vtabarg vtabargtoken", - /* 401 */ "anylist ::=", - /* 402 */ "anylist ::= anylist LP anylist RP", - /* 403 */ "anylist ::= anylist ANY", - /* 404 */ "with ::=", + /* 311 */ "windowdefn_list ::= windowdefn_list COMMA windowdefn", + /* 312 */ "windowdefn ::= nm AS LP window RP", + /* 313 */ "window ::= PARTITION BY nexprlist orderby_opt frame_opt", + /* 314 */ "window ::= nm PARTITION BY nexprlist orderby_opt frame_opt", + /* 315 */ "window ::= ORDER BY sortlist frame_opt", + /* 316 */ "window ::= nm ORDER BY sortlist frame_opt", + /* 317 */ "window ::= nm frame_opt", + /* 318 */ "frame_opt ::=", + /* 319 */ "frame_opt ::= range_or_rows frame_bound_s frame_exclude_opt", + /* 320 */ "frame_opt ::= range_or_rows BETWEEN frame_bound_s AND frame_bound_e frame_exclude_opt", + /* 321 */ "range_or_rows ::= RANGE|ROWS|GROUPS", + /* 322 */ "frame_bound_s ::= frame_bound", + /* 323 */ "frame_bound_s ::= UNBOUNDED PRECEDING", + /* 324 */ "frame_bound_e ::= frame_bound", + /* 325 */ "frame_bound_e ::= UNBOUNDED FOLLOWING", + /* 326 */ "frame_bound ::= expr PRECEDING|FOLLOWING", + /* 327 */ "frame_bound ::= CURRENT ROW", + /* 328 */ "frame_exclude_opt ::=", + /* 329 */ "frame_exclude_opt ::= EXCLUDE frame_exclude", + /* 330 */ "frame_exclude ::= NO OTHERS", + /* 331 */ "frame_exclude ::= CURRENT ROW", + /* 332 */ "frame_exclude ::= GROUP|TIES", + /* 333 */ "window_clause ::= WINDOW windowdefn_list", + /* 334 */ "filter_over ::= filter_clause over_clause", + /* 335 */ "filter_over ::= over_clause", + /* 336 */ "filter_over ::= filter_clause", + /* 337 */ "over_clause ::= OVER LP window RP", + /* 338 */ "over_clause ::= OVER nm", + /* 339 */ "filter_clause ::= FILTER LP WHERE expr RP", + /* 340 */ "input ::= cmdlist", + /* 341 */ "cmdlist ::= cmdlist ecmd", + /* 342 */ "cmdlist ::= ecmd", + /* 343 */ "ecmd ::= SEMI", + /* 344 */ "ecmd ::= cmdx SEMI", + /* 345 */ "ecmd ::= explain cmdx SEMI", + /* 346 */ "trans_opt ::=", + /* 347 */ "trans_opt ::= TRANSACTION", + /* 348 */ "trans_opt ::= TRANSACTION nm", + /* 349 */ "savepoint_opt ::= SAVEPOINT", + /* 350 */ "savepoint_opt ::=", + /* 351 */ "cmd ::= create_table create_table_args", + /* 352 */ "table_option_set ::= table_option", + /* 353 */ "columnlist ::= columnlist COMMA columnname 
carglist", + /* 354 */ "columnlist ::= columnname carglist", + /* 355 */ "nm ::= ID|INDEXED|JOIN_KW", + /* 356 */ "nm ::= STRING", + /* 357 */ "typetoken ::= typename", + /* 358 */ "typename ::= ID|STRING", + /* 359 */ "signed ::= plus_num", + /* 360 */ "signed ::= minus_num", + /* 361 */ "carglist ::= carglist ccons", + /* 362 */ "carglist ::=", + /* 363 */ "ccons ::= NULL onconf", + /* 364 */ "ccons ::= GENERATED ALWAYS AS generated", + /* 365 */ "ccons ::= AS generated", + /* 366 */ "conslist_opt ::= COMMA conslist", + /* 367 */ "conslist ::= conslist tconscomma tcons", + /* 368 */ "conslist ::= tcons", + /* 369 */ "tconscomma ::=", + /* 370 */ "defer_subclause_opt ::= defer_subclause", + /* 371 */ "resolvetype ::= raisetype", + /* 372 */ "selectnowith ::= oneselect", + /* 373 */ "oneselect ::= values", + /* 374 */ "sclp ::= selcollist COMMA", + /* 375 */ "as ::= ID|STRING", + /* 376 */ "indexed_opt ::= indexed_by", + /* 377 */ "returning ::=", + /* 378 */ "expr ::= term", + /* 379 */ "likeop ::= LIKE_KW|MATCH", + /* 380 */ "case_operand ::= expr", + /* 381 */ "exprlist ::= nexprlist", + /* 382 */ "nmnum ::= plus_num", + /* 383 */ "nmnum ::= nm", + /* 384 */ "nmnum ::= ON", + /* 385 */ "nmnum ::= DELETE", + /* 386 */ "nmnum ::= DEFAULT", + /* 387 */ "plus_num ::= INTEGER|FLOAT", + /* 388 */ "foreach_clause ::=", + /* 389 */ "foreach_clause ::= FOR EACH ROW", + /* 390 */ "trnm ::= nm", + /* 391 */ "tridxby ::=", + /* 392 */ "database_kw_opt ::= DATABASE", + /* 393 */ "database_kw_opt ::=", + /* 394 */ "kwcolumn_opt ::=", + /* 395 */ "kwcolumn_opt ::= COLUMNKW", + /* 396 */ "vtabarglist ::= vtabarg", + /* 397 */ "vtabarglist ::= vtabarglist COMMA vtabarg", + /* 398 */ "vtabarg ::= vtabarg vtabargtoken", + /* 399 */ "anylist ::=", + /* 400 */ "anylist ::= anylist LP anylist RP", + /* 401 */ "anylist ::= anylist ANY", + /* 402 */ "with ::=", + /* 403 */ "windowdefn_list ::= windowdefn", + /* 404 */ "window ::= frame_opt", }; #endif /* NDEBUG */ @@ -166402,59 +173293,59 @@ static const YYCODETYPE yyRuleInfoLhs[] = { 263, /* (175) idlist ::= idlist COMMA nm */ 263, /* (176) idlist ::= nm */ 217, /* (177) expr ::= LP expr RP */ - 217, /* (178) expr ::= ID|INDEXED */ - 217, /* (179) expr ::= JOIN_KW */ - 217, /* (180) expr ::= nm DOT nm */ - 217, /* (181) expr ::= nm DOT nm DOT nm */ - 216, /* (182) term ::= NULL|FLOAT|BLOB */ - 216, /* (183) term ::= STRING */ - 216, /* (184) term ::= INTEGER */ - 217, /* (185) expr ::= VARIABLE */ - 217, /* (186) expr ::= expr COLLATE ID|STRING */ - 217, /* (187) expr ::= CAST LP expr AS typetoken RP */ - 217, /* (188) expr ::= ID|INDEXED LP distinct exprlist RP */ - 217, /* (189) expr ::= ID|INDEXED LP STAR RP */ - 217, /* (190) expr ::= ID|INDEXED LP distinct exprlist RP filter_over */ - 217, /* (191) expr ::= ID|INDEXED LP STAR RP filter_over */ - 216, /* (192) term ::= CTIME_KW */ - 217, /* (193) expr ::= LP nexprlist COMMA expr RP */ - 217, /* (194) expr ::= expr AND expr */ - 217, /* (195) expr ::= expr OR expr */ - 217, /* (196) expr ::= expr LT|GT|GE|LE expr */ - 217, /* (197) expr ::= expr EQ|NE expr */ - 217, /* (198) expr ::= expr BITAND|BITOR|LSHIFT|RSHIFT expr */ - 217, /* (199) expr ::= expr PLUS|MINUS expr */ - 217, /* (200) expr ::= expr STAR|SLASH|REM expr */ - 217, /* (201) expr ::= expr CONCAT expr */ - 274, /* (202) likeop ::= NOT LIKE_KW|MATCH */ - 217, /* (203) expr ::= expr likeop expr */ - 217, /* (204) expr ::= expr likeop expr ESCAPE expr */ - 217, /* (205) expr ::= expr ISNULL|NOTNULL */ - 217, /* (206) expr ::= expr NOT NULL */ - 
217, /* (207) expr ::= expr IS expr */ - 217, /* (208) expr ::= expr IS NOT expr */ - 217, /* (209) expr ::= expr IS NOT DISTINCT FROM expr */ - 217, /* (210) expr ::= expr IS DISTINCT FROM expr */ - 217, /* (211) expr ::= NOT expr */ - 217, /* (212) expr ::= BITNOT expr */ - 217, /* (213) expr ::= PLUS|MINUS expr */ - 217, /* (214) expr ::= expr PTR expr */ - 275, /* (215) between_op ::= BETWEEN */ - 275, /* (216) between_op ::= NOT BETWEEN */ - 217, /* (217) expr ::= expr between_op expr AND expr */ - 276, /* (218) in_op ::= IN */ - 276, /* (219) in_op ::= NOT IN */ - 217, /* (220) expr ::= expr in_op LP exprlist RP */ - 217, /* (221) expr ::= LP select RP */ - 217, /* (222) expr ::= expr in_op LP select RP */ - 217, /* (223) expr ::= expr in_op nm dbnm paren_exprlist */ - 217, /* (224) expr ::= EXISTS LP select RP */ - 217, /* (225) expr ::= CASE case_operand case_exprlist case_else END */ - 279, /* (226) case_exprlist ::= case_exprlist WHEN expr THEN expr */ - 279, /* (227) case_exprlist ::= WHEN expr THEN expr */ - 280, /* (228) case_else ::= ELSE expr */ - 280, /* (229) case_else ::= */ - 278, /* (230) case_operand ::= expr */ + 217, /* (178) expr ::= ID|INDEXED|JOIN_KW */ + 217, /* (179) expr ::= nm DOT nm */ + 217, /* (180) expr ::= nm DOT nm DOT nm */ + 216, /* (181) term ::= NULL|FLOAT|BLOB */ + 216, /* (182) term ::= STRING */ + 216, /* (183) term ::= INTEGER */ + 217, /* (184) expr ::= VARIABLE */ + 217, /* (185) expr ::= expr COLLATE ID|STRING */ + 217, /* (186) expr ::= CAST LP expr AS typetoken RP */ + 217, /* (187) expr ::= ID|INDEXED|JOIN_KW LP distinct exprlist RP */ + 217, /* (188) expr ::= ID|INDEXED|JOIN_KW LP distinct exprlist ORDER BY sortlist RP */ + 217, /* (189) expr ::= ID|INDEXED|JOIN_KW LP STAR RP */ + 217, /* (190) expr ::= ID|INDEXED|JOIN_KW LP distinct exprlist RP filter_over */ + 217, /* (191) expr ::= ID|INDEXED|JOIN_KW LP distinct exprlist ORDER BY sortlist RP filter_over */ + 217, /* (192) expr ::= ID|INDEXED|JOIN_KW LP STAR RP filter_over */ + 216, /* (193) term ::= CTIME_KW */ + 217, /* (194) expr ::= LP nexprlist COMMA expr RP */ + 217, /* (195) expr ::= expr AND expr */ + 217, /* (196) expr ::= expr OR expr */ + 217, /* (197) expr ::= expr LT|GT|GE|LE expr */ + 217, /* (198) expr ::= expr EQ|NE expr */ + 217, /* (199) expr ::= expr BITAND|BITOR|LSHIFT|RSHIFT expr */ + 217, /* (200) expr ::= expr PLUS|MINUS expr */ + 217, /* (201) expr ::= expr STAR|SLASH|REM expr */ + 217, /* (202) expr ::= expr CONCAT expr */ + 274, /* (203) likeop ::= NOT LIKE_KW|MATCH */ + 217, /* (204) expr ::= expr likeop expr */ + 217, /* (205) expr ::= expr likeop expr ESCAPE expr */ + 217, /* (206) expr ::= expr ISNULL|NOTNULL */ + 217, /* (207) expr ::= expr NOT NULL */ + 217, /* (208) expr ::= expr IS expr */ + 217, /* (209) expr ::= expr IS NOT expr */ + 217, /* (210) expr ::= expr IS NOT DISTINCT FROM expr */ + 217, /* (211) expr ::= expr IS DISTINCT FROM expr */ + 217, /* (212) expr ::= NOT expr */ + 217, /* (213) expr ::= BITNOT expr */ + 217, /* (214) expr ::= PLUS|MINUS expr */ + 217, /* (215) expr ::= expr PTR expr */ + 275, /* (216) between_op ::= BETWEEN */ + 275, /* (217) between_op ::= NOT BETWEEN */ + 217, /* (218) expr ::= expr between_op expr AND expr */ + 276, /* (219) in_op ::= IN */ + 276, /* (220) in_op ::= NOT IN */ + 217, /* (221) expr ::= expr in_op LP exprlist RP */ + 217, /* (222) expr ::= LP select RP */ + 217, /* (223) expr ::= expr in_op LP select RP */ + 217, /* (224) expr ::= expr in_op nm dbnm paren_exprlist */ + 217, /* (225) expr ::= EXISTS LP 
select RP */ + 217, /* (226) expr ::= CASE case_operand case_exprlist case_else END */ + 279, /* (227) case_exprlist ::= case_exprlist WHEN expr THEN expr */ + 279, /* (228) case_exprlist ::= WHEN expr THEN expr */ + 280, /* (229) case_else ::= ELSE expr */ + 280, /* (230) case_else ::= */ 278, /* (231) case_operand ::= */ 261, /* (232) exprlist ::= */ 253, /* (233) nexprlist ::= nexprlist COMMA expr */ @@ -166535,100 +173426,100 @@ static const YYCODETYPE yyRuleInfoLhs[] = { 304, /* (308) wqitem ::= nm eidlist_opt wqas LP select RP */ 241, /* (309) wqlist ::= wqitem */ 241, /* (310) wqlist ::= wqlist COMMA wqitem */ - 306, /* (311) windowdefn_list ::= windowdefn */ - 306, /* (312) windowdefn_list ::= windowdefn_list COMMA windowdefn */ - 307, /* (313) windowdefn ::= nm AS LP window RP */ - 308, /* (314) window ::= PARTITION BY nexprlist orderby_opt frame_opt */ - 308, /* (315) window ::= nm PARTITION BY nexprlist orderby_opt frame_opt */ - 308, /* (316) window ::= ORDER BY sortlist frame_opt */ - 308, /* (317) window ::= nm ORDER BY sortlist frame_opt */ - 308, /* (318) window ::= frame_opt */ - 308, /* (319) window ::= nm frame_opt */ - 309, /* (320) frame_opt ::= */ - 309, /* (321) frame_opt ::= range_or_rows frame_bound_s frame_exclude_opt */ - 309, /* (322) frame_opt ::= range_or_rows BETWEEN frame_bound_s AND frame_bound_e frame_exclude_opt */ - 313, /* (323) range_or_rows ::= RANGE|ROWS|GROUPS */ - 315, /* (324) frame_bound_s ::= frame_bound */ - 315, /* (325) frame_bound_s ::= UNBOUNDED PRECEDING */ - 316, /* (326) frame_bound_e ::= frame_bound */ - 316, /* (327) frame_bound_e ::= UNBOUNDED FOLLOWING */ - 314, /* (328) frame_bound ::= expr PRECEDING|FOLLOWING */ - 314, /* (329) frame_bound ::= CURRENT ROW */ - 317, /* (330) frame_exclude_opt ::= */ - 317, /* (331) frame_exclude_opt ::= EXCLUDE frame_exclude */ - 318, /* (332) frame_exclude ::= NO OTHERS */ - 318, /* (333) frame_exclude ::= CURRENT ROW */ - 318, /* (334) frame_exclude ::= GROUP|TIES */ - 251, /* (335) window_clause ::= WINDOW windowdefn_list */ - 273, /* (336) filter_over ::= filter_clause over_clause */ - 273, /* (337) filter_over ::= over_clause */ - 273, /* (338) filter_over ::= filter_clause */ - 312, /* (339) over_clause ::= OVER LP window RP */ - 312, /* (340) over_clause ::= OVER nm */ - 311, /* (341) filter_clause ::= FILTER LP WHERE expr RP */ - 185, /* (342) input ::= cmdlist */ - 186, /* (343) cmdlist ::= cmdlist ecmd */ - 186, /* (344) cmdlist ::= ecmd */ - 187, /* (345) ecmd ::= SEMI */ - 187, /* (346) ecmd ::= cmdx SEMI */ - 187, /* (347) ecmd ::= explain cmdx SEMI */ - 192, /* (348) trans_opt ::= */ - 192, /* (349) trans_opt ::= TRANSACTION */ - 192, /* (350) trans_opt ::= TRANSACTION nm */ - 194, /* (351) savepoint_opt ::= SAVEPOINT */ - 194, /* (352) savepoint_opt ::= */ - 190, /* (353) cmd ::= create_table create_table_args */ - 203, /* (354) table_option_set ::= table_option */ - 201, /* (355) columnlist ::= columnlist COMMA columnname carglist */ - 201, /* (356) columnlist ::= columnname carglist */ - 193, /* (357) nm ::= ID|INDEXED */ - 193, /* (358) nm ::= STRING */ - 193, /* (359) nm ::= JOIN_KW */ - 208, /* (360) typetoken ::= typename */ - 209, /* (361) typename ::= ID|STRING */ - 210, /* (362) signed ::= plus_num */ - 210, /* (363) signed ::= minus_num */ - 207, /* (364) carglist ::= carglist ccons */ - 207, /* (365) carglist ::= */ - 215, /* (366) ccons ::= NULL onconf */ - 215, /* (367) ccons ::= GENERATED ALWAYS AS generated */ - 215, /* (368) ccons ::= AS generated */ - 202, /* (369) 
conslist_opt ::= COMMA conslist */ - 228, /* (370) conslist ::= conslist tconscomma tcons */ - 228, /* (371) conslist ::= tcons */ - 229, /* (372) tconscomma ::= */ - 233, /* (373) defer_subclause_opt ::= defer_subclause */ - 235, /* (374) resolvetype ::= raisetype */ - 239, /* (375) selectnowith ::= oneselect */ - 240, /* (376) oneselect ::= values */ - 254, /* (377) sclp ::= selcollist COMMA */ - 255, /* (378) as ::= ID|STRING */ - 264, /* (379) indexed_opt ::= indexed_by */ - 272, /* (380) returning ::= */ - 217, /* (381) expr ::= term */ - 274, /* (382) likeop ::= LIKE_KW|MATCH */ - 261, /* (383) exprlist ::= nexprlist */ - 284, /* (384) nmnum ::= plus_num */ - 284, /* (385) nmnum ::= nm */ - 284, /* (386) nmnum ::= ON */ - 284, /* (387) nmnum ::= DELETE */ - 284, /* (388) nmnum ::= DEFAULT */ - 211, /* (389) plus_num ::= INTEGER|FLOAT */ - 289, /* (390) foreach_clause ::= */ - 289, /* (391) foreach_clause ::= FOR EACH ROW */ - 292, /* (392) trnm ::= nm */ - 293, /* (393) tridxby ::= */ - 294, /* (394) database_kw_opt ::= DATABASE */ - 294, /* (395) database_kw_opt ::= */ - 297, /* (396) kwcolumn_opt ::= */ - 297, /* (397) kwcolumn_opt ::= COLUMNKW */ - 299, /* (398) vtabarglist ::= vtabarg */ - 299, /* (399) vtabarglist ::= vtabarglist COMMA vtabarg */ - 300, /* (400) vtabarg ::= vtabarg vtabargtoken */ - 303, /* (401) anylist ::= */ - 303, /* (402) anylist ::= anylist LP anylist RP */ - 303, /* (403) anylist ::= anylist ANY */ - 266, /* (404) with ::= */ + 306, /* (311) windowdefn_list ::= windowdefn_list COMMA windowdefn */ + 307, /* (312) windowdefn ::= nm AS LP window RP */ + 308, /* (313) window ::= PARTITION BY nexprlist orderby_opt frame_opt */ + 308, /* (314) window ::= nm PARTITION BY nexprlist orderby_opt frame_opt */ + 308, /* (315) window ::= ORDER BY sortlist frame_opt */ + 308, /* (316) window ::= nm ORDER BY sortlist frame_opt */ + 308, /* (317) window ::= nm frame_opt */ + 309, /* (318) frame_opt ::= */ + 309, /* (319) frame_opt ::= range_or_rows frame_bound_s frame_exclude_opt */ + 309, /* (320) frame_opt ::= range_or_rows BETWEEN frame_bound_s AND frame_bound_e frame_exclude_opt */ + 313, /* (321) range_or_rows ::= RANGE|ROWS|GROUPS */ + 315, /* (322) frame_bound_s ::= frame_bound */ + 315, /* (323) frame_bound_s ::= UNBOUNDED PRECEDING */ + 316, /* (324) frame_bound_e ::= frame_bound */ + 316, /* (325) frame_bound_e ::= UNBOUNDED FOLLOWING */ + 314, /* (326) frame_bound ::= expr PRECEDING|FOLLOWING */ + 314, /* (327) frame_bound ::= CURRENT ROW */ + 317, /* (328) frame_exclude_opt ::= */ + 317, /* (329) frame_exclude_opt ::= EXCLUDE frame_exclude */ + 318, /* (330) frame_exclude ::= NO OTHERS */ + 318, /* (331) frame_exclude ::= CURRENT ROW */ + 318, /* (332) frame_exclude ::= GROUP|TIES */ + 251, /* (333) window_clause ::= WINDOW windowdefn_list */ + 273, /* (334) filter_over ::= filter_clause over_clause */ + 273, /* (335) filter_over ::= over_clause */ + 273, /* (336) filter_over ::= filter_clause */ + 312, /* (337) over_clause ::= OVER LP window RP */ + 312, /* (338) over_clause ::= OVER nm */ + 311, /* (339) filter_clause ::= FILTER LP WHERE expr RP */ + 185, /* (340) input ::= cmdlist */ + 186, /* (341) cmdlist ::= cmdlist ecmd */ + 186, /* (342) cmdlist ::= ecmd */ + 187, /* (343) ecmd ::= SEMI */ + 187, /* (344) ecmd ::= cmdx SEMI */ + 187, /* (345) ecmd ::= explain cmdx SEMI */ + 192, /* (346) trans_opt ::= */ + 192, /* (347) trans_opt ::= TRANSACTION */ + 192, /* (348) trans_opt ::= TRANSACTION nm */ + 194, /* (349) savepoint_opt ::= SAVEPOINT */ + 194, /* 
(350) savepoint_opt ::= */ + 190, /* (351) cmd ::= create_table create_table_args */ + 203, /* (352) table_option_set ::= table_option */ + 201, /* (353) columnlist ::= columnlist COMMA columnname carglist */ + 201, /* (354) columnlist ::= columnname carglist */ + 193, /* (355) nm ::= ID|INDEXED|JOIN_KW */ + 193, /* (356) nm ::= STRING */ + 208, /* (357) typetoken ::= typename */ + 209, /* (358) typename ::= ID|STRING */ + 210, /* (359) signed ::= plus_num */ + 210, /* (360) signed ::= minus_num */ + 207, /* (361) carglist ::= carglist ccons */ + 207, /* (362) carglist ::= */ + 215, /* (363) ccons ::= NULL onconf */ + 215, /* (364) ccons ::= GENERATED ALWAYS AS generated */ + 215, /* (365) ccons ::= AS generated */ + 202, /* (366) conslist_opt ::= COMMA conslist */ + 228, /* (367) conslist ::= conslist tconscomma tcons */ + 228, /* (368) conslist ::= tcons */ + 229, /* (369) tconscomma ::= */ + 233, /* (370) defer_subclause_opt ::= defer_subclause */ + 235, /* (371) resolvetype ::= raisetype */ + 239, /* (372) selectnowith ::= oneselect */ + 240, /* (373) oneselect ::= values */ + 254, /* (374) sclp ::= selcollist COMMA */ + 255, /* (375) as ::= ID|STRING */ + 264, /* (376) indexed_opt ::= indexed_by */ + 272, /* (377) returning ::= */ + 217, /* (378) expr ::= term */ + 274, /* (379) likeop ::= LIKE_KW|MATCH */ + 278, /* (380) case_operand ::= expr */ + 261, /* (381) exprlist ::= nexprlist */ + 284, /* (382) nmnum ::= plus_num */ + 284, /* (383) nmnum ::= nm */ + 284, /* (384) nmnum ::= ON */ + 284, /* (385) nmnum ::= DELETE */ + 284, /* (386) nmnum ::= DEFAULT */ + 211, /* (387) plus_num ::= INTEGER|FLOAT */ + 289, /* (388) foreach_clause ::= */ + 289, /* (389) foreach_clause ::= FOR EACH ROW */ + 292, /* (390) trnm ::= nm */ + 293, /* (391) tridxby ::= */ + 294, /* (392) database_kw_opt ::= DATABASE */ + 294, /* (393) database_kw_opt ::= */ + 297, /* (394) kwcolumn_opt ::= */ + 297, /* (395) kwcolumn_opt ::= COLUMNKW */ + 299, /* (396) vtabarglist ::= vtabarg */ + 299, /* (397) vtabarglist ::= vtabarglist COMMA vtabarg */ + 300, /* (398) vtabarg ::= vtabarg vtabargtoken */ + 303, /* (399) anylist ::= */ + 303, /* (400) anylist ::= anylist LP anylist RP */ + 303, /* (401) anylist ::= anylist ANY */ + 266, /* (402) with ::= */ + 306, /* (403) windowdefn_list ::= windowdefn */ + 308, /* (404) window ::= frame_opt */ }; /* For rule J, yyRuleInfoNRhs[J] contains the negative of the number @@ -166812,59 +173703,59 @@ static const signed char yyRuleInfoNRhs[] = { -3, /* (175) idlist ::= idlist COMMA nm */ -1, /* (176) idlist ::= nm */ -3, /* (177) expr ::= LP expr RP */ - -1, /* (178) expr ::= ID|INDEXED */ - -1, /* (179) expr ::= JOIN_KW */ - -3, /* (180) expr ::= nm DOT nm */ - -5, /* (181) expr ::= nm DOT nm DOT nm */ - -1, /* (182) term ::= NULL|FLOAT|BLOB */ - -1, /* (183) term ::= STRING */ - -1, /* (184) term ::= INTEGER */ - -1, /* (185) expr ::= VARIABLE */ - -3, /* (186) expr ::= expr COLLATE ID|STRING */ - -6, /* (187) expr ::= CAST LP expr AS typetoken RP */ - -5, /* (188) expr ::= ID|INDEXED LP distinct exprlist RP */ - -4, /* (189) expr ::= ID|INDEXED LP STAR RP */ - -6, /* (190) expr ::= ID|INDEXED LP distinct exprlist RP filter_over */ - -5, /* (191) expr ::= ID|INDEXED LP STAR RP filter_over */ - -1, /* (192) term ::= CTIME_KW */ - -5, /* (193) expr ::= LP nexprlist COMMA expr RP */ - -3, /* (194) expr ::= expr AND expr */ - -3, /* (195) expr ::= expr OR expr */ - -3, /* (196) expr ::= expr LT|GT|GE|LE expr */ - -3, /* (197) expr ::= expr EQ|NE expr */ - -3, /* (198) expr ::= 
expr BITAND|BITOR|LSHIFT|RSHIFT expr */ - -3, /* (199) expr ::= expr PLUS|MINUS expr */ - -3, /* (200) expr ::= expr STAR|SLASH|REM expr */ - -3, /* (201) expr ::= expr CONCAT expr */ - -2, /* (202) likeop ::= NOT LIKE_KW|MATCH */ - -3, /* (203) expr ::= expr likeop expr */ - -5, /* (204) expr ::= expr likeop expr ESCAPE expr */ - -2, /* (205) expr ::= expr ISNULL|NOTNULL */ - -3, /* (206) expr ::= expr NOT NULL */ - -3, /* (207) expr ::= expr IS expr */ - -4, /* (208) expr ::= expr IS NOT expr */ - -6, /* (209) expr ::= expr IS NOT DISTINCT FROM expr */ - -5, /* (210) expr ::= expr IS DISTINCT FROM expr */ - -2, /* (211) expr ::= NOT expr */ - -2, /* (212) expr ::= BITNOT expr */ - -2, /* (213) expr ::= PLUS|MINUS expr */ - -3, /* (214) expr ::= expr PTR expr */ - -1, /* (215) between_op ::= BETWEEN */ - -2, /* (216) between_op ::= NOT BETWEEN */ - -5, /* (217) expr ::= expr between_op expr AND expr */ - -1, /* (218) in_op ::= IN */ - -2, /* (219) in_op ::= NOT IN */ - -5, /* (220) expr ::= expr in_op LP exprlist RP */ - -3, /* (221) expr ::= LP select RP */ - -5, /* (222) expr ::= expr in_op LP select RP */ - -5, /* (223) expr ::= expr in_op nm dbnm paren_exprlist */ - -4, /* (224) expr ::= EXISTS LP select RP */ - -5, /* (225) expr ::= CASE case_operand case_exprlist case_else END */ - -5, /* (226) case_exprlist ::= case_exprlist WHEN expr THEN expr */ - -4, /* (227) case_exprlist ::= WHEN expr THEN expr */ - -2, /* (228) case_else ::= ELSE expr */ - 0, /* (229) case_else ::= */ - -1, /* (230) case_operand ::= expr */ + -1, /* (178) expr ::= ID|INDEXED|JOIN_KW */ + -3, /* (179) expr ::= nm DOT nm */ + -5, /* (180) expr ::= nm DOT nm DOT nm */ + -1, /* (181) term ::= NULL|FLOAT|BLOB */ + -1, /* (182) term ::= STRING */ + -1, /* (183) term ::= INTEGER */ + -1, /* (184) expr ::= VARIABLE */ + -3, /* (185) expr ::= expr COLLATE ID|STRING */ + -6, /* (186) expr ::= CAST LP expr AS typetoken RP */ + -5, /* (187) expr ::= ID|INDEXED|JOIN_KW LP distinct exprlist RP */ + -8, /* (188) expr ::= ID|INDEXED|JOIN_KW LP distinct exprlist ORDER BY sortlist RP */ + -4, /* (189) expr ::= ID|INDEXED|JOIN_KW LP STAR RP */ + -6, /* (190) expr ::= ID|INDEXED|JOIN_KW LP distinct exprlist RP filter_over */ + -9, /* (191) expr ::= ID|INDEXED|JOIN_KW LP distinct exprlist ORDER BY sortlist RP filter_over */ + -5, /* (192) expr ::= ID|INDEXED|JOIN_KW LP STAR RP filter_over */ + -1, /* (193) term ::= CTIME_KW */ + -5, /* (194) expr ::= LP nexprlist COMMA expr RP */ + -3, /* (195) expr ::= expr AND expr */ + -3, /* (196) expr ::= expr OR expr */ + -3, /* (197) expr ::= expr LT|GT|GE|LE expr */ + -3, /* (198) expr ::= expr EQ|NE expr */ + -3, /* (199) expr ::= expr BITAND|BITOR|LSHIFT|RSHIFT expr */ + -3, /* (200) expr ::= expr PLUS|MINUS expr */ + -3, /* (201) expr ::= expr STAR|SLASH|REM expr */ + -3, /* (202) expr ::= expr CONCAT expr */ + -2, /* (203) likeop ::= NOT LIKE_KW|MATCH */ + -3, /* (204) expr ::= expr likeop expr */ + -5, /* (205) expr ::= expr likeop expr ESCAPE expr */ + -2, /* (206) expr ::= expr ISNULL|NOTNULL */ + -3, /* (207) expr ::= expr NOT NULL */ + -3, /* (208) expr ::= expr IS expr */ + -4, /* (209) expr ::= expr IS NOT expr */ + -6, /* (210) expr ::= expr IS NOT DISTINCT FROM expr */ + -5, /* (211) expr ::= expr IS DISTINCT FROM expr */ + -2, /* (212) expr ::= NOT expr */ + -2, /* (213) expr ::= BITNOT expr */ + -2, /* (214) expr ::= PLUS|MINUS expr */ + -3, /* (215) expr ::= expr PTR expr */ + -1, /* (216) between_op ::= BETWEEN */ + -2, /* (217) between_op ::= NOT BETWEEN */ + -5, /* 
(218) expr ::= expr between_op expr AND expr */ + -1, /* (219) in_op ::= IN */ + -2, /* (220) in_op ::= NOT IN */ + -5, /* (221) expr ::= expr in_op LP exprlist RP */ + -3, /* (222) expr ::= LP select RP */ + -5, /* (223) expr ::= expr in_op LP select RP */ + -5, /* (224) expr ::= expr in_op nm dbnm paren_exprlist */ + -4, /* (225) expr ::= EXISTS LP select RP */ + -5, /* (226) expr ::= CASE case_operand case_exprlist case_else END */ + -5, /* (227) case_exprlist ::= case_exprlist WHEN expr THEN expr */ + -4, /* (228) case_exprlist ::= WHEN expr THEN expr */ + -2, /* (229) case_else ::= ELSE expr */ + 0, /* (230) case_else ::= */ 0, /* (231) case_operand ::= */ 0, /* (232) exprlist ::= */ -3, /* (233) nexprlist ::= nexprlist COMMA expr */ @@ -166945,100 +173836,100 @@ static const signed char yyRuleInfoNRhs[] = { -6, /* (308) wqitem ::= nm eidlist_opt wqas LP select RP */ -1, /* (309) wqlist ::= wqitem */ -3, /* (310) wqlist ::= wqlist COMMA wqitem */ - -1, /* (311) windowdefn_list ::= windowdefn */ - -3, /* (312) windowdefn_list ::= windowdefn_list COMMA windowdefn */ - -5, /* (313) windowdefn ::= nm AS LP window RP */ - -5, /* (314) window ::= PARTITION BY nexprlist orderby_opt frame_opt */ - -6, /* (315) window ::= nm PARTITION BY nexprlist orderby_opt frame_opt */ - -4, /* (316) window ::= ORDER BY sortlist frame_opt */ - -5, /* (317) window ::= nm ORDER BY sortlist frame_opt */ - -1, /* (318) window ::= frame_opt */ - -2, /* (319) window ::= nm frame_opt */ - 0, /* (320) frame_opt ::= */ - -3, /* (321) frame_opt ::= range_or_rows frame_bound_s frame_exclude_opt */ - -6, /* (322) frame_opt ::= range_or_rows BETWEEN frame_bound_s AND frame_bound_e frame_exclude_opt */ - -1, /* (323) range_or_rows ::= RANGE|ROWS|GROUPS */ - -1, /* (324) frame_bound_s ::= frame_bound */ - -2, /* (325) frame_bound_s ::= UNBOUNDED PRECEDING */ - -1, /* (326) frame_bound_e ::= frame_bound */ - -2, /* (327) frame_bound_e ::= UNBOUNDED FOLLOWING */ - -2, /* (328) frame_bound ::= expr PRECEDING|FOLLOWING */ - -2, /* (329) frame_bound ::= CURRENT ROW */ - 0, /* (330) frame_exclude_opt ::= */ - -2, /* (331) frame_exclude_opt ::= EXCLUDE frame_exclude */ - -2, /* (332) frame_exclude ::= NO OTHERS */ - -2, /* (333) frame_exclude ::= CURRENT ROW */ - -1, /* (334) frame_exclude ::= GROUP|TIES */ - -2, /* (335) window_clause ::= WINDOW windowdefn_list */ - -2, /* (336) filter_over ::= filter_clause over_clause */ - -1, /* (337) filter_over ::= over_clause */ - -1, /* (338) filter_over ::= filter_clause */ - -4, /* (339) over_clause ::= OVER LP window RP */ - -2, /* (340) over_clause ::= OVER nm */ - -5, /* (341) filter_clause ::= FILTER LP WHERE expr RP */ - -1, /* (342) input ::= cmdlist */ - -2, /* (343) cmdlist ::= cmdlist ecmd */ - -1, /* (344) cmdlist ::= ecmd */ - -1, /* (345) ecmd ::= SEMI */ - -2, /* (346) ecmd ::= cmdx SEMI */ - -3, /* (347) ecmd ::= explain cmdx SEMI */ - 0, /* (348) trans_opt ::= */ - -1, /* (349) trans_opt ::= TRANSACTION */ - -2, /* (350) trans_opt ::= TRANSACTION nm */ - -1, /* (351) savepoint_opt ::= SAVEPOINT */ - 0, /* (352) savepoint_opt ::= */ - -2, /* (353) cmd ::= create_table create_table_args */ - -1, /* (354) table_option_set ::= table_option */ - -4, /* (355) columnlist ::= columnlist COMMA columnname carglist */ - -2, /* (356) columnlist ::= columnname carglist */ - -1, /* (357) nm ::= ID|INDEXED */ - -1, /* (358) nm ::= STRING */ - -1, /* (359) nm ::= JOIN_KW */ - -1, /* (360) typetoken ::= typename */ - -1, /* (361) typename ::= ID|STRING */ - -1, /* (362) signed ::= 
plus_num */ - -1, /* (363) signed ::= minus_num */ - -2, /* (364) carglist ::= carglist ccons */ - 0, /* (365) carglist ::= */ - -2, /* (366) ccons ::= NULL onconf */ - -4, /* (367) ccons ::= GENERATED ALWAYS AS generated */ - -2, /* (368) ccons ::= AS generated */ - -2, /* (369) conslist_opt ::= COMMA conslist */ - -3, /* (370) conslist ::= conslist tconscomma tcons */ - -1, /* (371) conslist ::= tcons */ - 0, /* (372) tconscomma ::= */ - -1, /* (373) defer_subclause_opt ::= defer_subclause */ - -1, /* (374) resolvetype ::= raisetype */ - -1, /* (375) selectnowith ::= oneselect */ - -1, /* (376) oneselect ::= values */ - -2, /* (377) sclp ::= selcollist COMMA */ - -1, /* (378) as ::= ID|STRING */ - -1, /* (379) indexed_opt ::= indexed_by */ - 0, /* (380) returning ::= */ - -1, /* (381) expr ::= term */ - -1, /* (382) likeop ::= LIKE_KW|MATCH */ - -1, /* (383) exprlist ::= nexprlist */ - -1, /* (384) nmnum ::= plus_num */ - -1, /* (385) nmnum ::= nm */ - -1, /* (386) nmnum ::= ON */ - -1, /* (387) nmnum ::= DELETE */ - -1, /* (388) nmnum ::= DEFAULT */ - -1, /* (389) plus_num ::= INTEGER|FLOAT */ - 0, /* (390) foreach_clause ::= */ - -3, /* (391) foreach_clause ::= FOR EACH ROW */ - -1, /* (392) trnm ::= nm */ - 0, /* (393) tridxby ::= */ - -1, /* (394) database_kw_opt ::= DATABASE */ - 0, /* (395) database_kw_opt ::= */ - 0, /* (396) kwcolumn_opt ::= */ - -1, /* (397) kwcolumn_opt ::= COLUMNKW */ - -1, /* (398) vtabarglist ::= vtabarg */ - -3, /* (399) vtabarglist ::= vtabarglist COMMA vtabarg */ - -2, /* (400) vtabarg ::= vtabarg vtabargtoken */ - 0, /* (401) anylist ::= */ - -4, /* (402) anylist ::= anylist LP anylist RP */ - -2, /* (403) anylist ::= anylist ANY */ - 0, /* (404) with ::= */ + -3, /* (311) windowdefn_list ::= windowdefn_list COMMA windowdefn */ + -5, /* (312) windowdefn ::= nm AS LP window RP */ + -5, /* (313) window ::= PARTITION BY nexprlist orderby_opt frame_opt */ + -6, /* (314) window ::= nm PARTITION BY nexprlist orderby_opt frame_opt */ + -4, /* (315) window ::= ORDER BY sortlist frame_opt */ + -5, /* (316) window ::= nm ORDER BY sortlist frame_opt */ + -2, /* (317) window ::= nm frame_opt */ + 0, /* (318) frame_opt ::= */ + -3, /* (319) frame_opt ::= range_or_rows frame_bound_s frame_exclude_opt */ + -6, /* (320) frame_opt ::= range_or_rows BETWEEN frame_bound_s AND frame_bound_e frame_exclude_opt */ + -1, /* (321) range_or_rows ::= RANGE|ROWS|GROUPS */ + -1, /* (322) frame_bound_s ::= frame_bound */ + -2, /* (323) frame_bound_s ::= UNBOUNDED PRECEDING */ + -1, /* (324) frame_bound_e ::= frame_bound */ + -2, /* (325) frame_bound_e ::= UNBOUNDED FOLLOWING */ + -2, /* (326) frame_bound ::= expr PRECEDING|FOLLOWING */ + -2, /* (327) frame_bound ::= CURRENT ROW */ + 0, /* (328) frame_exclude_opt ::= */ + -2, /* (329) frame_exclude_opt ::= EXCLUDE frame_exclude */ + -2, /* (330) frame_exclude ::= NO OTHERS */ + -2, /* (331) frame_exclude ::= CURRENT ROW */ + -1, /* (332) frame_exclude ::= GROUP|TIES */ + -2, /* (333) window_clause ::= WINDOW windowdefn_list */ + -2, /* (334) filter_over ::= filter_clause over_clause */ + -1, /* (335) filter_over ::= over_clause */ + -1, /* (336) filter_over ::= filter_clause */ + -4, /* (337) over_clause ::= OVER LP window RP */ + -2, /* (338) over_clause ::= OVER nm */ + -5, /* (339) filter_clause ::= FILTER LP WHERE expr RP */ + -1, /* (340) input ::= cmdlist */ + -2, /* (341) cmdlist ::= cmdlist ecmd */ + -1, /* (342) cmdlist ::= ecmd */ + -1, /* (343) ecmd ::= SEMI */ + -2, /* (344) ecmd ::= cmdx SEMI */ + -3, /* (345) ecmd ::= 
explain cmdx SEMI */ + 0, /* (346) trans_opt ::= */ + -1, /* (347) trans_opt ::= TRANSACTION */ + -2, /* (348) trans_opt ::= TRANSACTION nm */ + -1, /* (349) savepoint_opt ::= SAVEPOINT */ + 0, /* (350) savepoint_opt ::= */ + -2, /* (351) cmd ::= create_table create_table_args */ + -1, /* (352) table_option_set ::= table_option */ + -4, /* (353) columnlist ::= columnlist COMMA columnname carglist */ + -2, /* (354) columnlist ::= columnname carglist */ + -1, /* (355) nm ::= ID|INDEXED|JOIN_KW */ + -1, /* (356) nm ::= STRING */ + -1, /* (357) typetoken ::= typename */ + -1, /* (358) typename ::= ID|STRING */ + -1, /* (359) signed ::= plus_num */ + -1, /* (360) signed ::= minus_num */ + -2, /* (361) carglist ::= carglist ccons */ + 0, /* (362) carglist ::= */ + -2, /* (363) ccons ::= NULL onconf */ + -4, /* (364) ccons ::= GENERATED ALWAYS AS generated */ + -2, /* (365) ccons ::= AS generated */ + -2, /* (366) conslist_opt ::= COMMA conslist */ + -3, /* (367) conslist ::= conslist tconscomma tcons */ + -1, /* (368) conslist ::= tcons */ + 0, /* (369) tconscomma ::= */ + -1, /* (370) defer_subclause_opt ::= defer_subclause */ + -1, /* (371) resolvetype ::= raisetype */ + -1, /* (372) selectnowith ::= oneselect */ + -1, /* (373) oneselect ::= values */ + -2, /* (374) sclp ::= selcollist COMMA */ + -1, /* (375) as ::= ID|STRING */ + -1, /* (376) indexed_opt ::= indexed_by */ + 0, /* (377) returning ::= */ + -1, /* (378) expr ::= term */ + -1, /* (379) likeop ::= LIKE_KW|MATCH */ + -1, /* (380) case_operand ::= expr */ + -1, /* (381) exprlist ::= nexprlist */ + -1, /* (382) nmnum ::= plus_num */ + -1, /* (383) nmnum ::= nm */ + -1, /* (384) nmnum ::= ON */ + -1, /* (385) nmnum ::= DELETE */ + -1, /* (386) nmnum ::= DEFAULT */ + -1, /* (387) plus_num ::= INTEGER|FLOAT */ + 0, /* (388) foreach_clause ::= */ + -3, /* (389) foreach_clause ::= FOR EACH ROW */ + -1, /* (390) trnm ::= nm */ + 0, /* (391) tridxby ::= */ + -1, /* (392) database_kw_opt ::= DATABASE */ + 0, /* (393) database_kw_opt ::= */ + 0, /* (394) kwcolumn_opt ::= */ + -1, /* (395) kwcolumn_opt ::= COLUMNKW */ + -1, /* (396) vtabarglist ::= vtabarg */ + -3, /* (397) vtabarglist ::= vtabarglist COMMA vtabarg */ + -2, /* (398) vtabarg ::= vtabarg vtabargtoken */ + 0, /* (399) anylist ::= */ + -4, /* (400) anylist ::= anylist LP anylist RP */ + -2, /* (401) anylist ::= anylist ANY */ + 0, /* (402) with ::= */ + -1, /* (403) windowdefn_list ::= windowdefn */ + -1, /* (404) window ::= frame_opt */ }; static void yy_accept(yyParser*); /* Forward Declaration */ @@ -167081,10 +173972,10 @@ static YYACTIONTYPE yy_reduce( /********** Begin reduce actions **********************************************/ YYMINORTYPE yylhsminor; case 0: /* explain ::= EXPLAIN */ -{ pParse->explain = 1; } +{ if( pParse->pReprepare==0 ) pParse->explain = 1; } break; case 1: /* explain ::= EXPLAIN QUERY PLAN */ -{ pParse->explain = 2; } +{ if( pParse->pReprepare==0 ) pParse->explain = 2; } break; case 2: /* cmdx ::= cmd */ { sqlite3FinishCoding(pParse); } @@ -167098,7 +173989,7 @@ static YYACTIONTYPE yy_reduce( case 5: /* transtype ::= DEFERRED */ case 6: /* transtype ::= IMMEDIATE */ yytestcase(yyruleno==6); case 7: /* transtype ::= EXCLUSIVE */ yytestcase(yyruleno==7); - case 323: /* range_or_rows ::= RANGE|ROWS|GROUPS */ yytestcase(yyruleno==323); + case 321: /* range_or_rows ::= RANGE|ROWS|GROUPS */ yytestcase(yyruleno==321); {yymsp[0].minor.yy394 = yymsp[0].major; /*A-overwrites-X*/} break; case 8: /* cmd ::= COMMIT|END trans_opt */ @@ -167319,8 +174210,8 @@ 
static YYACTIONTYPE yy_reduce( break; case 63: /* init_deferred_pred_opt ::= INITIALLY DEFERRED */ case 80: /* ifexists ::= IF EXISTS */ yytestcase(yyruleno==80); - case 216: /* between_op ::= NOT BETWEEN */ yytestcase(yyruleno==216); - case 219: /* in_op ::= NOT IN */ yytestcase(yyruleno==219); + case 217: /* between_op ::= NOT BETWEEN */ yytestcase(yyruleno==217); + case 220: /* in_op ::= NOT IN */ yytestcase(yyruleno==220); case 245: /* collate ::= COLLATE ID|STRING */ yytestcase(yyruleno==245); {yymsp[-1].minor.yy394 = 1;} break; @@ -167394,7 +174285,6 @@ static YYACTIONTYPE yy_reduce( if( p ){ parserDoubleLinkSelect(pParse, p); } - yymsp[0].minor.yy47 = p; /*A-overwrites-X*/ } break; case 88: /* selectnowith ::= selectnowith multiselect_op oneselect */ @@ -167486,14 +174376,17 @@ static YYACTIONTYPE yy_reduce( case 101: /* selcollist ::= sclp scanpt STAR */ { Expr *p = sqlite3Expr(pParse->db, TK_ASTERISK, 0); + sqlite3ExprSetErrorOffset(p, (int)(yymsp[0].minor.yy0.z - pParse->zTail)); yymsp[-2].minor.yy322 = sqlite3ExprListAppend(pParse, yymsp[-2].minor.yy322, p); } break; case 102: /* selcollist ::= sclp scanpt nm DOT STAR */ { - Expr *pRight = sqlite3PExpr(pParse, TK_ASTERISK, 0, 0); - Expr *pLeft = tokenExpr(pParse, TK_ID, yymsp[-2].minor.yy0); - Expr *pDot = sqlite3PExpr(pParse, TK_DOT, pLeft, pRight); + Expr *pRight, *pLeft, *pDot; + pRight = sqlite3PExpr(pParse, TK_ASTERISK, 0, 0); + sqlite3ExprSetErrorOffset(pRight, (int)(yymsp[0].minor.yy0.z - pParse->zTail)); + pLeft = tokenExpr(pParse, TK_ID, yymsp[-2].minor.yy0); + pDot = sqlite3PExpr(pParse, TK_DOT, pLeft, pRight); yymsp[-4].minor.yy322 = sqlite3ExprListAppend(pParse,yymsp[-4].minor.yy322, pDot); } break; @@ -167544,7 +174437,7 @@ static YYACTIONTYPE yy_reduce( { if( yymsp[-5].minor.yy131==0 && yymsp[-1].minor.yy0.n==0 && yymsp[0].minor.yy561.pOn==0 && yymsp[0].minor.yy561.pUsing==0 ){ yymsp[-5].minor.yy131 = yymsp[-3].minor.yy131; - }else if( yymsp[-3].minor.yy131->nSrc==1 ){ + }else if( ALWAYS(yymsp[-3].minor.yy131!=0) && yymsp[-3].minor.yy131->nSrc==1 ){ yymsp[-5].minor.yy131 = sqlite3SrcListAppendFromTerm(pParse,yymsp[-5].minor.yy131,0,0,&yymsp[-1].minor.yy0,0,&yymsp[0].minor.yy561); if( yymsp[-5].minor.yy131 ){ SrcItem *pNew = &yymsp[-5].minor.yy131->a[yymsp[-5].minor.yy131->nSrc-1]; @@ -167672,7 +174565,7 @@ static YYACTIONTYPE yy_reduce( case 146: /* limit_opt ::= */ yytestcase(yyruleno==146); case 151: /* where_opt ::= */ yytestcase(yyruleno==151); case 153: /* where_opt_ret ::= */ yytestcase(yyruleno==153); - case 229: /* case_else ::= */ yytestcase(yyruleno==229); + case 230: /* case_else ::= */ yytestcase(yyruleno==230); case 231: /* case_operand ::= */ yytestcase(yyruleno==231); case 250: /* vinto ::= */ yytestcase(yyruleno==250); {yymsp[1].minor.yy528 = 0;} @@ -167680,7 +174573,7 @@ static YYACTIONTYPE yy_reduce( case 145: /* having_opt ::= HAVING expr */ case 152: /* where_opt ::= WHERE expr */ yytestcase(yyruleno==152); case 154: /* where_opt_ret ::= WHERE expr */ yytestcase(yyruleno==154); - case 228: /* case_else ::= ELSE expr */ yytestcase(yyruleno==228); + case 229: /* case_else ::= ELSE expr */ yytestcase(yyruleno==229); case 249: /* vinto ::= INTO expr */ yytestcase(yyruleno==249); {yymsp[-1].minor.yy528 = yymsp[0].minor.yy528;} break; @@ -167793,11 +174686,10 @@ static YYACTIONTYPE yy_reduce( case 177: /* expr ::= LP expr RP */ {yymsp[-2].minor.yy528 = yymsp[-1].minor.yy528;} break; - case 178: /* expr ::= ID|INDEXED */ - case 179: /* expr ::= JOIN_KW */ yytestcase(yyruleno==179); + case 178: /* expr 
::= ID|INDEXED|JOIN_KW */ {yymsp[0].minor.yy528=tokenExpr(pParse,TK_ID,yymsp[0].minor.yy0); /*A-overwrites-X*/} break; - case 180: /* expr ::= nm DOT nm */ + case 179: /* expr ::= nm DOT nm */ { Expr *temp1 = tokenExpr(pParse,TK_ID,yymsp[-2].minor.yy0); Expr *temp2 = tokenExpr(pParse,TK_ID,yymsp[0].minor.yy0); @@ -167805,7 +174697,7 @@ static YYACTIONTYPE yy_reduce( } yymsp[-2].minor.yy528 = yylhsminor.yy528; break; - case 181: /* expr ::= nm DOT nm DOT nm */ + case 180: /* expr ::= nm DOT nm DOT nm */ { Expr *temp1 = tokenExpr(pParse,TK_ID,yymsp[-4].minor.yy0); Expr *temp2 = tokenExpr(pParse,TK_ID,yymsp[-2].minor.yy0); @@ -167818,18 +174710,18 @@ static YYACTIONTYPE yy_reduce( } yymsp[-4].minor.yy528 = yylhsminor.yy528; break; - case 182: /* term ::= NULL|FLOAT|BLOB */ - case 183: /* term ::= STRING */ yytestcase(yyruleno==183); + case 181: /* term ::= NULL|FLOAT|BLOB */ + case 182: /* term ::= STRING */ yytestcase(yyruleno==182); {yymsp[0].minor.yy528=tokenExpr(pParse,yymsp[0].major,yymsp[0].minor.yy0); /*A-overwrites-X*/} break; - case 184: /* term ::= INTEGER */ + case 183: /* term ::= INTEGER */ { yylhsminor.yy528 = sqlite3ExprAlloc(pParse->db, TK_INTEGER, &yymsp[0].minor.yy0, 1); if( yylhsminor.yy528 ) yylhsminor.yy528->w.iOfst = (int)(yymsp[0].minor.yy0.z - pParse->zTail); } yymsp[0].minor.yy528 = yylhsminor.yy528; break; - case 185: /* expr ::= VARIABLE */ + case 184: /* expr ::= VARIABLE */ { if( !(yymsp[0].minor.yy0.z[0]=='#' && sqlite3Isdigit(yymsp[0].minor.yy0.z[1])) ){ u32 n = yymsp[0].minor.yy0.n; @@ -167851,50 +174743,65 @@ static YYACTIONTYPE yy_reduce( } } break; - case 186: /* expr ::= expr COLLATE ID|STRING */ + case 185: /* expr ::= expr COLLATE ID|STRING */ { yymsp[-2].minor.yy528 = sqlite3ExprAddCollateToken(pParse, yymsp[-2].minor.yy528, &yymsp[0].minor.yy0, 1); } break; - case 187: /* expr ::= CAST LP expr AS typetoken RP */ + case 186: /* expr ::= CAST LP expr AS typetoken RP */ { yymsp[-5].minor.yy528 = sqlite3ExprAlloc(pParse->db, TK_CAST, &yymsp[-1].minor.yy0, 1); sqlite3ExprAttachSubtrees(pParse->db, yymsp[-5].minor.yy528, yymsp[-3].minor.yy528, 0); } break; - case 188: /* expr ::= ID|INDEXED LP distinct exprlist RP */ + case 187: /* expr ::= ID|INDEXED|JOIN_KW LP distinct exprlist RP */ { yylhsminor.yy528 = sqlite3ExprFunction(pParse, yymsp[-1].minor.yy322, &yymsp[-4].minor.yy0, yymsp[-2].minor.yy394); } yymsp[-4].minor.yy528 = yylhsminor.yy528; break; - case 189: /* expr ::= ID|INDEXED LP STAR RP */ + case 188: /* expr ::= ID|INDEXED|JOIN_KW LP distinct exprlist ORDER BY sortlist RP */ +{ + yylhsminor.yy528 = sqlite3ExprFunction(pParse, yymsp[-4].minor.yy322, &yymsp[-7].minor.yy0, yymsp[-5].minor.yy394); + sqlite3ExprAddFunctionOrderBy(pParse, yylhsminor.yy528, yymsp[-1].minor.yy322); +} + yymsp[-7].minor.yy528 = yylhsminor.yy528; + break; + case 189: /* expr ::= ID|INDEXED|JOIN_KW LP STAR RP */ { yylhsminor.yy528 = sqlite3ExprFunction(pParse, 0, &yymsp[-3].minor.yy0, 0); } yymsp[-3].minor.yy528 = yylhsminor.yy528; break; - case 190: /* expr ::= ID|INDEXED LP distinct exprlist RP filter_over */ + case 190: /* expr ::= ID|INDEXED|JOIN_KW LP distinct exprlist RP filter_over */ { yylhsminor.yy528 = sqlite3ExprFunction(pParse, yymsp[-2].minor.yy322, &yymsp[-5].minor.yy0, yymsp[-3].minor.yy394); sqlite3WindowAttach(pParse, yylhsminor.yy528, yymsp[0].minor.yy41); } yymsp[-5].minor.yy528 = yylhsminor.yy528; break; - case 191: /* expr ::= ID|INDEXED LP STAR RP filter_over */ + case 191: /* expr ::= ID|INDEXED|JOIN_KW LP distinct exprlist ORDER BY sortlist RP 
filter_over */ +{ + yylhsminor.yy528 = sqlite3ExprFunction(pParse, yymsp[-5].minor.yy322, &yymsp[-8].minor.yy0, yymsp[-6].minor.yy394); + sqlite3WindowAttach(pParse, yylhsminor.yy528, yymsp[0].minor.yy41); + sqlite3ExprAddFunctionOrderBy(pParse, yylhsminor.yy528, yymsp[-2].minor.yy322); +} + yymsp[-8].minor.yy528 = yylhsminor.yy528; + break; + case 192: /* expr ::= ID|INDEXED|JOIN_KW LP STAR RP filter_over */ { yylhsminor.yy528 = sqlite3ExprFunction(pParse, 0, &yymsp[-4].minor.yy0, 0); sqlite3WindowAttach(pParse, yylhsminor.yy528, yymsp[0].minor.yy41); } yymsp[-4].minor.yy528 = yylhsminor.yy528; break; - case 192: /* term ::= CTIME_KW */ + case 193: /* term ::= CTIME_KW */ { yylhsminor.yy528 = sqlite3ExprFunction(pParse, 0, &yymsp[0].minor.yy0, 0); } yymsp[0].minor.yy528 = yylhsminor.yy528; break; - case 193: /* expr ::= LP nexprlist COMMA expr RP */ + case 194: /* expr ::= LP nexprlist COMMA expr RP */ { ExprList *pList = sqlite3ExprListAppend(pParse, yymsp[-3].minor.yy322, yymsp[-1].minor.yy528); yymsp[-4].minor.yy528 = sqlite3PExpr(pParse, TK_VECTOR, 0, 0); @@ -167908,22 +174815,22 @@ static YYACTIONTYPE yy_reduce( } } break; - case 194: /* expr ::= expr AND expr */ + case 195: /* expr ::= expr AND expr */ {yymsp[-2].minor.yy528=sqlite3ExprAnd(pParse,yymsp[-2].minor.yy528,yymsp[0].minor.yy528);} break; - case 195: /* expr ::= expr OR expr */ - case 196: /* expr ::= expr LT|GT|GE|LE expr */ yytestcase(yyruleno==196); - case 197: /* expr ::= expr EQ|NE expr */ yytestcase(yyruleno==197); - case 198: /* expr ::= expr BITAND|BITOR|LSHIFT|RSHIFT expr */ yytestcase(yyruleno==198); - case 199: /* expr ::= expr PLUS|MINUS expr */ yytestcase(yyruleno==199); - case 200: /* expr ::= expr STAR|SLASH|REM expr */ yytestcase(yyruleno==200); - case 201: /* expr ::= expr CONCAT expr */ yytestcase(yyruleno==201); + case 196: /* expr ::= expr OR expr */ + case 197: /* expr ::= expr LT|GT|GE|LE expr */ yytestcase(yyruleno==197); + case 198: /* expr ::= expr EQ|NE expr */ yytestcase(yyruleno==198); + case 199: /* expr ::= expr BITAND|BITOR|LSHIFT|RSHIFT expr */ yytestcase(yyruleno==199); + case 200: /* expr ::= expr PLUS|MINUS expr */ yytestcase(yyruleno==200); + case 201: /* expr ::= expr STAR|SLASH|REM expr */ yytestcase(yyruleno==201); + case 202: /* expr ::= expr CONCAT expr */ yytestcase(yyruleno==202); {yymsp[-2].minor.yy528=sqlite3PExpr(pParse,yymsp[-1].major,yymsp[-2].minor.yy528,yymsp[0].minor.yy528);} break; - case 202: /* likeop ::= NOT LIKE_KW|MATCH */ + case 203: /* likeop ::= NOT LIKE_KW|MATCH */ {yymsp[-1].minor.yy0=yymsp[0].minor.yy0; yymsp[-1].minor.yy0.n|=0x80000000; /*yymsp[-1].minor.yy0-overwrite-yymsp[0].minor.yy0*/} break; - case 203: /* expr ::= expr likeop expr */ + case 204: /* expr ::= expr likeop expr */ { ExprList *pList; int bNot = yymsp[-1].minor.yy0.n & 0x80000000; @@ -167935,7 +174842,7 @@ static YYACTIONTYPE yy_reduce( if( yymsp[-2].minor.yy528 ) yymsp[-2].minor.yy528->flags |= EP_InfixFunc; } break; - case 204: /* expr ::= expr likeop expr ESCAPE expr */ + case 205: /* expr ::= expr likeop expr ESCAPE expr */ { ExprList *pList; int bNot = yymsp[-3].minor.yy0.n & 0x80000000; @@ -167948,47 +174855,47 @@ static YYACTIONTYPE yy_reduce( if( yymsp[-4].minor.yy528 ) yymsp[-4].minor.yy528->flags |= EP_InfixFunc; } break; - case 205: /* expr ::= expr ISNULL|NOTNULL */ + case 206: /* expr ::= expr ISNULL|NOTNULL */ {yymsp[-1].minor.yy528 = sqlite3PExpr(pParse,yymsp[0].major,yymsp[-1].minor.yy528,0);} break; - case 206: /* expr ::= expr NOT NULL */ + case 207: /* expr ::= expr NOT NULL 
*/ {yymsp[-2].minor.yy528 = sqlite3PExpr(pParse,TK_NOTNULL,yymsp[-2].minor.yy528,0);} break; - case 207: /* expr ::= expr IS expr */ + case 208: /* expr ::= expr IS expr */ { yymsp[-2].minor.yy528 = sqlite3PExpr(pParse,TK_IS,yymsp[-2].minor.yy528,yymsp[0].minor.yy528); binaryToUnaryIfNull(pParse, yymsp[0].minor.yy528, yymsp[-2].minor.yy528, TK_ISNULL); } break; - case 208: /* expr ::= expr IS NOT expr */ + case 209: /* expr ::= expr IS NOT expr */ { yymsp[-3].minor.yy528 = sqlite3PExpr(pParse,TK_ISNOT,yymsp[-3].minor.yy528,yymsp[0].minor.yy528); binaryToUnaryIfNull(pParse, yymsp[0].minor.yy528, yymsp[-3].minor.yy528, TK_NOTNULL); } break; - case 209: /* expr ::= expr IS NOT DISTINCT FROM expr */ + case 210: /* expr ::= expr IS NOT DISTINCT FROM expr */ { yymsp[-5].minor.yy528 = sqlite3PExpr(pParse,TK_IS,yymsp[-5].minor.yy528,yymsp[0].minor.yy528); binaryToUnaryIfNull(pParse, yymsp[0].minor.yy528, yymsp[-5].minor.yy528, TK_ISNULL); } break; - case 210: /* expr ::= expr IS DISTINCT FROM expr */ + case 211: /* expr ::= expr IS DISTINCT FROM expr */ { yymsp[-4].minor.yy528 = sqlite3PExpr(pParse,TK_ISNOT,yymsp[-4].minor.yy528,yymsp[0].minor.yy528); binaryToUnaryIfNull(pParse, yymsp[0].minor.yy528, yymsp[-4].minor.yy528, TK_NOTNULL); } break; - case 211: /* expr ::= NOT expr */ - case 212: /* expr ::= BITNOT expr */ yytestcase(yyruleno==212); + case 212: /* expr ::= NOT expr */ + case 213: /* expr ::= BITNOT expr */ yytestcase(yyruleno==213); {yymsp[-1].minor.yy528 = sqlite3PExpr(pParse, yymsp[-1].major, yymsp[0].minor.yy528, 0);/*A-overwrites-B*/} break; - case 213: /* expr ::= PLUS|MINUS expr */ + case 214: /* expr ::= PLUS|MINUS expr */ { yymsp[-1].minor.yy528 = sqlite3PExpr(pParse, yymsp[-1].major==TK_PLUS ? TK_UPLUS : TK_UMINUS, yymsp[0].minor.yy528, 0); /*A-overwrites-B*/ } break; - case 214: /* expr ::= expr PTR expr */ + case 215: /* expr ::= expr PTR expr */ { ExprList *pList = sqlite3ExprListAppend(pParse, 0, yymsp[-2].minor.yy528); pList = sqlite3ExprListAppend(pParse, pList, yymsp[0].minor.yy528); @@ -167996,11 +174903,11 @@ static YYACTIONTYPE yy_reduce( } yymsp[-2].minor.yy528 = yylhsminor.yy528; break; - case 215: /* between_op ::= BETWEEN */ - case 218: /* in_op ::= IN */ yytestcase(yyruleno==218); + case 216: /* between_op ::= BETWEEN */ + case 219: /* in_op ::= IN */ yytestcase(yyruleno==219); {yymsp[0].minor.yy394 = 0;} break; - case 217: /* expr ::= expr between_op expr AND expr */ + case 218: /* expr ::= expr between_op expr AND expr */ { ExprList *pList = sqlite3ExprListAppend(pParse,0, yymsp[-2].minor.yy528); pList = sqlite3ExprListAppend(pParse,pList, yymsp[0].minor.yy528); @@ -168013,7 +174920,7 @@ static YYACTIONTYPE yy_reduce( if( yymsp[-3].minor.yy394 ) yymsp[-4].minor.yy528 = sqlite3PExpr(pParse, TK_NOT, yymsp[-4].minor.yy528, 0); } break; - case 220: /* expr ::= expr in_op LP exprlist RP */ + case 221: /* expr ::= expr in_op LP exprlist RP */ { if( yymsp[-1].minor.yy322==0 ){ /* Expressions of the form @@ -168034,6 +174941,11 @@ static YYACTIONTYPE yy_reduce( sqlite3ExprListDelete(pParse->db, yymsp[-1].minor.yy322); pRHS = sqlite3PExpr(pParse, TK_UPLUS, pRHS, 0); yymsp[-4].minor.yy528 = sqlite3PExpr(pParse, TK_EQ, yymsp[-4].minor.yy528, pRHS); + }else if( yymsp[-1].minor.yy322->nExpr==1 && pRHS->op==TK_SELECT ){ + yymsp[-4].minor.yy528 = sqlite3PExpr(pParse, TK_IN, yymsp[-4].minor.yy528, 0); + sqlite3PExprAddSelect(pParse, yymsp[-4].minor.yy528, pRHS->x.pSelect); + pRHS->x.pSelect = 0; + sqlite3ExprListDelete(pParse->db, yymsp[-1].minor.yy322); }else{ 
yymsp[-4].minor.yy528 = sqlite3PExpr(pParse, TK_IN, yymsp[-4].minor.yy528, 0); if( yymsp[-4].minor.yy528==0 ){ @@ -168054,20 +174966,20 @@ static YYACTIONTYPE yy_reduce( } } break; - case 221: /* expr ::= LP select RP */ + case 222: /* expr ::= LP select RP */ { yymsp[-2].minor.yy528 = sqlite3PExpr(pParse, TK_SELECT, 0, 0); sqlite3PExprAddSelect(pParse, yymsp[-2].minor.yy528, yymsp[-1].minor.yy47); } break; - case 222: /* expr ::= expr in_op LP select RP */ + case 223: /* expr ::= expr in_op LP select RP */ { yymsp[-4].minor.yy528 = sqlite3PExpr(pParse, TK_IN, yymsp[-4].minor.yy528, 0); sqlite3PExprAddSelect(pParse, yymsp[-4].minor.yy528, yymsp[-1].minor.yy47); if( yymsp[-3].minor.yy394 ) yymsp[-4].minor.yy528 = sqlite3PExpr(pParse, TK_NOT, yymsp[-4].minor.yy528, 0); } break; - case 223: /* expr ::= expr in_op nm dbnm paren_exprlist */ + case 224: /* expr ::= expr in_op nm dbnm paren_exprlist */ { SrcList *pSrc = sqlite3SrcListAppend(pParse, 0,&yymsp[-2].minor.yy0,&yymsp[-1].minor.yy0); Select *pSelect = sqlite3SelectNew(pParse, 0,pSrc,0,0,0,0,0,0); @@ -168077,14 +174989,14 @@ static YYACTIONTYPE yy_reduce( if( yymsp[-3].minor.yy394 ) yymsp[-4].minor.yy528 = sqlite3PExpr(pParse, TK_NOT, yymsp[-4].minor.yy528, 0); } break; - case 224: /* expr ::= EXISTS LP select RP */ + case 225: /* expr ::= EXISTS LP select RP */ { Expr *p; p = yymsp[-3].minor.yy528 = sqlite3PExpr(pParse, TK_EXISTS, 0, 0); sqlite3PExprAddSelect(pParse, p, yymsp[-1].minor.yy47); } break; - case 225: /* expr ::= CASE case_operand case_exprlist case_else END */ + case 226: /* expr ::= CASE case_operand case_exprlist case_else END */ { yymsp[-4].minor.yy528 = sqlite3PExpr(pParse, TK_CASE, yymsp[-3].minor.yy528, 0); if( yymsp[-4].minor.yy528 ){ @@ -168096,21 +175008,18 @@ static YYACTIONTYPE yy_reduce( } } break; - case 226: /* case_exprlist ::= case_exprlist WHEN expr THEN expr */ + case 227: /* case_exprlist ::= case_exprlist WHEN expr THEN expr */ { yymsp[-4].minor.yy322 = sqlite3ExprListAppend(pParse,yymsp[-4].minor.yy322, yymsp[-2].minor.yy528); yymsp[-4].minor.yy322 = sqlite3ExprListAppend(pParse,yymsp[-4].minor.yy322, yymsp[0].minor.yy528); } break; - case 227: /* case_exprlist ::= WHEN expr THEN expr */ + case 228: /* case_exprlist ::= WHEN expr THEN expr */ { yymsp[-3].minor.yy322 = sqlite3ExprListAppend(pParse,0, yymsp[-2].minor.yy528); yymsp[-3].minor.yy322 = sqlite3ExprListAppend(pParse,yymsp[-3].minor.yy322, yymsp[0].minor.yy528); } break; - case 230: /* case_operand ::= expr */ -{yymsp[0].minor.yy528 = yymsp[0].minor.yy528; /*A-overwrites-X*/} - break; case 233: /* nexprlist ::= nexprlist COMMA expr */ {yymsp[-2].minor.yy322 = sqlite3ExprListAppend(pParse,yymsp[-2].minor.yy322,yymsp[0].minor.yy528);} break; @@ -168386,11 +175295,7 @@ static YYACTIONTYPE yy_reduce( yymsp[-2].minor.yy521 = sqlite3WithAdd(pParse, yymsp[-2].minor.yy521, yymsp[0].minor.yy385); } break; - case 311: /* windowdefn_list ::= windowdefn */ -{ yylhsminor.yy41 = yymsp[0].minor.yy41; } - yymsp[0].minor.yy41 = yylhsminor.yy41; - break; - case 312: /* windowdefn_list ::= windowdefn_list COMMA windowdefn */ + case 311: /* windowdefn_list ::= windowdefn_list COMMA windowdefn */ { assert( yymsp[0].minor.yy41!=0 ); sqlite3WindowChain(pParse, yymsp[0].minor.yy41, yymsp[-2].minor.yy41); @@ -168399,7 +175304,7 @@ static YYACTIONTYPE yy_reduce( } yymsp[-2].minor.yy41 = yylhsminor.yy41; break; - case 313: /* windowdefn ::= nm AS LP window RP */ + case 312: /* windowdefn ::= nm AS LP window RP */ { if( ALWAYS(yymsp[-1].minor.yy41) ){ 
yymsp[-1].minor.yy41->zName = sqlite3DbStrNDup(pParse->db, yymsp[-4].minor.yy0.z, yymsp[-4].minor.yy0.n); @@ -168408,90 +175313,83 @@ static YYACTIONTYPE yy_reduce( } yymsp[-4].minor.yy41 = yylhsminor.yy41; break; - case 314: /* window ::= PARTITION BY nexprlist orderby_opt frame_opt */ + case 313: /* window ::= PARTITION BY nexprlist orderby_opt frame_opt */ { yymsp[-4].minor.yy41 = sqlite3WindowAssemble(pParse, yymsp[0].minor.yy41, yymsp[-2].minor.yy322, yymsp[-1].minor.yy322, 0); } break; - case 315: /* window ::= nm PARTITION BY nexprlist orderby_opt frame_opt */ + case 314: /* window ::= nm PARTITION BY nexprlist orderby_opt frame_opt */ { yylhsminor.yy41 = sqlite3WindowAssemble(pParse, yymsp[0].minor.yy41, yymsp[-2].minor.yy322, yymsp[-1].minor.yy322, &yymsp[-5].minor.yy0); } yymsp[-5].minor.yy41 = yylhsminor.yy41; break; - case 316: /* window ::= ORDER BY sortlist frame_opt */ + case 315: /* window ::= ORDER BY sortlist frame_opt */ { yymsp[-3].minor.yy41 = sqlite3WindowAssemble(pParse, yymsp[0].minor.yy41, 0, yymsp[-1].minor.yy322, 0); } break; - case 317: /* window ::= nm ORDER BY sortlist frame_opt */ + case 316: /* window ::= nm ORDER BY sortlist frame_opt */ { yylhsminor.yy41 = sqlite3WindowAssemble(pParse, yymsp[0].minor.yy41, 0, yymsp[-1].minor.yy322, &yymsp[-4].minor.yy0); } yymsp[-4].minor.yy41 = yylhsminor.yy41; break; - case 318: /* window ::= frame_opt */ - case 337: /* filter_over ::= over_clause */ yytestcase(yyruleno==337); -{ - yylhsminor.yy41 = yymsp[0].minor.yy41; -} - yymsp[0].minor.yy41 = yylhsminor.yy41; - break; - case 319: /* window ::= nm frame_opt */ + case 317: /* window ::= nm frame_opt */ { yylhsminor.yy41 = sqlite3WindowAssemble(pParse, yymsp[0].minor.yy41, 0, 0, &yymsp[-1].minor.yy0); } yymsp[-1].minor.yy41 = yylhsminor.yy41; break; - case 320: /* frame_opt ::= */ + case 318: /* frame_opt ::= */ { yymsp[1].minor.yy41 = sqlite3WindowAlloc(pParse, 0, TK_UNBOUNDED, 0, TK_CURRENT, 0, 0); } break; - case 321: /* frame_opt ::= range_or_rows frame_bound_s frame_exclude_opt */ + case 319: /* frame_opt ::= range_or_rows frame_bound_s frame_exclude_opt */ { yylhsminor.yy41 = sqlite3WindowAlloc(pParse, yymsp[-2].minor.yy394, yymsp[-1].minor.yy595.eType, yymsp[-1].minor.yy595.pExpr, TK_CURRENT, 0, yymsp[0].minor.yy516); } yymsp[-2].minor.yy41 = yylhsminor.yy41; break; - case 322: /* frame_opt ::= range_or_rows BETWEEN frame_bound_s AND frame_bound_e frame_exclude_opt */ + case 320: /* frame_opt ::= range_or_rows BETWEEN frame_bound_s AND frame_bound_e frame_exclude_opt */ { yylhsminor.yy41 = sqlite3WindowAlloc(pParse, yymsp[-5].minor.yy394, yymsp[-3].minor.yy595.eType, yymsp[-3].minor.yy595.pExpr, yymsp[-1].minor.yy595.eType, yymsp[-1].minor.yy595.pExpr, yymsp[0].minor.yy516); } yymsp[-5].minor.yy41 = yylhsminor.yy41; break; - case 324: /* frame_bound_s ::= frame_bound */ - case 326: /* frame_bound_e ::= frame_bound */ yytestcase(yyruleno==326); + case 322: /* frame_bound_s ::= frame_bound */ + case 324: /* frame_bound_e ::= frame_bound */ yytestcase(yyruleno==324); {yylhsminor.yy595 = yymsp[0].minor.yy595;} yymsp[0].minor.yy595 = yylhsminor.yy595; break; - case 325: /* frame_bound_s ::= UNBOUNDED PRECEDING */ - case 327: /* frame_bound_e ::= UNBOUNDED FOLLOWING */ yytestcase(yyruleno==327); - case 329: /* frame_bound ::= CURRENT ROW */ yytestcase(yyruleno==329); + case 323: /* frame_bound_s ::= UNBOUNDED PRECEDING */ + case 325: /* frame_bound_e ::= UNBOUNDED FOLLOWING */ yytestcase(yyruleno==325); + case 327: /* frame_bound ::= CURRENT ROW */ 
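/* Cases 311 through 339 in this region are the window-function grammar; the
** shifted numbers come from two rules that the parser generator now marks
** OPTIMIZED OUT ("windowdefn_list ::= windowdefn" and "window ::= frame_opt").
** The frame rules being reduced here parse clauses such as the following
** sketch (schema illustrative):
**
**     SELECT a,
**            sum(b) FILTER (WHERE b>0) OVER (
**              ORDER BY a
**              ROWS BETWEEN 1 PRECEDING AND CURRENT ROW
**              EXCLUDE TIES)
**     FROM t1;
*/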
yytestcase(yyruleno==327); {yylhsminor.yy595.eType = yymsp[-1].major; yylhsminor.yy595.pExpr = 0;} yymsp[-1].minor.yy595 = yylhsminor.yy595; break; - case 328: /* frame_bound ::= expr PRECEDING|FOLLOWING */ + case 326: /* frame_bound ::= expr PRECEDING|FOLLOWING */ {yylhsminor.yy595.eType = yymsp[0].major; yylhsminor.yy595.pExpr = yymsp[-1].minor.yy528;} yymsp[-1].minor.yy595 = yylhsminor.yy595; break; - case 330: /* frame_exclude_opt ::= */ + case 328: /* frame_exclude_opt ::= */ {yymsp[1].minor.yy516 = 0;} break; - case 331: /* frame_exclude_opt ::= EXCLUDE frame_exclude */ + case 329: /* frame_exclude_opt ::= EXCLUDE frame_exclude */ {yymsp[-1].minor.yy516 = yymsp[0].minor.yy516;} break; - case 332: /* frame_exclude ::= NO OTHERS */ - case 333: /* frame_exclude ::= CURRENT ROW */ yytestcase(yyruleno==333); + case 330: /* frame_exclude ::= NO OTHERS */ + case 331: /* frame_exclude ::= CURRENT ROW */ yytestcase(yyruleno==331); {yymsp[-1].minor.yy516 = yymsp[-1].major; /*A-overwrites-X*/} break; - case 334: /* frame_exclude ::= GROUP|TIES */ + case 332: /* frame_exclude ::= GROUP|TIES */ {yymsp[0].minor.yy516 = yymsp[0].major; /*A-overwrites-X*/} break; - case 335: /* window_clause ::= WINDOW windowdefn_list */ + case 333: /* window_clause ::= WINDOW windowdefn_list */ { yymsp[-1].minor.yy41 = yymsp[0].minor.yy41; } break; - case 336: /* filter_over ::= filter_clause over_clause */ + case 334: /* filter_over ::= filter_clause over_clause */ { if( yymsp[0].minor.yy41 ){ yymsp[0].minor.yy41->pFilter = yymsp[-1].minor.yy528; @@ -168502,7 +175400,13 @@ static YYACTIONTYPE yy_reduce( } yymsp[-1].minor.yy41 = yylhsminor.yy41; break; - case 338: /* filter_over ::= filter_clause */ + case 335: /* filter_over ::= over_clause */ +{ + yylhsminor.yy41 = yymsp[0].minor.yy41; +} + yymsp[0].minor.yy41 = yylhsminor.yy41; + break; + case 336: /* filter_over ::= filter_clause */ { yylhsminor.yy41 = (Window*)sqlite3DbMallocZero(pParse->db, sizeof(Window)); if( yylhsminor.yy41 ){ @@ -168514,13 +175418,13 @@ static YYACTIONTYPE yy_reduce( } yymsp[0].minor.yy41 = yylhsminor.yy41; break; - case 339: /* over_clause ::= OVER LP window RP */ + case 337: /* over_clause ::= OVER LP window RP */ { yymsp[-3].minor.yy41 = yymsp[-1].minor.yy41; assert( yymsp[-3].minor.yy41!=0 ); } break; - case 340: /* over_clause ::= OVER nm */ + case 338: /* over_clause ::= OVER nm */ { yymsp[-1].minor.yy41 = (Window*)sqlite3DbMallocZero(pParse->db, sizeof(Window)); if( yymsp[-1].minor.yy41 ){ @@ -168528,73 +175432,75 @@ static YYACTIONTYPE yy_reduce( } } break; - case 341: /* filter_clause ::= FILTER LP WHERE expr RP */ + case 339: /* filter_clause ::= FILTER LP WHERE expr RP */ { yymsp[-4].minor.yy528 = yymsp[-1].minor.yy528; } break; default: - /* (342) input ::= cmdlist */ yytestcase(yyruleno==342); - /* (343) cmdlist ::= cmdlist ecmd */ yytestcase(yyruleno==343); - /* (344) cmdlist ::= ecmd (OPTIMIZED OUT) */ assert(yyruleno!=344); - /* (345) ecmd ::= SEMI */ yytestcase(yyruleno==345); - /* (346) ecmd ::= cmdx SEMI */ yytestcase(yyruleno==346); - /* (347) ecmd ::= explain cmdx SEMI (NEVER REDUCES) */ assert(yyruleno!=347); - /* (348) trans_opt ::= */ yytestcase(yyruleno==348); - /* (349) trans_opt ::= TRANSACTION */ yytestcase(yyruleno==349); - /* (350) trans_opt ::= TRANSACTION nm */ yytestcase(yyruleno==350); - /* (351) savepoint_opt ::= SAVEPOINT */ yytestcase(yyruleno==351); - /* (352) savepoint_opt ::= */ yytestcase(yyruleno==352); - /* (353) cmd ::= create_table create_table_args */ yytestcase(yyruleno==353); - /* (354) 
table_option_set ::= table_option (OPTIMIZED OUT) */ assert(yyruleno!=354); - /* (355) columnlist ::= columnlist COMMA columnname carglist */ yytestcase(yyruleno==355); - /* (356) columnlist ::= columnname carglist */ yytestcase(yyruleno==356); - /* (357) nm ::= ID|INDEXED */ yytestcase(yyruleno==357); - /* (358) nm ::= STRING */ yytestcase(yyruleno==358); - /* (359) nm ::= JOIN_KW */ yytestcase(yyruleno==359); - /* (360) typetoken ::= typename */ yytestcase(yyruleno==360); - /* (361) typename ::= ID|STRING */ yytestcase(yyruleno==361); - /* (362) signed ::= plus_num (OPTIMIZED OUT) */ assert(yyruleno!=362); - /* (363) signed ::= minus_num (OPTIMIZED OUT) */ assert(yyruleno!=363); - /* (364) carglist ::= carglist ccons */ yytestcase(yyruleno==364); - /* (365) carglist ::= */ yytestcase(yyruleno==365); - /* (366) ccons ::= NULL onconf */ yytestcase(yyruleno==366); - /* (367) ccons ::= GENERATED ALWAYS AS generated */ yytestcase(yyruleno==367); - /* (368) ccons ::= AS generated */ yytestcase(yyruleno==368); - /* (369) conslist_opt ::= COMMA conslist */ yytestcase(yyruleno==369); - /* (370) conslist ::= conslist tconscomma tcons */ yytestcase(yyruleno==370); - /* (371) conslist ::= tcons (OPTIMIZED OUT) */ assert(yyruleno!=371); - /* (372) tconscomma ::= */ yytestcase(yyruleno==372); - /* (373) defer_subclause_opt ::= defer_subclause (OPTIMIZED OUT) */ assert(yyruleno!=373); - /* (374) resolvetype ::= raisetype (OPTIMIZED OUT) */ assert(yyruleno!=374); - /* (375) selectnowith ::= oneselect (OPTIMIZED OUT) */ assert(yyruleno!=375); - /* (376) oneselect ::= values */ yytestcase(yyruleno==376); - /* (377) sclp ::= selcollist COMMA */ yytestcase(yyruleno==377); - /* (378) as ::= ID|STRING */ yytestcase(yyruleno==378); - /* (379) indexed_opt ::= indexed_by (OPTIMIZED OUT) */ assert(yyruleno!=379); - /* (380) returning ::= */ yytestcase(yyruleno==380); - /* (381) expr ::= term (OPTIMIZED OUT) */ assert(yyruleno!=381); - /* (382) likeop ::= LIKE_KW|MATCH */ yytestcase(yyruleno==382); - /* (383) exprlist ::= nexprlist */ yytestcase(yyruleno==383); - /* (384) nmnum ::= plus_num (OPTIMIZED OUT) */ assert(yyruleno!=384); - /* (385) nmnum ::= nm (OPTIMIZED OUT) */ assert(yyruleno!=385); - /* (386) nmnum ::= ON */ yytestcase(yyruleno==386); - /* (387) nmnum ::= DELETE */ yytestcase(yyruleno==387); - /* (388) nmnum ::= DEFAULT */ yytestcase(yyruleno==388); - /* (389) plus_num ::= INTEGER|FLOAT */ yytestcase(yyruleno==389); - /* (390) foreach_clause ::= */ yytestcase(yyruleno==390); - /* (391) foreach_clause ::= FOR EACH ROW */ yytestcase(yyruleno==391); - /* (392) trnm ::= nm */ yytestcase(yyruleno==392); - /* (393) tridxby ::= */ yytestcase(yyruleno==393); - /* (394) database_kw_opt ::= DATABASE */ yytestcase(yyruleno==394); - /* (395) database_kw_opt ::= */ yytestcase(yyruleno==395); - /* (396) kwcolumn_opt ::= */ yytestcase(yyruleno==396); - /* (397) kwcolumn_opt ::= COLUMNKW */ yytestcase(yyruleno==397); - /* (398) vtabarglist ::= vtabarg */ yytestcase(yyruleno==398); - /* (399) vtabarglist ::= vtabarglist COMMA vtabarg */ yytestcase(yyruleno==399); - /* (400) vtabarg ::= vtabarg vtabargtoken */ yytestcase(yyruleno==400); - /* (401) anylist ::= */ yytestcase(yyruleno==401); - /* (402) anylist ::= anylist LP anylist RP */ yytestcase(yyruleno==402); - /* (403) anylist ::= anylist ANY */ yytestcase(yyruleno==403); - /* (404) with ::= */ yytestcase(yyruleno==404); + /* (340) input ::= cmdlist */ yytestcase(yyruleno==340); + /* (341) cmdlist ::= cmdlist ecmd */ yytestcase(yyruleno==341); + /* (342) cmdlist 
::= ecmd (OPTIMIZED OUT) */ assert(yyruleno!=342); + /* (343) ecmd ::= SEMI */ yytestcase(yyruleno==343); + /* (344) ecmd ::= cmdx SEMI */ yytestcase(yyruleno==344); + /* (345) ecmd ::= explain cmdx SEMI (NEVER REDUCES) */ assert(yyruleno!=345); + /* (346) trans_opt ::= */ yytestcase(yyruleno==346); + /* (347) trans_opt ::= TRANSACTION */ yytestcase(yyruleno==347); + /* (348) trans_opt ::= TRANSACTION nm */ yytestcase(yyruleno==348); + /* (349) savepoint_opt ::= SAVEPOINT */ yytestcase(yyruleno==349); + /* (350) savepoint_opt ::= */ yytestcase(yyruleno==350); + /* (351) cmd ::= create_table create_table_args */ yytestcase(yyruleno==351); + /* (352) table_option_set ::= table_option (OPTIMIZED OUT) */ assert(yyruleno!=352); + /* (353) columnlist ::= columnlist COMMA columnname carglist */ yytestcase(yyruleno==353); + /* (354) columnlist ::= columnname carglist */ yytestcase(yyruleno==354); + /* (355) nm ::= ID|INDEXED|JOIN_KW */ yytestcase(yyruleno==355); + /* (356) nm ::= STRING */ yytestcase(yyruleno==356); + /* (357) typetoken ::= typename */ yytestcase(yyruleno==357); + /* (358) typename ::= ID|STRING */ yytestcase(yyruleno==358); + /* (359) signed ::= plus_num (OPTIMIZED OUT) */ assert(yyruleno!=359); + /* (360) signed ::= minus_num (OPTIMIZED OUT) */ assert(yyruleno!=360); + /* (361) carglist ::= carglist ccons */ yytestcase(yyruleno==361); + /* (362) carglist ::= */ yytestcase(yyruleno==362); + /* (363) ccons ::= NULL onconf */ yytestcase(yyruleno==363); + /* (364) ccons ::= GENERATED ALWAYS AS generated */ yytestcase(yyruleno==364); + /* (365) ccons ::= AS generated */ yytestcase(yyruleno==365); + /* (366) conslist_opt ::= COMMA conslist */ yytestcase(yyruleno==366); + /* (367) conslist ::= conslist tconscomma tcons */ yytestcase(yyruleno==367); + /* (368) conslist ::= tcons (OPTIMIZED OUT) */ assert(yyruleno!=368); + /* (369) tconscomma ::= */ yytestcase(yyruleno==369); + /* (370) defer_subclause_opt ::= defer_subclause (OPTIMIZED OUT) */ assert(yyruleno!=370); + /* (371) resolvetype ::= raisetype (OPTIMIZED OUT) */ assert(yyruleno!=371); + /* (372) selectnowith ::= oneselect (OPTIMIZED OUT) */ assert(yyruleno!=372); + /* (373) oneselect ::= values */ yytestcase(yyruleno==373); + /* (374) sclp ::= selcollist COMMA */ yytestcase(yyruleno==374); + /* (375) as ::= ID|STRING */ yytestcase(yyruleno==375); + /* (376) indexed_opt ::= indexed_by (OPTIMIZED OUT) */ assert(yyruleno!=376); + /* (377) returning ::= */ yytestcase(yyruleno==377); + /* (378) expr ::= term (OPTIMIZED OUT) */ assert(yyruleno!=378); + /* (379) likeop ::= LIKE_KW|MATCH */ yytestcase(yyruleno==379); + /* (380) case_operand ::= expr */ yytestcase(yyruleno==380); + /* (381) exprlist ::= nexprlist */ yytestcase(yyruleno==381); + /* (382) nmnum ::= plus_num (OPTIMIZED OUT) */ assert(yyruleno!=382); + /* (383) nmnum ::= nm (OPTIMIZED OUT) */ assert(yyruleno!=383); + /* (384) nmnum ::= ON */ yytestcase(yyruleno==384); + /* (385) nmnum ::= DELETE */ yytestcase(yyruleno==385); + /* (386) nmnum ::= DEFAULT */ yytestcase(yyruleno==386); + /* (387) plus_num ::= INTEGER|FLOAT */ yytestcase(yyruleno==387); + /* (388) foreach_clause ::= */ yytestcase(yyruleno==388); + /* (389) foreach_clause ::= FOR EACH ROW */ yytestcase(yyruleno==389); + /* (390) trnm ::= nm */ yytestcase(yyruleno==390); + /* (391) tridxby ::= */ yytestcase(yyruleno==391); + /* (392) database_kw_opt ::= DATABASE */ yytestcase(yyruleno==392); + /* (393) database_kw_opt ::= */ yytestcase(yyruleno==393); + /* (394) kwcolumn_opt ::= */ yytestcase(yyruleno==394); + /* 
(395) kwcolumn_opt ::= COLUMNKW */ yytestcase(yyruleno==395); + /* (396) vtabarglist ::= vtabarg */ yytestcase(yyruleno==396); + /* (397) vtabarglist ::= vtabarglist COMMA vtabarg */ yytestcase(yyruleno==397); + /* (398) vtabarg ::= vtabarg vtabargtoken */ yytestcase(yyruleno==398); + /* (399) anylist ::= */ yytestcase(yyruleno==399); + /* (400) anylist ::= anylist LP anylist RP */ yytestcase(yyruleno==400); + /* (401) anylist ::= anylist ANY */ yytestcase(yyruleno==401); + /* (402) with ::= */ yytestcase(yyruleno==402); + /* (403) windowdefn_list ::= windowdefn (OPTIMIZED OUT) */ assert(yyruleno!=403); + /* (404) window ::= frame_opt (OPTIMIZED OUT) */ assert(yyruleno!=404); break; /********** End reduce actions ************************************************/ }; @@ -169170,7 +176076,7 @@ static const unsigned char aKWHash[127] = { /* aKWNext[] forms the hash collision chain. If aKWHash[i]==0 ** then the i-th keyword has no more hash collisions. Otherwise, ** the next keyword with the same hash is aKWHash[i]-1. */ -static const unsigned char aKWNext[147] = { +static const unsigned char aKWNext[148] = {0, 0, 0, 0, 0, 4, 0, 43, 0, 0, 106, 114, 0, 0, 0, 2, 0, 0, 143, 0, 0, 0, 13, 0, 0, 0, 0, 141, 0, 0, 119, 52, 0, 0, 137, 12, 0, 0, 62, 0, @@ -169185,7 +176091,7 @@ static const unsigned char aKWNext[147] = { 102, 0, 0, 87, }; /* aKWLen[i] is the length (in bytes) of the i-th keyword */ -static const unsigned char aKWLen[147] = { +static const unsigned char aKWLen[148] = {0, 7, 7, 5, 4, 6, 4, 5, 3, 6, 7, 3, 6, 6, 7, 7, 3, 8, 2, 6, 5, 4, 4, 3, 10, 4, 7, 6, 9, 4, 2, 6, 5, 9, 9, 4, 7, 3, 2, 4, @@ -169201,7 +176107,7 @@ static const unsigned char aKWLen[147] = { }; /* aKWOffset[i] is the index into zKWText[] of the start of ** the text for the i-th keyword. 
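*/

Each keyword table above grows to 148 entries by gaining a dummy 0 at index
zero, which lets the rewritten keywordCode() below treat 0 as its
end-of-chain marker; the public accessors compensate (note the new i++ in
sqlite3_keyword_name()), so callers see no difference. A minimal sketch of
that public API:

  #include <stdio.h>
  #include <sqlite3.h>

  /* Print every SQL keyword the library recognizes. */
  int main(void){
    int i, nKw = sqlite3_keyword_count();
    for(i=0; i<nKw; i++){
      const char *z = 0;
      int n = 0;
      if( sqlite3_keyword_name(i, &z, &n)==SQLITE_OK ){
        printf("%.*s\n", n, z);
      }
    }
    return 0;
  }

/*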
*/ -static const unsigned short int aKWOffset[147] = { +static const unsigned short int aKWOffset[148] = {0, 0, 2, 2, 8, 9, 14, 16, 20, 23, 25, 25, 29, 33, 36, 41, 46, 48, 53, 54, 59, 62, 65, 67, 69, 78, 81, 86, 90, 90, 94, 99, 101, 105, 111, 119, 123, 123, 123, 126, @@ -169216,7 +176122,7 @@ static const unsigned short int aKWOffset[147] = { 648, 650, 655, 659, }; /* aKWCode[i] is the parser symbol code for the i-th keyword */ -static const unsigned char aKWCode[147] = { +static const unsigned char aKWCode[148] = {0, TK_REINDEX, TK_INDEXED, TK_INDEX, TK_DESC, TK_ESCAPE, TK_EACH, TK_CHECK, TK_KEY, TK_BEFORE, TK_FOREIGN, TK_FOR, TK_IGNORE, TK_LIKE_KW, TK_EXPLAIN, TK_INSTEAD, @@ -169383,185 +176289,185 @@ static const unsigned char aKWCode[147] = { static int keywordCode(const char *z, int n, int *pType){ int i, j; const char *zKW; - if( n>=2 ){ - i = ((charMap(z[0])*4) ^ (charMap(z[n-1])*3) ^ n*1) % 127; - for(i=((int)aKWHash[i])-1; i>=0; i=((int)aKWNext[i])-1){ - if( aKWLen[i]!=n ) continue; - zKW = &zKWText[aKWOffset[i]]; + assert( n>=2 ); + i = ((charMap(z[0])*4) ^ (charMap(z[n-1])*3) ^ n*1) % 127; + for(i=(int)aKWHash[i]; i>0; i=aKWNext[i]){ + if( aKWLen[i]!=n ) continue; + zKW = &zKWText[aKWOffset[i]]; #ifdef SQLITE_ASCII - if( (z[0]&~0x20)!=zKW[0] ) continue; - if( (z[1]&~0x20)!=zKW[1] ) continue; - j = 2; - while( j=2 ) keywordCode((char*)z, n, &id); return id; } #define SQLITE_N_KEYWORD 147 SQLITE_API int sqlite3_keyword_name(int i,const char **pzName,int *pnName){ if( i<0 || i>=SQLITE_N_KEYWORD ) return SQLITE_ERROR; + i++; *pzName = zKWText + aKWOffset[i]; *pnName = aKWLen[i]; return SQLITE_OK; @@ -169860,7 +176766,7 @@ SQLITE_PRIVATE int sqlite3GetToken(const unsigned char *z, int *tokenType){ testcase( z[0]=='0' ); testcase( z[0]=='1' ); testcase( z[0]=='2' ); testcase( z[0]=='3' ); testcase( z[0]=='4' ); testcase( z[0]=='5' ); testcase( z[0]=='6' ); testcase( z[0]=='7' ); testcase( z[0]=='8' ); - testcase( z[0]=='9' ); + testcase( z[0]=='9' ); testcase( z[0]=='.' ); *tokenType = TK_INTEGER; #ifndef SQLITE_OMIT_HEX_INTEGER if( z[0]=='0' && (z[1]=='x' || z[1]=='X') && sqlite3Isxdigit(z[2]) ){ @@ -169932,7 +176838,8 @@ SQLITE_PRIVATE int sqlite3GetToken(const unsigned char *z, int *tokenType){ return i; } case CC_KYWD0: { - for(i=1; aiClass[z[i]]<=CC_KYWD; i++){} + if( aiClass[z[1]]>CC_KYWD ){ i = 1; break; } + for(i=2; aiClass[z[i]]<=CC_KYWD; i++){} if( IdChar(z[i]) ){ /* This token started out using characters that can appear in keywords, ** but z[i] is a character not allowed within keywords, so this must @@ -170138,7 +177045,7 @@ SQLITE_PRIVATE int sqlite3RunParser(Parse *pParse, const char *zSql){ if( pParse->pNewTrigger && !IN_RENAME_OBJECT ){ sqlite3DeleteTrigger(db, pParse->pNewTrigger); } - if( pParse->pVList ) sqlite3DbFreeNN(db, pParse->pVList); + if( pParse->pVList ) sqlite3DbNNFreeNN(db, pParse->pVList); db->pParse = pParentParse; assert( nErr==0 || pParse->rc!=SQLITE_OK ); return nErr; @@ -170711,30 +177618,20 @@ static int sqlite3TestExtInit(sqlite3 *db){ ** Forward declarations of external module initializer functions ** for modules that need them. 
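*/

In the hunk below, the long-dead FTS1/FTS2 initializers are dropped and a
SQLITE_EXTRA_AUTOEXT hook is added: defining that symbol at build time names
one extra initializer that is appended to sqlite3BuiltinExtensions[] and so
runs for every new connection. A hedged sketch with a purely illustrative
function name, compiled with -DSQLITE_EXTRA_AUTOEXT=sqlite3_extra_init:

  #include <sqlite3.h>

  /* Invoked automatically for each new connection; a real build would
  ** register its application-specific functions or vtables here. */
  int sqlite3_extra_init(sqlite3 *db){
    (void)db;          /* no registrations in this sketch */
    return SQLITE_OK;
  }

/*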
*/ -#ifdef SQLITE_ENABLE_FTS1 -SQLITE_PRIVATE int sqlite3Fts1Init(sqlite3*); -#endif -#ifdef SQLITE_ENABLE_FTS2 -SQLITE_PRIVATE int sqlite3Fts2Init(sqlite3*); -#endif #ifdef SQLITE_ENABLE_FTS5 SQLITE_PRIVATE int sqlite3Fts5Init(sqlite3*); #endif #ifdef SQLITE_ENABLE_STMTVTAB SQLITE_PRIVATE int sqlite3StmtVtabInit(sqlite3*); #endif - +#ifdef SQLITE_EXTRA_AUTOEXT +int SQLITE_EXTRA_AUTOEXT(sqlite3*); +#endif /* ** An array of pointers to extension initializer functions for ** built-in extensions. */ static int (*const sqlite3BuiltinExtensions[])(sqlite3*) = { -#ifdef SQLITE_ENABLE_FTS1 - sqlite3Fts1Init, -#endif -#ifdef SQLITE_ENABLE_FTS2 - sqlite3Fts2Init, -#endif #ifdef SQLITE_ENABLE_FTS3 sqlite3Fts3Init, #endif @@ -170763,6 +177660,9 @@ static int (*const sqlite3BuiltinExtensions[])(sqlite3*) = { #ifdef SQLITE_ENABLE_BYTECODE_VTAB sqlite3VdbeBytecodeVtabInit, #endif +#ifdef SQLITE_EXTRA_AUTOEXT + SQLITE_EXTRA_AUTOEXT, +#endif }; #ifndef SQLITE_AMALGAMATION @@ -170836,6 +177736,32 @@ SQLITE_API char *sqlite3_temp_directory = 0; */ SQLITE_API char *sqlite3_data_directory = 0; +/* +** Determine whether or not high-precision (long double) floating point +** math works correctly on CPU currently running. +*/ +static SQLITE_NOINLINE int hasHighPrecisionDouble(int rc){ + if( sizeof(LONGDOUBLE_TYPE)<=8 ){ + /* If the size of "long double" is not more than 8, then + ** high-precision math is not possible. */ + return 0; + }else{ + /* Just because sizeof(long double)>8 does not mean that the underlying + ** hardware actually supports high-precision floating point. For example, + ** clearing the 0x100 bit in the floating-point control word on Intel + ** processors will make long double work like double, even though long + ** double takes up more space. The only way to determine if long double + ** actually works is to run an experiment. */ + LONGDOUBLE_TYPE a, b, c; + rc++; + a = 1.0+rc*0.1; + b = 1.0e+18+rc*25.0; + c = a+b; + return b!=c; + } +} + + /* ** Initialize SQLite. ** @@ -171031,6 +177957,12 @@ SQLITE_API int sqlite3_initialize(void){ } #endif + /* Experimentally determine if high-precision floating point is + ** available. */ +#ifndef SQLITE_OMIT_WSD + sqlite3Config.bUseLongDouble = hasHighPrecisionDouble(rc); +#endif + return rc; } @@ -171100,9 +178032,21 @@ SQLITE_API int sqlite3_config(int op, ...){ va_list ap; int rc = SQLITE_OK; - /* sqlite3_config() shall return SQLITE_MISUSE if it is invoked while - ** the SQLite library is in use. */ - if( sqlite3GlobalConfig.isInit ) return SQLITE_MISUSE_BKPT; + /* sqlite3_config() normally returns SQLITE_MISUSE if it is invoked while + ** the SQLite library is in use. Except, a few selected opcodes + ** are allowed. + */ + if( sqlite3GlobalConfig.isInit ){ + static const u64 mAnytimeConfigOption = 0 + | MASKBIT64( SQLITE_CONFIG_LOG ) + | MASKBIT64( SQLITE_CONFIG_PCACHE_HDRSZ ) + ; + if( op<0 || op>63 || (MASKBIT64(op) & mAnytimeConfigOption)==0 ){ + return SQLITE_MISUSE_BKPT; + } + testcase( op==SQLITE_CONFIG_LOG ); + testcase( op==SQLITE_CONFIG_PCACHE_HDRSZ ); + } va_start(ap, op); switch( op ){ @@ -171171,6 +178115,7 @@ SQLITE_API int sqlite3_config(int op, ...){ break; } case SQLITE_CONFIG_MEMSTATUS: { + assert( !sqlite3GlobalConfig.isInit ); /* Cannot change at runtime */ /* EVIDENCE-OF: R-61275-35157 The SQLITE_CONFIG_MEMSTATUS option takes ** single argument of type int, interpreted as a boolean, which enables ** or disables the collection of memory allocation statistics. 
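*/

With the opcode mask above, SQLITE_CONFIG_LOG and SQLITE_CONFIG_PCACHE_HDRSZ
may now be issued even after sqlite3_initialize(); any other opcode still
fails with SQLITE_MISUSE while the library is in use. A minimal sketch of
installing a logger late (callback name illustrative):

  #include <stdio.h>
  #include <sqlite3.h>

  static void xLog(void *pArg, int iErrCode, const char *zMsg){
    (void)pArg;
    fprintf(stderr, "sqlite3 (%d): %s\n", iErrCode, zMsg);
  }

  int main(void){
    int rc;
    sqlite3_initialize();
    /* Legal at runtime now that SQLITE_CONFIG_LOG is an "anytime" opcode */
    rc = sqlite3_config(SQLITE_CONFIG_LOG, xLog, (void*)0);
    return rc==SQLITE_OK ? 0 : 1;
  }

/*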
*/ @@ -171294,8 +178239,10 @@ SQLITE_API int sqlite3_config(int op, ...){ ** sqlite3GlobalConfig.xLog = va_arg(ap, void(*)(void*,int,const char*)); */ typedef void(*LOGFUNC_t)(void*,int,const char*); - sqlite3GlobalConfig.xLog = va_arg(ap, LOGFUNC_t); - sqlite3GlobalConfig.pLogArg = va_arg(ap, void*); + LOGFUNC_t xLog = va_arg(ap, LOGFUNC_t); + void *pLogArg = va_arg(ap, void*); + AtomicStore(&sqlite3GlobalConfig.xLog, xLog); + AtomicStore(&sqlite3GlobalConfig.pLogArg, pLogArg); break; } @@ -171309,7 +178256,8 @@ SQLITE_API int sqlite3_config(int op, ...){ ** argument of type int. If non-zero, then URI handling is globally ** enabled. If the parameter is zero, then URI handling is globally ** disabled. */ - sqlite3GlobalConfig.bOpenUri = va_arg(ap, int); + int bOpenUri = va_arg(ap, int); + AtomicStore(&sqlite3GlobalConfig.bOpenUri, bOpenUri); break; } @@ -171494,18 +178442,19 @@ static int setupLookaside(sqlite3 *db, void *pBuf, int sz, int cnt){ db->lookaside.bMalloced = pBuf==0 ?1:0; db->lookaside.nSlot = nBig+nSm; }else{ - db->lookaside.pStart = db; + db->lookaside.pStart = 0; #ifndef SQLITE_OMIT_TWOSIZE_LOOKASIDE db->lookaside.pSmallInit = 0; db->lookaside.pSmallFree = 0; - db->lookaside.pMiddle = db; + db->lookaside.pMiddle = 0; #endif /* SQLITE_OMIT_TWOSIZE_LOOKASIDE */ - db->lookaside.pEnd = db; + db->lookaside.pEnd = 0; db->lookaside.bDisable = 1; db->lookaside.sz = 0; db->lookaside.bMalloced = 0; db->lookaside.nSlot = 0; } + db->lookaside.pTrueEnd = db->lookaside.pEnd; assert( sqlite3LookasideUsed(db,0)==0 ); #endif /* SQLITE_OMIT_LOOKASIDE */ return SQLITE_OK; @@ -171584,6 +178533,11 @@ SQLITE_API int sqlite3_db_cacheflush(sqlite3 *db){ SQLITE_API int sqlite3_db_config(sqlite3 *db, int op, ...){ va_list ap; int rc; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; +#endif + sqlite3_mutex_enter(db->mutex); va_start(ap, op); switch( op ){ case SQLITE_DBCONFIG_MAINDBNAME: { @@ -171622,6 +178576,8 @@ SQLITE_API int sqlite3_db_config(sqlite3 *db, int op, ...){ { SQLITE_DBCONFIG_DQS_DML, SQLITE_DqsDML }, { SQLITE_DBCONFIG_LEGACY_FILE_FORMAT, SQLITE_LegacyFileFmt }, { SQLITE_DBCONFIG_TRUSTED_SCHEMA, SQLITE_TrustedSchema }, + { SQLITE_DBCONFIG_STMT_SCANSTATUS, SQLITE_StmtScanStatus }, + { SQLITE_DBCONFIG_REVERSE_SCANORDER, SQLITE_ReverseOrder }, }; unsigned int i; rc = SQLITE_ERROR; /* IMP: R-42790-23372 */ @@ -171649,6 +178605,7 @@ SQLITE_API int sqlite3_db_config(sqlite3 *db, int op, ...){ } } va_end(ap); + sqlite3_mutex_leave(db->mutex); return rc; } @@ -171909,6 +178866,14 @@ static int sqlite3Close(sqlite3 *db, int forceZombie){ } #endif + while( db->pDbData ){ + DbClientData *p = db->pDbData; + db->pDbData = p->pNext; + assert( p->pData!=0 ); + if( p->xDestructor ) p->xDestructor(p->pData); + sqlite3_free(p); + } + /* Convert the connection into a zombie and then close it. 
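*/

Two connection-scoped flags join the sqlite3_db_config() lookup table above.
A short sketch of toggling them on an open handle (error handling elided):

  #include <sqlite3.h>

  static void tuneConnection(sqlite3 *db){
    int prev = 0;
    /* Skip sqlite3_stmt_scanstatus() bookkeeping on this connection */
    sqlite3_db_config(db, SQLITE_DBCONFIG_STMT_SCANSTATUS, 0, &prev);
    /* Let the planner prefer reverse-order table scans where possible */
    sqlite3_db_config(db, SQLITE_DBCONFIG_REVERSE_SCANORDER, 1, &prev);
  }

/*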
*/ db->eOpenState = SQLITE_STATE_ZOMBIE; @@ -172233,6 +179198,7 @@ SQLITE_PRIVATE const char *sqlite3ErrName(int rc){ case SQLITE_NOTICE_RECOVER_WAL: zName = "SQLITE_NOTICE_RECOVER_WAL";break; case SQLITE_NOTICE_RECOVER_ROLLBACK: zName = "SQLITE_NOTICE_RECOVER_ROLLBACK"; break; + case SQLITE_NOTICE_RBU: zName = "SQLITE_NOTICE_RBU"; break; case SQLITE_WARNING: zName = "SQLITE_WARNING"; break; case SQLITE_WARNING_AUTOINDEX: zName = "SQLITE_WARNING_AUTOINDEX"; break; case SQLITE_DONE: zName = "SQLITE_DONE"; break; @@ -172325,9 +179291,9 @@ static int sqliteDefaultBusyCallback( void *ptr, /* Database connection */ int count /* Number of times table has been busy */ ){ -#if SQLITE_OS_WIN || HAVE_USLEEP +#if SQLITE_OS_WIN || !defined(HAVE_NANOSLEEP) || HAVE_NANOSLEEP /* This case is for systems that have support for sleeping for fractions of - ** a second. Examples: All windows systems, unix systems with usleep() */ + ** a second. Examples: All windows systems, unix systems with nanosleep() */ static const u8 delays[] = { 1, 2, 5, 10, 15, 20, 25, 25, 25, 50, 50, 100 }; static const u8 totals[] = @@ -172462,7 +179428,9 @@ SQLITE_API int sqlite3_busy_timeout(sqlite3 *db, int ms){ */ SQLITE_API void sqlite3_interrupt(sqlite3 *db){ #ifdef SQLITE_ENABLE_API_ARMOR - if( !sqlite3SafetyCheckOk(db) && (db==0 || db->eOpenState!=SQLITE_STATE_ZOMBIE) ){ + if( !sqlite3SafetyCheckOk(db) + && (db==0 || db->eOpenState!=SQLITE_STATE_ZOMBIE) + ){ (void)SQLITE_MISUSE_BKPT; return; } @@ -172470,6 +179438,21 @@ SQLITE_API void sqlite3_interrupt(sqlite3 *db){ AtomicStore(&db->u1.isInterrupted, 1); } +/* +** Return true or false depending on whether or not an interrupt is +** pending on connection db. +*/ +SQLITE_API int sqlite3_is_interrupted(sqlite3 *db){ +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) + && (db==0 || db->eOpenState!=SQLITE_STATE_ZOMBIE) + ){ + (void)SQLITE_MISUSE_BKPT; + return 0; + } +#endif + return AtomicLoad(&db->u1.isInterrupted)!=0; +} /* ** This function is exactly the same as sqlite3_create_function(), except @@ -172508,13 +179491,13 @@ SQLITE_PRIVATE int sqlite3CreateFunc( assert( SQLITE_FUNC_CONSTANT==SQLITE_DETERMINISTIC ); assert( SQLITE_FUNC_DIRECT==SQLITE_DIRECTONLY ); extraFlags = enc & (SQLITE_DETERMINISTIC|SQLITE_DIRECTONLY| - SQLITE_SUBTYPE|SQLITE_INNOCUOUS); + SQLITE_SUBTYPE|SQLITE_INNOCUOUS|SQLITE_RESULT_SUBTYPE); enc &= (SQLITE_FUNC_ENCMASK|SQLITE_ANY); /* The SQLITE_INNOCUOUS flag is the same bit as SQLITE_FUNC_UNSAFE. But ** the meaning is inverted. So flip the bit. 
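*/

The new sqlite3_is_interrupted() above gives background workers a supported
way to poll for a pending sqlite3_interrupt() rather than peeking at
connection internals. A minimal sketch; the vacuum step is purely
illustrative work:

  #include <sqlite3.h>

  /* Perform up to nSteps units of maintenance, stopping promptly once
  ** another thread has called sqlite3_interrupt(db). */
  static int runMaintenance(sqlite3 *db, int nSteps){
    int rc = SQLITE_OK;
    while( nSteps-->0 && rc==SQLITE_OK && !sqlite3_is_interrupted(db) ){
      rc = sqlite3_exec(db, "PRAGMA incremental_vacuum(100);", 0, 0, 0);
    }
    return rc;
  }

/*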
*/ assert( SQLITE_FUNC_UNSAFE==SQLITE_INNOCUOUS ); - extraFlags ^= SQLITE_FUNC_UNSAFE; + extraFlags ^= SQLITE_FUNC_UNSAFE; /* tag-20230109-1 */ #ifndef SQLITE_OMIT_UTF16 @@ -172532,11 +179515,11 @@ SQLITE_PRIVATE int sqlite3CreateFunc( case SQLITE_ANY: { int rc; rc = sqlite3CreateFunc(db, zFunctionName, nArg, - (SQLITE_UTF8|extraFlags)^SQLITE_FUNC_UNSAFE, + (SQLITE_UTF8|extraFlags)^SQLITE_FUNC_UNSAFE, /* tag-20230109-1 */ pUserData, xSFunc, xStep, xFinal, xValue, xInverse, pDestructor); if( rc==SQLITE_OK ){ rc = sqlite3CreateFunc(db, zFunctionName, nArg, - (SQLITE_UTF16LE|extraFlags)^SQLITE_FUNC_UNSAFE, + (SQLITE_UTF16LE|extraFlags)^SQLITE_FUNC_UNSAFE, /* tag-20230109-1*/ pUserData, xSFunc, xStep, xFinal, xValue, xInverse, pDestructor); } if( rc!=SQLITE_OK ){ @@ -172785,7 +179768,7 @@ SQLITE_API int sqlite3_overload_function( rc = sqlite3FindFunction(db, zName, nArg, SQLITE_UTF8, 0)!=0; sqlite3_mutex_leave(db->mutex); if( rc ) return SQLITE_OK; - zCopy = sqlite3_mprintf(zName); + zCopy = sqlite3_mprintf("%s", zName); if( zCopy==0 ) return SQLITE_NOMEM; return sqlite3_create_function_v2(db, zName, nArg, SQLITE_UTF8, zCopy, sqlite3InvalidFunction, 0, 0, sqlite3_free); @@ -172965,6 +179948,12 @@ SQLITE_API void *sqlite3_preupdate_hook( void *pArg /* First callback argument */ ){ void *pRet; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( db==0 ){ + return 0; + } +#endif sqlite3_mutex_enter(db->mutex); pRet = db->pPreUpdateArg; db->xPreUpdateCallback = xCallback; @@ -173111,7 +180100,7 @@ SQLITE_API int sqlite3_wal_checkpoint_v2( if( eMode<SQLITE_CHECKPOINT_PASSIVE || eMode>SQLITE_CHECKPOINT_TRUNCATE ){ /* EVIDENCE-OF: R-03996-12088 The M parameter must be a valid checkpoint ** mode: */ - return SQLITE_MISUSE; + return SQLITE_MISUSE_BKPT; } sqlite3_mutex_enter(db->mutex); @@ -173588,9 +180577,9 @@ SQLITE_PRIVATE int sqlite3ParseUri( assert( *pzErrMsg==0 ); - if( ((flags & SQLITE_OPEN_URI) /* IMP: R-48725-32206 */ - || sqlite3GlobalConfig.bOpenUri) /* IMP: R-51689-46548 */ - && nUri>=5 && memcmp(zUri, "file:", 5)==0 /* IMP: R-57884-37496 */ + if( ((flags & SQLITE_OPEN_URI) /* IMP: R-48725-32206 */ + || AtomicLoad(&sqlite3GlobalConfig.bOpenUri)) /* IMP: R-51689-46548 */ + && nUri>=5 && memcmp(zUri, "file:", 5)==0 /* IMP: R-57884-37496 */ ){ char *zOpt; int eState; /* Parser state when parsing URI */ @@ -173948,7 +180937,7 @@ static int openDatabase( ** 0 off off ** ** Legacy behavior is 3 (double-quoted string literals are allowed anywhere) -** and so that is the default. But developers are encouranged to use +** and so that is the default. But developers are encouraged to use ** -DSQLITE_DQS=0 (best) or -DSQLITE_DQS=1 (second choice) if possible.
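*/

sqlite3_wal_checkpoint_v2() now reports an out-of-range mode through
SQLITE_MISUSE_BKPT, so the misuse also reaches the error log. For reference,
a sketch of a valid call:

  #include <sqlite3.h>

  /* Checkpoint the "main" database and truncate its WAL file. */
  static int truncateWal(sqlite3 *db){
    int nLog = 0, nCkpt = 0;
    return sqlite3_wal_checkpoint_v2(db, "main", SQLITE_CHECKPOINT_TRUNCATE,
                                     &nLog, &nCkpt);
  }

/*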
*/ #if !defined(SQLITE_DQS) @@ -173996,6 +180985,9 @@ static int openDatabase( #endif #if defined(SQLITE_DEFAULT_LEGACY_ALTER_TABLE) | SQLITE_LegacyAlter +#endif +#if defined(SQLITE_ENABLE_STMT_SCANSTATUS) + | SQLITE_StmtScanStatus #endif ; sqlite3HashInit(&db->aCollSeq); @@ -174019,6 +181011,19 @@ static int openDatabase( goto opendb_out; } +#if SQLITE_OS_UNIX && defined(SQLITE_OS_KV_OPTIONAL) + /* Process magic filenames ":localStorage:" and ":sessionStorage:" */ + if( zFilename && zFilename[0]==':' ){ + if( strcmp(zFilename, ":localStorage:")==0 ){ + zFilename = "file:local?vfs=kvvfs"; + flags |= SQLITE_OPEN_URI; + }else if( strcmp(zFilename, ":sessionStorage:")==0 ){ + zFilename = "file:session?vfs=kvvfs"; + flags |= SQLITE_OPEN_URI; + } + } +#endif /* SQLITE_OS_UNIX && defined(SQLITE_OS_KV_OPTIONAL) */ + /* Parse the filename/URI argument ** ** Only allow sensible combinations of bits in the flags argument. @@ -174049,6 +181054,12 @@ static int openDatabase( sqlite3_free(zErrMsg); goto opendb_out; } + assert( db->pVfs!=0 ); +#if SQLITE_OS_KV || defined(SQLITE_OS_KV_OPTIONAL) + if( sqlite3_stricmp(db->pVfs->zName, "kvvfs")==0 ){ + db->temp_store = 2; + } +#endif /* Open the backend database driver */ rc = sqlite3BtreeOpen(db->pVfs, zOpen, db, &db->aDb[0].pBt, 0, @@ -174326,6 +181337,69 @@ SQLITE_API int sqlite3_collation_needed16( } #endif /* SQLITE_OMIT_UTF16 */ +/* +** Find existing client data. +*/ +SQLITE_API void *sqlite3_get_clientdata(sqlite3 *db, const char *zName){ + DbClientData *p; + sqlite3_mutex_enter(db->mutex); + for(p=db->pDbData; p; p=p->pNext){ + if( strcmp(p->zName, zName)==0 ){ + void *pResult = p->pData; + sqlite3_mutex_leave(db->mutex); + return pResult; + } + } + sqlite3_mutex_leave(db->mutex); + return 0; +} + +/* +** Add new client data to a database connection. +*/ +SQLITE_API int sqlite3_set_clientdata( + sqlite3 *db, /* Attach client data to this connection */ + const char *zName, /* Name of the client data */ + void *pData, /* The client data itself */ + void (*xDestructor)(void*) /* Destructor */ +){ + DbClientData *p, **pp; + sqlite3_mutex_enter(db->mutex); + pp = &db->pDbData; + for(p=db->pDbData; p && strcmp(p->zName,zName); p=p->pNext){ + pp = &p->pNext; + } + if( p ){ + assert( p->pData!=0 ); + if( p->xDestructor ) p->xDestructor(p->pData); + if( pData==0 ){ + *pp = p->pNext; + sqlite3_free(p); + sqlite3_mutex_leave(db->mutex); + return SQLITE_OK; + } + }else if( pData==0 ){ + sqlite3_mutex_leave(db->mutex); + return SQLITE_OK; + }else{ + size_t n = strlen(zName); + p = sqlite3_malloc64( sizeof(DbClientData)+n+1 ); + if( p==0 ){ + if( xDestructor ) xDestructor(pData); + sqlite3_mutex_leave(db->mutex); + return SQLITE_NOMEM; + } + memcpy(p->zName, zName, n+1); + p->pNext = db->pDbData; + db->pDbData = p; + } + p->pData = pData; + p->xDestructor = xDestructor; + sqlite3_mutex_leave(db->mutex); + return SQLITE_OK; +} + + #ifndef SQLITE_OMIT_DEPRECATED /* ** This function is now an anachronism. It used to be used to recover from a @@ -174461,7 +181535,7 @@ SQLITE_API int sqlite3_table_column_metadata( /* Find the column for which info is requested */ if( zColumnName==0 ){ - /* Query for existance of table only */ + /* Query for existence of table only */ }else{ for(iCol=0; iColnCol; iCol++){ pCol = &pTab->aCol[iCol]; @@ -174542,7 +181616,7 @@ SQLITE_API int sqlite3_sleep(int ms){ /* This function works in milliseconds, but the underlying OsSleep() ** API uses microseconds. Hence the 1000's. 
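*/

sqlite3_set_clientdata() and sqlite3_get_clientdata(), added above, attach
named values to a connection; destructors run when a value is replaced,
cleared, or the connection closes (see the teardown loop added to
sqlite3Close() earlier in this diff). A short sketch, with the key
"app-state" purely illustrative:

  #include <sqlite3.h>

  /* Attach a heap-allocated tag to the connection. */
  static int attachTag(sqlite3 *db, const char *zTag){
    char *zCopy = sqlite3_mprintf("%s", zTag);
    if( zCopy==0 ) return SQLITE_NOMEM;
    return sqlite3_set_clientdata(db, "app-state", zCopy, sqlite3_free);
  }

  /* Read it back; returns NULL if never set or already cleared. */
  static const char *currentTag(sqlite3 *db){
    return (const char*)sqlite3_get_clientdata(db, "app-state");
  }

/*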
*/ - rc = (sqlite3OsSleep(pVfs, 1000*ms)/1000); + rc = (sqlite3OsSleep(pVfs, ms<0 ? 0 : 1000*ms)/1000); return rc; } @@ -174598,6 +181672,9 @@ SQLITE_API int sqlite3_file_control(sqlite3 *db, const char *zDbName, int op, vo sqlite3BtreeSetPageSize(pBtree, 0, iNew, 0); } rc = SQLITE_OK; + }else if( op==SQLITE_FCNTL_RESET_CACHE ){ + sqlite3BtreeClearCache(pBtree); + rc = SQLITE_OK; }else{ int nSave = db->busyHandler.nBusy; rc = sqlite3OsFileControl(fd, op, pArg); @@ -174672,6 +181749,28 @@ SQLITE_API int sqlite3_test_control(int op, ...){ } #endif + /* sqlite3_test_control(SQLITE_TESTCTRL_FK_NO_ACTION, sqlite3 *db, int b); + ** + ** If b is true, then activate the SQLITE_FkNoAction setting. If b is + ** false then clear that setting. If the SQLITE_FkNoAction setting is + ** enabled, all foreign key ON DELETE and ON UPDATE actions behave as if + ** they were NO ACTION, regardless of how they are defined. + ** + ** NB: One must usually run "PRAGMA writable_schema=RESET" after + ** using this test-control, before it will take full effect. Failing + ** to reset the schema can result in some unexpected behavior. + */ + case SQLITE_TESTCTRL_FK_NO_ACTION: { + sqlite3 *db = va_arg(ap, sqlite3*); + int b = va_arg(ap, int); + if( b ){ + db->flags |= SQLITE_FkNoAction; + }else{ + db->flags &= ~SQLITE_FkNoAction; + } + break; + } + /* ** sqlite3_test_control(BITVEC_TEST, size, program) ** @@ -174778,10 +181877,12 @@ SQLITE_API int sqlite3_test_control(int op, ...){ sqlite3ShowSrcList(0); sqlite3ShowWith(0); sqlite3ShowUpsert(0); +#ifndef SQLITE_OMIT_TRIGGER sqlite3ShowTriggerStep(0); sqlite3ShowTriggerStepList(0); sqlite3ShowTrigger(0); sqlite3ShowTriggerList(0); +#endif #ifndef SQLITE_OMIT_WINDOWFUNC sqlite3ShowWindow(0); sqlite3ShowWinFunc(0); @@ -174898,7 +181999,7 @@ SQLITE_API int sqlite3_test_control(int op, ...){ ** formed and never corrupt. This flag is clear by default, indicating that ** database files might have arbitrary corruption. Setting the flag during ** testing causes certain assert() statements in the code to be activated - ** that demonstrat invariants on well-formed database files. + ** that demonstrate invariants on well-formed database files. */ case SQLITE_TESTCTRL_NEVER_CORRUPT: { sqlite3GlobalConfig.neverCorrupt = va_arg(ap, int); @@ -175052,7 +182153,7 @@ SQLITE_API int sqlite3_test_control(int op, ...){ ** ** op==0 Store the current sqlite3TreeTrace in *ptr ** op==1 Set sqlite3TreeTrace to the value *ptr - ** op==3 Store the current sqlite3WhereTrace in *ptr + ** op==2 Store the current sqlite3WhereTrace in *ptr ** op==3 Set sqlite3WhereTrace to the value *ptr */ case SQLITE_TESTCTRL_TRACEFLAGS: { @@ -175088,6 +182189,23 @@ SQLITE_API int sqlite3_test_control(int op, ...){ break; } +#if !defined(SQLITE_OMIT_WSD) + /* sqlite3_test_control(SQLITE_TESTCTRL_USELONGDOUBLE, int X); + ** + ** X<0 Make no changes to the bUseLongDouble. Just report value.
+ ** X==0 Disable bUseLongDouble + ** X==1 Enable bUseLongDouble + ** X>=2 Set bUseLongDouble to its default value for this platform + */ + case SQLITE_TESTCTRL_USELONGDOUBLE: { + int b = va_arg(ap, int); + if( b>=2 ) b = hasHighPrecisionDouble(b); + if( b>=0 ) sqlite3Config.bUseLongDouble = b>0; + rc = sqlite3Config.bUseLongDouble!=0; + break; + } +#endif + #if defined(SQLITE_DEBUG) && !defined(SQLITE_OMIT_WSD) /* sqlite3_test_control(SQLITE_TESTCTRL_TUNE, id, *piValue) @@ -175158,7 +182276,7 @@ static char *appendText(char *p, const char *z){ ** Memory layout must be compatible with that generated by the pager ** and expected by sqlite3_uri_parameter() and databaseName(). */ -SQLITE_API char *sqlite3_create_filename( +SQLITE_API const char *sqlite3_create_filename( const char *zDatabase, const char *zJournal, const char *zWal, @@ -175194,10 +182312,10 @@ SQLITE_API char *sqlite3_create_filename( ** error to call this routine with any parameter other than a pointer ** previously obtained from sqlite3_create_filename() or a NULL pointer. */ -SQLITE_API void sqlite3_free_filename(char *p){ +SQLITE_API void sqlite3_free_filename(const char *p){ if( p==0 ) return; - p = (char*)databaseName(p); - sqlite3_free(p - 4); + p = databaseName(p); + sqlite3_free((char*)p - 4); } @@ -175388,7 +182506,7 @@ SQLITE_API int sqlite3_snapshot_get( } /* -** Open a read-transaction on the snapshot idendified by pSnapshot. +** Open a read-transaction on the snapshot identified by pSnapshot. */ SQLITE_API int sqlite3_snapshot_open( sqlite3 *db, @@ -175448,8 +182566,8 @@ SQLITE_API int sqlite3_snapshot_open( */ SQLITE_API int sqlite3_snapshot_recover(sqlite3 *db, const char *zDb){ int rc = SQLITE_ERROR; - int iDb; #ifndef SQLITE_OMIT_WAL + int iDb; #ifdef SQLITE_ENABLE_API_ARMOR if( !sqlite3SafetyCheckOk(db) ){ @@ -175495,7 +182613,7 @@ SQLITE_API int sqlite3_compileoption_used(const char *zOptName){ int nOpt; const char **azCompileOpt; -#if SQLITE_ENABLE_API_ARMOR +#ifdef SQLITE_ENABLE_API_ARMOR if( zOptName==0 ){ (void)SQLITE_MISUSE_BKPT; return 0; @@ -175690,6 +182808,9 @@ SQLITE_API int sqlite3_unlock_notify( ){ int rc = SQLITE_OK; +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; +#endif sqlite3_mutex_enter(db->mutex); enterMutex(); @@ -176711,6 +183832,7 @@ struct Fts3Table { int nPgsz; /* Page size for host database */ char *zSegmentsTbl; /* Name of %_segments table */ sqlite3_blob *pSegments; /* Blob handle open on %_segments table */ + int iSavepoint; /* ** The following array of hash tables is used to buffer pending index @@ -177004,7 +184126,7 @@ struct Fts3MultiSegReader { int nAdvance; /* How many seg-readers to advance */ Fts3SegFilter *pFilter; /* Pointer to filter object */ char *aBuffer; /* Buffer to merge doclists in */ - int nBuffer; /* Allocated size of aBuffer[] in bytes */ + i64 nBuffer; /* Allocated size of aBuffer[] in bytes */ int iColFilter; /* If >=0, filter for this column */ int bRestart; @@ -177096,6 +184218,8 @@ SQLITE_PRIVATE int sqlite3FtsUnicodeIsalnum(int); SQLITE_PRIVATE int sqlite3FtsUnicodeIsdiacritic(int); #endif +SQLITE_PRIVATE int sqlite3Fts3ExprIterate(Fts3Expr*, int (*x)(Fts3Expr*,int,void*), void*); + #endif /* !SQLITE_CORE || SQLITE_ENABLE_FTS3 */ #endif /* _FTSINT_H */ @@ -177452,6 +184576,7 @@ static void fts3DeclareVtab(int *pRc, Fts3Table *p){ zLanguageid = (p->zLanguageid ? 
p->zLanguageid : "__langid"); sqlite3_vtab_config(p->db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1); + sqlite3_vtab_config(p->db, SQLITE_VTAB_INNOCUOUS); /* Create a list of user columns for the virtual table */ zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]); @@ -179700,7 +186825,7 @@ static int fts3TermSelectMerge( ** ** Similar padding is added in the fts3DoclistOrMerge() function. */ - pTS->aaOutput[0] = sqlite3_malloc(nDoclist + FTS3_VARINT_MAX + 1); + pTS->aaOutput[0] = sqlite3_malloc64((i64)nDoclist + FTS3_VARINT_MAX + 1); pTS->anOutput[0] = nDoclist; if( pTS->aaOutput[0] ){ memcpy(pTS->aaOutput[0], aDoclist, nDoclist); @@ -180701,6 +187826,8 @@ static int fts3RenameMethod( rc = sqlite3Fts3PendingTermsFlush(p); } + p->bIgnoreSavepoint = 1; + if( p->zContentTbl==0 ){ fts3DbExec(&rc, db, "ALTER TABLE %Q.'%q_content' RENAME TO '%q_content';", @@ -180728,6 +187855,8 @@ static int fts3RenameMethod( "ALTER TABLE %Q.'%q_segdir' RENAME TO '%q_segdir';", p->zDb, p->zName, zName ); + + p->bIgnoreSavepoint = 0; return rc; } @@ -180738,12 +187867,28 @@ static int fts3RenameMethod( */ static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ int rc = SQLITE_OK; - UNUSED_PARAMETER(iSavepoint); - assert( ((Fts3Table *)pVtab)->inTransaction ); - assert( ((Fts3Table *)pVtab)->mxSavepoint <= iSavepoint ); - TESTONLY( ((Fts3Table *)pVtab)->mxSavepoint = iSavepoint ); - if( ((Fts3Table *)pVtab)->bIgnoreSavepoint==0 ){ - rc = fts3SyncMethod(pVtab); + Fts3Table *pTab = (Fts3Table*)pVtab; + assert( pTab->inTransaction ); + assert( pTab->mxSavepoint<=iSavepoint ); + TESTONLY( pTab->mxSavepoint = iSavepoint ); + + if( pTab->bIgnoreSavepoint==0 ){ + if( fts3HashCount(&pTab->aIndex[0].hPending)>0 ){ + char *zSql = sqlite3_mprintf("INSERT INTO %Q.%Q(%Q) VALUES('flush')", + pTab->zDb, pTab->zName, pTab->zName + ); + if( zSql ){ + pTab->bIgnoreSavepoint = 1; + rc = sqlite3_exec(pTab->db, zSql, 0, 0, 0); + pTab->bIgnoreSavepoint = 0; + sqlite3_free(zSql); + }else{ + rc = SQLITE_NOMEM; + } + } + if( rc==SQLITE_OK ){ + pTab->iSavepoint = iSavepoint+1; + } } return rc; } @@ -180754,12 +187899,11 @@ static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ ** This is a no-op. */ static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ - TESTONLY( Fts3Table *p = (Fts3Table*)pVtab ); - UNUSED_PARAMETER(iSavepoint); - UNUSED_PARAMETER(pVtab); - assert( p->inTransaction ); - assert( p->mxSavepoint >= iSavepoint ); - TESTONLY( p->mxSavepoint = iSavepoint-1 ); + Fts3Table *pTab = (Fts3Table*)pVtab; + assert( pTab->inTransaction ); + assert( pTab->mxSavepoint >= iSavepoint ); + TESTONLY( pTab->mxSavepoint = iSavepoint-1 ); + pTab->iSavepoint = iSavepoint; return SQLITE_OK; } @@ -180769,11 +187913,13 @@ static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ ** Discard the contents of the pending terms table. */ static int fts3RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ - Fts3Table *p = (Fts3Table*)pVtab; + Fts3Table *pTab = (Fts3Table*)pVtab; UNUSED_PARAMETER(iSavepoint); - assert( p->inTransaction ); - TESTONLY( p->mxSavepoint = iSavepoint ); - sqlite3Fts3PendingTermsClear(p); + assert( pTab->inTransaction ); + TESTONLY( pTab->mxSavepoint = iSavepoint ); + if( (iSavepoint+1)<=pTab->iSavepoint ){ + sqlite3Fts3PendingTermsClear(pTab); + } return SQLITE_OK; } @@ -180792,8 +187938,49 @@ static int fts3ShadowName(const char *zName){ return 0; } +/* +** Implementation of the xIntegrity() method on the FTS3/FTS4 virtual +** table. 
+*/ +static int fts3Integrity( + sqlite3_vtab *pVtab, /* The virtual table to be checked */ + const char *zSchema, /* Name of schema in which pVtab lives */ + const char *zTabname, /* Name of the pVTab table */ + int isQuick, /* True if this is a quick_check */ + char **pzErr /* Write error message here */ +){ + Fts3Table *p = (Fts3Table*)pVtab; + char *zSql; + int rc; + char *zErr = 0; + + assert( pzErr!=0 ); + assert( *pzErr==0 ); + UNUSED_PARAMETER(isQuick); + zSql = sqlite3_mprintf( + "INSERT INTO \"%w\".\"%w\"(\"%w\") VALUES('integrity-check');", + zSchema, zTabname, zTabname); + if( zSql==0 ){ + return SQLITE_NOMEM; + } + rc = sqlite3_exec(p->db, zSql, 0, 0, &zErr); + sqlite3_free(zSql); + if( (rc&0xff)==SQLITE_CORRUPT ){ + *pzErr = sqlite3_mprintf("malformed inverted index for FTS%d table %s.%s", + p->bFts4 ? 4 : 3, zSchema, zTabname); + }else if( rc!=SQLITE_OK ){ + *pzErr = sqlite3_mprintf("unable to validate the inverted index for" + " FTS%d table %s.%s: %s", + p->bFts4 ? 4 : 3, zSchema, zTabname, zErr); + } + sqlite3_free(zErr); + return SQLITE_OK; +} + + + static const sqlite3_module fts3Module = { - /* iVersion */ 3, + /* iVersion */ 4, /* xCreate */ fts3CreateMethod, /* xConnect */ fts3ConnectMethod, /* xBestIndex */ fts3BestIndexMethod, @@ -180817,6 +188004,7 @@ static const sqlite3_module fts3Module = { /* xRelease */ fts3ReleaseMethod, /* xRollbackTo */ fts3RollbackToMethod, /* xShadowName */ fts3ShadowName, + /* xIntegrity */ fts3Integrity, }; /* @@ -181557,7 +188745,7 @@ static int fts3EvalIncrPhraseNext( if( bEof==0 ){ int nList = 0; int nByte = a[p->nToken-1].nList; - char *aDoclist = sqlite3_malloc(nByte+FTS3_BUFFER_PADDING); + char *aDoclist = sqlite3_malloc64((i64)nByte+FTS3_BUFFER_PADDING); if( !aDoclist ) return SQLITE_NOMEM; memcpy(aDoclist, a[p->nToken-1].pList, nByte+1); memset(&aDoclist[nByte], 0, FTS3_BUFFER_PADDING); @@ -182099,9 +189287,8 @@ static void fts3EvalNextRow( Fts3Expr *pExpr, /* Expr. to advance to next matching row */ int *pRc /* IN/OUT: Error code */ ){ - if( *pRc==SQLITE_OK ){ + if( *pRc==SQLITE_OK && pExpr->bEof==0 ){ int bDescDoclist = pCsr->bDesc; /* Used by DOCID_CMP() macro */ - assert( pExpr->bEof==0 ); pExpr->bStart = 1; switch( pExpr->eType ){ @@ -182577,6 +189764,22 @@ static void fts3EvalUpdateCounts(Fts3Expr *pExpr, int nCol){ } } +/* +** This is an sqlite3Fts3ExprIterate() callback. If the Fts3Expr.aMI[] array +** has not yet been allocated, allocate and zero it. Otherwise, just zero +** it. +*/ +static int fts3AllocateMSI(Fts3Expr *pExpr, int iPhrase, void *pCtx){ + Fts3Table *pTab = (Fts3Table*)pCtx; + UNUSED_PARAMETER(iPhrase); + if( pExpr->aMI==0 ){ + pExpr->aMI = (u32 *)sqlite3_malloc64(pTab->nColumn * 3 * sizeof(u32)); + if( pExpr->aMI==0 ) return SQLITE_NOMEM; + } + memset(pExpr->aMI, 0, pTab->nColumn * 3 * sizeof(u32)); + return SQLITE_OK; +} + /* ** Expression pExpr must be of type FTSQUERY_PHRASE. 
** @@ -182598,7 +189801,6 @@ static int fts3EvalGatherStats( if( pExpr->aMI==0 ){ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; Fts3Expr *pRoot; /* Root of NEAR expression */ - Fts3Expr *p; /* Iterator used for several purposes */ sqlite3_int64 iPrevId = pCsr->iPrevId; sqlite3_int64 iDocid; @@ -182606,7 +189808,9 @@ static int fts3EvalGatherStats( /* Find the root of the NEAR expression */ pRoot = pExpr; - while( pRoot->pParent && pRoot->pParent->eType==FTSQUERY_NEAR ){ + while( pRoot->pParent + && (pRoot->pParent->eType==FTSQUERY_NEAR || pRoot->bDeferred) + ){ pRoot = pRoot->pParent; } iDocid = pRoot->iDocid; @@ -182614,14 +189818,8 @@ static int fts3EvalGatherStats( assert( pRoot->bStart ); /* Allocate space for the aMSI[] array of each FTSQUERY_PHRASE node */ - for(p=pRoot; p; p=p->pLeft){ - Fts3Expr *pE = (p->eType==FTSQUERY_PHRASE?p:p->pRight); - assert( pE->aMI==0 ); - pE->aMI = (u32 *)sqlite3_malloc64(pTab->nColumn * 3 * sizeof(u32)); - if( !pE->aMI ) return SQLITE_NOMEM; - memset(pE->aMI, 0, pTab->nColumn * 3 * sizeof(u32)); - } - + rc = sqlite3Fts3ExprIterate(pRoot, fts3AllocateMSI, (void*)pTab); + if( rc!=SQLITE_OK ) return rc; fts3EvalRestart(pCsr, pRoot, &rc); while( pCsr->isEof==0 && rc==SQLITE_OK ){ @@ -182777,6 +189975,7 @@ SQLITE_PRIVATE int sqlite3Fts3EvalPhrasePoslist( u8 bTreeEof = 0; Fts3Expr *p; /* Used to iterate from pExpr to root */ Fts3Expr *pNear; /* Most senior NEAR ancestor (or pExpr) */ + Fts3Expr *pRun; /* Closest non-deferred ancestor of pNear */ int bMatch; /* Check if this phrase descends from an OR expression node. If not, @@ -182791,25 +189990,30 @@ SQLITE_PRIVATE int sqlite3Fts3EvalPhrasePoslist( if( p->bEof ) bTreeEof = 1; } if( bOr==0 ) return SQLITE_OK; + pRun = pNear; + while( pRun->bDeferred ){ + assert( pRun->pParent ); + pRun = pRun->pParent; + } /* This is the descendent of an OR node. In this case we cannot use ** an incremental phrase. Load the entire doclist for the phrase ** into memory in this case. 
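*/

The pRun cursor introduced above climbs past deferred ancestors so the
restart/advance loop operates on a node that actually owns a doclist.
Queries mixing OR with a multi-token phrase are the kind that appear to
reach this non-incremental path; a hedged sketch (schema illustrative):

  #include <sqlite3.h>

  static int matchQuery(sqlite3 *db){
    return sqlite3_exec(db,
        "SELECT docid FROM ft WHERE ft MATCH '\"big apple\" OR orange';",
        0, 0, 0);
  }

/*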
*/ if( pPhrase->bIncr ){ - int bEofSave = pNear->bEof; - fts3EvalRestart(pCsr, pNear, &rc); - while( rc==SQLITE_OK && !pNear->bEof ){ - fts3EvalNextRow(pCsr, pNear, &rc); - if( bEofSave==0 && pNear->iDocid==iDocid ) break; + int bEofSave = pRun->bEof; + fts3EvalRestart(pCsr, pRun, &rc); + while( rc==SQLITE_OK && !pRun->bEof ){ + fts3EvalNextRow(pCsr, pRun, &rc); + if( bEofSave==0 && pRun->iDocid==iDocid ) break; } assert( rc!=SQLITE_OK || pPhrase->bIncr==0 ); - if( rc==SQLITE_OK && pNear->bEof!=bEofSave ){ + if( rc==SQLITE_OK && pRun->bEof!=bEofSave ){ rc = FTS_CORRUPT_VTAB; } } if( bTreeEof ){ - while( rc==SQLITE_OK && !pNear->bEof ){ - fts3EvalNextRow(pCsr, pNear, &rc); + while( rc==SQLITE_OK && !pRun->bEof ){ + fts3EvalNextRow(pCsr, pRun, &rc); } } if( rc!=SQLITE_OK ) return rc; @@ -183476,7 +190680,8 @@ SQLITE_PRIVATE int sqlite3Fts3InitAux(sqlite3 *db){ 0, /* xSavepoint */ 0, /* xRelease */ 0, /* xRollbackTo */ - 0 /* xShadowName */ + 0, /* xShadowName */ + 0 /* xIntegrity */ }; int rc; /* Return code */ @@ -185793,7 +192998,7 @@ static int porterNext( if( n>c->nAllocated ){ char *pNew; c->nAllocated = n+20; - pNew = sqlite3_realloc(c->zToken, c->nAllocated); + pNew = sqlite3_realloc64(c->zToken, c->nAllocated); if( !pNew ) return SQLITE_NOMEM; c->zToken = pNew; } @@ -186545,7 +193750,7 @@ static int simpleNext( if( n>c->nTokenAllocated ){ char *pNew; c->nTokenAllocated = n+20; - pNew = sqlite3_realloc(c->pToken, c->nTokenAllocated); + pNew = sqlite3_realloc64(c->pToken, c->nTokenAllocated); if( !pNew ) return SQLITE_NOMEM; c->pToken = pNew; } @@ -187042,7 +194247,8 @@ SQLITE_PRIVATE int sqlite3Fts3InitTok(sqlite3 *db, Fts3Hash *pHash, void(*xDestr 0, /* xSavepoint */ 0, /* xRelease */ 0, /* xRollbackTo */ - 0 /* xShadowName */ + 0, /* xShadowName */ + 0 /* xIntegrity */ }; int rc; /* Return code */ @@ -187707,7 +194913,7 @@ static int fts3PendingListAppendVarint( /* Allocate or grow the PendingList as required. 
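*/

The hunks in this region systematically swap sqlite3_malloc()/
sqlite3_realloc() for their 64-bit variants, promoting size arithmetic to
i64 before any doubling or addition can wrap a 32-bit int. Reduced to a
sketch, the pattern is:

  #include <sqlite3.h>

  /* Grow a buffer safely: do the size math in 64-bit space, then allocate. */
  static char *growBuffer(char *pOld, sqlite3_int64 nOld){
    sqlite3_int64 nNew = nOld*2 + 100;   /* cannot overflow an i64 here */
    return (char*)sqlite3_realloc64(pOld, nNew);
  }

/*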
*/ if( !p ){ - p = sqlite3_malloc(sizeof(*p) + 100); + p = sqlite3_malloc64(sizeof(*p) + 100); if( !p ){ return SQLITE_NOMEM; } @@ -187716,14 +194922,14 @@ static int fts3PendingListAppendVarint( p->nData = 0; } else if( p->nData+FTS3_VARINT_MAX+1>p->nSpace ){ - int nNew = p->nSpace * 2; - p = sqlite3_realloc(p, sizeof(*p) + nNew); + i64 nNew = p->nSpace * 2; + p = sqlite3_realloc64(p, sizeof(*p) + nNew); if( !p ){ sqlite3_free(*pp); *pp = 0; return SQLITE_NOMEM; } - p->nSpace = nNew; + p->nSpace = (int)nNew; p->aData = (char *)&p[1]; } @@ -188280,7 +195486,7 @@ SQLITE_PRIVATE int sqlite3Fts3ReadBlock( int nByte = sqlite3_blob_bytes(p->pSegments); *pnBlob = nByte; if( paBlob ){ - char *aByte = sqlite3_malloc(nByte + FTS3_NODE_PADDING); + char *aByte = sqlite3_malloc64((i64)nByte + FTS3_NODE_PADDING); if( !aByte ){ rc = SQLITE_NOMEM; }else{ @@ -188397,7 +195603,7 @@ static int fts3SegReaderNext( int nTerm = fts3HashKeysize(pElem); if( (nTerm+1)>pReader->nTermAlloc ){ sqlite3_free(pReader->zTerm); - pReader->zTerm = (char*)sqlite3_malloc((nTerm+1)*2); + pReader->zTerm = (char*)sqlite3_malloc64(((i64)nTerm+1)*2); if( !pReader->zTerm ) return SQLITE_NOMEM; pReader->nTermAlloc = (nTerm+1)*2; } @@ -188405,7 +195611,7 @@ pReader->zTerm[nTerm] = '\0'; pReader->nTerm = nTerm; - aCopy = (char*)sqlite3_malloc(nCopy); + aCopy = (char*)sqlite3_malloc64(nCopy); if( !aCopy ) return SQLITE_NOMEM; memcpy(aCopy, pList->aData, nCopy); pReader->nNode = pReader->nDoclist = nCopy; @@ -188692,7 +195898,7 @@ SQLITE_PRIVATE int sqlite3Fts3SegReaderNew( nExtra = nRoot + FTS3_NODE_PADDING; } - pReader = (Fts3SegReader *)sqlite3_malloc(sizeof(Fts3SegReader) + nExtra); + pReader = (Fts3SegReader *)sqlite3_malloc64(sizeof(Fts3SegReader) + nExtra); if( !pReader ){ return SQLITE_NOMEM; } @@ -188784,7 +195990,7 @@ SQLITE_PRIVATE int sqlite3Fts3SegReaderPending( if( nElem==nAlloc ){ Fts3HashElem **aElem2; nAlloc += 16; - aElem2 = (Fts3HashElem **)sqlite3_realloc( + aElem2 = (Fts3HashElem **)sqlite3_realloc64( aElem, nAlloc*sizeof(Fts3HashElem *) ); if( !aElem2 ){ @@ -189118,7 +196324,7 @@ static int fts3NodeAddTerm( ** this is not expected to be a serious problem. */ assert( pTree->aData==(char *)&pTree[1] ); - pTree->aData = (char *)sqlite3_malloc(nReq); + pTree->aData = (char *)sqlite3_malloc64(nReq); if( !pTree->aData ){ return SQLITE_NOMEM; } @@ -189136,7 +196342,7 @@ if( isCopyTerm ){ if( pTree->nMalloc<nTerm ){ - char *zNew = sqlite3_realloc(pTree->zMalloc, nTerm*2); + char *zNew = sqlite3_realloc64(pTree->zMalloc, (i64)nTerm*2); if( !zNew ){ return SQLITE_NOMEM; } @@ -189162,7 +196368,7 @@ ** now. Instead, the term is inserted into the parent of pTree. If pTree ** has no parent, one is created here.
*/ - pNew = (SegmentNode *)sqlite3_malloc(sizeof(SegmentNode) + p->nNodeSize); + pNew = (SegmentNode *)sqlite3_malloc64(sizeof(SegmentNode) + p->nNodeSize); if( !pNew ){ return SQLITE_NOMEM; } @@ -189300,7 +196506,7 @@ static int fts3SegWriterAdd( ){ int nPrefix; /* Size of term prefix in bytes */ int nSuffix; /* Size of term suffix in bytes */ - int nReq; /* Number of bytes required on leaf page */ + i64 nReq; /* Number of bytes required on leaf page */ int nData; SegmentWriter *pWriter = *ppWriter; @@ -189309,13 +196515,13 @@ static int fts3SegWriterAdd( sqlite3_stmt *pStmt; /* Allocate the SegmentWriter structure */ - pWriter = (SegmentWriter *)sqlite3_malloc(sizeof(SegmentWriter)); + pWriter = (SegmentWriter *)sqlite3_malloc64(sizeof(SegmentWriter)); if( !pWriter ) return SQLITE_NOMEM; memset(pWriter, 0, sizeof(SegmentWriter)); *ppWriter = pWriter; /* Allocate a buffer in which to accumulate data */ - pWriter->aData = (char *)sqlite3_malloc(p->nNodeSize); + pWriter->aData = (char *)sqlite3_malloc64(p->nNodeSize); if( !pWriter->aData ) return SQLITE_NOMEM; pWriter->nSize = p->nNodeSize; @@ -189390,7 +196596,7 @@ static int fts3SegWriterAdd( ** the buffer to make it large enough. */ if( nReq>pWriter->nSize ){ - char *aNew = sqlite3_realloc(pWriter->aData, nReq); + char *aNew = sqlite3_realloc64(pWriter->aData, nReq); if( !aNew ) return SQLITE_NOMEM; pWriter->aData = aNew; pWriter->nSize = nReq; @@ -189415,7 +196621,7 @@ static int fts3SegWriterAdd( */ if( isCopyTerm ){ if( nTerm>pWriter->nMalloc ){ - char *zNew = sqlite3_realloc(pWriter->zMalloc, nTerm*2); + char *zNew = sqlite3_realloc64(pWriter->zMalloc, (i64)nTerm*2); if( !zNew ){ return SQLITE_NOMEM; } @@ -189723,18 +196929,20 @@ static void fts3ColumnFilter( static int fts3MsrBufferData( Fts3MultiSegReader *pMsr, /* Multi-segment-reader handle */ char *pList, - int nList + i64 nList ){ - if( nList>pMsr->nBuffer ){ + if( (nList+FTS3_NODE_PADDING)>pMsr->nBuffer ){ char *pNew; - pMsr->nBuffer = nList*2; - pNew = (char *)sqlite3_realloc(pMsr->aBuffer, pMsr->nBuffer); + int nNew = nList*2 + FTS3_NODE_PADDING; + pNew = (char *)sqlite3_realloc64(pMsr->aBuffer, nNew); if( !pNew ) return SQLITE_NOMEM; pMsr->aBuffer = pNew; + pMsr->nBuffer = nNew; } assert( nList>0 ); memcpy(pMsr->aBuffer, pList, nList); + memset(&pMsr->aBuffer[nList], 0, FTS3_NODE_PADDING); return SQLITE_OK; } @@ -189784,7 +196992,7 @@ SQLITE_PRIVATE int sqlite3Fts3MsrIncrNext( fts3SegReaderSort(pMsr->apSegment, nMerge, j, xCmp); if( nList>0 && fts3SegReaderIsPending(apSegment[0]) ){ - rc = fts3MsrBufferData(pMsr, pList, nList+1); + rc = fts3MsrBufferData(pMsr, pList, (i64)nList+1); if( rc!=SQLITE_OK ) return rc; assert( (pMsr->aBuffer[nList] & 0xFE)==0x00 ); pList = pMsr->aBuffer; @@ -189921,11 +197129,11 @@ SQLITE_PRIVATE int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr){ return SQLITE_OK; } -static int fts3GrowSegReaderBuffer(Fts3MultiSegReader *pCsr, int nReq){ +static int fts3GrowSegReaderBuffer(Fts3MultiSegReader *pCsr, i64 nReq){ if( nReq>pCsr->nBuffer ){ char *aNew; pCsr->nBuffer = nReq*2; - aNew = sqlite3_realloc(pCsr->aBuffer, pCsr->nBuffer); + aNew = sqlite3_realloc64(pCsr->aBuffer, pCsr->nBuffer); if( !aNew ){ return SQLITE_NOMEM; } @@ -190016,7 +197224,8 @@ SQLITE_PRIVATE int sqlite3Fts3SegReaderStep( ){ pCsr->nDoclist = apSegment[0]->nDoclist; if( fts3SegReaderIsPending(apSegment[0]) ){ - rc = fts3MsrBufferData(pCsr, apSegment[0]->aDoclist, pCsr->nDoclist); + rc = fts3MsrBufferData(pCsr, apSegment[0]->aDoclist, + (i64)pCsr->nDoclist); pCsr->aDoclist = 
pCsr->aBuffer; }else{ pCsr->aDoclist = apSegment[0]->aDoclist; @@ -190069,7 +197278,8 @@ SQLITE_PRIVATE int sqlite3Fts3SegReaderStep( nByte = sqlite3Fts3VarintLen(iDelta) + (isRequirePos?nList+1:0); - rc = fts3GrowSegReaderBuffer(pCsr, nByte+nDoclist+FTS3_NODE_PADDING); + rc = fts3GrowSegReaderBuffer(pCsr, + (i64)nByte+nDoclist+FTS3_NODE_PADDING); if( rc ) return rc; if( isFirst ){ @@ -190095,7 +197305,7 @@ SQLITE_PRIVATE int sqlite3Fts3SegReaderStep( fts3SegReaderSort(apSegment, nMerge, j, xCmp); } if( nDoclist>0 ){ - rc = fts3GrowSegReaderBuffer(pCsr, nDoclist+FTS3_NODE_PADDING); + rc = fts3GrowSegReaderBuffer(pCsr, (i64)nDoclist+FTS3_NODE_PADDING); if( rc ) return rc; memset(&pCsr->aBuffer[nDoclist], 0, FTS3_NODE_PADDING); pCsr->aDoclist = pCsr->aBuffer; @@ -190379,7 +197589,6 @@ SQLITE_PRIVATE int sqlite3Fts3PendingTermsFlush(Fts3Table *p){ rc = fts3SegmentMerge(p, p->iPrevLangid, i, FTS3_SEGCURSOR_PENDING); if( rc==SQLITE_DONE ) rc = SQLITE_OK; } - sqlite3Fts3PendingTermsClear(p); /* Determine the auto-incr-merge setting if unknown. If enabled, ** estimate the number of leaf blocks of content to be written @@ -190401,6 +197610,10 @@ SQLITE_PRIVATE int sqlite3Fts3PendingTermsFlush(Fts3Table *p){ rc = sqlite3_reset(pStmt); } } + + if( rc==SQLITE_OK ){ + sqlite3Fts3PendingTermsClear(p); + } return rc; } @@ -190808,7 +198021,7 @@ struct NodeReader { static void blobGrowBuffer(Blob *pBlob, int nMin, int *pRc){ if( *pRc==SQLITE_OK && nMin>pBlob->nAlloc ){ int nAlloc = nMin; - char *a = (char *)sqlite3_realloc(pBlob->a, nAlloc); + char *a = (char *)sqlite3_realloc64(pBlob->a, nAlloc); if( a ){ pBlob->nAlloc = nAlloc; pBlob->a = a; @@ -191032,6 +198245,8 @@ static int fts3AppendToNode( blobGrowBuffer(pPrev, nTerm, &rc); if( rc!=SQLITE_OK ) return rc; + assert( pPrev!=0 ); + assert( pPrev->a!=0 ); nPrefix = fts3PrefixCompress(pPrev->a, pPrev->n, zTerm, nTerm); nSuffix = nTerm - nPrefix; @@ -191088,9 +198303,13 @@ static int fts3IncrmergeAppend( nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist; /* If the current block is not empty, and if adding this term/doclist - ** to the current block would make it larger than Fts3Table.nNodeSize - ** bytes, write this block out to the database. */ - if( pLeaf->block.n>0 && (pLeaf->block.n + nSpace)>p->nNodeSize ){ + ** to the current block would make it larger than Fts3Table.nNodeSize bytes, + ** and if there is still room for another leaf page, write this block out to + ** the database. 
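Two behavioral changes sit in these hunks besides the allocator swap: fts3MsrBufferData() now zero-fills FTS3_NODE_PADDING bytes past the copied doclist, and sqlite3Fts3PendingTermsFlush() clears the pending-terms hash only after the merge succeeded, so a failed flush no longer discards buffered terms. The flush path also becomes reachable from SQL through the new 'flush' special insert added below. A hedged usage sketch (the table name "docs" is hypothetical; assumes the bundled SQLite is built with FTS3/FTS4):

#include <stdio.h>
#include "sqlite3.h"

/* Force FTS3/FTS4 pending terms to disk via the special-insert command;
** internally this routes to sqlite3Fts3PendingTermsFlush(). */
static int fts_flush(sqlite3 *db){
  char *zErr = 0;
  int rc = sqlite3_exec(db, "INSERT INTO docs(docs) VALUES('flush')",
                        0, 0, &zErr);
  if( rc!=SQLITE_OK ){
    fprintf(stderr, "flush failed: %s\n", zErr ? zErr : "unknown error");
    sqlite3_free(zErr);
  }
  return rc;
}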
*/ + if( pLeaf->block.n>0 + && (pLeaf->block.n + nSpace)>p->nNodeSize + && pLeaf->iBlock < (pWriter->iStart + pWriter->nLeafEst) + ){ rc = fts3WriteSegment(p, pLeaf->iBlock, pLeaf->block.a, pLeaf->block.n); pWriter->nWork++; @@ -191401,6 +198620,7 @@ static int fts3IncrmergeLoad( for(i=nHeight; i>=0 && rc==SQLITE_OK; i--){ NodeReader reader; + memset(&reader, 0, sizeof(reader)); pNode = &pWriter->aNodeWriter[i]; if( pNode->block.a){ @@ -191421,7 +198641,7 @@ static int fts3IncrmergeLoad( rc = sqlite3Fts3ReadBlock(p, reader.iChild, &aBlock, &nBlock,0); blobGrowBuffer(&pNode->block, MAX(nBlock, p->nNodeSize)+FTS3_NODE_PADDING, &rc - ); + ); if( rc==SQLITE_OK ){ memcpy(pNode->block.a, aBlock, nBlock); pNode->block.n = nBlock; @@ -191605,7 +198825,7 @@ static int fts3RepackSegdirLevel( if( nIdx>=nAlloc ){ int *aNew; nAlloc += 16; - aNew = sqlite3_realloc(aIdx, nAlloc*sizeof(int)); + aNew = sqlite3_realloc64(aIdx, nAlloc*sizeof(int)); if( !aNew ){ rc = SQLITE_NOMEM; break; @@ -191979,7 +199199,7 @@ SQLITE_PRIVATE int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){ /* Allocate space for the cursor, filter and writer objects */ const int nAlloc = sizeof(*pCsr) + sizeof(*pFilter) + sizeof(*pWriter); - pWriter = (IncrmergeWriter *)sqlite3_malloc(nAlloc); + pWriter = (IncrmergeWriter *)sqlite3_malloc64(nAlloc); if( !pWriter ) return SQLITE_NOMEM; pFilter = (Fts3SegFilter *)&pWriter[1]; pCsr = (Fts3MultiSegReader *)&pFilter[1]; @@ -192271,7 +199491,7 @@ static u64 fts3ChecksumIndex( int rc; u64 cksum = 0; - assert( *pRc==SQLITE_OK ); + if( *pRc ) return 0; memset(&filter, 0, sizeof(filter)); memset(&csr, 0, sizeof(csr)); @@ -192486,8 +199706,11 @@ static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){ rc = fts3DoIncrmerge(p, &zVal[6]); }else if( nVal>10 && 0==sqlite3_strnicmp(zVal, "automerge=", 10) ){ rc = fts3DoAutoincrmerge(p, &zVal[10]); + }else if( nVal==5 && 0==sqlite3_strnicmp(zVal, "flush", 5) ){ + rc = sqlite3Fts3PendingTermsFlush(p); + } #if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) - }else{ + else{ int v; if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){ v = atoi(&zVal[9]); @@ -192505,8 +199728,8 @@ static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){ if( v>=4 && v<=FTS3_MERGE_COUNT && (v&1)==0 ) p->nMergeCount = v; rc = SQLITE_OK; } -#endif } +#endif return rc; } @@ -192615,7 +199838,7 @@ SQLITE_PRIVATE int sqlite3Fts3DeferredTokenList( return SQLITE_OK; } - pRet = (char *)sqlite3_malloc(p->pList->nData); + pRet = (char *)sqlite3_malloc64(p->pList->nData); if( !pRet ) return SQLITE_NOMEM; nSkip = sqlite3Fts3GetVarint(p->pList->aData, &dummy); @@ -192635,7 +199858,7 @@ SQLITE_PRIVATE int sqlite3Fts3DeferToken( int iCol /* Column that token must appear in (or -1) */ ){ Fts3DeferredToken *pDeferred; - pDeferred = sqlite3_malloc(sizeof(*pDeferred)); + pDeferred = sqlite3_malloc64(sizeof(*pDeferred)); if( !pDeferred ){ return SQLITE_NOMEM; } @@ -192914,7 +200137,7 @@ typedef sqlite3_int64 i64; /* -** Used as an fts3ExprIterate() context when loading phrase doclists to +** Used as an sqlite3Fts3ExprIterate() context when loading phrase doclists to ** Fts3Expr.aDoclist[]/nDoclist. */ typedef struct LoadDoclistCtx LoadDoclistCtx; @@ -192958,7 +200181,7 @@ struct SnippetFragment { }; /* -** This type is used as an fts3ExprIterate() context object while +** This type is used as an sqlite3Fts3ExprIterate() context object while ** accumulating the data returned by the matchinfo() function. 
*/ typedef struct MatchInfo MatchInfo; @@ -193117,7 +200340,7 @@ static void fts3GetDeltaPosition(char **pp, i64 *piPos){ } /* -** Helper function for fts3ExprIterate() (see below). +** Helper function for sqlite3Fts3ExprIterate() (see below). */ static int fts3ExprIterate2( Fts3Expr *pExpr, /* Expression to iterate phrases of */ @@ -193151,7 +200374,7 @@ static int fts3ExprIterate2( ** Otherwise, SQLITE_OK is returned after a callback has been made for ** all eligible phrase nodes. */ -static int fts3ExprIterate( +SQLITE_PRIVATE int sqlite3Fts3ExprIterate( Fts3Expr *pExpr, /* Expression to iterate phrases of */ int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ void *pCtx /* Second argument to pass to callback */ @@ -193160,10 +200383,9 @@ static int fts3ExprIterate( return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx); } - /* -** This is an fts3ExprIterate() callback used while loading the doclists -** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also +** This is an sqlite3Fts3ExprIterate() callback used while loading the +** doclists for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also ** fts3ExprLoadDoclists(). */ static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ @@ -193195,9 +200417,9 @@ static int fts3ExprLoadDoclists( int *pnToken /* OUT: Number of tokens in query */ ){ int rc; /* Return Code */ - LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */ + LoadDoclistCtx sCtx = {0,0,0}; /* Context for sqlite3Fts3ExprIterate() */ sCtx.pCsr = pCsr; - rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx); + rc = sqlite3Fts3ExprIterate(pCsr->pExpr,fts3ExprLoadDoclistsCb,(void*)&sCtx); if( pnPhrase ) *pnPhrase = sCtx.nPhrase; if( pnToken ) *pnToken = sCtx.nToken; return rc; @@ -193210,7 +200432,7 @@ static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ } static int fts3ExprPhraseCount(Fts3Expr *pExpr){ int nPhrase = 0; - (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase); + (void)sqlite3Fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase); return nPhrase; } @@ -193338,8 +200560,9 @@ static void fts3SnippetDetails( } /* -** This function is an fts3ExprIterate() callback used by fts3BestSnippet(). -** Each invocation populates an element of the SnippetIter.aPhrase[] array. +** This function is an sqlite3Fts3ExprIterate() callback used by +** fts3BestSnippet(). Each invocation populates an element of the +** SnippetIter.aPhrase[] array. */ static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ SnippetIter *p = (SnippetIter *)ctx; @@ -193429,7 +200652,9 @@ static int fts3BestSnippet( sIter.nSnippet = nSnippet; sIter.nPhrase = nList; sIter.iCurrent = -1; - rc = fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void*)&sIter); + rc = sqlite3Fts3ExprIterate( + pCsr->pExpr, fts3SnippetFindPositions, (void*)&sIter + ); if( rc==SQLITE_OK ){ /* Set the *pmSeen output variable. */ @@ -193790,10 +201015,10 @@ static int fts3ExprLHitGather( } /* -** fts3ExprIterate() callback used to collect the "global" matchinfo stats -** for a single query. +** sqlite3Fts3ExprIterate() callback used to collect the "global" matchinfo +** stats for a single query. ** -** fts3ExprIterate() callback to load the 'global' elements of a +** sqlite3Fts3ExprIterate() callback to load the 'global' elements of a ** FTS3_MATCHINFO_HITS matchinfo array. 
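These hunks rename the phrase iterator from the file-local fts3ExprIterate() to the SQLITE_PRIVATE sqlite3Fts3ExprIterate() so other FTS3 translation units can reuse it. The helper itself is a plain recursive visitor; a self-contained sketch of the same contract, with illustrative types rather than SQLite's:

/* Walk a binary expression tree, invoking a callback once per leaf
** (phrase) node, numbering leaves left to right - the contract
** sqlite3Fts3ExprIterate() provides for Fts3Expr trees. */
typedef struct Expr Expr;
struct Expr { Expr *pLeft, *pRight; };

static int exprIterate(Expr *p, int (*x)(Expr*,int,void*),
                       int *piLeaf, void *pCtx){
  int rc = 0;
  if( p->pLeft==0 && p->pRight==0 ){
    rc = x(p, (*piLeaf)++, pCtx);            /* leaf: fire the callback */
  }else{
    if( p->pLeft ) rc = exprIterate(p->pLeft, x, piLeaf, pCtx);
    if( rc==0 && p->pRight ) rc = exprIterate(p->pRight, x, piLeaf, pCtx);
  }
  return rc;
}

/* Counting callback in the style of fts3ExprPhraseCountCb() */
static int countCb(Expr *p, int iLeaf, void *pCtx){
  (void)p; (void)iLeaf;
  ++*(int*)pCtx;
  return 0;
}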
The global stats are those elements ** of the matchinfo array that are constant for all rows returned by the ** current query. @@ -193828,7 +201053,7 @@ static int fts3ExprGlobalHitsCb( } /* -** fts3ExprIterate() callback used to collect the "local" part of the +** sqlite3Fts3ExprIterate() callback used to collect the "local" part of the ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the ** array that are different for each row returned by the query. */ @@ -194024,7 +201249,7 @@ static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){ **/ aIter = sqlite3Fts3MallocZero(sizeof(LcsIterator) * pCsr->nPhrase); if( !aIter ) return SQLITE_NOMEM; - (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); + (void)sqlite3Fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); for(i=0; i<pCsr->nPhrase; i++){ LcsIterator *pIter = &aIter[i]; @@ -194201,11 +201426,11 @@ static int fts3MatchinfoValues( rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc,0,0); if( rc!=SQLITE_OK ) break; } - rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo); + rc = sqlite3Fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo); sqlite3Fts3EvalTestDeferred(pCsr, &rc); if( rc!=SQLITE_OK ) break; } - (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo); + (void)sqlite3Fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo); break; } } @@ -194428,7 +201653,7 @@ struct TermOffsetCtx { }; /* -** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets(). +** This function is an sqlite3Fts3ExprIterate() callback used by sqlite3Fts3Offsets(). */ static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){ TermOffsetCtx *p = (TermOffsetCtx *)ctx; @@ -194510,7 +201735,9 @@ SQLITE_PRIVATE void sqlite3Fts3Offsets( */ sCtx.iCol = iCol; sCtx.iTerm = 0; - rc = fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void*)&sCtx); + rc = sqlite3Fts3ExprIterate( + pCsr->pExpr, fts3ExprTermOffsetInit, (void*)&sCtx + ); if( rc!=SQLITE_OK ) goto offsets_out; /* Retreive the text stored in column iCol. If an SQL NULL is stored @@ -195443,25 +202670,51 @@ SQLITE_PRIVATE int sqlite3FtsUnicodeFold(int c, int eRemoveDiacritic){ ** increase for the parser. (Ubuntu14.10 gcc 4.8.4 x64 with -Os).
*/ static const char jsonIsSpace[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; #define fast_isspace(x) (jsonIsSpace[(unsigned char)x]) +/* +** Characters that are special to JSON. Control charaters, +** '"' and '\\'. +*/ +static const char jsonIsOk[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 +}; + + #if !defined(SQLITE_DEBUG) && !defined(SQLITE_COVERAGE_TEST) # define VVA(X) #else @@ -195472,6 +202725,7 @@ static const char jsonIsSpace[] = { typedef struct JsonString JsonString; typedef struct JsonNode JsonNode; typedef struct JsonParse JsonParse; +typedef struct JsonCleanup JsonCleanup; /* An instance of this object represents a JSON string ** under construction. Really, this is a generic string accumulator @@ -195487,16 +202741,26 @@ struct JsonString { char zSpace[100]; /* Initial static space */ }; +/* A deferred cleanup task. A list of JsonCleanup objects might be +** run when the JsonParse object is destroyed. 
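jsonIsSpace[] and the new jsonIsOk[] are 256-entry lookup tables: one indexed load answers "is this byte whitespace?" or "can this byte pass through a JSON string unescaped?", which is what lets the rewritten string scanner skip ordinary characters without a chain of comparisons. A stand-alone sketch of the same idiom (not SQLite's tables):

#include <string.h>

static unsigned char needsEscape[256];

static void initNeedsEscape(void){
  int c;
  memset(needsEscape, 0, sizeof(needsEscape));
  for(c=0; c<=0x1f; c++) needsEscape[c] = 1;   /* control characters */
  needsEscape['"'] = 1;
  needsEscape['\\'] = 1;
}

/* Length of the prefix of z[0..n) that can be copied verbatim */
static int plainSpan(const char *z, int n){
  int i;
  for(i=0; i<n && !needsEscape[(unsigned char)z[i]]; i++){}
  return i;
}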
+*/ +struct JsonCleanup { + JsonCleanup *pJCNext; /* Next in a list */ + void (*xOp)(void*); /* Routine to run */ + void *pArg; /* Argument to xOp() */ +}; + /* JSON type values */ -#define JSON_NULL 0 -#define JSON_TRUE 1 -#define JSON_FALSE 2 -#define JSON_INT 3 -#define JSON_REAL 4 -#define JSON_STRING 5 -#define JSON_ARRAY 6 -#define JSON_OBJECT 7 +#define JSON_SUBST 0 /* Special edit node. Uses u.iPrev */ +#define JSON_NULL 1 +#define JSON_TRUE 2 +#define JSON_FALSE 3 +#define JSON_INT 4 +#define JSON_REAL 5 +#define JSON_STRING 6 +#define JSON_ARRAY 7 +#define JSON_OBJECT 8 /* The "subtype" set for JSON values */ #define JSON_SUBTYPE 74 /* Ascii for "J" */ @@ -195505,59 +202769,97 @@ struct JsonString { ** Names of the various JSON types: */ static const char * const jsonType[] = { + "subst", "null", "true", "false", "integer", "real", "text", "array", "object" }; /* Bit values for the JsonNode.jnFlag field */ -#define JNODE_RAW 0x01 /* Content is raw, not JSON encoded */ -#define JNODE_ESCAPE 0x02 /* Content is text with \ escapes */ -#define JNODE_REMOVE 0x04 /* Do not output */ -#define JNODE_REPLACE 0x08 /* Replace with JsonNode.u.iReplace */ -#define JNODE_PATCH 0x10 /* Patch with JsonNode.u.pPatch */ -#define JNODE_APPEND 0x20 /* More ARRAY/OBJECT entries at u.iAppend */ -#define JNODE_LABEL 0x40 /* Is a label of an object */ +#define JNODE_RAW 0x01 /* Content is raw, not JSON encoded */ +#define JNODE_ESCAPE 0x02 /* Content is text with \ escapes */ +#define JNODE_REMOVE 0x04 /* Do not output */ +#define JNODE_REPLACE 0x08 /* Target of a JSON_SUBST node */ +#define JNODE_APPEND 0x10 /* More ARRAY/OBJECT entries at u.iAppend */ +#define JNODE_LABEL 0x20 /* Is a label of an object */ +#define JNODE_JSON5 0x40 /* Node contains JSON5 enhancements */ -/* A single node of parsed JSON +/* A single node of parsed JSON. An array of these nodes describes +** a parse of JSON + edits. +** +** Use the json_parse() SQL function (available when compiled with +** -DSQLITE_DEBUG) to see a dump of complete JsonParse objects, including +** a complete listing and decoding of the array of JsonNodes. */ struct JsonNode { u8 eType; /* One of the JSON_ type values */ u8 jnFlags; /* JNODE flags */ u8 eU; /* Which union element to use */ - u32 n; /* Bytes of content, or number of sub-nodes */ + u32 n; /* Bytes of content for INT, REAL or STRING + ** Number of sub-nodes for ARRAY and OBJECT + ** Node that SUBST applies to */ union { const char *zJContent; /* 1: Content for INT, REAL, and STRING */ u32 iAppend; /* 2: More terms for ARRAY and OBJECT */ u32 iKey; /* 3: Key for ARRAY objects in json_tree() */ - u32 iReplace; /* 4: Replacement content for JNODE_REPLACE */ - JsonNode *pPatch; /* 5: Node chain of patch for JNODE_PATCH */ + u32 iPrev; /* 4: Previous SUBST node, or 0 */ } u; }; -/* A completely parsed JSON string + +/* A parsed and possibly edited JSON string. Lifecycle: +** +** 1. JSON comes in and is parsed into an array aNode[]. The original +** JSON text is stored in zJson. +** +** 2. Zero or more changes are made (via json_remove() or json_replace() +** or similar) to the aNode[] array. +** +** 3. A new, edited and mimified JSON string is generated from aNode +** and stored in zAlt. The JsonParse object always owns zAlt. +** +** Step 1 always happens. Step 2 and 3 may or may not happen, depending +** on the operation. +** +** aNode[].u.zJContent entries typically point into zJson. Hence zJson +** must remain valid for the lifespan of the parse. 
For edits, +** aNode[].u.zJContent might point to malloced space other than zJson. +** Entries in pClup are responsible for freeing that extra malloced space. +** +** When walking the parse tree in aNode[], edits are ignored if useMod is +** false. */ struct JsonParse { u32 nNode; /* Number of slots of aNode[] used */ u32 nAlloc; /* Number of slots of aNode[] allocated */ JsonNode *aNode; /* Array of nodes containing the parse */ - const char *zJson; /* Original JSON string */ + char *zJson; /* Original JSON string (before edits) */ + char *zAlt; /* Revised and/or mimified JSON */ u32 *aUp; /* Index of parent of each node */ - u8 oom; /* Set to true if out of memory */ - u8 nErr; /* Number of errors seen */ + JsonCleanup *pClup;/* Cleanup operations prior to freeing this object */ u16 iDepth; /* Nesting depth */ + u8 nErr; /* Number of errors seen */ + u8 oom; /* Set to true if out of memory */ + u8 bJsonIsRCStr; /* True if zJson is an RCStr */ + u8 hasNonstd; /* True if input uses non-standard features like JSON5 */ + u8 useMod; /* Actually use the edits contain inside aNode */ + u8 hasMod; /* aNode contains edits from the original zJson */ + u32 nJPRef; /* Number of references to this object */ int nJson; /* Length of the zJson string in bytes */ - u32 iHold; /* Replace cache line with the lowest iHold value */ + int nAlt; /* Length of alternative JSON string zAlt, in bytes */ + u32 iErr; /* Error location in zJson[] */ + u32 iSubst; /* Last JSON_SUBST entry in aNode[] */ + u32 iHold; /* Age of this entry in the cache for LRU replacement */ }; /* ** Maximum nesting depth of JSON for this implementation. ** ** This limit is needed to avoid a stack overflow in the recursive -** descent parser. A depth of 2000 is far deeper than any sane JSON -** should go. +** descent parser. A depth of 1000 is far deeper than any sane JSON +** should go. Historical note: This limit was 2000 prior to version 3.42.0 */ -#define JSON_MAX_DEPTH 2000 +#define JSON_MAX_DEPTH 1000 /************************************************************************** ** Utility routines for dealing with JsonString objects @@ -195580,16 +202882,14 @@ static void jsonInit(JsonString *p, sqlite3_context *pCtx){ jsonZero(p); } - /* Free all allocated memory and reset the JsonString object back to its ** initial state. */ static void jsonReset(JsonString *p){ - if( !p->bStatic ) sqlite3_free(p->zBuf); + if( !p->bStatic ) sqlite3RCStrUnref(p->zBuf); jsonZero(p); } - /* Report an out-of-memory (OOM) condition */ static void jsonOom(JsonString *p){ @@ -195606,7 +202906,7 @@ static int jsonGrow(JsonString *p, u32 N){ char *zNew; if( p->bStatic ){ if( p->bErr ) return 1; - zNew = sqlite3_malloc64(nTotal); + zNew = sqlite3RCStrNew(nTotal); if( zNew==0 ){ jsonOom(p); return SQLITE_NOMEM; @@ -195615,12 +202915,12 @@ static int jsonGrow(JsonString *p, u32 N){ p->zBuf = zNew; p->bStatic = 0; }else{ - zNew = sqlite3_realloc64(p->zBuf, nTotal); - if( zNew==0 ){ - jsonOom(p); + p->zBuf = sqlite3RCStrResize(p->zBuf, nTotal); + if( p->zBuf==0 ){ + p->bErr = 1; + jsonZero(p); return SQLITE_NOMEM; } - p->zBuf = zNew; } p->nAlloc = nTotal; return SQLITE_OK; @@ -195628,12 +202928,35 @@ static int jsonGrow(JsonString *p, u32 N){ /* Append N bytes from zIn onto the end of the JsonString string. 
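The reworked JsonParse above hands string ownership to SQLite's internal reference-counted strings (sqlite3RCStrNew/Ref/Unref/Resize, flagged by bJsonIsRCStr), so zJson and zAlt can be shared between the cache, the parse, and result values without copying. A minimal model of that RCStr idea, with illustrative names since the real routines are internal to SQLite:

#include <stdint.h>
#include <stdlib.h>

/* Refcount stored immediately before the string bytes, so clients still
** hold a plain char* and never see the header. */
typedef struct { uint64_t nRef; } RCStrHdr;

static char *rcstrNew(size_t n){
  RCStrHdr *p = malloc(sizeof(*p) + n + 1);
  if( p==0 ) return 0;
  p->nRef = 1;
  return (char*)&p[1];
}
static char *rcstrRef(char *z){
  ((RCStrHdr*)z)[-1].nRef++;
  return z;
}
static void rcstrUnref(void *z){   /* signature usable as a destructor */
  RCStrHdr *p = &((RCStrHdr*)z)[-1];
  if( --p->nRef==0 ) free(p);
}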
*/ -static void jsonAppendRaw(JsonString *p, const char *zIn, u32 N){ - if( N==0 ) return; - if( (N+p->nUsed >= p->nAlloc) && jsonGrow(p,N)!=0 ) return; +static SQLITE_NOINLINE void jsonAppendExpand( + JsonString *p, + const char *zIn, + u32 N +){ + assert( N>0 ); + if( jsonGrow(p,N) ) return; memcpy(p->zBuf+p->nUsed, zIn, N); p->nUsed += N; } +static void jsonAppendRaw(JsonString *p, const char *zIn, u32 N){ + if( N==0 ) return; + if( N+p->nUsed >= p->nAlloc ){ + jsonAppendExpand(p,zIn,N); + }else{ + memcpy(p->zBuf+p->nUsed, zIn, N); + p->nUsed += N; + } +} +static void jsonAppendRawNZ(JsonString *p, const char *zIn, u32 N){ + assert( N>0 ); + if( N+p->nUsed >= p->nAlloc ){ + jsonAppendExpand(p,zIn,N); + }else{ + memcpy(p->zBuf+p->nUsed, zIn, N); + p->nUsed += N; + } +} + /* Append formatted text (not to exceed N bytes) to the JsonString. */ @@ -195648,10 +202971,35 @@ static void jsonPrintf(int N, JsonString *p, const char *zFormat, ...){ /* Append a single character */ -static void jsonAppendChar(JsonString *p, char c){ - if( p->nUsed>=p->nAlloc && jsonGrow(p,1)!=0 ) return; +static SQLITE_NOINLINE void jsonAppendCharExpand(JsonString *p, char c){ + if( jsonGrow(p,1) ) return; p->zBuf[p->nUsed++] = c; } +static void jsonAppendChar(JsonString *p, char c){ + if( p->nUsed>=p->nAlloc ){ + jsonAppendCharExpand(p,c); + }else{ + p->zBuf[p->nUsed++] = c; + } +} + +/* Try to force the string to be a zero-terminated RCStr string. +** +** Return true on success. Return false if an OOM prevents this +** from happening. +*/ +static int jsonForceRCStr(JsonString *p){ + jsonAppendChar(p, 0); + if( p->bErr ) return 0; + p->nUsed--; + if( p->bStatic==0 ) return 1; + p->nAlloc = 0; + p->nUsed++; + jsonGrow(p, p->nUsed); + p->nUsed--; + return p->bStatic==0; +} + /* Append a comma separator to the output buffer, if the previous ** character is not '[' or '{'. @@ -195660,7 +203008,8 @@ static void jsonAppendSeparator(JsonString *p){ char c; if( p->nUsed==0 ) return; c = p->zBuf[p->nUsed-1]; - if( c!='[' && c!='{' ) jsonAppendChar(p, ','); + if( c=='[' || c=='{' ) return; + jsonAppendChar(p, ','); } /* Append the N-byte string in zIn to the end of the JsonString string @@ -195674,11 +203023,16 @@ static void jsonAppendString(JsonString *p, const char *zIn, u32 N){ p->zBuf[p->nUsed++] = '"'; for(i=0; i<N; i++){ unsigned char c = ((unsigned const char*)zIn)[i]; - if( c=='"' || c=='\\' ){ + if( jsonIsOk[c] ){ + p->zBuf[p->nUsed++] = c; + }else if( c=='"' || c=='\\' ){ json_simple_escape: if( (p->nUsed+N+3-i > p->nAlloc) && jsonGrow(p,N+3-i)!=0 ) return; p->zBuf[p->nUsed++] = '\\'; - }else if( c<=0x1f ){ + p->zBuf[p->nUsed++] = c; + }else if( c=='\'' ){ + p->zBuf[p->nUsed++] = c; + }else{ static const char aSpecial[] = { 0, 0, 0, 0, 0, 0, 0, 0, 'b', 't', 'n', 0, 'f', 'r', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 @@ -195689,6 +203043,7 @@ static void jsonAppendString(JsonString *p, const char *zIn, u32 N){ assert( aSpecial['\n']=='n' ); assert( aSpecial['\r']=='r' ); assert( aSpecial['\t']=='t' ); + assert( c>=0 && c<sizeof(aSpecial) ); if( aSpecial[c] ){ c = aSpecial[c]; goto json_simple_escape; } if( (p->nUsed+N+7+i > p->nAlloc) && jsonGrow(p,N+7-i)!=0 ) return; p->zBuf[p->nUsed++] = '\\'; p->zBuf[p->nUsed++] = 'u'; p->zBuf[p->nUsed++] = '0'; p->zBuf[p->nUsed++] = '0'; - p->zBuf[p->nUsed++] = '0' + (c>>4); - c = "0123456789abcdef"[c&0xf]; + p->zBuf[p->nUsed++] = "0123456789abcdef"[c>>4]; + p->zBuf[p->nUsed++] = "0123456789abcdef"[c&0xf]; } - p->zBuf[p->nUsed++] = c; } p->zBuf[p->nUsed++] = '"'; assert( p->nUsed<p->nAlloc ); } +/* +** The zIn[0..N] string is a JSON5 string literal. Append to p a translation +** of the string literal that standard JSON and that omits all JSON5 +** features.
+*/ +static void jsonAppendNormalizedString(JsonString *p, const char *zIn, u32 N){ + u32 i; + jsonAppendChar(p, '"'); + zIn++; + N -= 2; + while( N>0 ){ + for(i=0; i<N && zIn[i]!='\\' && zIn[i]!='"'; i++){} + if( i>0 ){ + jsonAppendRawNZ(p, zIn, i); + zIn += i; + N -= i; + if( N==0 ) break; + } + if( zIn[0]=='"' ){ + jsonAppendRawNZ(p, "\\\"", 2); + zIn++; + N--; + continue; + } + assert( zIn[0]=='\\' ); + switch( (u8)zIn[1] ){ + case '\'': + jsonAppendChar(p, '\''); + break; + case 'v': + jsonAppendRawNZ(p, "\\u0009", 6); + break; + case 'x': + jsonAppendRawNZ(p, "\\u00", 4); + jsonAppendRawNZ(p, &zIn[2], 2); + zIn += 2; + N -= 2; + break; + case '0': + jsonAppendRawNZ(p, "\\u0000", 6); + break; + case '\r': + if( zIn[2]=='\n' ){ + zIn++; + N--; + } + break; + case '\n': + break; + case 0xe2: + assert( N>=4 ); + assert( 0x80==(u8)zIn[2] ); + assert( 0xa8==(u8)zIn[3] || 0xa9==(u8)zIn[3] ); + zIn += 2; + N -= 2; + break; + default: + jsonAppendRawNZ(p, zIn, 2); + break; + } + zIn += 2; + N -= 2; + } + jsonAppendChar(p, '"'); +} + +/* +** The zIn[0..N] string is a JSON5 integer literal. Append to p a translation +** of the string literal that standard JSON and that omits all JSON5 +** features. +*/ +static void jsonAppendNormalizedInt(JsonString *p, const char *zIn, u32 N){ + if( zIn[0]=='+' ){ + zIn++; + N--; + }else if( zIn[0]=='-' ){ + jsonAppendChar(p, '-'); + zIn++; + N--; + } + if( zIn[0]=='0' && (zIn[1]=='x' || zIn[1]=='X') ){ + sqlite3_int64 i = 0; + int rc = sqlite3DecOrHexToI64(zIn, &i); + if( rc<=1 ){ + jsonPrintf(100,p,"%lld",i); + }else{ + assert( rc==2 ); + jsonAppendRawNZ(p, "9.0e999", 7); + } + return; + } + assert( N>0 ); + jsonAppendRawNZ(p, zIn, N); +} + +/* +** The zIn[0..N] string is a JSON5 real literal. Append to p a translation +** of the string literal that standard JSON and that omits all JSON5 +** features. +*/ +static void jsonAppendNormalizedReal(JsonString *p, const char *zIn, u32 N){ + u32 i; + if( zIn[0]=='+' ){ + zIn++; + N--; + }else if( zIn[0]=='-' ){ + jsonAppendChar(p, '-'); + zIn++; + N--; + } + if( zIn[0]=='.' ){ + jsonAppendChar(p, '0'); + } + for(i=0; i<N; i++){ + if( zIn[i]=='.' && (i+1==N || !sqlite3Isdigit(zIn[i+1])) ){ + i++; + jsonAppendRaw(p, zIn, i); + zIn += i; + N -= i; + jsonAppendChar(p, '0'); + break; + } + } + if( N>0 ){ + jsonAppendRawNZ(p, zIn, N); + } +} + + + /* ** Append a function parameter value to the JSON string under ** construction. @@ -195717,11 +203201,14 @@ static void jsonAppendValue( ){ switch( sqlite3_value_type(pValue) ){ case SQLITE_NULL: { - jsonAppendRaw(p, "null", 4); + jsonAppendRawNZ(p, "null", 4); break; } - case SQLITE_INTEGER: case SQLITE_FLOAT: { + jsonPrintf(100, p, "%!0.15g", sqlite3_value_double(pValue)); + break; + } + case SQLITE_INTEGER: { const char *z = (const char*)sqlite3_value_text(pValue); u32 n = (u32)sqlite3_value_bytes(pValue); jsonAppendRaw(p, z, n); @@ -195750,15 +203237,25 @@ static void jsonAppendValue( /* Make the JSON in p the result of the SQL function. +** +** The JSON string is reset. */ static void jsonResult(JsonString *p){ if( p->bErr==0 ){ - sqlite3_result_text64(p->pCtx, p->zBuf, p->nUsed, - p->bStatic ?
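The three normalizers above are what turn accepted JSON5 notation back into strict JSON on output. Assuming the JSON5 support this version introduces, the effect is observable from SQL; a small driver using the public sqlite3 C API, with expected outputs in comments:

#include <stdio.h>
#include "sqlite3.h"

static void show(sqlite3 *db, const char *zSql){
  sqlite3_stmt *pStmt;
  if( sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0)==SQLITE_OK ){
    if( sqlite3_step(pStmt)==SQLITE_ROW ){
      printf("%s\n", sqlite3_column_text(pStmt, 0));
    }
    sqlite3_finalize(pStmt);
  }
}

int main(void){
  sqlite3 *db;
  if( sqlite3_open(":memory:", &db) ) return 1;
  show(db, "SELECT json('{a: 0x1A}')");      /* {"a":26}      hex int  */
  show(db, "SELECT json('{b: +.5}')");       /* {"b":0.5}     real     */
  show(db, "SELECT json('{c: Infinity}')");  /* {"c":9.0e999}          */
  sqlite3_close(db);
  return 0;
}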
SQLITE_TRANSIENT : sqlite3_free, - SQLITE_UTF8); - jsonZero(p); + if( p->bStatic ){ + sqlite3_result_text64(p->pCtx, p->zBuf, p->nUsed, + SQLITE_TRANSIENT, SQLITE_UTF8); + }else if( jsonForceRCStr(p) ){ + sqlite3RCStrRef(p->zBuf); + sqlite3_result_text64(p->pCtx, p->zBuf, p->nUsed, + sqlite3RCStrUnref, + SQLITE_UTF8); + } } - assert( p->bStatic ); + if( p->bErr==1 ){ + sqlite3_result_error_nomem(p->pCtx); + } + jsonReset(p); } /************************************************************************** @@ -195783,20 +203280,73 @@ static u32 jsonNodeSize(JsonNode *pNode){ ** delete the JsonParse object itself. */ static void jsonParseReset(JsonParse *pParse){ - sqlite3_free(pParse->aNode); - pParse->aNode = 0; + while( pParse->pClup ){ + JsonCleanup *pTask = pParse->pClup; + pParse->pClup = pTask->pJCNext; + pTask->xOp(pTask->pArg); + sqlite3_free(pTask); + } + assert( pParse->nJPRef<=1 ); + if( pParse->aNode ){ + sqlite3_free(pParse->aNode); + pParse->aNode = 0; + } pParse->nNode = 0; pParse->nAlloc = 0; - sqlite3_free(pParse->aUp); - pParse->aUp = 0; + if( pParse->aUp ){ + sqlite3_free(pParse->aUp); + pParse->aUp = 0; + } + if( pParse->bJsonIsRCStr ){ + sqlite3RCStrUnref(pParse->zJson); + pParse->zJson = 0; + pParse->bJsonIsRCStr = 0; + } + if( pParse->zAlt ){ + sqlite3RCStrUnref(pParse->zAlt); + pParse->zAlt = 0; + } } /* ** Free a JsonParse object that was obtained from sqlite3_malloc(). +** +** Note that destroying JsonParse might call sqlite3RCStrUnref() to +** destroy the zJson value. The RCStr object might recursively invoke +** JsonParse to destroy this pParse object again. Take care to ensure +** that this recursive destructor sequence terminates harmlessly. */ static void jsonParseFree(JsonParse *pParse){ - jsonParseReset(pParse); - sqlite3_free(pParse); + if( pParse->nJPRef>1 ){ + pParse->nJPRef--; + }else{ + jsonParseReset(pParse); + sqlite3_free(pParse); + } +} + +/* +** Add a cleanup task to the JsonParse object. +** +** If an OOM occurs, the cleanup operation happens immediately +** and this function returns SQLITE_NOMEM. +*/ +static int jsonParseAddCleanup( + JsonParse *pParse, /* Add the cleanup task to this parser */ + void(*xOp)(void*), /* The cleanup task */ + void *pArg /* Argument to the cleanup */ +){ + JsonCleanup *pTask = sqlite3_malloc64( sizeof(*pTask) ); + if( pTask==0 ){ + pParse->oom = 1; + xOp(pArg); + return SQLITE_ERROR; + } + pTask->pJCNext = pParse->pClup; + pParse->pClup = pTask; + pTask->xOp = xOp; + pTask->pArg = pArg; + return SQLITE_OK; } /* @@ -195805,46 +203355,76 @@ static void jsonParseFree(JsonParse *pParse){ ** the number of JsonNode objects that are encoded. 
*/ static void jsonRenderNode( + JsonParse *pParse, /* the complete parse of the JSON */ JsonNode *pNode, /* The node to render */ - JsonString *pOut, /* Write JSON here */ - sqlite3_value **aReplace /* Replacement values */ + JsonString *pOut /* Write JSON here */ ){ assert( pNode!=0 ); - if( pNode->jnFlags & (JNODE_REPLACE|JNODE_PATCH) ){ - if( (pNode->jnFlags & JNODE_REPLACE)!=0 && ALWAYS(aReplace!=0) ){ - assert( pNode->eU==4 ); - jsonAppendValue(pOut, aReplace[pNode->u.iReplace]); - return; + while( (pNode->jnFlags & JNODE_REPLACE)!=0 && pParse->useMod ){ + u32 idx = (u32)(pNode - pParse->aNode); + u32 i = pParse->iSubst; + while( 1 /*exit-by-break*/ ){ + assert( i<pParse->nNode ); + assert( pParse->aNode[i].eType==JSON_SUBST ); + assert( pParse->aNode[i].eU==4 ); + assert( pParse->aNode[i].u.iPrev<i ); + if( pParse->aNode[i].n==idx ){ + pNode = &pParse->aNode[i+1]; + break; + } + i = pParse->aNode[i].u.iPrev; } - assert( pNode->eU==5 ); - pNode = pNode->u.pPatch; } switch( pNode->eType ){ default: { assert( pNode->eType==JSON_NULL ); - jsonAppendRaw(pOut, "null", 4); + jsonAppendRawNZ(pOut, "null", 4); break; } case JSON_TRUE: { - jsonAppendRaw(pOut, "true", 4); + jsonAppendRawNZ(pOut, "true", 4); break; } case JSON_FALSE: { - jsonAppendRaw(pOut, "false", 5); + jsonAppendRawNZ(pOut, "false", 5); break; } case JSON_STRING: { + assert( pNode->eU==1 ); if( pNode->jnFlags & JNODE_RAW ){ - assert( pNode->eU==1 ); - jsonAppendString(pOut, pNode->u.zJContent, pNode->n); - break; + if( pNode->jnFlags & JNODE_LABEL ){ + jsonAppendChar(pOut, '"'); + jsonAppendRaw(pOut, pNode->u.zJContent, pNode->n); + jsonAppendChar(pOut, '"'); + }else{ + jsonAppendString(pOut, pNode->u.zJContent, pNode->n); + } + }else if( pNode->jnFlags & JNODE_JSON5 ){ + jsonAppendNormalizedString(pOut, pNode->u.zJContent, pNode->n); + }else{ + assert( pNode->n>0 ); + jsonAppendRawNZ(pOut, pNode->u.zJContent, pNode->n); } - /* no break */ deliberate_fall_through + break; + } + case JSON_REAL: { + assert( pNode->eU==1 ); + if( pNode->jnFlags & JNODE_JSON5 ){ + jsonAppendNormalizedReal(pOut, pNode->u.zJContent, pNode->n); + }else{ + assert( pNode->n>0 ); + jsonAppendRawNZ(pOut, pNode->u.zJContent, pNode->n); + } + break; } - case JSON_REAL: case JSON_INT: { assert( pNode->eU==1 ); - jsonAppendRaw(pOut, pNode->u.zJContent, pNode->n); + if( pNode->jnFlags & JNODE_JSON5 ){ + jsonAppendNormalizedInt(pOut, pNode->u.zJContent, pNode->n); + }else{ + assert( pNode->n>0 ); + jsonAppendRawNZ(pOut, pNode->u.zJContent, pNode->n); + } break; } case JSON_ARRAY: { @@ -195852,15 +203432,16 @@ static void jsonRenderNode( jsonAppendChar(pOut, '['); for(;;){ while( j<=pNode->n ){ - if( (pNode[j].jnFlags & JNODE_REMOVE)==0 ){ + if( (pNode[j].jnFlags & JNODE_REMOVE)==0 || pParse->useMod==0 ){ jsonAppendSeparator(pOut); - jsonRenderNode(&pNode[j], pOut, aReplace); + jsonRenderNode(pParse, &pNode[j], pOut); } j += jsonNodeSize(&pNode[j]); } if( (pNode->jnFlags & JNODE_APPEND)==0 ) break; + if( pParse->useMod==0 ) break; assert( pNode->eU==2 ); - pNode = &pNode[pNode->u.iAppend]; + pNode = &pParse->aNode[pNode->u.iAppend]; j = 1; } jsonAppendChar(pOut, ']'); @@ -195871,17 +203452,18 @@ static void jsonRenderNode( jsonAppendChar(pOut, '{'); for(;;){ while( j<=pNode->n ){ - if( (pNode[j+1].jnFlags & JNODE_REMOVE)==0 ){ + if( (pNode[j+1].jnFlags & JNODE_REMOVE)==0 || pParse->useMod==0 ){ jsonAppendSeparator(pOut); - jsonRenderNode(&pNode[j], pOut, aReplace); + jsonRenderNode(pParse, &pNode[j], pOut); jsonAppendChar(pOut, ':'); - jsonRenderNode(&pNode[j+1], pOut, aReplace); +
jsonRenderNode(pParse, &pNode[j+1], pOut); } j += 1 + jsonNodeSize(&pNode[j+1]); } if( (pNode->jnFlags & JNODE_APPEND)==0 ) break; + if( pParse->useMod==0 ) break; assert( pNode->eU==2 ); - pNode = &pNode[pNode->u.iAppend]; + pNode = &pParse->aNode[pNode->u.iAppend]; j = 1; } jsonAppendChar(pOut, '}'); @@ -195891,18 +203473,30 @@ static void jsonRenderNode( } /* -** Return a JsonNode and all its descendents as a JSON string. +** Return a JsonNode and all its descendants as a JSON string. */ static void jsonReturnJson( + JsonParse *pParse, /* The complete JSON */ JsonNode *pNode, /* Node to return */ sqlite3_context *pCtx, /* Return value for this function */ - sqlite3_value **aReplace /* Array of replacement values */ + int bGenerateAlt, /* Also store the rendered text in zAlt */ + int omitSubtype /* Do not call sqlite3_result_subtype() */ ){ JsonString s; - jsonInit(&s, pCtx); - jsonRenderNode(pNode, &s, aReplace); - jsonResult(&s); - sqlite3_result_subtype(pCtx, JSON_SUBTYPE); + if( pParse->oom ){ + sqlite3_result_error_nomem(pCtx); + return; + } + if( pParse->nErr==0 ){ + jsonInit(&s, pCtx); + jsonRenderNode(pParse, pNode, &s); + if( bGenerateAlt && pParse->zAlt==0 && jsonForceRCStr(&s) ){ + pParse->zAlt = sqlite3RCStrRef(s.zBuf); + pParse->nAlt = s.nUsed; + } + jsonResult(&s); + if( !omitSubtype ) sqlite3_result_subtype(pCtx, JSON_SUBTYPE); + } } /* @@ -195940,9 +203534,10 @@ static u32 jsonHexToInt4(const char *z){ ** Make the JsonNode the return value of the function. */ static void jsonReturn( + JsonParse *pParse, /* Complete JSON parse tree */ JsonNode *pNode, /* Node to return */ sqlite3_context *pCtx, /* Return value for this function */ - sqlite3_value **aReplace /* Array of replacement values */ + int omitSubtype /* Do not call sqlite3_result_subtype() */ ){ switch( pNode->eType ){ default: { @@ -195960,59 +203555,40 @@ static void jsonReturn( } case JSON_INT: { sqlite3_int64 i = 0; + int rc; + int bNeg = 0; const char *z; + assert( pNode->eU==1 ); z = pNode->u.zJContent; - if( z[0]=='-' ){ z++; } - while( z[0]>='0' && z[0]<='9' ){ - unsigned v = *(z++) - '0'; - if( i>=LARGEST_INT64/10 ){ - if( i>LARGEST_INT64/10 ) goto int_as_real; - if( z[0]>='0' && z[0]<='9' ) goto int_as_real; - if( v==9 ) goto int_as_real; - if( v==8 ){ - if( pNode->u.zJContent[0]=='-' ){ - sqlite3_result_int64(pCtx, SMALLEST_INT64); - goto int_done; - }else{ - goto int_as_real; - } - } - } - i = i*10 + v; + if( z[0]=='-' ){ z++; bNeg = 1; } + else if( z[0]=='+' ){ z++; } + rc = sqlite3DecOrHexToI64(z, &i); + if( rc<=1 ){ + sqlite3_result_int64(pCtx, bNeg ? 
-i : i); + }else if( rc==3 && bNeg ){ + sqlite3_result_int64(pCtx, SMALLEST_INT64); + }else{ + goto to_double; } - if( pNode->u.zJContent[0]=='-' ){ i = -i; } - sqlite3_result_int64(pCtx, i); - int_done: break; - int_as_real: ; /* no break */ deliberate_fall_through } case JSON_REAL: { double r; -#ifdef SQLITE_AMALGAMATION const char *z; assert( pNode->eU==1 ); + to_double: z = pNode->u.zJContent; sqlite3AtoF(z, &r, sqlite3Strlen30(z), SQLITE_UTF8); -#else - assert( pNode->eU==1 ); - r = strtod(pNode->u.zJContent, 0); -#endif sqlite3_result_double(pCtx, r); break; } case JSON_STRING: { -#if 0 /* Never happens because JNODE_RAW is only set by json_set(), - ** json_insert() and json_replace() and those routines do not - ** call jsonReturn() */ if( pNode->jnFlags & JNODE_RAW ){ assert( pNode->eU==1 ); sqlite3_result_text(pCtx, pNode->u.zJContent, pNode->n, SQLITE_TRANSIENT); - }else -#endif - assert( (pNode->jnFlags & JNODE_RAW)==0 ); - if( (pNode->jnFlags & JNODE_ESCAPE)==0 ){ + }else if( (pNode->jnFlags & JNODE_ESCAPE)==0 ){ /* JSON formatted without any backslash-escapes */ assert( pNode->eU==1 ); sqlite3_result_text(pCtx, pNode->u.zJContent+1, pNode->n-2, @@ -196024,18 +203600,17 @@ static void jsonReturn( const char *z; char *zOut; u32 j; + u32 nOut = n; assert( pNode->eU==1 ); z = pNode->u.zJContent; - zOut = sqlite3_malloc( n+1 ); + zOut = sqlite3_malloc( nOut+1 ); if( zOut==0 ){ sqlite3_result_error_nomem(pCtx); break; } for(i=1, j=0; iaNode after first expanding the +** size of the aNode array. Return the index of the new node. +** +** If an OOM error occurs, set pParse->oom and return -1. +*/ static JSON_NOINLINE int jsonParseAddNodeExpand( JsonParse *pParse, /* Append the node to this object */ u32 eType, /* Node type */ @@ -196128,7 +203727,7 @@ static JSON_NOINLINE int jsonParseAddNodeExpand( pParse->oom = 1; return -1; } - pParse->nAlloc = nNew; + pParse->nAlloc = sqlite3_msize(pNew)/sizeof(JsonNode); pParse->aNode = pNew; assert( pParse->nNodenAlloc ); return jsonParseAddNode(pParse, eType, n, zContent); @@ -196146,34 +203745,239 @@ static int jsonParseAddNode( const char *zContent /* Content */ ){ JsonNode *p; - if( pParse->aNode==0 || pParse->nNode>=pParse->nAlloc ){ + assert( pParse->aNode!=0 || pParse->nNode>=pParse->nAlloc ); + if( pParse->nNode>=pParse->nAlloc ){ return jsonParseAddNodeExpand(pParse, eType, n, zContent); } + assert( pParse->aNode!=0 ); p = &pParse->aNode[pParse->nNode]; - p->eType = (u8)eType; - p->jnFlags = 0; + assert( p!=0 ); + p->eType = (u8)(eType & 0xff); + p->jnFlags = (u8)(eType >> 8); VVA( p->eU = zContent ? 1 : 0 ); p->n = n; p->u.zJContent = zContent; return pParse->nNode++; } +/* +** Add an array of new nodes to the current pParse->aNode array. +** Return the index of the first node added. +** +** If an OOM error occurs, set pParse->oom. +*/ +static void jsonParseAddNodeArray( + JsonParse *pParse, /* Append the node to this object */ + JsonNode *aNode, /* Array of nodes to add */ + u32 nNode /* Number of elements in aNew */ +){ + assert( aNode!=0 ); + assert( nNode>=1 ); + if( pParse->nNode + nNode > pParse->nAlloc ){ + u32 nNew = pParse->nNode + nNode; + JsonNode *aNew = sqlite3_realloc64(pParse->aNode, nNew*sizeof(JsonNode)); + if( aNew==0 ){ + pParse->oom = 1; + return; + } + pParse->nAlloc = sqlite3_msize(aNew)/sizeof(JsonNode); + pParse->aNode = aNew; + } + memcpy(&pParse->aNode[pParse->nNode], aNode, nNode*sizeof(JsonNode)); + pParse->nNode += nNode; +} + +/* +** Add a new JSON_SUBST node. 
The node immediately following +** this new node will be the substitute content for iNode. +*/ +static int jsonParseAddSubstNode( + JsonParse *pParse, /* Add the JSON_SUBST here */ + u32 iNode /* References this node */ +){ + int idx = jsonParseAddNode(pParse, JSON_SUBST, iNode, 0); + if( pParse->oom ) return -1; + pParse->aNode[iNode].jnFlags |= JNODE_REPLACE; + pParse->aNode[idx].eU = 4; + pParse->aNode[idx].u.iPrev = pParse->iSubst; + pParse->iSubst = idx; + pParse->hasMod = 1; + pParse->useMod = 1; + return idx; +} + +/* +** Return true if z[] begins with 2 (or more) hexadecimal digits +*/ +static int jsonIs2Hex(const char *z){ + return sqlite3Isxdigit(z[0]) && sqlite3Isxdigit(z[1]); +} + /* ** Return true if z[] begins with 4 (or more) hexadecimal digits */ static int jsonIs4Hex(const char *z){ - int i; - for(i=0; i<4; i++) if( !sqlite3Isxdigit(z[i]) ) return 0; - return 1; + return jsonIs2Hex(z) && jsonIs2Hex(&z[2]); +} + +/* +** Return the number of bytes of JSON5 whitespace at the beginning of +** the input string z[]. +** +** JSON5 whitespace consists of any of the following characters: +** +** Unicode UTF-8 Name +** U+0009 09 horizontal tab +** U+000a 0a line feed +** U+000b 0b vertical tab +** U+000c 0c form feed +** U+000d 0d carriage return +** U+0020 20 space +** U+00a0 c2 a0 non-breaking space +** U+1680 e1 9a 80 ogham space mark +** U+2000 e2 80 80 en quad +** U+2001 e2 80 81 em quad +** U+2002 e2 80 82 en space +** U+2003 e2 80 83 em space +** U+2004 e2 80 84 three-per-em space +** U+2005 e2 80 85 four-per-em space +** U+2006 e2 80 86 six-per-em space +** U+2007 e2 80 87 figure space +** U+2008 e2 80 88 punctuation space +** U+2009 e2 80 89 thin space +** U+200a e2 80 8a hair space +** U+2028 e2 80 a8 line separator +** U+2029 e2 80 a9 paragraph separator +** U+202f e2 80 af narrow no-break space (NNBSP) +** U+205f e2 81 9f medium mathematical space (MMSP) +** U+3000 e3 80 80 ideographical space +** U+FEFF ef bb bf byte order mark +** +** In addition, comments between '/', '*' and '*', '/' and +** from '/', '/' to end-of-line are also considered to be whitespace. 
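jsonParseAddSubstNode() above threads every edit into a singly linked chain: JsonParse.iSubst points at the newest JSON_SUBST node, each node's u.iPrev points at the one before it, and JsonNode.n names the node being replaced. Renderers and lookups walk that chain to find the current replacement. A simplified stand-alone model of the walk (types illustrative, not SQLite's):

typedef struct {
  unsigned target;  /* index of the node this substitution replaces (n)    */
  unsigned prev;    /* previous substitution in the chain (u.iPrev), 0=end */
} Subst;

/* Walk newest-to-oldest, as jsonRenderNode() does, returning the index
** of the substitution that applies to node 'target', or 0 for none.
** Slot 0 serves as a sentinel so 0 can mean "end of chain". */
static unsigned findSubst(const Subst *a, unsigned iNewest, unsigned target){
  unsigned i;
  for(i=iNewest; i!=0; i=a[i].prev){
    if( a[i].target==target ) return i;
  }
  return 0;
}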
+*/ +static int json5Whitespace(const char *zIn){ + int n = 0; + const u8 *z = (u8*)zIn; + while( 1 /*exit by "goto whitespace_done"*/ ){ + switch( z[n] ){ + case 0x09: + case 0x0a: + case 0x0b: + case 0x0c: + case 0x0d: + case 0x20: { + n++; + break; + } + case '/': { + if( z[n+1]=='*' && z[n+2]!=0 ){ + int j; + for(j=n+3; z[j]!='/' || z[j-1]!='*'; j++){ + if( z[j]==0 ) goto whitespace_done; + } + n = j+1; + break; + }else if( z[n+1]=='/' ){ + int j; + char c; + for(j=n+2; (c = z[j])!=0; j++){ + if( c=='\n' || c=='\r' ) break; + if( 0xe2==(u8)c && 0x80==(u8)z[j+1] + && (0xa8==(u8)z[j+2] || 0xa9==(u8)z[j+2]) + ){ + j += 2; + break; + } + } + n = j; + if( z[n] ) n++; + break; + } + goto whitespace_done; + } + case 0xc2: { + if( z[n+1]==0xa0 ){ + n += 2; + break; + } + goto whitespace_done; + } + case 0xe1: { + if( z[n+1]==0x9a && z[n+2]==0x80 ){ + n += 3; + break; + } + goto whitespace_done; + } + case 0xe2: { + if( z[n+1]==0x80 ){ + u8 c = z[n+2]; + if( c<0x80 ) goto whitespace_done; + if( c<=0x8a || c==0xa8 || c==0xa9 || c==0xaf ){ + n += 3; + break; + } + }else if( z[n+1]==0x81 && z[n+2]==0x9f ){ + n += 3; + break; + } + goto whitespace_done; + } + case 0xe3: { + if( z[n+1]==0x80 && z[n+2]==0x80 ){ + n += 3; + break; + } + goto whitespace_done; + } + case 0xef: { + if( z[n+1]==0xbb && z[n+2]==0xbf ){ + n += 3; + break; + } + goto whitespace_done; + } + default: { + goto whitespace_done; + } + } + } + whitespace_done: + return n; } +/* +** Extra floating-point literals to allow in JSON. +*/ +static const struct NanInfName { + char c1; + char c2; + char n; + char eType; + char nRepl; + char *zMatch; + char *zRepl; +} aNanInfName[] = { + { 'i', 'I', 3, JSON_REAL, 7, "inf", "9.0e999" }, + { 'i', 'I', 8, JSON_REAL, 7, "infinity", "9.0e999" }, + { 'n', 'N', 3, JSON_NULL, 4, "NaN", "null" }, + { 'q', 'Q', 4, JSON_NULL, 4, "QNaN", "null" }, + { 's', 'S', 4, JSON_NULL, 4, "SNaN", "null" }, +}; + /* ** Parse a single JSON value which begins at pParse->zJson[i]. Return the ** index of the first character past the end of the value parsed. ** -** Return negative for a syntax error. Special cases: return -2 if the -** first non-whitespace character is '}' and return -3 if the first -** non-whitespace character is ']'. 
+** Special return values: +** +** 0 End of input +** -1 Syntax error +** -2 '}' seen +** -3 ']' seen +** -4 ',' seen +** -5 ':' seen */ static int jsonParseValue(JsonParse *pParse, u32 i){ char c; @@ -196182,175 +203986,457 @@ static int jsonParseValue(JsonParse *pParse, u32 i){ int x; JsonNode *pNode; const char *z = pParse->zJson; - while( fast_isspace(z[i]) ){ i++; } - if( (c = z[i])=='{' ){ +json_parse_restart: + switch( (u8)z[i] ){ + case '{': { /* Parse object */ iThis = jsonParseAddNode(pParse, JSON_OBJECT, 0, 0); if( iThis<0 ) return -1; + if( ++pParse->iDepth > JSON_MAX_DEPTH ){ + pParse->iErr = i; + return -1; + } for(j=i+1;;j++){ - while( fast_isspace(z[j]) ){ j++; } - if( ++pParse->iDepth > JSON_MAX_DEPTH ) return -1; + u32 nNode = pParse->nNode; x = jsonParseValue(pParse, j); - if( x<0 ){ - pParse->iDepth--; - if( x==(-2) && pParse->nNode==(u32)iThis+1 ) return j+1; - return -1; + if( x<=0 ){ + if( x==(-2) ){ + j = pParse->iErr; + if( pParse->nNode!=(u32)iThis+1 ) pParse->hasNonstd = 1; + break; + } + j += json5Whitespace(&z[j]); + if( sqlite3JsonId1(z[j]) + || (z[j]=='\\' && z[j+1]=='u' && jsonIs4Hex(&z[j+2])) + ){ + int k = j+1; + while( (sqlite3JsonId2(z[k]) && json5Whitespace(&z[k])==0) + || (z[k]=='\\' && z[k+1]=='u' && jsonIs4Hex(&z[k+2])) + ){ + k++; + } + jsonParseAddNode(pParse, JSON_STRING | (JNODE_RAW<<8), k-j, &z[j]); + pParse->hasNonstd = 1; + x = k; + }else{ + if( x!=-1 ) pParse->iErr = j; + return -1; + } } if( pParse->oom ) return -1; - pNode = &pParse->aNode[pParse->nNode-1]; - if( pNode->eType!=JSON_STRING ) return -1; + pNode = &pParse->aNode[nNode]; + if( pNode->eType!=JSON_STRING ){ + pParse->iErr = j; + return -1; + } pNode->jnFlags |= JNODE_LABEL; j = x; - while( fast_isspace(z[j]) ){ j++; } - if( z[j]!=':' ) return -1; - j++; + if( z[j]==':' ){ + j++; + }else{ + if( fast_isspace(z[j]) ){ + do{ j++; }while( fast_isspace(z[j]) ); + if( z[j]==':' ){ + j++; + goto parse_object_value; + } + } + x = jsonParseValue(pParse, j); + if( x!=(-5) ){ + if( x!=(-1) ) pParse->iErr = j; + return -1; + } + j = pParse->iErr+1; + } + parse_object_value: x = jsonParseValue(pParse, j); - pParse->iDepth--; - if( x<0 ) return -1; + if( x<=0 ){ + if( x!=(-1) ) pParse->iErr = j; + return -1; + } j = x; - while( fast_isspace(z[j]) ){ j++; } - c = z[j]; - if( c==',' ) continue; - if( c!='}' ) return -1; - break; + if( z[j]==',' ){ + continue; + }else if( z[j]=='}' ){ + break; + }else{ + if( fast_isspace(z[j]) ){ + do{ j++; }while( fast_isspace(z[j]) ); + if( z[j]==',' ){ + continue; + }else if( z[j]=='}' ){ + break; + } + } + x = jsonParseValue(pParse, j); + if( x==(-4) ){ + j = pParse->iErr; + continue; + } + if( x==(-2) ){ + j = pParse->iErr; + break; + } + } + pParse->iErr = j; + return -1; } pParse->aNode[iThis].n = pParse->nNode - (u32)iThis - 1; + pParse->iDepth--; return j+1; - }else if( c=='[' ){ + } + case '[': { /* Parse array */ iThis = jsonParseAddNode(pParse, JSON_ARRAY, 0, 0); if( iThis<0 ) return -1; + if( ++pParse->iDepth > JSON_MAX_DEPTH ){ + pParse->iErr = i; + return -1; + } memset(&pParse->aNode[iThis].u, 0, sizeof(pParse->aNode[iThis].u)); for(j=i+1;;j++){ - while( fast_isspace(z[j]) ){ j++; } - if( ++pParse->iDepth > JSON_MAX_DEPTH ) return -1; x = jsonParseValue(pParse, j); - pParse->iDepth--; - if( x<0 ){ - if( x==(-3) && pParse->nNode==(u32)iThis+1 ) return j+1; + if( x<=0 ){ + if( x==(-3) ){ + j = pParse->iErr; + if( pParse->nNode!=(u32)iThis+1 ) pParse->hasNonstd = 1; + break; + } + if( x!=(-1) ) pParse->iErr = j; return -1; } j = x; - while( 
fast_isspace(z[j]) ){ j++; } - c = z[j]; - if( c==',' ) continue; - if( c!=']' ) return -1; - break; + if( z[j]==',' ){ + continue; + }else if( z[j]==']' ){ + break; + }else{ + if( fast_isspace(z[j]) ){ + do{ j++; }while( fast_isspace(z[j]) ); + if( z[j]==',' ){ + continue; + }else if( z[j]==']' ){ + break; + } + } + x = jsonParseValue(pParse, j); + if( x==(-4) ){ + j = pParse->iErr; + continue; + } + if( x==(-3) ){ + j = pParse->iErr; + break; + } + } + pParse->iErr = j; + return -1; } pParse->aNode[iThis].n = pParse->nNode - (u32)iThis - 1; + pParse->iDepth--; return j+1; - }else if( c=='"' ){ + } + case '\'': { + u8 jnFlags; + char cDelim; + pParse->hasNonstd = 1; + jnFlags = JNODE_JSON5; + goto parse_string; + case '"': /* Parse string */ - u8 jnFlags = 0; - j = i+1; - for(;;){ + jnFlags = 0; + parse_string: + cDelim = z[i]; + for(j=i+1; 1; j++){ + if( jsonIsOk[(unsigned char)z[j]] ) continue; c = z[j]; - if( (c & ~0x1f)==0 ){ - /* Control characters are not allowed in strings */ - return -1; - } - if( c=='\\' ){ + if( c==cDelim ){ + break; + }else if( c=='\\' ){ c = z[++j]; if( c=='"' || c=='\\' || c=='/' || c=='b' || c=='f' || c=='n' || c=='r' || c=='t' - || (c=='u' && jsonIs4Hex(z+j+1)) ){ - jnFlags = JNODE_ESCAPE; + || (c=='u' && jsonIs4Hex(&z[j+1])) ){ + jnFlags |= JNODE_ESCAPE; + }else if( c=='\'' || c=='0' || c=='v' || c=='\n' + || (0xe2==(u8)c && 0x80==(u8)z[j+1] + && (0xa8==(u8)z[j+2] || 0xa9==(u8)z[j+2])) + || (c=='x' && jsonIs2Hex(&z[j+1])) ){ + jnFlags |= (JNODE_ESCAPE|JNODE_JSON5); + pParse->hasNonstd = 1; + }else if( c=='\r' ){ + if( z[j+1]=='\n' ) j++; + jnFlags |= (JNODE_ESCAPE|JNODE_JSON5); + pParse->hasNonstd = 1; }else{ + pParse->iErr = j; return -1; } - }else if( c=='"' ){ - break; + }else if( c<=0x1f ){ + /* Control characters are not allowed in strings */ + pParse->iErr = j; + return -1; } - j++; } - jsonParseAddNode(pParse, JSON_STRING, j+1-i, &z[i]); - if( !pParse->oom ) pParse->aNode[pParse->nNode-1].jnFlags = jnFlags; + jsonParseAddNode(pParse, JSON_STRING | (jnFlags<<8), j+1-i, &z[i]); return j+1; - }else if( c=='n' - && strncmp(z+i,"null",4)==0 - && !sqlite3Isalnum(z[i+4]) ){ - jsonParseAddNode(pParse, JSON_NULL, 0, 0); - return i+4; - }else if( c=='t' - && strncmp(z+i,"true",4)==0 - && !sqlite3Isalnum(z[i+4]) ){ - jsonParseAddNode(pParse, JSON_TRUE, 0, 0); - return i+4; - }else if( c=='f' - && strncmp(z+i,"false",5)==0 - && !sqlite3Isalnum(z[i+5]) ){ - jsonParseAddNode(pParse, JSON_FALSE, 0, 0); - return i+5; - }else if( c=='-' || (c>='0' && c<='9') ){ + } + case 't': { + if( strncmp(z+i,"true",4)==0 && !sqlite3Isalnum(z[i+4]) ){ + jsonParseAddNode(pParse, JSON_TRUE, 0, 0); + return i+4; + } + pParse->iErr = i; + return -1; + } + case 'f': { + if( strncmp(z+i,"false",5)==0 && !sqlite3Isalnum(z[i+5]) ){ + jsonParseAddNode(pParse, JSON_FALSE, 0, 0); + return i+5; + } + pParse->iErr = i; + return -1; + } + case '+': { + u8 seenDP, seenE, jnFlags; + pParse->hasNonstd = 1; + jnFlags = JNODE_JSON5; + goto parse_number; + case '.': + if( sqlite3Isdigit(z[i+1]) ){ + pParse->hasNonstd = 1; + jnFlags = JNODE_JSON5; + seenE = 0; + seenDP = JSON_REAL; + goto parse_number_2; + } + pParse->iErr = i; + return -1; + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': /* Parse number */ - u8 seenDP = 0; - u8 seenE = 0; + jnFlags = 0; + parse_number: + seenDP = JSON_INT; + seenE = 0; assert( '-' < '0' ); + assert( '+' < '0' ); + assert( '.' < '0' ); + c = z[i]; + if( c<='0' ){ - j = c=='-' ? 
i+1 : i; - if( z[j]=='0' && z[j+1]>='0' && z[j+1]<='9' ) return -1; + if( c=='0' ){ + if( (z[i+1]=='x' || z[i+1]=='X') && sqlite3Isxdigit(z[i+2]) ){ + assert( seenDP==JSON_INT ); + pParse->hasNonstd = 1; + jnFlags |= JNODE_JSON5; + for(j=i+3; sqlite3Isxdigit(z[j]); j++){} + goto parse_number_finish; + }else if( sqlite3Isdigit(z[i+1]) ){ + pParse->iErr = i+1; + return -1; + } + }else{ + if( !sqlite3Isdigit(z[i+1]) ){ + /* JSON5 allows for "+Infinity" and "-Infinity" using exactly + ** that case. SQLite also allows these in any case and it allows + ** "+inf" and "-inf". */ + if( (z[i+1]=='I' || z[i+1]=='i') + && sqlite3StrNICmp(&z[i+1], "inf",3)==0 + ){ + pParse->hasNonstd = 1; + if( z[i]=='-' ){ + jsonParseAddNode(pParse, JSON_REAL, 8, "-9.0e999"); + }else{ + jsonParseAddNode(pParse, JSON_REAL, 7, "9.0e999"); + } + return i + (sqlite3StrNICmp(&z[i+4],"inity",5)==0 ? 9 : 4); + } + if( z[i+1]=='.' ){ + pParse->hasNonstd = 1; + jnFlags |= JNODE_JSON5; + goto parse_number_2; + } + pParse->iErr = i; + return -1; + } + if( z[i+1]=='0' ){ + if( sqlite3Isdigit(z[i+2]) ){ + pParse->iErr = i+1; + return -1; + }else if( (z[i+2]=='x' || z[i+2]=='X') && sqlite3Isxdigit(z[i+3]) ){ + pParse->hasNonstd = 1; + jnFlags |= JNODE_JSON5; + for(j=i+4; sqlite3Isxdigit(z[j]); j++){} + goto parse_number_finish; + } + } + } } - j = i+1; - for(;; j++){ + parse_number_2: + for(j=i+1;; j++){ c = z[j]; - if( c>='0' && c<='9' ) continue; + if( sqlite3Isdigit(c) ) continue; if( c=='.' ){ - if( z[j-1]=='-' ) return -1; - if( seenDP ) return -1; - seenDP = 1; + if( seenDP==JSON_REAL ){ + pParse->iErr = j; + return -1; + } + seenDP = JSON_REAL; continue; } if( c=='e' || c=='E' ){ - if( z[j-1]<'0' ) return -1; - if( seenE ) return -1; - seenDP = seenE = 1; + if( z[j-1]<'0' ){ + if( ALWAYS(z[j-1]=='.') && ALWAYS(j-2>=i) && sqlite3Isdigit(z[j-2]) ){ + pParse->hasNonstd = 1; + jnFlags |= JNODE_JSON5; + }else{ + pParse->iErr = j; + return -1; + } + } + if( seenE ){ + pParse->iErr = j; + return -1; + } + seenDP = JSON_REAL; + seenE = 1; c = z[j+1]; if( c=='+' || c=='-' ){ j++; c = z[j+1]; } - if( c<'0' || c>'9' ) return -1; + if( c<'0' || c>'9' ){ + pParse->iErr = j; + return -1; + } continue; } break; } - if( z[j-1]<'0' ) return -1; - jsonParseAddNode(pParse, seenDP ? JSON_REAL : JSON_INT, - j - i, &z[i]); + if( z[j-1]<'0' ){ + if( ALWAYS(z[j-1]=='.') && ALWAYS(j-2>=i) && sqlite3Isdigit(z[j-2]) ){ + pParse->hasNonstd = 1; + jnFlags |= JNODE_JSON5; + }else{ + pParse->iErr = j; + return -1; + } + } + parse_number_finish: + jsonParseAddNode(pParse, seenDP | (jnFlags<<8), j - i, &z[i]); return j; - }else if( c=='}' ){ + } + case '}': { + pParse->iErr = i; return -2; /* End of {...} */ - }else if( c==']' ){ + } + case ']': { + pParse->iErr = i; return -3; /* End of [...] 
*/ - }else if( c==0 ){ + } + case ',': { + pParse->iErr = i; + return -4; /* List separator */ + } + case ':': { + pParse->iErr = i; + return -5; /* Object label/value separator */ + } + case 0: { return 0; /* End of file */ - }else{ + } + case 0x09: + case 0x0a: + case 0x0d: + case 0x20: { + do{ + i++; + }while( fast_isspace(z[i]) ); + goto json_parse_restart; + } + case 0x0b: + case 0x0c: + case '/': + case 0xc2: + case 0xe1: + case 0xe2: + case 0xe3: + case 0xef: { + j = json5Whitespace(&z[i]); + if( j>0 ){ + i += j; + pParse->hasNonstd = 1; + goto json_parse_restart; + } + pParse->iErr = i; + return -1; + } + case 'n': { + if( strncmp(z+i,"null",4)==0 && !sqlite3Isalnum(z[i+4]) ){ + jsonParseAddNode(pParse, JSON_NULL, 0, 0); + return i+4; + } + /* fall-through into the default case that checks for NaN */ + } + default: { + u32 k; + int nn; + c = z[i]; + for(k=0; k<sizeof(aNanInfName)/sizeof(aNanInfName[0]); k++){ + if( c!=aNanInfName[k].c1 && c!=aNanInfName[k].c2 ) continue; + nn = aNanInfName[k].n; + if( sqlite3StrNICmp(&z[i], aNanInfName[k].zMatch, nn)!=0 ) continue; + if( sqlite3Isalnum(z[i+nn]) ) continue; + jsonParseAddNode(pParse, aNanInfName[k].eType, + aNanInfName[k].nRepl, aNanInfName[k].zRepl); + pParse->hasNonstd = 1; + return i + nn; + } + pParse->iErr = i; return -1; /* Syntax error */ } + } /* End switch(z[i]) */ } /* ** Parse a complete JSON string. Return 0 on success or non-zero if there -** are any errors. If an error occurs, free all memory associated with -** pParse. +** are any errors. If an error occurs, free all memory held by pParse, +** but not pParse itself. ** -** pParse is uninitialized when this routine is called. +** pParse must be initialized to an empty parse object prior to calling +** this routine. */ static int jsonParse( JsonParse *pParse, /* Initialize and fill this JsonParse object */ - sqlite3_context *pCtx, /* Report errors here */ - const char *zJson /* Input JSON text to be parsed */ + sqlite3_context *pCtx /* Report errors here */ ){ int i; - memset(pParse, 0, sizeof(*pParse)); - if( zJson==0 ) return 1; - pParse->zJson = zJson; + const char *zJson = pParse->zJson; i = jsonParseValue(pParse, 0); if( pParse->oom ) i = -1; if( i>0 ){ assert( pParse->iDepth==0 ); while( fast_isspace(zJson[i]) ) i++; - if( zJson[i] ) i = -1; + if( zJson[i] ){ + i += json5Whitespace(&zJson[i]); + if( zJson[i] ){ + jsonParseReset(pParse); + return 1; + } + pParse->hasNonstd = 1; + } } if( i<=0 ){ if( pCtx!=0 ){ @@ -196366,6 +204452,7 @@ static int jsonParse( return 0; } + /* Mark node i of pParse as being a child of iParent. Call recursively ** to fill in all the descendants of node i. */ @@ -196415,26 +204502,49 @@ static int jsonParseFindParents(JsonParse *pParse){ #define JSON_CACHE_SZ 4 /* Max number of cache entries */ /* -** Obtain a complete parse of the JSON found in the first argument -** of the argv array. Use the sqlite3_get_auxdata() cache for this -** parse if it is available. If the cache is not available or if it -** is no longer valid, parse the JSON again and return the new parse, -** and also register the new parse so that it will be available for +** Obtain a complete parse of the JSON found in the pJson argument +** +** Use the sqlite3_get_auxdata() cache to find a preexisting parse +** if it is available. If the cache is not available or if it +** is no longer valid, parse the JSON again and return the new parse. +** Also register the new parse so that it will be available for ** future sqlite3_get_auxdata() calls. +** +** If an error occurs and pErrCtx!=0 then report the error on pErrCtx +** and return NULL. +** +** The returned pointer (if it is not NULL) is owned by the cache in +** most cases, not the caller. The caller does NOT need to invoke +** jsonParseFree(), in most cases.
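jsonParseCached() builds on the public auxdata API: sqlite3_get_auxdata()/sqlite3_set_auxdata() let a function parse an argument once and reuse the result across rows, with SQLite invoking the destructor when the slot is replaced or the statement is finalized. The idiom, reduced to its core (Parsed, parse() and parseFree() are hypothetical placeholders):

#include "sqlite3.h"

typedef struct Parsed Parsed;
extern Parsed *parse(const char *z);  /* hypothetical: expensive parse */
extern void parseFree(void *p);       /* hypothetical: matching free   */

static Parsed *getParsed(sqlite3_context *ctx, sqlite3_value *arg){
  Parsed *p = (Parsed*)sqlite3_get_auxdata(ctx, 0);
  if( p==0 ){
    p = parse((const char*)sqlite3_value_text(arg));
    if( p==0 ) return 0;
    sqlite3_set_auxdata(ctx, 0, p, parseFree);
    /* Ownership has passed to SQLite, which may free it immediately;
    ** re-fetch, exactly as jsonParseCached() does after set_auxdata. */
    p = (Parsed*)sqlite3_get_auxdata(ctx, 0);
  }
  return p;
}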
+**
+** Except, if an error occurs and pErrCtx==0 then return the JsonParse
+** object with JsonParse.nErr non-zero and the caller will own the JsonParse
+** object.  In that case, it will be the responsibility of the caller to
+** invoke jsonParseFree().  To summarize:
+**
+**   pErrCtx!=0 || p->nErr==0    ==>   Return value p is owned by the
+**                                     cache.  Caller does not need to
+**                                     free it.
+**
+**   pErrCtx==0 && p->nErr!=0    ==>   Return value is owned by the caller
+**                                     and so the caller must free it.
*/
static JsonParse *jsonParseCached(
-  sqlite3_context *pCtx,
-  sqlite3_value **argv,
-  sqlite3_context *pErrCtx
+  sqlite3_context *pCtx,     /* Context to use for cache search */
+  sqlite3_value *pJson,      /* Function param containing JSON text */
+  sqlite3_context *pErrCtx,  /* Write parse errors here if not NULL */
+  int bUnedited              /* No prior edits allowed */
){
-  const char *zJson = (const char*)sqlite3_value_text(argv[0]);
-  int nJson = sqlite3_value_bytes(argv[0]);
+  char *zJson = (char*)sqlite3_value_text(pJson);
+  int nJson = sqlite3_value_bytes(pJson);
   JsonParse *p;
   JsonParse *pMatch = 0;
   int iKey;
   int iMinKey = 0;
   u32 iMinHold = 0xffffffff;
   u32 iMaxHold = 0;
+  int bJsonRCStr;
+
+  if( zJson==0 ) return 0;
   for(iKey=0; iKey<JSON_CACHE_SZ; iKey++){
     p = (JsonParse*)sqlite3_get_auxdata(pCtx, JSON_CACHE_ID+iKey);
     if( p==0 ){
       iMinKey = iKey;
       break;
     }
     if( pMatch==0
      && p->nJson==nJson
-     && memcmp(p->zJson,zJson,nJson)==0
+     && (p->hasMod==0 || bUnedited==0)
+     && (p->zJson==zJson || memcmp(p->zJson,zJson,nJson)==0)
     ){
       p->nErr = 0;
+      p->useMod = 0;
+      pMatch = p;
+    }else
+    if( pMatch==0
+     && p->zAlt!=0
+     && bUnedited==0
+     && p->nAlt==nJson
+     && memcmp(p->zAlt, zJson, nJson)==0
+    ){
+      p->nErr = 0;
+      p->useMod = 1;
       pMatch = p;
     }else if( p->iHold<iMinHold ){
       iMinHold = p->iHold;
@@ -196457,24 +204579,44 @@ static JsonParse *jsonParseCached(
     }
   }
   if( pMatch ){
+    /* The input JSON text was found in the cache.  Use the preexisting
+    ** parse of this JSON */
     pMatch->nErr = 0;
     pMatch->iHold = iMaxHold+1;
+    assert( pMatch->nJPRef>0 ); /* pMatch is owned by the cache */
     return pMatch;
   }
-  p = sqlite3_malloc64( sizeof(*p) + nJson + 1 );
+
+  /* The input JSON was not found anywhere in the cache.  We will need
+  ** to parse it ourselves and generate a new JsonParse object.
+  */
+  bJsonRCStr = sqlite3ValueIsOfClass(pJson,sqlite3RCStrUnref);
+  p = sqlite3_malloc64( sizeof(*p) + (bJsonRCStr ? 0 : nJson+1) );
   if( p==0 ){
     sqlite3_result_error_nomem(pCtx);
     return 0;
   }
   memset(p, 0, sizeof(*p));
-  p->zJson = (char*)&p[1];
-  memcpy((char*)p->zJson, zJson, nJson+1);
-  if( jsonParse(p, pErrCtx, p->zJson) ){
-    sqlite3_free(p);
+  if( bJsonRCStr ){
+    p->zJson = sqlite3RCStrRef(zJson);
+    p->bJsonIsRCStr = 1;
+  }else{
+    p->zJson = (char*)&p[1];
+    memcpy(p->zJson, zJson, nJson+1);
+  }
+  p->nJPRef = 1;
+  if( jsonParse(p, pErrCtx) ){
+    if( pErrCtx==0 ){
+      p->nErr = 1;
+      assert( p->nJPRef==1 );  /* Caller will own the new JsonParse object p */
+      return p;
+    }
+    jsonParseFree(p);
     return 0;
   }
   p->nJson = nJson;
   p->iHold = iMaxHold+1;
+  /* Transfer ownership of the new JsonParse to the cache */
   sqlite3_set_auxdata(pCtx, JSON_CACHE_ID+iMinKey, p,
                       (void(*)(void*))jsonParseFree);
   return (JsonParse*)sqlite3_get_auxdata(pCtx, JSON_CACHE_ID+iMinKey);
@@ -196484,7 +204626,7 @@ static JsonParse *jsonParseCached(
 ** Compare the OBJECT label at pNode against zKey,nKey.  Return true on
 ** a match.
 */
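
jsonParseCached() above layers an ownership rule and edit tracking (hasMod/useMod, the zAlt alternative text) on top of SQLite's per-argument auxiliary-data cache. The underlying idiom, reduced to its essentials (sketch; MyObj, my_compile and my_free are stand-ins for an application's expensive-to-build object):

    #include <sqlite3.h>

    typedef struct MyObj MyObj;          /* stand-in for JsonParse */
    MyObj *my_compile(const char *zTxt); /* hypothetical expensive parse */
    void my_free(void *p);               /* destructor for MyObj */

    static void my_func(sqlite3_context *ctx, int argc, sqlite3_value **argv){
      /* Slot 0 is tied to argv[0]; SQLite discards the entry on its own
      ** when that argument's value changes between calls. */
      MyObj *p = (MyObj*)sqlite3_get_auxdata(ctx, 0);
      if( p==0 ){
        p = my_compile((const char*)sqlite3_value_text(argv[0]));
        if( p==0 ){ sqlite3_result_error_nomem(ctx); return; }
        /* Ownership transfers to SQLite; my_free runs at eviction. */
        sqlite3_set_auxdata(ctx, 0, p, my_free);
        /* Re-fetch: sqlite3_set_auxdata() may fail and destroy p at once. */
        p = (MyObj*)sqlite3_get_auxdata(ctx, 0);
        if( p==0 ) return;
      }
      /* ... use p ... */
      (void)argc;
    }

The re-fetch at the end is the same defensive step the diff takes by returning sqlite3_get_auxdata() rather than p directly.
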
-static int jsonLabelCompare(JsonNode *pNode, const char *zKey, u32 nKey){
+static int jsonLabelCompare(const JsonNode *pNode, const char *zKey, u32 nKey){
   assert( pNode->eU==1 );
   if( pNode->jnFlags & JNODE_RAW ){
     if( pNode->n!=nKey ) return 0;
@@ -196494,6 +204636,15 @@ static int jsonLabelCompare(JsonNode *pNode, const char *zKey, u32 nKey){
     return strncmp(pNode->u.zJContent+1, zKey, nKey)==0;
   }
 }
+static int jsonSameLabel(const JsonNode *p1, const JsonNode *p2){
+  if( p1->jnFlags & JNODE_RAW ){
+    return jsonLabelCompare(p2, p1->u.zJContent, p1->n);
+  }else if( p2->jnFlags & JNODE_RAW ){
+    return jsonLabelCompare(p1, p2->u.zJContent, p2->n);
+  }else{
+    return p1->n==p2->n && strncmp(p1->u.zJContent,p2->u.zJContent,p1->n)==0;
+  }
+}

 /* forward declaration */
 static JsonNode *jsonLookupAppend(JsonParse*,const char*,int*,const char**);
@@ -196516,9 +204667,31 @@ static JsonNode *jsonLookupStep(
 ){
   u32 i, j, nKey;
   const char *zKey;
-  JsonNode *pRoot = &pParse->aNode[iRoot];
+  JsonNode *pRoot;
+
+  if( pParse->oom ) return 0;
+  pRoot = &pParse->aNode[iRoot];
+  if( pRoot->jnFlags & (JNODE_REPLACE|JNODE_REMOVE) && pParse->useMod ){
+    while( (pRoot->jnFlags & JNODE_REPLACE)!=0 ){
+      u32 idx = (u32)(pRoot - pParse->aNode);
+      i = pParse->iSubst;
+      while( 1 /*exit-by-break*/ ){
+        assert( i<pParse->nNode );
+        assert( pParse->aNode[i].eType==JSON_SUBST );
+        assert( pParse->aNode[i].eU==4 );
+        assert( pParse->aNode[i].u.iPrev<i );
+        if( pParse->aNode[i].n==idx ){
+          pRoot = &pParse->aNode[i+1];
+          iRoot = i+1;
+          break;
+        }
+        i = pParse->aNode[i].u.iPrev;
+      }
+    }
+    if( pRoot->jnFlags & JNODE_REMOVE ){
+      return 0;
+    }
+  }
   if( zPath[0]==0 ) return pRoot;
-  if( pRoot->jnFlags & JNODE_REPLACE ) return 0;
   if( zPath[0]=='.' ){
     if( pRoot->eType!=JSON_OBJECT ) return 0;
     zPath++;
@@ -196552,14 +204725,16 @@ static JsonNode *jsonLookupStep(
       j += jsonNodeSize(&pRoot[j]);
     }
     if( (pRoot->jnFlags & JNODE_APPEND)==0 ) break;
+    if( pParse->useMod==0 ) break;
     assert( pRoot->eU==2 );
-    iRoot += pRoot->u.iAppend;
+    iRoot = pRoot->u.iAppend;
     pRoot = &pParse->aNode[iRoot];
     j = 1;
   }
   if( pApnd ){
     u32 iStart, iLabel;
     JsonNode *pNode;
+    assert( pParse->useMod );
     iStart = jsonParseAddNode(pParse, JSON_OBJECT, 2, 0);
     iLabel = jsonParseAddNode(pParse, JSON_STRING, nKey, zKey);
     zPath += i;
@@ -196568,7 +204743,7 @@ static JsonNode *jsonLookupStep(
     if( pNode ){
       pRoot = &pParse->aNode[iRoot];
       assert( pRoot->eU==0 );
-      pRoot->u.iAppend = iStart - iRoot;
+      pRoot->u.iAppend = iStart;
       pRoot->jnFlags |= JNODE_APPEND;
       VVA( pRoot->eU = 2 );
       pParse->aNode[iLabel].jnFlags |= JNODE_RAW;
@@ -196589,12 +204764,13 @@ static JsonNode *jsonLookupStep(
     if( pRoot->eType!=JSON_ARRAY ) return 0;
     for(;;){
       while( j<=pBase->n ){
-        if( (pBase[j].jnFlags & JNODE_REMOVE)==0 ) i++;
+        if( (pBase[j].jnFlags & JNODE_REMOVE)==0 || pParse->useMod==0 ) i++;
         j += jsonNodeSize(&pBase[j]);
       }
       if( (pBase->jnFlags & JNODE_APPEND)==0 ) break;
+      if( pParse->useMod==0 ) break;
       assert( pBase->eU==2 );
-      iBase += pBase->u.iAppend;
+      iBase = pBase->u.iAppend;
       pBase = &pParse->aNode[iBase];
       j = 1;
     }
@@ -196622,13 +204798,17 @@ static JsonNode *jsonLookupStep(
     zPath += j + 1;
     j = 1;
     for(;;){
-      while( j<=pRoot->n && (i>0 || (pRoot[j].jnFlags & JNODE_REMOVE)!=0) ){
-        if( (pRoot[j].jnFlags & JNODE_REMOVE)==0 ) i--;
+      while( j<=pRoot->n
+         && (i>0 || ((pRoot[j].jnFlags & JNODE_REMOVE)!=0 && pParse->useMod))
+      ){
+        if( (pRoot[j].jnFlags & JNODE_REMOVE)==0 || pParse->useMod==0 ) i--;
         j += jsonNodeSize(&pRoot[j]);
       }
+      if( i==0 && j<=pRoot->n ) break;
      if( (pRoot->jnFlags & JNODE_APPEND)==0 ) break;
+      if( 
pParse->useMod==0 ) break; assert( pRoot->eU==2 ); - iRoot += pRoot->u.iAppend; + iRoot = pRoot->u.iAppend; pRoot = &pParse->aNode[iRoot]; j = 1; } @@ -196638,13 +204818,14 @@ static JsonNode *jsonLookupStep( if( i==0 && pApnd ){ u32 iStart; JsonNode *pNode; + assert( pParse->useMod ); iStart = jsonParseAddNode(pParse, JSON_ARRAY, 1, 0); pNode = jsonLookupAppend(pParse, zPath, pApnd, pzErr); if( pParse->oom ) return 0; if( pNode ){ pRoot = &pParse->aNode[iRoot]; assert( pRoot->eU==0 ); - pRoot->u.iAppend = iStart - iRoot; + pRoot->u.iAppend = iStart; pRoot->jnFlags |= JNODE_APPEND; VVA( pRoot->eU = 2 ); } @@ -196771,47 +204952,90 @@ static void jsonRemoveAllNulls(JsonNode *pNode){ ** SQL functions used for testing and debugging ****************************************************************************/ +#if SQLITE_DEBUG +/* +** Print N node entries. +*/ +static void jsonDebugPrintNodeEntries( + JsonNode *aNode, /* First node entry to print */ + int N /* Number of node entries to print */ +){ + int i; + for(i=0; iaNode, p->nNode); +} +static void jsonDebugPrintNode(JsonNode *pNode){ + jsonDebugPrintNodeEntries(pNode, jsonNodeSize(pNode)); +} +#else + /* The usual case */ +# define jsonDebugPrintNode(X) +# define jsonDebugPrintParse(X) +#endif + #ifdef SQLITE_DEBUG /* -** The json_parse(JSON) function returns a string which describes -** a parse of the JSON provided. Or it returns NULL if JSON is not -** well-formed. +** SQL function: json_parse(JSON) +** +** Parse JSON using jsonParseCached(). Then print a dump of that +** parse on standard output. Return the mimified JSON result, just +** like the json() function. */ static void jsonParseFunc( sqlite3_context *ctx, int argc, sqlite3_value **argv ){ - JsonString s; /* Output string - not real JSON */ - JsonParse x; /* The parse */ - u32 i; + JsonParse *p; /* The parse */ assert( argc==1 ); - if( jsonParse(&x, ctx, (const char*)sqlite3_value_text(argv[0])) ) return; - jsonParseFindParents(&x); - jsonInit(&s, ctx); - for(i=0; inNode); + printf("nAlloc = %u\n", p->nAlloc); + printf("nJson = %d\n", p->nJson); + printf("nAlt = %d\n", p->nAlt); + printf("nErr = %u\n", p->nErr); + printf("oom = %u\n", p->oom); + printf("hasNonstd = %u\n", p->hasNonstd); + printf("useMod = %u\n", p->useMod); + printf("hasMod = %u\n", p->hasMod); + printf("nJPRef = %u\n", p->nJPRef); + printf("iSubst = %u\n", p->iSubst); + printf("iHold = %u\n", p->iHold); + jsonDebugPrintNodeEntries(p->aNode, p->nNode); + jsonReturnJson(p, p->aNode, ctx, 1, 0); } /* @@ -196895,7 +205119,7 @@ static void jsonArrayLengthFunc( u32 i; JsonNode *pNode; - p = jsonParseCached(ctx, argv, ctx); + p = jsonParseCached(ctx, argv[0], ctx, 0); if( p==0 ) return; assert( p->nNode ); if( argc==2 ){ @@ -196908,9 +205132,16 @@ static void jsonArrayLengthFunc( return; } if( pNode->eType==JSON_ARRAY ){ - assert( (pNode->jnFlags & JNODE_APPEND)==0 ); - for(i=1; i<=pNode->n; n++){ - i += jsonNodeSize(&pNode[i]); + while( 1 /*exit-by-break*/ ){ + i = 1; + while( i<=pNode->n ){ + if( (pNode[i].jnFlags & JNODE_REMOVE)==0 ) n++; + i += jsonNodeSize(&pNode[i]); + } + if( (pNode->jnFlags & JNODE_APPEND)==0 ) break; + if( p->useMod==0 ) break; + assert( pNode->eU==2 ); + pNode = &p->aNode[pNode->u.iAppend]; } } sqlite3_result_int64(ctx, n); @@ -196957,14 +205188,14 @@ static void jsonExtractFunc( JsonString jx; if( argc<2 ) return; - p = jsonParseCached(ctx, argv, ctx); + p = jsonParseCached(ctx, argv[0], ctx, 0); if( p==0 ) return; if( argc==2 ){ /* With a single PATH argument */ zPath = (const 
char*)sqlite3_value_text(argv[1]);
     if( zPath==0 ) return;
     if( flags & JSON_ABPATH ){
-      if( zPath[0]!='$' ){
+      if( zPath[0]!='$' || (zPath[1]!='.' && zPath[1]!='[' && zPath[1]!=0) ){
         /* The -> and ->> operators accept abbreviated PATH arguments.  This
         ** is mostly for compatibility with PostgreSQL, but also for
         ** convenience.
@@ -196975,11 +205206,11 @@ static void jsonExtractFunc(
         */
         jsonInit(&jx, ctx);
         if( sqlite3Isdigit(zPath[0]) ){
-          jsonAppendRaw(&jx, "$[", 2);
+          jsonAppendRawNZ(&jx, "$[", 2);
           jsonAppendRaw(&jx, zPath, (int)strlen(zPath));
-          jsonAppendRaw(&jx, "]", 2);
+          jsonAppendRawNZ(&jx, "]", 2);
         }else{
-          jsonAppendRaw(&jx, "$.", 1 + (zPath[0]!='['));
+          jsonAppendRawNZ(&jx, "$.", 1 + (zPath[0]!='['));
           jsonAppendRaw(&jx, zPath, (int)strlen(zPath));
           jsonAppendChar(&jx, 0);
         }
@@ -196990,15 +205221,14 @@ static void jsonExtractFunc(
       }
       if( pNode ){
         if( flags & JSON_JSON ){
-          jsonReturnJson(pNode, ctx, 0);
+          jsonReturnJson(p, pNode, ctx, 0, 0);
         }else{
-          jsonReturn(pNode, ctx, 0);
-          sqlite3_result_subtype(ctx, 0);
+          jsonReturn(p, pNode, ctx, 1);
         }
       }
     }else{
       pNode = jsonLookup(p, zPath, 0, ctx);
-      if( p->nErr==0 && pNode ) jsonReturn(pNode, ctx, 0);
+      if( p->nErr==0 && pNode ) jsonReturn(p, pNode, ctx, 0);
     }
   }else{
     /* Two or more PATH arguments results in a JSON array with each
@@ -197012,9 +205242,9 @@ static void jsonExtractFunc(
       if( p->nErr ) break;
       jsonAppendSeparator(&jx);
       if( pNode ){
-        jsonRenderNode(pNode, &jx, 0);
+        jsonRenderNode(p, pNode, &jx);
       }else{
-        jsonAppendRaw(&jx, "null", 4);
+        jsonAppendRawNZ(&jx, "null", 4);
       }
     }
     if( i==argc ){
@@ -197055,51 +205285,42 @@ static JsonNode *jsonMergePatch(
     assert( pPatch[i].eU==1 );
     nKey = pPatch[i].n;
     zKey = pPatch[i].u.zJContent;
-    assert( (pPatch[i].jnFlags & JNODE_RAW)==0 );
     for(j=1; j<pTarget->n; j += jsonNodeSize(&pTarget[j+1])+1 ){
       assert( pTarget[j].eType==JSON_STRING );
       assert( pTarget[j].jnFlags & JNODE_LABEL );
-      assert( (pPatch[i].jnFlags & JNODE_RAW)==0 );
-      if( pTarget[j].n==nKey && strncmp(pTarget[j].u.zJContent,zKey,nKey)==0 ){
-        if( pTarget[j+1].jnFlags & (JNODE_REMOVE|JNODE_PATCH) ) break;
+      if( jsonSameLabel(&pPatch[i], &pTarget[j]) ){
+        if( pTarget[j+1].jnFlags & (JNODE_REMOVE|JNODE_REPLACE) ) break;
         if( pPatch[i+1].eType==JSON_NULL ){
           pTarget[j+1].jnFlags |= JNODE_REMOVE;
         }else{
           JsonNode *pNew = jsonMergePatch(pParse, iTarget+j+1, &pPatch[i+1]);
           if( pNew==0 ) return 0;
-          pTarget = &pParse->aNode[iTarget];
-          if( pNew!=&pTarget[j+1] ){
-            assert( pTarget[j+1].eU==0
-                 || pTarget[j+1].eU==1
-                 || pTarget[j+1].eU==2 );
-            testcase( pTarget[j+1].eU==1 );
-            testcase( pTarget[j+1].eU==2 );
-            VVA( pTarget[j+1].eU = 5 );
-            pTarget[j+1].u.pPatch = pNew;
-            pTarget[j+1].jnFlags |= JNODE_PATCH;
+          if( pNew!=&pParse->aNode[iTarget+j+1] ){
+            jsonParseAddSubstNode(pParse, iTarget+j+1);
+            jsonParseAddNodeArray(pParse, pNew, jsonNodeSize(pNew));
          }
+          pTarget = &pParse->aNode[iTarget];
         }
        break;
      }
    }
     if( j>=pTarget->n && pPatch[i+1].eType!=JSON_NULL ){
-      int iStart, iPatch;
-      iStart = jsonParseAddNode(pParse, JSON_OBJECT, 2, 0);
+      int iStart;
+      JsonNode *pApnd;
+      u32 nApnd;
+      iStart = jsonParseAddNode(pParse, JSON_OBJECT, 0, 0);
       jsonParseAddNode(pParse, JSON_STRING, nKey, zKey);
-      iPatch = jsonParseAddNode(pParse, JSON_TRUE, 0, 0);
+      pApnd = &pPatch[i+1];
+      if( pApnd->eType==JSON_OBJECT ) jsonRemoveAllNulls(pApnd);
+      nApnd = jsonNodeSize(pApnd);
+      jsonParseAddNodeArray(pParse, pApnd, jsonNodeSize(pApnd));
       if( pParse->oom ) return 0;
-      jsonRemoveAllNulls(pPatch);
-      pTarget = &pParse->aNode[iTarget];
-      assert( pParse->aNode[iRoot].eU==0 || pParse->aNode[iRoot].eU==2 );
-      testcase( pParse->aNode[iRoot].eU==2 );
+      pParse->aNode[iStart].n = 1+nApnd;
       pParse->aNode[iRoot].jnFlags |= JNODE_APPEND;
+      pParse->aNode[iRoot].u.iAppend = iStart;
       VVA( pParse->aNode[iRoot].eU = 2 );
-      pParse->aNode[iRoot].u.iAppend = iStart - iRoot;
       iRoot = iStart;
-      assert( pParse->aNode[iPatch].eU==0 );
-      VVA( pParse->aNode[iPatch].eU = 5 );
-      pParse->aNode[iPatch].jnFlags |= JNODE_PATCH;
-      pParse->aNode[iPatch].u.pPatch = &pPatch[i+1];
+      pTarget = &pParse->aNode[iTarget];
     }
   }
   return pTarget;
@@ -197115,25 +205336,28 @@ static void jsonPatchFunc(
   int argc,
   sqlite3_value **argv
){
-  JsonParse x;     /* The JSON that is being patched */
-  JsonParse y;     /* The patch */
+  JsonParse *pX;   /* The JSON that is being patched */
+  JsonParse *pY;   /* The patch */
   JsonNode *pResult;   /* The result of the merge */

   UNUSED_PARAMETER(argc);
-  if( jsonParse(&x, ctx, (const char*)sqlite3_value_text(argv[0])) ) return;
-  if( jsonParse(&y, ctx, (const char*)sqlite3_value_text(argv[1])) ){
-    jsonParseReset(&x);
-    return;
-  }
-  pResult = jsonMergePatch(&x, 0, y.aNode);
-  assert( pResult!=0 || x.oom );
-  if( pResult ){
-    jsonReturnJson(pResult, ctx, 0);
+  pX = jsonParseCached(ctx, argv[0], ctx, 1);
+  if( pX==0 ) return;
+  assert( pX->hasMod==0 );
+  pX->hasMod = 1;
+  pY = jsonParseCached(ctx, argv[1], ctx, 1);
+  if( pY==0 ) return;
+  pX->useMod = 1;
+  pY->useMod = 1;
+  pResult = jsonMergePatch(pX, 0, pY->aNode);
+  assert( pResult!=0 || pX->oom );
+  if( pResult && pX->oom==0 ){
+    jsonDebugPrintParse(pX);
+    jsonDebugPrintNode(pResult);
+    jsonReturnJson(pX, pResult, ctx, 0, 0);
   }else{
     sqlite3_result_error_nomem(ctx);
   }
-  jsonParseReset(&x);
-  jsonParseReset(&y);
}

@@ -197189,26 +205413,120 @@ static void jsonRemoveFunc(
   int argc,
   sqlite3_value **argv
){
-  JsonParse x;          /* The parse */
+  JsonParse *pParse;    /* The parse */
   JsonNode *pNode;
   const char *zPath;
   u32 i;

   if( argc<1 ) return;
-  if( jsonParse(&x, ctx, (const char*)sqlite3_value_text(argv[0])) ) return;
-  assert( x.nNode );
+  pParse = jsonParseCached(ctx, argv[0], ctx, argc>1);
+  if( pParse==0 ) return;
   for(i=1; i<(u32)argc; i++){
     zPath = (const char*)sqlite3_value_text(argv[i]);
     if( zPath==0 ) goto remove_done;
-    pNode = jsonLookup(&x, zPath, 0, ctx);
-    if( x.nErr ) goto remove_done;
-    if( pNode ) pNode->jnFlags |= JNODE_REMOVE;
+    pNode = jsonLookup(pParse, zPath, 0, ctx);
+    if( pParse->nErr ) goto remove_done;
+    if( pNode ){
+      pNode->jnFlags |= JNODE_REMOVE;
+      pParse->hasMod = 1;
+      pParse->useMod = 1;
+    }
   }
-  if( (x.aNode[0].jnFlags & JNODE_REMOVE)==0 ){
-    jsonReturnJson(x.aNode, ctx, 0);
+  if( (pParse->aNode[0].jnFlags & JNODE_REMOVE)==0 ){
+    jsonReturnJson(pParse, pParse->aNode, ctx, 1, 0);
   }
 remove_done:
-  jsonParseReset(&x);
+  jsonDebugPrintParse(pParse);
+}
+
+/*
+** Substitute the value at iNode with the pValue parameter.
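
json_patch() and json_remove() above now record edits rather than re-serializing immediately: each replacement appends a JSON_SUBST record, and jsonLookupStep (earlier hunk) resolves a node by walking from iSubst back through the u.iPrev links until it finds the newest substitution. A minimal model of that append-only edit chain (sketch only — a loose analogy for the internal mechanism; capacity checks omitted, all names hypothetical):

    /* Each entry names a replaced node and links to the previous entry. */
    typedef struct Subst { unsigned target, value; int prev; } Subst;
    typedef struct Log { Subst a[64]; int n; int head; } Log;

    static void log_init(Log *p){ p->n = 0; p->head = -1; }

    static void log_add(Log *p, unsigned tgt, unsigned val){
      p->a[p->n].target = tgt;       /* node index being replaced */
      p->a[p->n].value = val;        /* node index of the replacement */
      p->a[p->n].prev = p->head;     /* chain to the previous record */
      p->head = p->n++;
    }

    /* Newest replacement for node i, or i itself if never edited. */
    static unsigned log_lookup(const Log *p, unsigned i){
      int k;
      for(k=p->head; k>=0; k=p->a[k].prev){
        if( p->a[k].target==i ) return p->a[k].value;
      }
      return i;
    }

Because the log is append-only, the original parse stays intact and the same cached parse can serve both edited (useMod) and unedited readers.
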
+*/ +static void jsonReplaceNode( + sqlite3_context *pCtx, + JsonParse *p, + int iNode, + sqlite3_value *pValue +){ + int idx = jsonParseAddSubstNode(p, iNode); + if( idx<=0 ){ + assert( p->oom ); + return; + } + switch( sqlite3_value_type(pValue) ){ + case SQLITE_NULL: { + jsonParseAddNode(p, JSON_NULL, 0, 0); + break; + } + case SQLITE_FLOAT: { + char *z = sqlite3_mprintf("%!0.15g", sqlite3_value_double(pValue)); + int n; + if( z==0 ){ + p->oom = 1; + break; + } + n = sqlite3Strlen30(z); + jsonParseAddNode(p, JSON_REAL, n, z); + jsonParseAddCleanup(p, sqlite3_free, z); + break; + } + case SQLITE_INTEGER: { + char *z = sqlite3_mprintf("%lld", sqlite3_value_int64(pValue)); + int n; + if( z==0 ){ + p->oom = 1; + break; + } + n = sqlite3Strlen30(z); + jsonParseAddNode(p, JSON_INT, n, z); + jsonParseAddCleanup(p, sqlite3_free, z); + + break; + } + case SQLITE_TEXT: { + const char *z = (const char*)sqlite3_value_text(pValue); + u32 n = (u32)sqlite3_value_bytes(pValue); + if( z==0 ){ + p->oom = 1; + break; + } + if( sqlite3_value_subtype(pValue)!=JSON_SUBTYPE ){ + char *zCopy = sqlite3_malloc64( n+1 ); + int k; + if( zCopy ){ + memcpy(zCopy, z, n); + zCopy[n] = 0; + jsonParseAddCleanup(p, sqlite3_free, zCopy); + }else{ + p->oom = 1; + sqlite3_result_error_nomem(pCtx); + } + k = jsonParseAddNode(p, JSON_STRING, n, zCopy); + assert( k>0 || p->oom ); + if( p->oom==0 ) p->aNode[k].jnFlags |= JNODE_RAW; + }else{ + JsonParse *pPatch = jsonParseCached(pCtx, pValue, pCtx, 1); + if( pPatch==0 ){ + p->oom = 1; + break; + } + jsonParseAddNodeArray(p, pPatch->aNode, pPatch->nNode); + /* The nodes copied out of pPatch and into p likely contain + ** u.zJContent pointers into pPatch->zJson. So preserve the + ** content of pPatch until p is destroyed. */ + assert( pPatch->nJPRef>=1 ); + pPatch->nJPRef++; + jsonParseAddCleanup(p, (void(*)(void*))jsonParseFree, pPatch); + } + break; + } + default: { + jsonParseAddNode(p, JSON_NULL, 0, 0); + sqlite3_result_error(pCtx, "JSON cannot hold BLOB values", -1); + p->nErr++; + break; + } + } } /* @@ -197222,7 +205540,7 @@ static void jsonReplaceFunc( int argc, sqlite3_value **argv ){ - JsonParse x; /* The parse */ + JsonParse *pParse; /* The parse */ JsonNode *pNode; const char *zPath; u32 i; @@ -197232,28 +205550,22 @@ static void jsonReplaceFunc( jsonWrongNumArgs(ctx, "replace"); return; } - if( jsonParse(&x, ctx, (const char*)sqlite3_value_text(argv[0])) ) return; - assert( x.nNode ); + pParse = jsonParseCached(ctx, argv[0], ctx, argc>1); + if( pParse==0 ) return; + pParse->nJPRef++; for(i=1; i<(u32)argc; i+=2){ zPath = (const char*)sqlite3_value_text(argv[i]); - pNode = jsonLookup(&x, zPath, 0, ctx); - if( x.nErr ) goto replace_err; + pParse->useMod = 1; + pNode = jsonLookup(pParse, zPath, 0, ctx); + if( pParse->nErr ) goto replace_err; if( pNode ){ - assert( pNode->eU==0 || pNode->eU==1 || pNode->eU==4 ); - testcase( pNode->eU!=0 && pNode->eU!=1 ); - pNode->jnFlags |= (u8)JNODE_REPLACE; - VVA( pNode->eU = 4 ); - pNode->u.iReplace = i + 1; + jsonReplaceNode(ctx, pParse, (u32)(pNode - pParse->aNode), argv[i+1]); } } - if( x.aNode[0].jnFlags & JNODE_REPLACE ){ - assert( x.aNode[0].eU==4 ); - sqlite3_result_value(ctx, argv[x.aNode[0].u.iReplace]); - }else{ - jsonReturnJson(x.aNode, ctx, argv); - } + jsonReturnJson(pParse, pParse->aNode, ctx, 1, 0); replace_err: - jsonParseReset(&x); + jsonDebugPrintParse(pParse); + jsonParseFree(pParse); } @@ -197274,7 +205586,7 @@ static void jsonSetFunc( int argc, sqlite3_value **argv ){ - JsonParse x; /* The parse */ + JsonParse 
*pParse; /* The parse */ JsonNode *pNode; const char *zPath; u32 i; @@ -197286,33 +205598,27 @@ static void jsonSetFunc( jsonWrongNumArgs(ctx, bIsSet ? "set" : "insert"); return; } - if( jsonParse(&x, ctx, (const char*)sqlite3_value_text(argv[0])) ) return; - assert( x.nNode ); + pParse = jsonParseCached(ctx, argv[0], ctx, argc>1); + if( pParse==0 ) return; + pParse->nJPRef++; for(i=1; i<(u32)argc; i+=2){ zPath = (const char*)sqlite3_value_text(argv[i]); bApnd = 0; - pNode = jsonLookup(&x, zPath, &bApnd, ctx); - if( x.oom ){ + pParse->useMod = 1; + pNode = jsonLookup(pParse, zPath, &bApnd, ctx); + if( pParse->oom ){ sqlite3_result_error_nomem(ctx); goto jsonSetDone; - }else if( x.nErr ){ + }else if( pParse->nErr ){ goto jsonSetDone; }else if( pNode && (bApnd || bIsSet) ){ - testcase( pNode->eU!=0 && pNode->eU!=1 ); - assert( pNode->eU!=3 && pNode->eU!=5 ); - VVA( pNode->eU = 4 ); - pNode->jnFlags |= (u8)JNODE_REPLACE; - pNode->u.iReplace = i + 1; + jsonReplaceNode(ctx, pParse, (u32)(pNode - pParse->aNode), argv[i+1]); } } - if( x.aNode[0].jnFlags & JNODE_REPLACE ){ - assert( x.aNode[0].eU==4 ); - sqlite3_result_value(ctx, argv[x.aNode[0].u.iReplace]); - }else{ - jsonReturnJson(x.aNode, ctx, argv); - } + jsonDebugPrintParse(pParse); + jsonReturnJson(pParse, pParse->aNode, ctx, 1, 0); jsonSetDone: - jsonParseReset(&x); + jsonParseFree(pParse); } /* @@ -197331,7 +205637,7 @@ static void jsonTypeFunc( const char *zPath; JsonNode *pNode; - p = jsonParseCached(ctx, argv, ctx); + p = jsonParseCached(ctx, argv[0], ctx, 0); if( p==0 ) return; if( argc==2 ){ zPath = (const char*)sqlite3_value_text(argv[1]); @@ -197347,8 +205653,8 @@ static void jsonTypeFunc( /* ** json_valid(JSON) ** -** Return 1 if JSON is a well-formed JSON string according to RFC-7159. -** Return 0 otherwise. +** Return 1 if JSON is a well-formed canonical JSON string according +** to RFC-7159. Return 0 otherwise. */ static void jsonValidFunc( sqlite3_context *ctx, @@ -197357,8 +205663,75 @@ static void jsonValidFunc( ){ JsonParse *p; /* The parse */ UNUSED_PARAMETER(argc); - p = jsonParseCached(ctx, argv, 0); - sqlite3_result_int(ctx, p!=0); + if( sqlite3_value_type(argv[0])==SQLITE_NULL ){ +#ifdef SQLITE_LEGACY_JSON_VALID + /* Incorrect legacy behavior was to return FALSE for a NULL input */ + sqlite3_result_int(ctx, 0); +#endif + return; + } + p = jsonParseCached(ctx, argv[0], 0, 0); + if( p==0 || p->oom ){ + sqlite3_result_error_nomem(ctx); + sqlite3_free(p); + }else{ + sqlite3_result_int(ctx, p->nErr==0 && (p->hasNonstd==0 || p->useMod)); + if( p->nErr ) jsonParseFree(p); + } +} + +/* +** json_error_position(JSON) +** +** If the argument is not an interpretable JSON string, then return the 1-based +** character position at which the parser first recognized that the input +** was in error. The left-most character is 1. If the string is valid +** JSON, then return 0. +** +** Note that json_valid() is only true for strictly conforming canonical JSON. +** But this routine returns zero if the input contains extension. 
Thus:
+**
+**     (1) If the input X is strictly conforming canonical JSON:
+**
+**         json_valid(X) returns true
+**         json_error_position(X) returns 0
+**
+**     (2) If the input X is JSON but it includes extensions (such as JSON5)
+**         that are not part of RFC-8259:
+**
+**         json_valid(X) returns false
+**         json_error_position(X) returns 0
+**
+**     (3) If the input X cannot be interpreted as JSON even taking extensions
+**         into account:
+**
+**         json_valid(X) returns false
+**         json_error_position(X) returns 1 or more
+*/
+static void jsonErrorFunc(
+  sqlite3_context *ctx,
+  int argc,
+  sqlite3_value **argv
+){
+  JsonParse *p;          /* The parse */
+  UNUSED_PARAMETER(argc);
+  if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
+  p = jsonParseCached(ctx, argv[0], 0, 0);
+  if( p==0 || p->oom ){
+    sqlite3_result_error_nomem(ctx);
+    sqlite3_free(p);
+  }else if( p->nErr==0 ){
+    sqlite3_result_int(ctx, 0);
+  }else{
+    int n = 1;
+    u32 i;
+    const char *z = (const char*)sqlite3_value_text(argv[0]);
+    for(i=0; i<p->iErr && ALWAYS(z[i]); i++){
+      if( (z[i]&0xc0)!=0x80 ) n++;
+    }
+    sqlite3_result_int(ctx, n);
+    jsonParseFree(p);
+  }
}

@@ -197400,7 +205773,8 @@ static void jsonArrayCompute(sqlite3_context *ctx, int isFinal){
     assert( pStr->bStatic );
   }else if( isFinal ){
     sqlite3_result_text(ctx, pStr->zBuf, (int)pStr->nUsed,
-                        pStr->bStatic ? SQLITE_TRANSIENT : sqlite3_free);
+                        pStr->bStatic ? SQLITE_TRANSIENT :
+                        sqlite3RCStrUnref);
     pStr->bStatic = 1;
   }else{
     sqlite3_result_text(ctx, pStr->zBuf, (int)pStr->nUsed, SQLITE_TRANSIENT);
@@ -197441,7 +205815,7 @@ static void jsonGroupInverse(
   pStr = (JsonString*)sqlite3_aggregate_context(ctx, 0);
#ifdef NEVER
   /* pStr is always non-NULL since jsonArrayStep() or jsonObjectStep() will
-  ** always have been called to initalize it */
+  ** always have been called to initialize it */
   if( NEVER(!pStr) ) return;
#endif
   z = pStr->zBuf;
@@ -197508,7 +205882,8 @@ static void jsonObjectCompute(sqlite3_context *ctx, int isFinal){
     assert( pStr->bStatic );
   }else if( isFinal ){
     sqlite3_result_text(ctx, pStr->zBuf, (int)pStr->nUsed,
-                        pStr->bStatic ? SQLITE_TRANSIENT : sqlite3_free);
+                        pStr->bStatic ? SQLITE_TRANSIENT :
+                        sqlite3RCStrUnref);
     pStr->bStatic = 1;
   }else{
     sqlite3_result_text(ctx, pStr->zBuf, (int)pStr->nUsed, SQLITE_TRANSIENT);
@@ -197619,7 +205994,6 @@ static int jsonEachOpenTree(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
 /* Reset a JsonEachCursor back to its original state.  Free any memory
 ** held.
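
jsonErrorFunc() converts the parser's byte offset (iErr) into the 1-based character position promised above by counting only UTF-8 lead bytes — continuation bytes match the pattern 10xxxxxx and never start a character. The same conversion in isolation (sketch):

    /* Count UTF-8 characters in the first nByte bytes of z: bytes of the
    ** form 10xxxxxx are continuations and do not start a character. */
    static int utf8_char_count(const unsigned char *z, int nByte){
      int i, n = 0;
      for(i=0; i<nByte && z[i]; i++){
        if( (z[i] & 0xc0) != 0x80 ) n++;
      }
      return n;
    }
    /* e.g. utf8_char_count((const unsigned char*)"f\xc3\xbc", 3) == 2 */

The reported position is then utf8_char_count(z, iErr) + 1, matching the n = 1 starting value in the function above.
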
*/ static void jsonEachCursorReset(JsonEachCursor *p){ - sqlite3_free(p->zJson); sqlite3_free(p->zRoot); jsonParseReset(&p->sParse); p->iRowid = 0; @@ -197702,14 +206076,16 @@ static void jsonAppendObjectPathElement( assert( pNode->eU==1 ); z = pNode->u.zJContent; nn = pNode->n; - assert( nn>=2 ); - assert( z[0]=='"' ); - assert( z[nn-1]=='"' ); - if( nn>2 && sqlite3Isalpha(z[1]) ){ - for(jj=2; jjjnFlags & JNODE_RAW)==0 ){ + assert( nn>=2 ); + assert( z[0]=='"' || z[0]=='\'' ); + assert( z[nn-1]=='"' || z[0]=='\'' ); + if( nn>2 && sqlite3Isalpha(z[1]) ){ + for(jj=2; jji==0 ) break; if( p->eType==JSON_OBJECT ){ - jsonReturn(pThis, ctx, 0); + jsonReturn(&p->sParse, pThis, ctx, 0); }else if( p->eType==JSON_ARRAY ){ u32 iKey; if( p->bRecursive ){ @@ -197771,7 +206147,7 @@ static int jsonEachColumn( } case JEACH_VALUE: { if( pThis->jnFlags & JNODE_LABEL ) pThis++; - jsonReturn(pThis, ctx, 0); + jsonReturn(&p->sParse, pThis, ctx, 0); break; } case JEACH_TYPE: { @@ -197782,7 +206158,7 @@ static int jsonEachColumn( case JEACH_ATOM: { if( pThis->jnFlags & JNODE_LABEL ) pThis++; if( pThis->eType>=JSON_ARRAY ) break; - jsonReturn(pThis, ctx, 0); + jsonReturn(&p->sParse, pThis, ctx, 0); break; } case JEACH_ID: { @@ -197886,6 +206262,13 @@ static int jsonEachBestIndex( idxMask |= iMask; } } + if( pIdxInfo->nOrderBy>0 + && pIdxInfo->aOrderBy[0].iColumn<0 + && pIdxInfo->aOrderBy[0].desc==0 + ){ + pIdxInfo->orderByConsumed = 1; + } + if( (unusableMask & ~idxMask)!=0 ){ /* If there are any unusable constraints on JSON or ROOT, then reject ** this entire plan */ @@ -197930,11 +206313,19 @@ static int jsonEachFilter( if( idxNum==0 ) return SQLITE_OK; z = (const char*)sqlite3_value_text(argv[0]); if( z==0 ) return SQLITE_OK; - n = sqlite3_value_bytes(argv[0]); - p->zJson = sqlite3_malloc64( n+1 ); - if( p->zJson==0 ) return SQLITE_NOMEM; - memcpy(p->zJson, z, (size_t)n+1); - if( jsonParse(&p->sParse, 0, p->zJson) ){ + memset(&p->sParse, 0, sizeof(p->sParse)); + p->sParse.nJPRef = 1; + if( sqlite3ValueIsOfClass(argv[0], sqlite3RCStrUnref) ){ + p->sParse.zJson = sqlite3RCStrRef((char*)z); + }else{ + n = sqlite3_value_bytes(argv[0]); + p->sParse.zJson = sqlite3RCStrNew( n+1 ); + if( p->sParse.zJson==0 ) return SQLITE_NOMEM; + memcpy(p->sParse.zJson, z, (size_t)n+1); + } + p->sParse.bJsonIsRCStr = 1; + p->zJson = p->sParse.zJson; + if( jsonParse(&p->sParse, 0) ){ int rc = SQLITE_NOMEM; if( p->sParse.oom==0 ){ sqlite3_free(cur->pVtab->zErrMsg); @@ -198019,7 +206410,8 @@ static sqlite3_module jsonEachModule = { 0, /* xSavepoint */ 0, /* xRelease */ 0, /* xRollbackTo */ - 0 /* xShadowName */ + 0, /* xShadowName */ + 0 /* xIntegrity */ }; /* The methods of the json_tree virtual table. 
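
The new block in jsonEachBestIndex() declares the scan's natural order: json_each/json_tree emit rows in ascending rowid order, so an ORDER BY on the rowid (aOrderBy[0].iColumn<0) can be satisfied without a sort. The same declaration in a generic xBestIndex, assuming a table whose cursor already walks rowids ascending (sketch; the cost constant is illustrative):

    #include <sqlite3.h>

    static int my_vtab_best_index(sqlite3_vtab *tab,
                                  sqlite3_index_info *pInfo){
      /* Rowid ORDER BY, ascending: the cursor already produces rows in
      ** that order, so tell the planner no sorter is needed. */
      if( pInfo->nOrderBy>0
       && pInfo->aOrderBy[0].iColumn<0
       && pInfo->aOrderBy[0].desc==0
      ){
        pInfo->orderByConsumed = 1;
      }
      pInfo->estimatedCost = 1.0e6;
      (void)tab;
      return SQLITE_OK;
    }

Setting orderByConsumed incorrectly would silently return mis-ordered rows, which is why the diff guards on both iColumn<0 and desc==0.
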
*/ @@ -198047,7 +206439,8 @@ static sqlite3_module jsonTreeModule = { 0, /* xSavepoint */ 0, /* xRelease */ 0, /* xRollbackTo */ - 0 /* xShadowName */ + 0, /* xShadowName */ + 0 /* xIntegrity */ }; #endif /* SQLITE_OMIT_VIRTUALTABLE */ #endif /* !defined(SQLITE_OMIT_JSON) */ @@ -198058,33 +206451,43 @@ static sqlite3_module jsonTreeModule = { SQLITE_PRIVATE void sqlite3RegisterJsonFunctions(void){ #ifndef SQLITE_OMIT_JSON static FuncDef aJsonFunc[] = { - JFUNCTION(json, 1, 0, jsonRemoveFunc), - JFUNCTION(json_array, -1, 0, jsonArrayFunc), - JFUNCTION(json_array_length, 1, 0, jsonArrayLengthFunc), - JFUNCTION(json_array_length, 2, 0, jsonArrayLengthFunc), - JFUNCTION(json_extract, -1, 0, jsonExtractFunc), - JFUNCTION(->, 2, JSON_JSON, jsonExtractFunc), - JFUNCTION(->>, 2, JSON_SQL, jsonExtractFunc), - JFUNCTION(json_insert, -1, 0, jsonSetFunc), - JFUNCTION(json_object, -1, 0, jsonObjectFunc), - JFUNCTION(json_patch, 2, 0, jsonPatchFunc), - JFUNCTION(json_quote, 1, 0, jsonQuoteFunc), - JFUNCTION(json_remove, -1, 0, jsonRemoveFunc), - JFUNCTION(json_replace, -1, 0, jsonReplaceFunc), - JFUNCTION(json_set, -1, JSON_ISSET, jsonSetFunc), - JFUNCTION(json_type, 1, 0, jsonTypeFunc), - JFUNCTION(json_type, 2, 0, jsonTypeFunc), - JFUNCTION(json_valid, 1, 0, jsonValidFunc), -#if SQLITE_DEBUG - JFUNCTION(json_parse, 1, 0, jsonParseFunc), - JFUNCTION(json_test1, 1, 0, jsonTest1Func), + /* calls sqlite3_result_subtype() */ + /* | */ + /* Uses cache ______ | __ calls sqlite3_value_subtype() */ + /* | | | */ + /* Num args _________ | | | ___ Flags */ + /* | | | | | */ + /* | | | | | */ + JFUNCTION(json, 1, 1, 1, 0, 0, jsonRemoveFunc), + JFUNCTION(json_array, -1, 0, 1, 1, 0, jsonArrayFunc), + JFUNCTION(json_array_length, 1, 1, 0, 0, 0, jsonArrayLengthFunc), + JFUNCTION(json_array_length, 2, 1, 0, 0, 0, jsonArrayLengthFunc), + JFUNCTION(json_error_position,1, 1, 0, 0, 0, jsonErrorFunc), + JFUNCTION(json_extract, -1, 1, 1, 0, 0, jsonExtractFunc), + JFUNCTION(->, 2, 1, 1, 0, JSON_JSON, jsonExtractFunc), + JFUNCTION(->>, 2, 1, 0, 0, JSON_SQL, jsonExtractFunc), + JFUNCTION(json_insert, -1, 1, 1, 1, 0, jsonSetFunc), + JFUNCTION(json_object, -1, 0, 1, 1, 0, jsonObjectFunc), + JFUNCTION(json_patch, 2, 1, 1, 0, 0, jsonPatchFunc), + JFUNCTION(json_quote, 1, 0, 1, 1, 0, jsonQuoteFunc), + JFUNCTION(json_remove, -1, 1, 1, 0, 0, jsonRemoveFunc), + JFUNCTION(json_replace, -1, 1, 1, 1, 0, jsonReplaceFunc), + JFUNCTION(json_set, -1, 1, 1, 1, JSON_ISSET, jsonSetFunc), + JFUNCTION(json_type, 1, 1, 0, 0, 0, jsonTypeFunc), + JFUNCTION(json_type, 2, 1, 0, 0, 0, jsonTypeFunc), + JFUNCTION(json_valid, 1, 1, 0, 0, 0, jsonValidFunc), +#ifdef SQLITE_DEBUG + JFUNCTION(json_parse, 1, 1, 1, 0, 0, jsonParseFunc), + JFUNCTION(json_test1, 1, 1, 0, 1, 0, jsonTest1Func), #endif WAGGREGATE(json_group_array, 1, 0, 0, jsonArrayStep, jsonArrayFinal, jsonArrayValue, jsonGroupInverse, - SQLITE_SUBTYPE|SQLITE_UTF8|SQLITE_DETERMINISTIC|SQLITE_INNOCUOUS), + SQLITE_SUBTYPE|SQLITE_RESULT_SUBTYPE|SQLITE_UTF8| + SQLITE_DETERMINISTIC), WAGGREGATE(json_group_object, 2, 0, 0, jsonObjectStep, jsonObjectFinal, jsonObjectValue, jsonGroupInverse, - SQLITE_SUBTYPE|SQLITE_UTF8|SQLITE_DETERMINISTIC|SQLITE_INNOCUOUS) + SQLITE_SUBTYPE|SQLITE_RESULT_SUBTYPE|SQLITE_UTF8| + SQLITE_DETERMINISTIC) }; sqlite3InsertBuiltinFuncs(aJsonFunc, ArraySize(aJsonFunc)); #endif @@ -198211,6 +206614,11 @@ typedef unsigned int u32; #endif #endif /* !defined(SQLITE_AMALGAMATION) */ +/* Macro to check for 4-byte alignment. 
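
The expanded JFUNCTION table records, per function, whether it consults the parse cache, calls sqlite3_result_subtype(), or reads argument subtypes; the two aggregates additionally drop SQLITE_INNOCUOUS. Roughly how a scalar function would be registered with those subtype flags (sketch; SQLITE_RESULT_SUBTYPE exists only in newer SQLite headers, hence the guard, and 'J' mirrors the JSON_SUBTYPE value, ASCII 74):

    #include <sqlite3.h>

    #ifndef SQLITE_RESULT_SUBTYPE
    # define SQLITE_RESULT_SUBTYPE 0  /* older headers: flag unavailable */
    #endif

    static void my_json_like_func(sqlite3_context *ctx, int argc,
                                  sqlite3_value **argv){
      (void)argc; (void)argv;
      sqlite3_result_text(ctx, "{}", -1, SQLITE_STATIC);
      sqlite3_result_subtype(ctx, 'J');   /* mark the result as JSON */
    }

    int register_my_func(sqlite3 *db){
      /* SQLITE_SUBTYPE: may inspect argument subtypes.
      ** SQLITE_RESULT_SUBTYPE: may attach a subtype to its result. */
      return sqlite3_create_function(db, "my_json_like", 0,
          SQLITE_UTF8|SQLITE_DETERMINISTIC|
          SQLITE_SUBTYPE|SQLITE_RESULT_SUBTYPE,
          0, my_json_like_func, 0, 0);
    }

Declaring these flags lets the query planner know when a subtype must be preserved across intermediate results instead of being discarded.
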
Only used inside of assert() */ +#ifdef SQLITE_DEBUG +# define FOUR_BYTE_ALIGNED(X) ((((char*)(X) - (char*)0) & 3)==0) +#endif + /* #include */ /* #include */ /* #include */ @@ -198276,6 +206684,7 @@ struct Rtree { int iDepth; /* Current depth of the r-tree structure */ char *zDb; /* Name of database containing r-tree table */ char *zName; /* Name of r-tree table */ + char *zNodeName; /* Name of the %_node table */ u32 nBusy; /* Current number of users of this structure */ i64 nRowEst; /* Estimated number of rows in this table */ u32 nCursor; /* Number of open cursors */ @@ -198288,7 +206697,6 @@ struct Rtree { ** headed by the node (leaf nodes have RtreeNode.iNode==0). */ RtreeNode *pDeleted; - int iReinsertHeight; /* Height of sub-trees Reinsert() has run on */ /* Blob I/O on xxx_node */ sqlite3_blob *pNodeBlob; @@ -198585,17 +206993,23 @@ struct RtreeMatchArg { ** -DSQLITE_RUNTIME_BYTEORDER=1 is set, then byte-order is determined ** at run-time. */ -#ifndef SQLITE_BYTEORDER -#if defined(i386) || defined(__i386__) || defined(_M_IX86) || \ - defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || \ - defined(_M_AMD64) || defined(_M_ARM) || defined(__x86) || \ - defined(__arm__) -# define SQLITE_BYTEORDER 1234 -#elif defined(sparc) || defined(__ppc__) -# define SQLITE_BYTEORDER 4321 -#else -# define SQLITE_BYTEORDER 0 /* 0 means "unknown at compile-time" */ -#endif +#ifndef SQLITE_BYTEORDER /* Replicate changes at tag-20230904a */ +# if defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ +# define SQLITE_BYTEORDER 4321 +# elif defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__ +# define SQLITE_BYTEORDER 1234 +# elif defined(__BIG_ENDIAN__) && __BIG_ENDIAN__==1 +# define SQLITE_BYTEORDER 4321 +# elif defined(i386) || defined(__i386__) || defined(_M_IX86) || \ + defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || \ + defined(_M_AMD64) || defined(_M_ARM) || defined(__x86) || \ + defined(__ARMEL__) || defined(__AARCH64EL__) || defined(_M_ARM64) +# define SQLITE_BYTEORDER 1234 +# elif defined(sparc) || defined(__ARMEB__) || defined(__AARCH64EB__) +# define SQLITE_BYTEORDER 4321 +# else +# define SQLITE_BYTEORDER 0 +# endif #endif @@ -198616,7 +207030,7 @@ static int readInt16(u8 *p){ return (p[0]<<8) + p[1]; } static void readCoord(u8 *p, RtreeCoord *pCoord){ - assert( ((((char*)p) - (char*)0)&3)==0 ); /* p is always 4-byte aligned */ + assert( FOUR_BYTE_ALIGNED(p) ); #if SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300 pCoord->u = _byteswap_ulong(*(u32*)p); #elif SQLITE_BYTEORDER==1234 && GCC_VERSION>=4003000 @@ -198670,7 +207084,7 @@ static void writeInt16(u8 *p, int i){ } static int writeCoord(u8 *p, RtreeCoord *pCoord){ u32 i; - assert( ((((char*)p) - (char*)0)&3)==0 ); /* p is always 4-byte aligned */ + assert( FOUR_BYTE_ALIGNED(p) ); assert( sizeof(RtreeCoord)==4 ); assert( sizeof(u32)==4 ); #if SQLITE_BYTEORDER==1234 && GCC_VERSION>=4003000 @@ -198841,11 +207255,9 @@ static int nodeAcquire( } } if( pRtree->pNodeBlob==0 ){ - char *zTab = sqlite3_mprintf("%s_node", pRtree->zName); - if( zTab==0 ) return SQLITE_NOMEM; - rc = sqlite3_blob_open(pRtree->db, pRtree->zDb, zTab, "data", iNode, 0, + rc = sqlite3_blob_open(pRtree->db, pRtree->zDb, pRtree->zNodeName, + "data", iNode, 0, &pRtree->pNodeBlob); - sqlite3_free(zTab); } if( rc ){ nodeBlobReset(pRtree); @@ -199398,7 +207810,7 @@ static void rtreeNonleafConstraint( assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE || p->op==RTREE_GT || p->op==RTREE_EQ || p->op==RTREE_TRUE || 
p->op==RTREE_FALSE );
-  assert( ((((char*)pCellData) - (char*)0)&3)==0 );  /* 4-byte aligned */
+  assert( FOUR_BYTE_ALIGNED(pCellData) );
   switch( p->op ){
     case RTREE_TRUE:  return;   /* Always satisfied */
     case RTREE_FALSE: break;    /* Never satisfied */
@@ -199451,7 +207863,7 @@ static void rtreeLeafConstraint(
       || p->op==RTREE_GT || p->op==RTREE_EQ || p->op==RTREE_TRUE
       || p->op==RTREE_FALSE );
   pCellData += 8 + p->iCoord*4;
-  assert( ((((char*)pCellData) - (char*)0)&3)==0 );  /* 4-byte aligned */
+  assert( FOUR_BYTE_ALIGNED(pCellData) );
   RTREE_DECODE_COORD(eInt, pCellData, xN);
   switch( p->op ){
     case RTREE_TRUE:  return;   /* Always satisfied */
@@ -200021,7 +208433,20 @@ static int rtreeFilter(
         p->pInfo->nCoord = pRtree->nDim2;
         p->pInfo->anQueue = pCsr->anQueue;
         p->pInfo->mxLevel = pRtree->iDepth + 1;
-      }else if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){
+      }else if( eType==SQLITE_INTEGER ){
+        sqlite3_int64 iVal = sqlite3_value_int64(argv[ii]);
+#ifdef SQLITE_RTREE_INT_ONLY
+        p->u.rValue = iVal;
+#else
+        p->u.rValue = (double)iVal;
+        if( iVal>=((sqlite3_int64)1)<<48
+         || iVal<=-(((sqlite3_int64)1)<<48)
+        ){
+          if( p->op==RTREE_LT ) p->op = RTREE_LE;
+          if( p->op==RTREE_GT ) p->op = RTREE_GE;
+        }
+#endif
+      }else if( eType==SQLITE_FLOAT ){
#ifdef SQLITE_RTREE_INT_ONLY
         p->u.rValue = sqlite3_value_int64(argv[ii]);
#else
@@ -200152,11 +208577,12 @@ static int rtreeBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
         || p->op==SQLITE_INDEX_CONSTRAINT_MATCH)
      ){
        u8 op;
+       u8 doOmit = 1;
        switch( p->op ){
-         case SQLITE_INDEX_CONSTRAINT_EQ: op = RTREE_EQ; break;
-         case SQLITE_INDEX_CONSTRAINT_GT: op = RTREE_GT; break;
+         case SQLITE_INDEX_CONSTRAINT_EQ: op = RTREE_EQ; doOmit = 0; break;
+         case SQLITE_INDEX_CONSTRAINT_GT: op = RTREE_GT; doOmit = 0; break;
          case SQLITE_INDEX_CONSTRAINT_LE: op = RTREE_LE; break;
-         case SQLITE_INDEX_CONSTRAINT_LT: op = RTREE_LT; break;
+         case SQLITE_INDEX_CONSTRAINT_LT: op = RTREE_LT; doOmit = 0; break;
          case SQLITE_INDEX_CONSTRAINT_GE: op = RTREE_GE; break;
          case SQLITE_INDEX_CONSTRAINT_MATCH: op = RTREE_MATCH; break;
          default: op = 0; break;
@@ -200165,15 +208591,19 @@ static int rtreeBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
          zIdxStr[iIdx++] = op;
          zIdxStr[iIdx++] = (char)(p->iColumn - 1 + '0');
          pIdxInfo->aConstraintUsage[ii].argvIndex = (iIdx/2);
-         pIdxInfo->aConstraintUsage[ii].omit = 1;
+         pIdxInfo->aConstraintUsage[ii].omit = doOmit;
        }
      }
    }

   pIdxInfo->idxNum = 2;
   pIdxInfo->needToFreeIdxStr = 1;
-  if( iIdx>0 && 0==(pIdxInfo->idxStr = sqlite3_mprintf("%s", zIdxStr)) ){
-    return SQLITE_NOMEM;
+  if( iIdx>0 ){
+    pIdxInfo->idxStr = sqlite3_malloc( iIdx+1 );
+    if( pIdxInfo->idxStr==0 ){
+      return SQLITE_NOMEM;
+    }
+    memcpy(pIdxInfo->idxStr, zIdxStr, iIdx+1);
   }

   nRow = pRtree->nRowEst >> (iIdx/2);
@@ -200252,31 +208682,22 @@ static void cellUnion(Rtree *pRtree, RtreeCell *p1, RtreeCell *p2){
 */
 static int cellContains(Rtree *pRtree, RtreeCell *p1, RtreeCell *p2){
   int ii;
-  int isInt = (pRtree->eCoordType==RTREE_COORD_INT32);
-  for(ii=0; ii<pRtree->nDim2; ii+=2){
-    RtreeCoord *a1 = &p1->aCoord[ii];
-    RtreeCoord *a2 = &p2->aCoord[ii];
-    if( (!isInt && (a2[0].f<a1[0].f || a2[1].f>a1[1].f))
-     || ( isInt && (a2[0].i<a1[0].i || a2[1].i>a1[1].i))
-    ){
-      return 0;
+  if( pRtree->eCoordType==RTREE_COORD_INT32 ){
+    for(ii=0; ii<pRtree->nDim2; ii+=2){
+      RtreeCoord *a1 = &p1->aCoord[ii];
+      RtreeCoord *a2 = &p2->aCoord[ii];
+      if( a2[0].i<a1[0].i || a2[1].i>a1[1].i ) return 0;
+    }
+  }else{
+    for(ii=0; ii<pRtree->nDim2; ii+=2){
+      RtreeCoord *a1 = &p1->aCoord[ii];
+      RtreeCoord *a2 = &p2->aCoord[ii];
+      if( a2[0].f<a1[0].f || a2[1].f>a1[1].f ) return 0;
    }
  }
  return 1;
}

-/*
-** Return the 
amount cell p would grow by if it were unioned with pCell. -*/ -static RtreeDValue cellGrowth(Rtree *pRtree, RtreeCell *p, RtreeCell *pCell){ - RtreeDValue area; - RtreeCell cell; - memcpy(&cell, p, sizeof(RtreeCell)); - area = cellArea(pRtree, &cell); - cellUnion(pRtree, &cell, pCell); - return (cellArea(pRtree, &cell)-area); -} - static RtreeDValue cellOverlap( Rtree *pRtree, RtreeCell *p, @@ -200323,38 +208744,52 @@ static int ChooseLeaf( for(ii=0; rc==SQLITE_OK && ii<(pRtree->iDepth-iHeight); ii++){ int iCell; sqlite3_int64 iBest = 0; - + int bFound = 0; RtreeDValue fMinGrowth = RTREE_ZERO; RtreeDValue fMinArea = RTREE_ZERO; - int nCell = NCELL(pNode); - RtreeCell cell; RtreeNode *pChild = 0; - RtreeCell *aCell = 0; - - /* Select the child node which will be enlarged the least if pCell - ** is inserted into it. Resolve ties by choosing the entry with - ** the smallest area. + /* First check to see if there is are any cells in pNode that completely + ** contains pCell. If two or more cells in pNode completely contain pCell + ** then pick the smallest. */ for(iCell=0; iCell1 ){ - int iLeft = 0; - int iRight = 0; - - int nLeft = nIdx/2; - int nRight = nIdx-nLeft; - int *aLeft = aIdx; - int *aRight = &aIdx[nLeft]; - - SortByDistance(aLeft, nLeft, aDistance, aSpare); - SortByDistance(aRight, nRight, aDistance, aSpare); - - memcpy(aSpare, aLeft, sizeof(int)*nLeft); - aLeft = aSpare; - - while( iLeftnDim; iDim++){ - aCenterCoord[iDim] += DCOORD(aCell[ii].aCoord[iDim*2]); - aCenterCoord[iDim] += DCOORD(aCell[ii].aCoord[iDim*2+1]); - } - } - for(iDim=0; iDimnDim; iDim++){ - aCenterCoord[iDim] = (aCenterCoord[iDim]/(nCell*(RtreeDValue)2)); - } - - for(ii=0; iinDim; iDim++){ - RtreeDValue coord = (DCOORD(aCell[ii].aCoord[iDim*2+1]) - - DCOORD(aCell[ii].aCoord[iDim*2])); - aDistance[ii] += (coord-aCenterCoord[iDim])*(coord-aCenterCoord[iDim]); - } - } - - SortByDistance(aOrder, nCell, aDistance, aSpare); - nodeZero(pRtree, pNode); - - for(ii=0; rc==SQLITE_OK && ii<(nCell-(RTREE_MINCELLS(pRtree)+1)); ii++){ - RtreeCell *p = &aCell[aOrder[ii]]; - nodeInsertCell(pRtree, pNode, p); - if( p->iRowid==pCell->iRowid ){ - if( iHeight==0 ){ - rc = rowidWrite(pRtree, p->iRowid, pNode->iNode); - }else{ - rc = parentWrite(pRtree, p->iRowid, pNode->iNode); - } - } - } - if( rc==SQLITE_OK ){ - rc = fixBoundingBox(pRtree, pNode); - } - for(; rc==SQLITE_OK && iiiNode currently contains - ** the height of the sub-tree headed by the cell. - */ - RtreeNode *pInsert; - RtreeCell *p = &aCell[aOrder[ii]]; - rc = ChooseLeaf(pRtree, p, iHeight, &pInsert); - if( rc==SQLITE_OK ){ - int rc2; - rc = rtreeInsertCell(pRtree, pInsert, p, iHeight); - rc2 = nodeRelease(pRtree, pInsert); - if( rc==SQLITE_OK ){ - rc = rc2; - } - } - } - - sqlite3_free(aCell); - return rc; -} - /* ** Insert cell pCell into node pNode. Node pNode is the head of a ** subtree iHeight high (leaf nodes have iHeight==0). 
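
The rewritten ChooseLeaf() above first looks for a child cell that already wholly contains the new entry, preferring the smallest such cell, and only then falls back to Guttman's least-enlargement rule; the forced-reinsertion path (Reinsert, SortByDistance, cellGrowth) is deleted outright. The two-pass selection in isolation, over a self-contained 2-D bounding-box type (sketch, not SQLite's internal types):

    typedef struct Cell { double x0, x1, y0, y1; } Cell;  /* 2-D bbox */

    static int cell_contains(const Cell *a, const Cell *b){
      return b->x0>=a->x0 && b->x1<=a->x1 && b->y0>=a->y0 && b->y1<=a->y1;
    }
    static double cell_area(const Cell *a){
      return (a->x1-a->x0)*(a->y1-a->y0);
    }
    static double cell_growth(const Cell *a, const Cell *b){
      Cell u = *a;   /* area added to a by absorbing b */
      if( b->x0<u.x0 ) u.x0 = b->x0;
      if( b->x1>u.x1 ) u.x1 = b->x1;
      if( b->y0<u.y0 ) u.y0 = b->y0;
      if( b->y1>u.y1 ) u.y1 = b->y1;
      return cell_area(&u) - cell_area(a);
    }

    /* Pick the child of aChild[0..n-1] that should receive pNew. */
    static int choose_subtree(const Cell *aChild, int n, const Cell *pNew){
      int i, iBest = 0, bFound = 0;
      double best = 0.0;
      for(i=0; i<n; i++){   /* pass 1: smallest cell containing pNew */
        if( cell_contains(&aChild[i], pNew)
         && (!bFound || cell_area(&aChild[i])<best) ){
          bFound = 1; best = cell_area(&aChild[i]); iBest = i;
        }
      }
      if( bFound ) return iBest;
      for(i=0; i<n; i++){   /* pass 2: least enlargement */
        double g = cell_growth(&aChild[i], pNew);
        if( i==0 || g<best ){ best = g; iBest = i; }
      }
      return iBest;
    }

The real code additionally breaks pass-2 ties by smallest resulting area (fMinGrowth/fMinArea); that refinement is omitted here for brevity.
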
@@ -201103,12 +209366,7 @@ static int rtreeInsertCell( } } if( nodeInsertCell(pRtree, pNode, pCell) ){ - if( iHeight<=pRtree->iReinsertHeight || pNode->iNode==1){ - rc = SplitNode(pRtree, pNode, pCell, iHeight); - }else{ - pRtree->iReinsertHeight = iHeight; - rc = Reinsert(pRtree, pNode, pCell, iHeight); - } + rc = SplitNode(pRtree, pNode, pCell, iHeight); }else{ rc = AdjustTree(pRtree, pNode, pCell); if( ALWAYS(rc==SQLITE_OK) ){ @@ -201350,7 +209608,7 @@ static int rtreeUpdate( rtreeReference(pRtree); assert(nData>=1); - cell.iRowid = 0; /* Used only to suppress a compiler warning */ + memset(&cell, 0, sizeof(cell)); /* Constraint handling. A write operation on an r-tree table may return ** SQLITE_CONSTRAINT for two reasons: @@ -201451,7 +209709,6 @@ static int rtreeUpdate( } if( rc==SQLITE_OK ){ int rc2; - pRtree->iReinsertHeight = -1; rc = rtreeInsertCell(pRtree, pLeaf, &cell, 0); rc2 = nodeRelease(pRtree, pLeaf); if( rc==SQLITE_OK ){ @@ -201592,8 +209849,11 @@ static int rtreeShadowName(const char *zName){ return 0; } +/* Forward declaration */ +static int rtreeIntegrity(sqlite3_vtab*, const char*, const char*, int, char**); + static sqlite3_module rtreeModule = { - 3, /* iVersion */ + 4, /* iVersion */ rtreeCreate, /* xCreate - create a table */ rtreeConnect, /* xConnect - connect to an existing table */ rtreeBestIndex, /* xBestIndex - Determine search strategy */ @@ -201616,7 +209876,8 @@ static sqlite3_module rtreeModule = { rtreeSavepoint, /* xSavepoint */ 0, /* xRelease */ 0, /* xRollbackTo */ - rtreeShadowName /* xShadowName */ + rtreeShadowName, /* xShadowName */ + rtreeIntegrity /* xIntegrity */ }; static int rtreeSqlInit( @@ -201872,22 +210133,27 @@ static int rtreeInit( } sqlite3_vtab_config(db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1); + sqlite3_vtab_config(db, SQLITE_VTAB_INNOCUOUS); + /* Allocate the sqlite3_vtab structure */ nDb = (int)strlen(argv[1]); nName = (int)strlen(argv[2]); - pRtree = (Rtree *)sqlite3_malloc64(sizeof(Rtree)+nDb+nName+2); + pRtree = (Rtree *)sqlite3_malloc64(sizeof(Rtree)+nDb+nName*2+8); if( !pRtree ){ return SQLITE_NOMEM; } - memset(pRtree, 0, sizeof(Rtree)+nDb+nName+2); + memset(pRtree, 0, sizeof(Rtree)+nDb+nName*2+8); pRtree->nBusy = 1; pRtree->base.pModule = &rtreeModule; pRtree->zDb = (char *)&pRtree[1]; pRtree->zName = &pRtree->zDb[nDb+1]; + pRtree->zNodeName = &pRtree->zName[nName+1]; pRtree->eCoordType = (u8)eCoordType; memcpy(pRtree->zDb, argv[1], nDb); memcpy(pRtree->zName, argv[2], nName); + memcpy(pRtree->zNodeName, argv[2], nName); + memcpy(&pRtree->zNodeName[nName], "_node", 6); /* Create/Connect to the underlying relational database schema. If @@ -202384,7 +210650,6 @@ static int rtreeCheckTable( ){ RtreeCheck check; /* Common context for various routines */ sqlite3_stmt *pStmt = 0; /* Used to find column count of rtree table */ - int bEnd = 0; /* True if transaction should be closed */ int nAux = 0; /* Number of extra columns. */ /* Initialize the context object */ @@ -202393,14 +210658,6 @@ static int rtreeCheckTable( check.zDb = zDb; check.zTab = zTab; - /* If there is not already an open transaction, open one now. This is - ** to ensure that the queries run as part of this integrity-check operate - ** on a consistent snapshot. 
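
rtreeInit() below precomputes the "<table>_node" shadow-table name once, carving zDb, zName and zNodeName out of a single allocation so nodeAcquire() no longer has to sqlite3_mprintf() the name on every blob open. The layout trick on its own (sketch with plain malloc; names hypothetical):

    #include <stdlib.h>
    #include <string.h>

    typedef struct Names {
      char *zDb;        /* database name */
      char *zName;      /* table name */
      char *zNodeName;  /* table name + "_node" */
    } Names;

    /* One malloc holds the struct plus all three NUL-terminated strings:
    ** [Names][zDb\0][zName\0][zName_node\0]. */
    static Names *names_new(const char *zDb, const char *zName){
      size_t nDb = strlen(zDb), nName = strlen(zName);
      Names *p = malloc(sizeof(*p) + nDb+1 + nName+1 + nName+6);
      if( p==0 ) return 0;
      p->zDb = (char*)&p[1];
      p->zName = &p->zDb[nDb+1];
      p->zNodeName = &p->zName[nName+1];
      memcpy(p->zDb, zDb, nDb+1);
      memcpy(p->zName, zName, nName+1);
      memcpy(p->zNodeName, zName, nName);
      memcpy(&p->zNodeName[nName], "_node", 6);
      return p;
    }

A single free() then releases everything, which is why the diff only changes the size arithmetic (nDb+nName*2+8) and the pointer setup.
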
*/ - if( sqlite3_get_autocommit(db) ){ - check.rc = sqlite3_exec(db, "BEGIN", 0, 0, 0); - bEnd = 1; - } - /* Find the number of auxiliary columns */ if( check.rc==SQLITE_OK ){ pStmt = rtreeCheckPrepare(&check, "SELECT * FROM %Q.'%q_rowid'", zDb, zTab); @@ -202441,15 +210698,34 @@ static int rtreeCheckTable( sqlite3_finalize(check.aCheckMapping[0]); sqlite3_finalize(check.aCheckMapping[1]); - /* If one was opened, close the transaction */ - if( bEnd ){ - int rc = sqlite3_exec(db, "END", 0, 0, 0); - if( check.rc==SQLITE_OK ) check.rc = rc; - } *pzReport = check.zReport; return check.rc; } +/* +** Implementation of the xIntegrity method for Rtree. +*/ +static int rtreeIntegrity( + sqlite3_vtab *pVtab, /* The virtual table to check */ + const char *zSchema, /* Schema in which the virtual table lives */ + const char *zName, /* Name of the virtual table */ + int isQuick, /* True for a quick_check */ + char **pzErr /* Write results here */ +){ + Rtree *pRtree = (Rtree*)pVtab; + int rc; + assert( pzErr!=0 && *pzErr==0 ); + UNUSED_PARAMETER(zSchema); + UNUSED_PARAMETER(zName); + UNUSED_PARAMETER(isQuick); + rc = rtreeCheckTable(pRtree->db, pRtree->zDb, pRtree->zName, pzErr); + if( rc==SQLITE_OK && *pzErr ){ + *pzErr = sqlite3_mprintf("In RTree %s.%s:\n%z", + pRtree->zDb, pRtree->zName, *pzErr); + } + return rc; +} + /* ** Usage: ** @@ -202823,7 +211099,7 @@ static GeoPoly *geopolyFuncParam( int nByte; testcase( pCtx==0 ); if( sqlite3_value_type(pVal)==SQLITE_BLOB - && (nByte = sqlite3_value_bytes(pVal))>=(4+6*sizeof(GeoCoord)) + && (nByte = sqlite3_value_bytes(pVal))>=(int)(4+6*sizeof(GeoCoord)) ){ const unsigned char *a = sqlite3_value_blob(pVal); int nVertex; @@ -202881,6 +211157,7 @@ static void geopolyBlobFunc( sqlite3_value **argv ){ GeoPoly *p = geopolyFuncParam(context, argv[0], 0); + (void)argc; if( p ){ sqlite3_result_blob(context, p->hdr, 4+8*p->nVertex, SQLITE_TRANSIENT); @@ -202900,6 +211177,7 @@ static void geopolyJsonFunc( sqlite3_value **argv ){ GeoPoly *p = geopolyFuncParam(context, argv[0], 0); + (void)argc; if( p ){ sqlite3 *db = sqlite3_context_db_handle(context); sqlite3_str *x = sqlite3_str_new(db); @@ -202981,6 +211259,7 @@ static void geopolyXformFunc( double F = sqlite3_value_double(argv[6]); GeoCoord x1, y1, x0, y0; int ii; + (void)argc; if( p ){ for(ii=0; iinVertex; ii++){ x0 = GeoX(p,ii); @@ -203031,6 +211310,7 @@ static void geopolyAreaFunc( sqlite3_value **argv ){ GeoPoly *p = geopolyFuncParam(context, argv[0], 0); + (void)argc; if( p ){ sqlite3_result_double(context, geopolyArea(p)); sqlite3_free(p); @@ -203056,6 +211336,7 @@ static void geopolyCcwFunc( sqlite3_value **argv ){ GeoPoly *p = geopolyFuncParam(context, argv[0], 0); + (void)argc; if( p ){ if( geopolyArea(p)<0.0 ){ int ii, jj; @@ -203110,6 +211391,7 @@ static void geopolyRegularFunc( int n = sqlite3_value_int(argv[3]); int i; GeoPoly *p; + (void)argc; if( n<3 || r<=0.0 ) return; if( n>1000 ) n = 1000; @@ -203219,6 +211501,7 @@ static void geopolyBBoxFunc( sqlite3_value **argv ){ GeoPoly *p = geopolyBBox(context, argv[0], 0, 0); + (void)argc; if( p ){ sqlite3_result_blob(context, p->hdr, 4+8*p->nVertex, SQLITE_TRANSIENT); @@ -203246,6 +211529,7 @@ static void geopolyBBoxStep( ){ RtreeCoord a[4]; int rc = SQLITE_OK; + (void)argc; (void)geopolyBBox(context, argv[0], a, &rc); if( rc==SQLITE_OK ){ GeoBBox *pBBox; @@ -203334,6 +211618,8 @@ static void geopolyContainsPointFunc( int v = 0; int cnt = 0; int ii; + (void)argc; + if( p1==0 ) return; for(ii=0; iinVertex-1; ii++){ v = pointBeneathLine(x0,y0,GeoX(p1,ii), 
GeoY(p1,ii), @@ -203373,6 +211659,7 @@ static void geopolyWithinFunc( ){ GeoPoly *p1 = geopolyFuncParam(context, argv[0], 0); GeoPoly *p2 = geopolyFuncParam(context, argv[1], 0); + (void)argc; if( p1 && p2 ){ int x = geopolyOverlap(p1, p2); if( x<0 ){ @@ -203703,6 +211990,7 @@ static void geopolyOverlapFunc( ){ GeoPoly *p1 = geopolyFuncParam(context, argv[0], 0); GeoPoly *p2 = geopolyFuncParam(context, argv[1], 0); + (void)argc; if( p1 && p2 ){ int x = geopolyOverlap(p1, p2); if( x<0 ){ @@ -203723,8 +212011,12 @@ static void geopolyDebugFunc( int argc, sqlite3_value **argv ){ + (void)context; + (void)argc; #ifdef GEOPOLY_ENABLE_DEBUG geo_debug = sqlite3_value_int(argv[0]); +#else + (void)argv; #endif } @@ -203752,26 +212044,31 @@ static int geopolyInit( sqlite3_str *pSql; char *zSql; int ii; + (void)pAux; sqlite3_vtab_config(db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1); + sqlite3_vtab_config(db, SQLITE_VTAB_INNOCUOUS); /* Allocate the sqlite3_vtab structure */ nDb = strlen(argv[1]); nName = strlen(argv[2]); - pRtree = (Rtree *)sqlite3_malloc64(sizeof(Rtree)+nDb+nName+2); + pRtree = (Rtree *)sqlite3_malloc64(sizeof(Rtree)+nDb+nName*2+8); if( !pRtree ){ return SQLITE_NOMEM; } - memset(pRtree, 0, sizeof(Rtree)+nDb+nName+2); + memset(pRtree, 0, sizeof(Rtree)+nDb+nName*2+8); pRtree->nBusy = 1; pRtree->base.pModule = &rtreeModule; pRtree->zDb = (char *)&pRtree[1]; pRtree->zName = &pRtree->zDb[nDb+1]; + pRtree->zNodeName = &pRtree->zName[nName+1]; pRtree->eCoordType = RTREE_COORD_REAL32; pRtree->nDim = 2; pRtree->nDim2 = 4; memcpy(pRtree->zDb, argv[1], nDb); memcpy(pRtree->zName, argv[2], nName); + memcpy(pRtree->zNodeName, argv[2], nName); + memcpy(&pRtree->zNodeName[nName], "_node", 6); /* Create/Connect to the underlying relational database schema. If @@ -203868,6 +212165,7 @@ static int geopolyFilter( RtreeNode *pRoot = 0; int rc = SQLITE_OK; int iCell = 0; + (void)idxStr; rtreeReference(pRtree); @@ -203994,6 +212292,7 @@ static int geopolyBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ int iRowidTerm = -1; int iFuncTerm = -1; int idxNum = 0; + (void)tab; for(ii=0; iinConstraint; ii++){ struct sqlite3_index_constraint *p = &pIdxInfo->aConstraint[ii]; @@ -204183,7 +212482,6 @@ static int geopolyUpdate( } if( rc==SQLITE_OK ){ int rc2; - pRtree->iReinsertHeight = -1; rc = rtreeInsertCell(pRtree, pLeaf, &cell, 0); rc2 = nodeRelease(pRtree, pLeaf); if( rc==SQLITE_OK ){ @@ -204240,6 +212538,8 @@ static int geopolyFindFunction( void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), void **ppArg ){ + (void)pVtab; + (void)nArg; if( sqlite3_stricmp(zName, "geopoly_overlap")==0 ){ *pxFunc = geopolyOverlapFunc; *ppArg = 0; @@ -204278,7 +212578,8 @@ static sqlite3_module geopolyModule = { rtreeSavepoint, /* xSavepoint */ 0, /* xRelease */ 0, /* xRollbackTo */ - rtreeShadowName /* xShadowName */ + rtreeShadowName, /* xShadowName */ + rtreeIntegrity /* xIntegrity */ }; static int sqlite3_geopoly_init(sqlite3 *db){ @@ -204309,7 +212610,7 @@ static int sqlite3_geopoly_init(sqlite3 *db){ } aAgg[] = { { geopolyBBoxStep, geopolyBBoxFinal, "geopoly_group_bbox" }, }; - int i; + unsigned int i; for(i=0; i naming scheme. +** tables or views named using the data_ naming scheme. 
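
Both rtreeModule and geopolyModule step to iVersion 4 and expose the existing rtreecheck machinery as xIntegrity, so PRAGMA integrity_check and quick_check now descend into these virtual tables. Skeleton of an xIntegrity method for a custom vtab (sketch; MyVtab and my_check are stand-ins, and the signature is the one shown in rtreeIntegrity above):

    #include <sqlite3.h>

    /* Hypothetical checker: returns SQLITE_OK and sets *pzReport, which
    ** stays NULL when the structure is sound. */
    int my_check(sqlite3 *db, const char *zDb, const char *zTab,
                 char **pzReport);

    typedef struct MyVtab { sqlite3_vtab base; sqlite3 *db; } MyVtab;

    static int my_vtab_integrity(sqlite3_vtab *pVtab, const char *zSchema,
                                 const char *zName, int isQuick,
                                 char **pzErr){
      MyVtab *p = (MyVtab*)pVtab;
      int rc = my_check(p->db, zSchema, zName, pzErr);
      if( rc==SQLITE_OK && *pzErr ){
        /* A non-NULL *pzErr marks the table corrupt; add context.
        ** %z consumes and frees the sqlite3_malloc'ed message. */
        *pzErr = sqlite3_mprintf("In %s.%s:\n%z", zSchema, zName, *pzErr);
      }
      (void)isQuick;
      return rc;
    }

The method slots into the final position of an iVersion-4 sqlite3_module, exactly as the two "rtreeIntegrity /* xIntegrity */" hunks above do.
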
** ** Instead of the plain data_ naming scheme, RBU database tables ** may also be named data_, where is any sequence @@ -205542,7 +213844,7 @@ SQLITE_PRIVATE void sqlite3Fts3IcuTokenizerModule( ** ** If the target database table is a virtual table or a table that has no ** PRIMARY KEY declaration, the data_% table must also contain a column -** named "rbu_rowid". This column is mapped to the tables implicit primary +** named "rbu_rowid". This column is mapped to the table's implicit primary ** key column - "rowid". Virtual tables for which the "rowid" column does ** not function like a primary key value cannot be updated using RBU. For ** example, if the target db contains either of the following: @@ -205975,6 +214277,34 @@ SQLITE_API void sqlite3rbu_bp_progress(sqlite3rbu *pRbu, int *pnOne, int*pnTwo); SQLITE_API int sqlite3rbu_state(sqlite3rbu *pRbu); +/* +** As part of applying an RBU update or performing an RBU vacuum operation, +** the system must at one point move the *-oal file to the equivalent *-wal +** path. Normally, it does this by invoking POSIX function rename(2) directly. +** Except on WINCE platforms, where it uses win32 API MoveFileW(). This +** function may be used to register a callback that the RBU module will invoke +** instead of one of these APIs. +** +** If a callback is registered with an RBU handle, it invokes it instead +** of rename(2) when it needs to move a file within the file-system. The +** first argument passed to the xRename() callback is a copy of the second +** argument (pArg) passed to this function. The second is the full path +** to the file to move and the third the full path to which it should be +** moved. The callback function should return SQLITE_OK to indicate +** success. If an error occurs, it should return an SQLite error code. +** In this case the RBU operation will be abandoned and the error returned +** to the RBU user. +** +** Passing a NULL pointer in place of the xRename argument to this function +** restores the default behaviour. +*/ +SQLITE_API void sqlite3rbu_rename_handler( + sqlite3rbu *pRbu, + void *pArg, + int (*xRename)(void *pArg, const char *zOld, const char *zNew) +); + + /* ** Create an RBU VFS named zName that accesses the underlying file-system ** via existing VFS zParent. Or, if the zParent parameter is passed NULL, @@ -206342,6 +214672,8 @@ struct sqlite3rbu { int nPagePerSector; /* Pages per sector for pTargetFd */ i64 iOalSz; i64 nPhaseOneStep; + void *pRenameArg; + int (*xRename)(void*, const char*, const char*); /* The following state variables are used as part of the incremental ** checkpoint stage (eStage==RBU_STAGE_CKPT). See comments surrounding @@ -208730,7 +217062,7 @@ static void rbuOpenDatabase(sqlite3rbu *p, sqlite3 *dbMain, int *pbRetry){ sqlite3_file_control(p->dbRbu, "main", SQLITE_FCNTL_RBUCNT, (void*)p); if( p->zState==0 ){ const char *zFile = sqlite3_db_filename(p->dbRbu, "main"); - p->zState = rbuMPrintf(p, "file://%s-vacuum?modeof=%s", zFile, zFile); + p->zState = rbuMPrintf(p, "file:///%s-vacuum?modeof=%s", zFile, zFile); } } @@ -208978,11 +217310,11 @@ static void rbuSetupCheckpoint(sqlite3rbu *p, RbuState *pState){ ** no-ops. These locks will not be released until the connection ** is closed. ** - ** * Attempting to xSync() the database file causes an SQLITE_INTERNAL + ** * Attempting to xSync() the database file causes an SQLITE_NOTICE ** error. ** ** As a result, unless an error (i.e. 
OOM or SQLITE_BUSY) occurs, the - ** checkpoint below fails with SQLITE_INTERNAL, and leaves the aFrame[] + ** checkpoint below fails with SQLITE_NOTICE, and leaves the aFrame[] ** array populated with a set of (frame -> page) mappings. Because the ** WRITER, CHECKPOINT and READ0 locks are still held, it is safe to copy ** data from the wal file into the database file according to the @@ -208992,7 +217324,7 @@ static void rbuSetupCheckpoint(sqlite3rbu *p, RbuState *pState){ int rc2; p->eStage = RBU_STAGE_CAPTURE; rc2 = sqlite3_exec(p->dbMain, "PRAGMA main.wal_checkpoint=restart", 0, 0,0); - if( rc2!=SQLITE_INTERNAL ) p->rc = rc2; + if( rc2!=SQLITE_NOTICE ) p->rc = rc2; } if( p->rc==SQLITE_OK && p->nFrame>0 ){ @@ -209038,7 +217370,7 @@ static int rbuCaptureWalRead(sqlite3rbu *pRbu, i64 iOff, int iAmt){ if( pRbu->mLock!=mReq ){ pRbu->rc = SQLITE_BUSY; - return SQLITE_INTERNAL; + return SQLITE_NOTICE_RBU; } pRbu->pgsz = iAmt; @@ -209088,6 +217420,11 @@ static void rbuCheckpointFrame(sqlite3rbu *p, RbuFrame *pFrame){ p->rc = pDb->pMethods->xWrite(pDb, p->aBuf, p->pgsz, iOff); } +/* +** This value is copied from the definition of ZIPVFS_CTRL_FILE_POINTER +** in zipvfs.h. +*/ +#define RBU_ZIPVFS_CTRL_FILE_POINTER 230439 /* ** Take an EXCLUSIVE lock on the database file. Return SQLITE_OK if @@ -209096,9 +217433,20 @@ static void rbuCheckpointFrame(sqlite3rbu *p, RbuFrame *pFrame){ static int rbuLockDatabase(sqlite3 *db){ int rc = SQLITE_OK; sqlite3_file *fd = 0; - sqlite3_file_control(db, "main", SQLITE_FCNTL_FILE_POINTER, &fd); - if( fd->pMethods ){ + sqlite3_file_control(db, "main", RBU_ZIPVFS_CTRL_FILE_POINTER, &fd); + if( fd ){ + sqlite3_file_control(db, "main", SQLITE_FCNTL_FILE_POINTER, &fd); + rc = fd->pMethods->xLock(fd, SQLITE_LOCK_SHARED); + if( rc==SQLITE_OK ){ + rc = fd->pMethods->xUnlock(fd, SQLITE_LOCK_NONE); + } + sqlite3_file_control(db, "main", RBU_ZIPVFS_CTRL_FILE_POINTER, &fd); + }else{ + sqlite3_file_control(db, "main", SQLITE_FCNTL_FILE_POINTER, &fd); + } + + if( rc==SQLITE_OK && fd->pMethods ){ rc = fd->pMethods->xLock(fd, SQLITE_LOCK_SHARED); if( rc==SQLITE_OK ){ rc = fd->pMethods->xLock(fd, SQLITE_LOCK_EXCLUSIVE); @@ -209190,32 +217538,7 @@ static void rbuMoveOalFile(sqlite3rbu *p){ } if( p->rc==SQLITE_OK ){ -#if defined(_WIN32_WCE) - { - LPWSTR zWideOal; - LPWSTR zWideWal; - - zWideOal = rbuWinUtf8ToUnicode(zOal); - if( zWideOal ){ - zWideWal = rbuWinUtf8ToUnicode(zWal); - if( zWideWal ){ - if( MoveFileW(zWideOal, zWideWal) ){ - p->rc = SQLITE_OK; - }else{ - p->rc = SQLITE_IOERR; - } - sqlite3_free(zWideWal); - }else{ - p->rc = SQLITE_IOERR_NOMEM; - } - sqlite3_free(zWideOal); - }else{ - p->rc = SQLITE_IOERR_NOMEM; - } - } -#else - p->rc = rename(zOal, zWal) ? SQLITE_IOERR : SQLITE_OK; -#endif + p->rc = p->xRename(p->pRenameArg, zOal, zWal); } if( p->rc!=SQLITE_OK @@ -209802,7 +218125,8 @@ static void rbuSetupOal(sqlite3rbu *p, RbuState *pState){ static void rbuDeleteOalFile(sqlite3rbu *p){ char *zOal = rbuMPrintf(p, "%s-oal", p->zTarget); if( zOal ){ - sqlite3_vfs *pVfs = sqlite3_vfs_find(0); + sqlite3_vfs *pVfs = 0; + sqlite3_file_control(p->dbMain, "main", SQLITE_FCNTL_VFS_POINTER, &pVfs); assert( pVfs && p->rc==SQLITE_OK && p->zErrmsg==0 ); pVfs->xDelete(pVfs, zOal, 0); sqlite3_free(zOal); @@ -209954,6 +218278,7 @@ static sqlite3rbu *openRbuHandle( /* Create the custom VFS. 
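
With the *-oal to *-wal move now routed through p->xRename, an application can observe or replace the rename step — useful on filesystems or sandboxes where a plain rename(2) is not appropriate. Assumed usage of the new API (sketch; the paths and the surrounding step loop follow the standard RBU pattern):

    #include <stdio.h>
    #include "sqlite3rbu.h"

    /* Log every rename, then fall back to the default behaviour. */
    static int traceRename(void *pArg, const char *zOld, const char *zNew){
      fprintf((FILE*)pArg, "rbu: rename %s -> %s\n", zOld, zNew);
      return rename(zOld, zNew) ? SQLITE_IOERR : SQLITE_OK;
    }

    void apply_update(const char *zTarget, const char *zRbu){
      sqlite3rbu *p = sqlite3rbu_open(zTarget, zRbu, 0);
      if( p ){
        sqlite3rbu_rename_handler(p, (void*)stderr, traceRename);
        while( sqlite3rbu_step(p)==SQLITE_OK ){}
        sqlite3rbu_close(p, 0);
      }
    }

Passing a NULL callback restores xDefaultRename, which is also what openRbuHandle() installs by calling sqlite3rbu_rename_handler(p, 0, 0) at setup time.
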
*/ memset(p, 0, sizeof(sqlite3rbu)); + sqlite3rbu_rename_handler(p, 0, 0); rbuCreateVfs(p); /* Open the target, RBU and state databases */ @@ -210345,6 +218670,54 @@ SQLITE_API int sqlite3rbu_savestate(sqlite3rbu *p){ return rc; } +/* +** Default xRename callback for RBU. +*/ +static int xDefaultRename(void *pArg, const char *zOld, const char *zNew){ + int rc = SQLITE_OK; +#if defined(_WIN32_WCE) + { + LPWSTR zWideOld; + LPWSTR zWideNew; + + zWideOld = rbuWinUtf8ToUnicode(zOld); + if( zWideOld ){ + zWideNew = rbuWinUtf8ToUnicode(zNew); + if( zWideNew ){ + if( MoveFileW(zWideOld, zWideNew) ){ + rc = SQLITE_OK; + }else{ + rc = SQLITE_IOERR; + } + sqlite3_free(zWideNew); + }else{ + rc = SQLITE_IOERR_NOMEM; + } + sqlite3_free(zWideOld); + }else{ + rc = SQLITE_IOERR_NOMEM; + } + } +#else + rc = rename(zOld, zNew) ? SQLITE_IOERR : SQLITE_OK; +#endif + return rc; +} + +SQLITE_API void sqlite3rbu_rename_handler( + sqlite3rbu *pRbu, + void *pArg, + int (*xRename)(void *pArg, const char *zOld, const char *zNew) +){ + if( xRename ){ + pRbu->xRename = xRename; + pRbu->pRenameArg = pArg; + }else{ + pRbu->xRename = xDefaultRename; + pRbu->pRenameArg = 0; + } +} + /************************************************************************** ** Beginning of RBU VFS shim methods. The VFS shim modifies the behaviour ** of a standard VFS in the following ways: @@ -210401,7 +218774,7 @@ SQLITE_API int sqlite3rbu_savestate(sqlite3rbu *p){ ** database file are recorded. xShmLock() calls to unlock the same ** locks are no-ops (so that once obtained, these locks are never ** relinquished). Finally, calls to xSync() on the target database -** file fail with SQLITE_INTERNAL errors. +** file fail with SQLITE_NOTICE errors. */ static void rbuUnlockShm(rbu_file *p){ @@ -210510,9 +218883,12 @@ static int rbuVfsClose(sqlite3_file *pFile){ sqlite3_free(p->zDel); if( p->openFlags & SQLITE_OPEN_MAIN_DB ){ + const sqlite3_io_methods *pMeth = p->pReal->pMethods; rbuMainlistRemove(p); rbuUnlockShm(p); - p->pReal->pMethods->xShmUnmap(p->pReal, 0); + if( pMeth->iVersion>1 && pMeth->xShmUnmap ){ + pMeth->xShmUnmap(p->pReal, 0); + } } else if( (p->openFlags & SQLITE_OPEN_DELETEONCLOSE) && p->pRbu ){ rbuUpdateTempSize(p, 0); @@ -210680,7 +219056,7 @@ static int rbuVfsSync(sqlite3_file *pFile, int flags){ rbu_file *p = (rbu_file *)pFile; if( p->pRbu && p->pRbu->eStage==RBU_STAGE_CAPTURE ){ if( p->openFlags & SQLITE_OPEN_MAIN_DB ){ - return SQLITE_INTERNAL; + return SQLITE_NOTICE_RBU; } return SQLITE_OK; } @@ -210971,6 +219347,25 @@ static int rbuVfsOpen( rbuVfsShmUnmap, /* xShmUnmap */ 0, 0 /* xFetch, xUnfetch */ }; + static sqlite3_io_methods rbuvfs_io_methods1 = { + 1, /* iVersion */ + rbuVfsClose, /* xClose */ + rbuVfsRead, /* xRead */ + rbuVfsWrite, /* xWrite */ + rbuVfsTruncate, /* xTruncate */ + rbuVfsSync, /* xSync */ + rbuVfsFileSize, /* xFileSize */ + rbuVfsLock, /* xLock */ + rbuVfsUnlock, /* xUnlock */ + rbuVfsCheckReservedLock, /* xCheckReservedLock */ + rbuVfsFileControl, /* xFileControl */ + rbuVfsSectorSize, /* xSectorSize */ + rbuVfsDeviceCharacteristics, /* xDeviceCharacteristics */ + 0, 0, 0, 0, 0, 0 + }; + + + rbu_vfs *pRbuVfs = (rbu_vfs*)pVfs; sqlite3_vfs *pRealVfs = pRbuVfs->pRealVfs; rbu_file *pFd = (rbu_file *)pFile; @@ -211025,10 +219420,15 @@ static int rbuVfsOpen( rc = pRealVfs->xOpen(pRealVfs, zOpen, pFd->pReal, oflags, pOutFlags); } if( pFd->pReal->pMethods ){ + const sqlite3_io_methods *pMeth = pFd->pReal->pMethods; /* The xOpen() operation has succeeded. 
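**
** A minimal usage sketch for the new sqlite3rbu_rename_handler() API
** added above (not part of the upstream diff; xLoggingRename and the
** file names are hypothetical, the sqlite3rbu_* calls are the API
** itself):
**
**   #include <stdio.h>      /* rename(), fprintf() */
**   #include "sqlite3rbu.h"
**
**   /* Behave like the default handler, but log every move first. */
**   static int xLoggingRename(void *pArg, const char *zOld, const char *zNew){
**     fprintf((FILE*)pArg, "rbu: moving %s -> %s\n", zOld, zNew);
**     return rename(zOld, zNew) ? SQLITE_IOERR : SQLITE_OK;
**   }
**
**   sqlite3rbu *pRbu = sqlite3rbu_open("target.db", "update.rbu", 0);
**   sqlite3rbu_rename_handler(pRbu, (void*)stderr, xLoggingRename);
**   while( sqlite3rbu_step(pRbu)==SQLITE_OK ){ /* apply update */ }
**   sqlite3rbu_close(pRbu, 0);
**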
Set the sqlite3_file.pMethods ** pointer and, if the file is a main database file, link it into the ** mutex protected linked list of all such files. */ - pFile->pMethods = &rbuvfs_io_methods; + if( pMeth->iVersion<2 || pMeth->xShmLock==0 ){ + pFile->pMethods = &rbuvfs_io_methods1; + }else{ + pFile->pMethods = &rbuvfs_io_methods; + } if( flags & SQLITE_OPEN_MAIN_DB ){ rbuMainlistAdd(pFd); } @@ -211461,6 +219861,7 @@ static int statConnect( StatTable *pTab = 0; int rc = SQLITE_OK; int iDb; + (void)pAux; if( argc>=4 ){ Token nm; @@ -211514,6 +219915,7 @@ static int statBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ int iSchema = -1; int iName = -1; int iAgg = -1; + (void)tab; /* Look for a valid schema=? constraint. If found, change the idxNum to ** 1 and request the value of that constraint be sent to xFilter. And @@ -212039,6 +220441,8 @@ static int statFilter( int iArg = 0; /* Count of argv[] parameters used so far */ int rc = SQLITE_OK; /* Result of this operation */ const char *zName = 0; /* Only provide analysis of this table */ + (void)argc; + (void)idxStr; statResetCsr(pCsr); sqlite3_finalize(pCsr->pStmt); @@ -212122,16 +220526,16 @@ static int statColumn( } break; case 4: /* ncell */ - sqlite3_result_int(ctx, pCsr->nCell); + sqlite3_result_int64(ctx, pCsr->nCell); break; case 5: /* payload */ - sqlite3_result_int(ctx, pCsr->nPayload); + sqlite3_result_int64(ctx, pCsr->nPayload); break; case 6: /* unused */ - sqlite3_result_int(ctx, pCsr->nUnused); + sqlite3_result_int64(ctx, pCsr->nUnused); break; case 7: /* mx_payload */ - sqlite3_result_int(ctx, pCsr->nMxPayload); + sqlite3_result_int64(ctx, pCsr->nMxPayload); break; case 8: /* pgoffset */ if( !pCsr->isAgg ){ @@ -212139,7 +220543,7 @@ static int statColumn( } break; case 9: /* pgsize */ - sqlite3_result_int(ctx, pCsr->szPage); + sqlite3_result_int64(ctx, pCsr->szPage); break; case 10: { /* schema */ sqlite3 *db = sqlite3_context_db_handle(ctx); @@ -212189,7 +220593,8 @@ SQLITE_PRIVATE int sqlite3DbstatRegister(sqlite3 *db){ 0, /* xSavepoint */ 0, /* xRelease */ 0, /* xRollbackTo */ - 0 /* xShadowName */ + 0, /* xShadowName */ + 0 /* xIntegrity */ }; return sqlite3_create_module(db, "dbstat", &dbstat_module, 0); } @@ -212273,8 +220678,13 @@ static int dbpageConnect( ){ DbpageTable *pTab = 0; int rc = SQLITE_OK; + (void)pAux; + (void)argc; + (void)argv; + (void)pzErr; sqlite3_vtab_config(db, SQLITE_VTAB_DIRECTONLY); + sqlite3_vtab_config(db, SQLITE_VTAB_USES_ALL_SCHEMAS); rc = sqlite3_declare_vtab(db, "CREATE TABLE x(pgno INTEGER PRIMARY KEY, data BLOB, schema HIDDEN)"); if( rc==SQLITE_OK ){ @@ -212311,6 +220721,7 @@ static int dbpageDisconnect(sqlite3_vtab *pVtab){ static int dbpageBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ int i; int iPlan = 0; + (void)tab; /* If there is a schema= constraint, it must be honored. 
Report a ** ridiculously large estimated cost if the schema= constraint is @@ -212357,7 +220768,6 @@ static int dbpageBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ ){ pIdxInfo->orderByConsumed = 1; } - sqlite3VtabUsesAllSchemas(pIdxInfo); return SQLITE_OK; } @@ -212426,6 +220836,8 @@ static int dbpageFilter( sqlite3 *db = pTab->db; Btree *pBt; + (void)idxStr; + /* Default setting is no rows of result */ pCsr->pgno = 1; pCsr->mxPgno = 0; @@ -212440,7 +220852,7 @@ static int dbpageFilter( pCsr->iDb = 0; } pBt = db->aDb[pCsr->iDb].pBt; - if( pBt==0 ) return SQLITE_OK; + if( NEVER(pBt==0) ) return SQLITE_OK; pCsr->pPager = sqlite3BtreePager(pBt); pCsr->szPage = sqlite3BtreeGetPageSize(pBt); pCsr->mxPgno = sqlite3BtreeLastPage(pBt); @@ -212475,12 +220887,18 @@ static int dbpageColumn( } case 1: { /* data */ DbPage *pDbPage = 0; - rc = sqlite3PagerGet(pCsr->pPager, pCsr->pgno, (DbPage**)&pDbPage, 0); - if( rc==SQLITE_OK ){ - sqlite3_result_blob(ctx, sqlite3PagerGetData(pDbPage), pCsr->szPage, - SQLITE_TRANSIENT); + if( pCsr->pgno==((PENDING_BYTE/pCsr->szPage)+1) ){ + /* The pending byte page. Assume it is zeroed out. Attempting to + ** request this page from the page is an SQLITE_CORRUPT error. */ + sqlite3_result_zeroblob(ctx, pCsr->szPage); + }else{ + rc = sqlite3PagerGet(pCsr->pPager, pCsr->pgno, (DbPage**)&pDbPage, 0); + if( rc==SQLITE_OK ){ + sqlite3_result_blob(ctx, sqlite3PagerGetData(pDbPage), pCsr->szPage, + SQLITE_TRANSIENT); + } + sqlite3PagerUnref(pDbPage); } - sqlite3PagerUnref(pDbPage); break; } default: { /* schema */ @@ -212489,7 +220907,7 @@ static int dbpageColumn( break; } } - return SQLITE_OK; + return rc; } static int dbpageRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ @@ -212515,6 +220933,7 @@ static int dbpageUpdate( Pager *pPager; int szPage; + (void)pRowid; if( pTab->db->flags & SQLITE_Defensive ){ zErr = "read-only"; goto update_fail; @@ -212524,18 +220943,20 @@ static int dbpageUpdate( goto update_fail; } pgno = sqlite3_value_int(argv[0]); - if( (Pgno)sqlite3_value_int(argv[1])!=pgno ){ + if( sqlite3_value_type(argv[0])==SQLITE_NULL + || (Pgno)sqlite3_value_int(argv[1])!=pgno + ){ zErr = "cannot insert"; goto update_fail; } zSchema = (const char*)sqlite3_value_text(argv[4]); - iDb = zSchema ? sqlite3FindDbName(pTab->db, zSchema) : -1; - if( iDb<0 ){ + iDb = ALWAYS(zSchema) ? 
sqlite3FindDbName(pTab->db, zSchema) : -1; + if( NEVER(iDb<0) ){ zErr = "no such schema"; goto update_fail; } pBt = pTab->db->aDb[iDb].pBt; - if( pgno<1 || pBt==0 || pgno>sqlite3BtreeLastPage(pBt) ){ + if( NEVER(pgno<1) || NEVER(pBt==0) || NEVER(pgno>sqlite3BtreeLastPage(pBt)) ){ zErr = "bad page number"; goto update_fail; } @@ -212549,11 +220970,12 @@ static int dbpageUpdate( pPager = sqlite3BtreePager(pBt); rc = sqlite3PagerGet(pPager, pgno, (DbPage**)&pDbPage, 0); if( rc==SQLITE_OK ){ - rc = sqlite3PagerWrite(pDbPage); - if( rc==SQLITE_OK ){ - memcpy(sqlite3PagerGetData(pDbPage), - sqlite3_value_blob(argv[3]), - szPage); + const void *pData = sqlite3_value_blob(argv[3]); + assert( pData!=0 || pTab->db->mallocFailed ); + if( pData + && (rc = sqlite3PagerWrite(pDbPage))==SQLITE_OK + ){ + memcpy(sqlite3PagerGetData(pDbPage), pData, szPage); } } sqlite3PagerUnref(pDbPage); @@ -212575,7 +220997,7 @@ static int dbpageBegin(sqlite3_vtab *pVtab){ int i; for(i=0; inDb; i++){ Btree *pBt = db->aDb[i].pBt; - if( pBt ) sqlite3BtreeBeginTrans(pBt, 1, 0); + if( pBt ) (void)sqlite3BtreeBeginTrans(pBt, 1, 0); } return SQLITE_OK; } @@ -212609,7 +221031,8 @@ SQLITE_PRIVATE int sqlite3DbpageRegister(sqlite3 *db){ 0, /* xSavepoint */ 0, /* xRelease */ 0, /* xRollbackTo */ - 0 /* xShadowName */ + 0, /* xShadowName */ + 0 /* xIntegrity */ }; return sqlite3_create_module(db, "sqlite_dbpage", &dbpage_module, 0); } @@ -212646,6 +221069,8 @@ typedef struct SessionInput SessionInput; # endif #endif +#define SESSIONS_ROWID "_rowid_" + static int sessions_strm_chunk_size = SESSIONS_STRM_CHUNK_SIZE; typedef struct SessionHook SessionHook; @@ -212667,6 +221092,7 @@ struct sqlite3_session { int bEnable; /* True if currently recording */ int bIndirect; /* True if all changes are indirect */ int bAutoAttach; /* True to auto-attach tables */ + int bImplicitPK; /* True to handle tables with implicit PK */ int rc; /* Non-zero if an error has occurred */ void *pFilterCtx; /* First argument to pass to xTableFilter */ int (*xTableFilter)(void *pCtx, const char *zTab); @@ -212737,17 +221163,32 @@ struct sqlite3_changeset_iter { ** The data associated with each hash-table entry is a structure containing ** a subset of the initial values that the modified row contained at the ** start of the session. Or no initial values if the row was inserted. +** +** pDfltStmt: +** This is only used by the sqlite3changegroup_xxx() APIs, not by +** regular sqlite3_session objects. It is a SELECT statement that +** selects the default value for each table column. 
For example, +** if the table is +** +** CREATE TABLE xx(a DEFAULT 1, b, c DEFAULT 'abc') +** +** then this variable is the compiled version of: +** +** SELECT 1, NULL, 'abc' */ struct SessionTable { SessionTable *pNext; char *zName; /* Local name of table */ int nCol; /* Number of columns in table zName */ int bStat1; /* True if this is sqlite_stat1 */ + int bRowid; /* True if this table uses rowid for PK */ const char **azCol; /* Column names */ + const char **azDflt; /* Default value expressions */ u8 *abPK; /* Array of primary key flags */ int nEntry; /* Total number of entries in hash table */ int nChange; /* Size of apChange[] array */ SessionChange **apChange; /* Hash table buckets */ + sqlite3_stmt *pDfltStmt; }; /* @@ -212916,6 +221357,7 @@ struct SessionTable { struct SessionChange { u8 op; /* One of UPDATE, DELETE, INSERT */ u8 bIndirect; /* True if this change is "indirect" */ + u16 nRecordField; /* Number of fields in aRecord[] */ int nMaxSize; /* Max size of eventual changeset record */ int nRecord; /* Number of bytes in buffer aRecord[] */ u8 *aRecord; /* Buffer containing old.* record */ @@ -212941,7 +221383,7 @@ static int sessionVarintLen(int iVal){ ** Read a varint value from aBuf[] into *piVal. Return the number of ** bytes read. */ -static int sessionVarintGet(u8 *aBuf, int *piVal){ +static int sessionVarintGet(const u8 *aBuf, int *piVal){ return getVarint32(aBuf, *piVal); } @@ -213135,6 +221577,7 @@ static unsigned int sessionHashAppendType(unsigned int h, int eType){ */ static int sessionPreupdateHash( sqlite3_session *pSession, /* Session object that owns pTab */ + i64 iRowid, SessionTable *pTab, /* Session table handle */ int bNew, /* True to hash the new.* PK */ int *piHash, /* OUT: Hash value */ @@ -213143,48 +221586,53 @@ static int sessionPreupdateHash( unsigned int h = 0; /* Hash value to return */ int i; /* Used to iterate through columns */ - assert( *pbNullPK==0 ); - assert( pTab->nCol==pSession->hook.xCount(pSession->hook.pCtx) ); - for(i=0; inCol; i++){ - if( pTab->abPK[i] ){ - int rc; - int eType; - sqlite3_value *pVal; - - if( bNew ){ - rc = pSession->hook.xNew(pSession->hook.pCtx, i, &pVal); - }else{ - rc = pSession->hook.xOld(pSession->hook.pCtx, i, &pVal); - } - if( rc!=SQLITE_OK ) return rc; + if( pTab->bRowid ){ + assert( pTab->nCol-1==pSession->hook.xCount(pSession->hook.pCtx) ); + h = sessionHashAppendI64(h, iRowid); + }else{ + assert( *pbNullPK==0 ); + assert( pTab->nCol==pSession->hook.xCount(pSession->hook.pCtx) ); + for(i=0; inCol; i++){ + if( pTab->abPK[i] ){ + int rc; + int eType; + sqlite3_value *pVal; - eType = sqlite3_value_type(pVal); - h = sessionHashAppendType(h, eType); - if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){ - i64 iVal; - if( eType==SQLITE_INTEGER ){ - iVal = sqlite3_value_int64(pVal); + if( bNew ){ + rc = pSession->hook.xNew(pSession->hook.pCtx, i, &pVal); }else{ - double rVal = sqlite3_value_double(pVal); - assert( sizeof(iVal)==8 && sizeof(rVal)==8 ); - memcpy(&iVal, &rVal, 8); + rc = pSession->hook.xOld(pSession->hook.pCtx, i, &pVal); } - h = sessionHashAppendI64(h, iVal); - }else if( eType==SQLITE_TEXT || eType==SQLITE_BLOB ){ - const u8 *z; - int n; - if( eType==SQLITE_TEXT ){ - z = (const u8 *)sqlite3_value_text(pVal); + if( rc!=SQLITE_OK ) return rc; + + eType = sqlite3_value_type(pVal); + h = sessionHashAppendType(h, eType); + if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){ + i64 iVal; + if( eType==SQLITE_INTEGER ){ + iVal = sqlite3_value_int64(pVal); + }else{ + double rVal = sqlite3_value_double(pVal); + 
assert( sizeof(iVal)==8 && sizeof(rVal)==8 ); + memcpy(&iVal, &rVal, 8); + } + h = sessionHashAppendI64(h, iVal); + }else if( eType==SQLITE_TEXT || eType==SQLITE_BLOB ){ + const u8 *z; + int n; + if( eType==SQLITE_TEXT ){ + z = (const u8 *)sqlite3_value_text(pVal); + }else{ + z = (const u8 *)sqlite3_value_blob(pVal); + } + n = sqlite3_value_bytes(pVal); + if( !z && (eType!=SQLITE_BLOB || n>0) ) return SQLITE_NOMEM; + h = sessionHashAppendBlob(h, n, z); }else{ - z = (const u8 *)sqlite3_value_blob(pVal); + assert( eType==SQLITE_NULL ); + assert( pTab->bStat1==0 || i!=1 ); + *pbNullPK = 1; } - n = sqlite3_value_bytes(pVal); - if( !z && (eType!=SQLITE_BLOB || n>0) ) return SQLITE_NOMEM; - h = sessionHashAppendBlob(h, n, z); - }else{ - assert( eType==SQLITE_NULL ); - assert( pTab->bStat1==0 || i!=1 ); - *pbNullPK = 1; } } } @@ -213198,9 +221646,11 @@ static int sessionPreupdateHash( ** Return the number of bytes of space occupied by the value (including ** the type byte). */ -static int sessionSerialLen(u8 *a){ - int e = *a; +static int sessionSerialLen(const u8 *a){ + int e; int n; + assert( a!=0 ); + e = *a; if( e==0 || e==0xFF ) return 1; if( e==SQLITE_NULL ) return 1; if( e==SQLITE_INTEGER || e==SQLITE_FLOAT ) return 9; @@ -213467,6 +221917,7 @@ static int sessionMergeUpdate( */ static int sessionPreupdateEqual( sqlite3_session *pSession, /* Session object that owns SessionTable */ + i64 iRowid, /* Rowid value if pTab->bRowid */ SessionTable *pTab, /* Table associated with change */ SessionChange *pChange, /* Change to compare to */ int op /* Current pre-update operation */ @@ -213474,6 +221925,11 @@ static int sessionPreupdateEqual( int iCol; /* Used to iterate through columns */ u8 *a = pChange->aRecord; /* Cursor used to scan change record */ + if( pTab->bRowid ){ + if( a[0]!=SQLITE_INTEGER ) return 0; + return sessionGetI64(&a[1])==iRowid; + } + assert( op==SQLITE_INSERT || op==SQLITE_UPDATE || op==SQLITE_DELETE ); for(iCol=0; iColnCol; iCol++){ if( !pTab->abPK[iCol] ){ @@ -213496,6 +221952,7 @@ static int sessionPreupdateEqual( rc = pSession->hook.xOld(pSession->hook.pCtx, iCol, &pVal); } assert( rc==SQLITE_OK ); + (void)rc; /* Suppress warning about unused variable */ if( sqlite3_value_type(pVal)!=eType ) return 0; /* A SessionChange object never has a NULL value in a PK column */ @@ -213598,13 +222055,14 @@ static int sessionGrowHash( ** ** For example, if the table is declared as: ** -** CREATE TABLE tbl1(w, x, y, z, PRIMARY KEY(w, z)); +** CREATE TABLE tbl1(w, x DEFAULT 'abc', y, z, PRIMARY KEY(w, z)); ** -** Then the four output variables are populated as follows: +** Then the five output variables are populated as follows: ** ** *pnCol = 4 ** *pzTab = "tbl1" ** *pazCol = {"w", "x", "y", "z"} +** *pazDflt = {NULL, 'abc', NULL, NULL} ** *pabPK = {1, 0, 0, 1} ** ** All returned buffers are part of the same single allocation, which must @@ -213618,7 +222076,9 @@ static int sessionTableInfo( int *pnCol, /* OUT: number of columns */ const char **pzTab, /* OUT: Copy of zThis */ const char ***pazCol, /* OUT: Array of column names for table */ - u8 **pabPK /* OUT: Array of booleans - true for PK col */ + const char ***pazDflt, /* OUT: Array of default value expressions */ + u8 **pabPK, /* OUT: Array of booleans - true for PK col */ + int *pbRowid /* OUT: True if only PK is a rowid */ ){ char *zPragma; sqlite3_stmt *pStmt; @@ -213629,10 +222089,18 @@ static int sessionTableInfo( int i; u8 *pAlloc = 0; char **azCol = 0; + char **azDflt = 0; u8 *abPK = 0; + int bRowid = 0; /* Set to true to 
use rowid as PK */ assert( pazCol && pabPK ); + *pazCol = 0; + *pabPK = 0; + *pnCol = 0; + if( pzTab ) *pzTab = 0; + if( pazDflt ) *pazDflt = 0; + nThis = sqlite3Strlen30(zThis); if( nThis==12 && 0==sqlite3_stricmp("sqlite_stat1", zThis) ){ rc = sqlite3_table_column_metadata(db, zDb, zThis, 0, 0, 0, 0, 0, 0); @@ -213646,50 +222114,47 @@ static int sessionTableInfo( }else if( rc==SQLITE_ERROR ){ zPragma = sqlite3_mprintf(""); }else{ - *pazCol = 0; - *pabPK = 0; - *pnCol = 0; - if( pzTab ) *pzTab = 0; return rc; } }else{ zPragma = sqlite3_mprintf("PRAGMA '%q'.table_info('%q')", zDb, zThis); } if( !zPragma ){ - *pazCol = 0; - *pabPK = 0; - *pnCol = 0; - if( pzTab ) *pzTab = 0; return SQLITE_NOMEM; } rc = sqlite3_prepare_v2(db, zPragma, -1, &pStmt, 0); sqlite3_free(zPragma); if( rc!=SQLITE_OK ){ - *pazCol = 0; - *pabPK = 0; - *pnCol = 0; - if( pzTab ) *pzTab = 0; return rc; } nByte = nThis + 1; + bRowid = (pbRowid!=0); while( SQLITE_ROW==sqlite3_step(pStmt) ){ - nByte += sqlite3_column_bytes(pStmt, 1); + nByte += sqlite3_column_bytes(pStmt, 1); /* name */ + nByte += sqlite3_column_bytes(pStmt, 4); /* dflt_value */ nDbCol++; + if( sqlite3_column_int(pStmt, 5) ) bRowid = 0; /* pk */ } + if( nDbCol==0 ) bRowid = 0; + nDbCol += bRowid; + nByte += strlen(SESSIONS_ROWID); rc = sqlite3_reset(pStmt); if( rc==SQLITE_OK ){ - nByte += nDbCol * (sizeof(const char *) + sizeof(u8) + 1); + nByte += nDbCol * (sizeof(const char *)*2 + sizeof(u8) + 1 + 1); pAlloc = sessionMalloc64(pSession, nByte); if( pAlloc==0 ){ rc = SQLITE_NOMEM; + }else{ + memset(pAlloc, 0, nByte); } } if( rc==SQLITE_OK ){ azCol = (char **)pAlloc; - pAlloc = (u8 *)&azCol[nDbCol]; + azDflt = (char**)&azCol[nDbCol]; + pAlloc = (u8 *)&azDflt[nDbCol]; abPK = (u8 *)pAlloc; pAlloc = &abPK[nDbCol]; if( pzTab ){ @@ -213699,43 +222164,57 @@ static int sessionTableInfo( } i = 0; + if( bRowid ){ + size_t nName = strlen(SESSIONS_ROWID); + memcpy(pAlloc, SESSIONS_ROWID, nName+1); + azCol[i] = (char*)pAlloc; + pAlloc += nName+1; + abPK[i] = 1; + i++; + } while( SQLITE_ROW==sqlite3_step(pStmt) ){ int nName = sqlite3_column_bytes(pStmt, 1); + int nDflt = sqlite3_column_bytes(pStmt, 4); const unsigned char *zName = sqlite3_column_text(pStmt, 1); + const unsigned char *zDflt = sqlite3_column_text(pStmt, 4); + if( zName==0 ) break; memcpy(pAlloc, zName, nName+1); azCol[i] = (char *)pAlloc; pAlloc += nName+1; + if( zDflt ){ + memcpy(pAlloc, zDflt, nDflt+1); + azDflt[i] = (char *)pAlloc; + pAlloc += nDflt+1; + }else{ + azDflt[i] = 0; + } abPK[i] = sqlite3_column_int(pStmt, 5); i++; } rc = sqlite3_reset(pStmt); - } /* If successful, populate the output variables. Otherwise, zero them and ** free any allocation made. An error code will be returned in this case. */ if( rc==SQLITE_OK ){ - *pazCol = (const char **)azCol; + *pazCol = (const char**)azCol; + if( pazDflt ) *pazDflt = (const char**)azDflt; *pabPK = abPK; *pnCol = nDbCol; }else{ - *pazCol = 0; - *pabPK = 0; - *pnCol = 0; - if( pzTab ) *pzTab = 0; sessionFree(pSession, azCol); } + if( pbRowid ) *pbRowid = bRowid; sqlite3_finalize(pStmt); return rc; } /* -** This function is only called from within a pre-update handler for a -** write to table pTab, part of session pSession. If this is the first -** write to this table, initalize the SessionTable.nCol, azCol[] and -** abPK[] arrays accordingly. +** This function is called to initialize the SessionTable.nCol, azCol[] +** abPK[] and azDflt[] members of SessionTable object pTab. If these +** fields are already initilialized, this function is a no-op. 
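**
** For reference (not part of the upstream diff): the PRAGMA table_info
** columns read below are 1="name", 4="dflt_value" and 5="pk". For the
** tbl1 example used in this file the statement returns:
**
**   sqlite> PRAGMA main.table_info('tbl1');
**   cid  name  type  notnull  dflt_value  pk
**   ---  ----  ----  -------  ----------  --
**   0    w           0        NULL        1
**   1    x           0        'abc'       0
**   2    y           0        NULL        0
**   3    z           0        NULL        2
**
** which is how azCol[], azDflt[] and abPK[] are populated, and how a
** table with no declared PRIMARY KEY (pk==0 for every row) is detected
** and mapped to the implicit SESSIONS_ROWID column.
**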
** ** If an error occurs, an error code is stored in sqlite3_session.rc and ** non-zero returned. Or, if no error occurs but the table has no primary @@ -213743,14 +222222,22 @@ static int sessionTableInfo( ** indicate that updates on this table should be ignored. SessionTable.abPK ** is set to NULL in this case. */ -static int sessionInitTable(sqlite3_session *pSession, SessionTable *pTab){ +static int sessionInitTable( + sqlite3_session *pSession, /* Optional session handle */ + SessionTable *pTab, /* Table object to initialize */ + sqlite3 *db, /* Database handle to read schema from */ + const char *zDb /* Name of db - "main", "temp" etc. */ +){ + int rc = SQLITE_OK; + if( pTab->nCol==0 ){ u8 *abPK; assert( pTab->azCol==0 || pTab->abPK==0 ); - pSession->rc = sessionTableInfo(pSession, pSession->db, pSession->zDb, - pTab->zName, &pTab->nCol, 0, &pTab->azCol, &abPK + rc = sessionTableInfo(pSession, db, zDb, + pTab->zName, &pTab->nCol, 0, &pTab->azCol, &pTab->azDflt, &abPK, + ((pSession==0 || pSession->bImplicitPK) ? &pTab->bRowid : 0) ); - if( pSession->rc==SQLITE_OK ){ + if( rc==SQLITE_OK ){ int i; for(i=0; inCol; i++){ if( abPK[i] ){ @@ -213762,14 +222249,321 @@ static int sessionInitTable(sqlite3_session *pSession, SessionTable *pTab){ pTab->bStat1 = 1; } - if( pSession->bEnableSize ){ + if( pSession && pSession->bEnableSize ){ pSession->nMaxChangesetSize += ( 1 + sessionVarintLen(pTab->nCol) + pTab->nCol + strlen(pTab->zName)+1 ); } } } - return (pSession->rc || pTab->abPK==0); + + if( pSession ){ + pSession->rc = rc; + return (rc || pTab->abPK==0); + } + return rc; +} + +/* +** Re-initialize table object pTab. +*/ +static int sessionReinitTable(sqlite3_session *pSession, SessionTable *pTab){ + int nCol = 0; + const char **azCol = 0; + const char **azDflt = 0; + u8 *abPK = 0; + int bRowid = 0; + + assert( pSession->rc==SQLITE_OK ); + + pSession->rc = sessionTableInfo(pSession, pSession->db, pSession->zDb, + pTab->zName, &nCol, 0, &azCol, &azDflt, &abPK, + (pSession->bImplicitPK ? &bRowid : 0) + ); + if( pSession->rc==SQLITE_OK ){ + if( pTab->nCol>nCol || pTab->bRowid!=bRowid ){ + pSession->rc = SQLITE_SCHEMA; + }else{ + int ii; + int nOldCol = pTab->nCol; + for(ii=0; iinCol ){ + if( pTab->abPK[ii]!=abPK[ii] ){ + pSession->rc = SQLITE_SCHEMA; + } + }else if( abPK[ii] ){ + pSession->rc = SQLITE_SCHEMA; + } + } + + if( pSession->rc==SQLITE_OK ){ + const char **a = pTab->azCol; + pTab->azCol = azCol; + pTab->nCol = nCol; + pTab->azDflt = azDflt; + pTab->abPK = abPK; + azCol = a; + } + if( pSession->bEnableSize ){ + pSession->nMaxChangesetSize += (nCol - nOldCol); + pSession->nMaxChangesetSize += sessionVarintLen(nCol); + pSession->nMaxChangesetSize -= sessionVarintLen(nOldCol); + } + } + } + + sqlite3_free((char*)azCol); + return pSession->rc; +} + +/* +** Session-change object (*pp) contains an old.* record with fewer than +** nCol fields. This function updates it with the default values for +** the missing fields. 
+*/ +static void sessionUpdateOneChange( + sqlite3_session *pSession, /* For memory accounting */ + int *pRc, /* IN/OUT: Error code */ + SessionChange **pp, /* IN/OUT: Change object to update */ + int nCol, /* Number of columns now in table */ + sqlite3_stmt *pDflt /* SELECT */ +){ + SessionChange *pOld = *pp; + + while( pOld->nRecordFieldnRecordField; + int eType = sqlite3_column_type(pDflt, iField); + switch( eType ){ + case SQLITE_NULL: + nIncr = 1; + break; + case SQLITE_INTEGER: + case SQLITE_FLOAT: + nIncr = 9; + break; + default: { + int n = sqlite3_column_bytes(pDflt, iField); + nIncr = 1 + sessionVarintLen(n) + n; + assert( eType==SQLITE_TEXT || eType==SQLITE_BLOB ); + break; + } + } + + nByte = nIncr + (sizeof(SessionChange) + pOld->nRecord); + pNew = sessionMalloc64(pSession, nByte); + if( pNew==0 ){ + *pRc = SQLITE_NOMEM; + return; + }else{ + memcpy(pNew, pOld, sizeof(SessionChange)); + pNew->aRecord = (u8*)&pNew[1]; + memcpy(pNew->aRecord, pOld->aRecord, pOld->nRecord); + pNew->aRecord[pNew->nRecord++] = (u8)eType; + switch( eType ){ + case SQLITE_INTEGER: { + i64 iVal = sqlite3_column_int64(pDflt, iField); + sessionPutI64(&pNew->aRecord[pNew->nRecord], iVal); + pNew->nRecord += 8; + break; + } + + case SQLITE_FLOAT: { + double rVal = sqlite3_column_double(pDflt, iField); + i64 iVal = 0; + memcpy(&iVal, &rVal, sizeof(rVal)); + sessionPutI64(&pNew->aRecord[pNew->nRecord], iVal); + pNew->nRecord += 8; + break; + } + + case SQLITE_TEXT: { + int n = sqlite3_column_bytes(pDflt, iField); + const char *z = (const char*)sqlite3_column_text(pDflt, iField); + pNew->nRecord += sessionVarintPut(&pNew->aRecord[pNew->nRecord], n); + memcpy(&pNew->aRecord[pNew->nRecord], z, n); + pNew->nRecord += n; + break; + } + + case SQLITE_BLOB: { + int n = sqlite3_column_bytes(pDflt, iField); + const u8 *z = (const u8*)sqlite3_column_blob(pDflt, iField); + pNew->nRecord += sessionVarintPut(&pNew->aRecord[pNew->nRecord], n); + memcpy(&pNew->aRecord[pNew->nRecord], z, n); + pNew->nRecord += n; + break; + } + + default: + assert( eType==SQLITE_NULL ); + break; + } + + sessionFree(pSession, pOld); + *pp = pOld = pNew; + pNew->nRecordField++; + pNew->nMaxSize += nIncr; + if( pSession ){ + pSession->nMaxChangesetSize += nIncr; + } + } + } +} + +/* +** Ensure that there is room in the buffer to append nByte bytes of data. +** If not, use sqlite3_realloc() to grow the buffer so that there is. +** +** If successful, return zero. Otherwise, if an OOM condition is encountered, +** set *pRc to SQLITE_NOMEM and return non-zero. +*/ +static int sessionBufferGrow(SessionBuffer *p, i64 nByte, int *pRc){ +#define SESSION_MAX_BUFFER_SZ (0x7FFFFF00 - 1) + i64 nReq = p->nBuf + nByte; + if( *pRc==SQLITE_OK && nReq>p->nAlloc ){ + u8 *aNew; + i64 nNew = p->nAlloc ? p->nAlloc : 128; + + do { + nNew = nNew*2; + }while( nNewSESSION_MAX_BUFFER_SZ ){ + nNew = SESSION_MAX_BUFFER_SZ; + if( nNewaBuf, nNew); + if( 0==aNew ){ + *pRc = SQLITE_NOMEM; + }else{ + p->aBuf = aNew; + p->nAlloc = nNew; + } + } + return (*pRc!=SQLITE_OK); +} + + +/* +** This function is a no-op if *pRc is other than SQLITE_OK when it is +** called. Otherwise, append a string to the buffer. All bytes in the string +** up to (but not including) the nul-terminator are written to the buffer. +** +** If an OOM condition is encountered, set *pRc to SQLITE_NOMEM before +** returning. 
+*/
+static void sessionAppendStr(
+  SessionBuffer *p,
+  const char *zStr,
+  int *pRc
+){
+  int nStr = sqlite3Strlen30(zStr);
+  if( 0==sessionBufferGrow(p, nStr+1, pRc) ){
+    memcpy(&p->aBuf[p->nBuf], zStr, nStr);
+    p->nBuf += nStr;
+    p->aBuf[p->nBuf] = 0x00;
+  }
+}
+
+/*
+** Format a string using printf() style formatting and then append it to the
+** buffer using sessionAppendString().
+*/
+static void sessionAppendPrintf(
+  SessionBuffer *p,               /* Buffer to append to */
+  int *pRc,
+  const char *zFmt,
+  ...
+){
+  if( *pRc==SQLITE_OK ){
+    char *zApp = 0;
+    va_list ap;
+    va_start(ap, zFmt);
+    zApp = sqlite3_vmprintf(zFmt, ap);
+    if( zApp==0 ){
+      *pRc = SQLITE_NOMEM;
+    }else{
+      sessionAppendStr(p, zApp, pRc);
+    }
+    va_end(ap);
+    sqlite3_free(zApp);
+  }
+}
+
+/*
+** Prepare a statement against database handle db that SELECTs a single
+** row containing the default values for each column in table pTab. For
+** example, if pTab is declared as:
+**
+**     CREATE TABLE pTab(a PRIMARY KEY, b DEFAULT 123, c DEFAULT 'abcd');
+**
+** Then this function prepares and returns the SQL statement:
+**
+**     SELECT NULL, 123, 'abcd';
+*/
+static int sessionPrepareDfltStmt(
+  sqlite3 *db,                    /* Database handle */
+  SessionTable *pTab,             /* Table to prepare statement for */
+  sqlite3_stmt **ppStmt           /* OUT: Statement handle */
+){
+  SessionBuffer sql = {0,0,0};
+  int rc = SQLITE_OK;
+  const char *zSep = " ";
+  int ii = 0;
+
+  *ppStmt = 0;
+  sessionAppendPrintf(&sql, &rc, "SELECT");
+  for(ii=0; ii<pTab->nCol; ii++){
+    const char *zDflt = pTab->azDflt[ii] ? pTab->azDflt[ii] : "NULL";
+    sessionAppendPrintf(&sql, &rc, "%s%s", zSep, zDflt);
+    zSep = ", ";
+  }
+  if( rc==SQLITE_OK ){
+    rc = sqlite3_prepare_v2(db, (const char*)sql.aBuf, -1, ppStmt, 0);
+  }
+  sqlite3_free(sql.aBuf);
+
+  return rc;
+}
+
+/*
+** Table pTab has one or more existing change-records with old.* records
+** with fewer than pTab->nCol columns. This function updates all such
+** change-records with the default values for the missing columns.
+*/ +static int sessionUpdateChanges(sqlite3_session *pSession, SessionTable *pTab){ + sqlite3_stmt *pStmt = 0; + int rc = pSession->rc; + + rc = sessionPrepareDfltStmt(pSession->db, pTab, &pStmt); + if( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ + int ii = 0; + SessionChange **pp = 0; + for(ii=0; iinChange; ii++){ + for(pp=&pTab->apChange[ii]; *pp; pp=&((*pp)->pNext)){ + if( (*pp)->nRecordField!=pTab->nCol ){ + sessionUpdateOneChange(pSession, &rc, pp, pTab->nCol, pStmt); + } + } + } + } + + pSession->rc = rc; + rc = sqlite3_finalize(pStmt); + if( pSession->rc==SQLITE_OK ) pSession->rc = rc; + return pSession->rc; } /* @@ -213820,6 +222614,7 @@ static int sessionUpdateMaxSize( ){ i64 nNew = 2; if( pC->op==SQLITE_INSERT ){ + if( pTab->bRowid ) nNew += 9; if( op!=SQLITE_DELETE ){ int ii; for(ii=0; iinCol; ii++){ @@ -213836,12 +222631,16 @@ static int sessionUpdateMaxSize( }else{ int ii; u8 *pCsr = pC->aRecord; - for(ii=0; iinCol; ii++){ + if( pTab->bRowid ){ + nNew += 9 + 1; + pCsr += 9; + } + for(ii=pTab->bRowid; iinCol; ii++){ int bChanged = 1; int nOld = 0; int eType; sqlite3_value *p = 0; - pSession->hook.xNew(pSession->hook.pCtx, ii, &p); + pSession->hook.xNew(pSession->hook.pCtx, ii-pTab->bRowid, &p); if( p==0 ){ return SQLITE_NOMEM; } @@ -213920,22 +222719,29 @@ static int sessionUpdateMaxSize( */ static void sessionPreupdateOneChange( int op, /* One of SQLITE_UPDATE, INSERT, DELETE */ + i64 iRowid, sqlite3_session *pSession, /* Session object pTab is attached to */ SessionTable *pTab /* Table that change applies to */ ){ int iHash; int bNull = 0; int rc = SQLITE_OK; + int nExpect = 0; SessionStat1Ctx stat1 = {{0,0,0,0,0},0}; if( pSession->rc ) return; /* Load table details if required */ - if( sessionInitTable(pSession, pTab) ) return; + if( sessionInitTable(pSession, pTab, pSession->db, pSession->zDb) ) return; /* Check the number of columns in this xPreUpdate call matches the ** number of columns in the table. */ - if( pTab->nCol!=pSession->hook.xCount(pSession->hook.pCtx) ){ + nExpect = pSession->hook.xCount(pSession->hook.pCtx); + if( (pTab->nCol-pTab->bRowid)nCol-pTab->bRowid)!=nExpect ){ pSession->rc = SQLITE_SCHEMA; return; } @@ -213968,14 +222774,16 @@ static void sessionPreupdateOneChange( /* Calculate the hash-key for this change. If the primary key of the row ** includes a NULL value, exit early. Such changes are ignored by the ** session module. */ - rc = sessionPreupdateHash(pSession, pTab, op==SQLITE_INSERT, &iHash, &bNull); + rc = sessionPreupdateHash( + pSession, iRowid, pTab, op==SQLITE_INSERT, &iHash, &bNull + ); if( rc!=SQLITE_OK ) goto error_out; if( bNull==0 ){ /* Search the hash table for an existing record for this row. 
*/ SessionChange *pC; for(pC=pTab->apChange[iHash]; pC; pC=pC->pNext){ - if( sessionPreupdateEqual(pSession, pTab, pC, op) ) break; + if( sessionPreupdateEqual(pSession, iRowid, pTab, pC, op) ) break; } if( pC==0 ){ @@ -213990,7 +222798,7 @@ static void sessionPreupdateOneChange( /* Figure out how large an allocation is required */ nByte = sizeof(SessionChange); - for(i=0; inCol; i++){ + for(i=0; i<(pTab->nCol-pTab->bRowid); i++){ sqlite3_value *p = 0; if( op!=SQLITE_INSERT ){ TESTONLY(int trc = ) pSession->hook.xOld(pSession->hook.pCtx, i, &p); @@ -214005,9 +222813,12 @@ static void sessionPreupdateOneChange( rc = sessionSerializeValue(0, p, &nByte); if( rc!=SQLITE_OK ) goto error_out; } + if( pTab->bRowid ){ + nByte += 9; /* Size of rowid field - an integer */ + } /* Allocate the change object */ - pC = (SessionChange *)sessionMalloc64(pSession, nByte); + pC = (SessionChange*)sessionMalloc64(pSession, nByte); if( !pC ){ rc = SQLITE_NOMEM; goto error_out; @@ -214021,7 +222832,12 @@ static void sessionPreupdateOneChange( ** required values and encodings have already been cached in memory. ** It is not possible for an OOM to occur in this block. */ nByte = 0; - for(i=0; inCol; i++){ + if( pTab->bRowid ){ + pC->aRecord[0] = SQLITE_INTEGER; + sessionPutI64(&pC->aRecord[1], iRowid); + nByte = 9; + } + for(i=0; i<(pTab->nCol-pTab->bRowid); i++){ sqlite3_value *p = 0; if( op!=SQLITE_INSERT ){ pSession->hook.xOld(pSession->hook.pCtx, i, &p); @@ -214035,6 +222851,7 @@ static void sessionPreupdateOneChange( if( pSession->bIndirect || pSession->hook.xDepth(pSession->hook.pCtx) ){ pC->bIndirect = 1; } + pC->nRecordField = pTab->nCol; pC->nRecord = nByte; pC->op = op; pC->pNext = pTab->apChange[iHash]; @@ -214120,6 +222937,8 @@ static void xPreUpdate( int nDb = sqlite3Strlen30(zDb); assert( sqlite3_mutex_held(db->mutex) ); + (void)iKey1; + (void)iKey2; for(pSession=(sqlite3_session *)pCtx; pSession; pSession=pSession->pNext){ SessionTable *pTab; @@ -214134,9 +222953,10 @@ static void xPreUpdate( pSession->rc = sessionFindTable(pSession, zName, &pTab); if( pTab ){ assert( pSession->rc==SQLITE_OK ); - sessionPreupdateOneChange(op, pSession, pTab); + assert( op==SQLITE_UPDATE || iKey1==iKey2 ); + sessionPreupdateOneChange(op, iKey1, pSession, pTab); if( op==SQLITE_UPDATE ){ - sessionPreupdateOneChange(SQLITE_INSERT, pSession, pTab); + sessionPreupdateOneChange(SQLITE_INSERT, iKey2, pSession, pTab); } } } @@ -214175,6 +222995,7 @@ static void sessionPreupdateHooks( typedef struct SessionDiffCtx SessionDiffCtx; struct SessionDiffCtx { sqlite3_stmt *pStmt; + int bRowid; int nOldOff; }; @@ -214183,19 +223004,20 @@ struct SessionDiffCtx { */ static int sessionDiffOld(void *pCtx, int iVal, sqlite3_value **ppVal){ SessionDiffCtx *p = (SessionDiffCtx*)pCtx; - *ppVal = sqlite3_column_value(p->pStmt, iVal+p->nOldOff); + *ppVal = sqlite3_column_value(p->pStmt, iVal+p->nOldOff+p->bRowid); return SQLITE_OK; } static int sessionDiffNew(void *pCtx, int iVal, sqlite3_value **ppVal){ SessionDiffCtx *p = (SessionDiffCtx*)pCtx; - *ppVal = sqlite3_column_value(p->pStmt, iVal); + *ppVal = sqlite3_column_value(p->pStmt, iVal+p->bRowid); return SQLITE_OK; } static int sessionDiffCount(void *pCtx){ SessionDiffCtx *p = (SessionDiffCtx*)pCtx; - return p->nOldOff ? p->nOldOff : sqlite3_column_count(p->pStmt); + return (p->nOldOff ? 
p->nOldOff : sqlite3_column_count(p->pStmt)) - p->bRowid; } static int sessionDiffDepth(void *pCtx){ + (void)pCtx; return 0; } @@ -214269,17 +223091,18 @@ static char *sessionExprCompareOther( } static char *sessionSelectFindNew( - int nCol, const char *zDb1, /* Pick rows in this db only */ const char *zDb2, /* But not in this one */ + int bRowid, const char *zTbl, /* Table name */ const char *zExpr ){ + const char *zSel = (bRowid ? SESSIONS_ROWID ", *" : "*"); char *zRet = sqlite3_mprintf( - "SELECT * FROM \"%w\".\"%w\" WHERE NOT EXISTS (" + "SELECT %s FROM \"%w\".\"%w\" WHERE NOT EXISTS (" " SELECT 1 FROM \"%w\".\"%w\" WHERE %s" ")", - zDb1, zTbl, zDb2, zTbl, zExpr + zSel, zDb1, zTbl, zDb2, zTbl, zExpr ); return zRet; } @@ -214293,7 +223116,9 @@ static int sessionDiffFindNew( char *zExpr ){ int rc = SQLITE_OK; - char *zStmt = sessionSelectFindNew(pTab->nCol, zDb1, zDb2, pTab->zName,zExpr); + char *zStmt = sessionSelectFindNew( + zDb1, zDb2, pTab->bRowid, pTab->zName, zExpr + ); if( zStmt==0 ){ rc = SQLITE_NOMEM; @@ -214304,8 +223129,10 @@ static int sessionDiffFindNew( SessionDiffCtx *pDiffCtx = (SessionDiffCtx*)pSession->hook.pCtx; pDiffCtx->pStmt = pStmt; pDiffCtx->nOldOff = 0; + pDiffCtx->bRowid = pTab->bRowid; while( SQLITE_ROW==sqlite3_step(pStmt) ){ - sessionPreupdateOneChange(op, pSession, pTab); + i64 iRowid = (pTab->bRowid ? sqlite3_column_int64(pStmt, 0) : 0); + sessionPreupdateOneChange(op, iRowid, pSession, pTab); } rc = sqlite3_finalize(pStmt); } @@ -214315,6 +223142,27 @@ static int sessionDiffFindNew( return rc; } +/* +** Return a comma-separated list of the fully-qualified (with both database +** and table name) column names from table pTab. e.g. +** +** "main"."t1"."a", "main"."t1"."b", "main"."t1"."c" +*/ +static char *sessionAllCols( + const char *zDb, + SessionTable *pTab +){ + int ii; + char *zRet = 0; + for(ii=0; iinCol; ii++){ + zRet = sqlite3_mprintf("%z%s\"%w\".\"%w\".\"%w\"", + zRet, (zRet ? ", " : ""), zDb, pTab->zName, pTab->azCol[ii] + ); + if( !zRet ) break; + } + return zRet; +} + static int sessionDiffFindModified( sqlite3_session *pSession, SessionTable *pTab, @@ -214329,11 +223177,13 @@ static int sessionDiffFindModified( if( zExpr2==0 ){ rc = SQLITE_NOMEM; }else{ + char *z1 = sessionAllCols(pSession->zDb, pTab); + char *z2 = sessionAllCols(zFrom, pTab); char *zStmt = sqlite3_mprintf( - "SELECT * FROM \"%w\".\"%w\", \"%w\".\"%w\" WHERE %s AND (%z)", - pSession->zDb, pTab->zName, zFrom, pTab->zName, zExpr, zExpr2 + "SELECT %s,%s FROM \"%w\".\"%w\", \"%w\".\"%w\" WHERE %s AND (%z)", + z1, z2, pSession->zDb, pTab->zName, zFrom, pTab->zName, zExpr, zExpr2 ); - if( zStmt==0 ){ + if( zStmt==0 || z1==0 || z2==0 ){ rc = SQLITE_NOMEM; }else{ sqlite3_stmt *pStmt; @@ -214344,12 +223194,15 @@ static int sessionDiffFindModified( pDiffCtx->pStmt = pStmt; pDiffCtx->nOldOff = pTab->nCol; while( SQLITE_ROW==sqlite3_step(pStmt) ){ - sessionPreupdateOneChange(SQLITE_UPDATE, pSession, pTab); + i64 iRowid = (pTab->bRowid ? 
sqlite3_column_int64(pStmt, 0) : 0); + sessionPreupdateOneChange(SQLITE_UPDATE, iRowid, pSession, pTab); } rc = sqlite3_finalize(pStmt); } - sqlite3_free(zStmt); } + sqlite3_free(zStmt); + sqlite3_free(z1); + sqlite3_free(z2); } return rc; @@ -214378,7 +223231,7 @@ SQLITE_API int sqlite3session_diff( /* Locate and if necessary initialize the target table object */ rc = sessionFindTable(pSession, zTbl, &pTo); if( pTo==0 ) goto diff_out; - if( sessionInitTable(pSession, pTo) ){ + if( sessionInitTable(pSession, pTo, pSession->db, pSession->zDb) ){ rc = pSession->rc; goto diff_out; } @@ -214388,9 +223241,12 @@ SQLITE_API int sqlite3session_diff( int bHasPk = 0; int bMismatch = 0; int nCol; /* Columns in zFrom.zTbl */ + int bRowid = 0; u8 *abPK; const char **azCol = 0; - rc = sessionTableInfo(0, db, zFrom, zTbl, &nCol, 0, &azCol, &abPK); + rc = sessionTableInfo(0, db, zFrom, zTbl, &nCol, 0, &azCol, 0, &abPK, + pSession->bImplicitPK ? &bRowid : 0 + ); if( rc==SQLITE_OK ){ if( pTo->nCol!=nCol ){ bMismatch = 1; @@ -214503,6 +223359,7 @@ static void sessionDeleteTable(sqlite3_session *pSession, SessionTable *pList){ sessionFree(pSession, p); } } + sqlite3_finalize(pTab->pDfltStmt); sessionFree(pSession, (char*)pTab->azCol); /* cast works around VC++ bug */ sessionFree(pSession, pTab->apChange); sessionFree(pSession, pTab); @@ -214537,7 +223394,7 @@ SQLITE_API void sqlite3session_delete(sqlite3_session *pSession){ /* Assert that all allocations have been freed and then free the ** session object itself. */ - assert( pSession->nMalloc==0 ); + // assert( pSession->nMalloc==0 ); sqlite3_free(pSession); } @@ -214608,48 +223465,6 @@ SQLITE_API int sqlite3session_attach( return rc; } -/* -** Ensure that there is room in the buffer to append nByte bytes of data. -** If not, use sqlite3_realloc() to grow the buffer so that there is. -** -** If successful, return zero. Otherwise, if an OOM condition is encountered, -** set *pRc to SQLITE_NOMEM and return non-zero. -*/ -static int sessionBufferGrow(SessionBuffer *p, i64 nByte, int *pRc){ -#define SESSION_MAX_BUFFER_SZ (0x7FFFFF00 - 1) - i64 nReq = p->nBuf + nByte; - if( *pRc==SQLITE_OK && nReq>p->nAlloc ){ - u8 *aNew; - i64 nNew = p->nAlloc ? p->nAlloc : 128; - - do { - nNew = nNew*2; - }while( nNewSESSION_MAX_BUFFER_SZ ){ - nNew = SESSION_MAX_BUFFER_SZ; - if( nNewaBuf, nNew); - if( 0==aNew ){ - *pRc = SQLITE_NOMEM; - }else{ - p->aBuf = aNew; - p->nAlloc = nNew; - } - } - return (*pRc!=SQLITE_OK); -} - /* ** Append the value passed as the second argument to the buffer passed ** as the first. @@ -214718,26 +223533,6 @@ static void sessionAppendBlob( } } -/* -** This function is a no-op if *pRc is other than SQLITE_OK when it is -** called. Otherwise, append a string to the buffer. All bytes in the string -** up to (but not including) the nul-terminator are written to the buffer. -** -** If an OOM condition is encountered, set *pRc to SQLITE_NOMEM before -** returning. -*/ -static void sessionAppendStr( - SessionBuffer *p, - const char *zStr, - int *pRc -){ - int nStr = sqlite3Strlen30(zStr); - if( 0==sessionBufferGrow(p, nStr, pRc) ){ - memcpy(&p->aBuf[p->nBuf], zStr, nStr); - p->nBuf += nStr; - } -} - /* ** This function is a no-op if *pRc is other than SQLITE_OK when it is ** called. 
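**
** A minimal sketch of driving the sqlite3session_diff() API shown above
** (not part of the upstream diff; assumes "aux" is an already-ATTACHed
** database containing a table t1 with the same schema as main.t1):
**
**   sqlite3_session *pSession = 0;
**   char *zErr = 0;
**   int nChangeset = 0;
**   void *pChangeset = 0;
**
**   int rc = sqlite3session_create(db, "main", &pSession);
**   if( rc==SQLITE_OK ) rc = sqlite3session_attach(pSession, "t1");
**   if( rc==SQLITE_OK ) rc = sqlite3session_diff(pSession, "aux", "t1", &zErr);
**   if( rc==SQLITE_OK ){
**     rc = sqlite3session_changeset(pSession, &nChangeset, &pChangeset);
**   }
**   /* ... use the changeset, then ... */
**   sqlite3_free(pChangeset);
**   sqlite3session_delete(pSession);
**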
Otherwise, append the string representation of integer iVal @@ -214770,7 +223565,7 @@ static void sessionAppendIdent( const char *zStr, /* String to quote, escape and append */ int *pRc /* IN/OUT: Error code */ ){ - int nStr = sqlite3Strlen30(zStr)*2 + 2 + 1; + int nStr = sqlite3Strlen30(zStr)*2 + 2 + 2; if( 0==sessionBufferGrow(p, nStr, pRc) ){ char *zOut = (char *)&p->aBuf[p->nBuf]; const char *zIn = zStr; @@ -214781,6 +223576,7 @@ static void sessionAppendIdent( } *zOut++ = '"'; p->nBuf = (int)((u8 *)zOut - p->aBuf); + p->aBuf[p->nBuf] = 0x00; } } @@ -214916,7 +223712,7 @@ static int sessionAppendUpdate( /* If at least one field has been modified, this is not a no-op. */ if( bChanged ) bNoop = 0; - /* Add a field to the old.* record. This is omitted if this modules is + /* Add a field to the old.* record. This is omitted if this module is ** currently generating a patchset. */ if( bPatchset==0 ){ if( bChanged || abPK[i] ){ @@ -215005,12 +223801,20 @@ static int sessionAppendDelete( ** Formulate and prepare a SELECT statement to retrieve a row from table ** zTab in database zDb based on its primary key. i.e. ** -** SELECT * FROM zDb.zTab WHERE pk1 = ? AND pk2 = ? AND ... +** SELECT *, FROM zDb.zTab WHERE (pk1, pk2,...) IS (?1, ?2,...) +** +** where is: +** +** 1 AND (?A OR ?1 IS ) AND ... +** +** for each non-pk . */ static int sessionSelectStmt( sqlite3 *db, /* Database handle */ + int bIgnoreNoop, const char *zDb, /* Database name */ const char *zTab, /* Table name */ + int bRowid, int nCol, /* Number of columns in table */ const char **azCol, /* Names of table columns */ u8 *abPK, /* PRIMARY KEY array */ @@ -215018,8 +223822,50 @@ static int sessionSelectStmt( ){ int rc = SQLITE_OK; char *zSql = 0; + const char *zSep = ""; + const char *zCols = bRowid ? SESSIONS_ROWID ", *" : "*"; int nSql = -1; + int i; + + SessionBuffer nooptest = {0, 0, 0}; + SessionBuffer pkfield = {0, 0, 0}; + SessionBuffer pkvar = {0, 0, 0}; + + sessionAppendStr(&nooptest, ", 1", &rc); + + if( 0==sqlite3_stricmp("sqlite_stat1", zTab) ){ + sessionAppendStr(&nooptest, " AND (?6 OR ?3 IS stat)", &rc); + sessionAppendStr(&pkfield, "tbl, idx", &rc); + sessionAppendStr(&pkvar, + "?1, (CASE WHEN ?2=X'' THEN NULL ELSE ?2 END)", &rc + ); + zCols = "tbl, ?2, stat"; + }else{ + for(i=0; ipTable; rc==SQLITE_OK && pTab; pTab=pTab->pNext){ if( pTab->nEntry ){ const char *zName = pTab->zName; - int nCol = 0; /* Number of columns in table */ - u8 *abPK = 0; /* Primary key array */ - const char **azCol = 0; /* Table columns */ int i; /* Used to iterate through hash buckets */ sqlite3_stmt *pSel = 0; /* SELECT statement to query table pTab */ int nRewind = buf.nBuf; /* Initial size of write buffer */ int nNoop; /* Size of buffer after writing tbl header */ + int nOldCol = pTab->nCol; /* Check the table schema is still Ok. 
*/ - rc = sessionTableInfo(0, db, pSession->zDb, zName, &nCol, 0,&azCol,&abPK); - if( !rc && (pTab->nCol!=nCol || memcmp(abPK, pTab->abPK, nCol)) ){ - rc = SQLITE_SCHEMA; + rc = sessionReinitTable(pSession, pTab); + if( rc==SQLITE_OK && pTab->nCol!=nOldCol ){ + rc = sessionUpdateChanges(pSession, pTab); } /* Write a table header */ @@ -215211,8 +224058,9 @@ static int sessionGenerateChangeset( /* Build and compile a statement to execute: */ if( rc==SQLITE_OK ){ - rc = sessionSelectStmt( - db, pSession->zDb, zName, nCol, azCol, abPK, &pSel); + rc = sessionSelectStmt(db, 0, pSession->zDb, + zName, pTab->bRowid, pTab->nCol, pTab->azCol, pTab->abPK, &pSel + ); } nNoop = buf.nBuf; @@ -215220,22 +224068,22 @@ static int sessionGenerateChangeset( SessionChange *p; /* Used to iterate through changes */ for(p=pTab->apChange[i]; rc==SQLITE_OK && p; p=p->pNext){ - rc = sessionSelectBind(pSel, nCol, abPK, p); + rc = sessionSelectBind(pSel, pTab->nCol, pTab->abPK, p); if( rc!=SQLITE_OK ) continue; if( sqlite3_step(pSel)==SQLITE_ROW ){ if( p->op==SQLITE_INSERT ){ int iCol; sessionAppendByte(&buf, SQLITE_INSERT, &rc); sessionAppendByte(&buf, p->bIndirect, &rc); - for(iCol=0; iColnCol; iCol++){ sessionAppendCol(&buf, pSel, iCol, &rc); } }else{ - assert( abPK!=0 ); /* Because sessionSelectStmt() returned ok */ - rc = sessionAppendUpdate(&buf, bPatchset, pSel, p, abPK); + assert( pTab->abPK!=0 ); + rc = sessionAppendUpdate(&buf, bPatchset, pSel, p, pTab->abPK); } }else if( p->op!=SQLITE_INSERT ){ - rc = sessionAppendDelete(&buf, bPatchset, p, nCol, abPK); + rc = sessionAppendDelete(&buf, bPatchset, p, pTab->nCol,pTab->abPK); } if( rc==SQLITE_OK ){ rc = sqlite3_reset(pSel); @@ -215260,7 +224108,6 @@ static int sessionGenerateChangeset( if( buf.nBuf==nNoop ){ buf.nBuf = nRewind; } - sqlite3_free((char*)azCol); /* cast works around VC++ bug */ } } @@ -215295,7 +224142,7 @@ SQLITE_API int sqlite3session_changeset( int rc; if( pnChangeset==0 || ppChangeset==0 ) return SQLITE_MISUSE; - rc = sessionGenerateChangeset(pSession, 0, 0, 0, pnChangeset,ppChangeset); + rc = sessionGenerateChangeset(pSession, 0, 0, 0, pnChangeset, ppChangeset); assert( rc || pnChangeset==0 || pSession->bEnableSize==0 || *pnChangeset<=pSession->nMaxChangesetSize ); @@ -215413,6 +224260,19 @@ SQLITE_API int sqlite3session_object_config(sqlite3_session *pSession, int op, v break; } + case SQLITE_SESSION_OBJCONFIG_ROWID: { + int iArg = *(int*)pArg; + if( iArg>=0 ){ + if( pSession->pTable ){ + rc = SQLITE_MISUSE; + }else{ + pSession->bImplicitPK = (iArg!=0); + } + } + *(int*)pArg = pSession->bImplicitPK; + break; + } + default: rc = SQLITE_MISUSE; } @@ -215671,15 +224531,19 @@ static int sessionReadRecord( } } if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){ - sqlite3_int64 v = sessionGetI64(aVal); - if( eType==SQLITE_INTEGER ){ - sqlite3VdbeMemSetInt64(apOut[i], v); + if( (pIn->nData-pIn->iNext)<8 ){ + rc = SQLITE_CORRUPT_BKPT; }else{ - double d; - memcpy(&d, &v, 8); - sqlite3VdbeMemSetDouble(apOut[i], d); + sqlite3_int64 v = sessionGetI64(aVal); + if( eType==SQLITE_INTEGER ){ + sqlite3VdbeMemSetInt64(apOut[i], v); + }else{ + double d; + memcpy(&d, &v, 8); + sqlite3VdbeMemSetDouble(apOut[i], d); + } + pIn->iNext += 8; } - pIn->iNext += 8; } } } @@ -215948,6 +224812,22 @@ static int sessionChangesetNextOne( if( p->op==SQLITE_INSERT ) p->op = SQLITE_DELETE; else if( p->op==SQLITE_DELETE ) p->op = SQLITE_INSERT; } + + /* If this is an UPDATE that is part of a changeset, then check that + ** there are no fields in the old.* record that are not 
(a) PK fields, + ** or (b) also present in the new.* record. + ** + ** Such records are technically corrupt, but the rebaser was at one + ** point generating them. Under most circumstances this is benign, but + ** can cause spurious SQLITE_RANGE errors when applying the changeset. */ + if( p->bPatchset==0 && p->op==SQLITE_UPDATE){ + for(i=0; inCol; i++){ + if( p->abPK[i]==0 && p->apValue[i+p->nCol]==0 ){ + sqlite3ValueFree(p->apValue[i]); + p->apValue[i] = 0; + } + } + } } return SQLITE_ROW; @@ -216385,6 +225265,8 @@ struct SessionApplyCtx { SessionBuffer rebase; /* Rebase information (if any) here */ u8 bRebaseStarted; /* If table header is already in rebase */ u8 bRebase; /* True to collect rebase information */ + u8 bIgnoreNoop; /* True to ignore no-op conflicts */ + int bRowid; }; /* Number of prepared UPDATE statements to cache. */ @@ -216635,8 +225517,10 @@ static int sessionSelectRow( const char *zTab, /* Table name */ SessionApplyCtx *p /* Session changeset-apply context */ ){ - return sessionSelectStmt( - db, "main", zTab, p->nCol, p->azCol, p->abPK, &p->pSelect); + /* TODO */ + return sessionSelectStmt(db, p->bIgnoreNoop, + "main", zTab, p->bRowid, p->nCol, p->azCol, p->abPK, &p->pSelect + ); } /* @@ -216794,22 +225678,34 @@ static int sessionBindRow( ** UPDATE, bind values from the old.* record. */ static int sessionSeekToRow( - sqlite3 *db, /* Database handle */ sqlite3_changeset_iter *pIter, /* Changeset iterator */ - u8 *abPK, /* Primary key flags array */ - sqlite3_stmt *pSelect /* SELECT statement from sessionSelectRow() */ + SessionApplyCtx *p ){ + sqlite3_stmt *pSelect = p->pSelect; int rc; /* Return code */ int nCol; /* Number of columns in table */ int op; /* Changset operation (SQLITE_UPDATE etc.) */ const char *zDummy; /* Unused */ + sqlite3_clear_bindings(pSelect); sqlite3changeset_op(pIter, &zDummy, &nCol, &op, 0); rc = sessionBindRow(pIter, op==SQLITE_INSERT ? sqlite3changeset_new : sqlite3changeset_old, - nCol, abPK, pSelect + nCol, p->abPK, pSelect ); + if( op!=SQLITE_DELETE && p->bIgnoreNoop ){ + int ii; + for(ii=0; rc==SQLITE_OK && iiabPK[ii]==0 ){ + sqlite3_value *pVal = 0; + sqlite3changeset_new(pIter, ii, &pVal); + sqlite3_bind_int(pSelect, ii+1+nCol, (pVal==0)); + if( pVal ) rc = sessionBindValue(pSelect, ii+1, pVal); + } + } + } + if( rc==SQLITE_OK ){ rc = sqlite3_step(pSelect); if( rc!=SQLITE_ROW ) rc = sqlite3_reset(pSelect); @@ -216924,16 +225820,22 @@ static int sessionConflictHandler( /* Bind the new.* PRIMARY KEY values to the SELECT statement. */ if( pbReplace ){ - rc = sessionSeekToRow(p->db, pIter, p->abPK, p->pSelect); + rc = sessionSeekToRow(pIter, p); }else{ rc = SQLITE_OK; } if( rc==SQLITE_ROW ){ /* There exists another row with the new.* primary key. 
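**
** A minimal sketch of how the bIgnoreNoop logic above is reached (not
** part of the upstream diff): the caller opts in by passing the new
** SQLITE_CHANGESETAPPLY_IGNORENOOP flag (wired up a little further
** below) to sqlite3changeset_apply_v2(), so that conflicts which would
** not actually change the row are silently omitted instead of being
** reported to the conflict handler:
**
**   static int xConflict(void *pCtx, int eConflict,
**                        sqlite3_changeset_iter *pIter){
**     return SQLITE_CHANGESET_ABORT;   /* only real conflicts get here */
**   }
**
**   int rc = sqlite3changeset_apply_v2(db, nChangeset, pChangeset,
**       0, xConflict, 0,               /* xFilter, xConflict, pCtx */
**       0, 0,                          /* ppRebase, pnRebase */
**       SQLITE_CHANGESETAPPLY_IGNORENOOP
**   );
**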
*/ - pIter->pConflict = p->pSelect; - res = xConflict(pCtx, eType, pIter); - pIter->pConflict = 0; + if( p->bIgnoreNoop + && sqlite3_column_int(p->pSelect, sqlite3_column_count(p->pSelect)-1) + ){ + res = SQLITE_CHANGESET_OMIT; + }else{ + pIter->pConflict = p->pSelect; + res = xConflict(pCtx, eType, pIter); + pIter->pConflict = 0; + } rc = sqlite3_reset(p->pSelect); }else if( rc==SQLITE_OK ){ if( p->bDeferConstraints && eType==SQLITE_CHANGESET_CONFLICT ){ @@ -217041,7 +225943,7 @@ static int sessionApplyOneOp( sqlite3_step(p->pDelete); rc = sqlite3_reset(p->pDelete); - if( rc==SQLITE_OK && sqlite3_changes(p->db)==0 ){ + if( rc==SQLITE_OK && sqlite3_changes(p->db)==0 && p->bIgnoreNoop==0 ){ rc = sessionConflictHandler( SQLITE_CHANGESET_DATA, p, pIter, xConflict, pCtx, pbRetry ); @@ -217098,7 +226000,7 @@ static int sessionApplyOneOp( /* Check if there is a conflicting row. For sqlite_stat1, this needs ** to be done using a SELECT, as there is no PRIMARY KEY in the ** database schema to throw an exception if a duplicate is inserted. */ - rc = sessionSeekToRow(p->db, pIter, p->abPK, p->pSelect); + rc = sessionSeekToRow(pIter, p); if( rc==SQLITE_ROW ){ rc = SQLITE_CONSTRAINT; sqlite3_reset(p->pSelect); @@ -217275,6 +226177,7 @@ static int sessionChangesetApply( memset(&sApply, 0, sizeof(sApply)); sApply.bRebase = (ppRebase && pnRebase); sApply.bInvertConstraints = !!(flags & SQLITE_CHANGESETAPPLY_INVERT); + sApply.bIgnoreNoop = !!(flags & SQLITE_CHANGESETAPPLY_IGNORENOOP); sqlite3_mutex_enter(sqlite3_db_mutex(db)); if( (flags & SQLITE_CHANGESETAPPLY_NOSAVEPOINT)==0 ){ rc = sqlite3_exec(db, "SAVEPOINT changeset_apply", 0, 0, 0); @@ -217312,6 +226215,7 @@ static int sessionChangesetApply( sApply.bStat1 = 0; sApply.bDeferConstraints = 1; sApply.bRebaseStarted = 0; + sApply.bRowid = 0; memset(&sApply.constraints, 0, sizeof(SessionBuffer)); /* If an xFilter() callback was specified, invoke it now. If the @@ -217331,8 +226235,8 @@ static int sessionChangesetApply( int i; sqlite3changeset_pk(pIter, &abPK, 0); - rc = sessionTableInfo(0, - db, "main", zNew, &sApply.nCol, &zTab, &sApply.azCol, &sApply.abPK + rc = sessionTableInfo(0, db, "main", zNew, + &sApply.nCol, &zTab, &sApply.azCol, 0, &sApply.abPK, &sApply.bRowid ); if( rc!=SQLITE_OK ) break; for(i=0; iflags & SQLITE_FkNoAction; + + if( flags & SQLITE_CHANGESETAPPLY_FKNOACTION ){ + db->flags |= ((u64)SQLITE_FkNoAction); + db->aDb[0].pSchema->schema_cookie -= 32; + } + if( rc==SQLITE_OK ){ rc = sessionChangesetApply( db, pIter, xFilter, xConflict, pCtx, ppRebase, pnRebase, flags ); } + + if( (flags & SQLITE_CHANGESETAPPLY_FKNOACTION) && savedFlag==0 ){ + assert( db->flags & SQLITE_FkNoAction ); + db->flags &= ~((u64)SQLITE_FkNoAction); + db->aDb[0].pSchema->schema_cookie -= 32; + } return rc; } @@ -217556,6 +226473,9 @@ struct sqlite3_changegroup { int rc; /* Error code */ int bPatch; /* True to accumulate patchsets */ SessionTable *pList; /* List of tables in current patch */ + + sqlite3 *db; /* Configured by changegroup_schema() */ + char *zDb; /* Configured by changegroup_schema() */ }; /* @@ -217576,6 +226496,7 @@ static int sessionChangeMerge( ){ SessionChange *pNew = 0; int rc = SQLITE_OK; + assert( aRec!=0 ); if( !pExist ){ pNew = (SessionChange *)sqlite3_malloc64(sizeof(SessionChange) + nRec); @@ -217741,6 +226662,114 @@ static int sessionChangeMerge( return rc; } +/* +** Check if a changeset entry with nCol columns and the PK array passed +** as the final argument to this function is compatible with SessionTable +** pTab. If so, return 1. 
Otherwise, if they are incompatible in some way, +** return 0. +*/ +static int sessionChangesetCheckCompat( + SessionTable *pTab, + int nCol, + u8 *abPK +){ + if( pTab->azCol && nColnCol ){ + int ii; + for(ii=0; iinCol; ii++){ + u8 bPK = (ii < nCol) ? abPK[ii] : 0; + if( pTab->abPK[ii]!=bPK ) return 0; + } + return 1; + } + return (pTab->nCol==nCol && 0==memcmp(abPK, pTab->abPK, nCol)); +} + +static int sessionChangesetExtendRecord( + sqlite3_changegroup *pGrp, + SessionTable *pTab, + int nCol, + int op, + const u8 *aRec, + int nRec, + SessionBuffer *pOut +){ + int rc = SQLITE_OK; + int ii = 0; + + assert( pTab->azCol ); + assert( nColnCol ); + + pOut->nBuf = 0; + if( op==SQLITE_INSERT || (op==SQLITE_DELETE && pGrp->bPatch==0) ){ + /* Append the missing default column values to the record. */ + sessionAppendBlob(pOut, aRec, nRec, &rc); + if( rc==SQLITE_OK && pTab->pDfltStmt==0 ){ + rc = sessionPrepareDfltStmt(pGrp->db, pTab, &pTab->pDfltStmt); + } + for(ii=nCol; rc==SQLITE_OK && iinCol; ii++){ + int eType = sqlite3_column_type(pTab->pDfltStmt, ii); + sessionAppendByte(pOut, eType, &rc); + switch( eType ){ + case SQLITE_FLOAT: + case SQLITE_INTEGER: { + i64 iVal; + if( eType==SQLITE_INTEGER ){ + iVal = sqlite3_column_int64(pTab->pDfltStmt, ii); + }else{ + double rVal = sqlite3_column_int64(pTab->pDfltStmt, ii); + memcpy(&iVal, &rVal, sizeof(i64)); + } + if( SQLITE_OK==sessionBufferGrow(pOut, 8, &rc) ){ + sessionPutI64(&pOut->aBuf[pOut->nBuf], iVal); + } + break; + } + + case SQLITE_BLOB: + case SQLITE_TEXT: { + int n = sqlite3_column_bytes(pTab->pDfltStmt, ii); + sessionAppendVarint(pOut, n, &rc); + if( eType==SQLITE_TEXT ){ + const u8 *z = (const u8*)sqlite3_column_text(pTab->pDfltStmt, ii); + sessionAppendBlob(pOut, z, n, &rc); + }else{ + const u8 *z = (const u8*)sqlite3_column_blob(pTab->pDfltStmt, ii); + sessionAppendBlob(pOut, z, n, &rc); + } + break; + } + + default: + assert( eType==SQLITE_NULL ); + break; + } + } + }else if( op==SQLITE_UPDATE ){ + /* Append missing "undefined" entries to the old.* record. And, if this + ** is an UPDATE, to the new.* record as well. */ + int iOff = 0; + if( pGrp->bPatch==0 ){ + for(ii=0; iinCol-nCol); ii++){ + sessionAppendByte(pOut, 0x00, &rc); + } + } + + sessionAppendBlob(pOut, &aRec[iOff], nRec-iOff, &rc); + for(ii=0; ii<(pTab->nCol-nCol); ii++){ + sessionAppendByte(pOut, 0x00, &rc); + } + }else{ + assert( op==SQLITE_DELETE && pGrp->bPatch ); + sessionAppendBlob(pOut, aRec, nRec, &rc); + } + + return rc; +} + /* ** Add all changes in the changeset traversed by the iterator passed as ** the first argument to the changegroup hash tables. @@ -217754,6 +226783,7 @@ static int sessionChangesetToHash( int nRec; int rc = SQLITE_OK; SessionTable *pTab = 0; + SessionBuffer rec = {0, 0, 0}; while( SQLITE_ROW==sessionChangesetNext(pIter, &aRec, &nRec, 0) ){ const char *zNew; @@ -217765,6 +226795,9 @@ static int sessionChangesetToHash( SessionChange *pExist = 0; SessionChange **pp; + /* Ensure that only changesets, or only patchsets, but not a mixture + ** of both, are being combined. It is an error to try to combine a + ** changeset and a patchset. 
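**
** A minimal sketch of the API that drives the machinery above (not part
** of the upstream diff; nOld/pOld and nNew/pNew are hypothetical
** changeset buffers). sqlite3changegroup_schema(), defined a little
** further below, is what populates pGrp->db/pGrp->zDb and so allows
** records with missing trailing columns to be extended with default
** values:
**
**   sqlite3_changegroup *pGrp = 0;
**   int nOut = 0;
**   void *pOut = 0;
**
**   int rc = sqlite3changegroup_new(&pGrp);
**   if( rc==SQLITE_OK ) rc = sqlite3changegroup_schema(pGrp, db, "main");
**   if( rc==SQLITE_OK ) rc = sqlite3changegroup_add(pGrp, nOld, pOld);
**   if( rc==SQLITE_OK ) rc = sqlite3changegroup_add(pGrp, nNew, pNew);
**   if( rc==SQLITE_OK ) rc = sqlite3changegroup_output(pGrp, &nOut, &pOut);
**   sqlite3changegroup_delete(pGrp);
**   /* the caller eventually calls sqlite3_free(pOut) */
**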
*/ if( pGrp->pList==0 ){ pGrp->bPatch = pIter->bPatchset; }else if( pIter->bPatchset!=pGrp->bPatch ){ @@ -217797,18 +226830,38 @@ static int sessionChangesetToHash( pTab->zName = (char*)&pTab->abPK[nCol]; memcpy(pTab->zName, zNew, nNew+1); + if( pGrp->db ){ + pTab->nCol = 0; + rc = sessionInitTable(0, pTab, pGrp->db, pGrp->zDb); + if( rc ){ + assert( pTab->azCol==0 ); + sqlite3_free(pTab); + break; + } + } + /* The new object must be linked on to the end of the list, not ** simply added to the start of it. This is to ensure that the ** tables within the output of sqlite3changegroup_output() are in ** the right order. */ for(ppTab=&pGrp->pList; *ppTab; ppTab=&(*ppTab)->pNext); *ppTab = pTab; - }else if( pTab->nCol!=nCol || memcmp(pTab->abPK, abPK, nCol) ){ + } + + if( !sessionChangesetCheckCompat(pTab, nCol, abPK) ){ rc = SQLITE_SCHEMA; break; } } + if( nColnCol ){ + assert( pGrp->db ); + rc = sessionChangesetExtendRecord(pGrp, pTab, nCol, op, aRec, nRec, &rec); + if( rc ) break; + aRec = rec.aBuf; + nRec = rec.nBuf; + } + if( sessionGrowHash(0, pIter->bPatchset, pTab) ){ rc = SQLITE_NOMEM; break; @@ -217846,6 +226899,7 @@ static int sessionChangesetToHash( } } + sqlite3_free(rec.aBuf); if( rc==SQLITE_OK ) rc = pIter->rc; return rc; } @@ -217932,6 +226986,31 @@ SQLITE_API int sqlite3changegroup_new(sqlite3_changegroup **pp){ return rc; } +/* +** Provide a database schema to the changegroup object. +*/ +SQLITE_API int sqlite3changegroup_schema( + sqlite3_changegroup *pGrp, + sqlite3 *db, + const char *zDb +){ + int rc = SQLITE_OK; + + if( pGrp->pList || pGrp->db ){ + /* Cannot add a schema after one or more calls to sqlite3changegroup_add(), + ** or after sqlite3changegroup_schema() has already been called. */ + rc = SQLITE_MISUSE; + }else{ + pGrp->zDb = sqlite3_mprintf("%s", zDb); + if( pGrp->zDb==0 ){ + rc = SQLITE_NOMEM; + }else{ + pGrp->db = db; + } + } + return rc; +} + /* ** Add the changeset currently stored in buffer pData, size nData bytes, ** to changeset-group p. @@ -217995,6 +227074,7 @@ SQLITE_API int sqlite3changegroup_output_strm( */ SQLITE_API void sqlite3changegroup_delete(sqlite3_changegroup *pGrp){ if( pGrp ){ + sqlite3_free(pGrp->zDb); sessionDeleteTable(0, pGrp->pList); sqlite3_free(pGrp); } @@ -218144,7 +227224,7 @@ static void sessionAppendPartialUpdate( if( !pIter->abPK[i] && a1[0] ) bData = 1; memcpy(pOut, a1, n1); pOut += n1; - }else if( a2[0]!=0xFF ){ + }else if( a2[0]!=0xFF && a1[0] ){ bData = 1; memcpy(pOut, a2, n2); pOut += n2; @@ -218702,7 +227782,7 @@ struct Fts5PhraseIter { ** See xPhraseFirstColumn above. */ struct Fts5ExtensionApi { - int iVersion; /* Currently always set to 3 */ + int iVersion; /* Currently always set to 2 */ void *(*xUserData)(Fts5Context*); @@ -218931,8 +228011,8 @@ struct Fts5ExtensionApi { ** as separate queries of the FTS index are required for each synonym. ** ** When using methods (2) or (3), it is important that the tokenizer only -** provide synonyms when tokenizing document text (method (2)) or query -** text (method (3)), not both. Doing so will not cause any errors, but is +** provide synonyms when tokenizing document text (method (3)) or query +** text (method (2)), not both. Doing so will not cause any errors, but is ** inefficient. 
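+**
+** For example (invented tokens, shown only to illustrate the corrected
+** numbering): with method (2) the tokenizer reports the synonym "first"
+** while tokenizing the query token "1st", so the query consults the
+** index for both terms; with method (3) it reports "first" while
+** tokenizing document text, so the token "1st" is indexed under both.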
*/ typedef struct Fts5Tokenizer Fts5Tokenizer; @@ -218980,7 +228060,7 @@ struct fts5_api { int (*xCreateTokenizer)( fts5_api *pApi, const char *zName, - void *pContext, + void *pUserData, fts5_tokenizer *pTokenizer, void (*xDestroy)(void*) ); @@ -218989,7 +228069,7 @@ struct fts5_api { int (*xFindTokenizer)( fts5_api *pApi, const char *zName, - void **ppContext, + void **ppUserData, fts5_tokenizer *pTokenizer ); @@ -218997,7 +228077,7 @@ struct fts5_api { int (*xCreateFunction)( fts5_api *pApi, const char *zName, - void *pContext, + void *pUserData, fts5_extension_function xFunction, void (*xDestroy)(void*) ); @@ -219169,6 +228249,10 @@ typedef struct Fts5Config Fts5Config; ** attempt to merge together. A value of 1 sets the object to use the ** compile time default. Zero disables auto-merge altogether. ** +** bContentlessDelete: +** True if the contentless_delete option was present in the CREATE +** VIRTUAL TABLE statement. +** ** zContent: ** ** zContentRowid: @@ -219203,6 +228287,7 @@ struct Fts5Config { int nPrefix; /* Number of prefix indexes */ int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */ int eContent; /* An FTS5_CONTENT value */ + int bContentlessDelete; /* "contentless_delete=" option (dflt==0) */ char *zContent; /* content table */ char *zContentRowid; /* "content_rowid=" option value */ int bColumnsize; /* "columnsize=" option value (dflt==1) */ @@ -219214,6 +228299,7 @@ struct Fts5Config { int ePattern; /* FTS_PATTERN_XXX constant */ /* Values loaded from the %_config table */ + int iVersion; /* fts5 file format 'version' */ int iCookie; /* Incremented when %_config is modified */ int pgsz; /* Approximate page size used in %_data */ int nAutomerge; /* 'automerge' setting */ @@ -219222,6 +228308,8 @@ struct Fts5Config { int nHashSize; /* Bytes of memory for in-memory hash */ char *zRank; /* Name of rank function */ char *zRankArgs; /* Arguments to rank function */ + int bSecureDelete; /* 'secure-delete' */ + int nDeleteMerge; /* 'deletemerge' */ /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */ char **pzErrmsg; @@ -219231,8 +228319,11 @@ struct Fts5Config { #endif }; -/* Current expected value of %_config table 'version' field */ -#define FTS5_CURRENT_VERSION 4 +/* Current expected value of %_config table 'version' field. And +** the expected version if the 'secure-delete' option has ever been +** set on the table. */ +#define FTS5_CURRENT_VERSION 4 +#define FTS5_CURRENT_VERSION_SECUREDELETE 5 #define FTS5_CONTENT_NORMAL 0 #define FTS5_CONTENT_NONE 1 @@ -219301,7 +228392,7 @@ static void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...); static char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...); #define fts5BufferZero(x) sqlite3Fts5BufferZero(x) -#define fts5BufferAppendVarint(a,b,c) sqlite3Fts5BufferAppendVarint(a,b,c) +#define fts5BufferAppendVarint(a,b,c) sqlite3Fts5BufferAppendVarint(a,b,(i64)c) #define fts5BufferFree(a) sqlite3Fts5BufferFree(a) #define fts5BufferAppendBlob(a,b,c,d) sqlite3Fts5BufferAppendBlob(a,b,c,d) #define fts5BufferSet(a,b,c,d) sqlite3Fts5BufferSet(a,b,c,d) @@ -219398,6 +228489,7 @@ struct Fts5IndexIter { ** above. */ #define FTS5INDEX_QUERY_SKIPEMPTY 0x0010 #define FTS5INDEX_QUERY_NOOUTPUT 0x0020 +#define FTS5INDEX_QUERY_SKIPHASH 0x0040 /* ** Create/destroy an Fts5Index object. 
@@ -219540,6 +228632,9 @@ static int sqlite3Fts5IndexReset(Fts5Index *p); static int sqlite3Fts5IndexLoadConfig(Fts5Index *p); +static int sqlite3Fts5IndexGetOrigin(Fts5Index *p, i64 *piOrigin); +static int sqlite3Fts5IndexContentlessDelete(Fts5Index *p, i64 iOrigin, i64 iRowid); + /* ** End of interface to code in fts5_index.c. **************************************************************************/ @@ -219552,7 +228647,7 @@ static int sqlite3Fts5GetVarintLen(u32 iVal); static u8 sqlite3Fts5GetVarint(const unsigned char*, u64*); static int sqlite3Fts5PutVarint(unsigned char *p, u64 v); -#define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&b) +#define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&(b)) #define fts5GetVarint sqlite3Fts5GetVarint #define fts5FastGetVarint32(a, iOff, nVal) { \ @@ -219624,6 +228719,11 @@ static int sqlite3Fts5HashWrite( */ static void sqlite3Fts5HashClear(Fts5Hash*); +/* +** Return true if the hash is empty, false otherwise. +*/ +static int sqlite3Fts5HashIsEmpty(Fts5Hash*); + static int sqlite3Fts5HashQuery( Fts5Hash*, /* Hash table to query */ int nPre, @@ -219645,6 +228745,7 @@ static void sqlite3Fts5HashScanEntry(Fts5Hash *, ); + /* ** End of interface to code in fts5_hash.c. **************************************************************************/ @@ -219888,7 +228989,8 @@ static void sqlite3Fts5UnicodeAscii(u8*, u8*); #define FTS5_STAR 15 /* This file is automatically generated by Lemon from input grammar -** source file "fts5parse.y". */ +** source file "fts5parse.y". +*/ /* ** 2000-05-29 ** @@ -221478,15 +230580,19 @@ static int fts5CInstIterInit( */ typedef struct HighlightContext HighlightContext; struct HighlightContext { - CInstIter iter; /* Coalesced Instance Iterator */ - int iPos; /* Current token offset in zIn[] */ + /* Constant parameters to fts5HighlightCb() */ int iRangeStart; /* First token to include */ int iRangeEnd; /* If non-zero, last token to include */ const char *zOpen; /* Opening highlight */ const char *zClose; /* Closing highlight */ const char *zIn; /* Input text */ int nIn; /* Size of input text in bytes */ - int iOff; /* Current offset within zIn[] */ + + /* Variables modified by fts5HighlightCb() */ + CInstIter iter; /* Coalesced Instance Iterator */ + int iPos; /* Current token offset in zIn[] */ + int iOff; /* Have copied up to this offset in zIn[] */ + int bOpen; /* True if highlight is open */ char *zOut; /* Output value */ }; @@ -221519,8 +230625,8 @@ static int fts5HighlightCb( int tflags, /* Mask of FTS5_TOKEN_* flags */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ - int iStartOff, /* Start offset of token */ - int iEndOff /* End offset of token */ + int iStartOff, /* Start byte offset of token */ + int iEndOff /* End byte offset of token */ ){ HighlightContext *p = (HighlightContext*)pContext; int rc = SQLITE_OK; @@ -221531,35 +230637,52 @@ static int fts5HighlightCb( if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK; iPos = p->iPos++; - if( p->iRangeEnd>0 ){ + if( p->iRangeEnd>=0 ){ if( iPosiRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK; if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff; } - if( iPos==p->iter.iStart ){ + /* If the parenthesis is open, and this token is not part of the current + ** phrase, and the starting byte offset of this token is past the point + ** that has currently been copied into the output buffer, close the + ** parenthesis. 
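+  **
+  ** Keeping the highlight open in this way means that two instances
+  ** that abut or overlap - e.g. phrases "a b" and "b c" within the text
+  ** "a b c" (an invented example) - can be wrapped in a single
+  ** zOpen/zClose pair instead of being closed and immediately reopened.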
*/ + if( p->bOpen + && (iPos<=p->iter.iStart || p->iter.iStart<0) + && iStartOff>p->iOff + ){ + fts5HighlightAppend(&rc, p, p->zClose, -1); + p->bOpen = 0; + } + + /* If this is the start of a new phrase, and the highlight is not open: + ** + ** * copy text from the input up to the start of the phrase, and + ** * open the highlight. + */ + if( iPos==p->iter.iStart && p->bOpen==0 ){ fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff); fts5HighlightAppend(&rc, p, p->zOpen, -1); p->iOff = iStartOff; + p->bOpen = 1; } if( iPos==p->iter.iEnd ){ - if( p->iRangeEnd && p->iter.iStartiRangeStart ){ + if( p->bOpen==0 ){ + assert( p->iRangeEnd>=0 ); fts5HighlightAppend(&rc, p, p->zOpen, -1); + p->bOpen = 1; } fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); - fts5HighlightAppend(&rc, p, p->zClose, -1); p->iOff = iEndOff; + if( rc==SQLITE_OK ){ rc = fts5CInstIterNext(&p->iter); } } - if( p->iRangeEnd>0 && iPos==p->iRangeEnd ){ + if( iPos==p->iRangeEnd ){ fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); p->iOff = iEndOff; - if( iPos>=p->iter.iStart && iPositer.iEnd ){ - fts5HighlightAppend(&rc, p, p->zClose, -1); - } } return rc; @@ -221589,6 +230712,7 @@ static void fts5HighlightFunction( memset(&ctx, 0, sizeof(HighlightContext)); ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]); ctx.zClose = (const char*)sqlite3_value_text(apVal[2]); + ctx.iRangeEnd = -1; rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn); if( ctx.zIn ){ @@ -221599,6 +230723,9 @@ static void fts5HighlightFunction( if( rc==SQLITE_OK ){ rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb); } + if( ctx.bOpen ){ + fts5HighlightAppend(&rc, &ctx, ctx.zClose, -1); + } fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); if( rc==SQLITE_OK ){ @@ -221774,6 +230901,7 @@ static void fts5SnippetFunction( iCol = sqlite3_value_int(apVal[0]); ctx.zOpen = fts5ValueToText(apVal[1]); ctx.zClose = fts5ValueToText(apVal[2]); + ctx.iRangeEnd = -1; zEllips = fts5ValueToText(apVal[3]); nToken = sqlite3_value_int(apVal[4]); @@ -221876,6 +231004,9 @@ static void fts5SnippetFunction( if( rc==SQLITE_OK ){ rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb); } + if( ctx.bOpen ){ + fts5HighlightAppend(&rc, &ctx, ctx.zClose, -1); + } if( ctx.iRangeEnd>=(nColSize-1) ){ fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); }else{ @@ -222514,6 +231645,8 @@ static void sqlite3Fts5TermsetFree(Fts5Termset *p){ #define FTS5_DEFAULT_CRISISMERGE 16 #define FTS5_DEFAULT_HASHSIZE (1024*1024) +#define FTS5_DEFAULT_DELETE_AUTOMERGE 10 /* default 10% */ + /* Maximum allowed page size */ #define FTS5_MAX_PAGE_SIZE (64*1024) @@ -222844,6 +231977,16 @@ static int fts5ConfigParseSpecial( return rc; } + if( sqlite3_strnicmp("contentless_delete", zCmd, nCmd)==0 ){ + if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ + *pzErr = sqlite3_mprintf("malformed contentless_delete=... directive"); + rc = SQLITE_ERROR; + }else{ + pConfig->bContentlessDelete = (zArg[0]=='1'); + } + return rc; + } + if( sqlite3_strnicmp("content_rowid", zCmd, nCmd)==0 ){ if( pConfig->zContentRowid ){ *pzErr = sqlite3_mprintf("multiple content_rowid=... 
directives"); @@ -223042,6 +232185,7 @@ static int sqlite3Fts5ConfigParse( rc = SQLITE_ERROR; } + assert( (pRet->abUnindexed && pRet->azCol) || rc!=SQLITE_OK ); for(i=3; rc==SQLITE_OK && ibContentlessDelete + && pRet->eContent!=FTS5_CONTENT_NONE + ){ + *pzErr = sqlite3_mprintf( + "contentless_delete=1 requires a contentless table" + ); + rc = SQLITE_ERROR; + } + + /* We only allow contentless_delete=1 if columnsize=0 is not present. + ** + ** This restriction may be removed at some point. + */ + if( rc==SQLITE_OK && pRet->bContentlessDelete && pRet->bColumnsize==0 ){ + *pzErr = sqlite3_mprintf( + "contentless_delete=1 is incompatible with columnsize=0" + ); + rc = SQLITE_ERROR; + } + /* If a tokenizer= option was successfully parsed, the tokenizer has ** already been allocated. Otherwise, allocate an instance of the default ** tokenizer (unicode61) now. */ @@ -223381,6 +232547,18 @@ static int sqlite3Fts5ConfigSetValue( } } + else if( 0==sqlite3_stricmp(zKey, "deletemerge") ){ + int nVal = -1; + if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ + nVal = sqlite3_value_int(pVal); + }else{ + *pbBadkey = 1; + } + if( nVal<0 ) nVal = FTS5_DEFAULT_DELETE_AUTOMERGE; + if( nVal>100 ) nVal = 0; + pConfig->nDeleteMerge = nVal; + } + else if( 0==sqlite3_stricmp(zKey, "rank") ){ const char *zIn = (const char*)sqlite3_value_text(pVal); char *zRank; @@ -223395,6 +232573,18 @@ static int sqlite3Fts5ConfigSetValue( rc = SQLITE_OK; *pbBadkey = 1; } + } + + else if( 0==sqlite3_stricmp(zKey, "secure-delete") ){ + int bVal = -1; + if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ + bVal = sqlite3_value_int(pVal); + } + if( bVal<0 ){ + *pbBadkey = 1; + }else{ + pConfig->bSecureDelete = (bVal ? 1 : 0); + } }else{ *pbBadkey = 1; } @@ -223417,6 +232607,7 @@ static int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){ pConfig->nUsermerge = FTS5_DEFAULT_USERMERGE; pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE; pConfig->nHashSize = FTS5_DEFAULT_HASHSIZE; + pConfig->nDeleteMerge = FTS5_DEFAULT_DELETE_AUTOMERGE; zSql = sqlite3Fts5Mprintf(&rc, zSelect, pConfig->zDb, pConfig->zName); if( zSql ){ @@ -223439,15 +232630,20 @@ static int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){ rc = sqlite3_finalize(p); } - if( rc==SQLITE_OK && iVersion!=FTS5_CURRENT_VERSION ){ + if( rc==SQLITE_OK + && iVersion!=FTS5_CURRENT_VERSION + && iVersion!=FTS5_CURRENT_VERSION_SECUREDELETE + ){ rc = SQLITE_ERROR; if( pConfig->pzErrmsg ){ assert( 0==*pConfig->pzErrmsg ); - *pConfig->pzErrmsg = sqlite3_mprintf( - "invalid fts5 file format (found %d, expected %d) - run 'rebuild'", - iVersion, FTS5_CURRENT_VERSION + *pConfig->pzErrmsg = sqlite3_mprintf("invalid fts5 file format " + "(found %d, expected %d or %d) - run 'rebuild'", + iVersion, FTS5_CURRENT_VERSION, FTS5_CURRENT_VERSION_SECUREDELETE ); } + }else{ + pConfig->iVersion = iVersion; } if( rc==SQLITE_OK ){ @@ -223475,6 +232671,10 @@ static int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){ /* #include "fts5Int.h" */ /* #include "fts5parse.h" */ +#ifndef SQLITE_FTS5_MAX_EXPR_DEPTH +# define SQLITE_FTS5_MAX_EXPR_DEPTH 256 +#endif + /* ** All token types in the generated fts5parse.h file are greater than 0. */ @@ -223515,11 +232715,17 @@ struct Fts5Expr { ** FTS5_NOT (nChild, apChild valid) ** FTS5_STRING (pNear valid) ** FTS5_TERM (pNear valid) +** +** iHeight: +** Distance from this node to furthest leaf. This is always 0 for nodes +** of type FTS5_STRING and FTS5_TERM. For all other nodes it is one +** greater than the largest child value. 
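+**
+** For example, in the tree for the expression (a AND b) OR c, the three
+** string leaves have iHeight==0, the AND node has iHeight==1, and the
+** OR node at the root has iHeight==2.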
*/ struct Fts5ExprNode { int eType; /* Node type */ int bEof; /* True at EOF */ int bNomatch; /* True if entry is not a match */ + int iHeight; /* Distance to tree leaf nodes */ /* Next method for this node. */ int (*xNext)(Fts5Expr*, Fts5ExprNode*, int, i64); @@ -223589,6 +232795,31 @@ struct Fts5Parse { int bPhraseToAnd; /* Convert "a+b" to "a AND b" */ }; +/* +** Check that the Fts5ExprNode.iHeight variables are set correctly in +** the expression tree passed as the only argument. +*/ +#ifndef NDEBUG +static void assert_expr_depth_ok(int rc, Fts5ExprNode *p){ + if( rc==SQLITE_OK ){ + if( p->eType==FTS5_TERM || p->eType==FTS5_STRING || p->eType==0 ){ + assert( p->iHeight==0 ); + }else{ + int ii; + int iMaxChild = 0; + for(ii=0; iinChild; ii++){ + Fts5ExprNode *pChild = p->apChild[ii]; + iMaxChild = MAX(iMaxChild, pChild->iHeight); + assert_expr_depth_ok(SQLITE_OK, pChild); + } + assert( p->iHeight==iMaxChild+1 ); + } + } +} +#else +# define assert_expr_depth_ok(rc, p) +#endif + static void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){ va_list ap; va_start(ap, zFmt); @@ -223703,6 +232934,8 @@ static int sqlite3Fts5ExprNew( }while( sParse.rc==SQLITE_OK && t!=FTS5_EOF ); sqlite3Fts5ParserFree(pEngine, fts5ParseFree); + assert_expr_depth_ok(sParse.rc, sParse.pExpr); + /* If the LHS of the MATCH expression was a user column, apply the ** implicit column-filter. */ if( iColnCol && sParse.pExpr && sParse.rc==SQLITE_OK ){ @@ -223747,6 +232980,19 @@ static int sqlite3Fts5ExprNew( return sParse.rc; } +/* +** Assuming that buffer z is at least nByte bytes in size and contains a +** valid utf-8 string, return the number of characters in the string. +*/ +static int fts5ExprCountChar(const char *z, int nByte){ + int nRet = 0; + int ii; + for(ii=0; ii=3 ){ + + if( fts5ExprCountChar(&zText[iFirst], i-iFirst)>=3 ){ int jj; zExpr[iOut++] = '"'; for(jj=iFirst; jjnPhrase + p2->nPhrase; @@ -223876,7 +233123,7 @@ static int sqlite3Fts5ExprAnd(Fts5Expr **pp1, Fts5Expr *p2){ } sqlite3_free(p2->apExprPhrase); sqlite3_free(p2); - }else{ + }else if( p2 ){ *pp1 = p2; } @@ -225650,6 +234897,7 @@ static void fts5ExprAssignXNext(Fts5ExprNode *pNode){ } static void fts5ExprAddChildren(Fts5ExprNode *p, Fts5ExprNode *pSub){ + int ii = p->nChild; if( p->eType!=FTS5_NOT && pSub->eType==p->eType ){ int nByte = sizeof(Fts5ExprNode*) * pSub->nChild; memcpy(&p->apChild[p->nChild], pSub->apChild, nByte); @@ -225658,6 +234906,9 @@ static void fts5ExprAddChildren(Fts5ExprNode *p, Fts5ExprNode *pSub){ }else{ p->apChild[p->nChild++] = pSub; } + for( ; iinChild; ii++){ + p->iHeight = MAX(p->iHeight, p->apChild[ii]->iHeight + 1); + } } /* @@ -225688,6 +234939,7 @@ static Fts5ExprNode *fts5ParsePhraseToAnd( if( pRet ){ pRet->eType = FTS5_AND; pRet->nChild = nTerm; + pRet->iHeight = 1; fts5ExprAssignXNext(pRet); pParse->nPhrase--; for(ii=0; iiiHeight>SQLITE_FTS5_MAX_EXPR_DEPTH ){ + sqlite3Fts5ParseError(pParse, + "fts5 expression tree is too large (maximum depth %d)", + SQLITE_FTS5_MAX_EXPR_DEPTH + ); + sqlite3_free(pRet); + pRet = 0; + } } } } @@ -225871,7 +235131,7 @@ static Fts5ExprNode *sqlite3Fts5ParseImplicitAnd( return pRet; } -#ifdef SQLITE_TEST +#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){ sqlite3_int64 nByte = 0; Fts5ExprTerm *p; @@ -225977,6 +235237,8 @@ static char *fts5ExprPrintTcl( if( zRet==0 ) return 0; } + }else if( pExpr->eType==0 ){ + zRet = sqlite3_mprintf("{}"); }else{ char const *zOp = 0; int i; @@ -226238,14 +235500,14 @@ static 
void fts5ExprFold( sqlite3_result_int(pCtx, sqlite3Fts5UnicodeFold(iCode, bRemoveDiacritics)); } } -#endif /* ifdef SQLITE_TEST */ +#endif /* if SQLITE_TEST || SQLITE_FTS5_DEBUG */ /* ** This is called during initialization to register the fts5_expr() scalar ** UDF with the SQLite handle passed as the only argument. */ static int sqlite3Fts5ExprInit(Fts5Global *pGlobal, sqlite3 *db){ -#ifdef SQLITE_TEST +#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) struct Fts5ExprFunc { const char *z; void (*x)(sqlite3_context*,int,sqlite3_value**); @@ -226962,10 +236224,8 @@ static Fts5HashEntry *fts5HashEntryMerge( } /* -** Extract all tokens from hash table iHash and link them into a list -** in sorted order. The hash table is cleared before returning. It is -** the responsibility of the caller to free the elements of the returned -** list. +** Link all tokens from hash table iHash into a list in sorted order. The +** tokens are not removed from the hash table. */ static int fts5HashEntrySort( Fts5Hash *pHash, @@ -227005,7 +236265,6 @@ static int fts5HashEntrySort( pList = fts5HashEntryMerge(pList, ap[i]); } - pHash->nEntry = 0; sqlite3_free(ap); *ppSorted = pList; return SQLITE_OK; @@ -227059,6 +236318,28 @@ static int sqlite3Fts5HashScanInit( return fts5HashEntrySort(p, pTerm, nTerm, &p->pScan); } +#ifdef SQLITE_DEBUG +static int fts5HashCount(Fts5Hash *pHash){ + int nEntry = 0; + int ii; + for(ii=0; iinSlot; ii++){ + Fts5HashEntry *p = 0; + for(p=pHash->aSlot[ii]; p; p=p->pHashNext){ + nEntry++; + } + } + return nEntry; +} +#endif + +/* +** Return true if the hash table is empty, false otherwise. +*/ +static int sqlite3Fts5HashIsEmpty(Fts5Hash *pHash){ + assert( pHash->nEntry==fts5HashCount(pHash) ); + return pHash->nEntry==0; +} + static void sqlite3Fts5HashScanNext(Fts5Hash *p){ assert( !sqlite3Fts5HashScanEof(p) ); p->pScan = p->pScan->pScanNext; @@ -227145,6 +236426,26 @@ static void sqlite3Fts5HashScanEntry( # error "FTS5_MAX_PREFIX_INDEXES is too large" #endif +#define FTS5_MAX_LEVEL 64 + +/* +** There are two versions of the format used for the structure record: +** +** 1. the legacy format, that may be read by all fts5 versions, and +** +** 2. the V2 format, which is used by contentless_delete=1 databases. +** +** Both begin with a 4-byte "configuration cookie" value. Then, a legacy +** format structure record contains a varint - the number of levels in +** the structure. Whereas a V2 structure record contains the constant +** 4 bytes [0xff 0x00 0x00 0x01]. This is unambiguous as the value of a +** varint has to be at least 16256 to begin with "0xFF". And the default +** maximum number of levels is 64. +** +** See below for more on structure record formats. +*/ +#define FTS5_STRUCTURE_V2 "\xFF\x00\x00\x01" + /* ** Details: ** @@ -227152,7 +236453,7 @@ static void sqlite3Fts5HashScanEntry( ** ** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB); ** -** , contains the following 5 types of records. See the comments surrounding +** , contains the following 6 types of records. See the comments surrounding ** the FTS5_*_ROWID macros below for a description of how %_data rowids are ** assigned to each fo them. ** @@ -227161,12 +236462,12 @@ static void sqlite3Fts5HashScanEntry( ** The set of segments that make up an index - the index structure - are ** recorded in a single record within the %_data table. The record consists ** of a single 32-bit configuration cookie value followed by a list of -** SQLite varints. 
If the FTS table features more than one index (because -** there are one or more prefix indexes), it is guaranteed that all share -** the same cookie value. +** SQLite varints. +** +** If the structure record is a V2 record, the configuration cookie is +** followed by the following 4 bytes: [0xFF 0x00 0x00 0x01]. ** -** Immediately following the configuration cookie, the record begins with -** three varints: +** Next, the record continues with three varints: ** ** + number of levels, ** + total number of segments on all levels, @@ -227181,6 +236482,12 @@ static void sqlite3Fts5HashScanEntry( ** + first leaf page number (often 1, always greater than 0) ** + final leaf page number ** +** Then, for V2 structures only: +** +** + lower origin counter value, +** + upper origin counter value, +** + the number of tombstone hash pages. +** ** 2. The Averages Record: ** ** A single record within the %_data table. The data is a list of varints. @@ -227296,6 +236603,38 @@ static void sqlite3Fts5HashScanEntry( ** * A list of delta-encoded varints - the first rowid on each subsequent ** child page. ** +** 6. Tombstone Hash Page +** +** These records are only ever present in contentless_delete=1 tables. +** There are zero or more of these associated with each segment. They +** are used to store the tombstone rowids for rows contained in the +** associated segments. +** +** The set of nHashPg tombstone hash pages associated with a single +** segment together form a single hash table containing tombstone rowids. +** To find the page of the hash on which a key might be stored: +** +** iPg = (rowid % nHashPg) +** +** Then, within page iPg, which has nSlot slots: +** +** iSlot = (rowid / nHashPg) % nSlot +** +** Each tombstone hash page begins with an 8 byte header: +** +** 1-byte: Key-size (the size in bytes of each slot). Either 4 or 8. +** 1-byte: rowid-0-tombstone flag. This flag is only valid on the +** first tombstone hash page for each segment (iPg=0). If set, +** the hash table contains rowid 0. If clear, it does not. +** Rowid 0 is handled specially. +** 2-bytes: unused. +** 4-bytes: Big-endian integer containing number of entries on page. +** +** Following this are nSlot 4 or 8 byte slots (depending on the key-size +** in the first byte of the page header). The number of slots may be +** determined based on the size of the page record and the key-size: +** +** nSlot = (nByte - 8) / key-size */ /* @@ -227329,6 +236668,7 @@ static void sqlite3Fts5HashScanEntry( #define FTS5_SEGMENT_ROWID(segid, pgno) fts5_dri(segid, 0, 0, pgno) #define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno) +#define FTS5_TOMBSTONE_ROWID(segid,ipg) fts5_dri(segid+(1<<16), 0, 0, ipg) #ifdef SQLITE_DEBUG static int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; } @@ -227364,6 +236704,12 @@ struct Fts5Data { /* ** One object per %_data table. +** +** nContentlessDelete: +** The number of contentless delete operations since the most recent +** call to fts5IndexFlush() or fts5IndexDiscardData(). This is tracked +** so that extra auto-merge work can be done by fts5IndexFlush() to +** account for the delete operations. 
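+**
+** (Illustration only, not a measured figure: a transaction that performs
+** 100 contentless deletes leaves nContentlessDelete==100, and the next
+** call to fts5IndexFlush() uses that count when deciding how much extra
+** auto-merge work to schedule.)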
*/ struct Fts5Index { Fts5Config *pConfig; /* Virtual table configuration */ @@ -227378,6 +236724,8 @@ struct Fts5Index { int nPendingData; /* Current bytes of pending data */ i64 iWriteRowid; /* Rowid for current doc being written */ int bDelete; /* Current write is a delete */ + int nContentlessDelete; /* Number of contentless delete ops */ + int nPendingRow; /* Number of INSERT in hash table */ /* Error state. */ int rc; /* Current error code */ @@ -227391,6 +236739,8 @@ struct Fts5Index { sqlite3_stmt *pIdxSelect; int nRead; /* Total number of blocks read */ + sqlite3_stmt *pDeleteFromIdx; + sqlite3_stmt *pDataVersion; i64 iStructVersion; /* data_version when pStruct read */ Fts5Structure *pStruct; /* Current db structure (or NULL) */ @@ -227410,11 +236760,23 @@ struct Fts5DoclistIter { ** The contents of the "structure" record for each index are represented ** using an Fts5Structure record in memory. Which uses instances of the ** other Fts5StructureXXX types as components. +** +** nOriginCntr: +** This value is set to non-zero for structure records created for +** contentlessdelete=1 tables only. In that case it represents the +** origin value to apply to the next top-level segment created. */ struct Fts5StructureSegment { int iSegid; /* Segment id */ int pgnoFirst; /* First leaf page number in segment */ int pgnoLast; /* Last leaf page number in segment */ + + /* contentlessdelete=1 tables only: */ + u64 iOrigin1; + u64 iOrigin2; + int nPgTombstone; /* Number of tombstone hash table pages */ + u64 nEntryTombstone; /* Number of tombstone entries that "count" */ + u64 nEntry; /* Number of rows in this segment */ }; struct Fts5StructureLevel { int nMerge; /* Number of segments in incr-merge */ @@ -227424,6 +236786,7 @@ struct Fts5StructureLevel { struct Fts5Structure { int nRef; /* Object reference count */ u64 nWriteCounter; /* Total leaves written to level 0 */ + u64 nOriginCntr; /* Origin value for next top-level segment */ int nSegment; /* Total segments in this structure */ int nLevel; /* Number of levels in this index */ Fts5StructureLevel aLevel[1]; /* Array of nLevel level objects */ @@ -227483,9 +236846,6 @@ struct Fts5CResult { ** iLeafOffset: ** Byte offset within the current leaf that is the first byte of the ** position list data (one byte passed the position-list size field). -** rowid field of the current entry. Usually this is the size field of the -** position list data. The exception is if the rowid for the current entry -** is the last thing on the leaf page. ** ** pLeaf: ** Buffer containing current leaf page data. Set to NULL at EOF. @@ -227515,6 +236875,13 @@ struct Fts5CResult { ** ** iTermIdx: ** Index of current term on iTermLeafPgno. +** +** apTombstone/nTombstone: +** These are used for contentless_delete=1 tables only. When the cursor +** is first allocated, the apTombstone[] array is allocated so that it +** is large enough for all tombstones hash pages associated with the +** segment. The pages themselves are loaded lazily from the database as +** they are required. 
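+**
+** For example (invented values): if the segment has nPgTombstone==4,
+** checking rowid 1000003 requires only page (1000003 % 4)==3, so just
+** apTombstone[3] is populated; the other three pages are read only if
+** some later rowid hashes to them.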
*/ struct Fts5SegIter { Fts5StructureSegment *pSeg; /* Segment to iterate through */ @@ -227523,6 +236890,8 @@ struct Fts5SegIter { Fts5Data *pLeaf; /* Current leaf data */ Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */ i64 iLeafOffset; /* Byte offset within current leaf */ + Fts5Data **apTombstone; /* Array of tombstone pages */ + int nTombstone; /* Next method */ void (*xNext)(Fts5Index*, Fts5SegIter*, int*); @@ -227652,6 +237021,60 @@ static u16 fts5GetU16(const u8 *aIn){ return ((u16)aIn[0] << 8) + aIn[1]; } +/* +** The only argument points to a buffer at least 8 bytes in size. This +** function interprets the first 8 bytes of the buffer as a 64-bit big-endian +** unsigned integer and returns the result. +*/ +static u64 fts5GetU64(u8 *a){ + return ((u64)a[0] << 56) + + ((u64)a[1] << 48) + + ((u64)a[2] << 40) + + ((u64)a[3] << 32) + + ((u64)a[4] << 24) + + ((u64)a[5] << 16) + + ((u64)a[6] << 8) + + ((u64)a[7] << 0); +} + +/* +** The only argument points to a buffer at least 4 bytes in size. This +** function interprets the first 4 bytes of the buffer as a 32-bit big-endian +** unsigned integer and returns the result. +*/ +static u32 fts5GetU32(const u8 *a){ + return ((u32)a[0] << 24) + + ((u32)a[1] << 16) + + ((u32)a[2] << 8) + + ((u32)a[3] << 0); +} + +/* +** Write iVal, formated as a 64-bit big-endian unsigned integer, to the +** buffer indicated by the first argument. +*/ +static void fts5PutU64(u8 *a, u64 iVal){ + a[0] = ((iVal >> 56) & 0xFF); + a[1] = ((iVal >> 48) & 0xFF); + a[2] = ((iVal >> 40) & 0xFF); + a[3] = ((iVal >> 32) & 0xFF); + a[4] = ((iVal >> 24) & 0xFF); + a[5] = ((iVal >> 16) & 0xFF); + a[6] = ((iVal >> 8) & 0xFF); + a[7] = ((iVal >> 0) & 0xFF); +} + +/* +** Write iVal, formated as a 32-bit big-endian unsigned integer, to the +** buffer indicated by the first argument. +*/ +static void fts5PutU32(u8 *a, u32 iVal){ + a[0] = ((iVal >> 24) & 0xFF); + a[1] = ((iVal >> 16) & 0xFF); + a[2] = ((iVal >> 8) & 0xFF); + a[3] = ((iVal >> 0) & 0xFF); +} + /* ** Allocate and return a buffer at least nByte bytes in size. ** @@ -227879,10 +237302,17 @@ static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){ /* ** Remove all records associated with segment iSegid. */ -static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){ +static void fts5DataRemoveSegment(Fts5Index *p, Fts5StructureSegment *pSeg){ + int iSegid = pSeg->iSegid; i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0); i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1; fts5DataDelete(p, iFirst, iLast); + + if( pSeg->nPgTombstone ){ + i64 iTomb1 = FTS5_TOMBSTONE_ROWID(iSegid, 0); + i64 iTomb2 = FTS5_TOMBSTONE_ROWID(iSegid, pSeg->nPgTombstone-1); + fts5DataDelete(p, iTomb1, iTomb2); + } if( p->pIdxDeleter==0 ){ Fts5Config *pConfig = p->pConfig; fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf( @@ -227993,11 +237423,19 @@ static int fts5StructureDecode( int nSegment = 0; sqlite3_int64 nByte; /* Bytes of space to allocate at pRet */ Fts5Structure *pRet = 0; /* Structure object to return */ + int bStructureV2 = 0; /* True for FTS5_STRUCTURE_V2 */ + u64 nOriginCntr = 0; /* Largest origin value seen so far */ /* Grab the cookie value */ if( piCookie ) *piCookie = sqlite3Fts5Get32(pData); i = 4; + /* Check if this is a V2 structure record. Set bStructureV2 if it is. */ + if( 0==memcmp(&pData[i], FTS5_STRUCTURE_V2, 4) ){ + i += 4; + bStructureV2 = 1; + } + /* Read the total number of levels and segments from the start of the ** structure record. 
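 ** As a sketch of the serialized layout described above (V2 form; the
 ** marker bytes are absent from legacy records):
 **
 **     4 bytes     configuration cookie
 **     4 bytes     0xFF 0x00 0x00 0x01   (FTS5_STRUCTURE_V2 marker)
 **     varint      number of levels
 **     varint      total number of segments
 **     varint      write counter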
*/ i += fts5GetVarint32(&pData[i], nLevel); @@ -228044,9 +237482,18 @@ static int fts5StructureDecode( rc = FTS5_CORRUPT; break; } + assert( pSeg!=0 ); i += fts5GetVarint32(&pData[i], pSeg->iSegid); i += fts5GetVarint32(&pData[i], pSeg->pgnoFirst); i += fts5GetVarint32(&pData[i], pSeg->pgnoLast); + if( bStructureV2 ){ + i += fts5GetVarint(&pData[i], &pSeg->iOrigin1); + i += fts5GetVarint(&pData[i], &pSeg->iOrigin2); + i += fts5GetVarint32(&pData[i], pSeg->nPgTombstone); + i += fts5GetVarint(&pData[i], &pSeg->nEntryTombstone); + i += fts5GetVarint(&pData[i], &pSeg->nEntry); + nOriginCntr = MAX(nOriginCntr, pSeg->iOrigin2); + } if( pSeg->pgnoLastpgnoFirst ){ rc = FTS5_CORRUPT; break; @@ -228057,6 +237504,9 @@ static int fts5StructureDecode( } } if( nSegment!=0 && rc==SQLITE_OK ) rc = FTS5_CORRUPT; + if( bStructureV2 ){ + pRet->nOriginCntr = nOriginCntr+1; + } if( rc!=SQLITE_OK ){ fts5StructureRelease(pRet); @@ -228074,6 +237524,7 @@ static int fts5StructureDecode( */ static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){ fts5StructureMakeWritable(pRc, ppStruct); + assert( (ppStruct!=0 && (*ppStruct)!=0) || (*pRc)!=SQLITE_OK ); if( *pRc==SQLITE_OK ){ Fts5Structure *pStruct = *ppStruct; int nLevel = pStruct->nLevel; @@ -228268,6 +237719,7 @@ static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){ Fts5Buffer buf; /* Buffer to serialize record into */ int iLvl; /* Used to iterate through levels */ int iCookie; /* Cookie value to store */ + int nHdr = (pStruct->nOriginCntr>0 ? (4+4+9+9+9) : (4+9+9)); assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) ); memset(&buf, 0, sizeof(Fts5Buffer)); @@ -228276,9 +237728,12 @@ static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){ iCookie = p->pConfig->iCookie; if( iCookie<0 ) iCookie = 0; - if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, 4+9+9+9) ){ + if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, nHdr) ){ sqlite3Fts5Put32(buf.p, iCookie); buf.n = 4; + if( pStruct->nOriginCntr>0 ){ + fts5BufferSafeAppendBlob(&buf, FTS5_STRUCTURE_V2, 4); + } fts5BufferSafeAppendVarint(&buf, pStruct->nLevel); fts5BufferSafeAppendVarint(&buf, pStruct->nSegment); fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter); @@ -228292,9 +237747,17 @@ static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){ assert( pLvl->nMerge<=pLvl->nSeg ); for(iSeg=0; iSegnSeg; iSeg++){ - fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid); - fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst); - fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast); + Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; + fts5BufferAppendVarint(&p->rc, &buf, pSeg->iSegid); + fts5BufferAppendVarint(&p->rc, &buf, pSeg->pgnoFirst); + fts5BufferAppendVarint(&p->rc, &buf, pSeg->pgnoLast); + if( pStruct->nOriginCntr>0 ){ + fts5BufferAppendVarint(&p->rc, &buf, pSeg->iOrigin1); + fts5BufferAppendVarint(&p->rc, &buf, pSeg->iOrigin2); + fts5BufferAppendVarint(&p->rc, &buf, pSeg->nPgTombstone); + fts5BufferAppendVarint(&p->rc, &buf, pSeg->nEntryTombstone); + fts5BufferAppendVarint(&p->rc, &buf, pSeg->nEntry); + } } } @@ -228532,42 +237995,25 @@ static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){ pLvl->bEof = 1; }else{ u8 *a = pLvl->pData->p; - i64 iVal; - int iLimit; - int ii; - int nZero = 0; - - /* Currently iOff points to the first byte of a varint. This block - ** decrements iOff until it points to the first byte of the previous - ** varint. 
Taking care not to read any memory locations that occur - ** before the buffer in memory. */ - iLimit = (iOff>9 ? iOff-9 : 0); - for(iOff--; iOff>iLimit; iOff--){ - if( (a[iOff-1] & 0x80)==0 ) break; - } - - fts5GetVarint(&a[iOff], (u64*)&iVal); - pLvl->iRowid -= iVal; - pLvl->iLeafPgno--; - - /* Skip backwards past any 0x00 varints. */ - for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){ - nZero++; - } - if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){ - /* The byte immediately before the last 0x00 byte has the 0x80 bit - ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80 - ** bytes before a[ii]. */ - int bZero = 0; /* True if last 0x00 counts */ - if( (ii-8)>=pLvl->iFirstOff ){ - int j; - for(j=1; j<=8 && (a[ii-j] & 0x80); j++); - bZero = (j>8); + + pLvl->iOff = 0; + fts5DlidxLvlNext(pLvl); + while( 1 ){ + int nZero = 0; + int ii = pLvl->iOff; + u64 delta = 0; + + while( a[ii]==0 ){ + nZero++; + ii++; } - if( bZero==0 ) nZero--; + ii += sqlite3Fts5GetVarint(&a[ii], &delta); + + if( ii>=iOff ) break; + pLvl->iLeafPgno += nZero+1; + pLvl->iRowid += delta; + pLvl->iOff = ii; } - pLvl->iLeafPgno -= nZero; - pLvl->iOff = iOff - nZero; } return pLvl->bEof; @@ -228763,7 +238209,7 @@ static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){ i64 iOff = pIter->iLeafOffset; ASSERT_SZLEAF_OK(pIter->pLeaf); - if( iOff>=pIter->pLeaf->szLeaf ){ + while( iOff>=pIter->pLeaf->szLeaf ){ fts5SegIterNextPage(p, pIter); if( pIter->pLeaf==0 ){ if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT; @@ -228834,6 +238280,23 @@ static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){ } } +/* +** Allocate a tombstone hash page array (pIter->apTombstone) for the +** iterator passed as the second argument. If an OOM error occurs, leave +** an error in the Fts5Index object. +*/ +static void fts5SegIterAllocTombstone(Fts5Index *p, Fts5SegIter *pIter){ + const int nTomb = pIter->pSeg->nPgTombstone; + if( nTomb>0 ){ + Fts5Data **apTomb = 0; + apTomb = (Fts5Data**)sqlite3Fts5MallocZero(&p->rc, sizeof(Fts5Data)*nTomb); + if( apTomb ){ + pIter->apTombstone = apTomb; + pIter->nTombstone = nTomb; + } + } +} + /* ** Initialize the iterator object pIter to iterate through the entries in ** segment pSeg. 
The iterator is left pointing to the first entry when @@ -228862,10 +238325,12 @@ static void fts5SegIterInit( fts5SegIterSetNext(p, pIter); pIter->pSeg = pSeg; pIter->iLeafPgno = pSeg->pgnoFirst-1; - fts5SegIterNextPage(p, pIter); + do { + fts5SegIterNextPage(p, pIter); + }while( p->rc==SQLITE_OK && pIter->pLeaf && pIter->pLeaf->nn==4 ); } - if( p->rc==SQLITE_OK ){ + if( p->rc==SQLITE_OK && pIter->pLeaf ){ pIter->iLeafOffset = 4; assert( pIter->pLeaf!=0 ); assert_nc( pIter->pLeaf->nn>4 ); @@ -228873,6 +238338,7 @@ static void fts5SegIterInit( pIter->iPgidxOff = pIter->pLeaf->szLeaf+1; fts5SegIterLoadTerm(p, pIter, 0); fts5SegIterLoadNPos(p, pIter); + fts5SegIterAllocTombstone(p, pIter); } } @@ -229059,7 +238525,7 @@ static void fts5SegIterNext_None( iOff = pIter->iLeafOffset; /* Next entry is on the next page */ - if( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){ + while( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){ fts5SegIterNextPage(p, pIter); if( p->rc || pIter->pLeaf==0 ) return; pIter->iRowid = 0; @@ -229252,7 +238718,7 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ Fts5Data *pLast = 0; int pgnoLast = 0; - if( pDlidx ){ + if( pDlidx && p->pConfig->iVersion==FTS5_CURRENT_VERSION ){ int iSegid = pIter->pSeg->iSegid; pgnoLast = fts5DlidxIterPgno(pDlidx); pLast = fts5LeafRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast)); @@ -229574,6 +239040,7 @@ static void fts5SegIterSeekInit( } fts5SegIterSetNext(p, pIter); + fts5SegIterAllocTombstone(p, pIter); /* Either: ** @@ -229624,6 +239091,14 @@ static void fts5SegIterHashInit( pLeaf->p = (u8*)pList; } } + + /* The call to sqlite3Fts5HashScanInit() causes the hash table to + ** fill the size field of all existing position lists. This means they + ** can no longer be appended to. Since the only scenario in which they + ** can be appended to is if the previous operation on this table was + ** a DELETE, by clearing the Fts5Index.bDelete flag we can avoid this + ** possibility altogether. */ + p->bDelete = 0; }else{ p->rc = sqlite3Fts5HashQuery(p->pHash, sizeof(Fts5Data), (const char*)pTerm, nTerm, (void**)&pLeaf, &nList @@ -229654,6 +239129,20 @@ static void fts5SegIterHashInit( fts5SegIterSetNext(p, pIter); } +/* +** Array ap[] contains n elements. Release each of these elements using +** fts5DataRelease(). Then free the array itself using sqlite3_free(). +*/ +static void fts5IndexFreeArray(Fts5Data **ap, int n){ + if( ap ){ + int ii; + for(ii=0; iiterm); fts5DataRelease(pIter->pLeaf); fts5DataRelease(pIter->pNextLeaf); + fts5IndexFreeArray(pIter->apTombstone, pIter->nTombstone); fts5DlidxIterFree(pIter->pDlidx); sqlite3_free(pIter->aRowidOffset); memset(pIter, 0, sizeof(Fts5SegIter)); @@ -229794,7 +239284,6 @@ static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){ assert_nc( i2!=0 ); pRes->bTermEq = 1; if( p1->iRowid==p2->iRowid ){ - p1->bDel = p2->bDel; return i2; } res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1; @@ -229813,7 +239302,8 @@ static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){ /* ** Move the seg-iter so that it points to the first rowid on page iLeafPgno. -** It is an error if leaf iLeafPgno does not exist or contains no rowids. +** It is an error if leaf iLeafPgno does not exist. Unless the db is +** a 'secure-delete' db, if it contains no rowids then this is also an error. 
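+** This is because secure-delete mode scrubs entries out of leaf pages in
+** place, so a page whose rowids have all been removed may legitimately
+** survive in the segment as an empty page (fts5SecureDeleteOverflow()
+** below writes such pages), and the loop in this function steps over it.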
*/ static void fts5SegIterGotoPage( Fts5Index *p, /* FTS5 backend object */ @@ -229828,21 +239318,23 @@ static void fts5SegIterGotoPage( fts5DataRelease(pIter->pNextLeaf); pIter->pNextLeaf = 0; pIter->iLeafPgno = iLeafPgno-1; - fts5SegIterNextPage(p, pIter); - assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); - if( p->rc==SQLITE_OK && ALWAYS(pIter->pLeaf!=0) ){ + while( p->rc==SQLITE_OK ){ int iOff; - u8 *a = pIter->pLeaf->p; - int n = pIter->pLeaf->szLeaf; - + fts5SegIterNextPage(p, pIter); + if( pIter->pLeaf==0 ) break; iOff = fts5LeafFirstRowidOff(pIter->pLeaf); - if( iOff<4 || iOff>=n ){ - p->rc = FTS5_CORRUPT; - }else{ - iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); - pIter->iLeafOffset = iOff; - fts5SegIterLoadNPos(p, pIter); + if( iOff>0 ){ + u8 *a = pIter->pLeaf->p; + int n = pIter->pLeaf->szLeaf; + if( iOff<4 || iOff>=n ){ + p->rc = FTS5_CORRUPT; + }else{ + iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); + pIter->iLeafOffset = iOff; + fts5SegIterLoadNPos(p, pIter); + } + break; } } } @@ -229995,6 +239487,84 @@ static void fts5MultiIterSetEof(Fts5Iter *pIter){ pIter->iSwitchRowid = pSeg->iRowid; } +/* +** The argument to this macro must be an Fts5Data structure containing a +** tombstone hash page. This macro returns the key-size of the hash-page. +*/ +#define TOMBSTONE_KEYSIZE(pPg) (pPg->p[0]==4 ? 4 : 8) + +#define TOMBSTONE_NSLOT(pPg) \ + ((pPg->nn > 16) ? ((pPg->nn-8) / TOMBSTONE_KEYSIZE(pPg)) : 1) + +/* +** Query a single tombstone hash table for rowid iRowid. Return true if +** it is found or false otherwise. The tombstone hash table is one of +** nHashTable tables. +*/ +static int fts5IndexTombstoneQuery( + Fts5Data *pHash, /* Hash table page to query */ + int nHashTable, /* Number of pages attached to segment */ + u64 iRowid /* Rowid to query hash for */ +){ + const int szKey = TOMBSTONE_KEYSIZE(pHash); + const int nSlot = TOMBSTONE_NSLOT(pHash); + int iSlot = (iRowid / nHashTable) % nSlot; + int nCollide = nSlot; + + if( iRowid==0 ){ + return pHash->p[1]; + }else if( szKey==4 ){ + u32 *aSlot = (u32*)&pHash->p[8]; + while( aSlot[iSlot] ){ + if( fts5GetU32((u8*)&aSlot[iSlot])==iRowid ) return 1; + if( nCollide--==0 ) break; + iSlot = (iSlot+1)%nSlot; + } + }else{ + u64 *aSlot = (u64*)&pHash->p[8]; + while( aSlot[iSlot] ){ + if( fts5GetU64((u8*)&aSlot[iSlot])==iRowid ) return 1; + if( nCollide--==0 ) break; + iSlot = (iSlot+1)%nSlot; + } + } + + return 0; +} + +/* +** Return true if the iterator passed as the only argument points +** to an segment entry for which there is a tombstone. Return false +** if there is no tombstone or if the iterator is already at EOF. +*/ +static int fts5MultiIterIsDeleted(Fts5Iter *pIter){ + int iFirst = pIter->aFirst[1].iFirst; + Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; + + if( pSeg->pLeaf && pSeg->nTombstone ){ + /* Figure out which page the rowid might be present on. */ + int iPg = ((u64)pSeg->iRowid) % pSeg->nTombstone; + assert( iPg>=0 ); + + /* If tombstone hash page iPg has not yet been loaded from the + ** database, load it now. */ + if( pSeg->apTombstone[iPg]==0 ){ + pSeg->apTombstone[iPg] = fts5DataRead(pIter->pIndex, + FTS5_TOMBSTONE_ROWID(pSeg->pSeg->iSegid, iPg) + ); + if( pSeg->apTombstone[iPg]==0 ) return 0; + } + + return fts5IndexTombstoneQuery( + pSeg->apTombstone[iPg], + pSeg->nTombstone, + pSeg->iRowid + ); + } + + return 0; +} + /* ** Move the iterator to the next entry. 
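**
** With the contentless_delete=1 changes below, an entry is also skipped
** over if fts5MultiIterIsDeleted() reports that a tombstone exists for
** it, in the same way that empty entries are skipped when bSkipEmpty
** is set.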
** @@ -230032,7 +239602,9 @@ static void fts5MultiIterNext( fts5AssertMultiIterSetup(p, pIter); assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf ); - if( pIter->bSkipEmpty==0 || pSeg->nPos ){ + if( (pIter->bSkipEmpty==0 || pSeg->nPos) + && 0==fts5MultiIterIsDeleted(pIter) + ){ pIter->xSetOutputs(pIter, pSeg); return; } @@ -230064,7 +239636,9 @@ static void fts5MultiIterNext2( } fts5AssertMultiIterSetup(p, pIter); - }while( fts5MultiIterIsEmpty(p, pIter) ); + }while( (fts5MultiIterIsEmpty(p, pIter) || fts5MultiIterIsDeleted(pIter)) + && (p->rc==SQLITE_OK) + ); } } @@ -230077,7 +239651,7 @@ static Fts5Iter *fts5MultiIterAlloc( int nSeg ){ Fts5Iter *pNew; - int nSlot; /* Power of two >= nSeg */ + i64 nSlot; /* Power of two >= nSeg */ for(nSlot=2; nSlotnSegment==fts5StructureCountSegments(pStruct) ); nSeg = pStruct->nSegment; - nSeg += (p->pHash ? 1 : 0); + nSeg += (p->pHash && 0==(flags & FTS5INDEX_QUERY_SKIPHASH)); }else{ nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment); } @@ -230578,7 +240152,7 @@ static void fts5MultiIterNew( if( p->rc==SQLITE_OK ){ if( iLevel<0 ){ Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel]; - if( p->pHash ){ + if( p->pHash && 0==(flags & FTS5INDEX_QUERY_SKIPHASH) ){ /* Add a segment iterator for the current contents of the hash table. */ Fts5SegIter *pIter = &pNew->aSeg[iIter++]; fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter); @@ -230619,7 +240193,9 @@ static void fts5MultiIterNew( fts5MultiIterSetEof(pNew); fts5AssertMultiIterSetup(p, pNew); - if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){ + if( (pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew)) + || fts5MultiIterIsDeleted(pNew) + ){ fts5MultiIterNext(p, pNew, 0, 0); }else if( pNew->base.bEof==0 ){ Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst]; @@ -230797,7 +240373,9 @@ static void fts5IndexDiscardData(Fts5Index *p){ if( p->pHash ){ sqlite3Fts5HashClear(p->pHash); p->nPendingData = 0; + p->nPendingRow = 0; } + p->nContentlessDelete = 0; } /* @@ -231178,7 +240756,9 @@ static void fts5WriteAppendRowid( fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid); }else{ assert_nc( p->rc || iRowid>pWriter->iPrevRowid ); - fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid); + fts5BufferAppendVarint(&p->rc, &pPage->buf, + (u64)iRowid - (u64)pWriter->iPrevRowid + ); } pWriter->iPrevRowid = iRowid; pWriter->bFirstRowidInDoclist = 0; @@ -231196,7 +240776,7 @@ static void fts5WriteAppendPoslistData( const u8 *a = aData; int n = nData; - assert( p->pConfig->pgsz>0 ); + assert( p->pConfig->pgsz>0 || p->rc!=SQLITE_OK ); while( p->rc==SQLITE_OK && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz ){ @@ -231331,7 +240911,7 @@ static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){ fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr); fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n); fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p); - fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff,&pData->p[iOff]); + fts5BufferAppendBlob(&p->rc, &buf,pData->szLeaf-iOff,&pData->p[iOff]); if( p->rc==SQLITE_OK ){ /* Set the szLeaf field */ fts5PutU16(&buf.p[2], (u16)buf.n); @@ -231432,6 +241012,12 @@ static void fts5IndexMergeLevel( /* Read input from all segments in the input level */ nInput = pLvl->nSeg; + + /* Set the range of origins that will go into the output segment. 
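+    ** Assuming the usual oldest-to-newest ordering of segments within a
+    ** level, the output inherits iOrigin1 from the first input segment
+    ** and iOrigin2 from the last - e.g. (invented values) merging inputs
+    ** spanning origins [5,8] and [9,12] yields an output spanning [5,12].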
*/ + if( pStruct->nOriginCntr>0 ){ + pSeg->iOrigin1 = pLvl->aSeg[0].iOrigin1; + pSeg->iOrigin2 = pLvl->aSeg[pLvl->nSeg-1].iOrigin2; + } } bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2); @@ -231491,8 +241077,11 @@ static void fts5IndexMergeLevel( int i; /* Remove the redundant segments from the %_data table */ + assert( pSeg->nEntry==0 ); for(i=0; iaSeg[i].iSegid); + Fts5StructureSegment *pOld = &pLvl->aSeg[i]; + pSeg->nEntry += (pOld->nEntry - pOld->nEntryTombstone); + fts5DataRemoveSegment(p, pOld); } /* Remove the redundant segments from the input level */ @@ -231518,6 +241107,43 @@ static void fts5IndexMergeLevel( if( pnRem ) *pnRem -= writer.nLeafWritten; } +/* +** If this is not a contentless_delete=1 table, or if the 'deletemerge' +** configuration option is set to 0, then this function always returns -1. +** Otherwise, it searches the structure object passed as the second argument +** for a level suitable for merging due to having a large number of +** tombstones in the tombstone hash. If one is found, its index is returned. +** Otherwise, if there is no suitable level, -1. +*/ +static int fts5IndexFindDeleteMerge(Fts5Index *p, Fts5Structure *pStruct){ + Fts5Config *pConfig = p->pConfig; + int iRet = -1; + if( pConfig->bContentlessDelete && pConfig->nDeleteMerge>0 ){ + int ii; + int nBest = 0; + + for(ii=0; iinLevel; ii++){ + Fts5StructureLevel *pLvl = &pStruct->aLevel[ii]; + i64 nEntry = 0; + i64 nTomb = 0; + int iSeg; + for(iSeg=0; iSegnSeg; iSeg++){ + nEntry += pLvl->aSeg[iSeg].nEntry; + nTomb += pLvl->aSeg[iSeg].nEntryTombstone; + } + assert_nc( nEntry>0 || pLvl->nSeg==0 ); + if( nEntry>0 ){ + int nPercent = (nTomb * 100) / nEntry; + if( nPercent>=pConfig->nDeleteMerge && nPercent>nBest ){ + iRet = ii; + nBest = nPercent; + } + } + } + } + return iRet; +} + /* ** Do up to nPg pages of automerge work on the index. ** @@ -231537,14 +241163,15 @@ static int fts5IndexMerge( int iBestLvl = 0; /* Level offering the most input segments */ int nBest = 0; /* Number of input segments on best level */ - /* Set iBestLvl to the level to read input segments from. */ + /* Set iBestLvl to the level to read input segments from. Or to -1 if + ** there is no level suitable to merge segments from. */ assert( pStruct->nLevel>0 ); for(iLvl=0; iLvlnLevel; iLvl++){ Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; if( pLvl->nMerge ){ if( pLvl->nMerge>nBest ){ iBestLvl = iLvl; - nBest = pLvl->nMerge; + nBest = nMin; } break; } @@ -231553,22 +241180,18 @@ static int fts5IndexMerge( iBestLvl = iLvl; } } - - /* If nBest is still 0, then the index must be empty. 
*/
-#ifdef SQLITE_DEBUG
-    for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){
-      assert( pStruct->aLevel[iLvl].nSeg==0 );
-    }
-#endif
-
-    if( nBest<nMin && pStruct->aLevel[iBestLvl].nMerge==0 ){
-      break;
-    }
+    if( nBest<nMin ){
+      iBestLvl = fts5IndexFindDeleteMerge(p, pStruct);
+    }
+
+    if( iBestLvl<0 ) break;
     bRet = 1;
     fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem);
     if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){
       fts5StructurePromote(p, iBestLvl+1, pStruct);
     }
+
+    if( nMin==1 ) nMin = 2;
   }
   *ppStruct = pStruct;
   return bRet;
@@ -231609,16 +241232,16 @@ static void fts5IndexCrisismerge(
 ){
   const int nCrisis = p->pConfig->nCrisisMerge;
   Fts5Structure *pStruct = *ppStruct;
-  int iLvl = 0;
-
-  assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 );
-  while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){
-    fts5IndexMergeLevel(p, &pStruct, iLvl, 0);
-    assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) );
-    fts5StructurePromote(p, iLvl+1, pStruct);
-    iLvl++;
+  if( pStruct && pStruct->nLevel>0 ){
+    int iLvl = 0;
+    while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){
+      fts5IndexMergeLevel(p, &pStruct, iLvl, 0);
+      assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) );
+      fts5StructurePromote(p, iLvl+1, pStruct);
+      iLvl++;
+    }
+    *ppStruct = pStruct;
   }
-  *ppStruct = pStruct;
 }
 
 static int fts5IndexReturn(Fts5Index *p){
@@ -231652,6 +241275,469 @@ static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
   return ret;
 }
 
+/*
+** Execute the SQL statement:
+**
+**   DELETE FROM %_idx WHERE (segid, (pgno/2)) = ($iSegid, $iPgno);
+**
+** This is used when a secure-delete operation removes the last term
+** from a segment leaf page. In that case the %_idx entry is removed
+** too. This is done to ensure that if all instances of a token are
+** removed from an fts5 database in secure-delete mode, no trace of
+** the token itself remains in the database.
+*/
+static void fts5SecureDeleteIdxEntry(
+  Fts5Index *p,                   /* FTS5 backend object */
+  int iSegid,                     /* Id of segment to delete entry for */
+  int iPgno                       /* Page number within segment */
+){
+  if( iPgno!=1 ){
+    assert( p->pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE );
+    if( p->pDeleteFromIdx==0 ){
+      fts5IndexPrepareStmt(p, &p->pDeleteFromIdx, sqlite3_mprintf(
+          "DELETE FROM '%q'.'%q_idx' WHERE (segid, (pgno/2)) = (?1, ?2)",
+          p->pConfig->zDb, p->pConfig->zName
+      ));
+    }
+    if( p->rc==SQLITE_OK ){
+      sqlite3_bind_int(p->pDeleteFromIdx, 1, iSegid);
+      sqlite3_bind_int(p->pDeleteFromIdx, 2, iPgno);
+      sqlite3_step(p->pDeleteFromIdx);
+      p->rc = sqlite3_reset(p->pDeleteFromIdx);
+    }
+  }
+}
+
+/*
+** This is called when a secure-delete operation removes a position-list
+** that overflows onto segment page iPgno of segment pSeg. This function
+** rewrites node iPgno, and possibly one or more of its right-hand peers,
+** to remove this portion of the position list.
+**
+** Output variable (*pbLastInDoclist) is set to true if the position-list
+** removed is followed by a new term or the end-of-segment, or false if
+** it is followed by another rowid/position list.
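+**
+** For example (invented page numbers): if the position list being
+** removed starts on page 10 and spills onto pages 11 and 12, a call
+** with iPgno==11 replaces page 11 with an empty page and then trims
+** the leading bytes of the list from page 12, rewriting it in place.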
+*/
+static void fts5SecureDeleteOverflow(
+  Fts5Index *p,
+  Fts5StructureSegment *pSeg,
+  int iPgno,
+  int *pbLastInDoclist
+){
+  const int bDetailNone = (p->pConfig->eDetail==FTS5_DETAIL_NONE);
+  int pgno;
+  Fts5Data *pLeaf = 0;
+  assert( iPgno!=1 );
+
+  *pbLastInDoclist = 1;
+  for(pgno=iPgno; p->rc==SQLITE_OK && pgno<=pSeg->pgnoLast; pgno++){
+    i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
+    int iNext = 0;
+    u8 *aPg = 0;
+
+    pLeaf = fts5DataRead(p, iRowid);
+    if( pLeaf==0 ) break;
+    aPg = pLeaf->p;
+
+    iNext = fts5GetU16(&aPg[0]);
+    if( iNext!=0 ){
+      *pbLastInDoclist = 0;
+    }
+    if( iNext==0 && pLeaf->szLeaf!=pLeaf->nn ){
+      fts5GetVarint32(&aPg[pLeaf->szLeaf], iNext);
+    }
+
+    if( iNext==0 ){
+      /* The page contains no terms or rowids. Replace it with an empty
+      ** page and move on to the right-hand peer. */
+      const u8 aEmpty[] = {0x00, 0x00, 0x00, 0x04};
+      assert_nc( bDetailNone==0 || pLeaf->nn==4 );
+      if( bDetailNone==0 ) fts5DataWrite(p, iRowid, aEmpty, sizeof(aEmpty));
+      fts5DataRelease(pLeaf);
+      pLeaf = 0;
+    }else if( bDetailNone ){
+      break;
+    }else if( iNext>=pLeaf->szLeaf || pLeaf->nn<pLeaf->szLeaf || iNext<4 ){
+      p->rc = FTS5_CORRUPT;
+      break;
+    }else{
+      int nShift = iNext - 4;
+      int nPg;
+
+      int nIdx = 0;
+      u8 *aIdx = 0;
+
+      /* Unless the current page footer is 0 bytes in size (in which case
+      ** the new page footer will be as well), allocate and populate a
+      ** buffer containing the new page footer. Set stack variables aIdx
+      ** and nIdx accordingly. */
+      if( pLeaf->nn>pLeaf->szLeaf ){
+        int iFirst = 0;
+        int i1 = pLeaf->szLeaf;
+        int i2 = 0;
+
+        i1 += fts5GetVarint32(&aPg[i1], iFirst);
+        if( iFirst<iNext ){
+          p->rc = FTS5_CORRUPT;
+          break;
+        }
+        aIdx = sqlite3Fts5MallocZero(&p->rc, (pLeaf->nn-pLeaf->szLeaf)+2);
+        if( aIdx==0 ) break;
+        i2 = sqlite3Fts5PutVarint(aIdx, iFirst-nShift);
+        if( i1<pLeaf->nn ){
+          memcpy(&aIdx[i2], &aPg[i1], pLeaf->nn-i1);
+          i2 += (pLeaf->nn-i1);
+        }
+        nIdx = i2;
+      }
+
+      /* Modify the contents of buffer aPg[]. Set nPg to the new size
+      ** in bytes. The new page is always smaller than the old. */
+      nPg = pLeaf->szLeaf - nShift;
+      memmove(&aPg[4], &aPg[4+nShift], nPg-4);
+      fts5PutU16(&aPg[2], nPg);
+      if( fts5GetU16(&aPg[0]) ) fts5PutU16(&aPg[0], 4);
+      if( nIdx>0 ){
+        memcpy(&aPg[nPg], aIdx, nIdx);
+        nPg += nIdx;
+      }
+      sqlite3_free(aIdx);
+
+      /* Write the new page to disk and exit the loop */
+      assert( nPg>4 || fts5GetU16(aPg)==0 );
+      fts5DataWrite(p, iRowid, aPg, nPg);
+      break;
+    }
+  }
+  fts5DataRelease(pLeaf);
+}
+
+/*
+** Completely remove the entry that pSeg currently points to from
+** the database.
+*/
+static void fts5DoSecureDelete(
+  Fts5Index *p,
+  Fts5SegIter *pSeg
+){
+  const int bDetailNone = (p->pConfig->eDetail==FTS5_DETAIL_NONE);
+  int iSegid = pSeg->pSeg->iSegid;
+  u8 *aPg = pSeg->pLeaf->p;
+  int nPg = pSeg->pLeaf->nn;
+  int iPgIdx = pSeg->pLeaf->szLeaf;
+
+  u64 iDelta = 0;
+  int iNextOff = 0;
+  int iOff = 0;
+  int nIdx = 0;
+  u8 *aIdx = 0;
+  int bLastInDoclist = 0;
+  int iIdx = 0;
+  int iStart = 0;
+  int iDelKeyOff = 0;       /* Offset of deleted key, if any */
+
+  nIdx = nPg-iPgIdx;
+  aIdx = sqlite3Fts5MallocZero(&p->rc, nIdx+16);
+  if( p->rc ) return;
+  memcpy(aIdx, &aPg[iPgIdx], nIdx);
+
+  /* At this point segment iterator pSeg points to the entry
+  ** this function should remove from the b-tree segment.
+  **
+  ** In detail=full or detail=column mode, pSeg->iLeafOffset is the
+  ** offset of the first byte in the position-list for the entry to
+  ** remove.
Immediately before this comes two varints that will also + ** need to be removed: + ** + ** + the rowid or delta rowid value for the entry, and + ** + the size of the position list in bytes. + ** + ** Or, in detail=none mode, there is a single varint prior to + ** pSeg->iLeafOffset - the rowid or delta rowid value. + ** + ** This block sets the following variables: + ** + ** iStart: + ** The offset of the first byte of the rowid or delta-rowid + ** value for the doclist entry being removed. + ** + ** iDelta: + ** The value of the rowid or delta-rowid value for the doclist + ** entry being removed. + ** + ** iNextOff: + ** The offset of the next entry following the position list + ** for the one being removed. If the position list for this + ** entry overflows onto the next leaf page, this value will be + ** greater than pLeaf->szLeaf. + */ + { + int iSOP; /* Start-Of-Position-list */ + if( pSeg->iLeafPgno==pSeg->iTermLeafPgno ){ + iStart = pSeg->iTermLeafOffset; + }else{ + iStart = fts5GetU16(&aPg[0]); + } + + iSOP = iStart + fts5GetVarint(&aPg[iStart], &iDelta); + assert_nc( iSOP<=pSeg->iLeafOffset ); + + if( bDetailNone ){ + while( iSOPiLeafOffset ){ + if( aPg[iSOP]==0x00 ) iSOP++; + if( aPg[iSOP]==0x00 ) iSOP++; + iStart = iSOP; + iSOP = iStart + fts5GetVarint(&aPg[iStart], &iDelta); + } + + iNextOff = iSOP; + if( iNextOffiEndofDoclist && aPg[iNextOff]==0x00 ) iNextOff++; + if( iNextOffiEndofDoclist && aPg[iNextOff]==0x00 ) iNextOff++; + + }else{ + int nPos = 0; + iSOP += fts5GetVarint32(&aPg[iSOP], nPos); + while( iSOPiLeafOffset ){ + iStart = iSOP + (nPos/2); + iSOP = iStart + fts5GetVarint(&aPg[iStart], &iDelta); + iSOP += fts5GetVarint32(&aPg[iSOP], nPos); + } + assert_nc( iSOP==pSeg->iLeafOffset ); + iNextOff = pSeg->iLeafOffset + pSeg->nPos; + } + } + + iOff = iStart; + + /* If the position-list for the entry being removed flows over past + ** the end of this page, delete the portion of the position-list on the + ** next page and beyond. + ** + ** Set variable bLastInDoclist to true if this entry happens + ** to be the last rowid in the doclist for its term. */ + if( iNextOff>=iPgIdx ){ + int pgno = pSeg->iLeafPgno+1; + fts5SecureDeleteOverflow(p, pSeg->pSeg, pgno, &bLastInDoclist); + iNextOff = iPgIdx; + } + + if( pSeg->bDel==0 ){ + if( iNextOff!=iPgIdx ){ + /* Loop through the page-footer. If iNextOff (offset of the + ** entry following the one we are removing) is equal to the + ** offset of a key on this page, then the entry is the last + ** in its doclist. */ + int iKeyOff = 0; + for(iIdx=0; iIdxbDel ){ + iOff += sqlite3Fts5PutVarint(&aPg[iOff], iDelta); + aPg[iOff++] = 0x01; + }else if( bLastInDoclist==0 ){ + if( iNextOff!=iPgIdx ){ + u64 iNextDelta = 0; + iNextOff += fts5GetVarint(&aPg[iNextOff], &iNextDelta); + iOff += sqlite3Fts5PutVarint(&aPg[iOff], iDelta + iNextDelta); + } + }else if( + pSeg->iLeafPgno==pSeg->iTermLeafPgno + && iStart==pSeg->iTermLeafOffset + ){ + /* The entry being removed was the only position list in its + ** doclist. Therefore the term needs to be removed as well. */ + int iKey = 0; + int iKeyOff = 0; + + /* Set iKeyOff to the offset of the term that will be removed - the + ** last offset in the footer that is not greater than iStart. */ + for(iIdx=0; iIdx(u32)iStart ) break; + iKeyOff += iVal; + } + assert_nc( iKey>=1 ); + + /* Set iDelKeyOff to the value of the footer entry to remove from + ** the page. 
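+    **
+    ** (Illustrative note: the footer stores term offsets as varint
+    ** deltas, so a footer describing keys at byte offsets 40, 100 and
+    ** 130 holds the three varints 40, 60 and 30; iKeyOff above is
+    ** recovered by summing successive deltas.)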
*/ + iDelKeyOff = iOff = iKeyOff; + + if( iNextOff!=iPgIdx ){ + /* This is the only position-list associated with the term, and there + ** is another term following it on this page. So the subsequent term + ** needs to be moved to replace the term associated with the entry + ** being removed. */ + int nPrefix = 0; + int nSuffix = 0; + int nPrefix2 = 0; + int nSuffix2 = 0; + + iDelKeyOff = iNextOff; + iNextOff += fts5GetVarint32(&aPg[iNextOff], nPrefix2); + iNextOff += fts5GetVarint32(&aPg[iNextOff], nSuffix2); + + if( iKey!=1 ){ + iKeyOff += fts5GetVarint32(&aPg[iKeyOff], nPrefix); + } + iKeyOff += fts5GetVarint32(&aPg[iKeyOff], nSuffix); + + nPrefix = MIN(nPrefix, nPrefix2); + nSuffix = (nPrefix2 + nSuffix2) - nPrefix; + + if( (iKeyOff+nSuffix)>iPgIdx || (iNextOff+nSuffix2)>iPgIdx ){ + p->rc = FTS5_CORRUPT; + }else{ + if( iKey!=1 ){ + iOff += sqlite3Fts5PutVarint(&aPg[iOff], nPrefix); + } + iOff += sqlite3Fts5PutVarint(&aPg[iOff], nSuffix); + if( nPrefix2>pSeg->term.n ){ + p->rc = FTS5_CORRUPT; + }else if( nPrefix2>nPrefix ){ + memcpy(&aPg[iOff], &pSeg->term.p[nPrefix], nPrefix2-nPrefix); + iOff += (nPrefix2-nPrefix); + } + memmove(&aPg[iOff], &aPg[iNextOff], nSuffix2); + iOff += nSuffix2; + iNextOff += nSuffix2; + } + } + }else if( iStart==4 ){ + int iPgno; + + assert_nc( pSeg->iLeafPgno>pSeg->iTermLeafPgno ); + /* The entry being removed may be the only position list in + ** its doclist. */ + for(iPgno=pSeg->iLeafPgno-1; iPgno>pSeg->iTermLeafPgno; iPgno-- ){ + Fts5Data *pPg = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, iPgno)); + int bEmpty = (pPg && pPg->nn==4); + fts5DataRelease(pPg); + if( bEmpty==0 ) break; + } + + if( iPgno==pSeg->iTermLeafPgno ){ + i64 iId = FTS5_SEGMENT_ROWID(iSegid, pSeg->iTermLeafPgno); + Fts5Data *pTerm = fts5DataRead(p, iId); + if( pTerm && pTerm->szLeaf==pSeg->iTermLeafOffset ){ + u8 *aTermIdx = &pTerm->p[pTerm->szLeaf]; + int nTermIdx = pTerm->nn - pTerm->szLeaf; + int iTermIdx = 0; + int iTermOff = 0; + + while( 1 ){ + u32 iVal = 0; + int nByte = fts5GetVarint32(&aTermIdx[iTermIdx], iVal); + iTermOff += iVal; + if( (iTermIdx+nByte)>=nTermIdx ) break; + iTermIdx += nByte; + } + nTermIdx = iTermIdx; + + memmove(&pTerm->p[iTermOff], &pTerm->p[pTerm->szLeaf], nTermIdx); + fts5PutU16(&pTerm->p[2], iTermOff); + + fts5DataWrite(p, iId, pTerm->p, iTermOff+nTermIdx); + if( nTermIdx==0 ){ + fts5SecureDeleteIdxEntry(p, iSegid, pSeg->iTermLeafPgno); + } + } + fts5DataRelease(pTerm); + } + } + + /* Assuming no error has occurred, this block does final edits to the + ** leaf page before writing it back to disk. Input variables are: + ** + ** nPg: Total initial size of leaf page. + ** iPgIdx: Initial offset of page footer. + ** + ** iOff: Offset to move data to + ** iNextOff: Offset to move data from + */ + if( p->rc==SQLITE_OK ){ + const int nMove = nPg - iNextOff; /* Number of bytes to move */ + int nShift = iNextOff - iOff; /* Distance to move them */ + + int iPrevKeyOut = 0; + int iKeyIn = 0; + + memmove(&aPg[iOff], &aPg[iNextOff], nMove); + iPgIdx -= nShift; + nPg = iPgIdx; + fts5PutU16(&aPg[2], iPgIdx); + + for(iIdx=0; iIdxiOff ? 
nShift : 0)); + nPg += sqlite3Fts5PutVarint(&aPg[nPg], iKeyOut - iPrevKeyOut); + iPrevKeyOut = iKeyOut; + } + } + + if( iPgIdx==nPg && nIdx>0 && pSeg->iLeafPgno!=1 ){ + fts5SecureDeleteIdxEntry(p, iSegid, pSeg->iLeafPgno); + } + + assert_nc( nPg>4 || fts5GetU16(aPg)==0 ); + fts5DataWrite(p, FTS5_SEGMENT_ROWID(iSegid,pSeg->iLeafPgno), aPg, nPg); + } + sqlite3_free(aIdx); +} + +/* +** This is called as part of flushing a delete to disk in 'secure-delete' +** mode. It edits the segments within the database described by argument +** pStruct to remove the entries for term zTerm, rowid iRowid. +*/ +static void fts5FlushSecureDelete( + Fts5Index *p, + Fts5Structure *pStruct, + const char *zTerm, + i64 iRowid +){ + const int f = FTS5INDEX_QUERY_SKIPHASH; + int nTerm = (int)strlen(zTerm); + Fts5Iter *pIter = 0; /* Used to find term instance */ + + fts5MultiIterNew(p, pStruct, f, 0, (const u8*)zTerm, nTerm, -1, 0, &pIter); + if( fts5MultiIterEof(p, pIter)==0 ){ + i64 iThis = fts5MultiIterRowid(pIter); + if( iThisrc==SQLITE_OK + && fts5MultiIterEof(p, pIter)==0 + && iRowid==fts5MultiIterRowid(pIter) + ){ + Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; + fts5DoSecureDelete(p, pSeg); + } + } + + fts5MultiIterFree(pIter); +} + + /* ** Flush the contents of in-memory hash table iHash to a new level-0 ** segment on disk. Also update the corresponding structure record. @@ -231668,143 +241754,198 @@ static void fts5FlushOneHash(Fts5Index *p){ /* Obtain a reference to the index structure and allocate a new segment-id ** for the new level-0 segment. */ pStruct = fts5StructureRead(p); - iSegid = fts5AllocateSegid(p, pStruct); fts5StructureInvalidate(p); - if( iSegid ){ - const int pgsz = p->pConfig->pgsz; - int eDetail = p->pConfig->eDetail; - Fts5StructureSegment *pSeg; /* New segment within pStruct */ - Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */ - Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */ - - Fts5SegWriter writer; - fts5WriteInit(p, &writer, iSegid); - - pBuf = &writer.writer.buf; - pPgidx = &writer.writer.pgidx; + if( sqlite3Fts5HashIsEmpty(pHash)==0 ){ + iSegid = fts5AllocateSegid(p, pStruct); + if( iSegid ){ + const int pgsz = p->pConfig->pgsz; + int eDetail = p->pConfig->eDetail; + int bSecureDelete = p->pConfig->bSecureDelete; + Fts5StructureSegment *pSeg; /* New segment within pStruct */ + Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */ + Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */ + + Fts5SegWriter writer; + fts5WriteInit(p, &writer, iSegid); + + pBuf = &writer.writer.buf; + pPgidx = &writer.writer.pgidx; + + /* fts5WriteInit() should have initialized the buffers to (most likely) + ** the maximum space required. */ + assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) ); + assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) ); + + /* Begin scanning through hash table entries. This loop runs once for each + ** term/doclist currently stored within the hash table. */ + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0); + } + while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){ + const char *zTerm; /* Buffer containing term */ + int nTerm; /* Size of zTerm in bytes */ + const u8 *pDoclist; /* Pointer to doclist for this term */ + int nDoclist; /* Size of doclist in bytes */ + + /* Get the term and doclist for this entry. 
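+        **
+        ** (Illustrative layout, with invented values: a doclist for
+        ** rowids 10 and 14 begins with varint 10, then the size+flags
+        ** varint and position list for rowid 10, then varint 4, the
+        ** delta 14-10, and so on to the end of the doclist.)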
*/ + sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist); + nTerm = (int)strlen(zTerm); + if( bSecureDelete==0 ){ + fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm); + if( p->rc!=SQLITE_OK ) break; + assert( writer.bFirstRowidInPage==0 ); + } + + if( !bSecureDelete && pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){ + /* The entire doclist will fit on the current leaf. */ + fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist); + }else{ + int bTermWritten = !bSecureDelete; + i64 iRowid = 0; + i64 iPrev = 0; + int iOff = 0; + + /* The entire doclist will not fit on this leaf. The following + ** loop iterates through the poslists that make up the current + ** doclist. */ + while( p->rc==SQLITE_OK && iOffrc!=SQLITE_OK || pDoclist[iOff]==0x01 ){ + iOff++; + continue; + } + } + } - /* fts5WriteInit() should have initialized the buffers to (most likely) - ** the maximum space required. */ - assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) ); - assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) ); + if( p->rc==SQLITE_OK && bTermWritten==0 ){ + fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm); + bTermWritten = 1; + assert( p->rc!=SQLITE_OK || writer.bFirstRowidInPage==0 ); + } - /* Begin scanning through hash table entries. This loop runs once for each - ** term/doclist currently stored within the hash table. */ - if( p->rc==SQLITE_OK ){ - p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0); - } - while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){ - const char *zTerm; /* Buffer containing term */ - const u8 *pDoclist; /* Pointer to doclist for this term */ - int nDoclist; /* Size of doclist in bytes */ - - /* Write the term for this entry to disk. */ - sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist); - fts5WriteAppendTerm(p, &writer, (int)strlen(zTerm), (const u8*)zTerm); - if( p->rc!=SQLITE_OK ) break; - - assert( writer.bFirstRowidInPage==0 ); - if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){ - /* The entire doclist will fit on the current leaf. */ - fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist); - }else{ - i64 iRowid = 0; - u64 iDelta = 0; - int iOff = 0; - - /* The entire doclist will not fit on this leaf. The following - ** loop iterates through the poslists that make up the current - ** doclist. 
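+          **
+          ** (Illustrative trace: stored deltas 10, 4 and 5 decode to
+          ** rowids 10, 14 and 19 by keeping a running total.)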
*/ - while( p->rc==SQLITE_OK && iOffp[0], (u16)pBuf->n); /* first rowid on page */ - pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid); - writer.bFirstRowidInPage = 0; - fts5WriteDlidxAppend(p, &writer, iRowid); + if( writer.bFirstRowidInPage ){ + fts5PutU16(&pBuf->p[0], (u16)pBuf->n); /* first rowid on page */ + pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid); + writer.bFirstRowidInPage = 0; + fts5WriteDlidxAppend(p, &writer, iRowid); + }else{ + u64 iRowidDelta = (u64)iRowid - (u64)iPrev; + pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowidDelta); + } if( p->rc!=SQLITE_OK ) break; - }else{ - pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta); - } - assert( pBuf->n<=pBuf->nSpace ); + assert( pBuf->n<=pBuf->nSpace ); + iPrev = iRowid; - if( eDetail==FTS5_DETAIL_NONE ){ - if( iOffp[pBuf->n++] = 0; - iOff++; + if( eDetail==FTS5_DETAIL_NONE ){ if( iOffp[pBuf->n++] = 0; iOff++; + if( iOffp[pBuf->n++] = 0; + iOff++; + } + } + if( (pBuf->n + pPgidx->n)>=pgsz ){ + fts5WriteFlushLeaf(p, &writer); } - } - if( (pBuf->n + pPgidx->n)>=pgsz ){ - fts5WriteFlushLeaf(p, &writer); - } - }else{ - int bDummy; - int nPos; - int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy); - nCopy += nPos; - if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){ - /* The entire poslist will fit on the current leaf. So copy - ** it in one go. */ - fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy); }else{ - /* The entire poslist will not fit on this leaf. So it needs - ** to be broken into sections. The only qualification being - ** that each varint must be stored contiguously. */ - const u8 *pPoslist = &pDoclist[iOff]; - int iPos = 0; - while( p->rc==SQLITE_OK ){ - int nSpace = pgsz - pBuf->n - pPgidx->n; - int n = 0; - if( (nCopy - iPos)<=nSpace ){ - n = nCopy - iPos; - }else{ - n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); - } - assert( n>0 ); - fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n); - iPos += n; - if( (pBuf->n + pPgidx->n)>=pgsz ){ - fts5WriteFlushLeaf(p, &writer); + int bDel = 0; + int nPos = 0; + int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDel); + if( bDel && bSecureDelete ){ + fts5BufferAppendVarint(&p->rc, pBuf, nPos*2); + iOff += nCopy; + nCopy = nPos; + }else{ + nCopy += nPos; + } + if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){ + /* The entire poslist will fit on the current leaf. So copy + ** it in one go. */ + fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy); + }else{ + /* The entire poslist will not fit on this leaf. So it needs + ** to be broken into sections. The only qualification being + ** that each varint must be stored contiguously. */ + const u8 *pPoslist = &pDoclist[iOff]; + int iPos = 0; + while( p->rc==SQLITE_OK ){ + int nSpace = pgsz - pBuf->n - pPgidx->n; + int n = 0; + if( (nCopy - iPos)<=nSpace ){ + n = nCopy - iPos; + }else{ + n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); + } + assert( n>0 ); + fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n); + iPos += n; + if( (pBuf->n + pPgidx->n)>=pgsz ){ + fts5WriteFlushLeaf(p, &writer); + } + if( iPos>=nCopy ) break; } - if( iPos>=nCopy ) break; } + iOff += nCopy; } - iOff += nCopy; } } - } - /* TODO2: Doclist terminator written here. */ - /* pBuf->p[pBuf->n++] = '\0'; */ - assert( pBuf->n<=pBuf->nSpace ); - if( p->rc==SQLITE_OK ) sqlite3Fts5HashScanNext(pHash); - } - sqlite3Fts5HashClear(pHash); - fts5WriteFinish(p, &writer, &pgnoLast); + /* TODO2: Doclist terminator written here. 
*/ + /* pBuf->p[pBuf->n++] = '\0'; */ + assert( pBuf->n<=pBuf->nSpace ); + if( p->rc==SQLITE_OK ) sqlite3Fts5HashScanNext(pHash); + } + fts5WriteFinish(p, &writer, &pgnoLast); - /* Update the Fts5Structure. It is written back to the database by the - ** fts5StructureRelease() call below. */ - if( pStruct->nLevel==0 ){ - fts5StructureAddLevel(&p->rc, &pStruct); - } - fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0); - if( p->rc==SQLITE_OK ){ - pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ]; - pSeg->iSegid = iSegid; - pSeg->pgnoFirst = 1; - pSeg->pgnoLast = pgnoLast; - pStruct->nSegment++; + assert( p->rc!=SQLITE_OK || bSecureDelete || pgnoLast>0 ); + if( pgnoLast>0 ){ + /* Update the Fts5Structure. It is written back to the database by the + ** fts5StructureRelease() call below. */ + if( pStruct->nLevel==0 ){ + fts5StructureAddLevel(&p->rc, &pStruct); + } + fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0); + if( p->rc==SQLITE_OK ){ + pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ]; + pSeg->iSegid = iSegid; + pSeg->pgnoFirst = 1; + pSeg->pgnoLast = pgnoLast; + if( pStruct->nOriginCntr>0 ){ + pSeg->iOrigin1 = pStruct->nOriginCntr; + pSeg->iOrigin2 = pStruct->nOriginCntr; + pSeg->nEntry = p->nPendingRow; + pStruct->nOriginCntr++; + } + pStruct->nSegment++; + } + fts5StructurePromote(p, 0, pStruct); + } } - fts5StructurePromote(p, 0, pStruct); } - fts5IndexAutomerge(p, &pStruct, pgnoLast); + fts5IndexAutomerge(p, &pStruct, pgnoLast + p->nContentlessDelete); fts5IndexCrisismerge(p, &pStruct); fts5StructureWrite(p, pStruct); fts5StructureRelease(pStruct); @@ -231815,10 +241956,15 @@ static void fts5FlushOneHash(Fts5Index *p){ */ static void fts5IndexFlush(Fts5Index *p){ /* Unless it is empty, flush the hash table to disk */ - if( p->nPendingData ){ + if( p->nPendingData || p->nContentlessDelete ){ assert( p->pHash ); - p->nPendingData = 0; fts5FlushOneHash(p); + if( p->rc==SQLITE_OK ){ + sqlite3Fts5HashClear(p->pHash); + p->nPendingData = 0; + p->nPendingRow = 0; + p->nContentlessDelete = 0; + } } } @@ -231834,17 +241980,22 @@ static Fts5Structure *fts5IndexOptimizeStruct( /* Figure out if this structure requires optimization. A structure does ** not require optimization if either: ** - ** + it consists of fewer than two segments, or - ** + all segments are on the same level, or - ** + all segments except one are currently inputs to a merge operation. + ** 1. it consists of fewer than two segments, or + ** 2. all segments are on the same level, or + ** 3. all segments except one are currently inputs to a merge operation. ** - ** In the first case, return NULL. In the second, increment the ref-count - ** on *pStruct and return a copy of the pointer to it. + ** In the first case, if there are no tombstone hash pages, return NULL. In + ** the second, increment the ref-count on *pStruct and return a copy of the + ** pointer to it. 
*/ - if( nSeg<2 ) return 0; + if( nSeg==0 ) return 0; for(i=0; inLevel; i++){ int nThis = pStruct->aLevel[i].nSeg; - if( nThis==nSeg || (nThis==nSeg-1 && pStruct->aLevel[i].nMerge==nThis) ){ + int nMerge = pStruct->aLevel[i].nMerge; + if( nThis>0 && (nThis==nSeg || (nThis==nSeg-1 && nMerge==nThis)) ){ + if( nSeg==1 && nThis==1 && pStruct->aLevel[i].aSeg[0].nPgTombstone==0 ){ + return 0; + } fts5StructureRef(pStruct); return pStruct; } @@ -231857,10 +242008,11 @@ static Fts5Structure *fts5IndexOptimizeStruct( if( pNew ){ Fts5StructureLevel *pLvl; nByte = nSeg * sizeof(Fts5StructureSegment); - pNew->nLevel = pStruct->nLevel+1; + pNew->nLevel = MIN(pStruct->nLevel+1, FTS5_MAX_LEVEL); pNew->nRef = 1; pNew->nWriteCounter = pStruct->nWriteCounter; - pLvl = &pNew->aLevel[pStruct->nLevel]; + pNew->nOriginCntr = pStruct->nOriginCntr; + pLvl = &pNew->aLevel[pNew->nLevel-1]; pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte); if( pLvl->aSeg ){ int iLvl, iSeg; @@ -231890,7 +242042,9 @@ static int sqlite3Fts5IndexOptimize(Fts5Index *p){ assert( p->rc==SQLITE_OK ); fts5IndexFlush(p); + assert( p->rc!=SQLITE_OK || p->nContentlessDelete==0 ); pStruct = fts5StructureRead(p); + assert( p->rc!=SQLITE_OK || pStruct!=0 ); fts5StructureInvalidate(p); if( pStruct ){ @@ -231919,7 +242073,10 @@ static int sqlite3Fts5IndexOptimize(Fts5Index *p){ ** INSERT command. */ static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){ - Fts5Structure *pStruct = fts5StructureRead(p); + Fts5Structure *pStruct = 0; + + fts5IndexFlush(p); + pStruct = fts5StructureRead(p); if( pStruct ){ int nMin = p->pConfig->nUsermerge; fts5StructureInvalidate(p); @@ -231927,7 +242084,7 @@ static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){ Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct); fts5StructureRelease(pStruct); pStruct = pNew; - nMin = 2; + nMin = 1; nMerge = nMerge*-1; } if( pStruct && pStruct->nLevel ){ @@ -231942,7 +242099,7 @@ static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){ static void fts5AppendRowid( Fts5Index *p, - i64 iDelta, + u64 iDelta, Fts5Iter *pUnused, Fts5Buffer *pBuf ){ @@ -231952,7 +242109,7 @@ static void fts5AppendRowid( static void fts5AppendPoslist( Fts5Index *p, - i64 iDelta, + u64 iDelta, Fts5Iter *pMulti, Fts5Buffer *pBuf ){ @@ -232027,10 +242184,10 @@ static void fts5MergeAppendDocid( } #endif -#define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) { \ - assert( (pBuf)->n!=0 || (iLastRowid)==0 ); \ - fts5BufferSafeAppendVarint((pBuf), (iRowid) - (iLastRowid)); \ - (iLastRowid) = (iRowid); \ +#define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) { \ + assert( (pBuf)->n!=0 || (iLastRowid)==0 ); \ + fts5BufferSafeAppendVarint((pBuf), (u64)(iRowid) - (u64)(iLastRowid)); \ + (iLastRowid) = (iRowid); \ } /* @@ -232162,7 +242319,7 @@ static void fts5MergePrefixLists( /* Initialize a doclist-iterator for each input buffer. Arrange them in ** a linked-list starting at pHead in ascending order of rowid. Avoid ** linking any iterators already at EOF into the linked list at all. 
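  **
  ** A sketch of an ascending-order insertion of this kind (assuming the
  ** pNext link and iter.iRowid fields suggested by the surrounding code):
  **
  **   PrefixMerger **pp = &pHead;
  **   while( *pp && (*pp)->iter.iRowid<pNew->iter.iRowid ) pp = &(*pp)->pNext;
  **   pNew->pNext = *pp;
  **   *pp = pNew;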
*/ - assert( nBuf+1<=sizeof(aMerger)/sizeof(aMerger[0]) ); + assert( nBuf+1<=(int)(sizeof(aMerger)/sizeof(aMerger[0])) ); memset(aMerger, 0, sizeof(PrefixMerger)*(nBuf+1)); pHead = &aMerger[nBuf]; fts5DoclistIterInit(p1, &pHead->iter); @@ -232301,7 +242458,7 @@ static void fts5SetupPrefixIter( int nMerge = 1; void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*); - void (*xAppend)(Fts5Index*, i64, Fts5Iter*, Fts5Buffer*); + void (*xAppend)(Fts5Index*, u64, Fts5Iter*, Fts5Buffer*); if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ xMerge = fts5MergeRowidLists; xAppend = fts5AppendRowid; @@ -232340,7 +242497,7 @@ static void fts5SetupPrefixIter( Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; p1->xSetOutputs(p1, pSeg); if( p1->base.nData ){ - xAppend(p, p1->base.iRowid-iLastRowid, p1, &doclist); + xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist); iLastRowid = p1->base.iRowid; } } @@ -232388,7 +242545,7 @@ static void fts5SetupPrefixIter( iLastRowid = 0; } - xAppend(p, p1->base.iRowid-iLastRowid, p1, &doclist); + xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist); iLastRowid = p1->base.iRowid; } @@ -232441,6 +242598,9 @@ static int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){ p->iWriteRowid = iRowid; p->bDelete = bDelete; + if( bDelete==0 ){ + p->nPendingRow++; + } return fts5IndexReturn(p); } @@ -232478,6 +242638,9 @@ static int sqlite3Fts5IndexReinit(Fts5Index *p){ fts5StructureInvalidate(p); fts5IndexDiscardData(p); memset(&s, 0, sizeof(Fts5Structure)); + if( p->pConfig->bContentlessDelete ){ + s.nOriginCntr = 1; + } fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0); fts5StructureWrite(p, &s); return fts5IndexReturn(p); @@ -232542,6 +242705,7 @@ static int sqlite3Fts5IndexClose(Fts5Index *p){ sqlite3_finalize(p->pIdxDeleter); sqlite3_finalize(p->pIdxSelect); sqlite3_finalize(p->pDataVersion); + sqlite3_finalize(p->pDeleteFromIdx); sqlite3Fts5HashFree(p->pHash); sqlite3_free(p->zDataTbl); sqlite3_free(p); @@ -232868,6 +243032,347 @@ static int sqlite3Fts5IndexLoadConfig(Fts5Index *p){ return fts5IndexReturn(p); } +/* +** Retrieve the origin value that will be used for the segment currently +** being accumulated in the in-memory hash table when it is flushed to +** disk. If successful, SQLITE_OK is returned and (*piOrigin) set to +** the queried value. Or, if an error occurs, an error code is returned +** and the final value of (*piOrigin) is undefined. +*/ +static int sqlite3Fts5IndexGetOrigin(Fts5Index *p, i64 *piOrigin){ + Fts5Structure *pStruct; + pStruct = fts5StructureRead(p); + if( pStruct ){ + *piOrigin = pStruct->nOriginCntr; + fts5StructureRelease(pStruct); + } + return fts5IndexReturn(p); +} + +/* +** Buffer pPg contains a page of a tombstone hash table - one of nPg pages +** associated with the same segment. This function adds rowid iRowid to +** the hash table. The caller is required to guarantee that there is at +** least one free slot on the page. +** +** If parameter bForce is false and the hash table is deemed to be full +** (more than half of the slots are occupied), then non-zero is returned +** and iRowid not inserted. Or, if bForce is true or if the hash table page +** is not full, iRowid is inserted and zero returned. 
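+**
+** Worked example with illustrative numbers: in a table of nPg=2 pages
+** of nSlot=32 slots each, rowid 123456789 is assigned slot
+** ((123456789/2) % 32)==10 on its page; occupied slots are then probed
+** linearly, wrapping around at nSlot.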
+*/
+static int fts5IndexTombstoneAddToPage(
+  Fts5Data *pPg,
+  int bForce,
+  int nPg,
+  u64 iRowid
+){
+  const int szKey = TOMBSTONE_KEYSIZE(pPg);
+  const int nSlot = TOMBSTONE_NSLOT(pPg);
+  const int nElem = fts5GetU32(&pPg->p[4]);
+  int iSlot = (iRowid / nPg) % nSlot;
+  int nCollide = nSlot;
+
+  if( szKey==4 && iRowid>0xFFFFFFFF ) return 2;
+  if( iRowid==0 ){
+    pPg->p[1] = 0x01;
+    return 0;
+  }
+
+  if( bForce==0 && nElem>=(nSlot/2) ){
+    return 1;
+  }
+
+  fts5PutU32(&pPg->p[4], nElem+1);
+  if( szKey==4 ){
+    u32 *aSlot = (u32*)&pPg->p[8];
+    while( aSlot[iSlot] ){
+      iSlot = (iSlot + 1) % nSlot;
+      if( nCollide--==0 ) return 0;
+    }
+    fts5PutU32((u8*)&aSlot[iSlot], (u32)iRowid);
+  }else{
+    u64 *aSlot = (u64*)&pPg->p[8];
+    while( aSlot[iSlot] ){
+      iSlot = (iSlot + 1) % nSlot;
+      if( nCollide--==0 ) return 0;
+    }
+    fts5PutU64((u8*)&aSlot[iSlot], iRowid);
+  }
+
+  return 0;
+}
+
+/*
+** This function attempts to build a new hash containing all the keys
+** currently in the tombstone hash table for segment pSeg. The new
+** hash will be stored in the nOut buffers passed in array apOut[].
+** All pages of the new hash use key-size szKey (4 or 8).
+**
+** Return 0 if the hash is successfully rebuilt into the nOut pages.
+** Or non-zero if it is not (because one page became overfull). In this
+** case the caller should retry with a larger nOut parameter.
+**
+** Parameter pData1 is page iPg1 of the hash table being rebuilt.
+*/
+static int fts5IndexTombstoneRehash(
+  Fts5Index *p,
+  Fts5StructureSegment *pSeg,     /* Segment to rebuild hash of */
+  Fts5Data *pData1,               /* One page of current hash - or NULL */
+  int iPg1,                       /* Which page of the current hash is pData1 */
+  int szKey,                      /* 4 or 8, the keysize */
+  int nOut,                       /* Number of output pages */
+  Fts5Data **apOut                /* Array of output hash pages */
+){
+  int ii;
+  int res = 0;
+
+  /* Initialize the headers of all the output pages */
+  for(ii=0; ii<nOut; ii++){
+    apOut[ii]->p[0] = szKey;
+    fts5PutU32(&apOut[ii]->p[4], 0);
+  }
+
+  /* Loop through the current pages of the hash table. */
+  for(ii=0; res==0 && ii<pSeg->nPgTombstone; ii++){
+    Fts5Data *pData = 0;          /* Page ii of the current hash table */
+    Fts5Data *pFree = 0;          /* Free this at the end of the loop */
+
+    if( iPg1==ii ){
+      pData = pData1;
+    }else{
+      pFree = pData = fts5DataRead(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid, ii));
+    }
+
+    if( pData ){
+      int szKeyIn = TOMBSTONE_KEYSIZE(pData);
+      int nSlotIn = (pData->nn - 8) / szKeyIn;
+      int iIn;
+      for(iIn=0; iIn<nSlotIn; iIn++){
+        u64 iVal = 0;
+        if( szKeyIn==4 ){
+          u32 *aSlot = (u32*)&pData->p[8];
+          if( aSlot[iIn] ) iVal = fts5GetU32((u8*)&aSlot[iIn]);
+        }else{
+          u64 *aSlot = (u64*)&pData->p[8];
+          if( aSlot[iIn] ) iVal = fts5GetU64((u8*)&aSlot[iIn]);
+        }
+
+        /* If iVal is not 0 at this point, insert it into the new hash table */
+        if( iVal ){
+          Fts5Data *pPg = apOut[(iVal % nOut)];
+          res = fts5IndexTombstoneAddToPage(pPg, 0, nOut, iVal);
+          if( res ) break;
+        }
+      }
+
+      /* If this is page 0 of the old hash, copy the rowid-0-flag from the
+      ** old hash to the new. */
+      if( ii==0 ){
+        apOut[0]->p[1] = pData->p[1];
+      }
+    }
+    fts5DataRelease(pFree);
+  }
+
+  return res;
+}
+
+/*
+** This is called to rebuild the hash table belonging to segment pSeg.
+** If parameter pData1 is not NULL, then one page of the existing hash table
+** has already been loaded - pData1, which is page iPg1. The key-size for
+** the new hash table is szKey (4 or 8).
+**
+** If successful, the new hash table is not written to disk. Instead,
+** output parameter (*pnOut) is set to the number of pages in the new
+** hash table, and (*papOut) to point to an array of buffers containing
+** the new page data.
+**
+** If an error occurs, an error code is left in the Fts5Index object and
+** both output parameters set to 0 before returning.
+*/
+static void fts5IndexTombstoneRebuild(
+  Fts5Index *p,
+  Fts5StructureSegment *pSeg,     /* Segment to rebuild hash of */
+  Fts5Data *pData1,               /* One page of current hash - or NULL */
+  int iPg1,                       /* Which page of the current hash is pData1 */
+  int szKey,                      /* 4 or 8, the keysize */
+  int *pnOut,                     /* OUT: Number of output pages */
+  Fts5Data ***papOut              /* OUT: Output hash pages */
+){
+  const int MINSLOT = 32;
+  int nSlotPerPage = MAX(MINSLOT, (p->pConfig->pgsz - 8) / szKey);
+  int nSlot = 0;                  /* Number of slots in each output page */
+  int nOut = 0;
+
+  /* Figure out how many output pages (nOut) and how many slots per
+  ** page (nSlot). There are three possibilities:
+  **
+  **   1. The hash table does not yet exist. In this case the new hash
+  **      table will consist of a single page with MINSLOT slots.
+  **
+  **   2. The hash table exists but is currently a single page. In this
+  **      case an attempt is made to grow the page to accommodate the new
+  **      entry. The page is allowed to grow up to nSlotPerPage (see above)
+  **      slots.
+  **
+  **   3. The hash table already consists of more than one page, or of
+  **      a single page already so large that it cannot be grown. In this
+  **      case the new hash consists of (nPg*2+1) pages of nSlotPerPage
+  **      slots each, where nPg is the current number of pages in the
+  **      hash table.
+  */
+  if( pSeg->nPgTombstone==0 ){
+    /* Case 1. */
+    nOut = 1;
+    nSlot = MINSLOT;
+  }else if( pSeg->nPgTombstone==1 ){
+    /* Case 2. */
+    int nElem = (int)fts5GetU32(&pData1->p[4]);
+    assert( pData1 && iPg1==0 );
+    nOut = 1;
+    nSlot = MAX(nElem*4, MINSLOT);
+    if( nSlot>nSlotPerPage ) nOut = 0;
+  }
+  if( nOut==0 ){
+    /* Case 3. */
+    nOut = (pSeg->nPgTombstone * 2 + 1);
+    nSlot = nSlotPerPage;
+  }
+
+  /* Allocate the required array and output pages */
+  while( 1 ){
+    int res = 0;
+    int ii = 0;
+    int szPage = 0;
+    Fts5Data **apOut = 0;
+
+    /* Allocate space for the new hash table */
+    assert( nSlot>=MINSLOT );
+    apOut = (Fts5Data**)sqlite3Fts5MallocZero(&p->rc, sizeof(Fts5Data*) * nOut);
+    szPage = 8 + nSlot*szKey;
+    for(ii=0; ii<nOut; ii++){
+      Fts5Data *pNew = (Fts5Data*)sqlite3Fts5MallocZero(&p->rc,
+          sizeof(Fts5Data)+szPage
+      );
+      if( pNew ){
+        pNew->nn = szPage;
+        pNew->p = (u8*)&pNew[1];
+        apOut[ii] = pNew;
+      }
+    }
+
+    /* Rebuild the hash table. */
+    if( p->rc==SQLITE_OK ){
+      res = fts5IndexTombstoneRehash(p, pSeg, pData1, iPg1, szKey, nOut, apOut);
+    }
+    if( res==0 ){
+      if( p->rc ){
+        fts5IndexFreeArray(apOut, nOut);
+        apOut = 0;
+        nOut = 0;
+      }
+      *pnOut = nOut;
+      *papOut = apOut;
+      break;
+    }
+
+    /* If control flows to here, it was not possible to rebuild the hash
+    ** table. Free all buffers and then try again with more pages. */
+    assert( p->rc==SQLITE_OK );
+    fts5IndexFreeArray(apOut, nOut);
+    nSlot = nSlotPerPage;
+    nOut = nOut*2 + 1;
+  }
+}
+
+
+/*
+** Add a tombstone for rowid iRowid to segment pSeg.
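+**
+** The target page is (iRowid % pSeg->nPgTombstone), and the key-size is
+** widened from 4 to 8 bytes once a rowid exceeds 0xFFFFFFFF. For
+** example (illustrative): adding rowid 0x100000000 to a table with
+** 4-byte keys forces a rebuild with szKey==8.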
+*/
+static void fts5IndexTombstoneAdd(
+  Fts5Index *p,
+  Fts5StructureSegment *pSeg,
+  u64 iRowid
+){
+  Fts5Data *pPg = 0;
+  int iPg = -1;
+  int szKey = 0;
+  int nHash = 0;
+  Fts5Data **apHash = 0;
+
+  p->nContentlessDelete++;
+
+  if( pSeg->nPgTombstone>0 ){
+    iPg = iRowid % pSeg->nPgTombstone;
+    pPg = fts5DataRead(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid,iPg));
+    if( pPg==0 ){
+      assert( p->rc!=SQLITE_OK );
+      return;
+    }
+
+    if( 0==fts5IndexTombstoneAddToPage(pPg, 0, pSeg->nPgTombstone, iRowid) ){
+      fts5DataWrite(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid,iPg), pPg->p, pPg->nn);
+      fts5DataRelease(pPg);
+      return;
+    }
+  }
+
+  /* Have to rebuild the hash table. First figure out the key-size (4 or 8). */
+  szKey = pPg ? TOMBSTONE_KEYSIZE(pPg) : 4;
+  if( iRowid>0xFFFFFFFF ) szKey = 8;
+
+  /* Rebuild the hash table */
+  fts5IndexTombstoneRebuild(p, pSeg, pPg, iPg, szKey, &nHash, &apHash);
+  assert( p->rc==SQLITE_OK || (nHash==0 && apHash==0) );
+
+  /* If all has succeeded, write the new rowid into one of the new hash
+  ** table pages, then write them all out to disk. */
+  if( nHash ){
+    int ii = 0;
+    fts5IndexTombstoneAddToPage(apHash[iRowid % nHash], 1, nHash, iRowid);
+    for(ii=0; ii<nHash; ii++){
+      i64 iTombstoneRowid = FTS5_TOMBSTONE_ROWID(pSeg->iSegid, ii);
+      fts5DataWrite(p, iTombstoneRowid, apHash[ii]->p, apHash[ii]->nn);
+    }
+    pSeg->nPgTombstone = nHash;
+    fts5StructureWrite(p, p->pStruct);
+  }
+
+  fts5DataRelease(pPg);
+  fts5IndexFreeArray(apHash, nHash);
+}
+
+/*
+** Add iRowid to the tombstone list of the segment or segments that contain
+** rows from origin iOrigin. Return SQLITE_OK if successful, or an SQLite
+** error code otherwise.
+*/
+static int sqlite3Fts5IndexContentlessDelete(Fts5Index *p, i64 iOrigin, i64 iRowid){
+  Fts5Structure *pStruct;
+  pStruct = fts5StructureRead(p);
+  if( pStruct ){
+    int bFound = 0;               /* True after pSeg->nEntryTombstone incr. */
+    int iLvl;
+    for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){
+      int iSeg;
+      for(iSeg=pStruct->aLevel[iLvl].nSeg-1; iSeg>=0; iSeg--){
+        Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
+        if( pSeg->iOrigin1<=(u64)iOrigin && pSeg->iOrigin2>=(u64)iOrigin ){
+          if( bFound==0 ){
+            pSeg->nEntryTombstone++;
+            bFound = 1;
+          }
+          fts5IndexTombstoneAdd(p, pSeg, iRowid);
+        }
+      }
+    }
+    fts5StructureRelease(pStruct);
+  }
+  return fts5IndexReturn(p);
+}

 /*************************************************************************
 **************************************************************************
@@ -233172,6 +243677,7 @@ static void fts5IndexIntegrityCheckSegment(
   Fts5StructureSegment *pSeg      /* Segment to check internal consistency */
 ){
   Fts5Config *pConfig = p->pConfig;
+  int bSecureDelete = (pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE);
   sqlite3_stmt *pStmt = 0;
   int rc2;
   int iIdxPrevLeaf = pSeg->pgnoFirst-1;
@@ -233207,7 +243713,19 @@
     ** is also a rowid pointer within the leaf page header, it points to a
     ** location before the term. */
     if( pLeaf->nn<=pLeaf->szLeaf ){
-      p->rc = FTS5_CORRUPT;
+
+      if( nIdxTerm==0
+       && pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE
+       && pLeaf->nn==pLeaf->szLeaf
+       && pLeaf->nn==4
+      ){
+        /* special case - the very first page in a segment keeps its %_idx
+        ** entry even if all the terms are removed from it by secure-delete
+        ** operations.
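+        ** (By contrast, an emptied page after the first is rewritten as
+        ** the 4-byte header-only page {0x00,0x00,0x00,0x04} by
+        ** fts5SecureDeleteOverflow() and its %_idx entry is dropped via
+        ** fts5SecureDeleteIdxEntry().)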
*/ + }else{ + p->rc = FTS5_CORRUPT; + } + }else{ int iOff; /* Offset of first term on leaf */ int iRowidOff; /* Offset of first rowid on leaf */ @@ -233271,9 +243789,12 @@ static void fts5IndexIntegrityCheckSegment( ASSERT_SZLEAF_OK(pLeaf); if( iRowidOff>=pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; - }else{ + }else if( bSecureDelete==0 || iRowidOff>0 ){ + i64 iDlRowid = fts5DlidxIterRowid(pDlidx); fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); - if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT; + if( iRowidrc = FTS5_CORRUPT; + } } fts5DataRelease(pLeaf); } @@ -233367,6 +243888,7 @@ static int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum, int bUseCksum /* If this is a new term, query for it. Update cksum3 with the results. */ fts5TestTerm(p, &term, z, n, cksum2, &cksum3); + if( p->rc ) break; if( eDetail==FTS5_DETAIL_NONE ){ if( 0==fts5MultiIterIsEmpty(p, pIter) ){ @@ -233402,13 +243924,14 @@ static int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum, int bUseCksum ** function only. */ -#ifdef SQLITE_TEST +#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) /* ** Decode a segment-data rowid from the %_data table. This function is ** the opposite of macro FTS5_SEGMENT_ROWID(). */ static void fts5DecodeRowid( i64 iRowid, /* Rowid from %_data table */ + int *pbTombstone, /* OUT: Tombstone hash flag */ int *piSegid, /* OUT: Segment id */ int *pbDlidx, /* OUT: Dlidx flag */ int *piHeight, /* OUT: Height */ @@ -233424,13 +243947,16 @@ static void fts5DecodeRowid( iRowid >>= FTS5_DATA_DLI_B; *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1)); + iRowid >>= FTS5_DATA_ID_B; + + *pbTombstone = (int)(iRowid & 0x0001); } -#endif /* SQLITE_TEST */ +#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ -#ifdef SQLITE_TEST +#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ - int iSegid, iHeight, iPgno, bDlidx; /* Rowid compenents */ - fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno); + int iSegid, iHeight, iPgno, bDlidx, bTomb; /* Rowid compenents */ + fts5DecodeRowid(iKey, &bTomb, &iSegid, &bDlidx, &iHeight, &iPgno); if( iSegid==0 ){ if( iKey==FTS5_AVERAGES_ROWID ){ @@ -233440,14 +243966,16 @@ static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ } } else{ - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%ssegid=%d h=%d pgno=%d}", - bDlidx ? "dlidx " : "", iSegid, iHeight, iPgno + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%s%ssegid=%d h=%d pgno=%d}", + bDlidx ? "dlidx " : "", + bTomb ? 
"tombstone " : "", + iSegid, iHeight, iPgno ); } } -#endif /* SQLITE_TEST */ +#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ -#ifdef SQLITE_TEST +#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) static void fts5DebugStructure( int *pRc, /* IN/OUT: error code */ Fts5Buffer *pBuf, @@ -233462,16 +243990,22 @@ static void fts5DebugStructure( ); for(iSeg=0; iSegnSeg; iSeg++){ Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}", + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d", pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast ); + if( pSeg->iOrigin1>0 ){ + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " origin=%lld..%lld", + pSeg->iOrigin1, pSeg->iOrigin2 + ); + } + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); } sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); } } -#endif /* SQLITE_TEST */ +#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ -#ifdef SQLITE_TEST +#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) /* ** This is part of the fts5_decode() debugging aid. ** @@ -233496,9 +244030,9 @@ static void fts5DecodeStructure( fts5DebugStructure(pRc, pBuf, p); fts5StructureRelease(p); } -#endif /* SQLITE_TEST */ +#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ -#ifdef SQLITE_TEST +#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) /* ** This is part of the fts5_decode() debugging aid. ** @@ -233521,9 +244055,9 @@ static void fts5DecodeAverages( zSpace = " "; } } -#endif /* SQLITE_TEST */ +#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ -#ifdef SQLITE_TEST +#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) /* ** Buffer (a/n) is assumed to contain a list of serialized varints. Read ** each varint and append its string representation to buffer pBuf. Return @@ -233540,9 +244074,9 @@ static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ } return iOff; } -#endif /* SQLITE_TEST */ +#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ -#ifdef SQLITE_TEST +#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) /* ** The start of buffer (a/n) contains the start of a doclist. The doclist ** may or may not finish within the buffer. This function appends a text @@ -233575,9 +244109,9 @@ static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ return iOff; } -#endif /* SQLITE_TEST */ +#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ -#ifdef SQLITE_TEST +#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) /* ** This function is part of the fts5_decode() debugging function. It is ** only ever used with detail=none tables. @@ -233618,9 +244152,9 @@ static void fts5DecodeRowidList( sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp); } } -#endif /* SQLITE_TEST */ +#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ -#ifdef SQLITE_TEST +#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) /* ** The implementation of user-defined scalar function fts5_decode(). 
*/ @@ -233631,6 +244165,7 @@ static void fts5DecodeFunction( ){ i64 iRowid; /* Rowid for record being decoded */ int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */ + int bTomb; const u8 *aBlob; int n; /* Record to decode */ u8 *a = 0; Fts5Buffer s; /* Build up text to return here */ @@ -233653,7 +244188,7 @@ static void fts5DecodeFunction( if( a==0 ) goto decode_out; if( n>0 ) memcpy(a, aBlob, n); - fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno); + fts5DecodeRowid(iRowid, &bTomb, &iSegid, &bDlidx, &iHeight, &iPgno); fts5DebugRowid(&rc, &s, iRowid); if( bDlidx ){ @@ -233672,6 +244207,28 @@ static void fts5DecodeFunction( " %d(%lld)", lvl.iLeafPgno, lvl.iRowid ); } + }else if( bTomb ){ + u32 nElem = fts5GetU32(&a[4]); + int szKey = (aBlob[0]==4 || aBlob[0]==8) ? aBlob[0] : 8; + int nSlot = (n - 8) / szKey; + int ii; + sqlite3Fts5BufferAppendPrintf(&rc, &s, " nElem=%d", (int)nElem); + if( aBlob[1] ){ + sqlite3Fts5BufferAppendPrintf(&rc, &s, " 0"); + } + for(ii=0; iiszLeaf ){ + rc = FTS5_CORRUPT; + }else{ + fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff); + } iOff = iTermOff; if( iOffestimatedCost = (double)100; + pIdxInfo->estimatedRows = 100; + pIdxInfo->idxNum = 0; + for(i=0, p=pIdxInfo->aConstraint; inConstraint; i++, p++){ + if( p->usable==0 ) continue; + if( p->op==SQLITE_INDEX_CONSTRAINT_EQ && p->iColumn==11 ){ + rc = SQLITE_OK; + pIdxInfo->aConstraintUsage[i].omit = 1; + pIdxInfo->aConstraintUsage[i].argvIndex = 1; + break; + } + } + return rc; +} + +/* +** This method is the destructor for bytecodevtab objects. +*/ +static int fts5structDisconnectMethod(sqlite3_vtab *pVtab){ + Fts5StructVtab *p = (Fts5StructVtab*)pVtab; + sqlite3_free(p); + return SQLITE_OK; +} + +/* +** Constructor for a new bytecodevtab_cursor object. +*/ +static int fts5structOpenMethod(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCsr){ + int rc = SQLITE_OK; + Fts5StructVcsr *pNew = 0; + + pNew = sqlite3Fts5MallocZero(&rc, sizeof(*pNew)); + *ppCsr = (sqlite3_vtab_cursor*)pNew; + + return SQLITE_OK; +} + +/* +** Destructor for a bytecodevtab_cursor. +*/ +static int fts5structCloseMethod(sqlite3_vtab_cursor *cur){ + Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur; + fts5StructureRelease(pCsr->pStruct); + sqlite3_free(pCsr); + return SQLITE_OK; +} + + +/* +** Advance a bytecodevtab_cursor to its next row of output. +*/ +static int fts5structNextMethod(sqlite3_vtab_cursor *cur){ + Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur; + Fts5Structure *p = pCsr->pStruct; + + assert( pCsr->pStruct ); + pCsr->iSeg++; + pCsr->iRowid++; + while( pCsr->iLevelnLevel && pCsr->iSeg>=p->aLevel[pCsr->iLevel].nSeg ){ + pCsr->iLevel++; + pCsr->iSeg = 0; + } + if( pCsr->iLevel>=p->nLevel ){ + fts5StructureRelease(pCsr->pStruct); + pCsr->pStruct = 0; + } + return SQLITE_OK; +} + +/* +** Return TRUE if the cursor has been moved off of the last +** row of output. +*/ +static int fts5structEofMethod(sqlite3_vtab_cursor *cur){ + Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur; + return pCsr->pStruct==0; +} + +static int fts5structRowidMethod( + sqlite3_vtab_cursor *cur, + sqlite_int64 *piRowid +){ + Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur; + *piRowid = pCsr->iRowid; + return SQLITE_OK; +} + +/* +** Return values of columns for the row at which the bytecodevtab_cursor +** is currently pointing. 
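+**
+** For example, to dump the structure record (stored as rowid 10 of the
+** %_data table) of a hypothetical fts5 table t1:
+**
+**   SELECT * FROM fts5_structure(
+**     (SELECT block FROM t1_data WHERE id=10)
+**   );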
+*/ +static int fts5structColumnMethod( + sqlite3_vtab_cursor *cur, /* The cursor */ + sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ + int i /* Which column to return */ +){ + Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur; + Fts5Structure *p = pCsr->pStruct; + Fts5StructureSegment *pSeg = &p->aLevel[pCsr->iLevel].aSeg[pCsr->iSeg]; + + switch( i ){ + case 0: /* level */ + sqlite3_result_int(ctx, pCsr->iLevel); + break; + case 1: /* segment */ + sqlite3_result_int(ctx, pCsr->iSeg); + break; + case 2: /* merge */ + sqlite3_result_int(ctx, pCsr->iSeg < p->aLevel[pCsr->iLevel].nMerge); + break; + case 3: /* segid */ + sqlite3_result_int(ctx, pSeg->iSegid); + break; + case 4: /* leaf1 */ + sqlite3_result_int(ctx, pSeg->pgnoFirst); + break; + case 5: /* leaf2 */ + sqlite3_result_int(ctx, pSeg->pgnoLast); + break; + case 6: /* origin1 */ + sqlite3_result_int64(ctx, pSeg->iOrigin1); + break; + case 7: /* origin2 */ + sqlite3_result_int64(ctx, pSeg->iOrigin2); + break; + case 8: /* npgtombstone */ + sqlite3_result_int(ctx, pSeg->nPgTombstone); + break; + case 9: /* nentrytombstone */ + sqlite3_result_int64(ctx, pSeg->nEntryTombstone); + break; + case 10: /* nentry */ + sqlite3_result_int64(ctx, pSeg->nEntry); + break; + } + return SQLITE_OK; +} + +/* +** Initialize a cursor. +** +** idxNum==0 means show all subprograms +** idxNum==1 means show only the main bytecode and omit subprograms. +*/ +static int fts5structFilterMethod( + sqlite3_vtab_cursor *pVtabCursor, + int idxNum, const char *idxStr, + int argc, sqlite3_value **argv +){ + Fts5StructVcsr *pCsr = (Fts5StructVcsr *)pVtabCursor; + int rc = SQLITE_OK; + + const u8 *aBlob = 0; + int nBlob = 0; + + assert( argc==1 ); + fts5StructureRelease(pCsr->pStruct); + pCsr->pStruct = 0; + + nBlob = sqlite3_value_bytes(argv[0]); + aBlob = (const u8*)sqlite3_value_blob(argv[0]); + rc = fts5StructureDecode(aBlob, nBlob, 0, &pCsr->pStruct); + if( rc==SQLITE_OK ){ + pCsr->iLevel = 0; + pCsr->iRowid = 0; + pCsr->iSeg = -1; + rc = fts5structNextMethod(pVtabCursor); + } + + return rc; +} + +#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */ /* ** This is called as part of registering the FTS5 module with database @@ -233876,7 +244664,7 @@ static void fts5RowidFunction( ** SQLite error code is returned instead. 
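**
** (A usage sketch of the debug-only fts5_rowid() helper registered
** below: SELECT fts5_rowid('segment', 1, 1) returns the %_data rowid
** used for page 1 of segment 1.)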
*/ static int sqlite3Fts5IndexInit(sqlite3 *db){ -#ifdef SQLITE_TEST +#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG) int rc = sqlite3_create_function( db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0 ); @@ -233893,6 +244681,37 @@ static int sqlite3Fts5IndexInit(sqlite3 *db){ db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0 ); } + + if( rc==SQLITE_OK ){ + static const sqlite3_module fts5structure_module = { + 0, /* iVersion */ + 0, /* xCreate */ + fts5structConnectMethod, /* xConnect */ + fts5structBestIndexMethod, /* xBestIndex */ + fts5structDisconnectMethod, /* xDisconnect */ + 0, /* xDestroy */ + fts5structOpenMethod, /* xOpen */ + fts5structCloseMethod, /* xClose */ + fts5structFilterMethod, /* xFilter */ + fts5structNextMethod, /* xNext */ + fts5structEofMethod, /* xEof */ + fts5structColumnMethod, /* xColumn */ + fts5structRowidMethod, /* xRowid */ + 0, /* xUpdate */ + 0, /* xBegin */ + 0, /* xSync */ + 0, /* xCommit */ + 0, /* xRollback */ + 0, /* xFindFunction */ + 0, /* xRename */ + 0, /* xSavepoint */ + 0, /* xRelease */ + 0, /* xRollbackTo */ + 0, /* xShadowName */ + 0 /* xIntegrity */ + }; + rc = sqlite3_create_module(db, "fts5_structure", &fts5structure_module, 0); + } return rc; #else return SQLITE_OK; @@ -234028,6 +244847,8 @@ struct Fts5FullTable { Fts5Storage *pStorage; /* Document store */ Fts5Global *pGlobal; /* Global (connection wide) data */ Fts5Cursor *pSortCsr; /* Sort data from this cursor */ + int iSavepoint; /* Successful xSavepoint()+1 */ + int bInSavepoint; #ifdef SQLITE_DEBUG struct Fts5TransactionState ts; #endif @@ -234171,7 +244992,7 @@ static void fts5CheckTransactionState(Fts5FullTable *p, int op, int iSavepoint){ break; case FTS5_SYNC: - assert( p->ts.eState==1 ); + assert( p->ts.eState==1 || p->ts.eState==2 ); p->ts.eState = 2; break; @@ -234186,21 +245007,21 @@ static void fts5CheckTransactionState(Fts5FullTable *p, int op, int iSavepoint){ break; case FTS5_SAVEPOINT: - assert( p->ts.eState==1 ); + assert( p->ts.eState>=1 ); assert( iSavepoint>=0 ); assert( iSavepoint>=p->ts.iSavepoint ); p->ts.iSavepoint = iSavepoint; break; case FTS5_RELEASE: - assert( p->ts.eState==1 ); + assert( p->ts.eState>=1 ); assert( iSavepoint>=0 ); assert( iSavepoint<=p->ts.iSavepoint ); p->ts.iSavepoint = iSavepoint-1; break; case FTS5_ROLLBACKTO: - assert( p->ts.eState==1 ); + assert( p->ts.eState>=1 ); assert( iSavepoint>=-1 ); /* The following assert() can fail if another vtab strikes an error ** within an xSavepoint() call then SQLite calls xRollbackTo() - without @@ -234316,6 +245137,13 @@ static int fts5InitVtab( pConfig->pzErrmsg = 0; } + if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){ + rc = sqlite3_vtab_config(db, SQLITE_VTAB_CONSTRAINT_SUPPORT, (int)1); + } + if( rc==SQLITE_OK ){ + rc = sqlite3_vtab_config(db, SQLITE_VTAB_INNOCUOUS); + } + if( rc!=SQLITE_OK ){ fts5FreeVtab(pTab); pTab = 0; @@ -235240,6 +246068,9 @@ static int fts5FilterMethod( pCsr->iFirstRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64); } + rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex); + if( rc!=SQLITE_OK ) goto filter_out; + if( pTab->pSortCsr ){ /* If pSortCsr is non-NULL, then this call is being made as part of ** processing for a "... 
MATCH ORDER BY rank" query (ePlan is @@ -235262,6 +246093,7 @@ static int fts5FilterMethod( pCsr->pExpr = pTab->pSortCsr->pExpr; rc = fts5CursorFirst(pTab, pCsr, bDesc); }else if( pCsr->pExpr ){ + assert( rc==SQLITE_OK ); rc = fts5CursorParseRank(pConfig, pCsr, pRank); if( rc==SQLITE_OK ){ if( bOrderByRank ){ @@ -235433,6 +246265,7 @@ static int fts5SpecialInsert( Fts5Config *pConfig = pTab->p.pConfig; int rc = SQLITE_OK; int bError = 0; + int bLoadConfig = 0; if( 0==sqlite3_stricmp("delete-all", zCmd) ){ if( pConfig->eContent==FTS5_CONTENT_NORMAL ){ @@ -235444,6 +246277,7 @@ static int fts5SpecialInsert( }else{ rc = sqlite3Fts5StorageDeleteAll(pTab->pStorage); } + bLoadConfig = 1; }else if( 0==sqlite3_stricmp("rebuild", zCmd) ){ if( pConfig->eContent==FTS5_CONTENT_NONE ){ fts5SetVtabError(pTab, @@ -235453,6 +246287,7 @@ static int fts5SpecialInsert( }else{ rc = sqlite3Fts5StorageRebuild(pTab->pStorage); } + bLoadConfig = 1; }else if( 0==sqlite3_stricmp("optimize", zCmd) ){ rc = sqlite3Fts5StorageOptimize(pTab->pStorage); }else if( 0==sqlite3_stricmp("merge", zCmd) ){ @@ -235465,6 +246300,8 @@ static int fts5SpecialInsert( }else if( 0==sqlite3_stricmp("prefix-index", zCmd) ){ pConfig->bPrefixIndex = sqlite3_value_int(pVal); #endif + }else if( 0==sqlite3_stricmp("flush", zCmd) ){ + rc = sqlite3Fts5FlushToDisk(&pTab->p); }else{ rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex); if( rc==SQLITE_OK ){ @@ -235478,6 +246315,12 @@ static int fts5SpecialInsert( } } } + + if( rc==SQLITE_OK && bLoadConfig ){ + pTab->p.pConfig->iCookie--; + rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex); + } + return rc; } @@ -235534,9 +246377,10 @@ static int fts5UpdateMethod( Fts5Config *pConfig = pTab->p.pConfig; int eType0; /* value_type() of apVal[0] */ int rc = SQLITE_OK; /* Return code */ + int bUpdateOrDelete = 0; /* A transaction must be open when this is called. */ - assert( pTab->ts.eState==1 ); + assert( pTab->ts.eState==1 || pTab->ts.eState==2 ); assert( pVtab->zErrMsg==0 ); assert( nArg==1 || nArg==(2+pConfig->nCol+2) ); @@ -235544,6 +246388,11 @@ static int fts5UpdateMethod( || sqlite3_value_type(apVal[0])==SQLITE_NULL ); assert( pTab->p.pConfig->pzErrmsg==0 ); + if( pConfig->pgsz==0 ){ + rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex); + if( rc!=SQLITE_OK ) return rc; + } + pTab->p.pConfig->pzErrmsg = &pTab->p.base.zErrMsg; /* Put any active cursors into REQUIRE_SEEK state. */ @@ -235558,7 +246407,14 @@ static int fts5UpdateMethod( if( pConfig->eContent!=FTS5_CONTENT_NORMAL && 0==sqlite3_stricmp("delete", z) ){ - rc = fts5SpecialDelete(pTab, apVal); + if( pConfig->bContentlessDelete ){ + fts5SetVtabError(pTab, + "'delete' may not be used with a contentless_delete=1 table" + ); + rc = SQLITE_ERROR; + }else{ + rc = fts5SpecialDelete(pTab, apVal); + } }else{ rc = fts5SpecialInsert(pTab, z, apVal[2 + pConfig->nCol + 1]); } @@ -235575,7 +246431,7 @@ static int fts5UpdateMethod( ** Cases 3 and 4 may violate the rowid constraint. */ int eConflict = SQLITE_ABORT; - if( pConfig->eContent==FTS5_CONTENT_NORMAL ){ + if( pConfig->eContent==FTS5_CONTENT_NORMAL || pConfig->bContentlessDelete ){ eConflict = sqlite3_vtab_on_conflict(pConfig->db); } @@ -235583,8 +246439,12 @@ static int fts5UpdateMethod( assert( nArg!=1 || eType0==SQLITE_INTEGER ); /* Filter out attempts to run UPDATE or DELETE on contentless tables. - ** This is not suported. */ - if( eType0==SQLITE_INTEGER && fts5IsContentless(pTab) ){ + ** This is not suported. 
Except - they are both supported if the CREATE + ** VIRTUAL TABLE statement contained "contentless_delete=1". */ + if( eType0==SQLITE_INTEGER + && pConfig->eContent==FTS5_CONTENT_NONE + && pConfig->bContentlessDelete==0 + ){ pTab->p.base.zErrMsg = sqlite3_mprintf( "cannot %s contentless fts5 table: %s", (nArg>1 ? "UPDATE" : "DELETE from"), pConfig->zName @@ -235596,6 +246456,7 @@ static int fts5UpdateMethod( else if( nArg==1 ){ i64 iDel = sqlite3_value_int64(apVal[0]); /* Rowid to delete */ rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, 0); + bUpdateOrDelete = 1; } /* INSERT or UPDATE */ @@ -235607,10 +246468,12 @@ static int fts5UpdateMethod( } else if( eType0!=SQLITE_INTEGER ){ - /* If this is a REPLACE, first remove the current entry (if any) */ + /* An INSERT statement. If the conflict-mode is REPLACE, first remove + ** the current entry (if any). */ if( eConflict==SQLITE_REPLACE && eType1==SQLITE_INTEGER ){ i64 iNew = sqlite3_value_int64(apVal[1]); /* Rowid to delete */ rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0); + bUpdateOrDelete = 1; } fts5StorageInsert(&rc, pTab, apVal, pRowid); } @@ -235639,10 +246502,24 @@ static int fts5UpdateMethod( rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0); fts5StorageInsert(&rc, pTab, apVal, pRowid); } + bUpdateOrDelete = 1; } } } + if( rc==SQLITE_OK + && bUpdateOrDelete + && pConfig->bSecureDelete + && pConfig->iVersion==FTS5_CURRENT_VERSION + ){ + rc = sqlite3Fts5StorageConfigValue( + pTab->pStorage, "version", 0, FTS5_CURRENT_VERSION_SECUREDELETE + ); + if( rc==SQLITE_OK ){ + pConfig->iVersion = FTS5_CURRENT_VERSION_SECUREDELETE; + } + } + pTab->p.pConfig->pzErrmsg = 0; return rc; } @@ -235655,8 +246532,7 @@ static int fts5SyncMethod(sqlite3_vtab *pVtab){ Fts5FullTable *pTab = (Fts5FullTable*)pVtab; fts5CheckTransactionState(pTab, FTS5_SYNC, 0); pTab->p.pConfig->pzErrmsg = &pTab->p.base.zErrMsg; - fts5TripCursors(pTab); - rc = sqlite3Fts5StorageSync(pTab->pStorage); + rc = sqlite3Fts5FlushToDisk(&pTab->p); pTab->p.pConfig->pzErrmsg = 0; return rc; } @@ -236423,6 +247299,12 @@ static int fts5ColumnMethod( sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1)); } pConfig->pzErrmsg = 0; + }else if( pConfig->bContentlessDelete && sqlite3_vtab_nochange(pCtx) ){ + char *zErr = sqlite3_mprintf("cannot UPDATE a subset of " + "columns on fts5 contentless-delete table: %s", pConfig->zName + ); + sqlite3_result_error(pCtx, zErr, -1); + sqlite3_free(zErr); } return rc; } @@ -236461,8 +247343,12 @@ static int fts5RenameMethod( sqlite3_vtab *pVtab, /* Virtual table handle */ const char *zName /* New name of table */ ){ + int rc; Fts5FullTable *pTab = (Fts5FullTable*)pVtab; - return sqlite3Fts5StorageRename(pTab->pStorage, zName); + pTab->bInSavepoint = 1; + rc = sqlite3Fts5StorageRename(pTab->pStorage, zName); + pTab->bInSavepoint = 0; + return rc; } static int sqlite3Fts5FlushToDisk(Fts5Table *pTab){ @@ -236476,9 +247362,29 @@ static int sqlite3Fts5FlushToDisk(Fts5Table *pTab){ ** Flush the contents of the pending-terms table to disk. 
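**
** The rewritten xSavepoint method below triggers the flush through the
** SQL interface. For a hypothetical table t1 in schema main, the
** statement it builds and executes is:
**
**   INSERT INTO main.t1(t1) VALUES('flush');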
*/ static int fts5SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ - UNUSED_PARAM(iSavepoint); /* Call below is a no-op for NDEBUG builds */ - fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_SAVEPOINT, iSavepoint); - return sqlite3Fts5FlushToDisk((Fts5Table*)pVtab); + Fts5FullTable *pTab = (Fts5FullTable*)pVtab; + int rc = SQLITE_OK; + char *zSql = 0; + fts5CheckTransactionState(pTab, FTS5_SAVEPOINT, iSavepoint); + + if( pTab->bInSavepoint==0 ){ + zSql = sqlite3_mprintf("INSERT INTO %Q.%Q(%Q) VALUES('flush')", + pTab->p.pConfig->zDb, pTab->p.pConfig->zName, pTab->p.pConfig->zName + ); + if( zSql ){ + pTab->bInSavepoint = 1; + rc = sqlite3_exec(pTab->p.pConfig->db, zSql, 0, 0, 0); + pTab->bInSavepoint = 0; + sqlite3_free(zSql); + }else{ + rc = SQLITE_NOMEM; + } + if( rc==SQLITE_OK ){ + pTab->iSavepoint = iSavepoint+1; + } + } + + return rc; } /* @@ -236487,9 +247393,16 @@ static int fts5SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ ** This is a no-op. */ static int fts5ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ - UNUSED_PARAM(iSavepoint); /* Call below is a no-op for NDEBUG builds */ - fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_RELEASE, iSavepoint); - return sqlite3Fts5FlushToDisk((Fts5Table*)pVtab); + Fts5FullTable *pTab = (Fts5FullTable*)pVtab; + int rc = SQLITE_OK; + fts5CheckTransactionState(pTab, FTS5_RELEASE, iSavepoint); + if( (iSavepoint+1)iSavepoint ){ + rc = sqlite3Fts5FlushToDisk(&pTab->p); + if( rc==SQLITE_OK ){ + pTab->iSavepoint = iSavepoint; + } + } + return rc; } /* @@ -236499,10 +247412,14 @@ static int fts5ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ */ static int fts5RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ Fts5FullTable *pTab = (Fts5FullTable*)pVtab; - UNUSED_PARAM(iSavepoint); /* Call below is a no-op for NDEBUG builds */ + int rc = SQLITE_OK; fts5CheckTransactionState(pTab, FTS5_ROLLBACKTO, iSavepoint); fts5TripCursors(pTab); - return sqlite3Fts5StorageRollback(pTab->pStorage); + pTab->p.pConfig->pgsz = 0; + if( (iSavepoint+1)<=pTab->iSavepoint ){ + rc = sqlite3Fts5StorageRollback(pTab->pStorage); + } + return rc; } /* @@ -236704,7 +247621,7 @@ static void fts5SourceIdFunc( ){ assert( nArg==0 ); UNUSED_PARAM2(nArg, apUnused); - sqlite3_result_text(pCtx, "fts5: 2022-09-05 11:02:23 4635f4a69c8c2a8df242b384a992aea71224e39a2ccab42d8c0b0602f1e826e8", -1, SQLITE_TRANSIENT); + sqlite3_result_text(pCtx, "fts5: 2023-11-24 11:41:44 ebead0e7230cd33bcec9f95d2183069565b9e709bf745c9b5db65cc0cbf92c0f", -1, SQLITE_TRANSIENT); } /* @@ -236722,9 +247639,46 @@ static int fts5ShadowName(const char *zName){ return 0; } +/* +** Run an integrity check on the FTS5 data structures. Return a string +** if anything is found amiss. Return a NULL pointer if everything is +** OK. 
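+** For a hypothetical table t1 in schema main, the check is performed
+** by executing:
+**
+**   INSERT INTO main.t1(t1) VALUES('integrity-check');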
+*/ +static int fts5Integrity( + sqlite3_vtab *pVtab, /* the FTS5 virtual table to check */ + const char *zSchema, /* Name of schema in which this table lives */ + const char *zTabname, /* Name of the table itself */ + int isQuick, /* True if this is a quick-check */ + char **pzErr /* Write error message here */ +){ + Fts5FullTable *pTab = (Fts5FullTable*)pVtab; + Fts5Config *pConfig = pTab->p.pConfig; + char *zSql; + char *zErr = 0; + int rc; + assert( pzErr!=0 && *pzErr==0 ); + UNUSED_PARAM(isQuick); + zSql = sqlite3_mprintf( + "INSERT INTO \"%w\".\"%w\"(\"%w\") VALUES('integrity-check');", + zSchema, zTabname, pConfig->zName); + if( zSql==0 ) return SQLITE_NOMEM; + rc = sqlite3_exec(pConfig->db, zSql, 0, 0, &zErr); + sqlite3_free(zSql); + if( (rc&0xff)==SQLITE_CORRUPT ){ + *pzErr = sqlite3_mprintf("malformed inverted index for FTS5 table %s.%s", + zSchema, zTabname); + }else if( rc!=SQLITE_OK ){ + *pzErr = sqlite3_mprintf("unable to validate the inverted index for" + " FTS5 table %s.%s: %s", + zSchema, zTabname, zErr); + } + sqlite3_free(zErr); + return SQLITE_OK; +} + static int fts5Init(sqlite3 *db){ static const sqlite3_module fts5Mod = { - /* iVersion */ 3, + /* iVersion */ 4, /* xCreate */ fts5CreateMethod, /* xConnect */ fts5ConnectMethod, /* xBestIndex */ fts5BestIndexMethod, @@ -236747,7 +247701,8 @@ static int fts5Init(sqlite3 *db){ /* xSavepoint */ fts5SavepointMethod, /* xRelease */ fts5ReleaseMethod, /* xRollbackTo */ fts5RollbackToMethod, - /* xShadowName */ fts5ShadowName + /* xShadowName */ fts5ShadowName, + /* xIntegrity */ fts5Integrity }; int rc; @@ -236777,7 +247732,9 @@ static int fts5Init(sqlite3 *db){ } if( rc==SQLITE_OK ){ rc = sqlite3_create_function( - db, "fts5_source_id", 0, SQLITE_UTF8, p, fts5SourceIdFunc, 0, 0 + db, "fts5_source_id", 0, + SQLITE_UTF8|SQLITE_DETERMINISTIC|SQLITE_INNOCUOUS, + p, fts5SourceIdFunc, 0, 0 ); } } @@ -236915,10 +247872,10 @@ static int fts5StorageGetStmt( "INSERT INTO %Q.'%q_content' VALUES(%s)", /* INSERT_CONTENT */ "REPLACE INTO %Q.'%q_content' VALUES(%s)", /* REPLACE_CONTENT */ "DELETE FROM %Q.'%q_content' WHERE id=?", /* DELETE_CONTENT */ - "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)", /* REPLACE_DOCSIZE */ + "REPLACE INTO %Q.'%q_docsize' VALUES(?,?%s)", /* REPLACE_DOCSIZE */ "DELETE FROM %Q.'%q_docsize' WHERE id=?", /* DELETE_DOCSIZE */ - "SELECT sz FROM %Q.'%q_docsize' WHERE id=?", /* LOOKUP_DOCSIZE */ + "SELECT sz%s FROM %Q.'%q_docsize' WHERE id=?", /* LOOKUP_DOCSIZE */ "REPLACE INTO %Q.'%q_config' VALUES(?,?)", /* REPLACE_CONFIG */ "SELECT %s FROM %s AS T", /* SCAN */ @@ -236966,6 +247923,19 @@ static int fts5StorageGetStmt( break; } + case FTS5_STMT_REPLACE_DOCSIZE: + zSql = sqlite3_mprintf(azStmt[eStmt], pC->zDb, pC->zName, + (pC->bContentlessDelete ? ",?" : "") + ); + break; + + case FTS5_STMT_LOOKUP_DOCSIZE: + zSql = sqlite3_mprintf(azStmt[eStmt], + (pC->bContentlessDelete ? 
",origin" : ""), + pC->zDb, pC->zName + ); + break; + default: zSql = sqlite3_mprintf(azStmt[eStmt], pC->zDb, pC->zName); break; @@ -237155,9 +248125,11 @@ static int sqlite3Fts5StorageOpen( } if( rc==SQLITE_OK && pConfig->bColumnsize ){ - rc = sqlite3Fts5CreateTable( - pConfig, "docsize", "id INTEGER PRIMARY KEY, sz BLOB", 0, pzErr - ); + const char *zCols = "id INTEGER PRIMARY KEY, sz BLOB"; + if( pConfig->bContentlessDelete ){ + zCols = "id INTEGER PRIMARY KEY, sz BLOB, origin INTEGER"; + } + rc = sqlite3Fts5CreateTable(pConfig, "docsize", zCols, 0, pzErr); } if( rc==SQLITE_OK ){ rc = sqlite3Fts5CreateTable( @@ -237234,7 +248206,7 @@ static int fts5StorageDeleteFromIndex( ){ Fts5Config *pConfig = p->pConfig; sqlite3_stmt *pSeek = 0; /* SELECT to read row iDel from %_data */ - int rc; /* Return code */ + int rc = SQLITE_OK; /* Return code */ int rc2; /* sqlite3_reset() return code */ int iCol; Fts5InsertCtx ctx; @@ -237250,7 +248222,6 @@ static int fts5StorageDeleteFromIndex( ctx.pStorage = p; ctx.iCol = -1; - rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 1, iDel); for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){ if( pConfig->abUnindexed[iCol-1]==0 ){ const char *zText; @@ -237287,6 +248258,37 @@ static int fts5StorageDeleteFromIndex( return rc; } +/* +** This function is called to process a DELETE on a contentless_delete=1 +** table. It adds the tombstone required to delete the entry with rowid +** iDel. If successful, SQLITE_OK is returned. Or, if an error occurs, +** an SQLite error code. +*/ +static int fts5StorageContentlessDelete(Fts5Storage *p, i64 iDel){ + i64 iOrigin = 0; + sqlite3_stmt *pLookup = 0; + int rc = SQLITE_OK; + + assert( p->pConfig->bContentlessDelete ); + assert( p->pConfig->eContent==FTS5_CONTENT_NONE ); + + /* Look up the origin of the document in the %_docsize table. Store + ** this in stack variable iOrigin. */ + rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pLookup, 1, iDel); + if( SQLITE_ROW==sqlite3_step(pLookup) ){ + iOrigin = sqlite3_column_int64(pLookup, 1); + } + rc = sqlite3_reset(pLookup); + } + + if( rc==SQLITE_OK && iOrigin!=0 ){ + rc = sqlite3Fts5IndexContentlessDelete(p->pIndex, iOrigin, iDel); + } + + return rc; +} /* ** Insert a record into the %_docsize table. 
Specifically, do: @@ -237307,10 +248309,17 @@ static int fts5StorageInsertDocsize( rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace, 0); if( rc==SQLITE_OK ){ sqlite3_bind_int64(pReplace, 1, iRowid); - sqlite3_bind_blob(pReplace, 2, pBuf->p, pBuf->n, SQLITE_STATIC); - sqlite3_step(pReplace); - rc = sqlite3_reset(pReplace); - sqlite3_bind_null(pReplace, 2); + if( p->pConfig->bContentlessDelete ){ + i64 iOrigin = 0; + rc = sqlite3Fts5IndexGetOrigin(p->pIndex, &iOrigin); + sqlite3_bind_int64(pReplace, 3, iOrigin); + } + if( rc==SQLITE_OK ){ + sqlite3_bind_blob(pReplace, 2, pBuf->p, pBuf->n, SQLITE_STATIC); + sqlite3_step(pReplace); + rc = sqlite3_reset(pReplace); + sqlite3_bind_null(pReplace, 2); + } } } return rc; @@ -237374,7 +248383,15 @@ static int sqlite3Fts5StorageDelete(Fts5Storage *p, i64 iDel, sqlite3_value **ap /* Delete the index records */ if( rc==SQLITE_OK ){ - rc = fts5StorageDeleteFromIndex(p, iDel, apVal); + rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 1, iDel); + } + + if( rc==SQLITE_OK ){ + if( p->pConfig->bContentlessDelete ){ + rc = fts5StorageContentlessDelete(p, iDel); + }else{ + rc = fts5StorageDeleteFromIndex(p, iDel, apVal); + } } /* Delete the %_docsize record */ @@ -237962,7 +248979,9 @@ static int sqlite3Fts5StorageSync(Fts5Storage *p){ i64 iLastRowid = sqlite3_last_insert_rowid(p->pConfig->db); if( p->bTotalsValid ){ rc = fts5StorageSaveTotals(p); - p->bTotalsValid = 0; + if( rc==SQLITE_OK ){ + p->bTotalsValid = 0; + } } if( rc==SQLITE_OK ){ rc = sqlite3Fts5IndexSync(p->pIndex); @@ -241330,7 +252349,8 @@ static int sqlite3Fts5VocabInit(Fts5Global *pGlobal, sqlite3 *db){ /* xSavepoint */ 0, /* xRelease */ 0, /* xRollbackTo */ 0, - /* xShadowName */ 0 + /* xShadowName */ 0, + /* xIntegrity */ 0 }; void *p = (void*)pGlobal; @@ -241442,6 +252462,10 @@ static int stmtConnect( #define STMT_COLUMN_MEM 10 /* SQLITE_STMTSTATUS_MEMUSED */ + (void)pAux; + (void)argc; + (void)argv; + (void)pzErr; rc = sqlite3_declare_vtab(db, "CREATE TABLE x(sql,ncol,ro,busy,nscan,nsort,naidx,nstep," "reprep,run,mem)"); @@ -241561,6 +252585,10 @@ static int stmtFilter( sqlite3_int64 iRowid = 1; StmtRow **ppRow = 0; + (void)idxNum; + (void)idxStr; + (void)argc; + (void)argv; stmtCsrReset(pCur); ppRow = &pCur->pRow; for(p=sqlite3_next_stmt(pCur->db, 0); p; p=sqlite3_next_stmt(pCur->db, p)){ @@ -241616,6 +252644,7 @@ static int stmtBestIndex( sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo ){ + (void)tab; pIdxInfo->estimatedCost = (double)500; pIdxInfo->estimatedRows = 500; return SQLITE_OK; @@ -241650,6 +252679,7 @@ static sqlite3_module stmtModule = { 0, /* xRelease */ 0, /* xRollbackTo */ 0, /* xShadowName */ + 0 /* xIntegrity */ }; #endif /* SQLITE_OMIT_VIRTUALTABLE */ diff --git a/lib/sqlite-amalgamation-3390300/sqlite3.h b/lib/sqlite-amalgamation-3440200/sqlite3.h similarity index 94% rename from lib/sqlite-amalgamation-3390300/sqlite3.h rename to lib/sqlite-amalgamation-3440200/sqlite3.h index 2868334676f..ef0237bde4d 100644 --- a/lib/sqlite-amalgamation-3390300/sqlite3.h +++ b/lib/sqlite-amalgamation-3440200/sqlite3.h @@ -146,9 +146,9 @@ extern "C" { ** [sqlite3_libversion_number()], [sqlite3_sourceid()], ** [sqlite_version()] and [sqlite_source_id()]. 
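fts5StorageContentlessDelete() above relies on the extra origin column that this changeset adds to the %_docsize shadow table: the origin records which index generation a row was written under, so a tombstone can be scoped to it. For a table named ft the column can be inspected directly; a sketch only (assuming <stdio.h> and sqlite3.h are included), since shadow-table layout is an implementation detail:

/* Peek at the per-row origin values FTS5 keeps for a contentless-delete
** table named "ft" (shadow table "ft_docsize"). Internal layout; useful
** for debugging only. */
static void dump_docsize_origins(sqlite3 *db){
  sqlite3_stmt *pStmt = 0;
  if( sqlite3_prepare_v2(db,
        "SELECT id, origin FROM main.ft_docsize", -1, &pStmt, 0)==SQLITE_OK ){
    while( sqlite3_step(pStmt)==SQLITE_ROW ){
      printf("rowid=%lld origin=%lld\n",
          (long long)sqlite3_column_int64(pStmt, 0),
          (long long)sqlite3_column_int64(pStmt, 1));
    }
  }
  sqlite3_finalize(pStmt);
}
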
*/ -#define SQLITE_VERSION "3.39.3" -#define SQLITE_VERSION_NUMBER 3039003 -#define SQLITE_SOURCE_ID "2022-09-05 11:02:23 4635f4a69c8c2a8df242b384a992aea71224e39a2ccab42d8c0b0602f1e826e8" +#define SQLITE_VERSION "3.44.2" +#define SQLITE_VERSION_NUMBER 3044002 +#define SQLITE_SOURCE_ID "2023-11-24 11:41:44 ebead0e7230cd33bcec9f95d2183069565b9e709bf745c9b5db65cc0cbf92c0f" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -528,6 +528,7 @@ SQLITE_API int sqlite3_exec( #define SQLITE_IOERR_ROLLBACK_ATOMIC (SQLITE_IOERR | (31<<8)) #define SQLITE_IOERR_DATA (SQLITE_IOERR | (32<<8)) #define SQLITE_IOERR_CORRUPTFS (SQLITE_IOERR | (33<<8)) +#define SQLITE_IOERR_IN_PAGE (SQLITE_IOERR | (34<<8)) #define SQLITE_LOCKED_SHAREDCACHE (SQLITE_LOCKED | (1<<8)) #define SQLITE_LOCKED_VTAB (SQLITE_LOCKED | (2<<8)) #define SQLITE_BUSY_RECOVERY (SQLITE_BUSY | (1<<8)) @@ -563,6 +564,7 @@ SQLITE_API int sqlite3_exec( #define SQLITE_CONSTRAINT_DATATYPE (SQLITE_CONSTRAINT |(12<<8)) #define SQLITE_NOTICE_RECOVER_WAL (SQLITE_NOTICE | (1<<8)) #define SQLITE_NOTICE_RECOVER_ROLLBACK (SQLITE_NOTICE | (2<<8)) +#define SQLITE_NOTICE_RBU (SQLITE_NOTICE | (3<<8)) #define SQLITE_WARNING_AUTOINDEX (SQLITE_WARNING | (1<<8)) #define SQLITE_AUTH_USER (SQLITE_AUTH | (1<<8)) #define SQLITE_OK_LOAD_PERMANENTLY (SQLITE_OK | (1<<8)) @@ -670,13 +672,17 @@ SQLITE_API int sqlite3_exec( ** ** SQLite uses one of these integer values as the second ** argument to calls it makes to the xLock() and xUnlock() methods -** of an [sqlite3_io_methods] object. +** of an [sqlite3_io_methods] object. These values are ordered from +** lest restrictive to most restrictive. +** +** The argument to xLock() is always SHARED or higher. The argument to +** xUnlock is either SHARED or NONE. */ -#define SQLITE_LOCK_NONE 0 -#define SQLITE_LOCK_SHARED 1 -#define SQLITE_LOCK_RESERVED 2 -#define SQLITE_LOCK_PENDING 3 -#define SQLITE_LOCK_EXCLUSIVE 4 +#define SQLITE_LOCK_NONE 0 /* xUnlock() only */ +#define SQLITE_LOCK_SHARED 1 /* xLock() or xUnlock() */ +#define SQLITE_LOCK_RESERVED 2 /* xLock() only */ +#define SQLITE_LOCK_PENDING 3 /* xLock() only */ +#define SQLITE_LOCK_EXCLUSIVE 4 /* xLock() only */ /* ** CAPI3REF: Synchronization Type Flags @@ -754,7 +760,14 @@ struct sqlite3_file { **
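Because this change vendors the amalgamation, the header and the compiled library move together; the classic failure mode is accidentally picking up a stale system sqlite3.h or libsqlite3 instead. A cheap startup assertion, sketched under that assumption:

#include <stdio.h>
#include "sqlite3.h"

/* Fail fast if the sqlite3.h seen at compile time (3.44.2 after this
** change) does not match the library actually linked in. */
static int sqlite_version_ok(void){
  if( sqlite3_libversion_number()!=SQLITE_VERSION_NUMBER ){
    fprintf(stderr, "sqlite3.h says %s but library is %s\n",
        SQLITE_VERSION, sqlite3_libversion());
    return 0;
  }
  return 1;
}
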
    4. [SQLITE_LOCK_PENDING], or **
    5. [SQLITE_LOCK_EXCLUSIVE].
**
-** xLock() increases the lock. xUnlock() decreases the lock.
+** xLock() upgrades the database file lock. In other words, xLock() moves the
+** database file lock in the direction NONE toward EXCLUSIVE. The argument to
+** xLock() is always one of SHARED, RESERVED, PENDING, or EXCLUSIVE, never
+** SQLITE_LOCK_NONE. If the database file lock is already at or above the
+** requested lock, then the call to xLock() is a no-op.
+** xUnlock() downgrades the database file lock to either SHARED or NONE.
+** If the lock is already at or below the requested lock state, then the call
+** to xUnlock() is a no-op.
 ** The xCheckReservedLock() method checks whether any database connection,
 ** either in this process or in some other process, is holding a RESERVED,
 ** PENDING, or EXCLUSIVE lock on the file. It returns true
@@ -859,9 +872,8 @@ struct sqlite3_io_methods {
 ** opcode causes the xFileControl method to write the current state of
 ** the lock (one of [SQLITE_LOCK_NONE], [SQLITE_LOCK_SHARED],
 ** [SQLITE_LOCK_RESERVED], [SQLITE_LOCK_PENDING], or [SQLITE_LOCK_EXCLUSIVE])
-** into an integer that the pArg argument points to. This capability
-** is used during testing and is only available when the SQLITE_TEST
-** compile-time option is used.
+** into an integer that the pArg argument points to.
+** This capability is only available if SQLite is compiled with [SQLITE_DEBUG].
 **
**
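The revised text above narrows which builds honour SQLITE_FCNTL_LOCKSTATE (SQLITE_DEBUG rather than SQLITE_TEST). Querying it is a one-liner; in builds without the opcode the call is expected to come back SQLITE_NOTFOUND rather than fail hard. A sketch, given an open sqlite3 *db:

/* Read the VFS-level lock currently held on the "main" database file.
** Only honoured by SQLITE_DEBUG builds of SQLite. */
int eLock = SQLITE_LOCK_NONE;
int rc = sqlite3_file_control(db, "main", SQLITE_FCNTL_LOCKSTATE, &eLock);
if( rc==SQLITE_OK ){
  /* eLock is one of SQLITE_LOCK_NONE .. SQLITE_LOCK_EXCLUSIVE */
}else if( rc==SQLITE_NOTFOUND ){
  /* opcode not supported by this build/VFS combination */
}
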
    6. [[SQLITE_FCNTL_SIZE_HINT]] ** The [SQLITE_FCNTL_SIZE_HINT] opcode is used by SQLite to give the VFS @@ -1165,7 +1177,6 @@ struct sqlite3_io_methods { ** in wal mode after the client has finished copying pages from the wal ** file to the database file, but before the *-shm file is updated to ** record the fact that the pages have been checkpointed. -** ** **
    7. [[SQLITE_FCNTL_EXTERNAL_READER]] ** The EXPERIMENTAL [SQLITE_FCNTL_EXTERNAL_READER] opcode is used to detect @@ -1178,10 +1189,16 @@ struct sqlite3_io_methods { ** the database is not a wal-mode db, or if there is no such connection in any ** other process. This opcode cannot be used to detect transactions opened ** by clients within the current process, only within other processes. -** ** **
    8. [[SQLITE_FCNTL_CKSM_FILE]] -** Used by the cksmvfs VFS module only. +** The [SQLITE_FCNTL_CKSM_FILE] opcode is for use internally by the +** [checksum VFS shim] only. +** +**
    9. [[SQLITE_FCNTL_RESET_CACHE]] +** If there is currently no transaction open on the database, and the +** database is not a temp db, then the [SQLITE_FCNTL_RESET_CACHE] file-control +** purges the contents of the in-memory page cache. If there is an open +** transaction, or if the db is a temp-db, this opcode is a no-op, not an error. ** */ #define SQLITE_FCNTL_LOCKSTATE 1 @@ -1224,6 +1241,7 @@ struct sqlite3_io_methods { #define SQLITE_FCNTL_CKPT_START 39 #define SQLITE_FCNTL_EXTERNAL_READER 40 #define SQLITE_FCNTL_CKSM_FILE 41 +#define SQLITE_FCNTL_RESET_CACHE 42 /* deprecated names */ #define SQLITE_GET_LOCKPROXYFILE SQLITE_FCNTL_GET_LOCKPROXYFILE @@ -1253,6 +1271,26 @@ typedef struct sqlite3_mutex sqlite3_mutex; */ typedef struct sqlite3_api_routines sqlite3_api_routines; +/* +** CAPI3REF: File Name +** +** Type [sqlite3_filename] is used by SQLite to pass filenames to the +** xOpen method of a [VFS]. It may be cast to (const char*) and treated +** as a normal, nul-terminated, UTF-8 buffer containing the filename, but +** may also be passed to special APIs such as: +** +**
        +**
      • sqlite3_filename_database() +**
      • sqlite3_filename_journal() +**
      • sqlite3_filename_wal() +**
      • sqlite3_uri_parameter() +**
      • sqlite3_uri_boolean() +**
      • sqlite3_uri_int64() +**
      • sqlite3_uri_key() +**
      +*/ +typedef const char *sqlite3_filename; + /* ** CAPI3REF: OS Interface Object ** @@ -1431,7 +1469,7 @@ struct sqlite3_vfs { sqlite3_vfs *pNext; /* Next registered VFS */ const char *zName; /* Name of this virtual file system */ void *pAppData; /* Pointer to application-specific data */ - int (*xOpen)(sqlite3_vfs*, const char *zName, sqlite3_file*, + int (*xOpen)(sqlite3_vfs*, sqlite3_filename zName, sqlite3_file*, int flags, int *pOutFlags); int (*xDelete)(sqlite3_vfs*, const char *zName, int syncDir); int (*xAccess)(sqlite3_vfs*, const char *zName, int flags, int *pResOut); @@ -1618,20 +1656,23 @@ SQLITE_API int sqlite3_os_end(void); ** must ensure that no other SQLite interfaces are invoked by other ** threads while sqlite3_config() is running. ** -** The sqlite3_config() interface -** may only be invoked prior to library initialization using -** [sqlite3_initialize()] or after shutdown by [sqlite3_shutdown()]. -** ^If sqlite3_config() is called after [sqlite3_initialize()] and before -** [sqlite3_shutdown()] then it will return SQLITE_MISUSE. -** Note, however, that ^sqlite3_config() can be called as part of the -** implementation of an application-defined [sqlite3_os_init()]. -** ** The first argument to sqlite3_config() is an integer ** [configuration option] that determines ** what property of SQLite is to be configured. Subsequent arguments ** vary depending on the [configuration option] ** in the first argument. ** +** For most configuration options, the sqlite3_config() interface +** may only be invoked prior to library initialization using +** [sqlite3_initialize()] or after shutdown by [sqlite3_shutdown()]. +** The exceptional configuration options that may be invoked at any time +** are called "anytime configuration options". +** ^If sqlite3_config() is called after [sqlite3_initialize()] and before +** [sqlite3_shutdown()] with a first argument that is not an anytime +** configuration option, then the sqlite3_config() call will return SQLITE_MISUSE. +** Note, however, that ^sqlite3_config() can be called as part of the +** implementation of an application-defined [sqlite3_os_init()]. +** ** ^When a configuration option is set, sqlite3_config() returns [SQLITE_OK]. ** ^If the option is unknown or SQLite is unable to set the option ** then this routine returns a non-zero [error code]. @@ -1739,6 +1780,23 @@ struct sqlite3_mem_methods { ** These constants are the available integer configuration options that ** can be passed as the first argument to the [sqlite3_config()] interface. ** +** Most of the configuration options for sqlite3_config() +** will only work if invoked prior to [sqlite3_initialize()] or after +** [sqlite3_shutdown()]. The few exceptions to this rule are called +** "anytime configuration options". +** ^Calling [sqlite3_config()] with a first argument that is not an +** anytime configuration option in between calls to [sqlite3_initialize()] and +** [sqlite3_shutdown()] is a no-op that returns SQLITE_MISUSE. +** +** The set of anytime configuration options can change (by insertions +** and/or deletions) from one release of SQLite to the next. +** As of SQLite version 3.42.0, the complete set of anytime configuration +** options is: +**
        +**
      • SQLITE_CONFIG_LOG +**
      • SQLITE_CONFIG_PCACHE_HDRSZ +**
      +** ** New configuration options may be added in future releases of SQLite. ** Existing configuration options might be discontinued. Applications ** should check the return code from [sqlite3_config()] to make sure that @@ -2069,7 +2127,7 @@ struct sqlite3_mem_methods { ** is stored in each sorted record and the required column values loaded ** from the database as records are returned in sorted order. The default ** value for this option is to never use this optimization. Specifying a -** negative value for this option restores the default behaviour. +** negative value for this option restores the default behavior. ** This option is only available if SQLite is compiled with the ** [SQLITE_ENABLE_SORTER_REFERENCES] compile-time option. ** @@ -2085,28 +2143,28 @@ struct sqlite3_mem_methods { ** compile-time option is not set, then the default maximum is 1073741824. ** */ -#define SQLITE_CONFIG_SINGLETHREAD 1 /* nil */ -#define SQLITE_CONFIG_MULTITHREAD 2 /* nil */ -#define SQLITE_CONFIG_SERIALIZED 3 /* nil */ -#define SQLITE_CONFIG_MALLOC 4 /* sqlite3_mem_methods* */ -#define SQLITE_CONFIG_GETMALLOC 5 /* sqlite3_mem_methods* */ -#define SQLITE_CONFIG_SCRATCH 6 /* No longer used */ -#define SQLITE_CONFIG_PAGECACHE 7 /* void*, int sz, int N */ -#define SQLITE_CONFIG_HEAP 8 /* void*, int nByte, int min */ -#define SQLITE_CONFIG_MEMSTATUS 9 /* boolean */ -#define SQLITE_CONFIG_MUTEX 10 /* sqlite3_mutex_methods* */ -#define SQLITE_CONFIG_GETMUTEX 11 /* sqlite3_mutex_methods* */ -/* previously SQLITE_CONFIG_CHUNKALLOC 12 which is now unused. */ -#define SQLITE_CONFIG_LOOKASIDE 13 /* int int */ -#define SQLITE_CONFIG_PCACHE 14 /* no-op */ -#define SQLITE_CONFIG_GETPCACHE 15 /* no-op */ -#define SQLITE_CONFIG_LOG 16 /* xFunc, void* */ -#define SQLITE_CONFIG_URI 17 /* int */ -#define SQLITE_CONFIG_PCACHE2 18 /* sqlite3_pcache_methods2* */ -#define SQLITE_CONFIG_GETPCACHE2 19 /* sqlite3_pcache_methods2* */ +#define SQLITE_CONFIG_SINGLETHREAD 1 /* nil */ +#define SQLITE_CONFIG_MULTITHREAD 2 /* nil */ +#define SQLITE_CONFIG_SERIALIZED 3 /* nil */ +#define SQLITE_CONFIG_MALLOC 4 /* sqlite3_mem_methods* */ +#define SQLITE_CONFIG_GETMALLOC 5 /* sqlite3_mem_methods* */ +#define SQLITE_CONFIG_SCRATCH 6 /* No longer used */ +#define SQLITE_CONFIG_PAGECACHE 7 /* void*, int sz, int N */ +#define SQLITE_CONFIG_HEAP 8 /* void*, int nByte, int min */ +#define SQLITE_CONFIG_MEMSTATUS 9 /* boolean */ +#define SQLITE_CONFIG_MUTEX 10 /* sqlite3_mutex_methods* */ +#define SQLITE_CONFIG_GETMUTEX 11 /* sqlite3_mutex_methods* */ +/* previously SQLITE_CONFIG_CHUNKALLOC 12 which is now unused. 
*/ +#define SQLITE_CONFIG_LOOKASIDE 13 /* int int */ +#define SQLITE_CONFIG_PCACHE 14 /* no-op */ +#define SQLITE_CONFIG_GETPCACHE 15 /* no-op */ +#define SQLITE_CONFIG_LOG 16 /* xFunc, void* */ +#define SQLITE_CONFIG_URI 17 /* int */ +#define SQLITE_CONFIG_PCACHE2 18 /* sqlite3_pcache_methods2* */ +#define SQLITE_CONFIG_GETPCACHE2 19 /* sqlite3_pcache_methods2* */ #define SQLITE_CONFIG_COVERING_INDEX_SCAN 20 /* int */ -#define SQLITE_CONFIG_SQLLOG 21 /* xSqllog, void* */ -#define SQLITE_CONFIG_MMAP_SIZE 22 /* sqlite3_int64, sqlite3_int64 */ +#define SQLITE_CONFIG_SQLLOG 21 /* xSqllog, void* */ +#define SQLITE_CONFIG_MMAP_SIZE 22 /* sqlite3_int64, sqlite3_int64 */ #define SQLITE_CONFIG_WIN32_HEAPSIZE 23 /* int nByte */ #define SQLITE_CONFIG_PCACHE_HDRSZ 24 /* int *psz */ #define SQLITE_CONFIG_PMASZ 25 /* unsigned int szPma */ @@ -2147,7 +2205,7 @@ struct sqlite3_mem_methods { ** configuration for a database connection can only be changed when that ** connection is not currently using lookaside memory, or in other words ** when the "current value" returned by -** [sqlite3_db_status](D,[SQLITE_CONFIG_LOOKASIDE],...) is zero. +** [sqlite3_db_status](D,[SQLITE_DBSTATUS_LOOKASIDE_USED],...) is zero. ** Any attempt to change the lookaside memory configuration when lookaside ** memory is in use leaves the configuration unchanged and returns ** [SQLITE_BUSY].)^ @@ -2244,7 +2302,7 @@ struct sqlite3_mem_methods { ** database handle, SQLite checks if this will mean that there are now no ** connections at all to the database. If so, it performs a checkpoint ** operation before closing the connection. This option may be used to -** override this behaviour. The first parameter passed to this operation +** override this behavior. The first parameter passed to this operation ** is an integer - positive to disable checkpoints-on-close, or zero (the ** default) to enable them, and negative to leave the setting unchanged. ** The second parameter is a pointer to an integer @@ -2297,8 +2355,12 @@ struct sqlite3_mem_methods { **
    10. sqlite3_db_config(db, SQLITE_DBCONFIG_RESET_DATABASE, 0, 0); ** ** Because resetting a database is destructive and irreversible, the -** process requires the use of this obscure API and multiple steps to help -** ensure that it does not happen by accident. +** process requires the use of this obscure API and multiple steps to +** help ensure that it does not happen by accident. Because this +** feature must be capable of resetting corrupt databases, and +** shutting down virtual tables may require access to that corrupt +** storage, the library must abandon any installed virtual tables +** without calling their xDestroy() methods. ** ** [[SQLITE_DBCONFIG_DEFENSIVE]]
      SQLITE_DBCONFIG_DEFENSIVE
      **
      The SQLITE_DBCONFIG_DEFENSIVE option activates or deactivates the @@ -2309,6 +2371,7 @@ struct sqlite3_mem_methods { **
        **
      • The [PRAGMA writable_schema=ON] statement. **
      • The [PRAGMA journal_mode=OFF] statement. +**
      • The [PRAGMA schema_version=N] statement. **
      • Writes to the [sqlite_dbpage] virtual table. **
      • Direct writes to [shadow tables]. **
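With the list above growing the schema_version case, enabling defensive mode remains the usual hardening step for connections that open untrusted database files; one call per connection (a sketch, given an open sqlite3 *db):

/* Defensive mode: refuse writable_schema=ON, journal_mode=OFF,
** schema_version=N, sqlite_dbpage writes and direct shadow-table
** writes on this connection. */
int bEnabled = 0;
sqlite3_db_config(db, SQLITE_DBCONFIG_DEFENSIVE, 1, &bEnabled);
/* bEnabled now reports the setting actually in effect (1) */
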
      @@ -2336,7 +2399,7 @@ struct sqlite3_mem_methods { **
      ** ** [[SQLITE_DBCONFIG_DQS_DML]] -**
      SQLITE_DBCONFIG_DQS_DML +**
      SQLITE_DBCONFIG_DQS_DML
      **
      The SQLITE_DBCONFIG_DQS_DML option activates or deactivates ** the legacy [double-quoted string literal] misfeature for DML statements ** only, that is DELETE, INSERT, SELECT, and UPDATE statements. The @@ -2345,7 +2408,7 @@ struct sqlite3_mem_methods { **
      ** ** [[SQLITE_DBCONFIG_DQS_DDL]] -**
      SQLITE_DBCONFIG_DQS_DDL +**
      SQLITE_DBCONFIG_DQS_DDL
      **
      The SQLITE_DBCONFIG_DQS option activates or deactivates ** the legacy [double-quoted string literal] misfeature for DDL statements, ** such as CREATE TABLE and CREATE INDEX. The @@ -2354,7 +2417,7 @@ struct sqlite3_mem_methods { **
      ** ** [[SQLITE_DBCONFIG_TRUSTED_SCHEMA]] -**
      SQLITE_DBCONFIG_TRUSTED_SCHEMA +**
      SQLITE_DBCONFIG_TRUSTED_SCHEMA
      **
      The SQLITE_DBCONFIG_TRUSTED_SCHEMA option tells SQLite to ** assume that database schemas are untainted by malicious content. ** When the SQLITE_DBCONFIG_TRUSTED_SCHEMA option is disabled, SQLite @@ -2374,7 +2437,7 @@ struct sqlite3_mem_methods { **
      ** ** [[SQLITE_DBCONFIG_LEGACY_FILE_FORMAT]] -**
      SQLITE_DBCONFIG_LEGACY_FILE_FORMAT +**
      SQLITE_DBCONFIG_LEGACY_FILE_FORMAT
      **
      The SQLITE_DBCONFIG_LEGACY_FILE_FORMAT option activates or deactivates ** the legacy file format flag. When activated, this flag causes all newly ** created database file to have a schema format version number (the 4-byte @@ -2383,7 +2446,7 @@ struct sqlite3_mem_methods { ** any SQLite version back to 3.0.0 ([dateof:3.0.0]). Without this setting, ** newly created databases are generally not understandable by SQLite versions ** prior to 3.3.0 ([dateof:3.3.0]). As these words are written, there -** is now scarcely any need to generated database files that are compatible +** is now scarcely any need to generate database files that are compatible ** all the way back to version 3.0.0, and so this setting is of little ** practical use, but is provided so that SQLite can continue to claim the ** ability to generate new database files that are compatible with version @@ -2392,8 +2455,40 @@ struct sqlite3_mem_methods { ** the [VACUUM] command will fail with an obscure error when attempting to ** process a table with generated columns and a descending index. This is ** not considered a bug since SQLite versions 3.3.0 and earlier do not support -** either generated columns or decending indexes. +** either generated columns or descending indexes. **
      +** +** [[SQLITE_DBCONFIG_STMT_SCANSTATUS]] +**
      SQLITE_DBCONFIG_STMT_SCANSTATUS
      +**
      The SQLITE_DBCONFIG_STMT_SCANSTATUS option is only useful in
+** SQLITE_ENABLE_STMT_SCANSTATUS builds. In this case, it sets or clears
+** a flag that enables collection of the sqlite3_stmt_scanstatus_v2()
+** statistics. For statistics to be collected, the flag must be set on
+** the database handle both when the SQL statement is prepared and when it
+** is stepped. The flag is set (collection of statistics is enabled)
+** by default. This option takes two arguments: an integer and a pointer to
+** an integer. The first argument is 1, 0, or -1 to enable, disable, or
+** leave unchanged the statement scanstatus option. If the second argument
+** is not NULL, then the value of the statement scanstatus setting after
+** processing the first argument is written into the integer that the second
+** argument points to.
+**
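In terms of the two-argument convention just described, disabling collection on a hot connection looks like this (a sketch; given an open sqlite3 *db in an SQLITE_ENABLE_STMT_SCANSTATUS build):

/* Stop collecting sqlite3_stmt_scanstatus_v2() statistics; statements
** must be (re)prepared after this for the change to take effect. */
int bNow = -1;
sqlite3_db_config(db, SQLITE_DBCONFIG_STMT_SCANSTATUS, 0, &bNow);
/* bNow==0: collection disabled for future prepare/step cycles */
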
      +** +** [[SQLITE_DBCONFIG_REVERSE_SCANORDER]] +**
      SQLITE_DBCONFIG_REVERSE_SCANORDER
      +**
      The SQLITE_DBCONFIG_REVERSE_SCANORDER option changes the default order +** in which tables and indexes are scanned so that the scans start at the end +** and work toward the beginning rather than starting at the beginning and +** working toward the end. Setting SQLITE_DBCONFIG_REVERSE_SCANORDER is the +** same as setting [PRAGMA reverse_unordered_selects]. This option takes +** two arguments which are an integer and a pointer to an integer. The first +** argument is 1, 0, or -1 to enable, disable, or leave unchanged the +** reverse scan order flag, respectively. If the second argument is not NULL, +** then 0 or 1 is written into the integer that the second argument points to +** depending on if the reverse scan order flag is set after processing the +** first argument. +**
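The same 1/0/-1-plus-out-pointer convention applies here; flipping scan order per connection is equivalent to the pragma named above (a sketch, given an open sqlite3 *db):

/* Same effect as "PRAGMA reverse_unordered_selects=ON": useful for
** smoking out queries that silently depend on implicit row order
** without an ORDER BY. */
sqlite3_db_config(db, SQLITE_DBCONFIG_REVERSE_SCANORDER, 1, (int*)0);
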
      +** ** */ #define SQLITE_DBCONFIG_MAINDBNAME 1000 /* const char* */ @@ -2414,7 +2509,9 @@ struct sqlite3_mem_methods { #define SQLITE_DBCONFIG_ENABLE_VIEW 1015 /* int int* */ #define SQLITE_DBCONFIG_LEGACY_FILE_FORMAT 1016 /* int int* */ #define SQLITE_DBCONFIG_TRUSTED_SCHEMA 1017 /* int int* */ -#define SQLITE_DBCONFIG_MAX 1017 /* Largest DBCONFIG */ +#define SQLITE_DBCONFIG_STMT_SCANSTATUS 1018 /* int int* */ +#define SQLITE_DBCONFIG_REVERSE_SCANORDER 1019 /* int int* */ +#define SQLITE_DBCONFIG_MAX 1019 /* Largest DBCONFIG */ /* ** CAPI3REF: Enable Or Disable Extended Result Codes @@ -2636,8 +2733,13 @@ SQLITE_API sqlite3_int64 sqlite3_total_changes64(sqlite3*); ** ^A call to sqlite3_interrupt(D) that occurs when there are no running ** SQL statements is a no-op and has no effect on SQL statements ** that are started after the sqlite3_interrupt() call returns. +** +** ^The [sqlite3_is_interrupted(D)] interface can be used to determine whether +** or not an interrupt is currently in effect for [database connection] D. +** It returns 1 if an interrupt is currently in effect, or 0 otherwise. */ SQLITE_API void sqlite3_interrupt(sqlite3*); +SQLITE_API int sqlite3_is_interrupted(sqlite3*); /* ** CAPI3REF: Determine If An SQL Statement Is Complete @@ -3255,8 +3357,8 @@ SQLITE_API SQLITE_DEPRECATED void *sqlite3_profile(sqlite3*, **
      ^An SQLITE_TRACE_PROFILE callback provides approximately the same ** information as is provided by the [sqlite3_profile()] callback. ** ^The P argument is a pointer to the [prepared statement] and the -** X argument points to a 64-bit integer which is the estimated of -** the number of nanosecond that the prepared statement took to run. +** X argument points to a 64-bit integer which is approximately +** the number of nanoseconds that the prepared statement took to run. ** ^The SQLITE_TRACE_PROFILE callback is invoked when the statement finishes. ** ** [[SQLITE_TRACE_ROW]]
      SQLITE_TRACE_ROW
      @@ -3288,8 +3390,10 @@ SQLITE_API SQLITE_DEPRECATED void *sqlite3_profile(sqlite3*, ** M argument should be the bitwise OR-ed combination of ** zero or more [SQLITE_TRACE] constants. ** -** ^Each call to either sqlite3_trace() or sqlite3_trace_v2() overrides -** (cancels) any prior calls to sqlite3_trace() or sqlite3_trace_v2(). +** ^Each call to either sqlite3_trace(D,X,P) or sqlite3_trace_v2(D,M,X,P) +** overrides (cancels) all prior calls to sqlite3_trace(D,X,P) or +** sqlite3_trace_v2(D,M,X,P) for the [database connection] D. Each +** database connection may have at most one trace callback. ** ** ^The X callback is invoked whenever any of the events identified by ** mask M occur. ^The integer return value from the callback is currently @@ -3319,7 +3423,7 @@ SQLITE_API int sqlite3_trace_v2( ** ** ^The sqlite3_progress_handler(D,N,X,P) interface causes the callback ** function X to be invoked periodically during long running calls to -** [sqlite3_exec()], [sqlite3_step()] and [sqlite3_get_table()] for +** [sqlite3_step()] and [sqlite3_prepare()] and similar for ** database connection D. An example use for this ** interface is to keep a GUI updated during a large query. ** @@ -3344,6 +3448,13 @@ SQLITE_API int sqlite3_trace_v2( ** Note that [sqlite3_prepare_v2()] and [sqlite3_step()] both modify their ** database connections for the meaning of "modify" in this paragraph. ** +** The progress handler callback would originally only be invoked from the +** bytecode engine. It still might be invoked during [sqlite3_prepare()] +** and similar because those routines might force a reparse of the schema +** which involves running the bytecode engine. However, beginning with +** SQLite version 3.41.0, the progress handler callback might also be +** invoked directly from [sqlite3_prepare()] while analyzing and generating +** code for complex queries. */ SQLITE_API void sqlite3_progress_handler(sqlite3*, int, int(*)(void*), void*); @@ -3380,13 +3491,18 @@ SQLITE_API void sqlite3_progress_handler(sqlite3*, int, int(*)(void*), void*); ** **
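The paragraph added in the hunk above is the behavioural change to plan for: since 3.41.0 the progress callback can fire from inside sqlite3_prepare() as well as sqlite3_step(), so any cancellation flag must be safe to consult during prepare too. A typical setup, sketched:

#include "sqlite3.h"

/* Returning nonzero aborts the current operation with SQLITE_INTERRUPT.
** May now be called from within sqlite3_prepare() on complex queries. */
static int progress_cb(void *pArg){
  return *(volatile int*)pArg;   /* set the flag elsewhere to cancel */
}

static void install_cancel(sqlite3 *db, volatile int *pCancelFlag){
  /* invoked roughly every 1000 bytecode operations */
  sqlite3_progress_handler(db, 1000, progress_cb, (void*)pCancelFlag);
}
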
      ** ^(
      [SQLITE_OPEN_READONLY]
      -**
      The database is opened in read-only mode. If the database does not -** already exist, an error is returned.
      )^ +**
      The database is opened in read-only mode. If the database does +** not already exist, an error is returned.
      )^ ** ** ^(
      [SQLITE_OPEN_READWRITE]
      -**
      The database is opened for reading and writing if possible, or reading -** only if the file is write protected by the operating system. In either -** case the database must already exist, otherwise an error is returned.
      )^ +**
      The database is opened for reading and writing if possible, or +** reading only if the file is write protected by the operating +** system. In either case the database must already exist, otherwise +** an error is returned. For historical reasons, if opening in +** read-write mode fails due to OS-level permissions, an attempt is +** made to open it in read-only mode. [sqlite3_db_readonly()] can be +** used to determine whether the database is actually +** read-write.
      )^ ** ** ^(
      [SQLITE_OPEN_READWRITE] | [SQLITE_OPEN_CREATE]
      **
      The database is opened for reading and writing, and is created if @@ -3424,6 +3540,9 @@ SQLITE_API void sqlite3_progress_handler(sqlite3*, int, int(*)(void*), void*); **
      The database is opened [shared cache] enabled, overriding ** the default shared cache setting provided by ** [sqlite3_enable_shared_cache()].)^ +** The [use of shared cache mode is discouraged] and hence shared cache +** capabilities may be omitted from many builds of SQLite. In such cases, +** this option is a no-op. ** ** ^(
      [SQLITE_OPEN_PRIVATECACHE]
      **
      The database is opened [shared cache] disabled, overriding @@ -3439,7 +3558,7 @@ SQLITE_API void sqlite3_progress_handler(sqlite3*, int, int(*)(void*), void*); ** to return an extended result code.
      ** ** [[OPEN_NOFOLLOW]] ^(
      [SQLITE_OPEN_NOFOLLOW]
      -**
      The database filename is not allowed to be a symbolic link
      +**
      The database filename is not allowed to contain a symbolic link
      **
      )^ ** ** If the 3rd parameter to sqlite3_open_v2() is not one of the @@ -3643,7 +3762,7 @@ SQLITE_API int sqlite3_open_v2( ** as F) must be one of: **
        **
      • A database filename pointer created by the SQLite core and -** passed into the xOpen() method of a VFS implemention, or +** passed into the xOpen() method of a VFS implementation, or **
      • A filename obtained from [sqlite3_db_filename()], or **
      • A new filename constructed using [sqlite3_create_filename()]. **
      @@ -3698,10 +3817,10 @@ SQLITE_API int sqlite3_open_v2( ** ** See the [URI filename] documentation for additional information. */ -SQLITE_API const char *sqlite3_uri_parameter(const char *zFilename, const char *zParam); -SQLITE_API int sqlite3_uri_boolean(const char *zFile, const char *zParam, int bDefault); -SQLITE_API sqlite3_int64 sqlite3_uri_int64(const char*, const char*, sqlite3_int64); -SQLITE_API const char *sqlite3_uri_key(const char *zFilename, int N); +SQLITE_API const char *sqlite3_uri_parameter(sqlite3_filename z, const char *zParam); +SQLITE_API int sqlite3_uri_boolean(sqlite3_filename z, const char *zParam, int bDefault); +SQLITE_API sqlite3_int64 sqlite3_uri_int64(sqlite3_filename, const char*, sqlite3_int64); +SQLITE_API const char *sqlite3_uri_key(sqlite3_filename z, int N); /* ** CAPI3REF: Translate filenames @@ -3730,9 +3849,9 @@ SQLITE_API const char *sqlite3_uri_key(const char *zFilename, int N); ** return value from [sqlite3_db_filename()], then the result is ** undefined and is likely a memory access violation. */ -SQLITE_API const char *sqlite3_filename_database(const char*); -SQLITE_API const char *sqlite3_filename_journal(const char*); -SQLITE_API const char *sqlite3_filename_wal(const char*); +SQLITE_API const char *sqlite3_filename_database(sqlite3_filename); +SQLITE_API const char *sqlite3_filename_journal(sqlite3_filename); +SQLITE_API const char *sqlite3_filename_wal(sqlite3_filename); /* ** CAPI3REF: Database File Corresponding To A Journal @@ -3756,7 +3875,7 @@ SQLITE_API sqlite3_file *sqlite3_database_file_object(const char*); /* ** CAPI3REF: Create and Destroy VFS Filenames ** -** These interfces are provided for use by [VFS shim] implementations and +** These interfaces are provided for use by [VFS shim] implementations and ** are not useful outside of that context. ** ** The sqlite3_create_filename(D,J,W,N,P) allocates memory to hold a version of @@ -3798,14 +3917,14 @@ SQLITE_API sqlite3_file *sqlite3_database_file_object(const char*); ** then the corresponding [sqlite3_module.xClose() method should also be ** invoked prior to calling sqlite3_free_filename(Y). */ -SQLITE_API char *sqlite3_create_filename( +SQLITE_API sqlite3_filename sqlite3_create_filename( const char *zDatabase, const char *zJournal, const char *zWal, int nParam, const char **azParam ); -SQLITE_API void sqlite3_free_filename(char*); +SQLITE_API void sqlite3_free_filename(sqlite3_filename); /* ** CAPI3REF: Error Codes And Messages @@ -3836,6 +3955,7 @@ SQLITE_API void sqlite3_free_filename(char*); ** ** ^The sqlite3_errmsg() and sqlite3_errmsg16() return English-language ** text that describes the error, as either UTF-8 or UTF-16 respectively. +** (See how SQLite handles [invalid UTF] for exceptions to this rule.) ** ^(Memory to hold the error message string is managed internally. ** The application does not need to worry about freeing the result. ** However, the error string might be overwritten or deallocated by @@ -4303,6 +4423,41 @@ SQLITE_API int sqlite3_stmt_readonly(sqlite3_stmt *pStmt); */ SQLITE_API int sqlite3_stmt_isexplain(sqlite3_stmt *pStmt); +/* +** CAPI3REF: Change The EXPLAIN Setting For A Prepared Statement +** METHOD: sqlite3_stmt +** +** The sqlite3_stmt_explain(S,E) interface changes the EXPLAIN +** setting for [prepared statement] S. If E is zero, then S becomes +** a normal prepared statement. If E is 1, then S behaves as if +** its SQL text began with "[EXPLAIN]". If E is 2, then S behaves as if +** its SQL text began with "[EXPLAIN QUERY PLAN]". 
+** +** Calling sqlite3_stmt_explain(S,E) might cause S to be reprepared. +** SQLite tries to avoid a reprepare, but a reprepare might be necessary +** on the first transition into EXPLAIN or EXPLAIN QUERY PLAN mode. +** +** Because of the potential need to reprepare, a call to +** sqlite3_stmt_explain(S,E) will fail with SQLITE_ERROR if S cannot be +** reprepared because it was created using [sqlite3_prepare()] instead of +** the newer [sqlite3_prepare_v2()] or [sqlite3_prepare_v3()] interfaces and +** hence has no saved SQL text with which to reprepare. +** +** Changing the explain setting for a prepared statement does not change +** the original SQL text for the statement. Hence, if the SQL text originally +** began with EXPLAIN or EXPLAIN QUERY PLAN, but sqlite3_stmt_explain(S,0) +** is called to convert the statement into an ordinary statement, the EXPLAIN +** or EXPLAIN QUERY PLAN keywords will still appear in the sqlite3_sql(S) +** output, even though the statement now acts like a normal SQL statement. +** +** This routine returns SQLITE_OK if the explain mode is successfully +** changed, or an error code if the explain mode could not be changed. +** The explain mode cannot be changed while a statement is active. +** Hence, it is good practice to call [sqlite3_reset(S)] +** immediately prior to calling sqlite3_stmt_explain(S,E). +*/ +SQLITE_API int sqlite3_stmt_explain(sqlite3_stmt *pStmt, int eMode); + /* ** CAPI3REF: Determine If A Prepared Statement Has Been Reset ** METHOD: sqlite3_stmt @@ -4466,7 +4621,7 @@ typedef struct sqlite3_context sqlite3_context; ** with it may be passed. ^It is called to dispose of the BLOB or string even ** if the call to the bind API fails, except the destructor is not called if ** the third parameter is a NULL pointer or the fourth parameter is negative. -** ^ (2) The special constant, [SQLITE_STATIC], may be passsed to indicate that +** ^ (2) The special constant, [SQLITE_STATIC], may be passed to indicate that ** the application remains responsible for disposing of the object. ^In this ** case, the object and the provided pointer to it must remain valid until ** either the prepared statement is finalized or the same SQL parameter is @@ -5145,20 +5300,33 @@ SQLITE_API int sqlite3_finalize(sqlite3_stmt *pStmt); ** ^The [sqlite3_reset(S)] interface resets the [prepared statement] S ** back to the beginning of its program. ** -** ^If the most recent call to [sqlite3_step(S)] for the -** [prepared statement] S returned [SQLITE_ROW] or [SQLITE_DONE], -** or if [sqlite3_step(S)] has never before been called on S, -** then [sqlite3_reset(S)] returns [SQLITE_OK]. +** ^The return code from [sqlite3_reset(S)] indicates whether or not +** the previous evaluation of prepared statement S completed successfully. +** ^If [sqlite3_step(S)] has never before been called on S or if +** [sqlite3_step(S)] has not been called since the previous call +** to [sqlite3_reset(S)], then [sqlite3_reset(S)] will return +** [SQLITE_OK]. ** ** ^If the most recent call to [sqlite3_step(S)] for the ** [prepared statement] S indicated an error, then ** [sqlite3_reset(S)] returns an appropriate [error code]. +** ^The [sqlite3_reset(S)] interface might also return an [error code] +** if there were no prior errors but the process of resetting +** the prepared statement caused a new error. 
^For example, if an +** [INSERT] statement with a [RETURNING] clause is only stepped one time, +** that one call to [sqlite3_step(S)] might return SQLITE_ROW but +** the overall statement might still fail and the [sqlite3_reset(S)] call +** might return SQLITE_BUSY if locking constraints prevent the +** database change from committing. Therefore, it is important that +** applications check the return code from [sqlite3_reset(S)] even if +** no prior call to [sqlite3_step(S)] indicated a problem. ** ** ^The [sqlite3_reset(S)] interface does not change the values ** of any [sqlite3_bind_blob|bindings] on the [prepared statement] S. */ SQLITE_API int sqlite3_reset(sqlite3_stmt *pStmt); + /* ** CAPI3REF: Create Or Redefine SQL Functions ** KEYWORDS: {function creation routines} @@ -5364,10 +5532,21 @@ SQLITE_API int sqlite3_create_window_function( ** from top-level SQL, and cannot be used in VIEWs or TRIGGERs nor in ** schema structures such as [CHECK constraints], [DEFAULT clauses], ** [expression indexes], [partial indexes], or [generated columns]. -** The SQLITE_DIRECTONLY flags is a security feature which is recommended -** for all [application-defined SQL functions], and especially for functions -** that have side-effects or that could potentially leak sensitive -** information. +**
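The RETURNING caveat just described is easy to trip over in practice; the pattern below checks sqlite3_reset() even though sqlite3_step() already reported SQLITE_ROW (a sketch; table and column names illustrative, given an open sqlite3 *db):

/* An INSERT ... RETURNING stepped a single time can report SQLITE_ROW
** and still fail afterwards (e.g. SQLITE_BUSY at commit), so the
** sqlite3_reset() return code is the final word. */
sqlite3_stmt *pStmt = 0;
int rc = sqlite3_prepare_v2(db,
    "INSERT INTO t(x) VALUES(?1) RETURNING id", -1, &pStmt, 0);
if( rc==SQLITE_OK ){
  sqlite3_bind_int(pStmt, 1, 42);
  if( sqlite3_step(pStmt)==SQLITE_ROW ){
    sqlite3_int64 id = sqlite3_column_int64(pStmt, 0);
    (void)id;                  /* use the returned rowid */
  }
  rc = sqlite3_reset(pStmt);   /* may still report that the INSERT failed */
}
sqlite3_finalize(pStmt);
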

      +** The SQLITE_DIRECTONLY flag is recommended for any +** [application-defined SQL function] +** that has side-effects or that could potentially leak sensitive information. +** This will prevent attacks in which an application is tricked +** into using a database file that has had its schema surreptitiously +** modified to invoke the application-defined function in ways that are +** harmful. +**

      +** Some people say it is good practice to set SQLITE_DIRECTONLY on all +** [application-defined SQL functions], regardless of whether or not they +** are security sensitive, as doing so prevents those functions from being used +** inside of the database schema, and thus ensures that the database +** can be inspected and modified using generic tools (such as the [CLI]) +** that do not have access to the application-defined functions. ** ** ** [[SQLITE_INNOCUOUS]]

      SQLITE_INNOCUOUS
      @@ -5394,13 +5573,27 @@ SQLITE_API int sqlite3_create_window_function( **
      ** ** [[SQLITE_SUBTYPE]]
      SQLITE_SUBTYPE
      -** The SQLITE_SUBTYPE flag indicates to SQLite that a function may call +** The SQLITE_SUBTYPE flag indicates to SQLite that a function might call ** [sqlite3_value_subtype()] to inspect the sub-types of its arguments. -** Specifying this flag makes no difference for scalar or aggregate user -** functions. However, if it is not specified for a user-defined window -** function, then any sub-types belonging to arguments passed to the window -** function may be discarded before the window function is called (i.e. -** sqlite3_value_subtype() will always return 0). +** This flag instructs SQLite to omit some corner-case optimizations that +** might disrupt the operation of the [sqlite3_value_subtype()] function, +** causing it to return zero rather than the correct subtype(). +** SQL functions that invokes [sqlite3_value_subtype()] should have this +** property. If the SQLITE_SUBTYPE property is omitted, then the return +** value from [sqlite3_value_subtype()] might sometimes be zero even though +** a non-zero subtype was specified by the function argument expression. +** +** [[SQLITE_RESULT_SUBTYPE]]
      SQLITE_RESULT_SUBTYPE
      +** The SQLITE_RESULT_SUBTYPE flag indicates to SQLite that a function might call +** [sqlite3_result_subtype()] to cause a sub-type to be associated with its +** result. +** Every function that invokes [sqlite3_result_subtype()] should have this +** property. If it does not, then the call to [sqlite3_result_subtype()] +** might become a no-op if the function is used as term in an +** [expression index]. On the other hand, SQL functions that never invoke +** [sqlite3_result_subtype()] should avoid setting this property, as the +** purpose of this property is to disable certain optimizations that are +** incompatible with subtypes. **
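Pulling the new rules together: a function that reads subtypes wants SQLITE_SUBTYPE, and one that writes them wants SQLITE_RESULT_SUBTYPE, or the corner-case optimizations described above may silently drop the subtype. A registration sketch with an illustrative pass-through function:

/* Passes its argument through, preserving the argument's subtype.
** SQLITE_SUBTYPE licenses the read; SQLITE_RESULT_SUBTYPE the write. */
static void subtype_passthru(
  sqlite3_context *ctx, int nArg, sqlite3_value **apArg
){
  (void)nArg;
  sqlite3_result_value(ctx, apArg[0]);
  sqlite3_result_subtype(ctx, sqlite3_value_subtype(apArg[0]));
}

static int register_passthru(sqlite3 *db){
  return sqlite3_create_function(db, "passthru", 1,
      SQLITE_UTF8|SQLITE_DETERMINISTIC|SQLITE_SUBTYPE|SQLITE_RESULT_SUBTYPE,
      0, subtype_passthru, 0, 0);
}
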
      ** */ @@ -5408,6 +5601,7 @@ SQLITE_API int sqlite3_create_window_function( #define SQLITE_DIRECTONLY 0x000080000 #define SQLITE_SUBTYPE 0x000100000 #define SQLITE_INNOCUOUS 0x000200000 +#define SQLITE_RESULT_SUBTYPE 0x001000000 /* ** CAPI3REF: Deprecated Functions @@ -5573,6 +5767,28 @@ SQLITE_API int sqlite3_value_numeric_type(sqlite3_value*); SQLITE_API int sqlite3_value_nochange(sqlite3_value*); SQLITE_API int sqlite3_value_frombind(sqlite3_value*); +/* +** CAPI3REF: Report the internal text encoding state of an sqlite3_value object +** METHOD: sqlite3_value +** +** ^(The sqlite3_value_encoding(X) interface returns one of [SQLITE_UTF8], +** [SQLITE_UTF16BE], or [SQLITE_UTF16LE] according to the current text encoding +** of the value X, assuming that X has type TEXT.)^ If sqlite3_value_type(X) +** returns something other than SQLITE_TEXT, then the return value from +** sqlite3_value_encoding(X) is meaningless. ^Calls to +** [sqlite3_value_text(X)], [sqlite3_value_text16(X)], [sqlite3_value_text16be(X)], +** [sqlite3_value_text16le(X)], [sqlite3_value_bytes(X)], or +** [sqlite3_value_bytes16(X)] might change the encoding of the value X and +** thus change the return from subsequent calls to sqlite3_value_encoding(X). +** +** This routine is intended for used by applications that test and validate +** the SQLite implementation. This routine is inquiring about the opaque +** internal state of an [sqlite3_value] object. Ordinary applications should +** not need to know what the internal state of an sqlite3_value object is and +** hence should not need to use this interface. +*/ +SQLITE_API int sqlite3_value_encoding(sqlite3_value*); + /* ** CAPI3REF: Finding The Subtype Of SQL Values ** METHOD: sqlite3_value @@ -5582,6 +5798,12 @@ SQLITE_API int sqlite3_value_frombind(sqlite3_value*); ** information can be used to pass a limited amount of context from ** one SQL function to another. Use the [sqlite3_result_subtype()] ** routine to set the subtype for the return value of an SQL function. +** +** Every [application-defined SQL function] that invoke this interface +** should include the [SQLITE_SUBTYPE] property in the text +** encoding argument when the function is [sqlite3_create_function|registered]. +** If the [SQLITE_SUBTYPE] property is omitted, then sqlite3_value_subtype() +** might return zero instead of the upstream subtype in some corner cases. */ SQLITE_API unsigned int sqlite3_value_subtype(sqlite3_value*); @@ -5625,7 +5847,7 @@ SQLITE_API void sqlite3_value_free(sqlite3_value*); ** ** ^The sqlite3_aggregate_context(C,N) routine returns a NULL pointer ** when first called if N is less than or equal to zero or if a memory -** allocate error occurs. +** allocation error occurs. ** ** ^(The amount of space allocated by sqlite3_aggregate_context(C,N) is ** determined by the N parameter on first successful call. Changing the @@ -5680,48 +5902,56 @@ SQLITE_API sqlite3 *sqlite3_context_db_handle(sqlite3_context*); ** METHOD: sqlite3_context ** ** These functions may be used by (non-aggregate) SQL functions to -** associate metadata with argument values. If the same value is passed to -** multiple invocations of the same SQL function during query execution, under -** some circumstances the associated metadata may be preserved. An example -** of where this might be useful is in a regular-expression matching -** function. The compiled version of the regular expression can be stored as -** metadata associated with the pattern string. +** associate auxiliary data with argument values. 
If the same argument +** value is passed to multiple invocations of the same SQL function during +** query execution, under some circumstances the associated auxiliary data +** might be preserved. An example of where this might be useful is in a +** regular-expression matching function. The compiled version of the regular +** expression can be stored as auxiliary data associated with the pattern string. ** Then as long as the pattern string remains the same, ** the compiled regular expression can be reused on multiple ** invocations of the same function. ** -** ^The sqlite3_get_auxdata(C,N) interface returns a pointer to the metadata +** ^The sqlite3_get_auxdata(C,N) interface returns a pointer to the auxiliary data ** associated by the sqlite3_set_auxdata(C,N,P,X) function with the Nth argument ** value to the application-defined function. ^N is zero for the left-most -** function argument. ^If there is no metadata +** function argument. ^If there is no auxiliary data ** associated with the function argument, the sqlite3_get_auxdata(C,N) interface ** returns a NULL pointer. ** -** ^The sqlite3_set_auxdata(C,N,P,X) interface saves P as metadata for the N-th -** argument of the application-defined function. ^Subsequent +** ^The sqlite3_set_auxdata(C,N,P,X) interface saves P as auxiliary data for the +** N-th argument of the application-defined function. ^Subsequent ** calls to sqlite3_get_auxdata(C,N) return P from the most recent -** sqlite3_set_auxdata(C,N,P,X) call if the metadata is still valid or -** NULL if the metadata has been discarded. +** sqlite3_set_auxdata(C,N,P,X) call if the auxiliary data is still valid or +** NULL if the auxiliary data has been discarded. ** ^After each call to sqlite3_set_auxdata(C,N,P,X) where X is not NULL, ** SQLite will invoke the destructor function X with parameter P exactly -** once, when the metadata is discarded. -** SQLite is free to discard the metadata at any time, including:
        +** once, when the auxiliary data is discarded. +** SQLite is free to discard the auxiliary data at any time, including:
          **
        • ^(when the corresponding function parameter changes)^, or **
        • ^(when [sqlite3_reset()] or [sqlite3_finalize()] is called for the ** SQL statement)^, or **
        • ^(when sqlite3_set_auxdata() is invoked again on the same ** parameter)^, or **
        • ^(during the original sqlite3_set_auxdata() call when a memory -** allocation error occurs.)^
        +** allocation error occurs.)^ +**
      • ^(during the original sqlite3_set_auxdata() call if the function +** is evaluated during query planning instead of during query execution, +** as sometimes happens with [SQLITE_ENABLE_STAT4].)^
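The canonical use of this machinery is the pattern the text sketches: caching a compiled regular expression on the pattern argument. A sketch in which compile_pattern() and free_pattern() stand in for a real regex library (hypothetical helpers, not SQLite APIs):

static void regexp_func(sqlite3_context *ctx, int nArg, sqlite3_value **apArg){
  void *pRe = sqlite3_get_auxdata(ctx, 0);
  (void)nArg;
  if( pRe==0 ){
    /* compile_pattern()/free_pattern() are placeholders for a real
    ** regex library. */
    pRe = compile_pattern((const char*)sqlite3_value_text(apArg[0]));
    if( pRe==0 ){ sqlite3_result_error(ctx, "bad pattern", -1); return; }
    sqlite3_set_auxdata(ctx, 0, pRe, free_pattern);
    /* Per the bullets above, the auxiliary data (and pRe with it) may
    ** already have been destroyed by the time set_auxdata returns, so
    ** re-fetch instead of trusting the local pointer. */
    pRe = sqlite3_get_auxdata(ctx, 0);
    if( pRe==0 ) return;  /* OOM, or STAT4 planning-time evaluation */
  }
  /* ... match sqlite3_value_text(apArg[1]) against pRe ... */
}
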
      ** -** Note the last bullet in particular. The destructor X in +** Note the last two bullets in particular. The destructor X in ** sqlite3_set_auxdata(C,N,P,X) might be called immediately, before the ** sqlite3_set_auxdata() interface even returns. Hence sqlite3_set_auxdata() ** should be called near the end of the function implementation and the ** function implementation should not make any use of P after -** sqlite3_set_auxdata() has been called. -** -** ^(In practice, metadata is preserved between function calls for +** sqlite3_set_auxdata() has been called. Furthermore, a call to +** sqlite3_get_auxdata() that occurs immediately after a corresponding call +** to sqlite3_set_auxdata() might still return NULL if an out-of-memory +** condition occurred during the sqlite3_set_auxdata() call or if the +** function is being evaluated during query planning rather than during +** query execution. +** +** ^(In practice, auxiliary data is preserved between function calls for ** function parameters that are compile-time constants, including literal ** values and [parameters] and expressions composed from the same.)^ ** @@ -5731,10 +5961,67 @@ SQLITE_API sqlite3 *sqlite3_context_db_handle(sqlite3_context*); ** ** These routines must be called from the same thread in which ** the SQL function is running. +** +** See also: [sqlite3_get_clientdata()] and [sqlite3_set_clientdata()]. */ SQLITE_API void *sqlite3_get_auxdata(sqlite3_context*, int N); SQLITE_API void sqlite3_set_auxdata(sqlite3_context*, int N, void*, void (*)(void*)); +/* +** CAPI3REF: Database Connection Client Data +** METHOD: sqlite3 +** +** These functions are used to associate one or more named pointers +** with a [database connection]. +** A call to sqlite3_set_clientdata(D,N,P,X) causes the pointer P +** to be attached to [database connection] D using name N. Subsequent +** calls to sqlite3_get_clientdata(D,N) will return a copy of pointer P +** or a NULL pointer if there were no prior calls to +** sqlite3_set_clientdata() with the same values of D and N. +** Names are compared using strcmp() and are thus case sensitive. +** +** If P and X are both non-NULL, then the destructor X is invoked with +** argument P on the first of the following occurrences: +**
        +**
      • An out-of-memory error occurs during the call to +** sqlite3_set_clientdata() which attempts to register pointer P. +**
      • A subsequent call to sqlite3_set_clientdata(D,N,P,X) is made +** with the same D and N parameters. +**
      • The database connection closes. SQLite does not make any guarantees +** about the order in which destructors are called, only that all +** destructors will be called exactly once at some point during the +** database connection closing process. +**
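A concrete use in the spirit described in the following paragraphs, a wrapper library stashing per-connection state: attach an owned context struct once and let the connection-close destructor do the cleanup. my_ctx, its field, and the key string are illustrative:

typedef struct my_ctx my_ctx;    /* illustrative wrapper-library state */
struct my_ctx { int nQueries; };

static void my_ctx_free(void *p){ sqlite3_free(p); }

static int attach_ctx(sqlite3 *db){
  my_ctx *p = (my_ctx*)sqlite3_malloc(sizeof(*p));
  if( p==0 ) return SQLITE_NOMEM;
  p->nQueries = 0;
  /* "my-wrapper-ctx" acts as the lookup key for sqlite3_get_clientdata() */
  return sqlite3_set_clientdata(db, "my-wrapper-ctx", p, my_ctx_free);
}
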
      +** +** SQLite does not do anything with client data other than invoke +** destructors on the client data at the appropriate time. The intended +** use for client data is to provide a mechanism for wrapper libraries +** to store additional information about an SQLite database connection. +** +** There is no limit (other than available memory) on the number of different +** client data pointers (with different names) that can be attached to a +** single database connection. However, the implementation is optimized +** for the case of having only one or two different client data names. +** Applications and wrapper libraries are discouraged from using more than +** one client data name each. +** +** There is no way to enumerate the client data pointers +** associated with a database connection. The N parameter can be thought +** of as a secret key such that only code that knows the secret key is able +** to access the associated data. +** +** Security Warning: These interfaces should not be exposed in scripting +** languages or in other circumstances where it might be possible for an +** an attacker to invoke them. Any agent that can invoke these interfaces +** can probably also take control of the process. +** +** Database connection client data is only available for SQLite +** version 3.44.0 ([dateof:3.44.0]) and later. +** +** See also: [sqlite3_set_auxdata()] and [sqlite3_get_auxdata()]. +*/ +SQLITE_API void *sqlite3_get_clientdata(sqlite3*,const char*); +SQLITE_API int sqlite3_set_clientdata(sqlite3*, const char*, void*, void(*)(void*)); /* ** CAPI3REF: Constants Defining Special Destructor Behavior @@ -5830,9 +6117,10 @@ typedef void (*sqlite3_destructor_type)(void*); ** of [SQLITE_UTF8], [SQLITE_UTF16], [SQLITE_UTF16BE], or [SQLITE_UTF16LE]. ** ^SQLite takes the text result from the application from ** the 2nd parameter of the sqlite3_result_text* interfaces. -** ^If the 3rd parameter to the sqlite3_result_text* interfaces -** is negative, then SQLite takes result text from the 2nd parameter -** through the first zero character. +** ^If the 3rd parameter to any of the sqlite3_result_text* interfaces +** other than sqlite3_result_text64() is negative, then SQLite computes +** the string length itself by searching the 2nd parameter for the first +** zero character. ** ^If the 3rd parameter to the sqlite3_result_text* interfaces ** is non-negative, then as many bytes (not characters) of the text ** pointed to by the 2nd parameter are taken as the application-defined @@ -5935,6 +6223,20 @@ SQLITE_API int sqlite3_result_zeroblob64(sqlite3_context*, sqlite3_uint64 n); ** higher order bits are discarded. ** The number of subtype bytes preserved by SQLite might increase ** in future releases of SQLite. +** +** Every [application-defined SQL function] that invokes this interface +** should include the [SQLITE_RESULT_SUBTYPE] property in its +** text encoding argument when the SQL function is +** [sqlite3_create_function|registered]. If the [SQLITE_RESULT_SUBTYPE] +** property is omitted from the function that invokes sqlite3_result_subtype(), +** then in some cases the sqlite3_result_subtype() might fail to set +** the result subtype. +** +** If SQLite is compiled with -DSQLITE_STRICT_SUBTYPE=1, then any +** SQL function that invokes the sqlite3_result_subtype() interface +** and that does not have the SQLITE_RESULT_SUBTYPE property will raise +** an error. Future versions of SQLite might enable -DSQLITE_STRICT_SUBTYPE=1 +** by default. 
*/ SQLITE_API void sqlite3_result_subtype(sqlite3_context*,unsigned int); @@ -6106,6 +6408,13 @@ SQLITE_API void sqlite3_activate_cerod( ** of the default VFS is not implemented correctly, or not implemented at ** all, then the behavior of sqlite3_sleep() may deviate from the description ** in the previous paragraphs. +** +** If a negative argument is passed to sqlite3_sleep() the results vary by +** VFS and operating system. Some systems treat a negative argument as an +** instruction to sleep forever. Others understand it to mean do not sleep +** at all. ^In SQLite version 3.42.0 and later, a negative +** argument passed into sqlite3_sleep() is changed to zero before it is relayed +** down into the xSleep method of the VFS. */ SQLITE_API int sqlite3_sleep(int); @@ -6328,7 +6637,7 @@ SQLITE_API const char *sqlite3_db_name(sqlite3 *db, int N); **
    11. [sqlite3_filename_wal()] ** */ -SQLITE_API const char *sqlite3_db_filename(sqlite3 *db, const char *zDbName); +SQLITE_API sqlite3_filename sqlite3_db_filename(sqlite3 *db, const char *zDbName); /* ** CAPI3REF: Determine if a database is read-only @@ -6359,7 +6668,7 @@ SQLITE_API int sqlite3_db_readonly(sqlite3 *db, const char *zDbName); SQLITE_API int sqlite3_txn_state(sqlite3*,const char *zSchema); /* -** CAPI3REF: Allowed return values from [sqlite3_txn_state()] +** CAPI3REF: Allowed return values from sqlite3_txn_state() ** KEYWORDS: {transaction state} ** ** These constants define the current transaction state of a database file. @@ -6465,7 +6774,7 @@ SQLITE_API void *sqlite3_rollback_hook(sqlite3*, void(*)(void *), void*); ** function C that is invoked prior to each autovacuum of the database ** file. ^The callback is passed a copy of the generic data pointer (P), ** the schema-name of the attached database that is being autovacuumed, -** the the size of the database file in pages, the number of free pages, +** the size of the database file in pages, the number of free pages, ** and the number of bytes per page, respectively. The callback should ** return the number of free pages that should be removed by the ** autovacuum. ^If the callback returns zero, then no autovacuum happens. @@ -6491,7 +6800,7 @@ SQLITE_API void *sqlite3_rollback_hook(sqlite3*, void(*)(void *), void*); ** ^Each call to the sqlite3_autovacuum_pages() interface overrides all ** previous invocations for that database connection. ^If the callback ** argument (C) to sqlite3_autovacuum_pages(D,C,P,X) is a NULL pointer, -** then the autovacuum steps callback is cancelled. The return value +** then the autovacuum steps callback is canceled. The return value ** from sqlite3_autovacuum_pages() is normally SQLITE_OK, but might ** be some other error code if something goes wrong. The current ** implementation will only return SQLITE_OK or SQLITE_MISUSE, but other @@ -6586,6 +6895,11 @@ SQLITE_API void *sqlite3_update_hook( ** to the same database. Sharing is enabled if the argument is true ** and disabled if the argument is false.)^ ** +** This interface is omitted if SQLite is compiled with +** [-DSQLITE_OMIT_SHARED_CACHE]. The [-DSQLITE_OMIT_SHARED_CACHE] +** compile-time option is recommended because the +** [use of shared cache mode is discouraged]. +** ** ^Cache sharing is enabled and disabled for an entire process. ** This is a change as of SQLite [version 3.5.0] ([dateof:3.5.0]). ** In prior versions of SQLite, @@ -6684,7 +6998,7 @@ SQLITE_API int sqlite3_db_release_memory(sqlite3*); ** ^The soft heap limit may not be greater than the hard heap limit. ** ^If the hard heap limit is enabled and if sqlite3_soft_heap_limit(N) ** is invoked with a value of N that is greater than the hard heap limit, -** the the soft heap limit is set to the value of the hard heap limit. +** the soft heap limit is set to the value of the hard heap limit. ** ^The soft heap limit is automatically enabled whenever the hard heap ** limit is enabled. ^When sqlite3_hard_heap_limit64(N) is invoked and ** the soft heap limit is outside the range of 1..N, then the soft heap @@ -6945,15 +7259,6 @@ SQLITE_API int sqlite3_cancel_auto_extension(void(*xEntryPoint)(void)); */ SQLITE_API void sqlite3_reset_auto_extension(void); -/* -** The interface to the virtual-table mechanism is currently considered -** to be experimental. The interface might change in incompatible ways. -** If this is a problem for you, do not use the interface at this time. 
-** -** When the virtual-table mechanism stabilizes, we will declare the -** interface fixed, support it indefinitely, and remove this comment. -*/ - /* ** Structures used by the virtual table interface */ @@ -7014,6 +7319,10 @@ struct sqlite3_module { /* The methods above are in versions 1 and 2 of the sqlite_module object. ** Those below are for version 3 and greater. */ int (*xShadowName)(const char*); + /* The methods above are in versions 1 through 3 of the sqlite_module object. + ** Those below are for version 4 and greater. */ + int (*xIntegrity)(sqlite3_vtab *pVTab, const char *zSchema, + const char *zTabName, int mFlags, char **pzErr); }; /* @@ -7072,10 +7381,10 @@ struct sqlite3_module { ** when the omit flag is true there is no guarantee that the constraint will ** not be checked again using byte code.)^ ** -** ^The idxNum and idxPtr values are recorded and passed into the +** ^The idxNum and idxStr values are recorded and passed into the ** [xFilter] method. -** ^[sqlite3_free()] is used to free idxPtr if and only if -** needToFreeIdxPtr is true. +** ^[sqlite3_free()] is used to free idxStr if and only if +** needToFreeIdxStr is true. ** ** ^The orderByConsumed means that output from [xFilter]/[xNext] will occur in ** the correct order to satisfy the ORDER BY clause so that no separate @@ -7195,7 +7504,7 @@ struct sqlite3_index_info { ** the [sqlite3_vtab_collation()] interface. For most real-world virtual ** tables, the collating sequence of constraints does not matter (for example ** because the constraints are numeric) and so the sqlite3_vtab_collation() -** interface is no commonly needed. +** interface is not commonly needed. */ #define SQLITE_INDEX_CONSTRAINT_EQ 2 #define SQLITE_INDEX_CONSTRAINT_GT 4 @@ -7354,16 +7663,6 @@ SQLITE_API int sqlite3_declare_vtab(sqlite3*, const char *zSQL); */ SQLITE_API int sqlite3_overload_function(sqlite3*, const char *zFuncName, int nArg); -/* -** The interface to the virtual-table mechanism defined above (back up -** to a comment remarkably similar to this one) is currently considered -** to be experimental. The interface might change in incompatible ways. -** If this is a problem for you, do not use the interface at this time. -** -** When the virtual-table mechanism stabilizes, we will declare the -** interface fixed, support it indefinitely, and remove this comment. -*/ - /* ** CAPI3REF: A Handle To An Open BLOB ** KEYWORDS: {BLOB handle} {BLOB handles} @@ -7511,7 +7810,7 @@ SQLITE_API int sqlite3_blob_reopen(sqlite3_blob *, sqlite3_int64); ** code is returned and the transaction rolled back. ** ** Calling this function with an argument that is not a NULL pointer or an -** open blob handle results in undefined behaviour. ^Calling this routine +** open blob handle results in undefined behavior. ^Calling this routine ** with a null pointer (such as would be returned by a failed call to ** [sqlite3_blob_open()]) is a harmless no-op. ^Otherwise, if this function ** is passed a valid open blob handle, the values returned by the @@ -7747,9 +8046,9 @@ SQLITE_API int sqlite3_vfs_unregister(sqlite3_vfs*); ** is undefined if the mutex is not currently entered by the ** calling thread or is not currently allocated. ** -** ^If the argument to sqlite3_mutex_enter(), sqlite3_mutex_try(), or -** sqlite3_mutex_leave() is a NULL pointer, then all three routines -** behave as no-ops. 
+** ^If the argument to sqlite3_mutex_enter(), sqlite3_mutex_try(), +** sqlite3_mutex_leave(), or sqlite3_mutex_free() is a NULL pointer, +** then any of the four routines behaves as a no-op. ** ** See also: [sqlite3_mutex_held()] and [sqlite3_mutex_notheld()]. */ @@ -7991,6 +8290,7 @@ SQLITE_API int sqlite3_test_control(int op, ...); #define SQLITE_TESTCTRL_PRNG_SAVE 5 #define SQLITE_TESTCTRL_PRNG_RESTORE 6 #define SQLITE_TESTCTRL_PRNG_RESET 7 /* NOT USED */ +#define SQLITE_TESTCTRL_FK_NO_ACTION 7 #define SQLITE_TESTCTRL_BITVEC_TEST 8 #define SQLITE_TESTCTRL_FAULT_INSTALL 9 #define SQLITE_TESTCTRL_BENIGN_MALLOC_HOOKS 10 @@ -8019,7 +8319,8 @@ SQLITE_API int sqlite3_test_control(int op, ...); #define SQLITE_TESTCTRL_TRACEFLAGS 31 #define SQLITE_TESTCTRL_TUNE 32 #define SQLITE_TESTCTRL_LOGEST 33 -#define SQLITE_TESTCTRL_LAST 33 /* Largest TESTCTRL */ +#define SQLITE_TESTCTRL_USELONGDOUBLE 34 +#define SQLITE_TESTCTRL_LAST 34 /* Largest TESTCTRL */ /* ** CAPI3REF: SQL Keyword Checking @@ -8979,7 +9280,7 @@ typedef struct sqlite3_backup sqlite3_backup; ** if the application incorrectly accesses the destination [database connection] ** and so no error code is reported, but the operations may malfunction ** nevertheless. Use of the destination database connection while a -** backup is in progress might also also cause a mutex deadlock. +** backup is in progress might also cause a mutex deadlock. ** ** If running in [shared cache mode], the application must ** guarantee that the shared cache used by the destination database @@ -9407,7 +9708,7 @@ SQLITE_API int sqlite3_wal_checkpoint_v2( */ #define SQLITE_CHECKPOINT_PASSIVE 0 /* Do as much as possible w/o blocking */ #define SQLITE_CHECKPOINT_FULL 1 /* Wait for writers, then checkpoint */ -#define SQLITE_CHECKPOINT_RESTART 2 /* Like FULL but wait for for readers */ +#define SQLITE_CHECKPOINT_RESTART 2 /* Like FULL but wait for readers */ #define SQLITE_CHECKPOINT_TRUNCATE 3 /* Like RESTART but also truncate WAL */ /* @@ -9475,7 +9776,7 @@ SQLITE_API int sqlite3_vtab_config(sqlite3*, int op, ...); ** [[SQLITE_VTAB_DIRECTONLY]]
      SQLITE_VTAB_DIRECTONLY
      **
Calls of the form ** [sqlite3_vtab_config](db,SQLITE_VTAB_DIRECTONLY) from within the -** the [xConnect] or [xCreate] methods of a [virtual table] implmentation +** [xConnect] or [xCreate] methods of a [virtual table] implementation ** prohibits that virtual table from being used from within triggers and ** views. **
      @@ -9483,18 +9784,28 @@ SQLITE_API int sqlite3_vtab_config(sqlite3*, int op, ...); ** [[SQLITE_VTAB_INNOCUOUS]]
      SQLITE_VTAB_INNOCUOUS
      **
Calls of the form ** [sqlite3_vtab_config](db,SQLITE_VTAB_INNOCUOUS) from within the -** the [xConnect] or [xCreate] methods of a [virtual table] implmentation +** [xConnect] or [xCreate] methods of a [virtual table] implementation ** identify that virtual table as being safe to use from within triggers ** and views. Conceptually, the SQLITE_VTAB_INNOCUOUS tag means that the ** virtual table can do no serious harm even if it is controlled by a ** malicious hacker. Developers should avoid setting the SQLITE_VTAB_INNOCUOUS ** flag unless absolutely necessary. **
      +** +** [[SQLITE_VTAB_USES_ALL_SCHEMAS]]
      SQLITE_VTAB_USES_ALL_SCHEMAS
      +**
Calls of the form +** [sqlite3_vtab_config](db,SQLITE_VTAB_USES_ALL_SCHEMAS) from within the +** [xConnect] or [xCreate] methods of a [virtual table] implementation +** instruct the query planner to begin at least a read transaction on +** all schemas ("main", "temp", and any ATTACH-ed databases) whenever the +** virtual table is used. +**
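All three options above are set the same way, from inside xConnect or xCreate. A hedged sketch follows; the method name and the one-line schema are invented for illustration, and a real implementation would allocate its own sqlite3_vtab subclass:

#include <string.h>
#include <sqlite3.h>

/* Hypothetical xConnect method that declares its schema and then tags
** the virtual table as off-limits to triggers and views. */
static int exampleConnect(
  sqlite3 *db, void *pAux,
  int argc, const char * const *argv,
  sqlite3_vtab **ppVtab, char **pzErr
){
  int rc;
  (void)pAux; (void)argc; (void)argv; (void)pzErr;
  *ppVtab = (sqlite3_vtab*)sqlite3_malloc(sizeof(sqlite3_vtab));
  if( *ppVtab==0 ) return SQLITE_NOMEM;
  memset(*ppVtab, 0, sizeof(sqlite3_vtab));
  rc = sqlite3_declare_vtab(db, "CREATE TABLE x(a,b)");
  if( rc==SQLITE_OK ){
    rc = sqlite3_vtab_config(db, SQLITE_VTAB_DIRECTONLY);
  }
  if( rc!=SQLITE_OK ){
    sqlite3_free(*ppVtab);
    *ppVtab = 0;
  }
  return rc;
}

SQLITE_VTAB_INNOCUOUS or SQLITE_VTAB_USES_ALL_SCHEMAS would be set with the same one-line sqlite3_vtab_config() call.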
      ** */ #define SQLITE_VTAB_CONSTRAINT_SUPPORT 1 #define SQLITE_VTAB_INNOCUOUS 2 #define SQLITE_VTAB_DIRECTONLY 3 +#define SQLITE_VTAB_USES_ALL_SCHEMAS 4 /* ** CAPI3REF: Determine The Virtual Table Conflict Policy @@ -9567,7 +9878,7 @@ SQLITE_API int sqlite3_vtab_nochange(sqlite3_context*); **
    12. Otherwise, "BINARY" is returned. ** */ -SQLITE_API SQLITE_EXPERIMENTAL const char *sqlite3_vtab_collation(sqlite3_index_info*,int); +SQLITE_API const char *sqlite3_vtab_collation(sqlite3_index_info*,int); /* ** CAPI3REF: Determine if a virtual table query is DISTINCT @@ -9655,7 +9966,7 @@ SQLITE_API int sqlite3_vtab_distinct(sqlite3_index_info*); ** communicated to the xBestIndex method as a ** [SQLITE_INDEX_CONSTRAINT_EQ] constraint.)^ If xBestIndex wants to use ** this constraint, it must set the corresponding -** aConstraintUsage[].argvIndex to a postive integer. ^(Then, under +** aConstraintUsage[].argvIndex to a positive integer. ^(Then, under ** the usual mode of handling IN operators, SQLite generates [bytecode] ** that invokes the [xFilter|xFilter() method] once for each value ** on the right-hand side of the IN operator.)^ Thus the virtual table @@ -9724,21 +10035,20 @@ SQLITE_API int sqlite3_vtab_in(sqlite3_index_info*, int iCons, int bHandle); ** is undefined and probably harmful. ** ** The X parameter in a call to sqlite3_vtab_in_first(X,P) or -** sqlite3_vtab_in_next(X,P) must be one of the parameters to the +** sqlite3_vtab_in_next(X,P) should be one of the parameters to the ** xFilter method which invokes these routines, and specifically ** a parameter that was previously selected for all-at-once IN constraint ** processing use the [sqlite3_vtab_in()] interface in the ** [xBestIndex|xBestIndex method]. ^(If the X parameter is not ** an xFilter argument that was selected for all-at-once IN constraint -** processing, then these routines return [SQLITE_MISUSE])^ or perhaps -** exhibit some other undefined or harmful behavior. +** processing, then these routines return [SQLITE_ERROR].)^ ** ** ^(Use these routines to access all values on the right-hand side ** of the IN constraint using code like the following: ** **

       **    for(rc=sqlite3_vtab_in_first(pList, &pVal);
      -**        rc==SQLITE_OK && pVal
      +**        rc==SQLITE_OK && pVal;
       **        rc=sqlite3_vtab_in_next(pList, &pVal)
       **    ){
       **      // do something with pVal
      @@ -9836,6 +10146,10 @@ SQLITE_API int sqlite3_vtab_rhs_value(sqlite3_index_info*, int, sqlite3_value **
       ** managed by the prepared statement S and will be automatically freed when
       ** S is finalized.
       **
      +** Not all values are available for all query elements. When a value is
      +** not available, the output variable is set to -1 if the value is numeric,
      +** or to NULL if it is a string (SQLITE_SCANSTAT_NAME).
      +**
       ** 
      ** [[SQLITE_SCANSTAT_NLOOP]]
      SQLITE_SCANSTAT_NLOOP
      **
      ^The [sqlite3_int64] variable pointed to by the V parameter will be @@ -9863,12 +10177,24 @@ SQLITE_API int sqlite3_vtab_rhs_value(sqlite3_index_info*, int, sqlite3_value ** ** to a zero-terminated UTF-8 string containing the [EXPLAIN QUERY PLAN] ** description for the X-th loop. ** -** [[SQLITE_SCANSTAT_SELECTID]]
      SQLITE_SCANSTAT_SELECT
      +** [[SQLITE_SCANSTAT_SELECTID]]
      SQLITE_SCANSTAT_SELECTID
      **
      ^The "int" variable pointed to by the V parameter will be set to the -** "select-id" for the X-th loop. The select-id identifies which query or -** subquery the loop is part of. The main query has a select-id of zero. -** The select-id is the same value as is output in the first column -** of an [EXPLAIN QUERY PLAN] query. +** id for the X-th query plan element. The id value is unique within the +** statement. The select-id is the same value as is output in the first +** column of an [EXPLAIN QUERY PLAN] query. +** +** [[SQLITE_SCANSTAT_PARENTID]]
      SQLITE_SCANSTAT_PARENTID
      +**
      The "int" variable pointed to by the V parameter will be set to the +** the id of the parent of the current query element, if applicable, or +** to zero if the query element has no parent. This is the same value as +** returned in the second column of an [EXPLAIN QUERY PLAN] query. +** +** [[SQLITE_SCANSTAT_NCYCLE]]
      SQLITE_SCANSTAT_NCYCLE
      +**
      The sqlite3_int64 output value is set to the number of cycles, +** according to the processor time-stamp counter, that elapsed while the +** query element was being processed. This value is not available for +** all query elements - if it is unavailable the output variable is +** set to -1. **
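A sketch of how the options above combine in practice, using the sqlite3_stmt_scanstatus_v2() interface declared further below. This is editorial illustration: it assumes a build with SQLITE_ENABLE_STMT_SCANSTATUS, and the reporting format is invented.

#include <stdio.h>
#include <sqlite3.h>

/* Walk every query-plan element (not just loops) and print the cycle
** count for those elements where NCYCLE is available. */
static void reportCycles(sqlite3_stmt *pStmt){
  int idx;
  for(idx=0; ; idx++){
    sqlite3_int64 nCycle = 0;
    const char *zExplain = 0;
    if( sqlite3_stmt_scanstatus_v2(pStmt, idx, SQLITE_SCANSTAT_NCYCLE,
            SQLITE_SCANSTAT_COMPLEX, (void*)&nCycle) ){
      break;  /* idx is out of range - no more query elements */
    }
    sqlite3_stmt_scanstatus_v2(pStmt, idx, SQLITE_SCANSTAT_EXPLAIN,
        SQLITE_SCANSTAT_COMPLEX, (void*)&zExplain);
    if( nCycle>=0 ){
      printf("%lld cycles: %s\n", (long long)nCycle,
             zExplain ? zExplain : "?");
    }
  }
}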
*/ #define SQLITE_SCANSTAT_NLOOP 0 @@ -9877,12 +10203,14 @@ SQLITE_API int sqlite3_vtab_rhs_value(sqlite3_index_info*, int, sqlite3_value ** #define SQLITE_SCANSTAT_NAME 3 #define SQLITE_SCANSTAT_EXPLAIN 4 #define SQLITE_SCANSTAT_SELECTID 5 +#define SQLITE_SCANSTAT_PARENTID 6 +#define SQLITE_SCANSTAT_NCYCLE 7 /* ** CAPI3REF: Prepared Statement Scan Status ** METHOD: sqlite3_stmt ** -** This interface returns information about the predicted and measured +** These interfaces return information about the predicted and measured ** performance for pStmt. Advanced applications can use this ** interface to compare the predicted and the measured performance and ** issue warnings and/or rerun [ANALYZE] if discrepancies are found. @@ -9893,19 +10221,25 @@ SQLITE_API int sqlite3_vtab_rhs_value(sqlite3_index_info*, int, sqlite3_value ** ** The "iScanStatusOp" parameter determines which status information to return. ** The "iScanStatusOp" must be one of the [scanstatus options] or the behavior -** of this interface is undefined. -** ^The requested measurement is written into a variable pointed to by -** the "pOut" parameter. -** Parameter "idx" identifies the specific loop to retrieve statistics for. -** Loops are numbered starting from zero. ^If idx is out of range - less than -** zero or greater than or equal to the total number of loops used to implement -** the statement - a non-zero value is returned and the variable that pOut -** points to is unchanged. -** -** ^Statistics might not be available for all loops in all statements. ^In cases -** where there exist loops with no available statistics, this function behaves -** as if the loop did not exist - it returns non-zero and leave the variable -** that pOut points to unchanged. +** of this interface is undefined. ^The requested measurement is written into +** a variable pointed to by the "pOut" parameter. +** +** The "flags" parameter must be passed a mask of flags. At present only +** one flag is defined - SQLITE_SCANSTAT_COMPLEX. If SQLITE_SCANSTAT_COMPLEX +** is specified, then status information is available for all elements +** of a query plan that are reported by "EXPLAIN QUERY PLAN" output. If +** SQLITE_SCANSTAT_COMPLEX is not specified, then only query plan elements +** that correspond to query loops (the "SCAN..." and "SEARCH..." elements of +** the EXPLAIN QUERY PLAN output) are available. Invoking API +** sqlite3_stmt_scanstatus() is equivalent to calling +** sqlite3_stmt_scanstatus_v2() with a zeroed flags parameter. +** +** Parameter "idx" identifies the specific query element to retrieve statistics +** for. Query elements are numbered starting from zero. A value of -1 may be used +** to query for statistics regarding the entire query. ^If idx is out of range +** - less than -1 or greater than or equal to the total number of query +** elements used to implement the statement - a non-zero value is returned and +** the variable that pOut points to is unchanged. ** ** See also: [sqlite3_stmt_scanstatus_reset()] */ @@ -9915,6 +10249,19 @@ SQLITE_API int sqlite3_stmt_scanstatus( int iScanStatusOp, /* Information desired. SQLITE_SCANSTAT_* */ void *pOut /* Result written here */ ); +SQLITE_API int sqlite3_stmt_scanstatus_v2( + sqlite3_stmt *pStmt, /* Prepared statement for which info desired */ + int idx, /* Index of loop to report on */ + int iScanStatusOp, /* Information desired.
SQLITE_SCANSTAT_* */ + int flags, /* Mask of flags defined below */ + void *pOut /* Result written here */ +); + +/* +** CAPI3REF: Prepared Statement Scan Status +** KEYWORDS: {scan status flags} +*/ +#define SQLITE_SCANSTAT_COMPLEX 0x0001 /* ** CAPI3REF: Zero Scan-Status Counters @@ -10005,6 +10352,10 @@ SQLITE_API int sqlite3_db_cacheflush(sqlite3*); ** function is not defined for operations on WITHOUT ROWID tables, or for ** DELETE operations on rowid tables. ** +** ^The sqlite3_preupdate_hook(D,C,P) function returns the P argument from +** the previous call on the same [database connection] D, or NULL for +** the first call on D. +** ** The [sqlite3_preupdate_old()], [sqlite3_preupdate_new()], ** [sqlite3_preupdate_count()], and [sqlite3_preupdate_depth()] interfaces ** provide additional information about a preupdate event. These routines @@ -10044,7 +10395,7 @@ SQLITE_API int sqlite3_db_cacheflush(sqlite3*); ** When the [sqlite3_blob_write()] API is used to update a blob column, ** the pre-update hook is invoked with SQLITE_DELETE. This is because the ** in this case the new values are not available. In this case, when a -** callback made with op==SQLITE_DELETE is actuall a write using the +** callback made with op==SQLITE_DELETE is actually a write using the ** sqlite3_blob_write() API, the [sqlite3_preupdate_blobwrite()] returns ** the index of the column being written. In other cases, where the ** pre-update hook is being invoked for some other reason, including a @@ -10305,6 +10656,13 @@ SQLITE_API SQLITE_EXPERIMENTAL int sqlite3_snapshot_recover(sqlite3 *db, const c ** SQLITE_SERIALIZE_NOCOPY bit is set but no contiguous copy ** of the database exists. ** +** After the call, if the SQLITE_SERIALIZE_NOCOPY bit had been set, +** the returned buffer content will remain accessible and unchanged +** until either the next write operation on the connection or when +** the connection is closed, and applications must not modify the +** buffer. If the bit had been clear, the returned buffer will not +** be accessed by SQLite after the call. +** ** A call to sqlite3_serialize(D,S,P,F) might return NULL even if the ** SQLITE_SERIALIZE_NOCOPY bit is omitted from argument F if a memory ** allocation error occurs. @@ -10353,6 +10711,9 @@ SQLITE_API unsigned char *sqlite3_serialize( ** SQLite will try to increase the buffer size using sqlite3_realloc64() ** if writes on the database cause it to grow larger than M bytes. ** +** Applications must not modify the buffer P or invalidate it before +** the database connection D is closed. +** ** The sqlite3_deserialize() interface will fail with SQLITE_BUSY if the ** database is currently in a read transaction or is involved in a backup ** operation. @@ -10361,6 +10722,13 @@ SQLITE_API unsigned char *sqlite3_serialize( ** S argument to sqlite3_deserialize(D,S,P,N,M,F) is "temp" then the ** function returns SQLITE_ERROR. ** +** The deserialized database should not be in [WAL mode]. If the database +** is in WAL mode, then any attempt to use the database file will result +** in an [SQLITE_CANTOPEN] error. The application can set the +** [file format version numbers] (bytes 18 and 19) of the input database P +** to 0x01 prior to invoking sqlite3_deserialize(D,S,P,N,M,F) to force the +** database file into rollback mode and work around this limitation. +** ** If sqlite3_deserialize(D,S,P,N,M,F) fails for any reason and if the ** SQLITE_DESERIALIZE_FREEONCLOSE bit is set in argument F, then ** [sqlite3_free()] is invoked on argument P prior to returning. 
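The serialize/deserialize notes above suggest a small sketch. The helper below is invented for illustration; it assumes the image was obtained from sqlite3_serialize() into a buffer allocated by sqlite3_malloc64(), as SQLITE_DESERIALIZE_FREEONCLOSE requires:

#include <sqlite3.h>

/* Load a serialized database image into the "main" schema of db. */
static int loadImage(sqlite3 *db, unsigned char *pImg, sqlite3_int64 n){
  /* If the image came from a WAL-mode database, reset the file format
  ** version bytes (offsets 18 and 19) to 0x01 so that the deserialized
  ** copy opens in rollback mode, as described above. */
  if( n>=20 && (pImg[18]==2 || pImg[19]==2) ){
    pImg[18] = 1;
    pImg[19] = 1;
  }
  /* FREEONCLOSE: SQLite calls sqlite3_free() on pImg when the connection
  ** closes or on failure. RESIZEABLE: the database may grow via
  ** sqlite3_realloc64(). */
  return sqlite3_deserialize(db, "main", pImg, n, n,
      SQLITE_DESERIALIZE_FREEONCLOSE|SQLITE_DESERIALIZE_RESIZEABLE);
}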
@@ -10410,6 +10778,19 @@ SQLITE_API int sqlite3_deserialize( # undef double #endif +#if defined(__wasi__) +# undef SQLITE_WASI +# define SQLITE_WASI 1 +# undef SQLITE_OMIT_WAL +# define SQLITE_OMIT_WAL 1 /* because it requires shared memory APIs */ +# ifndef SQLITE_OMIT_LOAD_EXTENSION +# define SQLITE_OMIT_LOAD_EXTENSION +# endif +# ifndef SQLITE_THREADSAFE +# define SQLITE_THREADSAFE 0 +# endif +#endif + #ifdef __cplusplus } /* End of the 'extern "C"' block */ #endif @@ -10616,16 +10997,20 @@ SQLITE_API int sqlite3session_create( SQLITE_API void sqlite3session_delete(sqlite3_session *pSession); /* -** CAPIREF: Conigure a Session Object +** CAPI3REF: Configure a Session Object ** METHOD: sqlite3_session ** ** This method is used to configure a session object after it has been -** created. At present the only valid value for the second parameter is -** [SQLITE_SESSION_OBJCONFIG_SIZE]. +** created. At present the only valid values for the second parameter are +** [SQLITE_SESSION_OBJCONFIG_SIZE] and [SQLITE_SESSION_OBJCONFIG_ROWID]. ** -** Arguments for sqlite3session_object_config() +*/ +SQLITE_API int sqlite3session_object_config(sqlite3_session*, int op, void *pArg); + +/* +** CAPI3REF: Options for sqlite3session_object_config ** -** The following values may passed as the the 4th parameter to +** The following values may be passed as the 2nd parameter to ** sqlite3session_object_config(). ** **
      SQLITE_SESSION_OBJCONFIG_SIZE
      @@ -10641,12 +11026,21 @@ SQLITE_API void sqlite3session_delete(sqlite3_session *pSession); ** ** It is an error (SQLITE_MISUSE) to attempt to modify this setting after ** the first table has been attached to the session object. +** +**
      SQLITE_SESSION_OBJCONFIG_ROWID
+** This option is used to set, clear or query the flag that enables +** collection of data for tables with no explicit PRIMARY KEY. +** +** Normally, tables with no explicit PRIMARY KEY are simply ignored +** by the sessions module. However, if this flag is set, it behaves +** as if such tables have a column "_rowid_ INTEGER PRIMARY KEY" inserted +** as their leftmost column. +** +** It is an error (SQLITE_MISUSE) to attempt to modify this setting after +** the first table has been attached to the session object. */ -SQLITE_API int sqlite3session_object_config(sqlite3_session*, int op, void *pArg); - -/* -*/ -#define SQLITE_SESSION_OBJCONFIG_SIZE 1 +#define SQLITE_SESSION_OBJCONFIG_SIZE 1 +#define SQLITE_SESSION_OBJCONFIG_ROWID 2 /* ** CAPI3REF: Enable Or Disable A Session Object @@ -11407,6 +11801,18 @@ SQLITE_API int sqlite3changeset_concat( ); +/* +** CAPI3REF: Upgrade the Schema of a Changeset/Patchset +*/ +SQLITE_API int sqlite3changeset_upgrade( + sqlite3 *db, + const char *zDb, + int nIn, const void *pIn, /* Input changeset */ + int *pnOut, void **ppOut /* OUT: Upgraded changeset */ +); + + + /* ** CAPI3REF: Changegroup Handle ** @@ -11453,6 +11859,38 @@ typedef struct sqlite3_changegroup sqlite3_changegroup; */ SQLITE_API int sqlite3changegroup_new(sqlite3_changegroup **pp); +/* +** CAPI3REF: Add a Schema to a Changegroup +** METHOD: sqlite3_changegroup_schema +** +** This method may be used to optionally enforce the rule that the changesets +** added to the changegroup handle must match the schema of database zDb +** ("main", "temp", or the name of an attached database). If +** sqlite3changegroup_add() is called to add a changeset that is not compatible +** with the configured schema, SQLITE_SCHEMA is returned and the changegroup +** object is left in an undefined state. +** +** A changeset schema is considered compatible with the database schema in +** the same way as for sqlite3changeset_apply(). Specifically, for each +** table in the changeset, there exists a database table with: +** +**
        +**
      • The name identified by the changeset, and +**
      • at least as many columns as recorded in the changeset, and +**
      • the primary key columns in the same position as recorded in +** the changeset. +**
+** +** The output of the changegroup object always has the same schema as the +** database nominated using this function. In cases where changesets passed +** to sqlite3changegroup_add() have fewer columns than the corresponding table +** in the database schema, these are filled in using the default column +** values from the database schema. This makes it possible to combine +** changesets that have different numbers of columns for a single table +** within a changegroup, provided that they are otherwise compatible. +*/ +SQLITE_API int sqlite3changegroup_schema(sqlite3_changegroup*, sqlite3*, const char *zDb); + /* ** CAPI3REF: Add A Changeset To A Changegroup ** METHOD: sqlite3_changegroup @@ -11521,13 +11959,18 @@ SQLITE_API int sqlite3changegroup_new(sqlite3_changegroup **pp); ** If the new changeset contains changes to a table that is already present ** in the changegroup, then the number of columns and the position of the ** primary key columns for the table must be consistent. If this is not the -** case, this function fails with SQLITE_SCHEMA. If the input changeset -** appears to be corrupt and the corruption is detected, SQLITE_CORRUPT is -** returned. Or, if an out-of-memory condition occurs during processing, this -** function returns SQLITE_NOMEM. In all cases, if an error occurs the state -** of the final contents of the changegroup is undefined. +** case, this function fails with SQLITE_SCHEMA. However, if the changegroup +** object has been configured with a database schema using the +** sqlite3changegroup_schema() API, then it is possible to combine changesets +** with different numbers of columns for a single table, provided that +** they are otherwise compatible. +** +** If the input changeset appears to be corrupt and the corruption is +** detected, SQLITE_CORRUPT is returned. Or, if an out-of-memory condition +** occurs during processing, this function returns SQLITE_NOMEM. ** -** If no error occurs, SQLITE_OK is returned. +** In all cases, if an error occurs the state of the final contents of the +** changegroup is undefined. If no error occurs, SQLITE_OK is returned. */ SQLITE_API int sqlite3changegroup_add(sqlite3_changegroup*, int nData, void *pData); @@ -11779,9 +12222,30 @@ SQLITE_API int sqlite3changeset_apply_v2( ** Invert the changeset before applying it. This is equivalent to inverting ** a changeset using sqlite3changeset_invert() before applying it. It is ** an error to specify this flag with a patchset. +** +**
      SQLITE_CHANGESETAPPLY_IGNORENOOP
      +** Do not invoke the conflict handler callback for any changes that +** would not actually modify the database even if they were applied. +** Specifically, this means that the conflict handler is not invoked +** for: +**
        +**
      • a delete change if the row being deleted cannot be found, +**
      • an update change if the modified fields are already set to +** their new values in the conflicting row, or +**
      • an insert change if all fields of the conflicting row match +** the row being inserted. +**
      +** +**
      SQLITE_CHANGESETAPPLY_FKNOACTION
+** If this flag is set, then all foreign key constraints in the target +** database behave as if they were declared with "ON UPDATE NO ACTION ON +** DELETE NO ACTION", even if they are actually CASCADE, RESTRICT, SET NULL +** or SET DEFAULT. */ #define SQLITE_CHANGESETAPPLY_NOSAVEPOINT 0x0001 #define SQLITE_CHANGESETAPPLY_INVERT 0x0002 +#define SQLITE_CHANGESETAPPLY_IGNORENOOP 0x0004 +#define SQLITE_CHANGESETAPPLY_FKNOACTION 0x0008 /* ** CAPI3REF: Constants Passed To The Conflict Handler @@ -12522,7 +12986,7 @@ struct Fts5PhraseIter { ** See xPhraseFirstColumn above. */ struct Fts5ExtensionApi { - int iVersion; /* Currently always set to 3 */ + int iVersion; /* Currently always set to 2 */ void *(*xUserData)(Fts5Context*); @@ -12751,8 +13215,8 @@ struct Fts5ExtensionApi { ** as separate queries of the FTS index are required for each synonym. ** ** When using methods (2) or (3), it is important that the tokenizer only -** provide synonyms when tokenizing document text (method (2)) or query -** text (method (3)), not both. Doing so will not cause any errors, but is +** provide synonyms when tokenizing document text (method (3)) or query +** text (method (2)), not both. Doing so will not cause any errors, but is ** inefficient. */ typedef struct Fts5Tokenizer Fts5Tokenizer; @@ -12800,7 +13264,7 @@ struct fts5_api { int (*xCreateTokenizer)( fts5_api *pApi, const char *zName, - void *pContext, + void *pUserData, fts5_tokenizer *pTokenizer, void (*xDestroy)(void*) ); @@ -12809,7 +13273,7 @@ struct fts5_api { int (*xFindTokenizer)( fts5_api *pApi, const char *zName, - void **ppContext, + void **ppUserData, fts5_tokenizer *pTokenizer ); @@ -12817,7 +13281,7 @@ struct fts5_api { int (*xCreateFunction)( fts5_api *pApi, const char *zName, - void *pContext, + void *pUserData, fts5_extension_function xFunction, void (*xDestroy)(void*) );
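To close, a sketch that ties the new session-extension pieces together: pin a changegroup to the "main" schema, merge two changesets, and apply the result with the two new flags. The helper is invented for illustration, assumes a build with SQLITE_ENABLE_SESSION and SQLITE_ENABLE_PREUPDATE_HOOK, and abbreviates error handling.

#include <sqlite3.h>

static int mergeAndApply(
  sqlite3 *db,
  int nA, void *pA,                /* First changeset */
  int nB, void *pB,                /* Second changeset */
  int (*xConflict)(void*, int, sqlite3_changeset_iter*)
){
  sqlite3_changegroup *pGrp = 0;
  int nOut = 0;
  void *pOut = 0;
  int rc = sqlite3changegroup_new(&pGrp);
  /* Enforce compatibility with (and fill in defaults from) "main". */
  if( rc==SQLITE_OK ) rc = sqlite3changegroup_schema(pGrp, db, "main");
  if( rc==SQLITE_OK ) rc = sqlite3changegroup_add(pGrp, nA, pA);
  if( rc==SQLITE_OK ) rc = sqlite3changegroup_add(pGrp, nB, pB);
  if( rc==SQLITE_OK ) rc = sqlite3changegroup_output(pGrp, &nOut, &pOut);
  sqlite3changegroup_delete(pGrp);
  if( rc==SQLITE_OK ){
    /* Skip no-op conflicts; treat all foreign keys as NO ACTION. */
    rc = sqlite3changeset_apply_v2(db, nOut, pOut,
             0 /* xFilter */, xConflict, 0 /* pCtx */,
             0 /* ppRebase */, 0 /* pnRebase */,
             SQLITE_CHANGESETAPPLY_IGNORENOOP|
             SQLITE_CHANGESETAPPLY_FKNOACTION);
  }
  sqlite3_free(pOut);
  return rc;
}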