From 8d87a4e0ebc60ed8c77df0cc984c243512af7c91 Mon Sep 17 00:00:00 2001 From: Sumukh-Phalgaonkar Date: Wed, 29 Jan 2025 13:25:38 +0530 Subject: [PATCH] Doc changes for the recent update in the default values of CDC flags --- .../using-logical-replication/advanced-topic.md | 16 +++++++++------- .../preview/explore/change-data-capture.md | 4 +++- .../preview/reference/configuration/yb-master.md | 12 ++++++++++++ .../reference/configuration/yb-tserver.md | 2 +- .../using-logical-replication/advanced-topic.md | 16 +++++++++------- .../stable/explore/change-data-capture.md | 4 +++- .../stable/reference/configuration/yb-master.md | 12 ++++++++++++ .../stable/reference/configuration/yb-tserver.md | 2 +- 8 files changed, 50 insertions(+), 18 deletions(-) diff --git a/docs/content/preview/develop/change-data-capture/using-logical-replication/advanced-topic.md b/docs/content/preview/develop/change-data-capture/using-logical-replication/advanced-topic.md index d4c79055b967..f83a448ae3f4 100644 --- a/docs/content/preview/develop/change-data-capture/using-logical-replication/advanced-topic.md +++ b/docs/content/preview/develop/change-data-capture/using-logical-replication/advanced-topic.md @@ -114,19 +114,19 @@ CREATE TABLE test_table_2(id INT PRIMARY KEY, aa INT, bb INT); ### YugabyteDB semantics -Unlike PostgreSQL, any changes made to the publication's tables list are not applied immediately in YugabyteDB. Instead the publication's tables list is periodically refreshed, and changes, if any, are applied. The refresh interval is managed using the [cdcsdk_publication_list_refresh_interval_secs](../../../../reference/configuration/yb-tserver/#cdcsdk-publication-list-refresh-interval-secs) flag. The default is one hour (3600 sec). This means that any changes made to the publication's tables list will be applied after `cdcsdk_publication_list_refresh_interval_secs` in the worst case. 
+Unlike PostgreSQL, any changes made to the publication's tables list are not applied immediately in YugabyteDB. Instead, the publication's tables list is periodically refreshed, and changes, if any, are applied. The refresh interval is managed using the [cdcsdk_publication_list_refresh_interval_secs](../../../../reference/configuration/yb-tserver/#cdcsdk-publication-list-refresh-interval-secs) flag. The default is 15 minutes (900 sec). This means that any changes made to the publication's tables list will be applied after `cdcsdk_publication_list_refresh_interval_secs` in the worst case. Consider the following example: -- Suppose that the value of the flag `cdcsdk_publication_list_refresh_interval_secs` is 3600 sec (1 hour) and the publication's tables list is being refreshed every hour at 8 am, 9 am, 10 am, and so on. +- Suppose that the value of the flag `cdcsdk_publication_list_refresh_interval_secs` is 900 sec (15 minutes) and the publication's tables list is being refreshed every 15 minutes at 8:00 am, 8:15 am, 8:30 am, and so on. -- If any change is made to publication's tables list at 8:01 am, then this change will be applied at 9:00 am. However, any change made to publication's tables list at 8:59 am will also be applied at 9:00 am. +- If any change is made to the publication's tables list at 8:01 am, then this change will be applied at 8:15 am. However, any change made to the publication's tables list at 8:14 am will also be applied at 8:15 am. The value of this flag can be changed at run time, but the change becomes effective only after some time. Continuing the example: -- Suppose that the value of the flag `cdcsdk_publication_list_refresh_interval_secs` is changed from 3600 sec (1 hour) to 600 sec (10 minutes) at 8:01 am. +- Suppose that the value of the flag `cdcsdk_publication_list_refresh_interval_secs` is changed from 900 sec (15 minutes) to 300 sec (5 minutes) at 8:01 am. -- This change will only be applied after 9:00 am. 
That is, the publication's tables list will be next refreshed at 9:00 am. Then, the next refresh will happen at 9:10 am, and the subsequent refreshes will take place every 10 minutes. +- This change will only be applied after 8:15 am. That is, the publication's tables list will be next refreshed at 8:15 am. Then, the next refresh will happen at 8:20 am, and the subsequent refreshes will take place every 5 minutes. ### Required settings @@ -152,12 +152,14 @@ To enable dynamic table addition, perform the following steps: ./yb-ts-cli --server_address= set_flag cdcsdk_publication_list_refresh_interval_secs 120 ``` -1. After you start receiving records from the newly added table in the publication, reset the `cdcsdk_publication_list_refresh_interval_secs` flag to a high value (for example, 3600 seconds). +1. After you start receiving records from the newly added table in the publication, reset the `cdcsdk_publication_list_refresh_interval_secs` flag to its original value (900 seconds by default). ```sh - ./yb-ts-cli --server_address= set_flag cdcsdk_publication_list_refresh_interval_secs 3600 + ./yb-ts-cli --server_address= set_flag cdcsdk_publication_list_refresh_interval_secs 900 ``` +> **Note:** If you reduce the value of `cdcsdk_publication_list_refresh_interval_secs`, increase it back to its original value once you start receiving changes from the new table. + ## Initial snapshot The [initial snapshot](../../../../architecture/docdb-replication/cdc-logical-replication/#initial-snapshot) data for a table is consumed by executing a snapshot query (SELECT statement). To ensure that the streaming phase continues exactly from where the snapshot left, this snapshot query is executed as of a specific database state. In YugabyteDB, this database state is represented by a value of `HybridTime`. 
Changes due to transactions with commit time strictly greater than this snapshot `HybridTime` will be consumed during the streaming phase. diff --git a/docs/content/preview/explore/change-data-capture.md b/docs/content/preview/explore/change-data-capture.md index 8fc2b99d576f..e6b1cad852ba 100644 --- a/docs/content/preview/explore/change-data-capture.md +++ b/docs/content/preview/explore/change-data-capture.md @@ -57,7 +57,7 @@ To set up pg_recvlogical, create and start the local cluster by running the foll ./bin/yugabyted start \ --advertise_address=127.0.0.1 \ --base_dir="${HOME}/var/node1" \ - --tserver_flags="allowed_preview_flags_csv={cdcsdk_enable_dynamic_table_support},cdcsdk_enable_dynamic_table_support=true,cdcsdk_publication_list_refresh_interval_secs=2" + --tserver_flags="allowed_preview_flags_csv={cdcsdk_enable_dynamic_table_support},cdcsdk_enable_dynamic_table_support=true,cdcsdk_publication_list_refresh_interval_secs=120" ``` ### Create tables @@ -166,6 +166,8 @@ table public.projects: INSERT: project_id[integer]:1 name[character varying]:'Pr COMMIT 3 ``` +YugabyteDB semantics differ from PostgreSQL when it comes to streaming tables added to a publication: the publication's tables list is refreshed periodically, so a newly added table can take up to `cdcsdk_publication_list_refresh_interval_secs` (120 seconds in this example) to start streaming. Refer to [YugabyteDB semantics](../../develop/change-data-capture/using-logical-replication/advanced-topic/#yugabytedb-semantics) for more details. + {{% explore-cleanup-local %}} ## Learn more diff --git a/docs/content/preview/reference/configuration/yb-master.md b/docs/content/preview/reference/configuration/yb-master.md index b5307f6bf9be..54f094cb13c2 100644 --- a/docs/content/preview/reference/configuration/yb-master.md +++ b/docs/content/preview/reference/configuration/yb-master.md @@ -933,6 +933,18 @@ WAL retention time, in seconds, to be used for tables for which a CDC stream was Default: `14400` (4 hours) +##### --cdc_intent_retention_ms + +The time period, in milliseconds, after which the intents will be cleaned up if there is no client polling for the change records. 
+ +Default: `28800000` (8 hours) + +##### --cdcsdk_tablet_not_of_interest_timeout_secs + +Timeout, in seconds, after which a tablet is inferred to be not of interest for CDC. To indicate that a tablet is of interest for CDC, poll it at least once within this interval after stream or slot creation. + +Default: `14400` (4 hours) + ##### --enable_tablet_split_of_cdcsdk_streamed_tables Toggle automatic tablet splitting for tables in a CDCSDK stream, enhancing user control over replication processes. diff --git a/docs/content/preview/reference/configuration/yb-tserver.md b/docs/content/preview/reference/configuration/yb-tserver.md index bfe3366bbdbd..9b716a702946 100644 --- a/docs/content/preview/reference/configuration/yb-tserver.md +++ b/docs/content/preview/reference/configuration/yb-tserver.md @@ -1362,7 +1362,7 @@ Default: `false` Interval in seconds at which the table list in the publication will be refreshed. -Default: `3600` +Default: `900` ##### --cdc_stream_records_threshold_size_bytes diff --git a/docs/content/stable/develop/change-data-capture/using-logical-replication/advanced-topic.md b/docs/content/stable/develop/change-data-capture/using-logical-replication/advanced-topic.md index c96447426e82..7011b40ad565 100644 --- a/docs/content/stable/develop/change-data-capture/using-logical-replication/advanced-topic.md +++ b/docs/content/stable/develop/change-data-capture/using-logical-replication/advanced-topic.md @@ -112,19 +112,19 @@ CREATE TABLE test_table_2(id INT PRIMARY KEY, aa INT, bb INT); ### YugabyteDB semantics -Unlike PostgreSQL, any changes made to the publication's tables list are not applied immediately in YugabyteDB. Instead the publication's tables list is periodically refreshed, and changes, if any, are applied. The refresh interval is managed using the [cdcsdk_publication_list_refresh_interval_secs](../../../../reference/configuration/yb-tserver/#cdcsdk-publication-list-refresh-interval-secs) flag. 
The default is one hour (3600 sec). This means that any changes made to the publication's tables list will be applied after `cdcsdk_publication_list_refresh_interval_secs` in the worst case. +Unlike PostgreSQL, any changes made to the publication's tables list are not applied immediately in YugabyteDB. Instead, the publication's tables list is periodically refreshed, and changes, if any, are applied. The refresh interval is managed using the [cdcsdk_publication_list_refresh_interval_secs](../../../../reference/configuration/yb-tserver/#cdcsdk-publication-list-refresh-interval-secs) flag. The default is 15 minutes (900 sec). This means that any changes made to the publication's tables list will be applied after `cdcsdk_publication_list_refresh_interval_secs` in the worst case. Consider the following example: -- Suppose that the value of the flag `cdcsdk_publication_list_refresh_interval_secs` is 3600 sec (1 hour) and the publication's tables list is being refreshed every hour at 8 am, 9 am, 10 am, and so on. +- Suppose that the value of the flag `cdcsdk_publication_list_refresh_interval_secs` is 900 sec (15 minutes) and the publication's tables list is being refreshed every 15 minutes at 8:00 am, 8:15 am, 8:30 am, and so on. -- If any change is made to publication's tables list at 8:01 am, then this change will be applied at 9:00 am. However, any change made to publication's tables list at 8:59 am will also be applied at 9:00 am. +- If any change is made to the publication's tables list at 8:01 am, then this change will be applied at 8:15 am. However, any change made to the publication's tables list at 8:14 am will also be applied at 8:15 am. The value of this flag can be changed at run time, but the change becomes effective only after some time. Continuing the example: -- Suppose that the value of the flag `cdcsdk_publication_list_refresh_interval_secs` is changed from 3600 sec (1 hour) to 600 sec (10 minutes) at 8:01 am. 
+- Suppose that the value of the flag `cdcsdk_publication_list_refresh_interval_secs` is changed from 900 sec (15 minutes) to 300 sec (5 minutes) at 8:01 am. -- This change will only be applied after 9:00 am. That is, the publication's tables list will be next refreshed at 9:00 am. Then, the next refresh will happen at 9:10 am, and the subsequent refreshes will take place every 10 minutes. +- This change will only be applied after 8:15 am. That is, the publication's tables list will be next refreshed at 8:15 am. Then, the next refresh will happen at 8:20 am, and the subsequent refreshes will take place every 5 minutes. ### Required settings @@ -150,12 +150,14 @@ To enable dynamic table addition, perform the following steps: ./yb-ts-cli --server_address= set_flag cdcsdk_publication_list_refresh_interval_secs 120 ``` -1. After you start receiving records from the newly added table in the publication, reset the `cdcsdk_publication_list_refresh_interval_secs` flag to a high value (for example, 3600 seconds). +1. After you start receiving records from the newly added table in the publication, reset the `cdcsdk_publication_list_refresh_interval_secs` flag to its original value (900 seconds by default). ```sh - ./yb-ts-cli --server_address= set_flag cdcsdk_publication_list_refresh_interval_secs 3600 + ./yb-ts-cli --server_address= set_flag cdcsdk_publication_list_refresh_interval_secs 900 ``` +> **Note:** If you reduce the value of `cdcsdk_publication_list_refresh_interval_secs`, increase it back to its original value once you start receiving changes from the new table. + ## Initial snapshot The [initial snapshot](../../../../architecture/docdb-replication/cdc-logical-replication/#initial-snapshot) data for a table is consumed by executing a snapshot query (SELECT statement). 
To ensure that the streaming phase continues exactly from where the snapshot left, this snapshot query is executed as of a specific database state. In YugabyteDB, this database state is represented by a value of `HybridTime`. Changes due to transactions with commit time strictly greater than this snapshot `HybridTime` will be consumed during the streaming phase. diff --git a/docs/content/stable/explore/change-data-capture.md b/docs/content/stable/explore/change-data-capture.md index ebe6b2f029c1..e2f534b63b08 100644 --- a/docs/content/stable/explore/change-data-capture.md +++ b/docs/content/stable/explore/change-data-capture.md @@ -57,7 +57,7 @@ To set up pg_recvlogical, create and start the local cluster by running the foll ./bin/yugabyted start \ --advertise_address=127.0.0.1 \ --base_dir="${HOME}/var/node1" \ - --tserver_flags="allowed_preview_flags_csv={cdcsdk_enable_dynamic_table_support},cdcsdk_enable_dynamic_table_support=true,cdcsdk_publication_list_refresh_interval_secs=2" + --tserver_flags="allowed_preview_flags_csv={cdcsdk_enable_dynamic_table_support},cdcsdk_enable_dynamic_table_support=true,cdcsdk_publication_list_refresh_interval_secs=120" ``` ### Create tables @@ -166,6 +166,8 @@ table public.projects: INSERT: project_id[integer]:1 name[character varying]:'Pr COMMIT 3 ``` +YugabyteDB semantics differ from PostgreSQL when it comes to streaming tables added to a publication: the publication's tables list is refreshed periodically, so a newly added table can take up to `cdcsdk_publication_list_refresh_interval_secs` (120 seconds in this example) to start streaming. Refer to [YugabyteDB semantics](../../develop/change-data-capture/using-logical-replication/advanced-topic/#yugabytedb-semantics) for more details. 
+ {{% explore-cleanup-local %}} ## Learn more diff --git a/docs/content/stable/reference/configuration/yb-master.md b/docs/content/stable/reference/configuration/yb-master.md index e5440ee5da07..7acdbd2fd8b3 100644 --- a/docs/content/stable/reference/configuration/yb-master.md +++ b/docs/content/stable/reference/configuration/yb-master.md @@ -941,6 +941,18 @@ WAL retention time, in seconds, to be used for tables for which a CDC stream was Default: `14400` (4 hours) +##### --cdc_intent_retention_ms + +The time period, in milliseconds, after which the intents will be cleaned up if there is no client polling for the change records. + +Default: `28800000` (8 hours) + +##### --cdcsdk_tablet_not_of_interest_timeout_secs + +Timeout, in seconds, after which a tablet is inferred to be not of interest for CDC. To indicate that a tablet is of interest for CDC, poll it at least once within this interval after stream or slot creation. + +Default: `14400` (4 hours) + ##### --enable_tablet_split_of_cdcsdk_streamed_tables Toggle automatic tablet splitting for tables in a CDCSDK stream, enhancing user control over replication processes. diff --git a/docs/content/stable/reference/configuration/yb-tserver.md b/docs/content/stable/reference/configuration/yb-tserver.md index 71d37e838057..f12b4ae9e93e 100644 --- a/docs/content/stable/reference/configuration/yb-tserver.md +++ b/docs/content/stable/reference/configuration/yb-tserver.md @@ -1370,7 +1370,7 @@ Default: `false` Interval in seconds at which the table list in the publication will be refreshed. -Default: `3600` +Default: `900` ##### --cdc_stream_records_threshold_size_bytes