From c7b4f8ac80e1524eacfa179864c6e928c5f3cf17 Mon Sep 17 00:00:00 2001 From: Yingwen Date: Tue, 16 Jul 2024 09:56:07 +0800 Subject: [PATCH] docs: add docs for the merge_mode option (#1061) Co-authored-by: Yiran --- .../nightly/en/reference/sql/compatibility.md | 1 + docs/nightly/en/reference/sql/create.md | 64 +++++++++++++++++- .../nightly/zh/reference/sql/compatibility.md | 1 + docs/nightly/zh/reference/sql/create.md | 67 ++++++++++++++++++- 4 files changed, 131 insertions(+), 2 deletions(-) diff --git a/docs/nightly/en/reference/sql/compatibility.md b/docs/nightly/en/reference/sql/compatibility.md index b7f2fdb87..c81dcfaf2 100644 --- a/docs/nightly/en/reference/sql/compatibility.md +++ b/docs/nightly/en/reference/sql/compatibility.md @@ -9,6 +9,7 @@ GreptimeDB supports a subset of ANSI SQL and has some unique extensions. Some ma 2. Insert data: Consistent with ANSI SQL syntax, but requires the `TIME INDEX` column value (or default value) to be provided. 3. Update data: Does not support `UPDATE` syntax, but if the primary key and `TIME INDEX` corresponding column values are the same during `INSERT`, subsequent inserted rows will overwrite previously written rows, effectively achieving an update. * Since 0.8, GreptimeDB supports [append mode](/reference/sql/create#create-an-append-only-table) that creates an append-only table with `append_mode="true"` option which keeps duplicate rows. + * GreptimeDB supports [merge mode](/reference/sql/create#create-a-table-with-merge-mode) that creates a table with `merge_mode="last_non_null"` option which allows partially updating a row's fields. 4. Query data: Query syntax is compatible with ANSI SQL, with some functional differences and omissions. * Does not support views. * TQL syntax extension: Supports executing PromQL in SQL via TQL subcommands. Please refer to the [TQL](./tql.md) section for details. 
diff --git a/docs/nightly/en/reference/sql/create.md b/docs/nightly/en/reference/sql/create.md index 18a0a548c..deccd6653 100644 --- a/docs/nightly/en/reference/sql/create.md +++ b/docs/nightly/en/reference/sql/create.md @@ -98,7 +98,8 @@ Users can add table options by using `WITH`. The valid options contain the follo | `compaction.twcs.max_inactive_window_files` | Max num of files that can be kept in inactive time window. | String value, such as '1'. Only available when `compaction.type` is `twcs`. | | `compaction.twcs.time_window` | Compaction time window | String value, such as '1d' for 1 day. The table usually partitions rows into different time windows by their timestamps. Only available when `compaction.type` is `twcs`. | | `memtable.type` | Type of the memtable. | String value, supports `time_series`, `partition_tree`. | -| `append_mode` | Whether the table is append-only | String value. Default is 'false', which removes duplicate rows by primary keys and timestamps. Setting it to 'true' to enable append mode and create an append-only table which keeps duplicate rows. | +| `append_mode` | Whether the table is append-only | String value. Default is 'false', which removes duplicate rows by primary keys and timestamps according to the `merge_mode`. Set it to 'true' to enable append mode and create an append-only table which keeps duplicate rows. | +| `merge_mode` | The strategy to merge duplicate rows | String value. Only available when `append_mode` is 'false'. Default is `last_row`, which keeps the last row for the same primary key and timestamp. Set it to `last_non_null` to keep the last non-null value of each field for the same primary key and timestamp. | | `comment` | Table level comment | String value. | #### Create a table with TTL @@ -149,6 +150,67 @@ CREATE TABLE IF NOT EXISTS temperatures( ) engine=mito with('append_mode'='true'); ``` +#### Create a table with merge mode +Create a table with `last_row` merge mode, which is the default merge mode. 
+```sql +create table if not exists metrics( + host string, + ts timestamp, + cpu double, + memory double, + TIME INDEX (ts), + PRIMARY KEY(host) +) +engine=mito +with('merge_mode'='last_row'); +``` + +Under `last_row` mode, the table merges rows with the same primary key and timestamp by only keeping the latest row. +```sql +INSERT INTO metrics VALUES ('host1', 0, 0, NULL), ('host2', 1, NULL, 1); +INSERT INTO metrics VALUES ('host1', 0, NULL, 10), ('host2', 1, 11, NULL); + +SELECT * from metrics ORDER BY host, ts; + ++-------+-------------------------+------+--------+ +| host | ts | cpu | memory | ++-------+-------------------------+------+--------+ +| host1 | 1970-01-01T00:00:00 | | 10.0 | +| host2 | 1970-01-01T00:00:00.001 | 11.0 | | ++-------+-------------------------+------+--------+ +``` + + +Create a table with `last_non_null` merge mode. +```sql +create table if not exists metrics( + host string, + ts timestamp, + cpu double, + memory double, + TIME INDEX (ts), + PRIMARY KEY(host) +) +engine=mito +with('merge_mode'='last_non_null'); +``` + +Under `last_non_null` mode, the table merges rows with the same primary key and timestamp by keeping the latest non-null value of each field. 
+```sql +INSERT INTO metrics VALUES ('host1', 0, 0, NULL), ('host2', 1, NULL, 1); +INSERT INTO metrics VALUES ('host1', 0, NULL, 10), ('host2', 1, 11, NULL); + +SELECT * from metrics ORDER BY host, ts; + ++-------+-------------------------+------+--------+ +| host | ts | cpu | memory | ++-------+-------------------------+------+--------+ +| host1 | 1970-01-01T00:00:00 | 0.0 | 10.0 | +| host2 | 1970-01-01T00:00:00.001 | 11.0 | 1.0 | ++-------+-------------------------+------+--------+ +``` + + ### Column options GreptimeDB supports the following column options: diff --git a/docs/nightly/zh/reference/sql/compatibility.md b/docs/nightly/zh/reference/sql/compatibility.md index f6b5b586c..31ab21046 100644 --- a/docs/nightly/zh/reference/sql/compatibility.md +++ b/docs/nightly/zh/reference/sql/compatibility.md @@ -9,6 +9,7 @@ GreptimeDB 支持的 SQL 是 ANSI SQL 的子集,并且拥有一些特有的扩 2. 插入新数据: 与 ANSI SQL 语法一致,但是强制要求提供 `TIME INDEX` 列值(或默认值)。 3. 更新:不支持 `UPDATE` 语法,但是在 `INSERT` 的时候,如果主键和 `TIME INDEX` 对应的列值一样,那么后续插入的行将覆盖以前写入的行,从而变相实现更新。 * 从 0.8 开始, GreptimeDB 支持 [append 模式](/reference/sql/create#创建-Append-Only-表),创建时指定`append_mode = "true"` 选项的表将保留重复的数据行。 + * GreptimeDB 支持 [merge 模式](/reference/sql/create#创建带有-merge-模式的表),该模式使用 `merge_mode="last_non_null"` 选项创建表,允许部分更新字段。 4. 
查询:查询语法兼容 ANSI SQL,存在部分功能差异和缺失 * 不支持视图 * TQL 语法扩展:TQL 子命令支持在 SQL 中执行 PromQL,详细请参考 [TQL](./tql.md) 一节。 diff --git a/docs/nightly/zh/reference/sql/create.md b/docs/nightly/zh/reference/sql/create.md index e221743ed..bd90761d5 100644 --- a/docs/nightly/zh/reference/sql/create.md +++ b/docs/nightly/zh/reference/sql/create.md @@ -100,7 +100,8 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name | `compaction.twcs.max_inactive_window_files` | 非活跃时间窗口内的最大文件数 | 字符串值,如 '1'。只在 `compaction.type` 为 `twcs` 时可用 | | `compaction.twcs.time_window` | Compaction 时间窗口 | 字符串值,如 '1d' 表示 1 天。该表会根据时间戳将数据分区到不同的时间窗口中。只在 `compaction.type` 为 `twcs` 时可用 | | `memtable.type` | memtable 的类型 | 字符串值,支持 `time_series`,`partition_tree` | -| `append_mode` | 该表是否时 append-only 的 | 字符串值. 默认为 'false',表示会根据主键和时间戳对数据去重。设置为 'true' 可以开启 append 模式和创建 append-only 表,保留所有重复的行 | +| `append_mode` | 该表是否是 append-only 的 | 字符串值。默认值为 'false',根据 `merge_mode` 按主键和时间戳删除重复行。设置为 'true' 可以开启 append 模式和创建 append-only 表,保留所有重复的行 | +| `merge_mode` | 合并重复行的策略 | 字符串值。只有当 `append_mode` 为 'false' 时可用。默认值为 `last_row`,保留相同主键和时间戳的最后一行。设置为 `last_non_null` 则保留相同主键和时间戳下每个字段的最后一个非空值。 | | `comment` | 表级注释 | 字符串值. 
| #### 创建指定 TTL 的表 @@ -151,6 +152,70 @@ CREATE TABLE IF NOT EXISTS temperatures( ) engine=mito with('append_mode'='true'); ``` +#### 创建带有 merge 模式的表 + +创建一个带有 `last_row` merge 模式的表,这是默认的 merge 模式。 + +```sql +create table if not exists metrics( + host string, + ts timestamp, + cpu double, + memory double, + TIME INDEX (ts), + PRIMARY KEY(host) +) +engine=mito +with('merge_mode'='last_row'); +``` + +在 `last_row` 模式下,表会通过保留最新的行来合并具有相同主键和时间戳的行。 + +```sql +INSERT INTO metrics VALUES ('host1', 0, 0, NULL), ('host2', 1, NULL, 1); +INSERT INTO metrics VALUES ('host1', 0, NULL, 10), ('host2', 1, 11, NULL); + +SELECT * from metrics ORDER BY host, ts; + ++-------+-------------------------+------+--------+ +| host | ts | cpu | memory | ++-------+-------------------------+------+--------+ +| host1 | 1970-01-01T00:00:00 | | 10.0 | +| host2 | 1970-01-01T00:00:00.001 | 11.0 | | ++-------+-------------------------+------+--------+ +``` + + +创建带有 `last_non_null` merge 模式的表。 + +```sql +create table if not exists metrics( + host string, + ts timestamp, + cpu double, + memory double, + TIME INDEX (ts), + PRIMARY KEY(host) +) +engine=mito +with('merge_mode'='last_non_null'); +``` + +在 `last_non_null` 模式下,表会通过保留每个字段的最新非空值来合并具有相同主键和时间戳的行。 + +```sql +INSERT INTO metrics VALUES ('host1', 0, 0, NULL), ('host2', 1, NULL, 1); +INSERT INTO metrics VALUES ('host1', 0, NULL, 10), ('host2', 1, 11, NULL); + +SELECT * from metrics ORDER BY host, ts; + ++-------+-------------------------+------+--------+ +| host | ts | cpu | memory | ++-------+-------------------------+------+--------+ +| host1 | 1970-01-01T00:00:00 | 0.0 | 10.0 | +| host2 | 1970-01-01T00:00:00.001 | 11.0 | 1.0 | ++-------+-------------------------+------+--------+ +``` ### 列选项