diff --git a/.github/workflows/cicd.yaml b/.github/workflows/cicd.yaml
index 5fa5f6411c8..de76ed04a5f 100644
--- a/.github/workflows/cicd.yaml
+++ b/.github/workflows/cicd.yaml
@@ -107,9 +107,11 @@ jobs:
uses: actions/upload-artifact@v2
with:
name: linux-ut-result-cpp-${{ github.sha }}
+ # exclude _deps xml
path: |
build/**/*.xml
reports/*.xml
+ !build/_deps/*
- name: install
if: ${{ github.event_name == 'push' }}
diff --git a/.github/workflows/sdk.yml b/.github/workflows/sdk.yml
index 8f4dc6bd628..dc4dd94a2b6 100644
--- a/.github/workflows/sdk.yml
+++ b/.github/workflows/sdk.yml
@@ -352,6 +352,7 @@ jobs:
image: ghcr.io/4paradigm/hybridsql:latest
env:
OPENMLDB_BUILD_TARGET: "openmldb"
+ OPENMLDB_MODE: standalone
steps:
- uses: actions/checkout@v2
diff --git a/.github/workflows/udf-doc.yml b/.github/workflows/udf-doc.yml
index bb57bac2110..5a0e6b33807 100644
--- a/.github/workflows/udf-doc.yml
+++ b/.github/workflows/udf-doc.yml
@@ -54,8 +54,8 @@ jobs:
if: github.event_name != 'pull_request'
with:
add-paths: |
- docs/en/reference/sql/functions_and_operators/Files/udfs_8h.md
- docs/zh/openmldb_sql/functions_and_operators/Files/udfs_8h.md
+ docs/en/reference/sql/udfs_8h.md
+ docs/zh/openmldb_sql/udfs_8h.md
labels: |
udf
branch: docs-udf-patch
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 21066a3c505..703d6bf11de 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -136,6 +136,7 @@ endif()
include(FetchContent)
set(FETCHCONTENT_QUIET OFF)
include(farmhash)
+include(rapidjson)
# contrib libs
add_subdirectory(contrib EXCLUDE_FROM_ALL)
diff --git a/benchmark/pom.xml b/benchmark/pom.xml
index d1d7b99c916..572aec4d282 100644
--- a/benchmark/pom.xml
+++ b/benchmark/pom.xml
@@ -27,12 +27,12 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs
com.4paradigm.openmldb
openmldb-jdbc
- 0.7.0
+ 0.8.3
com.4paradigm.openmldb
openmldb-native
- 0.7.0-allinone
+ 0.8.3-allinone
org.slf4j
diff --git a/benchmark/src/main/java/com/_4paradigm/openmldb/benchmark/BenchmarkConfig.java b/benchmark/src/main/java/com/_4paradigm/openmldb/benchmark/BenchmarkConfig.java
index c6546cadc5d..4f9861cbda2 100644
--- a/benchmark/src/main/java/com/_4paradigm/openmldb/benchmark/BenchmarkConfig.java
+++ b/benchmark/src/main/java/com/_4paradigm/openmldb/benchmark/BenchmarkConfig.java
@@ -34,6 +34,7 @@ public class BenchmarkConfig {
public static long TS_BASE = System.currentTimeMillis();
public static String DEPLOY_NAME;
public static String CSV_PATH;
+ public static int PUT_BACH_SIZE = 1;
private static SqlExecutor executor = null;
private static SdkOption option = null;
@@ -58,6 +59,7 @@ public class BenchmarkConfig {
// if(!CSV_PATH.startsWith("/")){
// CSV_PATH=Util.getRootPath()+CSV_PATH;
// }
+ PUT_BACH_SIZE = Integer.valueOf(prop.getProperty("PUT_BACH_SIZE", "1"));
} catch (Exception e) {
e.printStackTrace();
}
diff --git a/benchmark/src/main/java/com/_4paradigm/openmldb/benchmark/OpenMLDBInsertBenchmark.java b/benchmark/src/main/java/com/_4paradigm/openmldb/benchmark/OpenMLDBInsertBenchmark.java
new file mode 100644
index 00000000000..a856d46ecfd
--- /dev/null
+++ b/benchmark/src/main/java/com/_4paradigm/openmldb/benchmark/OpenMLDBInsertBenchmark.java
@@ -0,0 +1,131 @@
+package com._4paradigm.openmldb.benchmark;
+
+import com._4paradigm.openmldb.sdk.SqlExecutor;
+import org.openjdk.jmh.annotations.*;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+import java.sql.Timestamp;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * JMH benchmark that times inserts issued through a {@link SqlExecutor} insert prepared statement.
+ *
+ * <p>{@link #initEnv()} creates the database and table, {@link #executePut()} is the measured
+ * operation, and {@link #cleanEnv()} drops the table and database again.
+ */
+@BenchmarkMode(Mode.SampleTime)
+@OutputTimeUnit(TimeUnit.MICROSECONDS)
+@State(Scope.Benchmark)
+@Threads(10)
+@Fork(value = 1, jvmArgs = {"-Xms8G", "-Xmx8G"})
+@Warmup(iterations = 2)
+@Measurement(iterations = 5, time = 60)
+public class OpenMLDBInsertBenchmark {
+    private SqlExecutor executor;
+    private String database = "test_put_db";
+    private String tableName = "test_put_t1";
+    private int indexNum;
+    private String placeholderSQL;
+    // Number of columns of each type; executePut() binds them in this order.
+    int stringNum = 15;
+    int doubleNum = 5;
+    int timestampNum = 5;
+    int bigintNum = 5;
+
+    /** Obtains the executor and builds the "insert ... values (?, ...)" statement text. */
+    public OpenMLDBInsertBenchmark() {
+        executor = BenchmarkConfig.GetSqlExecutor(false);
+        indexNum = BenchmarkConfig.WINDOW_NUM;
+        StringBuilder builder = new StringBuilder();
+        builder.append("insert into ");
+        builder.append(tableName);
+        builder.append(" values (");
+        for (int i = 0; i < stringNum + doubleNum + timestampNum + bigintNum; i++) {
+            if (i > 0) {
+                builder.append(", ");
+            }
+            builder.append("?");
+        }
+        builder.append(");");
+        placeholderSQL = builder.toString();
+    }
+
+    /** Creates the benchmark database and table before measurement starts. */
+    @Setup
+    public void initEnv() {
+        Util.executeSQL("CREATE DATABASE IF NOT EXISTS " + database + ";", executor);
+        Util.executeSQL("USE " + database + ";", executor);
+        String ddl = Util.genDDL(tableName, indexNum);
+        Util.executeSQL(ddl, executor);
+    }
+
+    /**
+     * Binds random values for up to {@code BenchmarkConfig.PUT_BACH_SIZE} rows. Sizes above 1 are sent
+     * with a single {@code executeBatch()}; otherwise {@code execute()} is called once.
+     * Failures are printed and not rethrown.
+     */
+    @Benchmark
+    public void executePut() {
+        // Read the mutable static once so the whole invocation sees a single batch size.
+        final int batchSize = BenchmarkConfig.PUT_BACH_SIZE;
+        final boolean batched = batchSize > 1;
+        // The state is shared by all benchmark threads (Scope.Benchmark, @Threads(10)); a per-thread
+        // generator keeps the threads from contending on one shared java.util.Random.
+        final ThreadLocalRandom rnd = ThreadLocalRandom.current();
+        try (java.sql.PreparedStatement pstmt = executor.getInsertPreparedStmt(database, placeholderSQL)) {
+            for (int num = 0; num < batchSize; num++) {
+                int idx = 1;
+                for (int i = 0; i < stringNum; i++) {
+                    // The first indexNum string columns get a key drawn from the PK range.
+                    String value = i < indexNum
+                            ? String.valueOf(BenchmarkConfig.PK_BASE + rnd.nextInt(BenchmarkConfig.PK_NUM))
+                            : "v" + (100000 + rnd.nextInt(100000));
+                    pstmt.setString(idx++, value);
+                }
+                for (int i = 0; i < doubleNum; i++) {
+                    pstmt.setDouble(idx++, rnd.nextDouble());
+                }
+                for (int i = 0; i < timestampNum; i++) {
+                    pstmt.setTimestamp(idx++, new Timestamp(System.currentTimeMillis()));
+                }
+                for (int i = 0; i < bigintNum; i++) {
+                    pstmt.setLong(idx++, rnd.nextLong());
+                }
+                if (batched) {
+                    pstmt.addBatch();
+                }
+            }
+            if (batched) {
+                pstmt.executeBatch();
+            } else {
+                pstmt.execute();
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    /** Drops the benchmark table and database after measurement ends. */
+    @TearDown
+    public void cleanEnv() {
+        Util.executeSQL("USE " + database + ";", executor);
+        Util.executeSQL("DROP TABLE " + tableName + ";", executor);
+        Util.executeSQL("DROP DATABASE " + database + ";", executor);
+    }
+
+    /** Runs this benchmark class through the JMH runner with a single fork. */
+    public static void main(String[] args) {
+        try {
+            Options opt = new OptionsBuilder()
+                    .include(OpenMLDBInsertBenchmark.class.getSimpleName())
+                    .forks(1)
+                    .build();
+            new Runner(opt).run();
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+}
diff --git a/benchmark/src/main/resources/conf.properties b/benchmark/src/main/resources/conf.properties
index bf3d22a4310..bcde106ed08 100644
--- a/benchmark/src/main/resources/conf.properties
+++ b/benchmark/src/main/resources/conf.properties
@@ -1,5 +1,5 @@
-ZK_CLUSTER=172.24.4.55:30008
-ZK_PATH=/openmldb
+ZK_CLUSTER=172.24.4.55:32200
+ZK_PATH=/openmldb_test
WINDOW_NUM=2
WINDOW_SIZE=1000
@@ -12,3 +12,5 @@ PK_BASE=1000000
DATABASE=bank_perf
DEPLOY_NAME=deploy_bank
CSV_PATH=data/bank_flattenRequest.csv
+
+PUT_BACH_SIZE=100
\ No newline at end of file
diff --git a/cases/function/window/error_window.yaml b/cases/function/window/error_window.yaml
index 9e9419bc74f..8b41d1ff0bf 100644
--- a/cases/function/window/error_window.yaml
+++ b/cases/function/window/error_window.yaml
@@ -17,15 +17,17 @@ debugs: []
version: 0.5.0
cases:
- id: 0
- desc: no order by
+ desc: RANGE-type WINDOW with offset PRECEDING/FOLLOWING requires ORDER BY
inputs:
- columns: [ "id int","c1 string","c3 int","c4 bigint","c5 float","c6 double","c7 timestamp","c8 date" ]
indexs: [ "index1:c8:c4" ]
rows:
- [1,"aa",20,30,1.1,2.1,1590738990000,"2020-05-01"]
sql: |
- SELECT id, c1, c4, count(c4) OVER w1 as w1_c4_count FROM {0} WINDOW w1 AS (PARTITION BY {0}.c8 ROWS BETWEEN 2 PRECEDING AND CURRENT ROW);
+ SELECT id, c1, c4, count(c4) OVER w1 as w1_c4_count FROM {0}
+ WINDOW w1 AS (PARTITION BY {0}.c8 ROWS_RANGE BETWEEN 2 PRECEDING AND CURRENT ROW);
expect:
+ msg: RANGE/ROWS_RANGE-type FRAME with offset PRECEDING/FOLLOWING requires exactly one ORDER BY column
success: false
- id: 1
desc: no partition by
@@ -301,3 +303,29 @@ cases:
SELECT id, c1, c3, sum(c4) OVER w1 as w1_c4_sum FROM {0} WINDOW w1 AS (PARTITION BY {0}.c33 ORDER BY {0}.c7 ROWS_RANGE BETWEEN 2s PRECEDING AND CURRENT ROW);
expect:
success: false
+ - id: 17
+ desc: ROWS WINDOW + EXCLUDE CURRENT_TIME requires order by
+ inputs:
+ - columns: [ "id int","c1 string","c3 int","c4 bigint","c5 float","c6 double","c7 timestamp","c8 date" ]
+ indexs: [ "index1:c8:c4" ]
+ rows:
+ - [1,"aa",20,30,1.1,2.1,1590738990000,"2020-05-01"]
+ sql: |
+ SELECT id, c1, c4, count(c4) OVER w1 as w1_c4_count FROM {0}
+ WINDOW w1 AS (PARTITION BY {0}.c8 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW EXCLUDE CURRENT_TIME);
+ expect:
+ msg: WINDOW with EXCLUDE CURRENT_TIME requires exactly one ORDER BY column
+ success: false
+ - id: 18
+ desc: RANGE WINDOW + EXCLUDE CURRENT_TIME requires order by
+ inputs:
+ - columns: [ "id int","c1 string","c3 int","c4 bigint","c5 float","c6 double","c7 timestamp","c8 date" ]
+ indexs: [ "index1:c8:c4" ]
+ rows:
+ - [1,"aa",20,30,1.1,2.1,1590738990000,"2020-05-01"]
+ sql: |
+ SELECT id, c1, c4, count(c4) OVER w1 as w1_c4_count FROM {0}
+ WINDOW w1 AS (PARTITION BY {0}.c8 ROWS_RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW EXCLUDE CURRENT_TIME);
+ expect:
+ msg: WINDOW with EXCLUDE CURRENT_TIME requires exactly one ORDER BY column
+ success: false
diff --git a/cases/plan/cmd.yaml b/cases/plan/cmd.yaml
index 50b5fa94343..58eb872268f 100644
--- a/cases/plan/cmd.yaml
+++ b/cases/plan/cmd.yaml
@@ -649,6 +649,22 @@ cases:
+-cmd_type: drop function
+-if_exists: true
+-args: [func1]
+ - id: truncate_stmt
+ desc: truncate
+ sql: TRUNCATE TABLE t1;
+ expect:
+ node_tree_str: |
+ +-node[CMD]
+ +-cmd_type: truncate table
+ +-args: [t1]
+ - id: truncate_stmt_db
+ desc: truncate
+ sql: TRUNCATE TABLE db1.t1;
+ expect:
+ node_tree_str: |
+ +-node[CMD]
+ +-cmd_type: truncate table
+ +-args: [db1, t1]
- id: exit_stmt
desc: exit statement
sql: EXIT;
diff --git a/cases/plan/create.yaml b/cases/plan/create.yaml
index 315ec30a305..f1076934391 100644
--- a/cases/plan/create.yaml
+++ b/cases/plan/create.yaml
@@ -1035,3 +1035,40 @@ cases:
+-kind: HIVE
+-path: hdfs://path
+-table_option_list: []
+
+ - id: 34
+ desc: Create 指定压缩
+ sql: |
+ create table t1(
+ column1 int,
+ column2 timestamp,
+ index(key=column1, ts=column2)) OPTIONS (compress_type="snappy");
+ expect:
+ node_tree_str: |
+ +-node[CREATE]
+ +-table: t1
+ +-IF NOT EXIST: 0
+ +-column_desc_list[list]:
+ | +-0:
+ | | +-node[kColumnDesc]
+ | | +-column_name: column1
+ | | +-column_type: int32
+ | | +-NOT NULL: 0
+ | +-1:
+ | | +-node[kColumnDesc]
+ | | +-column_name: column2
+ | | +-column_type: timestamp
+ | | +-NOT NULL: 0
+ | +-2:
+ | +-node[kColumnIndex]
+ | +-keys: [column1]
+ | +-ts_col: column2
+ | +-abs_ttl: -2
+ | +-lat_ttl: -2
+ | +-ttl_type:
+ | +-version_column:
+ | +-version_count: 0
+ +-table_option_list[list]:
+ +-0:
+ +-node[kCompressType]
+ +-compress_type: snappy
diff --git a/cases/plan/join_query.yaml b/cases/plan/join_query.yaml
index 4d2bbdc0e57..28021b54d4b 100644
--- a/cases/plan/join_query.yaml
+++ b/cases/plan/join_query.yaml
@@ -18,20 +18,83 @@ cases:
sql: SELECT t1.COL1, t1.COL2, t2.COL1, t2.COL2 FROM t1 full join t2 on t1.col1 = t2.col2;
mode: physical-plan-unsupport
- id: 2
+ mode: request-unsupport
desc: 简单SELECT LEFT JOIN
- mode: runner-unsupport
sql: SELECT t1.COL1, t1.COL2, t2.COL1, t2.COL2 FROM t1 left join t2 on t1.col1 = t2.col2;
+ expect:
+ node_tree_str: |
+ +-node[kQuery]: kQuerySelect
+ +-distinct_opt: false
+ +-where_expr: null
+ +-group_expr_list: null
+ +-having_expr: null
+ +-order_expr_list: null
+ +-limit: null
+ +-select_list[list]:
+ | +-0:
+ | | +-node[kResTarget]
+ | | +-val:
+ | | | +-expr[column ref]
+ | | | +-relation_name: t1
+ | | | +-column_name: COL1
+ | | +-name:
+ | +-1:
+ | | +-node[kResTarget]
+ | | +-val:
+ | | | +-expr[column ref]
+ | | | +-relation_name: t1
+ | | | +-column_name: COL2
+ | | +-name:
+ | +-2:
+ | | +-node[kResTarget]
+ | | +-val:
+ | | | +-expr[column ref]
+ | | | +-relation_name: t2
+ | | | +-column_name: COL1
+ | | +-name:
+ | +-3:
+ | +-node[kResTarget]
+ | +-val:
+ | | +-expr[column ref]
+ | | +-relation_name: t2
+ | | +-column_name: COL2
+ | +-name:
+ +-tableref_list[list]:
+ | +-0:
+ | +-node[kTableRef]: kJoin
+ | +-join_type: LeftJoin
+ | +-left:
+ | | +-node[kTableRef]: kTable
+ | | +-table: t1
+ | | +-alias:
+ | +-right:
+ | +-node[kTableRef]: kTable
+ | +-table: t2
+ | +-alias:
+ | +-order_expressions: null
+ | +-on:
+ | +-expr[binary]
+ | +-=[list]:
+ | +-0:
+ | | +-expr[column ref]
+ | | +-relation_name: t1
+ | | +-column_name: col1
+ | +-1:
+ | +-expr[column ref]
+ | +-relation_name: t2
+ | +-column_name: col2
+ +-window_list: []
- id: 3
desc: 简单SELECT LAST JOIN
sql: SELECT t1.COL1, t1.COL2, t2.COL1, t2.COL2 FROM t1 last join t2 order by t2.col5 on t1.col1 = t2.col2;
- id: 4
desc: 简单SELECT RIGHT JOIN
sql: SELECT t1.COL1, t1.COL2, t2.COL1, t2.COL2 FROM t1 right join t2 on t1.col1 = t2.col2;
- mode: runner-unsupport
+ mode: physical-plan-unsupport
- id: 5
desc: LeftJoin有不等式条件
sql: SELECT t1.col1 as t1_col1, t2.col2 as t2_col2 FROM t1 left join t2 on t1.col1 = t2.col2 and t2.col5 >= t1.col5;
- mode: runner-unsupport
+ mode: request-unsupport
- id: 6
desc: LastJoin有不等式条件
sql: SELECT t1.col1 as t1_col1, t2.col2 as t2_col2 FROM t1 last join t2 order by t2.col5 on t1.col1 = t2.col2 and t2.col5 >= t1.col5;
@@ -162,4 +225,4 @@ cases:
col1 as id,
sum(col2) OVER w2 as w2_col2_sum FROM t1 WINDOW
w2 AS (PARTITION BY col1 ORDER BY col5 ROWS_RANGE BETWEEN 1d OPEN PRECEDING AND CURRENT ROW)
- ) as out1 ON out0.id = out1.id;
\ No newline at end of file
+ ) as out1 ON out0.id = out1.id;
diff --git a/cases/query/fail_query.yaml b/cases/query/fail_query.yaml
index 4058525678c..415fa203127 100644
--- a/cases/query/fail_query.yaml
+++ b/cases/query/fail_query.yaml
@@ -49,3 +49,24 @@ cases:
SELECT 100 + 1s;
expect:
success: false
+ - id: 3
+ desc: unsupport join
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",20,1000]
+ - ["bb",30,1000]
+ - name: t2
+ columns: ["c2 int","c4 timestamp"]
+ indexs: ["index1:c2:c4"]
+ rows:
+ - [20,3000]
+ - [20,2000]
+ sql: |
+ select t1.c1 as id, t2.* from t1 right join t2
+ on t1.c2 = t2.c2
+ expect:
+ success: false
+ msg: unsupport join type RightJoin
diff --git a/cases/query/last_join_subquery_window.yml b/cases/query/last_join_subquery_window.yml
new file mode 100644
index 00000000000..81787f87e67
--- /dev/null
+++ b/cases/query/last_join_subquery_window.yml
@@ -0,0 +1,406 @@
+cases:
+ # ===================================================================
+ # LAST JOIN (WINDOW)
+ # ===================================================================
+ - id: 0
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",2,1590738989000]
+ - ["bb",3,1590738990000]
+ - ["cc",4,1590738991000]
+ - name: t2
+ columns: ["c1 string", "c2 int", "c4 timestamp"]
+ indexs: ["index1:c1:c4", "index2:c2:c4"]
+ rows:
+ - ["aa",1, 1590738989000]
+ - ["bb",3, 1590738990000]
+ - ["dd",4, 1590738991000]
+ sql: |
+ select t1.c1, tx.c1 as c1r, tx.c2 as c2r, agg
+ from t1 last join (
+ select c1, c2, count(c4) over w as agg
+ from t2
+ window w as (
+ partition by c1 order by c4
+ rows between 1 preceding and current row
+ )
+ ) tx
+ on t1.c2 = tx.c2
+ request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1r, tx.c2 -> c2r, agg))
+ REQUEST_JOIN(type=LastJoin, condition=, left_keys=(), right_keys=(), index_keys=(t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(EXCLUDE_REQUEST_ROW, partition_keys=(), orders=(ASC), rows=(c4, 1 PRECEDING, 0 CURRENT), index_keys=(c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ cluster_request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1r, tx.c2 -> c2r, agg))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ DATA_PROVIDER(request=t1)
+ REQUEST_JOIN(OUTPUT_RIGHT_ONLY, type=LastJoin, condition=, left_keys=(), right_keys=(), index_keys=(#5))
+ SIMPLE_PROJECT(sources=(#5 -> t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(c1, c2, agg))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ SIMPLE_PROJECT(sources=(c1, c2))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(EXCLUDE_REQUEST_ROW, partition_keys=(), orders=(ASC), rows=(c4, 1 PRECEDING, 0 CURRENT), index_keys=(c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ expect:
+ columns: ["c1 string", "c1r string", "c2r int", "agg int64"]
+ order: c1
+ data: |
+ aa, NULL, NULL, NULL
+ bb, bb, 3, 1
+ cc, dd, 4, 1
+ - id: 1
+ desc: last join window(attributes)
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",2,2000]
+ - ["bb",3,2000]
+ - ["cc",4,2000]
+ - name: t2
+ columns: ["c1 string", "c2 int", "c4 timestamp", "val int"]
+ indexs: ["index1:c1:c4", "index2:c2:c4"]
+ rows:
+ - ["aa",1, 1000, 1]
+ - ["aa",4, 2000, 2]
+ - ["bb",3, 3000, 3]
+ - ["dd",4, 8000, 4]
+ - ["dd",4, 7000, 5]
+ - ["dd",4, 9000, 6]
+ sql: |
+ select t1.c1, tx.c1 as c1r, tx.c2 as c2r, agg1, agg2
+ from t1 last join (
+ select c1, c2, c4,
+ count(c4) over w as agg1,
+ max(val) over w as agg2
+ from t2
+ window w as (
+ partition by c1 order by c4
+ rows between 2 preceding and current row
+ exclude current_row
+ )
+ ) tx
+ order by tx.c4
+ on t1.c2 = tx.c2
+ request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1r, tx.c2 -> c2r, agg1, agg2))
+ REQUEST_JOIN(type=LastJoin, right_sort=(ASC), condition=, left_keys=(), right_keys=(), index_keys=(t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(EXCLUDE_REQUEST_ROW, EXCLUDE_CURRENT_ROW, partition_keys=(), orders=(ASC), rows=(c4, 2 PRECEDING, 0 CURRENT), index_keys=(c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ cluster_request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1r, tx.c2 -> c2r, agg1, agg2))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ DATA_PROVIDER(request=t1)
+ REQUEST_JOIN(OUTPUT_RIGHT_ONLY, type=LastJoin, right_sort=(ASC), condition=, left_keys=(), right_keys=(), index_keys=(#5))
+ SIMPLE_PROJECT(sources=(#5 -> t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(c1, c2, c4, agg1, agg2))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ SIMPLE_PROJECT(sources=(c1, c2, c4))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(EXCLUDE_REQUEST_ROW, EXCLUDE_CURRENT_ROW, partition_keys=(), orders=(ASC), rows=(c4, 2 PRECEDING, 0 CURRENT), index_keys=(c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ expect:
+ columns: ["c1 string", "c1r string", "c2r int", "agg1 int64", 'agg2 int']
+ order: c1
+ data: |
+ aa, NULL, NULL, NULL, NULL
+ bb, bb, 3, 0, NULL
+ cc, dd, 4, 2, 5
+ - id: 2
+ # issue on join to (multiple windows), fix later
+ mode: batch-unsupport
+ desc: last join multiple windows
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",2,2000]
+ - ["bb",3,2000]
+ - ["cc",4,2000]
+ - name: t2
+ columns: ["c1 string", "c2 int", "c4 timestamp", "val int", "gp int"]
+ indexs: ["index1:c1:c4", "index2:c2:c4", "index3:gp:c4"]
+ rows:
+ - ["aa",1, 1000, 1, 0]
+ - ["aa",4, 2000, 2, 0]
+ - ["bb",3, 3000, 3, 1]
+ - ["dd",4, 8000, 4, 1]
+ - ["dd",4, 7000, 5, 1]
+ - ["dd",4, 9000, 6, 1]
+ sql: |
+ select t1.c1, tx.c1 as c1r, tx.c2 as c2r, agg1, agg2, agg3
+ from t1 last join (
+ select c1, c2, c4,
+ count(c4) over w1 as agg1,
+ max(val) over w1 as agg2,
+ min(val) over w2 as agg3
+ from t2
+ window w1 as (
+ partition by c1 order by c4
+ rows between 2 preceding and current row
+ exclude current_row
+ ),
+ w2 as (
+ partition by gp order by c4
+ rows_range between 3s preceding and current row
+ exclude current_time
+ )
+ ) tx
+ order by tx.c4
+ on t1.c2 = tx.c2
+ request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1r, tx.c2 -> c2r, agg1, agg2, agg3))
+ REQUEST_JOIN(type=LastJoin, right_sort=(ASC), condition=, left_keys=(), right_keys=(), index_keys=(t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(c1, c2, c4, agg1, agg2, agg3))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(EXCLUDE_REQUEST_ROW, EXCLUDE_CURRENT_ROW, partition_keys=(), orders=(ASC), rows=(c4, 2 PRECEDING, 0 CURRENT), index_keys=(c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(EXCLUDE_REQUEST_ROW, EXCLUDE_CURRENT_TIME, partition_keys=(), orders=(ASC), range=(c4, 3000 PRECEDING, 0 CURRENT), index_keys=(gp))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(type=Partition, table=t2, index=index3)
+ cluster_request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1r, tx.c2 -> c2r, agg1, agg2, agg3))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ DATA_PROVIDER(request=t1)
+ REQUEST_JOIN(OUTPUT_RIGHT_ONLY, type=LastJoin, right_sort=(ASC), condition=, left_keys=(), right_keys=(), index_keys=(#5))
+ SIMPLE_PROJECT(sources=(#5 -> t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(c1, c2, c4, agg1, agg2, agg3))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ SIMPLE_PROJECT(sources=(c1, c2, c4))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(EXCLUDE_REQUEST_ROW, EXCLUDE_CURRENT_ROW, partition_keys=(), orders=(ASC), rows=(c4, 2 PRECEDING, 0 CURRENT), index_keys=(c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(EXCLUDE_REQUEST_ROW, EXCLUDE_CURRENT_TIME, partition_keys=(), orders=(ASC), range=(c4, 3000 PRECEDING, 0 CURRENT), index_keys=(gp))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(type=Partition, table=t2, index=index3)
+ expect:
+ columns: ["c1 string", "c1r string", "c2r int", "agg1 int64", 'agg2 int', 'agg3 int']
+ order: c1
+ data: |
+ aa, NULL, NULL, NULL, NULL, NULL
+ bb, bb, 3, 0, NULL, NULL
+ cc, dd, 4, 2, 5, 4
+ - id: 3
+ desc: last join window union
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",2,2000]
+ - ["bb",3,2000]
+ - ["cc",4,2000]
+ - name: t2
+ columns: ["c1 string", "c2 int", "c4 timestamp", "val int"]
+ indexs: ["index1:c1:c4", "index2:c2:c4" ]
+ rows:
+ - ["aa",1, 1000, 1]
+ - ["aa",4, 2000, 2]
+ - ["bb",3, 3000, 3]
+ - ["dd",4, 8000, 4]
+ - ["dd",4, 9000, 6]
+ - name: t3
+ columns: ["c1 string", "c2 int", "c4 timestamp", "val int"]
+ indexs: ["index1:c1:c4", "index2:c2:c4"]
+ rows:
+ - ["aa", 2, 1000, 5]
+ - ["bb", 3, 2000, 8]
+ - ["dd", 4, 4000, 12]
+ - ["dd", 4, 7000, 10]
+ - ["dd", 4, 6000, 11]
+ - ["dd", 4, 10000, 100]
+ sql: |
+ select t1.c1, tx.c1 as c1r, tx.c2 as c2r, agg1, agg2
+ from t1 last join (
+ select c1, c2, c4,
+ count(c4) over w1 as agg1,
+ max(val) over w1 as agg2,
+ from t2
+ window w1 as (
+ union t3
+ partition by c1 order by c4
+ rows_range between 3s preceding and current row
+ instance_not_in_window exclude current_row
+ )
+ ) tx
+ order by tx.c4
+ on t1.c2 = tx.c2
+ request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1r, tx.c2 -> c2r, agg1, agg2))
+ REQUEST_JOIN(type=LastJoin, right_sort=(ASC), condition=, left_keys=(), right_keys=(), index_keys=(t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(EXCLUDE_CURRENT_ROW, INSTANCE_NOT_IN_WINDOW, partition_keys=(c1), orders=(c4 ASC), range=(c4, 3000 PRECEDING, 0 CURRENT), index_keys=)
+ +-UNION(partition_keys=(), orders=(ASC), range=(c4, 3000 PRECEDING, 0 CURRENT), index_keys=(c1))
+ RENAME(name=t2)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(table=t2)
+ cluster_request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1r, tx.c2 -> c2r, agg1, agg2))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ DATA_PROVIDER(request=t1)
+ REQUEST_JOIN(OUTPUT_RIGHT_ONLY, type=LastJoin, right_sort=(ASC), condition=, left_keys=(), right_keys=(), index_keys=(#5))
+ SIMPLE_PROJECT(sources=(#5 -> t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(c1, c2, c4, agg1, agg2))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ SIMPLE_PROJECT(sources=(c1, c2, c4))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(EXCLUDE_CURRENT_ROW, INSTANCE_NOT_IN_WINDOW, partition_keys=(c1), orders=(c4 ASC), range=(c4, 3000 PRECEDING, 0 CURRENT), index_keys=)
+ +-UNION(partition_keys=(), orders=(ASC), range=(c4, 3000 PRECEDING, 0 CURRENT), index_keys=(c1))
+ RENAME(name=t2)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(table=t2)
+ expect:
+ columns: ["c1 string", "c1r string", "c2r int", "agg1 int64", 'agg2 int']
+ order: c1
+ data: |
+ aa, NULL, NULL, NULL, NULL
+ bb, bb, 3, 1, 8
+ cc, dd, 4, 2, 11
+ - id: 4
+    desc: last join multiple window union
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",2,2000]
+ - ["bb",3,2000]
+ - ["cc",4,2000]
+ - name: t2
+ columns: ["c1 string", "c2 int", "c4 timestamp", "val int"]
+ indexs: ["index1:c1:c4", "index2:c2:c4" ]
+ rows:
+ - ["aa",1, 1000, 1]
+ - ["aa",4, 2000, 2]
+ - ["bb",3, 3000, 3]
+ - ["dd",4, 8000, 4]
+ - ["dd",4, 9000, 6]
+ - name: t3
+ columns: ["c1 string", "c2 int", "c4 timestamp", "val int"]
+ indexs: ["index1:c1:c4", "index2:c2:c4"]
+ rows:
+ - ["aa", 2, 1000, 5]
+ - ["bb", 3, 2000, 8]
+ - ["dd", 4, 4000, 12]
+ - ["dd", 4, 7000, 10]
+ - ["dd", 4, 6000, 11]
+ - ["dd", 4, 10000, 100]
+ sql: |
+ select t1.c1, tx.c1 as c1r, tx.c2 as c2r, agg1, agg2, agg3
+ from t1 last join (
+ select c1, c2, c4,
+ count(c4) over w1 as agg1,
+ max(val) over w1 as agg2,
+ min(val) over w2 as agg3
+ from t2
+ window w1 as (
+ union t3
+ partition by c1 order by c4
+ rows_range between 3s preceding and current row
+ instance_not_in_window exclude current_row
+ ),
+ w2 as (
+ union t3
+ partition by c1 order by c4
+ rows between 2 preceding and current row
+ instance_not_in_window
+ )
+ ) tx
+ order by tx.c4
+ on t1.c2 = tx.c2
+ request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1r, tx.c2 -> c2r, agg1, agg2, agg3))
+ REQUEST_JOIN(type=LastJoin, right_sort=(ASC), condition=, left_keys=(), right_keys=(), index_keys=(t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(c1, c2, c4, agg1, agg2, agg3))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(EXCLUDE_CURRENT_ROW, INSTANCE_NOT_IN_WINDOW, partition_keys=(c1), orders=(c4 ASC), range=(c4, 3000 PRECEDING, 0 CURRENT), index_keys=)
+ +-UNION(partition_keys=(), orders=(ASC), range=(c4, 3000 PRECEDING, 0 CURRENT), index_keys=(c1))
+ RENAME(name=t2)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(table=t2)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(INSTANCE_NOT_IN_WINDOW, partition_keys=(c1), orders=(c4 ASC), rows=(c4, 2 PRECEDING, 0 CURRENT), index_keys=)
+ +-UNION(partition_keys=(), orders=(ASC), rows=(c4, 2 PRECEDING, 0 CURRENT), index_keys=(c1))
+ RENAME(name=t2)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(table=t2)
+ cluster_request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1r, tx.c2 -> c2r, agg1, agg2, agg3))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ DATA_PROVIDER(request=t1)
+ REQUEST_JOIN(OUTPUT_RIGHT_ONLY, type=LastJoin, right_sort=(ASC), condition=, left_keys=(), right_keys=(), index_keys=(#5))
+ SIMPLE_PROJECT(sources=(#5 -> t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(c1, c2, c4, agg1, agg2, agg3))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ SIMPLE_PROJECT(sources=(c1, c2, c4))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(EXCLUDE_CURRENT_ROW, INSTANCE_NOT_IN_WINDOW, partition_keys=(c1), orders=(c4 ASC), range=(c4, 3000 PRECEDING, 0 CURRENT), index_keys=)
+ +-UNION(partition_keys=(), orders=(ASC), range=(c4, 3000 PRECEDING, 0 CURRENT), index_keys=(c1))
+ RENAME(name=t2)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(table=t2)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(INSTANCE_NOT_IN_WINDOW, partition_keys=(c1), orders=(c4 ASC), rows=(c4, 2 PRECEDING, 0 CURRENT), index_keys=)
+ +-UNION(partition_keys=(), orders=(ASC), rows=(c4, 2 PRECEDING, 0 CURRENT), index_keys=(c1))
+ RENAME(name=t2)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(table=t2)
+ expect:
+ columns: ["c1 string", "c1r string", "c2r int", "agg1 int64", 'agg2 int', "agg3 int"]
+ order: c1
+ data: |
+ aa, NULL, NULL, NULL, NULL, NULL
+ bb, bb, 3, 1, 8, 3
+ cc, dd, 4, 2, 11, 6
diff --git a/cases/query/left_join.yml b/cases/query/left_join.yml
new file mode 100644
index 00000000000..87e1c387ea6
--- /dev/null
+++ b/cases/query/left_join.yml
@@ -0,0 +1,575 @@
+cases:
+ - id: 0
+ desc: last join to a left join subquery
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",20,1000]
+ - ["bb",30,1000]
+ - ["cc",40,1000]
+ - ["dd",50,1000]
+ - name: t2
+ columns: ["c1 string","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",2000]
+ - ["bb",2000]
+ - ["cc",3000]
+ - name: t3
+ columns: ["c1 string","c2 int","c3 bigint","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",19,13,3000]
+ - ["aa",21,13,3000]
+ - ["bb",34,131,3000]
+ - ["bb",21,131,3000]
+ sql: |
+ select
+ t1.c1,
+ tx.c1 as c1l,
+ tx.c1r,
+ tx.c2r
+ from t1 last join
+ (
+ select t2.c1 as c1,
+ t3.c1 as c1r,
+ t3.c2 as c2r
+ from t2 left join t3
+ on t2.c1 = t3.c1
+ ) tx
+ on t1.c1 = tx.c1 and t1.c2 > tx.c2r
+ batch_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1l, tx.c1r, tx.c2r))
+ JOIN(type=LastJoin, condition=t1.c2 > tx.c2r, left_keys=(), right_keys=(), index_keys=(t1.c1))
+ DATA_PROVIDER(table=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(t2.c1, t3.c1 -> c1r, t3.c2 -> c2r))
+ JOIN(type=LeftJoin, condition=, left_keys=(), right_keys=(), index_keys=(t2.c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1l, tx.c1r, tx.c2r))
+ REQUEST_JOIN(type=LastJoin, condition=t1.c2 > tx.c2r, left_keys=(), right_keys=(), index_keys=(t1.c1))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(t2.c1, t3.c1 -> c1r, t3.c2 -> c2r))
+ REQUEST_JOIN(type=LeftJoin, condition=, left_keys=(), right_keys=(), index_keys=(t2.c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ expect:
+ order: c1
+ columns: ["c1 string", "c1l string", "c1r string", "c2r int"]
+ data: |
+ aa, aa, aa, 19
+ bb, bb, bb, 21
+ cc, NULL, NULL, NULL
+ dd, NULL, NULL, NULL
+ - id: 1
+ desc: last join to a left join subquery, request unsupport if left join not optimized
+ mode: request-unsupport
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",20,1000]
+ - ["bb",30,1000]
+ - ["cc",40,1000]
+ - ["dd",50,1000]
+ - name: t2
+ columns: ["c1 string","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",2000]
+ - ["bb",3000]
+ - ["cc",4000]
+ - name: t3
+ columns: ["c1 string","c2 int","c3 bigint","c4 timestamp"]
+ indexs: ["index1:c2:c4"]
+ rows:
+ - ["aa",19,13,3000]
+ - ["aa",21,13,4000]
+ - ["bb",34,131,3000]
+ - ["bb",21,131,4000]
+ sql: |
+ select
+ t1.c1,
+ tx.c1 as c1l,
+ tx.c1r,
+ tx.c2r
+ from t1 last join
+ (
+ select t2.c1 as c1,
+ t3.c1 as c1r,
+ t3.c2 as c2r
+ from t2 left join t3
+ on t2.c1 = t3.c1
+ ) tx
+ on t1.c1 = tx.c1 and t1.c2 > tx.c2r
+ batch_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1l, tx.c1r, tx.c2r))
+ JOIN(type=LastJoin, condition=t1.c2 > tx.c2r, left_keys=(), right_keys=(), index_keys=(t1.c1))
+ DATA_PROVIDER(table=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(t2.c1, t3.c1 -> c1r, t3.c2 -> c2r))
+ JOIN(type=LeftJoin, condition=, left_keys=(t2.c1), right_keys=(t3.c1), index_keys=)
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ DATA_PROVIDER(table=t3)
+ expect:
+ order: c1
+ columns: ["c1 string", "c1l string", "c1r string", "c2r int"]
+ data: |
+ aa, aa, aa, 19
+ bb, bb, bb, 21
+ cc, NULL, NULL, NULL
+ dd, NULL, NULL, NULL
+ - id: 2
+ desc: last join to a left join subquery, index optimized with additional condition
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",20,1000]
+ - ["bb",30,1000]
+ - ["cc",40,1000]
+ - ["dd",50,1000]
+ - name: t2
+ columns: ["c1 string", "c2 int", "c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa", 42, 2000]
+ - ["bb", 68, 3000]
+ - ["cc", 42, 4000]
+ - name: t3
+ columns: ["c1 string","c2 int","c3 bigint","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",19,13,3000]
+ - ["aa",21,13,4000]
+ - ["bb",34,131,3000]
+ - ["bb",21,131,4000]
+ sql: |
+ select
+ t1.c1,
+ tx.c1 as c1l,
+ tx.c1r,
+ tx.c2r
+ from t1 last join
+ (
+ select t2.c1 as c1,
+ t3.c1 as c1r,
+ t3.c2 as c2r
+ from t2 left join t3
+ on t2.c1 = t3.c1 and t2.c2 = 2 * t3.c2
+ ) tx
+ on t1.c1 = tx.c1
+ request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1l, tx.c1r, tx.c2r))
+ REQUEST_JOIN(type=LastJoin, condition=, left_keys=(), right_keys=(), index_keys=(t1.c1))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(t2.c1, t3.c1 -> c1r, t3.c2 -> c2r))
+ REQUEST_JOIN(type=LeftJoin, condition=, left_keys=(t2.c2), right_keys=(2 * t3.c2), index_keys=(t2.c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ cluster_request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1l, tx.c1r, tx.c2r))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ DATA_PROVIDER(request=t1)
+ REQUEST_JOIN(OUTPUT_RIGHT_ONLY, type=LastJoin, condition=, left_keys=(), right_keys=(), index_keys=(#4))
+ SIMPLE_PROJECT(sources=(#4 -> t1.c1))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(t2.c1, t3.c1 -> c1r, t3.c2 -> c2r))
+ REQUEST_JOIN(type=LeftJoin, condition=, left_keys=(t2.c2), right_keys=(2 * t3.c2), index_keys=(t2.c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ expect:
+ order: c1
+ columns: ["c1 string", "c1l string", "c1r string", "c2r int"]
+ data: |
+ aa, aa, aa, 21
+ bb, bb, bb, 34
+ cc, cc, NULL, NULL
+ dd, NULL, NULL, NULL
+ - id: 3
+ desc: last join to a left join subquery 2, index optimized with additional condition
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",20,1000]
+ - ["bb",30,1000]
+ - ["cc",40,1000]
+ - ["dd",50,1000]
+ - name: t2
+ columns: ["c1 string", "c2 int", "c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa", 20, 2000]
+ - ["bb", 10, 3000]
+ - ["cc", 42, 4000]
+ - name: t3
+ columns: ["c1 string","c2 int","c3 bigint","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",19,13,3000]
+ - ["aa",21,13,4000]
+ - ["bb",34,131,3000]
+ - ["bb",21,131,4000]
+ sql: |
+ select
+ t1.c1,
+ tx.c1 as c1l,
+ tx.c1r,
+ tx.c2r
+ from t1 last join
+ (
+ select t2.c1 as c1,
+ t3.c1 as c1r,
+ t3.c2 as c2r
+ from t2 left join t3
+ on t2.c1 = t3.c1 and t2.c2 > t3.c2
+ ) tx
+ on t1.c1 = tx.c1
+ request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1l, tx.c1r, tx.c2r))
+ REQUEST_JOIN(type=LastJoin, condition=, left_keys=(), right_keys=(), index_keys=(t1.c1))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(t2.c1, t3.c1 -> c1r, t3.c2 -> c2r))
+ REQUEST_JOIN(type=LeftJoin, condition=t2.c2 > t3.c2, left_keys=(), right_keys=(), index_keys=(t2.c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ cluster_request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1l, tx.c1r, tx.c2r))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ DATA_PROVIDER(request=t1)
+ REQUEST_JOIN(OUTPUT_RIGHT_ONLY, type=LastJoin, condition=, left_keys=(), right_keys=(), index_keys=(#4))
+ SIMPLE_PROJECT(sources=(#4 -> t1.c1))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(t2.c1, t3.c1 -> c1r, t3.c2 -> c2r))
+ REQUEST_JOIN(type=LeftJoin, condition=t2.c2 > t3.c2, left_keys=(), right_keys=(), index_keys=(t2.c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ expect:
+ order: c1
+ columns: ["c1 string", "c1l string", "c1r string", "c2r int"]
+ data: |
+ aa, aa, aa, 19
+ bb, bb, NULL, NULL
+ cc, cc, NULL, NULL
+ dd, NULL, NULL, NULL
+ - id: 4
+ desc: last join to two left join
+ # there is no restriction on multiple left joins, including in request mode,
+ # but they may not perform as well as multiple last joins
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",20,1000]
+ - ["bb",30,1000]
+ - ["cc",40,1000]
+ - ["dd",50,1000]
+ - name: t2
+ columns: ["c1 string", "c2 int", "c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa", 20, 2000]
+ - ["bb", 10, 3000]
+ - ["cc", 42, 4000]
+ - name: t3
+ columns: ["c1 string","c2 int","c3 bigint","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",19,13,3000]
+ - ["aa",21,8, 4000]
+ - ["bb",34,131,3000]
+ - ["bb",21,131,4000]
+ - ["cc",27,100,5000]
+ - name: t4
+ columns: ["c1 string","c2 int","c3 bigint","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",19,14,3000]
+ - ["aa",21,13,4000]
+ - ["bb",34,1,3000]
+ - ["bb",21,132,4000]
+ sql: |
+ select
+ t1.c1,
+ tx.c1 as c1l,
+ tx.c1r,
+ tx.c2r,
+ tx.c3x
+ from t1 last join
+ (
+ select t2.c1 as c1,
+ t3.c1 as c1r,
+ t3.c2 as c2r,
+ t4.c3 as c3x
+ from t2 left outer join t3
+ on t2.c1 = t3.c1 and t2.c2 > t3.c2
+ left join t4
+ on t2.c1 = t4.c1 and t3.c3 < t4.c3
+ ) tx
+ on t1.c1 = tx.c1
+ request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1l, tx.c1r, tx.c2r, tx.c3x))
+ REQUEST_JOIN(type=LastJoin, condition=, left_keys=(), right_keys=(), index_keys=(t1.c1))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(t2.c1, t3.c1 -> c1r, t3.c2 -> c2r, t4.c3 -> c3x))
+ REQUEST_JOIN(type=LeftJoin, condition=t3.c3 < t4.c3, left_keys=(), right_keys=(), index_keys=(t2.c1))
+ REQUEST_JOIN(type=LeftJoin, condition=t2.c2 > t3.c2, left_keys=(), right_keys=(), index_keys=(t2.c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ DATA_PROVIDER(type=Partition, table=t4, index=index1)
+ cluster_request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, tx.c1 -> c1l, tx.c1r, tx.c2r, tx.c3x))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ DATA_PROVIDER(request=t1)
+ REQUEST_JOIN(OUTPUT_RIGHT_ONLY, type=LastJoin, condition=, left_keys=(), right_keys=(), index_keys=(#4))
+ SIMPLE_PROJECT(sources=(#4 -> t1.c1))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(t2.c1, t3.c1 -> c1r, t3.c2 -> c2r, t4.c3 -> c3x))
+ REQUEST_JOIN(type=LeftJoin, condition=t3.c3 < t4.c3, left_keys=(), right_keys=(), index_keys=(t2.c1))
+ REQUEST_JOIN(type=LeftJoin, condition=t2.c2 > t3.c2, left_keys=(), right_keys=(), index_keys=(t2.c1))
+ DATA_PROVIDER(type=Partition, table=t2, index=index1)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ DATA_PROVIDER(type=Partition, table=t4, index=index1)
+ expect:
+ order: c1
+ columns: ["c1 string", "c1l string", "c1r string", "c2r int", "c3x bigint"]
+ data: |
+ aa, aa, aa, 19, 14
+ bb, bb, NULL, NULL, NULL
+ cc, cc, cc, 27, NULL
+ dd, NULL, NULL, NULL, NULL
+ - id: 5
+ desc: simple left join
+ mode: request-unsupport
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",20,1000]
+ - ["bb",30,1000]
+ - name: t2
+ columns: ["c2 int","c4 timestamp"]
+ indexs: ["index1:c2:c4"]
+ rows:
+ - [20,3000]
+ - [20,2000]
+ sql: |
+ select t1.c1 as id, t2.* from t1 left join t2
+ on t1.c2 = t2.c2
+ expect:
+ order: c1
+ columns: ["id string", "c2 int","c4 timestamp"]
+ data: |
+ aa, 20, 3000
+ aa, 20, 2000
+ bb, NULL, NULL
+ - id: 6
+ desc: lastjoin(leftjoin(filter, table))
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",20,1000]
+ - ["bb",30,1000]
+ - ["cc",40,1000]
+ - ["dd",50,1000]
+ - name: t2
+ columns: ["c1 string", "c2 int", "c4 timestamp"]
+ indexs: ["index1:c1:c4", "index2:c2:c4"]
+ rows:
+ - ["bb",20, 1000]
+ - ["aa",30, 2000]
+ - ["bb",30, 3000]
+ - ["cc",40, 4000]
+ - ["dd",50, 5000]
+ - name: t3
+ columns: ["c1 string","c2 int","c3 bigint","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",19,13,3000]
+ - ["bb",34,131,3000]
+ sql: |
+ select
+ t1.c1,
+ t1.c2,
+ tx.*
+ from t1 last join
+ (
+ select t2.c1 as tx_0_c1,
+ t2.c2 as tx_0_c2,
+ t2.c4 as tx_0_c4,
+ t3.c2 as tx_1_c2,
+ t3.c3 as tx_1_c3
+ from (select * from t2 where c1 != 'dd') t2 left join t3
+ on t2.c1 = t3.c1
+ ) tx
+ order by tx.tx_0_c4
+ on t1.c2 = tx.tx_0_c2
+ request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, t1.c2, tx.tx_0_c1, tx.tx_0_c2, tx.tx_0_c4, tx.tx_1_c2, tx.tx_1_c3))
+ REQUEST_JOIN(type=LastJoin, right_sort=(ASC), condition=, left_keys=(), right_keys=(), index_keys=(t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(t2.c1 -> tx_0_c1, t2.c2 -> tx_0_c2, t2.c4 -> tx_0_c4, t3.c2 -> tx_1_c2, t3.c3 -> tx_1_c3))
+ REQUEST_JOIN(type=LeftJoin, condition=, left_keys=(), right_keys=(), index_keys=(t2.c1))
+ RENAME(name=t2)
+ FILTER_BY(condition=c1 != dd, left_keys=, right_keys=, index_keys=)
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ expect:
+ order: c1
+ columns: ["c1 string", "c2 int", "tx_0_c1 string", "tx_0_c2 int", "tx_0_c4 timestamp", "tx_1_c2 int", "tx_1_c3 int64"]
+ data: |
+ aa, 20, bb, 20, 1000, 34, 131
+ bb, 30, bb, 30, 3000, 34, 131
+ cc, 40, cc, 40, 4000, NULL, NULL
+ dd, 50, NULL, NULL, NULL, NULL, NULL
+ - id: 7
+ desc: lastjoin(leftjoin(filter, filter))
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",20,1000]
+ - ["bb",30,1000]
+ - ["cc",40,1000]
+ - ["dd",50,1000]
+ - name: t2
+ columns: ["c1 string", "c2 int", "c4 timestamp"]
+ indexs: ["index1:c1:c4", "index2:c2:c4"]
+ rows:
+ - ["bb",20, 1000]
+ - ["aa",30, 2000]
+ - ["bb",30, 3000]
+ - ["cc",40, 4000]
+ - ["dd",50, 5000]
+ - name: t3
+ columns: ["c1 string","c2 int","c3 bigint","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",19,13,3000]
+ - ["bb",34,131,3000]
+ cluster_request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, t1.c2, tx.tx_0_c1, tx.tx_0_c2, tx.tx_0_c4, tx.tx_1_c2, tx.tx_1_c3))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ DATA_PROVIDER(request=t1)
+ REQUEST_JOIN(OUTPUT_RIGHT_ONLY, type=LastJoin, right_sort=(ASC), condition=, left_keys=(#5), right_keys=(#8), index_keys=)
+ SIMPLE_PROJECT(sources=(#5 -> t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(t2.c1 -> tx_0_c1, t2.c2 -> tx_0_c2, t2.c4 -> tx_0_c4, t3.c2 -> tx_1_c2, t3.c3 -> tx_1_c3))
+ REQUEST_JOIN(type=LeftJoin, condition=, left_keys=(), right_keys=(), index_keys=(t2.c1))
+ RENAME(name=t2)
+ FILTER_BY(condition=, left_keys=(), right_keys=(), index_keys=(30))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ RENAME(name=t3)
+ FILTER_BY(condition=c2 > 20, left_keys=, right_keys=, index_keys=)
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ sql: |
+ select
+ t1.c1,
+ t1.c2,
+ tx.*
+ from t1 last join
+ (
+ select t2.c1 as tx_0_c1,
+ t2.c2 as tx_0_c2,
+ t2.c4 as tx_0_c4,
+ t3.c2 as tx_1_c2,
+ t3.c3 as tx_1_c3
+ from (select * from t2 where c2 = 30) t2 left join (select * from t3 where c2 > 20) t3
+ on t2.c1 = t3.c1
+ ) tx
+ order by tx.tx_0_c4
+ on t1.c2 = tx.tx_0_c2
+ request_plan: |
+ expect:
+ order: c1
+ columns: ["c1 string", "c2 int", "tx_0_c1 string", "tx_0_c2 int", "tx_0_c4 timestamp", "tx_1_c2 int", "tx_1_c3 int64"]
+ data: |
+ aa, 20, NULL, NULL, NULL, NULL, NULL
+ bb, 30, bb, 30, 3000, 34, 131
+ cc, 40, NULL, NULL, NULL, NULL, NULL
+ dd, 50, NULL, NULL, NULL, NULL, NULL
+ - id: 8
+ desc: lastjoin(leftjoin(filter, filter))
+ inputs:
+ - name: t1
+ columns: ["c1 string","c2 int","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",20,1000]
+ - ["bb",30,1000]
+ - ["cc",40,1000]
+ - name: t2
+ columns: ["c1 string", "c2 int", "c4 timestamp"]
+ indexs: ["index1:c1:c4", "index2:c2:c4"]
+ rows:
+ - ["bb",20, 1000]
+ - ["aa",20, 2000]
+ - ["bb",30, 3000]
+ - ["cc",40, 4000]
+ - name: t3
+ columns: ["c1 string","c2 int","c3 bigint","c4 timestamp"]
+ indexs: ["index1:c1:c4"]
+ rows:
+ - ["aa",19,13,3000]
+ - ["bb",34,131,3000]
+ sql: |
+ select
+ t1.c1,
+ t1.c2,
+ tx.*
+ from t1 last join
+ (
+ select t2.c1 as tx_0_c1,
+ t2.c2 as tx_0_c2,
+ t2.c4 as tx_0_c4,
+ t3.c2 as tx_1_c2,
+ t3.c3 as tx_1_c3
+ from (select * from t2 where c2 = 20) t2 left join (select * from t3 where c1 = 'bb') t3
+ on t2.c1 = t3.c1
+ ) tx
+ on t1.c2 = tx.tx_0_c2 and not isnull(tx.tx_1_c2)
+ cluster_request_plan: |
+ SIMPLE_PROJECT(sources=(t1.c1, t1.c2, tx.tx_0_c1, tx.tx_0_c2, tx.tx_0_c4, tx.tx_1_c2, tx.tx_1_c3))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ DATA_PROVIDER(request=t1)
+ REQUEST_JOIN(OUTPUT_RIGHT_ONLY, type=LastJoin, condition=NOT isnull(#89), left_keys=(#5), right_keys=(#8), index_keys=)
+ SIMPLE_PROJECT(sources=(#5 -> t1.c2))
+ DATA_PROVIDER(request=t1)
+ RENAME(name=tx)
+ SIMPLE_PROJECT(sources=(t2.c1 -> tx_0_c1, t2.c2 -> tx_0_c2, t2.c4 -> tx_0_c4, t3.c2 -> tx_1_c2, t3.c3 -> tx_1_c3))
+ REQUEST_JOIN(type=LeftJoin, condition=, left_keys=(t2.c1), right_keys=(t3.c1), index_keys=)
+ RENAME(name=t2)
+ FILTER_BY(condition=, left_keys=(), right_keys=(), index_keys=(20))
+ DATA_PROVIDER(type=Partition, table=t2, index=index2)
+ RENAME(name=t3)
+ FILTER_BY(condition=, left_keys=(), right_keys=(), index_keys=(bb))
+ DATA_PROVIDER(type=Partition, table=t3, index=index1)
+ expect:
+ order: c1
+ columns: ["c1 string", "c2 int", "tx_0_c1 string", "tx_0_c2 int", "tx_0_c4 timestamp", "tx_1_c2 int", "tx_1_c3 int64"]
+ data: |
+ aa, 20, bb, 20, 1000, 34, 131
+ bb, 30, NULL, NULL, NULL, NULL, NULL
+ cc, 40, NULL, NULL, NULL, NULL, NULL
diff --git a/cases/query/window_query.yaml b/cases/query/window_query.yaml
index 84365be97f7..3c64259d8c5 100644
--- a/cases/query/window_query.yaml
+++ b/cases/query/window_query.yaml
@@ -901,3 +901,234 @@ cases:
200, 1, 1
300, 0, 0
400, 2, 2
+
+ # ======================================================================
+ # WINDOW without ORDER BY
+ # ======================================================================
+ - id: 24
+ desc: ROWS WINDOW WITHOUT ORDER BY
+ mode: batch-unsupport
+ inputs:
+ - name: t1
+ columns:
+ - id int
+ - gp int
+ - ts timestamp
+ indexs:
+ - idx:gp:ts
+ data: |
+ 1, 100, 20000
+ 2, 100, 10000
+ 3, 400, 20000
+ 4, 400, 10000
+ 5, 400, 15000
+ 6, 400, 40000
+ sql: |
+ select id, count(ts) over w as agg
+ from t1
+ window w as (
+ partition by gp
+ rows between 2 open preceding and current row
+ )
+ request_plan: |
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(partition_keys=(), orders=, rows=(, 2 OPEN PRECEDING, 0 CURRENT), index_keys=(gp))
+ DATA_PROVIDER(request=t1)
+ DATA_PROVIDER(type=Partition, table=t1, index=idx)
+ cluster_request_plan: |
+ SIMPLE_PROJECT(sources=(id, agg))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ SIMPLE_PROJECT(sources=(id))
+ DATA_PROVIDER(request=t1)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(partition_keys=(), orders=, rows=(, 2 OPEN PRECEDING, 0 CURRENT), index_keys=(gp))
+ DATA_PROVIDER(request=t1)
+ DATA_PROVIDER(type=Partition, table=t1, index=idx)
+ expect:
+ columns: ["id int", "agg int64"]
+ order: id
+ data: |
+ 1, 1
+ 2, 2
+ 3, 1
+ 4, 2
+ 5, 2
+ 6, 2
+ - id: 25
+ desc: RANGE WINDOW WITHOUT ORDER BY
+ mode: batch-unsupport
+ inputs:
+ - name: t1
+ columns:
+ - id int
+ - gp int
+ - ts timestamp
+ indexs:
+ - idx:gp:ts
+ data: |
+ 1, 100, 20000
+ 2, 100, 10000
+ 3, 400, 20000
+ 4, 400, 10
+ 5, 400, 15000
+ sql: |
+ select id, count(ts) over w as agg
+ from t1
+ window w as (
+ partition by gp
+ rows_range between unbounded preceding and current row
+ )
+ request_plan: |
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(partition_keys=(), orders=, range=(, 0 PRECEDING UNBOUND, 0 CURRENT), index_keys=(gp))
+ DATA_PROVIDER(request=t1)
+ DATA_PROVIDER(type=Partition, table=t1, index=idx)
+ cluster_request_plan: |
+ SIMPLE_PROJECT(sources=(id, agg))
+ REQUEST_JOIN(type=kJoinTypeConcat)
+ SIMPLE_PROJECT(sources=(id))
+ DATA_PROVIDER(request=t1)
+ PROJECT(type=Aggregation)
+ REQUEST_UNION(partition_keys=(), orders=, range=(, 0 PRECEDING UNBOUND, 0 CURRENT), index_keys=(gp))
+ DATA_PROVIDER(request=t1)
+ DATA_PROVIDER(type=Partition, table=t1, index=idx)
+ expect:
+ columns: ["id int", "agg int64"]
+ order: id
+ data: |
+ 1, 1
+ 2, 2
+ 3, 1
+ 4, 2
+ 5, 3
+ - id: 26
+ desc: RANGE-type WINDOW WITHOUT ORDER BY + WINDOW attributes
+ mode: batch-unsupport
+ inputs:
+ - name: t1
+ columns:
+ - id int
+ - gp int
+ - ts timestamp
+ indexs:
+ - idx:gp:ts
+ data: |
+ 1, 100, 20000
+ 2, 100, 10000
+ 3, 400, 20000
+ 4, 400, 10000
+ 5, 400, 15000
+ - name: t2
+ columns:
+ - id int
+ - gp int
+ - ts timestamp
+ indexs:
+ - idx:gp:ts
+ data: |
+ 1, 100, 20000
+ 2, 100, 10000
+ 3, 400, 20000
+ 4, 400, 10000
+ 5, 400, 15000
+ sql: |
+ select id,
+ count(ts) over w1 as agg1,
+ count(ts) over w2 as agg2,
+ count(ts) over w3 as agg3,
+ count(ts) over w4 as agg4,
+ count(ts) over w5 as agg5,
+ count(ts) over w6 as agg6,
+ count(ts) over w7 as agg7,
+ from t1
+ window w1 as (
+ PARTITION by gp
+ ROWS_RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW),
+ w2 as (partition by gp
+ ROWS_RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW EXCLUDE CURRENT_ROW),
+ w3 as (PARTITION BY gp
+ ROWS_RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW MAXSIZE 1),
+ w4 as (
+ UNION (select * from t2)
+ PARTITION BY gp
+ ROWS_RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW INSTANCE_NOT_IN_WINDOW),
+ w5 as (
+ UNION (select * from t2)
+ PARTITION BY gp
+ ROWS_RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW INSTANCE_NOT_IN_WINDOW EXCLUDE CURRENT_ROW),
+ w6 as (
+ UNION (select * from t2)
+ PARTITION BY gp
+ ROWS_RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW MAXSIZE 2 INSTANCE_NOT_IN_WINDOW EXCLUDE CURRENT_ROW),
+ w7 as (
+ UNION (select * from t2)
+ PARTITION BY gp
+ ROWS_RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW EXCLUDE CURRENT_ROW)
+ expect:
+ columns: ["id int", "agg1 int64", "agg2 int64", "agg3 int64", "agg4 int64", "agg5 int64", "agg6 int64", "agg7 int64"]
+ order: id
+ data: |
+ 1, 1, 0, 1, 3, 2, 2, 2
+ 2, 2, 1, 1, 3, 2, 2, 3
+ 3, 1, 0, 1, 4, 3, 2, 3
+ 4, 2, 1, 1, 4, 3, 2, 4
+ 5, 3, 2, 1, 4, 3, 2, 5
+ - id: 27
+ desc: ROWS-type WINDOW WITHOUT ORDER BY + WINDOW attributes
+ mode: batch-unsupport
+ inputs:
+ - name: t1
+ columns:
+ - id int
+ - gp int
+ - ts timestamp
+ indexs:
+ - idx:gp:ts
+ data: |
+ 1, 100, 20000
+ 2, 100, 10000
+ 3, 400, 20000
+ 4, 400, 10000
+ 5, 400, 15000
+ - name: t2
+ columns:
+ - id int
+ - gp int
+ - ts timestamp
+ indexs:
+ - idx:gp:ts
+ data: |
+ 1, 100, 20000
+ 2, 100, 10000
+ 3, 400, 20000
+ 4, 400, 10000
+ 5, 400, 15000
+ sql: |
+ select id,
+ count(ts) over w1 as agg1,
+ count(ts) over w2 as agg2,
+ count(ts) over w3 as agg3,
+ count(ts) over w4 as agg4,
+ from t1
+ window w1 as (
+ PARTITION by gp
+ ROWS BETWEEN 2 PRECEDING AND CURRENT ROW),
+ w2 as (partition by gp
+ ROWS BETWEEN 2 PRECEDING AND CURRENT ROW EXCLUDE CURRENT_ROW),
+ w3 as (
+ UNION (select * from t2)
+ PARTITION BY gp
+ ROWS BETWEEN 2 PRECEDING AND CURRENT ROW INSTANCE_NOT_IN_WINDOW),
+ w4 as (
+ UNION (select * from t2)
+ PARTITION BY gp
+ ROWS BETWEEN 3 PRECEDING AND CURRENT ROW INSTANCE_NOT_IN_WINDOW EXCLUDE CURRENT_ROW)
+ expect:
+ columns: ["id int", "agg1 int64", "agg2 int64", "agg3 int64", "agg4 int64"]
+ order: id
+ data: |
+ 1, 1, 0, 3, 2
+ 2, 2, 1, 3, 2
+ 3, 1, 0, 3, 3
+ 4, 2, 1, 3, 3
+ 5, 3, 2, 3, 3
diff --git a/cmake/rapidjson.cmake b/cmake/rapidjson.cmake
new file mode 100644
index 00000000000..6b1ecd2a6dd
--- /dev/null
+++ b/cmake/rapidjson.cmake
@@ -0,0 +1,9 @@
+FetchContent_Declare(
+ rapidjson
+ URL https://github.com/Tencent/rapidjson/archive/refs/tags/v1.1.0.zip
+ URL_HASH MD5=ceb1cf16e693a3170c173dc040a9d2bd
+ EXCLUDE_FROM_ALL # don't build this project as part of the overall build
+)
+# don't build this project, just populate
+FetchContent_Populate(rapidjson)
+include_directories(${rapidjson_SOURCE_DIR}/include)
diff --git a/docs/en/deploy/conf.md b/docs/en/deploy/conf.md
index 11667427247..138a414fa3d 100644
--- a/docs/en/deploy/conf.md
+++ b/docs/en/deploy/conf.md
@@ -9,6 +9,8 @@
# If you are deploying the standalone version, you do not need to configure zk_cluster and zk_root_path, just comment these two configurations. Deploying the cluster version needs to configure these two items, and the two configurations of all nodes in a cluster must be consistent
#--zk_cluster=127.0.0.1:7181
#--zk_root_path=/openmldb_cluster
+# set the username and password of zookeeper if authentication is enabled
+#--zk_cert=user:passwd
# The address of the tablet needs to be specified in the standalone version, and this configuration can be ignored in the cluster version
--tablet=127.0.0.1:9921
# Configure log directory
@@ -76,6 +78,8 @@
# If you start the cluster version, you need to specify the address of zk and the node path of the cluster in zk
#--zk_cluster=127.0.0.1:7181
#--zk_root_path=/openmldb_cluster
+# set the username and password of zookeeper if authentication is enabled
+#--zk_cert=user:passwd
# Configure the thread pool size, it is recommended to be consistent with the number of CPU cores
--thread_pool_size=24
@@ -218,6 +222,8 @@
# If the deployed openmldb is a cluster version, you need to specify the zk address and the cluster zk node directory
#--zk_cluster=127.0.0.1:7181
#--zk_root_path=/openmldb_cluster
+# set the username and password of zookeeper if authentication is enabled
+#--zk_cert=user:passwd
# configure log path
--openmldb_log_dir=./logs
@@ -249,6 +255,7 @@ zookeeper.connection_timeout=5000
zookeeper.max_retries=10
zookeeper.base_sleep_time=1000
zookeeper.max_connect_waitTime=30000
+#zookeeper.cert=user:passwd
# Spark Config
spark.home=
diff --git a/docs/en/developer/built_in_function_develop_guide.md b/docs/en/developer/built_in_function_develop_guide.md
index 3e6eaa2852a..97d00076f87 100644
--- a/docs/en/developer/built_in_function_develop_guide.md
+++ b/docs/en/developer/built_in_function_develop_guide.md
@@ -792,7 +792,7 @@ select date(timestamp(1590115420000)) as dt;
## 5. Document Management
-Documents for all built-in functions can be found in [Built-in Functions](http://4paradigm.github.io/OpenMLDB/zh/main/reference/sql/functions_and_operators/Files/udfs_8h.html). It is a markdown file automatically generated from source, so please do not edit it directly.
+Documents for all built-in functions can be found in [Built-in Functions](http://4paradigm.github.io/OpenMLDB/zh/main/reference/sql/udfs_8h.html). It is a markdown file automatically generated from source, so please do not edit it directly.
- If you are adding a document for a new function, please refer to [2.2.4 Documenting Function](#224-documenting-function).
- If you are trying to revise a document of an existing function, you can find source code in the files of `hybridse/src/udf/default_udf_library.cc` or `hybridse/src/udf/default_defs/*_def.cc` .
diff --git a/docs/en/developer/udf_develop_guide.md b/docs/en/developer/udf_develop_guide.md
index 63530ae0f1c..4c5aff6d2e1 100644
--- a/docs/en/developer/udf_develop_guide.md
+++ b/docs/en/developer/udf_develop_guide.md
@@ -9,7 +9,7 @@ SQL functions can be categorised into scalar functions and aggregate functions.
#### 2.1.1 Naming Specification of C++ Built-in Function
- The naming of C++ built-in function should follow the [snake_case](https://en.wikipedia.org/wiki/Snake_case) style.
- The name should clearly express the function's purpose.
-- The name of a function should not be the same as the name of a built-in function or other custom functions. The list of all built-in functions can be seen [here](../reference/sql/functions_and_operators/Files/udfs_8h.md).
+- The name of a function should not be the same as the name of a built-in function or other custom functions. The list of all built-in functions can be seen [here](../reference/sql/udfs_8h.md).
#### 2.1.2
The types of the built-in C++ functions' parameters should be BOOL, NUMBER, TIMESTAMP, DATE, or STRING.
diff --git a/docs/en/reference/sql/ddl/CREATE_TABLE_STATEMENT.md b/docs/en/reference/sql/ddl/CREATE_TABLE_STATEMENT.md
index a0d11d90657..ba62cf55231 100644
--- a/docs/en/reference/sql/ddl/CREATE_TABLE_STATEMENT.md
+++ b/docs/en/reference/sql/ddl/CREATE_TABLE_STATEMENT.md
@@ -473,6 +473,11 @@ StorageMode
::= 'Memory'
| 'HDD'
| 'SSD'
+CompressTypeOption
+ ::= 'COMPRESS_TYPE' '=' CompressType
+CompressType
+ ::= 'NoCompress'
+ | 'Snappy'
```
@@ -484,6 +489,7 @@ StorageMode
| `REPLICANUM` | It defines the number of replicas for the table. Note that the number of replicas is only configurable in Cluster version. | `OPTIONS (REPLICANUM=3)` |
| `DISTRIBUTION` | It defines the distributed node endpoint configuration. Generally, it contains a Leader node and several followers. `(leader, [follower1, follower2, ..])`. Without explicit configuration, OpenMLDB will automatically configure `DISTRIBUTION` according to the environment and nodes. | `DISTRIBUTION = [ ('127.0.0.1:6527', [ '127.0.0.1:6528','127.0.0.1:6529' ])]` |
| `STORAGE_MODE` | It defines the storage mode of the table. The supported modes are `Memory`, `HDD` and `SSD`. When not explicitly configured, it defaults to `Memory`.
If you need to support a storage mode other than `Memory` mode, `tablet` requires additional configuration options. For details, please refer to [tablet configuration file **conf/tablet.flags**](../../../deploy/conf.md#the-configuration-file-for-apiserver:-conf/tablet.flags). | `OPTIONS (STORAGE_MODE='HDD')` |
+| `COMPRESS_TYPE` | It defines the compression type of the table. The supported compression types are `NoCompress` and `Snappy`. The default value is `NoCompress`. | `OPTIONS (COMPRESS_TYPE='Snappy')` |
#### The Difference between Disk Table and Memory Table
@@ -515,11 +521,11 @@ DESC t1;
--- -------------------- ------ ---------- ------ ---------------
1 INDEX_0_1651143735 col1 std_time 0min kAbsoluteTime
--- -------------------- ------ ---------- ------ ---------------
- --------------
- storage_mode
- --------------
- HDD
- --------------
+ --------------- --------------
+ compress_type storage_mode
+ --------------- --------------
+ NoCompress HDD
+ --------------- --------------
```
The following sql command create a table with specified distribution.
```sql
diff --git a/docs/en/reference/sql/ddl/DESC_STATEMENT.md b/docs/en/reference/sql/ddl/DESC_STATEMENT.md
index 8179c952c56..a7d288064bb 100644
--- a/docs/en/reference/sql/ddl/DESC_STATEMENT.md
+++ b/docs/en/reference/sql/ddl/DESC_STATEMENT.md
@@ -56,11 +56,11 @@ desc t1;
--- -------------------- ------ ---------- ---------- ---------------
1 INDEX_0_1658136511 col1 std_time 43200min kAbsoluteTime
--- -------------------- ------ ---------- ---------- ---------------
- --------------
- storage_mode
- --------------
- Memory
- --------------
+ --------------- --------------
+ compress_type storage_mode
+ --------------- --------------
+ NoCompress Memory
+ --------------- --------------
```
diff --git a/docs/en/reference/sql/ddl/SHOW_CREATE_TABLE_STATEMENT.md b/docs/en/reference/sql/ddl/SHOW_CREATE_TABLE_STATEMENT.md
index dd411410e65..967ebce316a 100644
--- a/docs/en/reference/sql/ddl/SHOW_CREATE_TABLE_STATEMENT.md
+++ b/docs/en/reference/sql/ddl/SHOW_CREATE_TABLE_STATEMENT.md
@@ -21,7 +21,7 @@ show create table t1;
`c3` bigInt,
`c4` timestamp,
INDEX (KEY=`c1`, TS=`c4`, TTL_TYPE=ABSOLUTE, TTL=0m)
- ) OPTIONS (PARTITIONNUM=8, REPLICANUM=2, STORAGE_MODE='HDD');
+ ) OPTIONS (PARTITIONNUM=8, REPLICANUM=2, STORAGE_MODE='HDD', COMPRESS_TYPE='NoCompress');
------- ---------------------------------------------------------------
1 rows in set
diff --git a/docs/en/reference/sql/ddl/TRUNCATE_TABLE_STATEMENT.md b/docs/en/reference/sql/ddl/TRUNCATE_TABLE_STATEMENT.md
new file mode 100644
index 00000000000..3bd9360d920
--- /dev/null
+++ b/docs/en/reference/sql/ddl/TRUNCATE_TABLE_STATEMENT.md
@@ -0,0 +1,16 @@
+# TRUNCATE TABLE
+
+```
+TRUNCATE TABLE table_name
+```
+
+The `TRUNCATE TABLE` statement is used to clear all data in the specified table.
+
+## Example: clear t1
+
+```sql
+TRUNCATE TABLE t1;
+-- Truncate table t1? yes/no
+-- yes
+-- SUCCEED
+```
\ No newline at end of file
diff --git a/docs/en/reference/sql/ddl/index.rst b/docs/en/reference/sql/ddl/index.rst
index dbc94cc1f3d..bff9db48fb0 100644
--- a/docs/en/reference/sql/ddl/index.rst
+++ b/docs/en/reference/sql/ddl/index.rst
@@ -24,3 +24,4 @@ Data Definition Statement (DDL)
SHOW_FUNCTIONS
DROP_FUNCTION
SHOW_CREATE_TABLE_STATEMENT
+ TRUNCATE_TABLE_STATEMENT
diff --git a/docs/en/reference/sql/dql/WINDOW_CLAUSE.md b/docs/en/reference/sql/dql/WINDOW_CLAUSE.md
index bbc71a4f222..f3add760280 100644
--- a/docs/en/reference/sql/dql/WINDOW_CLAUSE.md
+++ b/docs/en/reference/sql/dql/WINDOW_CLAUSE.md
@@ -320,5 +320,5 @@ WINDOW w1 AS (PARTITION BY col1 ORDER BY col5 ROWS_RANGE BETWEEN 10s PRECEDING A
```
```{seealso}
-Please refer to [Built-in Functions](../functions_and_operators/Files/udfs_8h.md) for aggregate functions that can be used in window computation.
+Please refer to [Built-in Functions](../udfs_8h.md) for aggregate functions that can be used in window computation.
````
diff --git a/docs/en/reference/sql/index.rst b/docs/en/reference/sql/index.rst
index ee57dbac297..58bcc3e5502 100644
--- a/docs/en/reference/sql/index.rst
+++ b/docs/en/reference/sql/index.rst
@@ -9,6 +9,7 @@ SQL
language_structure/index
data_types/index
functions_and_operators/index
+ udfs_8h
dql/index
dml/index
ddl/index
diff --git a/docs/en/reference/sql/functions_and_operators/index.rst b/docs/en/reference/sql/operators/index.rst
similarity index 65%
rename from docs/en/reference/sql/functions_and_operators/index.rst
rename to docs/en/reference/sql/operators/index.rst
index b889a6e8a87..db068373e46 100644
--- a/docs/en/reference/sql/functions_and_operators/index.rst
+++ b/docs/en/reference/sql/operators/index.rst
@@ -1,5 +1,5 @@
=============================
-Expressions, Functions, and Operations
+Expressions and Operations
=============================
@@ -7,4 +7,3 @@ Expressions, Functions, and Operations
:maxdepth: 1
operators
- Files/udfs_8h
diff --git a/docs/en/reference/sql/functions_and_operators/operators.md b/docs/en/reference/sql/operators/operators.md
similarity index 100%
rename from docs/en/reference/sql/functions_and_operators/operators.md
rename to docs/en/reference/sql/operators/operators.md
diff --git a/docs/en/reference/sql/functions_and_operators/Files/udfs_8h.md b/docs/en/reference/sql/udfs_8h.md
similarity index 68%
rename from docs/en/reference/sql/functions_and_operators/Files/udfs_8h.md
rename to docs/en/reference/sql/udfs_8h.md
index d1696b6c764..9cfab05977f 100644
--- a/docs/en/reference/sql/functions_and_operators/Files/udfs_8h.md
+++ b/docs/en/reference/sql/udfs_8h.md
@@ -10,158 +10,158 @@ title: udfs/udfs.h
| Name | Description |
| -------------- | -------------- |
-| **[abs](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-abs)**()|
Return the absolute value of expr. |
-| **[acos](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-acos)**()|
Return the arc cosine of expr. |
-| **[add](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-add)**()|
Compute sum of two arguments. |
-| **[add_months](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-add-months)**()|
adds an integer months to a given date, returning the resulting date. |
-| **[array_contains](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-array-contains)**()|
array_contains(array, value) - Returns true if the array contains the value. |
-| **[asin](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-asin)**()|
Return the arc sine of expr. |
-| **[at](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-at)**()| |
-| **[atan](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-atan)**()|
Return the arc tangent of expr If called with one parameter, this function returns the arc tangent of expr. If called with two parameters X and Y, this function returns the arc tangent of Y / X. |
-| **[atan2](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-atan2)**()|
Return the arc tangent of Y / X.. |
-| **[avg](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-avg)**()|
Compute average of values. |
-| **[avg_cate](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-avg-cate)**()|
Compute average of values grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
-| **[avg_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-avg-cate-where)**()|
Compute average of values matching specified condition grouped by category key and output string. Each group is represented as 'K:V', separated by comma, and sorted by key in ascend order. |
-| **[avg_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-avg-where)**()|
Compute average of values match specified condition. |
-| **[bigint](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-bigint)**()| |
-| **[bool](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-bool)**()|
Cast string expression to bool. |
-| **[ceil](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-ceil)**()|
Return the smallest integer value not less than the expr. |
-| **[ceiling](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-ceiling)**()| |
-| **[char](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-char)**()|
Returns the ASCII character having the binary equivalent to expr. If n >= 256 the result is equivalent to char(n % 256). |
-| **[char_length](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-char-length)**()|
Returns the length of the string. It is measured in characters and multibyte character string is not supported. |
-| **[character_length](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-character-length)**()| |
-| **[concat](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-concat)**()|
This function returns a string resulting from the joining of two or more string values in an end-to-end manner. (To add a separating value during joining, see concat_ws.) |
-| **[concat_ws](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-concat-ws)**()|
Returns a string resulting from the joining of two or more string value in an end-to-end manner. It separates those concatenated string values with the delimiter specified in the first function argument. |
-| **[cos](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-cos)**()|
Return the cosine of expr. |
-| **[cot](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-cot)**()|
Return the cotangent of expr. |
-| **[count](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-count)**()|
Compute number of values. |
-| **[count_cate](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-count-cate)**()|
Compute count of values grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
-| **[count_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-count-cate-where)**()|
Compute count of values matching specified condition grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
-| **[count_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-count-where)**()|
Compute number of values match specified condition. |
-| **[date](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-date)**()|
Cast timestamp or string expression to date (date >= 1900-01-01) |
-| **[date_format](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-date-format)**()|
Formats the date value according to the format string. |
-| **[datediff](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-datediff)**()|
days difference from date1 to date2 |
-| **[day](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-day)**()| |
-| **[dayofmonth](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-dayofmonth)**()|
Return the day of the month for a timestamp or date. |
-| **[dayofweek](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-dayofweek)**()|
Return the day of week for a timestamp or date. |
-| **[dayofyear](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-dayofyear)**()|
Return the day of year for a timestamp or date. Returns 0 given an invalid date. |
-| **[degrees](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-degrees)**()|
Convert radians to degrees. |
-| **[distinct_count](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-distinct-count)**()|
Compute number of distinct values. |
-| **[double](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-double)**()|
Cast string expression to double. |
-| **[drawdown](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-drawdown)**()|
Compute drawdown of values. |
-| **[earth_distance](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-earth-distance)**()|
Returns the great circle distance between two points on the surface of the Earth. Km as return unit. add a minus (-) sign if heading west (W) or south (S). |
-| **[entropy](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-entropy)**()|
Calculate Shannon entropy of a column of values. Null values are skipped. |
-| **[ew_avg](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-ew-avg)**()|
Compute exponentially-weighted average of values. It's equivalent to pandas ewm(alpha={alpha}, adjust=True, ignore_na=True, com=None, span=None, halflife=None, min_periods=0) |
-| **[exp](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-exp)**()|
Return the value of e (the base of natural logarithms) raised to the power of expr. |
-| **[farm_fingerprint](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-farm-fingerprint)**()| |
-| **[first_value](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-first-value)**()|
Returns the value of expr from the latest row (last row) of the window frame. |
-| **[float](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-float)**()|
Cast string expression to float. |
-| **[floor](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-floor)**()|
Return the largest integer value not less than the expr. |
-| **[get_json_object](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-get-json-object)**()|
Extracts a JSON object from [JSON Pointer](https://datatracker.ietf.org/doc/html/rfc6901)|
-| **[hash64](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-hash64)**()|
Returns a hash value of the arguments. It is not a cryptographic hash function and should not be used as such. |
-| **[hex](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-hex)**()|
Convert integer to hexadecimal. |
-| **[hour](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-hour)**()|
Return the hour for a timestamp. |
-| **[identity](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-identity)**()|
Return value. |
-| **[if_null](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-if-null)**()|
If input is not null, return input value; else return default value. |
-| **[ifnull](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-ifnull)**()| |
-| **[ilike_match](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-ilike-match)**()|
pattern match same as ILIKE predicate |
-| **[inc](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-inc)**()|
Return expression + 1. |
-| **[int](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-int)**()| |
-| **[int16](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-int16)**()|
Cast string expression to int16. |
-| **[int32](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-int32)**()|
Cast string expression to int32. |
-| **[int64](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-int64)**()|
Cast string expression to int64. |
-| **[is_null](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-is-null)**()|
Check if input value is null, return bool. |
-| **[isnull](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-isnull)**()| |
-| **[join](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-join)**()|
For each string value from specified column of window, join by delimeter. Null values are skipped. |
-| **[json_array_length](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-json-array-length)**()|
Returns the number of elements in the outermost JSON array. |
-| **[lag](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-lag)**()|
Returns value evaluated at the row that is offset rows before the current row within the partition. Offset is evaluated with respect to the current row. |
-| **[last_day](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-last-day)**()|
Return the last day of the month to which the date belongs to. |
-| **[lcase](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-lcase)**()|
Convert all the characters to lowercase. Note that characters with values > 127 are simply returned. |
-| **[like_match](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-like-match)**()|
pattern match same as LIKE predicate |
-| **[list_except_by_key](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-list-except-by-key)**()|
Return list of elements in list1 but keys not in except_str. |
-| **[list_except_by_value](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-list-except-by-value)**()|
Return list of elements in list1 but values not in except_str. |
-| **[ln](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-ln)**()|
Return the natural logarithm of expr. |
-| **[log](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-log)**()|
log(base, expr) If called with one parameter, this function returns the natural logarithm of expr. If called with two parameters, this function returns the logarithm of expr to the base. |
-| **[log10](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-log10)**()|
Return the base-10 logarithm of expr. |
-| **[log2](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-log2)**()|
Return the base-2 logarithm of expr. |
-| **[lower](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-lower)**()| |
-| **[make_tuple](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-make-tuple)**()| |
-| **[max](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-max)**()|
Compute maximum of values. |
-| **[max_cate](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-max-cate)**()|
Compute maximum of values grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
-| **[max_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-max-cate-where)**()|
Compute maximum of values matching specified condition grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
-| **[max_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-max-where)**()|
Compute maximum of values match specified condition. |
-| **[maximum](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-maximum)**()|
Compute maximum of two arguments. |
-| **[median](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-median)**()|
Compute the median of values. |
-| **[min](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-min)**()|
Compute minimum of values. |
-| **[min_cate](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-min-cate)**()|
Compute minimum of values grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
-| **[min_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-min-cate-where)**()|
Compute minimum of values matching specified condition grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
-| **[min_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-min-where)**()|
Compute minimum of values match specified condition. |
-| **[minimum](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-minimum)**()|
Compute minimum of two arguments. |
-| **[minute](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-minute)**()|
Return the minute for a timestamp. |
-| **[month](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-month)**()|
Return the month part of a timestamp or date. |
-| **[nth_value_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-nth-value-where)**()|
Returns the value of expr from the idx th row matches the condition. |
-| **[nvl](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-nvl)**()| |
-| **[nvl2](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-nvl2)**()|
nvl2(expr1, expr2, expr3) - Returns expr2 if expr1 is not null, or expr3 otherwise. |
-| **[pmod](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-pmod)**()|
Compute pmod of two arguments. If any param is NULL, output NULL. If divisor is 0, output NULL. |
-| **[pow](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-pow)**()|
Return the value of expr1 to the power of expr2. |
-| **[power](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-power)**()| |
-| **[radians](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-radians)**()|
Returns the argument X, converted from degrees to radians. (Note that π radians equals 180 degrees.) |
-| **[regexp_like](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-regexp-like)**()|
pattern match same as RLIKE predicate (based on RE2) |
-| **[replace](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-replace)**()|
replace(str, search[, replace]) - Replaces all occurrences of `search` with `replace`|
-| **[reverse](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-reverse)**()|
Returns the reversed given string. |
-| **[round](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-round)**()|
Returns expr rounded to d decimal places using HALF_UP rounding mode. |
-| **[second](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-second)**()|
Return the second for a timestamp. |
-| **[sin](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-sin)**()|
Return the sine of expr. |
-| **[size](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-size)**()|
Get the size of a List (e.g., result of split) |
-| **[smallint](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-smallint)**()| |
-| **[split](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-split)**()|
Split string to list by delimeter. Null values are skipped. |
-| **[split_array](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-split-array)**()|
Split string to array of string by delimeter. |
-| **[split_by_key](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-split-by-key)**()|
Split string by delimeter and split each segment as kv pair, then add each key to output list. Null or illegal segments are skipped. |
-| **[split_by_value](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-split-by-value)**()|
Split string by delimeter and split each segment as kv pair, then add each value to output list. Null or illegal segments are skipped. |
-| **[sqrt](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-sqrt)**()|
Return square root of expr. |
-| **[std](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-std)**()| |
-| **[stddev](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-stddev)**()|
Compute sample standard deviation of values, i.e., `sqrt( sum((x_i - avg)^2) / (n-1) )`|
-| **[stddev_pop](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-stddev-pop)**()|
Compute population standard deviation of values, i.e., `sqrt( sum((x_i - avg)^2) / n )`|
-| **[stddev_samp](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-stddev-samp)**()| |
-| **[strcmp](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-strcmp)**()|
Returns 0 if the strings are the same, -1 if the first argument is smaller than the second according to the current sort order, and 1 otherwise. |
-| **[string](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-string)**()|
Return string converted from timestamp expression. |
-| **[substr](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-substr)**()| |
-| **[substring](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-substring)**()|
Return a substring `len` characters long from string str, starting at position `pos`. Alias function: `substr`|
-| **[sum](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-sum)**()|
Compute sum of values. |
-| **[sum_cate](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-sum-cate)**()|
Compute sum of values grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
-| **[sum_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-sum-cate-where)**()|
Compute sum of values matching specified condition grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
-| **[sum_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-sum-where)**()|
Compute sum of values match specified condition. |
-| **[tan](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-tan)**()|
Return the tangent of expr. |
-| **[timestamp](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-timestamp)**()|
Cast int64, date or string expression to timestamp. |
-| **[top](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top)**()|
Compute top k of values and output string separated by comma. The outputs are sorted in desc order. |
-| **[top1_ratio](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top1-ratio)**()|
Compute the top1 occurring value's ratio. |
-| **[top_n_key_avg_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top-n-key-avg-cate-where)**()|
Compute average of values matching specified condition grouped by category key. Output string for top N category keys in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
-| **[top_n_key_count_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top-n-key-count-cate-where)**()|
Compute count of values matching specified condition grouped by category key. Output string for top N category keys in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
-| **[top_n_key_max_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top-n-key-max-cate-where)**()|
Compute maximum of values matching specified condition grouped by category key. Output string for top N category keys in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
-| **[top_n_key_min_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top-n-key-min-cate-where)**()|
Compute minimum of values matching specified condition grouped by category key. Output string for top N category keys in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
-| **[top_n_key_ratio_cate](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top-n-key-ratio-cate)**()|
Ratios (cond match cnt / total cnt) for groups. |
-| **[top_n_key_sum_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top-n-key-sum-cate-where)**()|
Compute sum of values matching specified condition grouped by category key. Output string for top N category keys in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
-| **[top_n_value_avg_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top-n-value-avg-cate-where)**()|
Compute average of values matching specified condition grouped by category key. Output string for top N aggregate values in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
-| **[top_n_value_count_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top-n-value-count-cate-where)**()|
Compute count of values matching specified condition grouped by category key. Output string for top N aggregate values in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
-| **[top_n_value_max_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top-n-value-max-cate-where)**()|
Compute maximum of values matching specified condition grouped by category key. Output string for top N aggregate values in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
-| **[top_n_value_min_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top-n-value-min-cate-where)**()|
Compute minimum of values matching specified condition grouped by category key. Output string for top N aggregate values in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
-| **[top_n_value_ratio_cate](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top-n-value-ratio-cate)**()|
Ratios (cond match cnt / total cnt) for groups. |
-| **[top_n_value_sum_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top-n-value-sum-cate-where)**()|
Compute sum of values matching specified condition grouped by category key. Output string for top N aggregate values in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
-| **[topn_frequency](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-topn-frequency)**()|
Return the topN keys sorted by their frequency. |
-| **[truncate](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-truncate)**()|
Return the nearest integer that is not greater in magnitude than the expr. |
-| **[ucase](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-ucase)**()|
Convert all the characters to uppercase. Note that characters values > 127 are simply returned. |
-| **[unhex](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-unhex)**()|
Convert hexadecimal to binary string. |
-| **[unix_timestamp](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-unix-timestamp)**()|
Cast date or string expression to unix_timestamp. If empty string or NULL is provided, return current timestamp. |
-| **[upper](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-upper)**()| |
-| **[var_pop](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-var-pop)**()|
Compute population variance of values, i.e., `sum((x_i - avg)^2) / n`|
-| **[var_samp](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-var-samp)**()|
Compute population variance of values, i.e., `sum((x_i - avg)^2) / (n-1)`|
-| **[variance](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-variance)**()| |
-| **[week](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-week)**()| |
-| **[weekofyear](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-weekofyear)**()|
Return the week of year for a timestamp or date. |
-| **[window_split](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-window-split)**()|
For each string value from specified column of window, split by delimeter and add segment to output list. Null values are skipped. |
-| **[window_split_by_key](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-window-split-by-key)**()|
For each string value from specified column of window, split by delimeter and then split each segment as kv pair, then add each key to output list. Null and illegal segments are skipped. |
-| **[window_split_by_value](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-window-split-by-value)**()|
For each string value from specified column of window, split by delimeter and then split each segment as kv pair, then add each value to output list. Null and illegal segments are skipped. |
-| **[year](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-year)**()|
Return the year part of a timestamp or date. |
+| **[abs](/openmldb_sql/Files/udfs_8h.md#function-abs)**()|
Return the absolute value of expr. |
+| **[acos](/openmldb_sql/Files/udfs_8h.md#function-acos)**()|
Return the arc cosine of expr. |
+| **[add](/openmldb_sql/Files/udfs_8h.md#function-add)**()|
Compute sum of two arguments. |
+| **[add_months](/openmldb_sql/Files/udfs_8h.md#function-add-months)**()|
Adds an integer number of months to a given date, returning the resulting date. |
+| **[array_contains](/openmldb_sql/Files/udfs_8h.md#function-array-contains)**()|
array_contains(array, value) - Returns true if the array contains the value. |
+| **[asin](/openmldb_sql/Files/udfs_8h.md#function-asin)**()|
Return the arc sine of expr. |
+| **[at](/openmldb_sql/Files/udfs_8h.md#function-at)**()| |
+| **[atan](/openmldb_sql/Files/udfs_8h.md#function-atan)**()|
Return the arc tangent of expr. If called with one parameter, this function returns the arc tangent of expr. If called with two parameters X and Y, this function returns the arc tangent of Y / X. |
+| **[atan2](/openmldb_sql/Files/udfs_8h.md#function-atan2)**()|
Return the arc tangent of Y / X. |
+| **[avg](/openmldb_sql/Files/udfs_8h.md#function-avg)**()|
Compute average of values. |
+| **[avg_cate](/openmldb_sql/Files/udfs_8h.md#function-avg-cate)**()|
Compute average of values grouped by category key and output string. Each group is represented as 'K:V'; groups are separated by commas in the output and sorted by key in ascending order. |
+| **[avg_cate_where](/openmldb_sql/Files/udfs_8h.md#function-avg-cate-where)**()|
Compute average of values matching specified condition grouped by category key and output string. Each group is represented as 'K:V'; groups are separated by commas and sorted by key in ascending order. |
+| **[avg_where](/openmldb_sql/Files/udfs_8h.md#function-avg-where)**()|
Compute average of values matching the specified condition. |
+| **[bigint](/openmldb_sql/Files/udfs_8h.md#function-bigint)**()| |
+| **[bool](/openmldb_sql/Files/udfs_8h.md#function-bool)**()|
Cast string expression to bool. |
+| **[ceil](/openmldb_sql/Files/udfs_8h.md#function-ceil)**()|
Return the smallest integer value not less than the expr. |
+| **[ceiling](/openmldb_sql/Files/udfs_8h.md#function-ceiling)**()| |
+| **[char](/openmldb_sql/Files/udfs_8h.md#function-char)**()|
Returns the ASCII character having the binary equivalent to expr. If n >= 256 the result is equivalent to char(n % 256). |
+| **[char_length](/openmldb_sql/Files/udfs_8h.md#function-char-length)**()|
Returns the length of the string. It is measured in characters; multibyte character strings are not supported. |
+| **[character_length](/openmldb_sql/Files/udfs_8h.md#function-character-length)**()| |
+| **[concat](/openmldb_sql/Files/udfs_8h.md#function-concat)**()|
This function returns a string resulting from the joining of two or more string values in an end-to-end manner. (To add a separating value during joining, see concat_ws.) |
+| **[concat_ws](/openmldb_sql/Files/udfs_8h.md#function-concat-ws)**()|
Returns a string resulting from the joining of two or more string values in an end-to-end manner. It separates those concatenated string values with the delimiter specified in the first function argument. |
+| **[cos](/openmldb_sql/Files/udfs_8h.md#function-cos)**()|
Return the cosine of expr. |
+| **[cot](/openmldb_sql/Files/udfs_8h.md#function-cot)**()|
Return the cotangent of expr. |
+| **[count](/openmldb_sql/Files/udfs_8h.md#function-count)**()|
Compute number of values. |
+| **[count_cate](/openmldb_sql/Files/udfs_8h.md#function-count-cate)**()|
Compute count of values grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
+| **[count_cate_where](/openmldb_sql/Files/udfs_8h.md#function-count-cate-where)**()|
Compute count of values matching specified condition grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
+| **[count_where](/openmldb_sql/Files/udfs_8h.md#function-count-where)**()|
Compute number of values matching specified condition. |
+| **[date](/openmldb_sql/Files/udfs_8h.md#function-date)**()|
Cast timestamp or string expression to date (date >= 1900-01-01) |
+| **[date_format](/openmldb_sql/Files/udfs_8h.md#function-date-format)**()|
Formats the date value according to the format string. |
+| **[datediff](/openmldb_sql/Files/udfs_8h.md#function-datediff)**()|
days difference from date1 to date2 |
+| **[day](/openmldb_sql/Files/udfs_8h.md#function-day)**()| |
+| **[dayofmonth](/openmldb_sql/Files/udfs_8h.md#function-dayofmonth)**()|
Return the day of the month for a timestamp or date. |
+| **[dayofweek](/openmldb_sql/Files/udfs_8h.md#function-dayofweek)**()|
Return the day of week for a timestamp or date. |
+| **[dayofyear](/openmldb_sql/Files/udfs_8h.md#function-dayofyear)**()|
Return the day of year for a timestamp or date. Returns 0 given an invalid date. |
+| **[degrees](/openmldb_sql/Files/udfs_8h.md#function-degrees)**()|
Convert radians to degrees. |
+| **[distinct_count](/openmldb_sql/Files/udfs_8h.md#function-distinct-count)**()|
Compute number of distinct values. |
+| **[double](/openmldb_sql/Files/udfs_8h.md#function-double)**()|
Cast string expression to double. |
+| **[drawdown](/openmldb_sql/Files/udfs_8h.md#function-drawdown)**()|
Compute drawdown of values. |
+| **[earth_distance](/openmldb_sql/Files/udfs_8h.md#function-earth-distance)**()|
Returns the great circle distance between two points on the surface of the Earth. Km as return unit. Add a minus (-) sign if heading west (W) or south (S). |
+| **[entropy](/openmldb_sql/Files/udfs_8h.md#function-entropy)**()|
Calculate Shannon entropy of a column of values. Null values are skipped. |
+| **[ew_avg](/openmldb_sql/Files/udfs_8h.md#function-ew-avg)**()|
Compute exponentially-weighted average of values. It's equivalent to pandas ewm(alpha={alpha}, adjust=True, ignore_na=True, com=None, span=None, halflife=None, min_periods=0) |
+| **[exp](/openmldb_sql/Files/udfs_8h.md#function-exp)**()|
Return the value of e (the base of natural logarithms) raised to the power of expr. |
+| **[farm_fingerprint](/openmldb_sql/Files/udfs_8h.md#function-farm-fingerprint)**()| |
+| **[first_value](/openmldb_sql/Files/udfs_8h.md#function-first-value)**()|
Returns the value of expr from the latest row (last row) of the window frame. |
+| **[float](/openmldb_sql/Files/udfs_8h.md#function-float)**()|
Cast string expression to float. |
+| **[floor](/openmldb_sql/Files/udfs_8h.md#function-floor)**()|
Return the largest integer value not greater than the expr. |
+| **[get_json_object](/openmldb_sql/Files/udfs_8h.md#function-get-json-object)**()|
Extracts a JSON object from [JSON Pointer](https://datatracker.ietf.org/doc/html/rfc6901)|
+| **[hash64](/openmldb_sql/Files/udfs_8h.md#function-hash64)**()|
Returns a hash value of the arguments. It is not a cryptographic hash function and should not be used as such. |
+| **[hex](/openmldb_sql/Files/udfs_8h.md#function-hex)**()|
Convert integer to hexadecimal. |
+| **[hour](/openmldb_sql/Files/udfs_8h.md#function-hour)**()|
Return the hour for a timestamp. |
+| **[identity](/openmldb_sql/Files/udfs_8h.md#function-identity)**()|
Return value. |
+| **[if_null](/openmldb_sql/Files/udfs_8h.md#function-if-null)**()|
If input is not null, return input value; else return default value. |
+| **[ifnull](/openmldb_sql/Files/udfs_8h.md#function-ifnull)**()| |
+| **[ilike_match](/openmldb_sql/Files/udfs_8h.md#function-ilike-match)**()|
pattern match same as ILIKE predicate |
+| **[inc](/openmldb_sql/Files/udfs_8h.md#function-inc)**()|
Return expression + 1. |
+| **[int](/openmldb_sql/Files/udfs_8h.md#function-int)**()| |
+| **[int16](/openmldb_sql/Files/udfs_8h.md#function-int16)**()|
Cast string expression to int16. |
+| **[int32](/openmldb_sql/Files/udfs_8h.md#function-int32)**()|
Cast string expression to int32. |
+| **[int64](/openmldb_sql/Files/udfs_8h.md#function-int64)**()|
Cast string expression to int64. |
+| **[is_null](/openmldb_sql/Files/udfs_8h.md#function-is-null)**()|
Check if input value is null, return bool. |
+| **[isnull](/openmldb_sql/Files/udfs_8h.md#function-isnull)**()| |
+| **[join](/openmldb_sql/Files/udfs_8h.md#function-join)**()|
For each string value from specified column of window, join by delimiter. Null values are skipped. |
+| **[json_array_length](/openmldb_sql/Files/udfs_8h.md#function-json-array-length)**()|
Returns the number of elements in the outermost JSON array. |
+| **[lag](/openmldb_sql/Files/udfs_8h.md#function-lag)**()|
Returns value evaluated at the row that is offset rows before the current row within the partition. Offset is evaluated with respect to the current row. |
+| **[last_day](/openmldb_sql/Files/udfs_8h.md#function-last-day)**()|
Return the last day of the month to which the date belongs. |
+| **[lcase](/openmldb_sql/Files/udfs_8h.md#function-lcase)**()|
Convert all the characters to lowercase. Note that characters with values > 127 are simply returned. |
+| **[like_match](/openmldb_sql/Files/udfs_8h.md#function-like-match)**()|
pattern match same as LIKE predicate |
+| **[list_except_by_key](/openmldb_sql/Files/udfs_8h.md#function-list-except-by-key)**()|
Return list of elements in list1 but keys not in except_str. |
+| **[list_except_by_value](/openmldb_sql/Files/udfs_8h.md#function-list-except-by-value)**()|
Return list of elements in list1 but values not in except_str. |
+| **[ln](/openmldb_sql/Files/udfs_8h.md#function-ln)**()|
Return the natural logarithm of expr. |
+| **[log](/openmldb_sql/Files/udfs_8h.md#function-log)**()|
log(base, expr) If called with one parameter, this function returns the natural logarithm of expr. If called with two parameters, this function returns the logarithm of expr to the base. |
+| **[log10](/openmldb_sql/Files/udfs_8h.md#function-log10)**()|
Return the base-10 logarithm of expr. |
+| **[log2](/openmldb_sql/Files/udfs_8h.md#function-log2)**()|
Return the base-2 logarithm of expr. |
+| **[lower](/openmldb_sql/Files/udfs_8h.md#function-lower)**()| |
+| **[make_tuple](/openmldb_sql/Files/udfs_8h.md#function-make-tuple)**()| |
+| **[max](/openmldb_sql/Files/udfs_8h.md#function-max)**()|
Compute maximum of values. |
+| **[max_cate](/openmldb_sql/Files/udfs_8h.md#function-max-cate)**()|
Compute maximum of values grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
+| **[max_cate_where](/openmldb_sql/Files/udfs_8h.md#function-max-cate-where)**()|
Compute maximum of values matching specified condition grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
+| **[max_where](/openmldb_sql/Files/udfs_8h.md#function-max-where)**()|
Compute maximum of values matching specified condition. |
+| **[maximum](/openmldb_sql/Files/udfs_8h.md#function-maximum)**()|
Compute maximum of two arguments. |
+| **[median](/openmldb_sql/Files/udfs_8h.md#function-median)**()|
Compute the median of values. |
+| **[min](/openmldb_sql/Files/udfs_8h.md#function-min)**()|
Compute minimum of values. |
+| **[min_cate](/openmldb_sql/Files/udfs_8h.md#function-min-cate)**()|
Compute minimum of values grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
+| **[min_cate_where](/openmldb_sql/Files/udfs_8h.md#function-min-cate-where)**()|
Compute minimum of values matching specified condition grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
+| **[min_where](/openmldb_sql/Files/udfs_8h.md#function-min-where)**()|
Compute minimum of values matching specified condition. |
+| **[minimum](/openmldb_sql/Files/udfs_8h.md#function-minimum)**()|
Compute minimum of two arguments. |
+| **[minute](/openmldb_sql/Files/udfs_8h.md#function-minute)**()|
Return the minute for a timestamp. |
+| **[month](/openmldb_sql/Files/udfs_8h.md#function-month)**()|
Return the month part of a timestamp or date. |
+| **[nth_value_where](/openmldb_sql/Files/udfs_8h.md#function-nth-value-where)**()|
Returns the value of expr from the idx-th row that matches the condition. |
+| **[nvl](/openmldb_sql/Files/udfs_8h.md#function-nvl)**()| |
+| **[nvl2](/openmldb_sql/Files/udfs_8h.md#function-nvl2)**()|
nvl2(expr1, expr2, expr3) - Returns expr2 if expr1 is not null, or expr3 otherwise. |
+| **[pmod](/openmldb_sql/Files/udfs_8h.md#function-pmod)**()|
Compute pmod of two arguments. If any param is NULL, output NULL. If divisor is 0, output NULL. |
+| **[pow](/openmldb_sql/Files/udfs_8h.md#function-pow)**()|
Return the value of expr1 to the power of expr2. |
+| **[power](/openmldb_sql/Files/udfs_8h.md#function-power)**()| |
+| **[radians](/openmldb_sql/Files/udfs_8h.md#function-radians)**()|
Returns the argument X, converted from degrees to radians. (Note that π radians equals 180 degrees.) |
+| **[regexp_like](/openmldb_sql/Files/udfs_8h.md#function-regexp-like)**()|
pattern match same as RLIKE predicate (based on RE2) |
+| **[replace](/openmldb_sql/Files/udfs_8h.md#function-replace)**()|
replace(str, search[, replace]) - Replaces all occurrences of `search` with `replace`|
+| **[reverse](/openmldb_sql/Files/udfs_8h.md#function-reverse)**()|
Returns the reversed given string. |
+| **[round](/openmldb_sql/Files/udfs_8h.md#function-round)**()|
Returns expr rounded to d decimal places using HALF_UP rounding mode. |
+| **[second](/openmldb_sql/Files/udfs_8h.md#function-second)**()|
Return the second for a timestamp. |
+| **[sin](/openmldb_sql/Files/udfs_8h.md#function-sin)**()|
Return the sine of expr. |
+| **[size](/openmldb_sql/Files/udfs_8h.md#function-size)**()|
Get the size of a List (e.g., result of split) |
+| **[smallint](/openmldb_sql/Files/udfs_8h.md#function-smallint)**()| |
+| **[split](/openmldb_sql/Files/udfs_8h.md#function-split)**()|
Split string to list by delimiter. Null values are skipped. |
+| **[split_array](/openmldb_sql/Files/udfs_8h.md#function-split-array)**()|
Split string to array of string by delimiter. |
+| **[split_by_key](/openmldb_sql/Files/udfs_8h.md#function-split-by-key)**()|
Split string by delimiter and split each segment as kv pair, then add each key to output list. Null or illegal segments are skipped. |
+| **[split_by_value](/openmldb_sql/Files/udfs_8h.md#function-split-by-value)**()|
Split string by delimiter and split each segment as kv pair, then add each value to output list. Null or illegal segments are skipped. |
+| **[sqrt](/openmldb_sql/Files/udfs_8h.md#function-sqrt)**()|
Return square root of expr. |
+| **[std](/openmldb_sql/Files/udfs_8h.md#function-std)**()| |
+| **[stddev](/openmldb_sql/Files/udfs_8h.md#function-stddev)**()|
Compute sample standard deviation of values, i.e., `sqrt( sum((x_i - avg)^2) / (n-1) )`|
+| **[stddev_pop](/openmldb_sql/Files/udfs_8h.md#function-stddev-pop)**()|
Compute population standard deviation of values, i.e., `sqrt( sum((x_i - avg)^2) / n )`|
+| **[stddev_samp](/openmldb_sql/Files/udfs_8h.md#function-stddev-samp)**()| |
+| **[strcmp](/openmldb_sql/Files/udfs_8h.md#function-strcmp)**()|
Returns 0 if the strings are the same, -1 if the first argument is smaller than the second according to the current sort order, and 1 otherwise. |
+| **[string](/openmldb_sql/Files/udfs_8h.md#function-string)**()|
Return string converted from timestamp expression. |
+| **[substr](/openmldb_sql/Files/udfs_8h.md#function-substr)**()| |
+| **[substring](/openmldb_sql/Files/udfs_8h.md#function-substring)**()|
Return a substring `len` characters long from string str, starting at position `pos`. Alias function: `substr`|
+| **[sum](/openmldb_sql/Files/udfs_8h.md#function-sum)**()|
Compute sum of values. |
+| **[sum_cate](/openmldb_sql/Files/udfs_8h.md#function-sum-cate)**()|
Compute sum of values grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
+| **[sum_cate_where](/openmldb_sql/Files/udfs_8h.md#function-sum-cate-where)**()|
Compute sum of values matching specified condition grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
+| **[sum_where](/openmldb_sql/Files/udfs_8h.md#function-sum-where)**()|
Compute sum of values matching specified condition. |
+| **[tan](/openmldb_sql/Files/udfs_8h.md#function-tan)**()|
Return the tangent of expr. |
+| **[timestamp](/openmldb_sql/Files/udfs_8h.md#function-timestamp)**()|
Cast int64, date or string expression to timestamp. |
+| **[top](/openmldb_sql/Files/udfs_8h.md#function-top)**()|
Compute top k of values and output string separated by comma. The outputs are sorted in desc order. |
+| **[top1_ratio](/openmldb_sql/Files/udfs_8h.md#function-top1-ratio)**()|
Compute the top1 occurring value's ratio. |
+| **[top_n_key_avg_cate_where](/openmldb_sql/Files/udfs_8h.md#function-top-n-key-avg-cate-where)**()|
Compute average of values matching specified condition grouped by category key. Output string for top N category keys in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
+| **[top_n_key_count_cate_where](/openmldb_sql/Files/udfs_8h.md#function-top-n-key-count-cate-where)**()|
Compute count of values matching specified condition grouped by category key. Output string for top N category keys in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
+| **[top_n_key_max_cate_where](/openmldb_sql/Files/udfs_8h.md#function-top-n-key-max-cate-where)**()|
Compute maximum of values matching specified condition grouped by category key. Output string for top N category keys in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
+| **[top_n_key_min_cate_where](/openmldb_sql/Files/udfs_8h.md#function-top-n-key-min-cate-where)**()|
Compute minimum of values matching specified condition grouped by category key. Output string for top N category keys in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
+| **[top_n_key_ratio_cate](/openmldb_sql/Files/udfs_8h.md#function-top-n-key-ratio-cate)**()|
Ratios (cond match cnt / total cnt) for groups. |
+| **[top_n_key_sum_cate_where](/openmldb_sql/Files/udfs_8h.md#function-top-n-key-sum-cate-where)**()|
Compute sum of values matching specified condition grouped by category key. Output string for top N category keys in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
+| **[top_n_value_avg_cate_where](/openmldb_sql/Files/udfs_8h.md#function-top-n-value-avg-cate-where)**()|
Compute average of values matching specified condition grouped by category key. Output string for top N aggregate values in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
+| **[top_n_value_count_cate_where](/openmldb_sql/Files/udfs_8h.md#function-top-n-value-count-cate-where)**()|
Compute count of values matching specified condition grouped by category key. Output string for top N aggregate values in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
+| **[top_n_value_max_cate_where](/openmldb_sql/Files/udfs_8h.md#function-top-n-value-max-cate-where)**()|
Compute maximum of values matching specified condition grouped by category key. Output string for top N aggregate values in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
+| **[top_n_value_min_cate_where](/openmldb_sql/Files/udfs_8h.md#function-top-n-value-min-cate-where)**()|
Compute minimum of values matching specified condition grouped by category key. Output string for top N aggregate values in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
+| **[top_n_value_ratio_cate](/openmldb_sql/Files/udfs_8h.md#function-top-n-value-ratio-cate)**()|
Ratios (cond match cnt / total cnt) for groups. |
+| **[top_n_value_sum_cate_where](/openmldb_sql/Files/udfs_8h.md#function-top-n-value-sum-cate-where)**()|
Compute sum of values matching specified condition grouped by category key. Output string for top N aggregate values in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
+| **[topn_frequency](/openmldb_sql/Files/udfs_8h.md#function-topn-frequency)**()|
Return the topN keys sorted by their frequency. |
+| **[truncate](/openmldb_sql/Files/udfs_8h.md#function-truncate)**()|
Return the nearest integer that is not greater in magnitude than the expr. |
+| **[ucase](/openmldb_sql/Files/udfs_8h.md#function-ucase)**()|
Convert all the characters to uppercase. Note that characters with values > 127 are simply returned. |
+| **[unhex](/openmldb_sql/Files/udfs_8h.md#function-unhex)**()|
Convert hexadecimal to binary string. |
+| **[unix_timestamp](/openmldb_sql/Files/udfs_8h.md#function-unix-timestamp)**()|
Cast date or string expression to unix_timestamp. If empty string or NULL is provided, return current timestamp. |
+| **[upper](/openmldb_sql/Files/udfs_8h.md#function-upper)**()| |
+| **[var_pop](/openmldb_sql/Files/udfs_8h.md#function-var-pop)**()|
Compute population variance of values, i.e., `sum((x_i - avg)^2) / n`|
+| **[var_samp](/openmldb_sql/Files/udfs_8h.md#function-var-samp)**()|
Compute sample variance of values, i.e., `sum((x_i - avg)^2) / (n-1)`|
+| **[variance](/openmldb_sql/Files/udfs_8h.md#function-variance)**()| |
+| **[week](/openmldb_sql/Files/udfs_8h.md#function-week)**()| |
+| **[weekofyear](/openmldb_sql/Files/udfs_8h.md#function-weekofyear)**()|
Return the week of year for a timestamp or date. |
+| **[window_split](/openmldb_sql/Files/udfs_8h.md#function-window-split)**()|
For each string value from specified column of window, split by delimiter and add segment to output list. Null values are skipped. |
+| **[window_split_by_key](/openmldb_sql/Files/udfs_8h.md#function-window-split-by-key)**()|
For each string value from specified column of window, split by delimiter and then split each segment as kv pair, then add each key to output list. Null and illegal segments are skipped. |
+| **[window_split_by_value](/openmldb_sql/Files/udfs_8h.md#function-window-split-by-value)**()|
For each string value from specified column of window, split by delimiter and then split each segment as kv pair, then add each value to output list. Null and illegal segments are skipped. |
+| **[year](/openmldb_sql/Files/udfs_8h.md#function-year)**()|
Return the year part of a timestamp or date. |
## Functions Documentation
@@ -501,13 +501,13 @@ Compute average of values.
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 0 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -541,13 +541,13 @@ Compute average of values grouped by category key and output string. Each group
Example:
-| value | catagory |
+| value | catagory |
| -------- | -------- |
-| 0 | x |
-| 1 | y |
-| 2 | x |
-| 3 | y |
-| 4 | x |
+| 0 | x |
+| 1 | y |
+| 2 | x |
+| 3 | y |
+| 4 | x |
```sql
@@ -586,13 +586,13 @@ Compute average of values matching specified condition grouped by category key a
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | false | y |
-| 2 | false | x |
-| 3 | true | y |
-| 4 | true | x |
+| 0 | true | x |
+| 1 | false | y |
+| 2 | false | x |
+| 3 | true | y |
+| 4 | true | x |
```sql
@@ -634,13 +634,13 @@ Compute average of values match specified condition.
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 0 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -884,7 +884,7 @@ SELECT COS(0);
-* The value returned by [cos()](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-cos) is always in the range: -1 to 1.
+* The value returned by [cos()](/openmldb_sql/Files/udfs_8h.md#function-cos) is always in the range: -1 to 1.
**Supported Types**:
@@ -946,13 +946,13 @@ Compute number of values.
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 0 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -987,13 +987,13 @@ Compute count of values grouped by category key and output string. Each group is
Example:
-| value | catagory |
+| value | catagory |
| -------- | -------- |
-| 0 | x |
-| 1 | y |
-| 2 | x |
-| 3 | y |
-| 4 | x |
+| 0 | x |
+| 1 | y |
+| 2 | x |
+| 3 | y |
+| 4 | x |
```sql
@@ -1032,13 +1032,13 @@ Compute count of values matching specified condition grouped by category key and
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | false | y |
-| 2 | false | x |
-| 3 | true | y |
-| 4 | true | x |
+| 0 | true | x |
+| 1 | false | y |
+| 2 | false | x |
+| 3 | true | y |
+| 4 | true | x |
```sql
@@ -1080,13 +1080,13 @@ Compute number of values match specified condition.
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 0 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -1230,7 +1230,7 @@ Return the day of the month for a timestamp or date.
0.1.0
-Note: This function equals the `[day()](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-day)` function.
+Note: This function equals the `[day()](/openmldb_sql/Files/udfs_8h.md#function-day)` function.
Example:
@@ -1264,7 +1264,7 @@ Return the day of week for a timestamp or date.
0.4.0
-Note: This function equals the `[week()](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-week)` function.
+Note: This function equals the `[week()](/openmldb_sql/Files/udfs_8h.md#function-week)` function.
Example:
@@ -1374,13 +1374,13 @@ Compute number of distinct values.
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 0 |
-| 2 |
-| 2 |
-| 4 |
+| 0 |
+| 0 |
+| 2 |
+| 2 |
+| 4 |
```sql
@@ -1450,14 +1450,14 @@ It requires that all values are non-negative. Negative values will be ignored.
Example:
-| value |
+| value |
| -------- |
-| 1 |
-| 8 |
-| 5 |
-| 2 |
-| 10 |
-| 4 |
+| 1 |
+| 8 |
+| 5 |
+| 2 |
+| 10 |
+| 4 |
```sql
@@ -1568,13 +1568,13 @@ It requires that values are ordered so that it can only be used with WINDOW (PAR
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 0 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -1652,11 +1652,11 @@ window w as (partition by gp order by ts rows between 3 preceding and current ro
```
-| id | gp | ts | agg |
+| id | gp | ts | agg |
| -------- | -------- | -------- | -------- |
-| 1 | 100 | 98 | 98 |
-| 2 | 100 | 99 | 99 |
-| 3 | 100 | 100 | 100 |
+| 1 | 100 | 98 | 98 |
+| 2 | 100 | 99 | 99 |
+| 3 | 100 | 100 | 100 |
@@ -2251,21 +2251,21 @@ Returns value evaluated at the row that is offset rows before the current row wi
* **offset** The number of rows forwarded from the current row, must not negative
-Note: This function equals the `[at()](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-at)` function.
+Note: This function equals the `[at()](/openmldb_sql/Files/udfs_8h.md#function-at)` function.
-The offset in window is `nth_value()`, not `[lag()](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-lag)/at()`. The old `[at()](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-at)`(version < 0.5.0) is start from the last row of window(may not be the current row), it's more like `nth_value()`
+The offset in window is `nth_value()`, not `[lag()](/openmldb_sql/Files/udfs_8h.md#function-lag)/at()`. The old `[at()](/openmldb_sql/Files/udfs_8h.md#function-at)`(version < 0.5.0) starts from the last row of window(may not be the current row), it's more like `nth_value()`
Example:
-| c1 | c2 |
+| c1 | c2 |
| -------- | -------- |
-| 0 | 1 |
-| 1 | 1 |
-| 2 | 2 |
-| 3 | 2 |
-| 4 | 2 |
+| 0 | 1 |
+| 1 | 1 |
+| 2 | 2 |
+| 3 | 2 |
+| 4 | 2 |
```sql
@@ -2653,13 +2653,13 @@ Compute maximum of values.
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 0 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -2696,13 +2696,13 @@ Compute maximum of values grouped by category key and output string. Each group
Example:
-| value | catagory |
+| value | catagory |
| -------- | -------- |
-| 0 | x |
-| 1 | y |
-| 2 | x |
-| 3 | y |
-| 4 | x |
+| 0 | x |
+| 1 | y |
+| 2 | x |
+| 3 | y |
+| 4 | x |
```sql
@@ -2741,13 +2741,13 @@ Compute maximum of values matching specified condition grouped by category key a
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | false | y |
-| 2 | false | x |
-| 3 | true | y |
-| 4 | true | x |
+| 0 | true | x |
+| 1 | false | y |
+| 2 | false | x |
+| 3 | true | y |
+| 4 | true | x |
```sql
@@ -2789,13 +2789,13 @@ Compute maximum of values match specified condition.
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 0 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -2861,12 +2861,12 @@ Compute the median of values.
Example:
-| value |
+| value |
| -------- |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -2903,13 +2903,13 @@ Compute minimum of values.
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 0 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -2946,13 +2946,13 @@ Compute minimum of values grouped by category key and output string. Each group
Example:
-| value | catagory |
+| value | catagory |
| -------- | -------- |
-| 0 | x |
-| 1 | y |
-| 2 | x |
-| 3 | y |
-| 4 | x |
+| 0 | x |
+| 1 | y |
+| 2 | x |
+| 3 | y |
+| 4 | x |
```sql
@@ -2991,14 +2991,14 @@ Compute minimum of values matching specified condition grouped by category key a
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | false | y |
-| 2 | false | x |
-| 1 | true | y |
-| 4 | true | x |
-| 3 | true | y |
+| 0 | true | x |
+| 1 | false | y |
+| 2 | false | x |
+| 1 | true | y |
+| 4 | true | x |
+| 3 | true | y |
```sql
@@ -3040,13 +3040,13 @@ Compute minimum of values match specified condition.
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 0 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -3176,12 +3176,12 @@ select col1, cond, gp, nth_value_where(col1, 2, cond) over (partition by gp orde
```
-| col1 | cond | gp | agg |
+| col1 | cond | gp | agg |
| -------- | -------- | -------- | -------- |
-| 1 | true | 100 | NULL |
-| 2 | false | 100 | NULL |
-| 3 | NULL | 100 | NULL |
-| 4 | true | 100 | 4 |
+| 1 | true | 100 | NULL |
+| 2 | false | 100 | NULL |
+| 3 | NULL | 100 | NULL |
+| 4 | true | 100 | 4 |
@@ -3568,7 +3568,7 @@ SELECT SIN(0);
-* The value returned by [sin()](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-sin) is always in the range: -1 to 1.
+* The value returned by [sin()](/openmldb_sql/Files/udfs_8h.md#function-sin) is always in the range: -1 to 1.
**Supported Types**:
@@ -3810,12 +3810,12 @@ Alias function: `std`, `stddev_samp`
Example:
-| value |
+| value |
| -------- |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -3852,12 +3852,12 @@ Compute population standard deviation of values, i.e., `sqrt( sum((x_i - avg)^2)
Example:
-| value |
+| value |
| -------- |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -4013,13 +4013,13 @@ Compute sum of values.
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 0 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -4053,13 +4053,13 @@ Compute sum of values grouped by category key and output string. Each group is r
Example:
-| value | catagory |
+| value | catagory |
| -------- | -------- |
-| 0 | x |
-| 1 | y |
-| 2 | x |
-| 3 | y |
-| 4 | x |
+| 0 | x |
+| 1 | y |
+| 2 | x |
+| 3 | y |
+| 4 | x |
```sql
@@ -4098,13 +4098,13 @@ Compute sum of values matching specified condition grouped by category key and o
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | false | y |
-| 2 | false | x |
-| 3 | true | y |
-| 4 | true | x |
+| 0 | true | x |
+| 1 | false | y |
+| 2 | false | x |
+| 3 | true | y |
+| 4 | true | x |
```sql
@@ -4146,13 +4146,13 @@ Compute sum of values match specified condition.
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 0 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -4262,13 +4262,13 @@ Compute top k of values and output string separated by comma. The outputs are so
Example:
-| value |
+| value |
| -------- |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
-| 4 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
+| 4 |
```sql
@@ -4319,11 +4319,11 @@ SELECT key, top1_ratio(key) over () as ratio FROM t1;
```
-| key | ratio |
+| key | ratio |
| -------- | -------- |
-| 1 | 1.0 |
-| 2 | 0.5 |
-| NULL | 0.5 |
+| 1 | 1.0 |
+| 2 | 0.5 |
+| NULL | 0.5 |
@@ -4360,15 +4360,15 @@ Compute average of values matching specified condition grouped by category key.
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | false | y |
-| 2 | false | x |
-| 3 | true | y |
-| 4 | true | x |
-| 5 | true | z |
-| 6 | false | z |
+| 0 | true | x |
+| 1 | false | y |
+| 2 | false | x |
+| 3 | true | y |
+| 4 | true | x |
+| 5 | true | z |
+| 6 | false | z |
```sql
@@ -4420,15 +4420,15 @@ Compute count of values matching specified condition grouped by category key. Ou
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | true | y |
-| 2 | false | x |
-| 3 | true | y |
-| 4 | false | x |
-| 5 | true | z |
-| 6 | true | z |
+| 0 | true | x |
+| 1 | true | y |
+| 2 | false | x |
+| 3 | true | y |
+| 4 | false | x |
+| 5 | true | z |
+| 6 | true | z |
```sql
@@ -4480,15 +4480,15 @@ Compute maximum of values matching specified condition grouped by category key.
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | false | y |
-| 2 | false | x |
-| 3 | true | y |
-| 4 | true | x |
-| 5 | true | z |
-| 6 | false | z |
+| 0 | true | x |
+| 1 | false | y |
+| 2 | false | x |
+| 3 | true | y |
+| 4 | true | x |
+| 5 | true | z |
+| 6 | false | z |
```sql
@@ -4540,15 +4540,15 @@ Compute minimum of values matching specified condition grouped by category key.
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | true | y |
-| 2 | false | x |
-| 3 | true | y |
-| 4 | false | x |
-| 5 | true | z |
-| 6 | true | z |
+| 0 | true | x |
+| 1 | true | y |
+| 2 | false | x |
+| 3 | true | y |
+| 4 | false | x |
+| 5 | true | z |
+| 6 | true | z |
```sql
@@ -4602,15 +4602,15 @@ For each group, ratio value is `value` expr count matches condtion divide total
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 2 | true | x |
-| 4 | true | x |
-| 1 | true | y |
-| 3 | false | y |
-| 5 | true | z |
-| 6 | true | z |
+| 0 | true | x |
+| 2 | true | x |
+| 4 | true | x |
+| 1 | true | y |
+| 3 | false | y |
+| 5 | true | z |
+| 6 | true | z |
```sql
@@ -4661,15 +4661,15 @@ Compute sum of values matching specified condition grouped by category key. Outp
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | true | y |
-| 2 | false | x |
-| 3 | true | y |
-| 4 | false | x |
-| 5 | true | z |
-| 6 | true | z |
+| 0 | true | x |
+| 1 | true | y |
+| 2 | false | x |
+| 3 | true | y |
+| 4 | false | x |
+| 5 | true | z |
+| 6 | true | z |
```sql
@@ -4721,15 +4721,15 @@ Compute average of values matching specified condition grouped by category key.
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | false | y |
-| 2 | false | x |
-| 3 | false | y |
-| 4 | true | x |
-| 5 | true | z |
-| 6 | false | z |
+| 0 | true | x |
+| 1 | false | y |
+| 2 | false | x |
+| 3 | false | y |
+| 4 | true | x |
+| 5 | true | z |
+| 6 | false | z |
```sql
@@ -4781,15 +4781,15 @@ Compute count of values matching specified condition grouped by category key. Ou
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | true | y |
-| 2 | true | x |
-| 3 | false | y |
-| 4 | true | x |
-| 5 | true | z |
-| 6 | true | z |
+| 0 | true | x |
+| 1 | true | y |
+| 2 | true | x |
+| 3 | false | y |
+| 4 | true | x |
+| 5 | true | z |
+| 6 | true | z |
```sql
@@ -4841,15 +4841,15 @@ Compute maximum of values matching specified condition grouped by category key.
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | false | y |
-| 2 | false | x |
-| 3 | true | y |
-| 4 | true | x |
-| 5 | true | z |
-| 6 | false | z |
+| 0 | true | x |
+| 1 | false | y |
+| 2 | false | x |
+| 3 | true | y |
+| 4 | true | x |
+| 5 | true | z |
+| 6 | false | z |
```sql
@@ -4901,15 +4901,15 @@ Compute minimum of values matching specified condition grouped by category key.
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | true | y |
-| 2 | true | x |
-| 3 | true | y |
-| 4 | false | x |
-| 5 | true | z |
-| 6 | true | z |
+| 0 | true | x |
+| 1 | true | y |
+| 2 | true | x |
+| 3 | true | y |
+| 4 | false | x |
+| 5 | true | z |
+| 6 | true | z |
```sql
@@ -4963,15 +4963,15 @@ For each group, ratio value is `value` expr count matches condtion divide total
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 2 | true | x |
-| 4 | true | x |
-| 1 | true | y |
-| 3 | false | y |
-| 5 | true | z |
-| 6 | true | z |
+| 0 | true | x |
+| 2 | true | x |
+| 4 | true | x |
+| 1 | true | y |
+| 3 | false | y |
+| 5 | true | z |
+| 6 | true | z |
```sql
@@ -5022,15 +5022,15 @@ Compute sum of values matching specified condition grouped by category key. Outp
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | true | y |
-| 2 | false | x |
-| 3 | false | y |
-| 4 | true | x |
-| 5 | true | z |
-| 6 | true | z |
+| 0 | true | x |
+| 1 | true | y |
+| 2 | false | x |
+| 3 | false | y |
+| 4 | true | x |
+| 5 | true | z |
+| 6 | true | z |
```sql
@@ -5245,11 +5245,11 @@ Compute population variance of values, i.e., `sum((x_i - avg)^2) / n`
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 3 |
-| 6 |
+| 0 |
+| 3 |
+| 6 |
```sql
@@ -5286,11 +5286,11 @@ Compute population variance of values, i.e., `sum((x_i - avg)^2) / (n-1)`
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 3 |
-| 6 |
+| 0 |
+| 3 |
+| 6 |
```sql
diff --git a/docs/zh/deploy/conf.md b/docs/zh/deploy/conf.md
index ef05f0c8dc9..de538720e5d 100644
--- a/docs/zh/deploy/conf.md
+++ b/docs/zh/deploy/conf.md
@@ -9,6 +9,8 @@
# 如果是部署单机版不需要配置zk_cluster和zk_root_path,把这俩配置注释即可. 部署集群版需要配置这两项,一个集群中所有节点的这两个配置必须保持一致
#--zk_cluster=127.0.0.1:7181
#--zk_root_path=/openmldb_cluster
+# 配置zk认证的用户名和密码, 用冒号分割
+#--zk_cert=user:passwd
# 单机版需要指定tablet的地址, 集群版此配置可忽略
--tablet=127.0.0.1:9921
# 配置log目录
@@ -76,6 +78,8 @@
# 如果启动集群版需要指定zk的地址和集群在zk的节点路径
#--zk_cluster=127.0.0.1:7181
#--zk_root_path=/openmldb_cluster
+# 配置zk认证的用户名和密码, 用冒号分割
+#--zk_cert=user:passwd
# 配置线程池大小,建议和cpu核数一致
--thread_pool_size=24
@@ -222,6 +226,8 @@
# 如果部署的openmldb是集群版,需要指定zk地址和集群zk节点目录
#--zk_cluster=127.0.0.1:7181
#--zk_root_path=/openmldb_cluster
+# 配置zk认证的用户名和密码, 用冒号分割
+#--zk_cert=user:passwd
# 配置日志路径
--openmldb_log_dir=./logs
@@ -254,6 +260,7 @@ zookeeper.connection_timeout=5000
zookeeper.max_retries=10
zookeeper.base_sleep_time=1000
zookeeper.max_connect_waitTime=30000
+#zookeeper.cert=user:passwd
# Spark Config
spark.home=
diff --git a/docs/zh/deploy/index.rst b/docs/zh/deploy/index.rst
index 29007be2d86..91a3116489e 100644
--- a/docs/zh/deploy/index.rst
+++ b/docs/zh/deploy/index.rst
@@ -8,6 +8,5 @@
install_deploy
conf
compile
- integrate_hadoop
offline_integrate_kubernetes
[Alpha]在线引擎基于 Kubernetes 部署
diff --git a/docs/zh/developer/built_in_function_develop_guide.md b/docs/zh/developer/built_in_function_develop_guide.md
index 12231384078..cbc186005cf 100644
--- a/docs/zh/developer/built_in_function_develop_guide.md
+++ b/docs/zh/developer/built_in_function_develop_guide.md
@@ -1034,10 +1034,9 @@ RegisterUdafTemplate("distinct_count")
## 6. 文档管理
-内置函数文档可在 [Built-in Functions](https://openmldb.ai/docs/zh/main/openmldb_sql/functions_and_operators/Files/udfs_8h.html) 查看,它是一个代码生成的 markdown 文件,注意请不要进行直接编辑。
+内置函数文档可在 [Built-in Functions](../openmldb_sql/udfs_8h.md) 查看,它是一个代码生成的 markdown 文件,注意请不要进行直接编辑。
-- 如果需要对新增加的函数添加文档,请参照 2.2.4 配置函数文档 章节,说明了内置函数的文档是在 CPP 源代码中管理的。后续会通过一系列步骤生成如上网页中更加可读的文档, 即`docs/*/openmldb_sql/functions_and_operators/`目录下的内容。
+- 如果需要对新增加的函数添加文档,请参照 2.2.4 配置函数文档 章节,说明了内置函数的文档是在 CPP 源代码中管理的。后续会通过一系列步骤生成如上网页中更加可读的文档, 即`docs/*/openmldb_sql/`目录下的内容。
- 如果需要修改一个已存在函数的文档,可以在文件 `hybridse/src/udf/default_udf_library.cc` 或者 `hybridse/src/udf/default_defs/*_def.cc` 下查找到对应函数的文档说明,进行修改。
OpenMLDB 项目中创建了一个定期天级别的 GitHub Workflow 任务来定期更新这里的相关文档。因此内置函数文档相关的改动只需按照上面的步骤修改对应源代码位置的内容即可,`docs` 目录和网站的内容会随之定期更新。具体的文档生成流程可以查看源代码路径下的 [udf_doxygen](https://github.com/4paradigm/OpenMLDB/tree/main/hybridse/tools/documentation/udf_doxygen)。
-
diff --git a/docs/zh/faq/client_faq.md b/docs/zh/faq/client_faq.md
new file mode 100644
index 00000000000..894cca02e57
--- /dev/null
+++ b/docs/zh/faq/client_faq.md
@@ -0,0 +1,88 @@
+# Client FAQ
+
+## fail to get tablet ... 的错误日志
+
+优先检查集群中tablet server是否意外下线,或者在线表是否不可读写。推荐通过[openmldb_tool](../maintain/diagnose.md)诊断,使用`status`(status --diff)和`inspect online`两个检查命令。
+TODO diag tool 测到offline或online表不正常,会输出警告和下一步应该怎么操作?
+如果只能手动检查,需要两步:
+- `show components`,检查server是否存在在列表中(TaskManager如果下线,将不在表中。Tablet如果下线,将在表中,但状态为offline),以及在列表中的server的状态是否为online。如果存在offline的server,**先将server重启加入集群**。
+- `show table status like '%'`(低版本如果不支持like,需要分别查询系统db和用户db),检查每个表的"Warnings"是否报错。
+
+一般会得到`real replica number X does not match the configured replicanum X`等错误,具体错误信息请参考[SHOW TABLE STATUS](../openmldb_sql/ddl/SHOW_TABLE_STATUS.md)。这些错误都说明表目前是有问题的,无法提供正常读写功能,通常是由于Tablet server异常(例如下线或重启)导致的。
+
+## 为什么收到 Reached timeout 的警告日志?
+```
+rpc_client.h:xxx] request error. [E1008] Reached timeout=xxxms
+```
+这是由于client端本身发送的rpc request的timeout设置小了,client端自己主动断开,注意这是rpc的超时。需要更改通用的`request_timeout`配置。
+1. CLI: 启动时配置`--request_timeout_ms`
+2. JAVA/Python SDK: Option或url中调整`SdkOption.requestTimeout`
+```{note}
+同步的离线命令通常不会出现这个错误,因为同步离线命令的timeout设置为了TaskManager可接受的最长时间。
+```
+
+## 为什么收到 Got EOF of Socket 的警告日志?
+```
+rpc_client.h:xxx] request error. [E1014]Got EOF of Socket{id=x fd=x addr=xxx} (xx)
+```
+这是因为`addr`端主动断开了连接,`addr`的地址大概率是TaskManager。这不代表TaskManager不正常,而是TaskManager端认为这个连接没有活动,超过keepAliveTime了,而主动断开通信channel。
+在0.5.0及以后的版本中,可以调大TaskManager的`server.channel_keep_alive_time`来提高对不活跃channel的容忍度。默认值为1800s(0.5h),特别是使用同步的离线命令时,这个值可能需要适当调大。
+在0.5.0以前的版本中,无法更改此配置,请升级TaskManager版本。
+
+## 离线查询结果显示中文为什么乱码?
+
+在使用离线查询时,可能出现包含中文的查询结果乱码,主要和系统默认编码格式与Spark任务编码格式参数有关。
+
+如果出现乱码情况,可以通过添加Spark高级参数`spark.driver.extraJavaOptions=-Dfile.encoding=utf-8`和`spark.executor.extraJavaOptions=-Dfile.encoding=utf-8`来解决。
+
+客户端配置方法可参考[客户端Spark配置文件](../reference/client_config/client_spark_config.md),也可以在TaskManager配置文件中添加此项配置。
+
+```
+spark.default.conf=spark.driver.extraJavaOptions=-Dfile.encoding=utf-8;spark.executor.extraJavaOptions=-Dfile.encoding=utf-8
+```
+
+## 如何配置TaskManager来访问开启Kerberos的Yarn集群?
+
+如果Yarn集群开启Kerberos认证,TaskManager可以通过添加以下配置来访问开启Kerberos认证的Yarn集群。注意请根据实际配置修改keytab路径以及principal账号。
+
+```
+spark.default.conf=spark.yarn.keytab=/tmp/test.keytab;spark.yarn.principal=test@EXAMPLE.COM
+```
+
+## 如何配置客户端的core日志?
+
+客户端core日志主要有两种,zk日志和sdk日志(glog日志),两者是独立的。
+
+zk日志:
+1. CLI:启动时配置`--zk_log_level`调整level,`--zk_log_file`配置日志保存文件。
+2. JAVA/Python SDK:Option或url中使用`zkLogLevel`调整level,`zkLogFile`配置日志保存文件。
+
+- `zk_log_level`(int, 默认=0, 即DISABLE_LOGGING):
+打印这个等级及**以下**等级的日志。0-禁止所有zk log, 1-error, 2-warn, 3-info, 4-debug。
+
+sdk日志(glog日志):
+1. CLI:启动时配置`--glog_level`调整level,`--glog_dir`配置日志保存文件。
+2. JAVA/Python SDK:Option或url中使用`glogLevel`调整level,`glogDir`配置日志保存文件。
+
+- `glog_level`(int, 默认=1, 即WARNING):
+打印这个等级及**以上**等级的日志。 INFO, WARNING, ERROR, and FATAL日志分别对应 0, 1, 2, and 3。
+
+
+## 插入错误,日志显示`please use getInsertRow with ... first`
+
+在JAVA client使用InsertPreparedStatement进行插入,或在Python中使用sql和parameter进行插入时,client底层实际有cache影响,第一步`getInsertRow`生成sql cache并返回sql还需要补充的parameter信息,第二步才会真正执行insert,而执行insert需要使用第一步缓存的sql cache。所以,当多线程使用同一个client时,可能因为插入和查询频繁更新cache表,将你想要执行的insert sql cache淘汰掉了,所以会出现好像第一步`getInsertRow`并未执行的样子。
+
+目前可以通过调大`maxSqlCacheSize`这一配置项来避免错误。仅JAVA/Python SDK支持配置。
+
+## 离线命令Spark报错
+
+`java.lang.OutOfMemoryError: Java heap space`
+
+离线命令的Spark配置默认为`local[*]`,并发较高可能出现OutOfMemoryError错误,请调整`spark.driver.memory`和`spark.executor.memory`两个spark配置项。可以写在TaskManager运行目录的`conf/taskmanager.properties`的`spark.default.conf`并重启TaskManager,或者使用CLI客户端进行配置,参考[客户端Spark配置文件](../reference/client_config/client_spark_config.md)。
+```
+spark.default.conf=spark.driver.memory=16g;spark.executor.memory=16g
+```
+
+如果是Yarn模式,出现`Container killed by YARN for exceeding memory limits. 5 GB of 5 GB physical memory used. Consider boosting spark.yarn.executor.memoryOverhead.`错误,可按提示调大`spark.yarn.executor.memoryOverhead`。
+
+如果是local模式,主要调整`spark.driver.memory`。
diff --git a/docs/zh/faq/index.rst b/docs/zh/faq/index.rst
new file mode 100644
index 00000000000..a5d1e94a540
--- /dev/null
+++ b/docs/zh/faq/index.rst
@@ -0,0 +1,10 @@
+=============================
+FAQ
+=============================
+
+
+.. toctree::
+ :maxdepth: 1
+
+ client_faq
+ server_faq
diff --git a/docs/zh/faq/server_faq.md b/docs/zh/faq/server_faq.md
new file mode 100644
index 00000000000..1b89fd383d6
--- /dev/null
+++ b/docs/zh/faq/server_faq.md
@@ -0,0 +1,61 @@
+# Server FAQ
+
+Server中有任何上下线变化或问题,都请先使用[openmldb_tool](../maintain/diagnose.md)的`status`和`inspect online`命令检查集群是否正常。
+
+## 部署和启动 FAQ
+
+### 1. 如何确认集群已经正常运行?
+虽然有一键启动脚本,但由于配置繁多,可能出现“端口已被占用”,“目录无读写权限”等问题。这些问题都是server进程运行之后才能发现,退出后没有及时反馈。(如果配置了监控,可以通过监控直接检查。)
+所以,请先确认集群的所有server进程都正常运行。
+
+可以通过`ps axu | grep openmldb`或sql命令`show components;`来查询。(注意,如果你使用了守护进程,openmldb server进程可能是在启动停止的循环中,并不代表持续运行,可以通过日志或`show components;`连接时间来确认。)
+
+如果进程都活着,集群还是表现不正常,需要查询一下server日志。可以优先看WARN和ERROR级日志,很大概率上,它们就是根本原因。
+
+### 2. 如果数据没有自动恢复成功怎么办?
+
+通常情况,当我们重启服务,表中数据会自动进行恢复,但有些情况可能会造成恢复失败,通常失败的情况包括:
+
+- tablet异常退出
+- 多副本表多个副本所在的tablets同时重启或者重启太快,造成某些`auto_failover`操作还没完成tablet就重启
+- auto_failover设成`false`
+
+当服务启动成功后,可以通过`gettablestatus`获得所有表的状态:
+```
+python tools/openmldb_ops.py --openmldb_bin_path=./bin/openmldb --zk_cluster=172.24.4.40:30481 --zk_root_path=/openmldb --cmd=gettablestatus
+```
+
+如果表中有`Warnings`,可以通过`recoverdata`来自动恢复数据:
+```
+python tools/openmldb_ops.py --openmldb_bin_path=./bin/openmldb --zk_cluster=172.24.4.40:30481 --zk_root_path=/openmldb --cmd=recoverdata
+```
+
+## Server FAQ
+
+### 1. 为什么日志中有 Fail to write into Socket 的警告日志?
+```
+http_rpc_protocol.cpp:911] Fail to write into Socket{id=xx fd=xx addr=xxx} (0x7a7ca00): Unknown error 1014 [1014]
+```
+这是server端会打印的日志。一般是client端使用了连接池或短连接模式,在RPC超时后会关闭连接,server写回response时发现连接已经关了就报这个错。Got EOF就是指之前已经收到了EOF(对端正常关闭了连接)。client端使用单连接模式server端一般不会报这个。
+
+### 2. 表数据的ttl初始设置不合适,如何调整?
+这需要使用nsclient来修改,普通client无法做到。nsclient启动方式与命令,见[ns client](../maintain/cli.md#ns-client)。
+
+在nsclient中使用命令`setttl`可以更改一个表的ttl,类似
+```
+setttl table_name ttl_type ttl [ttl] [index_name]
+```
+可以看到,如果在命令末尾配置index的名字,可以做到只修改单个index的ttl。
+```{caution}
+`setttl`的改变不会及时生效,会受到tablet server的配置`gc_interval`的影响。(每台tablet server的配置是独立的,互不影响。)
+
+举例说明,有一个tablet server的`gc_interval`是1h,那么ttl的配置重载,会在下一次gc的最后时刻进行(最坏情况下,会在1h后重载)。重载ttl的这一次gc就不会按最新ttl来淘汰数据。再下一次gc时才会使用最新ttl进行数据淘汰。
+
+所以,**ttl更改后,需要等待两次gc interval的时间才会生效**。请耐心等待。
+
+当然,你可以调整tablet server的`gc_interval`,但这个配置无法动态更改,只能重启生效。所以,如果内存压力较大,可以尝试扩容,迁移数据分片,来减少内存压力。不推荐轻易调整`gc_interval`。
+```
+
+### 3. 出现警告日志:Last Join right table is empty,这是什么意思?
+通常来讲,这是一个正常现象,不代表集群异常。只是runner中join右表为空,是可能的现象,大概率是数据问题。
+
diff --git a/docs/zh/index.rst b/docs/zh/index.rst
index 1a3fd0deb56..f3b3f63106b 100644
--- a/docs/zh/index.rst
+++ b/docs/zh/index.rst
@@ -16,3 +16,4 @@ OpenMLDB 文档 (|version|)
maintain/index
reference/index
developer/index
+ faq/index
diff --git a/docs/zh/maintain/faq.md b/docs/zh/maintain/faq.md
deleted file mode 100644
index 454bfb500ad..00000000000
--- a/docs/zh/maintain/faq.md
+++ /dev/null
@@ -1,130 +0,0 @@
-# 运维 FAQ
-
-## 部署和启动 FAQ
-
-### 1. 如何确认集群已经正常运行?
-虽然有一键启动脚本,但由于配置繁多,可能出现“端口已被占用”,“目录无读写权限”等问题。这些问题都是server进程运行之后才能发现,退出后没有及时反馈。(如果配置了监控,可以通过监控直接检查。)
-所以,请先确认集群的所有server进程都正常运行。
-
-可以通过`ps axu | grep openmldb`或sql命令`show components;`来查询。(注意,如果你使用了守护进程,openmldb server进程可能是在启动停止的循环中,并不代表持续运行,可以通过日志或`show components;`连接时间来确认。)
-
-如果进程都活着,集群还是表现不正常,需要查询一下server日志。可以优先看WARN和ERROR级日志,很大概率上,它们就是根本原因。
-
-### 2. 如果数据没有自动恢复成功怎么办?
-
-通常情况,当我们重启服务,表中数据会自动进行恢复,但有些情况可能会造成恢复失败,通常失败的情况包括:
-
-- tablet异常退出
-- 多副本表多个副本所在的tablets同时重启或者重启太快,造成某些`auto_failover`操作还没完成tablet就重启
-- auto_failover设成`false`
-
-当服务启动成功后,可以通过`gettablestatus`获得所有表的状态:
-```
-python tools/openmldb_ops.py --openmldb_bin_path=./bin/openmldb --zk_cluster=172.24.4.40:30481 --zk_root_path=/openmldb --cmd=gettablestatus
-```
-
-如果表中有`Warnings`,可以通过`recoverdata`来自动恢复数据:
-```
-python tools/openmldb_ops.py --openmldb_bin_path=./bin/openmldb --zk_cluster=172.24.4.40:30481 --zk_root_path=/openmldb --cmd=recoverdata
-```
-
-## Server FAQ
-
-### 1. 为什么日志中有 Fail to write into Socket 的警告日志?
-```
-http_rpc_protocol.cpp:911] Fail to write into Socket{id=xx fd=xx addr=xxx} (0x7a7ca00): Unknown error 1014 [1014]
-```
-这是server端会打印的日志。一般是client端使用了连接池或短连接模式,在RPC超时后会关闭连接,server写回response时发现连接已经关了就报这个错。Got EOF就是指之前已经收到了EOF(对端正常关闭了连接)。client端使用单连接模式server端一般不会报这个。
-
-### 2. 表数据的ttl初始设置不合适,如何调整?
-这需要使用nsclient来修改,普通client无法做到。nsclient启动方式与命令,见[ns client](../maintain/cli.md#ns-client)。
-
-在nsclient中使用命令`setttl`可以更改一个表的ttl,类似
-```
-setttl table_name ttl_type ttl [ttl] [index_name]
-```
-可以看到,如果在命令末尾配置index的名字,可以做到只修改单个index的ttl。
-```{caution}
-`setttl`的改变不会及时生效,会受到tablet server的配置`gc_interval`的影响。(每台tablet server的配置是独立的,互不影响。)
-
-举例说明,有一个tablet server的`gc_interval`是1h,那么ttl的配置重载,会在下一次gc的最后时刻进行(最坏情况下,会在1h后重载)。重载ttl的这一次gc就不会按最新ttl来淘汰数据。再下一次gc时才会使用最新ttl进行数据淘汰。
-
-所以,**ttl更改后,需要等待两次gc interval的时间才会生效**。请耐心等待。
-
-当然,你可以调整tablet server的`gc_interval`,但这个配置无法动态更改,只能重启生效。所以,如果内存压力较大,可以尝试扩容,迁移数据分片,来减少内存压力。不推荐轻易调整`gc_interval`。
-```
-
-### 3. 出现警告日志:Last Join right table is empty,这是什么意思?
-通常来讲,这是一个正常现象,不代表集群异常。只是runner中join右表为空,是可能的现象,大概率是数据问题。
-
-## Client FAQ
-
-### 1. 为什么收到 Reached timeout 的警告日志?
-```
-rpc_client.h:xxx] request error. [E1008] Reached timeout=xxxms
-```
-这是由于client端本身发送的rpc request的timeout设置小了,client端自己主动断开,注意这是rpc的超时。需要更改通用的`request_timeout`配置。
-1. CLI: 启动时配置`--request_timeout_ms`
-2. JAVA/Python SDK: Option或url中调整`SdkOption.requestTimeout`
-```{note}
-同步的离线命令通常不会出现这个错误,因为同步离线命令的timeout设置为了TaskManager可接受的最长时间。
-```
-### 2. 为什么收到 Got EOF of Socket 的警告日志?
-```
-rpc_client.h:xxx] request error. [E1014]Got EOF of Socket{id=x fd=x addr=xxx} (xx)
-```
-这是因为`addr`端主动断开了连接,`addr`的地址大概率是TaskManager。这不代表TaskManager不正常,而是TaskManager端认为这个连接没有活动,超过keepAliveTime了,而主动断开通信channel。
-在0.5.0及以后的版本中,可以调大TaskManager的`server.channel_keep_alive_time`来提高对不活跃channel的容忍度。默认值为1800s(0.5h),特别是使用同步的离线命令时,这个值可能需要适当调大。
-在0.5.0以前的版本中,无法更改此配置,请升级TaskManager版本。
-
-### 3. 离线查询结果显示中文为什么乱码?
-
-在使用离线查询时,可能出现包含中文的查询结果乱码,主要和系统默认编码格式与Spark任务编码格式参数有关。
-
-如果出现乱码情况,可以通过添加Spark高级参数`spark.driver.extraJavaOptions=-Dfile.encoding=utf-8`和`spark.executor.extraJavaOptions=-Dfile.encoding=utf-8`来解决。
-
-客户端配置方法可参考[客户端Spark配置文件](../reference/client_config/client_spark_config.md),也可以在TaskManager配置文件中添加此项配置。
-
-```
-spark.default.conf=spark.driver.extraJavaOptions=-Dfile.encoding=utf-8;spark.executor.extraJavaOptions=-Dfile.encoding=utf-8
-```
-
-### 4. 如何配置TaskManager来访问开启Kerberos的Yarn集群?
-
-如果Yarn集群开启Kerberos认证,TaskManager可以通过添加以下配置来访问开启Kerberos认证的Yarn集群。注意请根据实际配置修改keytab路径以及principal账号。
-
-```
-spark.default.conf=spark.yarn.keytab=/tmp/test.keytab;spark.yarn.principal=test@EXAMPLE.COM
-```
-
-### 5. 如何配置客户端的core日志?
-
-客户端core日志主要有两种,zk日志和sdk日志(glog日志),两者是独立的。
-
-zk日志:
-1. CLI:启动时配置`--zk_log_level`调整level,`--zk_log_file`配置日志保存文件。
-2. JAVA/Python SDK:Option或url中使用`zkLogLevel`调整level,`zkLogFile`配置日志保存文件。
-
-- `zk_log_level`(int, 默认=0, 即DISABLE_LOGGING):
-打印这个等级及**以下**等级的日志。0-禁止所有zk log, 1-error, 2-warn, 3-info, 4-debug。
-
-sdk日志(glog日志):
-1. CLI:启动时配置`--glog_level`调整level,`--glog_dir`配置日志保存文件。
-2. JAVA/Python SDK:Option或url中使用`glogLevel`调整level,`glogDir`配置日志保存文件。
-
-- `glog_level`(int, 默认=1, 即WARNING):
-打印这个等级及**以上**等级的日志。 INFO, WARNING, ERROR, and FATAL日志分别对应 0, 1, 2, and 3。
-
-
-### 6. 插入错误,日志显示`please use getInsertRow with ... first`
-
-在JAVA client使用InsertPreparedStatement进行插入,或在Python中使用sql和parameter进行插入时,client底层实际有cache影响,第一步`getInsertRow`生成sql cache并返回sql还需要补充的parameter信息,第二步才会真正执行insert,而执行insert需要使用第一步缓存的sql cache。所以,当多线程使用同一个client时,可能因为插入和查询频繁更新cache表,将你想要执行的insert sql cache淘汰掉了,所以会出现好像第一步`getInsertRow`并未执行的样子。
-
-目前可以通过调大`maxSqlCacheSize`这一配置项来避免错误。仅JAVA/Python SDK支持配置。
-
-### 7. 离线命令错误`java.lang.OutOfMemoryError: Java heap space`
-
-离线命令的Spark配置默认为`local[*]`,并发较高可能出现OutOfMemoryError错误,请调整`spark.driver.memory`和`spark.executor.memory`两个spark配置项。可以写在TaskManager运行目录的`conf/taskmanager.properties`的`spark.default.conf`并重启TaskManager,或者使用CLI客户端进行配置,参考[客户端Spark配置文件](../reference/client_config/client_spark_config.md)。
-```
-spark.default.conf=spark.driver.memory=16g;spark.executor.memory=16g
-```
diff --git a/docs/zh/maintain/index.rst b/docs/zh/maintain/index.rst
index a114cccef15..bdb0b551e87 100644
--- a/docs/zh/maintain/index.rst
+++ b/docs/zh/maintain/index.rst
@@ -16,4 +16,3 @@
multi_cluster
diagnose
openmldb_ops
- faq
diff --git a/docs/zh/maintain/openmldb_ops.md b/docs/zh/maintain/openmldb_ops.md
index 10b53437b52..591ae355a75 100644
--- a/docs/zh/maintain/openmldb_ops.md
+++ b/docs/zh/maintain/openmldb_ops.md
@@ -31,9 +31,12 @@
**使用示例**
```
-python tools/openmldb_ops.py --openmldb_bin_path=./bin/openmldb --zk_cluster=172.24.4.40:30481 --zk_root_path=/openmldb --cmd=scaleout
+python tools/openmldb_ops.py --openmldb_bin_path=./bin/openmldb --zk_cluster=0.0.0.0:2181 --zk_root_path=/openmldb --cmd=scaleout
+python tools/openmldb_ops.py --openmldb_bin_path=./bin/openmldb --zk_cluster=0.0.0.0:2181 --zk_root_path=/openmldb --cmd=recoverdata
```
+注:理论上openmldb_ops不要求版本匹配,高版本openmldb_ops可以操作低版本的openmldb集群。
+
### 系统要求
- 要求python2.7及以上版本
- `showopstatus`和`showtablestatus`需要`prettytable`依赖
diff --git a/docs/zh/openmldb_sql/ddl/CREATE_TABLE_STATEMENT.md b/docs/zh/openmldb_sql/ddl/CREATE_TABLE_STATEMENT.md
index 1dffc9d4cae..a44f699eed3 100644
--- a/docs/zh/openmldb_sql/ddl/CREATE_TABLE_STATEMENT.md
+++ b/docs/zh/openmldb_sql/ddl/CREATE_TABLE_STATEMENT.md
@@ -450,6 +450,11 @@ StorageMode
::= 'Memory'
| 'HDD'
| 'SSD'
+CompressTypeOption
+ ::= 'COMPRESS_TYPE' '=' CompressType
+CompressType
+ ::= 'NoCompress'
+ | 'Snappy'
```
@@ -460,6 +465,7 @@ StorageMode
| `REPLICANUM` | 配置表的副本数。请注意,副本数只有在集群版中才可以配置。 | `OPTIONS (REPLICANUM=3)` |
| `DISTRIBUTION` | 配置分布式的节点endpoint。一般包含一个Leader节点和若干Follower节点。`(leader, [follower1, follower2, ..])`。不显式配置时,OpenMLDB会自动根据环境和节点来配置`DISTRIBUTION`。 | `DISTRIBUTION = [ ('127.0.0.1:6527', [ '127.0.0.1:6528','127.0.0.1:6529' ])]` |
| `STORAGE_MODE` | 表的存储模式,支持的模式有`Memory`、`HDD`或`SSD`。不显式配置时,默认为`Memory`。
如果需要支持非`Memory`模式的存储模式,`tablet`需要额外的配置选项,具体可参考[tablet配置文件 conf/tablet.flags](../../../deploy/conf.md)。 | `OPTIONS (STORAGE_MODE='HDD')` |
+| `COMPRESS_TYPE` | 指定表的压缩类型。目前只支持Snappy压缩。默认为 `NoCompress`,即不压缩。 | `OPTIONS (COMPRESS_TYPE='Snappy')` |
#### 磁盘表与内存表区别
- 磁盘表对应`STORAGE_MODE`的取值为`HDD`或`SSD`。内存表对应的`STORAGE_MODE`取值为`Memory`。
@@ -488,11 +494,11 @@ DESC t1;
--- -------------------- ------ ---------- ------ ---------------
1 INDEX_0_1651143735 col1 std_time 0min kAbsoluteTime
--- -------------------- ------ ---------- ------ ---------------
- --------------
- storage_mode
- --------------
- HDD
- --------------
+ --------------- --------------
+ compress_type storage_mode
+ --------------- --------------
+ NoCompress HDD
+ --------------- --------------
```
创建一张表,指定分片的分布状态
```sql
diff --git a/docs/zh/openmldb_sql/ddl/DESC_STATEMENT.md b/docs/zh/openmldb_sql/ddl/DESC_STATEMENT.md
index 1088411dc03..ca0d0de87bf 100644
--- a/docs/zh/openmldb_sql/ddl/DESC_STATEMENT.md
+++ b/docs/zh/openmldb_sql/ddl/DESC_STATEMENT.md
@@ -56,11 +56,11 @@ desc t1;
--- -------------------- ------ ---------- ---------- ---------------
1 INDEX_0_1658136511 col1 std_time 43200min kAbsoluteTime
--- -------------------- ------ ---------- ---------- ---------------
- --------------
- storage_mode
- --------------
- Memory
- --------------
+ --------------- --------------
+ compress_type storage_mode
+ --------------- --------------
+ NoCompress Memory
+ --------------- --------------
```
diff --git a/docs/zh/openmldb_sql/ddl/SHOW_CREATE_TABLE_STATEMENT.md b/docs/zh/openmldb_sql/ddl/SHOW_CREATE_TABLE_STATEMENT.md
index e697f687846..22c08fb754e 100644
--- a/docs/zh/openmldb_sql/ddl/SHOW_CREATE_TABLE_STATEMENT.md
+++ b/docs/zh/openmldb_sql/ddl/SHOW_CREATE_TABLE_STATEMENT.md
@@ -21,7 +21,7 @@ show create table t1;
`c3` bigInt,
`c4` timestamp,
INDEX (KEY=`c1`, TS=`c4`, TTL_TYPE=ABSOLUTE, TTL=0m)
- ) OPTIONS (PARTITIONNUM=8, REPLICANUM=2, STORAGE_MODE='HDD');
+ ) OPTIONS (PARTITIONNUM=8, REPLICANUM=2, STORAGE_MODE='HDD', COMPRESS_TYPE='NoCompress');
------- ---------------------------------------------------------------
1 rows in set
diff --git a/docs/zh/openmldb_sql/ddl/TRUNCATE_TABLE_STATEMENT.md b/docs/zh/openmldb_sql/ddl/TRUNCATE_TABLE_STATEMENT.md
new file mode 100644
index 00000000000..8ffb623f26f
--- /dev/null
+++ b/docs/zh/openmldb_sql/ddl/TRUNCATE_TABLE_STATEMENT.md
@@ -0,0 +1,16 @@
+# TRUNCATE TABLE
+
+```
+TRUNCATE TABLE table_name
+```
+
+`TRUNCATE TABLE`语句用于清空指定的表。
+
+## Example: 清空t1表
+
+```sql
+TRUNCATE TABLE t1;
+-- Truncate table t1? yes/no
+-- yes
+-- SUCCEED
+```
\ No newline at end of file
diff --git a/docs/zh/openmldb_sql/ddl/index.rst b/docs/zh/openmldb_sql/ddl/index.rst
index efd36734261..9e420def154 100644
--- a/docs/zh/openmldb_sql/ddl/index.rst
+++ b/docs/zh/openmldb_sql/ddl/index.rst
@@ -24,3 +24,4 @@
SHOW_FUNCTIONS
DROP_FUNCTION
SHOW_CREATE_TABLE_STATEMENT
+ TRUNCATE_TABLE_STATEMENT
\ No newline at end of file
diff --git a/docs/zh/openmldb_sql/dql/WINDOW_CLAUSE.md b/docs/zh/openmldb_sql/dql/WINDOW_CLAUSE.md
index 18f49149429..6dacf10c268 100644
--- a/docs/zh/openmldb_sql/dql/WINDOW_CLAUSE.md
+++ b/docs/zh/openmldb_sql/dql/WINDOW_CLAUSE.md
@@ -86,27 +86,43 @@ SELECT select_expr [,select_expr...], window_function_name(expr) OVER window_nam
再看窗口想要什么大小,这里要分窗口类型说明:
1. 时间窗口:时间窗口通常使用s, m, h, d等时间单位,如果没有单位,默认为ms。比如:
- [3小时前,当前行] - 3h preceding and current row
- [3小时前,30分钟前] - 3h preceding and 30m preceding
+ - [3小时前,当前行] - 3h preceding and current row
+ - [3小时前,30分钟前] - 3h preceding and 30m preceding
1. 条数窗口:条数不需要单位。比如:
- [10条,当前行] - 10 preceding and current row
- [10条,3条] - 10 preceding and 3 preceding
+ - [10条,当前行] - 10 preceding and current row
+ - [10条,3条] - 10 preceding and 3 preceding
### 如何推断窗口是什么样的?
首先,先明确是什么执行模式:
-离线模式,即批模式,它是对from表的每一行都做一次窗口划分与计算。因此,每一行对应产生一行SQL结果。
-请求模式,会带一条请求行,它会将请求行当做from表的数据,只对该行做窗口划分和计算,因此,只产生一行SQL结果。
+离线模式或在线预览模式,合称为批模式,它是对from表的每一行都做一次窗口划分与计算。因此,每一行对应产生一行SQL结果。
+请求模式,会带一条请求行,它会将请求行当做from表的数据,只对该行做窗口划分和计算,因此,只产生一行SQL结果。注意,不会将请求行插入到表中。
-再看,如何划分窗口:
+批模式可以看作多次请求模式,所以下面以请求模式查询为例,分三步说明如何划分窗口:
-我们将批模式看作多次请求模式来看待。所以,对一次请求行来说,窗口只可能包含,它自己,与它的partition by列值相等的行(可能的全集)。
+- 对一次请求行来说,窗口**只可能**包含它自己,以及与它的partition by列值相等的行
-partition key相等的所有行,还不是窗口,经由order by列排序后,还需要排除窗口范围以外的数据。比如,10 preceding and current row的条数窗口,就要抛弃10行以外的数据行(第10行包含在窗口内),又因为包括current row,于是窗口一共有11行数据。
+- partition key相等的所有行,它们不是乱序,而是按**order by列**排序
-* preceding为闭区间,包含该条,开区间使用open preceding
+- 根据rows/rows_range排除窗口范围以外的数据
+ - rows:例如,10 preceding and current row的条数窗口,就要抛弃10行以外的数据行(第10行包含在窗口内),又因为包括current row,于是窗口一共有11行数据。
+ - rows_range:例如,10s preceding and current row的时间窗口,就要抛弃10s以外的数据行(第10s包含在窗口内),也包括current row,于是窗口只会出现order key值在`[current_row_order_key - 10s, current_row_order_key]`范围内的数据行。
+
+```{note}
+窗口划分范围,仅与order by列相关。如果认为窗口内行数或具体某数据不符合预期范围,一般是窗口写法的误解,极小概率是SQL引擎计算有误。请以某一个partition key为例,分步检查表的数据(以下操作都是在线模式):
+- 提取与该key相等的所有数据。可以使用`select * from table where partition_key = xxx`来提取,或使用源数据文件,通过pandas/spark等工具提取。
+- 再按order by列排序,这类似于window设置窗口为unbounded preceding and current row。此处,可以将手动处理的数据和OpenMLDB的unbounded window计算结果进行对比。
+ - 由于OpenMLDB只支持在窗口内聚合,很难看到窗口的数据全貌,而且窗口内数据较多时,查看全部也是很难的。通常是使用count/min/max/lag等聚合函数来衡量窗口内数据的数量和范围。
+ - 如果仍需要通过窗口内具体数据来确认,可以使用top来展示前k大的值,但它会对列进行再排序,不能等同于窗口排序(order by列排序)。其他聚合函数,参考[udf函数](../udfs_8h.md)。
+- 最后,再检查窗口的rows/rows_range设置是否符合预期。
+ - 通常情况,如果前两步没问题,条数划分一般不会有问题。
+ - 时间划分,需要注意时间单位。OpenMLDB中order by列无论是timestamp还是bigint,都当作整数来计算的,timestamp是转换为ms为单位的整数。我们支持在窗口设置中使用时间单位,但不会对表中的order by列值做任何单位假设。
+例如,如果order by列并非timestamp,而是设置整数`20230905`,在时间窗口设置5ms时,窗口的范围是`[20230905 - 5, 20230905]`,而不是`[20230905 00:00:00 - 5ms, 20230905]`。**请谨慎对待order by列,最方便的做法是,任何时间格式都将其转换为timestamp或ms为单位的bigint**。
+```
+
+* preceding为闭区间,包含该条,开区间需使用open preceding
窗口还可以exclude current time,current row等,详情见下文。
@@ -332,5 +348,5 @@ WINDOW w1 AS (PARTITION BY col1 ORDER BY col5 ROWS_RANGE BETWEEN 10s PRECEDING A
```
```{seealso}
-窗口计算可使用的聚合函数,参考[Built-in Functions](../functions_and_operators/Files/udfs_8h.md)
+窗口计算可使用的聚合函数,参考[Built-in Functions](../udfs_8h.md)
```
diff --git a/docs/zh/openmldb_sql/functions_and_operators/index.rst b/docs/zh/openmldb_sql/functions_and_operators/index.rst
index 36329c03045..8dfb1e18cee 100644
--- a/docs/zh/openmldb_sql/functions_and_operators/index.rst
+++ b/docs/zh/openmldb_sql/functions_and_operators/index.rst
@@ -7,4 +7,3 @@
:maxdepth: 1
operators
- Files/udfs_8h
diff --git a/docs/zh/openmldb_sql/index.rst b/docs/zh/openmldb_sql/index.rst
index 7d00e9ed532..149147f1f55 100644
--- a/docs/zh/openmldb_sql/index.rst
+++ b/docs/zh/openmldb_sql/index.rst
@@ -10,6 +10,7 @@ OpenMLDB SQL
language_structure/index
data_types/index
functions_and_operators/index
+ udfs_8h
dql/index
dml/index
ddl/index
diff --git a/docs/zh/openmldb_sql/sql_difference.md b/docs/zh/openmldb_sql/sql_difference.md
index 3118f8f71bb..3d24f399f4d 100644
--- a/docs/zh/openmldb_sql/sql_difference.md
+++ b/docs/zh/openmldb_sql/sql_difference.md
@@ -54,7 +54,7 @@
| LAST JOIN | ✓ | ✓ | ✕ |
| 子查询 / WITH 子句 | ✓ | ✓ | ✕ |
-虽然在线请求模式无法支持 `WHERE` 子句,但是部分功能可以通过带有 `_where` 后缀的计算函数实现,比如 `count_where`, `avg_where` 等,详情查看[内置计算函数文档](functions_and_operators/Files/udfs_8h.md)。
+虽然在线请求模式无法支持 `WHERE` 子句,但是部分功能可以通过带有 `_where` 后缀的计算函数实现,比如 `count_where`, `avg_where` 等,详情查看[内置计算函数文档](./udfs_8h.md)。
### LIMIT 子句
@@ -127,7 +127,7 @@ OpenMLDB (>= v0.7.2) 支持非递归的 WITH 子句。WITH 子句等价于其它
特殊限制:
- OpenMLDB v0.6.0 开始支持在线预览模式的全表聚合,但注意所描述的[扫描限制配置](https://openmldb.feishu.cn/wiki/wikcnhBl4NsKcAX6BO9NDtKAxDf#doxcnLWICKzccMuPiWwdpVjSaIe)。
-- OpenMLDB 有较多的聚合函数扩展,请查看产品文档具体查询所支持的函数 [OpenMLDB 内置函数](../openmldb_sql/functions_and_operators/Files/udfs_8h.md)。
+- OpenMLDB 有较多的聚合函数扩展,请查看产品文档具体查询所支持的函数 [OpenMLDB 内置函数](../openmldb_sql/udfs_8h.md)。
## 扩展语法
diff --git a/docs/zh/openmldb_sql/udf_develop_guide.md b/docs/zh/openmldb_sql/udf_develop_guide.md
index 7fe4e81988d..761e66dea6f 100644
--- a/docs/zh/openmldb_sql/udf_develop_guide.md
+++ b/docs/zh/openmldb_sql/udf_develop_guide.md
@@ -11,7 +11,7 @@
#### 2.1.1 C++函数名规范
- C++内置函数名统一使用[snake_case](https://en.wikipedia.org/wiki/Snake_case)风格
- 要求函数名能清晰表达函数功能
-- 函数不能重名。函数名不能和内置函数及其他自定义函数重名。所有内置函数的列表参考[这里](../openmldb_sql/functions_and_operators/Files/udfs_8h.md)
+- 函数不能重名。函数名不能和内置函数及其他自定义函数重名。所有内置函数的列表参考[这里](../openmldb_sql/udfs_8h.md)
#### 2.1.2 C++类型与SQL类型对应关系
内置C++函数的参数类型限定为:BOOL类型,数值类型,时间戳日期类型和字符串类型。C++类型SQL类型对应关系如下:
diff --git a/docs/zh/openmldb_sql/functions_and_operators/Files/udfs_8h.md b/docs/zh/openmldb_sql/udfs_8h.md
similarity index 68%
rename from docs/zh/openmldb_sql/functions_and_operators/Files/udfs_8h.md
rename to docs/zh/openmldb_sql/udfs_8h.md
index d1696b6c764..9cfab05977f 100644
--- a/docs/zh/openmldb_sql/functions_and_operators/Files/udfs_8h.md
+++ b/docs/zh/openmldb_sql/udfs_8h.md
@@ -10,158 +10,158 @@ title: udfs/udfs.h
| Name | Description |
| -------------- | -------------- |
-| **[abs](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-abs)**()|
Return the absolute value of expr. |
-| **[acos](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-acos)**()|
Return the arc cosine of expr. |
-| **[add](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-add)**()|
Compute sum of two arguments. |
-| **[add_months](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-add-months)**()|
adds an integer months to a given date, returning the resulting date. |
-| **[array_contains](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-array-contains)**()|
array_contains(array, value) - Returns true if the array contains the value. |
-| **[asin](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-asin)**()|
Return the arc sine of expr. |
-| **[at](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-at)**()| |
-| **[atan](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-atan)**()|
Return the arc tangent of expr If called with one parameter, this function returns the arc tangent of expr. If called with two parameters X and Y, this function returns the arc tangent of Y / X. |
-| **[atan2](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-atan2)**()|
Return the arc tangent of Y / X.. |
-| **[avg](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-avg)**()|
Compute average of values. |
-| **[avg_cate](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-avg-cate)**()|
Compute average of values grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
-| **[avg_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-avg-cate-where)**()|
Compute average of values matching specified condition grouped by category key and output string. Each group is represented as 'K:V', separated by comma, and sorted by key in ascend order. |
-| **[avg_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-avg-where)**()|
Compute average of values match specified condition. |
-| **[bigint](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-bigint)**()| |
-| **[bool](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-bool)**()|
Cast string expression to bool. |
-| **[ceil](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-ceil)**()|
Return the smallest integer value not less than the expr. |
-| **[ceiling](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-ceiling)**()| |
-| **[char](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-char)**()|
Returns the ASCII character having the binary equivalent to expr. If n >= 256 the result is equivalent to char(n % 256). |
-| **[char_length](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-char-length)**()|
Returns the length of the string. It is measured in characters and multibyte character string is not supported. |
-| **[character_length](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-character-length)**()| |
-| **[concat](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-concat)**()|
This function returns a string resulting from the joining of two or more string values in an end-to-end manner. (To add a separating value during joining, see concat_ws.) |
-| **[concat_ws](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-concat-ws)**()|
Returns a string resulting from the joining of two or more string value in an end-to-end manner. It separates those concatenated string values with the delimiter specified in the first function argument. |
-| **[cos](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-cos)**()|
Return the cosine of expr. |
-| **[cot](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-cot)**()|
Return the cotangent of expr. |
-| **[count](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-count)**()|
Compute number of values. |
-| **[count_cate](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-count-cate)**()|
Compute count of values grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
-| **[count_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-count-cate-where)**()|
Compute count of values matching specified condition grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
-| **[count_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-count-where)**()|
Compute number of values match specified condition. |
-| **[date](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-date)**()|
Cast timestamp or string expression to date (date >= 1900-01-01) |
-| **[date_format](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-date-format)**()|
Formats the date value according to the format string. |
-| **[datediff](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-datediff)**()|
days difference from date1 to date2 |
-| **[day](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-day)**()| |
-| **[dayofmonth](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-dayofmonth)**()|
Return the day of the month for a timestamp or date. |
-| **[dayofweek](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-dayofweek)**()|
Return the day of week for a timestamp or date. |
-| **[dayofyear](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-dayofyear)**()|
Return the day of year for a timestamp or date. Returns 0 given an invalid date. |
-| **[degrees](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-degrees)**()|
Convert radians to degrees. |
-| **[distinct_count](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-distinct-count)**()|
Compute number of distinct values. |
-| **[double](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-double)**()|
Cast string expression to double. |
-| **[drawdown](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-drawdown)**()|
Compute drawdown of values. |
-| **[earth_distance](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-earth-distance)**()|
Returns the great circle distance between two points on the surface of the Earth. Km as return unit. add a minus (-) sign if heading west (W) or south (S). |
-| **[entropy](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-entropy)**()|
Calculate Shannon entropy of a column of values. Null values are skipped. |
-| **[ew_avg](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-ew-avg)**()|
Compute exponentially-weighted average of values. It's equivalent to pandas ewm(alpha={alpha}, adjust=True, ignore_na=True, com=None, span=None, halflife=None, min_periods=0) |
-| **[exp](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-exp)**()|
Return the value of e (the base of natural logarithms) raised to the power of expr. |
-| **[farm_fingerprint](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-farm-fingerprint)**()| |
-| **[first_value](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-first-value)**()|
Returns the value of expr from the latest row (last row) of the window frame. |
-| **[float](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-float)**()|
Cast string expression to float. |
-| **[floor](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-floor)**()|
Return the largest integer value not less than the expr. |
-| **[get_json_object](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-get-json-object)**()|
Extracts a JSON object from [JSON Pointer](https://datatracker.ietf.org/doc/html/rfc6901)|
-| **[hash64](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-hash64)**()|
Returns a hash value of the arguments. It is not a cryptographic hash function and should not be used as such. |
-| **[hex](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-hex)**()|
Convert integer to hexadecimal. |
-| **[hour](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-hour)**()|
Return the hour for a timestamp. |
-| **[identity](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-identity)**()|
Return value. |
-| **[if_null](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-if-null)**()|
If input is not null, return input value; else return default value. |
-| **[ifnull](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-ifnull)**()| |
-| **[ilike_match](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-ilike-match)**()|
pattern match same as ILIKE predicate |
-| **[inc](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-inc)**()|
Return expression + 1. |
-| **[int](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-int)**()| |
-| **[int16](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-int16)**()|
Cast string expression to int16. |
-| **[int32](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-int32)**()|
Cast string expression to int32. |
-| **[int64](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-int64)**()|
Cast string expression to int64. |
-| **[is_null](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-is-null)**()|
Check if input value is null, return bool. |
-| **[isnull](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-isnull)**()| |
-| **[join](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-join)**()|
For each string value from specified column of window, join by delimeter. Null values are skipped. |
-| **[json_array_length](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-json-array-length)**()|
Returns the number of elements in the outermost JSON array. |
-| **[lag](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-lag)**()|
Returns value evaluated at the row that is offset rows before the current row within the partition. Offset is evaluated with respect to the current row. |
-| **[last_day](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-last-day)**()|
Return the last day of the month to which the date belongs to. |
-| **[lcase](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-lcase)**()|
Convert all the characters to lowercase. Note that characters with values > 127 are simply returned. |
-| **[like_match](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-like-match)**()|
pattern match same as LIKE predicate |
-| **[list_except_by_key](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-list-except-by-key)**()|
Return list of elements in list1 but keys not in except_str. |
-| **[list_except_by_value](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-list-except-by-value)**()|
Return list of elements in list1 but values not in except_str. |
-| **[ln](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-ln)**()|
Return the natural logarithm of expr. |
-| **[log](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-log)**()|
log(base, expr) If called with one parameter, this function returns the natural logarithm of expr. If called with two parameters, this function returns the logarithm of expr to the base. |
-| **[log10](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-log10)**()|
Return the base-10 logarithm of expr. |
-| **[log2](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-log2)**()|
Return the base-2 logarithm of expr. |
-| **[lower](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-lower)**()| |
-| **[make_tuple](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-make-tuple)**()| |
-| **[max](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-max)**()|
Compute maximum of values. |
-| **[max_cate](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-max-cate)**()|
Compute maximum of values grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
-| **[max_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-max-cate-where)**()|
Compute maximum of values matching specified condition grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
-| **[max_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-max-where)**()|
Compute maximum of values match specified condition. |
-| **[maximum](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-maximum)**()|
Compute maximum of two arguments. |
-| **[median](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-median)**()|
Compute the median of values. |
-| **[min](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-min)**()|
Compute minimum of values. |
-| **[min_cate](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-min-cate)**()|
Compute minimum of values grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
-| **[min_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-min-cate-where)**()|
Compute minimum of values matching specified condition grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
-| **[min_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-min-where)**()|
Compute minimum of values match specified condition. |
-| **[minimum](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-minimum)**()|
Compute minimum of two arguments. |
-| **[minute](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-minute)**()|
Return the minute for a timestamp. |
-| **[month](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-month)**()|
Return the month part of a timestamp or date. |
-| **[nth_value_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-nth-value-where)**()|
Returns the value of expr from the idx th row matches the condition. |
-| **[nvl](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-nvl)**()| |
-| **[nvl2](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-nvl2)**()|
nvl2(expr1, expr2, expr3) - Returns expr2 if expr1 is not null, or expr3 otherwise. |
-| **[pmod](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-pmod)**()|
Compute pmod of two arguments. If any param is NULL, output NULL. If divisor is 0, output NULL. |
-| **[pow](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-pow)**()|
Return the value of expr1 to the power of expr2. |
-| **[power](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-power)**()| |
-| **[radians](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-radians)**()|
Returns the argument X, converted from degrees to radians. (Note that π radians equals 180 degrees.) |
-| **[regexp_like](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-regexp-like)**()|
pattern match same as RLIKE predicate (based on RE2) |
-| **[replace](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-replace)**()|
replace(str, search[, replace]) - Replaces all occurrences of `search` with `replace`|
-| **[reverse](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-reverse)**()|
Returns the reversed given string. |
-| **[round](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-round)**()|
Returns expr rounded to d decimal places using HALF_UP rounding mode. |
-| **[second](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-second)**()|
Return the second for a timestamp. |
-| **[sin](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-sin)**()|
Return the sine of expr. |
-| **[size](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-size)**()|
Get the size of a List (e.g., result of split) |
-| **[smallint](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-smallint)**()| |
-| **[split](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-split)**()|
Split string to list by delimeter. Null values are skipped. |
-| **[split_array](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-split-array)**()|
Split string to array of string by delimeter. |
-| **[split_by_key](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-split-by-key)**()|
Split string by delimeter and split each segment as kv pair, then add each key to output list. Null or illegal segments are skipped. |
-| **[split_by_value](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-split-by-value)**()|
Split string by delimeter and split each segment as kv pair, then add each value to output list. Null or illegal segments are skipped. |
-| **[sqrt](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-sqrt)**()|
Return square root of expr. |
-| **[std](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-std)**()| |
-| **[stddev](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-stddev)**()|
Compute sample standard deviation of values, i.e., `sqrt( sum((x_i - avg)^2) / (n-1) )`|
-| **[stddev_pop](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-stddev-pop)**()|
Compute population standard deviation of values, i.e., `sqrt( sum((x_i - avg)^2) / n )`|
-| **[stddev_samp](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-stddev-samp)**()| |
-| **[strcmp](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-strcmp)**()|
Returns 0 if the strings are the same, -1 if the first argument is smaller than the second according to the current sort order, and 1 otherwise. |
-| **[string](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-string)**()|
Return string converted from timestamp expression. |
-| **[substr](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-substr)**()| |
-| **[substring](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-substring)**()|
Return a substring `len` characters long from string str, starting at position `pos`. Alias function: `substr`|
-| **[sum](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-sum)**()|
Compute sum of values. |
-| **[sum_cate](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-sum-cate)**()|
Compute sum of values grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
-| **[sum_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-sum-cate-where)**()|
Compute sum of values matching specified condition grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
-| **[sum_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-sum-where)**()|
Compute sum of values match specified condition. |
-| **[tan](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-tan)**()|
Return the tangent of expr. |
-| **[timestamp](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-timestamp)**()|
Cast int64, date or string expression to timestamp. |
-| **[top](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top)**()|
Compute top k of values and output string separated by comma. The outputs are sorted in desc order. |
-| **[top1_ratio](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top1-ratio)**()|
Compute the top1 occurring value's ratio. |
-| **[top_n_key_avg_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top-n-key-avg-cate-where)**()|
Compute average of values matching specified condition grouped by category key. Output string for top N category keys in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
-| **[top_n_key_count_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top-n-key-count-cate-where)**()|
Compute count of values matching specified condition grouped by category key. Output string for top N category keys in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
-| **[top_n_key_max_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top-n-key-max-cate-where)**()|
Compute maximum of values matching specified condition grouped by category key. Output string for top N category keys in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
-| **[top_n_key_min_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top-n-key-min-cate-where)**()|
Compute minimum of values matching specified condition grouped by category key. Output string for top N category keys in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
-| **[top_n_key_ratio_cate](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top-n-key-ratio-cate)**()|
Ratios (cond match cnt / total cnt) for groups. |
-| **[top_n_key_sum_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top-n-key-sum-cate-where)**()|
Compute sum of values matching specified condition grouped by category key. Output string for top N category keys in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
-| **[top_n_value_avg_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top-n-value-avg-cate-where)**()|
Compute average of values matching specified condition grouped by category key. Output string for top N aggregate values in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
-| **[top_n_value_count_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top-n-value-count-cate-where)**()|
Compute count of values matching specified condition grouped by category key. Output string for top N aggregate values in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
-| **[top_n_value_max_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top-n-value-max-cate-where)**()|
Compute maximum of values matching specified condition grouped by category key. Output string for top N aggregate values in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
-| **[top_n_value_min_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top-n-value-min-cate-where)**()|
Compute minimum of values matching specified condition grouped by category key. Output string for top N aggregate values in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
-| **[top_n_value_ratio_cate](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top-n-value-ratio-cate)**()|
Ratios (cond match cnt / total cnt) for groups. |
-| **[top_n_value_sum_cate_where](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-top-n-value-sum-cate-where)**()|
Compute sum of values matching specified condition grouped by category key. Output string for top N aggregate values in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
-| **[topn_frequency](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-topn-frequency)**()|
Return the topN keys sorted by their frequency. |
-| **[truncate](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-truncate)**()|
Return the nearest integer that is not greater in magnitude than the expr. |
-| **[ucase](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-ucase)**()|
Convert all the characters to uppercase. Note that characters values > 127 are simply returned. |
-| **[unhex](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-unhex)**()|
Convert hexadecimal to binary string. |
-| **[unix_timestamp](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-unix-timestamp)**()|
Cast date or string expression to unix_timestamp. If empty string or NULL is provided, return current timestamp. |
-| **[upper](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-upper)**()| |
-| **[var_pop](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-var-pop)**()|
Compute population variance of values, i.e., `sum((x_i - avg)^2) / n`|
-| **[var_samp](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-var-samp)**()|
Compute population variance of values, i.e., `sum((x_i - avg)^2) / (n-1)`|
-| **[variance](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-variance)**()| |
-| **[week](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-week)**()| |
-| **[weekofyear](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-weekofyear)**()|
Return the week of year for a timestamp or date. |
-| **[window_split](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-window-split)**()|
For each string value from specified column of window, split by delimeter and add segment to output list. Null values are skipped. |
-| **[window_split_by_key](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-window-split-by-key)**()|
For each string value from specified column of window, split by delimeter and then split each segment as kv pair, then add each key to output list. Null and illegal segments are skipped. |
-| **[window_split_by_value](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-window-split-by-value)**()|
For each string value from specified column of window, split by delimeter and then split each segment as kv pair, then add each value to output list. Null and illegal segments are skipped. |
-| **[year](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-year)**()|
Return the year part of a timestamp or date. |
+| **[abs](/openmldb_sql/Files/udfs_8h.md#function-abs)**()|
Return the absolute value of expr. |
+| **[acos](/openmldb_sql/Files/udfs_8h.md#function-acos)**()|
Return the arc cosine of expr. |
+| **[add](/openmldb_sql/Files/udfs_8h.md#function-add)**()|
Compute sum of two arguments. |
+| **[add_months](/openmldb_sql/Files/udfs_8h.md#function-add-months)**()|
Adds an integer number of months to a given date, returning the resulting date. |
+| **[array_contains](/openmldb_sql/Files/udfs_8h.md#function-array-contains)**()|
array_contains(array, value) - Returns true if the array contains the value. |
+| **[asin](/openmldb_sql/Files/udfs_8h.md#function-asin)**()|
Return the arc sine of expr. |
+| **[at](/openmldb_sql/Files/udfs_8h.md#function-at)**()| |
+| **[atan](/openmldb_sql/Files/udfs_8h.md#function-atan)**()|
Return the arc tangent of expr. If called with one parameter, this function returns the arc tangent of expr. If called with two parameters X and Y, this function returns the arc tangent of Y / X. |
+| **[atan2](/openmldb_sql/Files/udfs_8h.md#function-atan2)**()|
Return the arc tangent of Y / X. |
+| **[avg](/openmldb_sql/Files/udfs_8h.md#function-avg)**()|
Compute average of values. |
+| **[avg_cate](/openmldb_sql/Files/udfs_8h.md#function-avg-cate)**()|
Compute average of values grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
+| **[avg_cate_where](/openmldb_sql/Files/udfs_8h.md#function-avg-cate-where)**()|
Compute average of values matching specified condition grouped by category key and output string. Each group is represented as 'K:V', separated by comma, and sorted by key in ascend order. |
+| **[avg_where](/openmldb_sql/Files/udfs_8h.md#function-avg-where)**()|
Compute average of values matching the specified condition. |
+| **[bigint](/openmldb_sql/Files/udfs_8h.md#function-bigint)**()| |
+| **[bool](/openmldb_sql/Files/udfs_8h.md#function-bool)**()|
Cast string expression to bool. |
+| **[ceil](/openmldb_sql/Files/udfs_8h.md#function-ceil)**()|
Return the smallest integer value not less than the expr. |
+| **[ceiling](/openmldb_sql/Files/udfs_8h.md#function-ceiling)**()| |
+| **[char](/openmldb_sql/Files/udfs_8h.md#function-char)**()|
Returns the ASCII character having the binary equivalent to expr. If n >= 256 the result is equivalent to char(n % 256). |
+| **[char_length](/openmldb_sql/Files/udfs_8h.md#function-char-length)**()|
Returns the length of the string. It is measured in characters, and multibyte character strings are not supported. |
+| **[character_length](/openmldb_sql/Files/udfs_8h.md#function-character-length)**()| |
+| **[concat](/openmldb_sql/Files/udfs_8h.md#function-concat)**()|
This function returns a string resulting from the joining of two or more string values in an end-to-end manner. (To add a separating value during joining, see concat_ws.) |
+| **[concat_ws](/openmldb_sql/Files/udfs_8h.md#function-concat-ws)**()|
Returns a string resulting from the joining of two or more string values in an end-to-end manner. It separates those concatenated string values with the delimiter specified in the first function argument. |
+| **[cos](/openmldb_sql/Files/udfs_8h.md#function-cos)**()|
Return the cosine of expr. |
+| **[cot](/openmldb_sql/Files/udfs_8h.md#function-cot)**()|
Return the cotangent of expr. |
+| **[count](/openmldb_sql/Files/udfs_8h.md#function-count)**()|
Compute number of values. |
+| **[count_cate](/openmldb_sql/Files/udfs_8h.md#function-count-cate)**()|
Compute count of values grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
+| **[count_cate_where](/openmldb_sql/Files/udfs_8h.md#function-count-cate-where)**()|
Compute count of values matching specified condition grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
+| **[count_where](/openmldb_sql/Files/udfs_8h.md#function-count-where)**()|
Compute number of values matching the specified condition. |
+| **[date](/openmldb_sql/Files/udfs_8h.md#function-date)**()|
Cast timestamp or string expression to date (date >= 1900-01-01) |
+| **[date_format](/openmldb_sql/Files/udfs_8h.md#function-date-format)**()|
Formats the date value according to the format string. |
+| **[datediff](/openmldb_sql/Files/udfs_8h.md#function-datediff)**()|
days difference from date1 to date2 |
+| **[day](/openmldb_sql/Files/udfs_8h.md#function-day)**()| |
+| **[dayofmonth](/openmldb_sql/Files/udfs_8h.md#function-dayofmonth)**()|
Return the day of the month for a timestamp or date. |
+| **[dayofweek](/openmldb_sql/Files/udfs_8h.md#function-dayofweek)**()|
Return the day of week for a timestamp or date. |
+| **[dayofyear](/openmldb_sql/Files/udfs_8h.md#function-dayofyear)**()|
Return the day of year for a timestamp or date. Returns 0 given an invalid date. |
+| **[degrees](/openmldb_sql/Files/udfs_8h.md#function-degrees)**()|
Convert radians to degrees. |
+| **[distinct_count](/openmldb_sql/Files/udfs_8h.md#function-distinct-count)**()|
Compute number of distinct values. |
+| **[double](/openmldb_sql/Files/udfs_8h.md#function-double)**()|
Cast string expression to double. |
+| **[drawdown](/openmldb_sql/Files/udfs_8h.md#function-drawdown)**()|
Compute drawdown of values. |
+| **[earth_distance](/openmldb_sql/Files/udfs_8h.md#function-earth-distance)**()|
Returns the great circle distance between two points on the surface of the Earth. The return unit is km. Add a minus (-) sign if heading west (W) or south (S). |
+| **[entropy](/openmldb_sql/Files/udfs_8h.md#function-entropy)**()|
Calculate Shannon entropy of a column of values. Null values are skipped. |
+| **[ew_avg](/openmldb_sql/Files/udfs_8h.md#function-ew-avg)**()|
Compute exponentially-weighted average of values. It's equivalent to pandas ewm(alpha={alpha}, adjust=True, ignore_na=True, com=None, span=None, halflife=None, min_periods=0) |
+| **[exp](/openmldb_sql/Files/udfs_8h.md#function-exp)**()|
Return the value of e (the base of natural logarithms) raised to the power of expr. |
+| **[farm_fingerprint](/openmldb_sql/Files/udfs_8h.md#function-farm-fingerprint)**()| |
+| **[first_value](/openmldb_sql/Files/udfs_8h.md#function-first-value)**()|
Returns the value of expr from the latest row (last row) of the window frame. |
+| **[float](/openmldb_sql/Files/udfs_8h.md#function-float)**()|
Cast string expression to float. |
+| **[floor](/openmldb_sql/Files/udfs_8h.md#function-floor)**()|
Return the largest integer value not greater than the expr. |
+| **[get_json_object](/openmldb_sql/Files/udfs_8h.md#function-get-json-object)**()|
Extracts a JSON object from [JSON Pointer](https://datatracker.ietf.org/doc/html/rfc6901)|
+| **[hash64](/openmldb_sql/Files/udfs_8h.md#function-hash64)**()|
Returns a hash value of the arguments. It is not a cryptographic hash function and should not be used as such. |
+| **[hex](/openmldb_sql/Files/udfs_8h.md#function-hex)**()|
Convert integer to hexadecimal. |
+| **[hour](/openmldb_sql/Files/udfs_8h.md#function-hour)**()|
Return the hour for a timestamp. |
+| **[identity](/openmldb_sql/Files/udfs_8h.md#function-identity)**()|
Return value. |
+| **[if_null](/openmldb_sql/Files/udfs_8h.md#function-if-null)**()|
If input is not null, return input value; else return default value. |
+| **[ifnull](/openmldb_sql/Files/udfs_8h.md#function-ifnull)**()| |
+| **[ilike_match](/openmldb_sql/Files/udfs_8h.md#function-ilike-match)**()|
pattern match same as ILIKE predicate |
+| **[inc](/openmldb_sql/Files/udfs_8h.md#function-inc)**()|
Return expression + 1. |
+| **[int](/openmldb_sql/Files/udfs_8h.md#function-int)**()| |
+| **[int16](/openmldb_sql/Files/udfs_8h.md#function-int16)**()|
Cast string expression to int16. |
+| **[int32](/openmldb_sql/Files/udfs_8h.md#function-int32)**()|
Cast string expression to int32. |
+| **[int64](/openmldb_sql/Files/udfs_8h.md#function-int64)**()|
Cast string expression to int64. |
+| **[is_null](/openmldb_sql/Files/udfs_8h.md#function-is-null)**()|
Check if input value is null, return bool. |
+| **[isnull](/openmldb_sql/Files/udfs_8h.md#function-isnull)**()| |
+| **[join](/openmldb_sql/Files/udfs_8h.md#function-join)**()|
For each string value from specified column of window, join by delimiter. Null values are skipped. |
+| **[json_array_length](/openmldb_sql/Files/udfs_8h.md#function-json-array-length)**()|
Returns the number of elements in the outermost JSON array. |
+| **[lag](/openmldb_sql/Files/udfs_8h.md#function-lag)**()|
Returns value evaluated at the row that is offset rows before the current row within the partition. Offset is evaluated with respect to the current row. |
+| **[last_day](/openmldb_sql/Files/udfs_8h.md#function-last-day)**()|
Return the last day of the month to which the date belongs to. |
+| **[lcase](/openmldb_sql/Files/udfs_8h.md#function-lcase)**()|
Convert all the characters to lowercase. Note that characters with values > 127 are simply returned. |
+| **[like_match](/openmldb_sql/Files/udfs_8h.md#function-like-match)**()|
pattern match same as LIKE predicate |
+| **[list_except_by_key](/openmldb_sql/Files/udfs_8h.md#function-list-except-by-key)**()|
Return list of elements in list1 but keys not in except_str. |
+| **[list_except_by_value](/openmldb_sql/Files/udfs_8h.md#function-list-except-by-value)**()|
Return list of elements in list1 but values not in except_str. |
+| **[ln](/openmldb_sql/Files/udfs_8h.md#function-ln)**()|
Return the natural logarithm of expr. |
+| **[log](/openmldb_sql/Files/udfs_8h.md#function-log)**()|
log(base, expr) If called with one parameter, this function returns the natural logarithm of expr. If called with two parameters, this function returns the logarithm of expr to the base. |
+| **[log10](/openmldb_sql/Files/udfs_8h.md#function-log10)**()|
Return the base-10 logarithm of expr. |
+| **[log2](/openmldb_sql/Files/udfs_8h.md#function-log2)**()|
Return the base-2 logarithm of expr. |
+| **[lower](/openmldb_sql/Files/udfs_8h.md#function-lower)**()| |
+| **[make_tuple](/openmldb_sql/Files/udfs_8h.md#function-make-tuple)**()| |
+| **[max](/openmldb_sql/Files/udfs_8h.md#function-max)**()|
Compute maximum of values. |
+| **[max_cate](/openmldb_sql/Files/udfs_8h.md#function-max-cate)**()|
Compute maximum of values grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
+| **[max_cate_where](/openmldb_sql/Files/udfs_8h.md#function-max-cate-where)**()|
Compute maximum of values matching specified condition grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
+| **[max_where](/openmldb_sql/Files/udfs_8h.md#function-max-where)**()|
Compute maximum of values that match the specified condition. |
+| **[maximum](/openmldb_sql/Files/udfs_8h.md#function-maximum)**()|
Compute maximum of two arguments. |
+| **[median](/openmldb_sql/Files/udfs_8h.md#function-median)**()|
Compute the median of values. |
+| **[min](/openmldb_sql/Files/udfs_8h.md#function-min)**()|
Compute minimum of values. |
+| **[min_cate](/openmldb_sql/Files/udfs_8h.md#function-min-cate)**()|
Compute minimum of values grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
+| **[min_cate_where](/openmldb_sql/Files/udfs_8h.md#function-min-cate-where)**()|
Compute minimum of values matching specified condition grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
+| **[min_where](/openmldb_sql/Files/udfs_8h.md#function-min-where)**()|
Compute minimum of values that match the specified condition. |
+| **[minimum](/openmldb_sql/Files/udfs_8h.md#function-minimum)**()|
Compute minimum of two arguments. |
+| **[minute](/openmldb_sql/Files/udfs_8h.md#function-minute)**()|
Return the minute for a timestamp. |
+| **[month](/openmldb_sql/Files/udfs_8h.md#function-month)**()|
Return the month part of a timestamp or date. |
+| **[nth_value_where](/openmldb_sql/Files/udfs_8h.md#function-nth-value-where)**()|
Returns the value of expr from the idx-th row that matches the condition. |
+| **[nvl](/openmldb_sql/Files/udfs_8h.md#function-nvl)**()| |
+| **[nvl2](/openmldb_sql/Files/udfs_8h.md#function-nvl2)**()|
nvl2(expr1, expr2, expr3) - Returns expr2 if expr1 is not null, or expr3 otherwise. |
+| **[pmod](/openmldb_sql/Files/udfs_8h.md#function-pmod)**()|
Compute pmod of two arguments. If any param is NULL, output NULL. If divisor is 0, output NULL. |
+| **[pow](/openmldb_sql/Files/udfs_8h.md#function-pow)**()|
Return the value of expr1 to the power of expr2. |
+| **[power](/openmldb_sql/Files/udfs_8h.md#function-power)**()| |
+| **[radians](/openmldb_sql/Files/udfs_8h.md#function-radians)**()|
Returns the argument X, converted from degrees to radians. (Note that π radians equals 180 degrees.) |
+| **[regexp_like](/openmldb_sql/Files/udfs_8h.md#function-regexp-like)**()|
pattern match same as RLIKE predicate (based on RE2) |
+| **[replace](/openmldb_sql/Files/udfs_8h.md#function-replace)**()|
replace(str, search[, replace]) - Replaces all occurrences of `search` with `replace`|
+| **[reverse](/openmldb_sql/Files/udfs_8h.md#function-reverse)**()|
Returns the reversed given string. |
+| **[round](/openmldb_sql/Files/udfs_8h.md#function-round)**()|
Returns expr rounded to d decimal places using HALF_UP rounding mode. |
+| **[second](/openmldb_sql/Files/udfs_8h.md#function-second)**()|
Return the second for a timestamp. |
+| **[sin](/openmldb_sql/Files/udfs_8h.md#function-sin)**()|
Return the sine of expr. |
+| **[size](/openmldb_sql/Files/udfs_8h.md#function-size)**()|
Get the size of a List (e.g., result of split) |
+| **[smallint](/openmldb_sql/Files/udfs_8h.md#function-smallint)**()| |
+| **[split](/openmldb_sql/Files/udfs_8h.md#function-split)**()|
Split string to list by delimiter. Null values are skipped. |
+| **[split_array](/openmldb_sql/Files/udfs_8h.md#function-split-array)**()|
Split string to array of string by delimiter. |
+| **[split_by_key](/openmldb_sql/Files/udfs_8h.md#function-split-by-key)**()|
Split string by delimiter and split each segment as kv pair, then add each key to output list. Null or illegal segments are skipped. |
+| **[split_by_value](/openmldb_sql/Files/udfs_8h.md#function-split-by-value)**()|
Split string by delimiter and split each segment as kv pair, then add each value to output list. Null or illegal segments are skipped. |
+| **[sqrt](/openmldb_sql/Files/udfs_8h.md#function-sqrt)**()|
Return square root of expr. |
+| **[std](/openmldb_sql/Files/udfs_8h.md#function-std)**()| |
+| **[stddev](/openmldb_sql/Files/udfs_8h.md#function-stddev)**()|
Compute sample standard deviation of values, i.e., `sqrt( sum((x_i - avg)^2) / (n-1) )`|
+| **[stddev_pop](/openmldb_sql/Files/udfs_8h.md#function-stddev-pop)**()|
Compute population standard deviation of values, i.e., `sqrt( sum((x_i - avg)^2) / n )`|
+| **[stddev_samp](/openmldb_sql/Files/udfs_8h.md#function-stddev-samp)**()| |
+| **[strcmp](/openmldb_sql/Files/udfs_8h.md#function-strcmp)**()|
Returns 0 if the strings are the same, -1 if the first argument is smaller than the second according to the current sort order, and 1 otherwise. |
+| **[string](/openmldb_sql/Files/udfs_8h.md#function-string)**()|
Return string converted from timestamp expression. |
+| **[substr](/openmldb_sql/Files/udfs_8h.md#function-substr)**()| |
+| **[substring](/openmldb_sql/Files/udfs_8h.md#function-substring)**()|
Return a substring `len` characters long from string str, starting at position `pos`. Alias function: `substr`|
+| **[sum](/openmldb_sql/Files/udfs_8h.md#function-sum)**()|
Compute sum of values. |
+| **[sum_cate](/openmldb_sql/Files/udfs_8h.md#function-sum-cate)**()|
Compute sum of values grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
+| **[sum_cate_where](/openmldb_sql/Files/udfs_8h.md#function-sum-cate-where)**()|
Compute sum of values matching specified condition grouped by category key and output string. Each group is represented as 'K:V' and separated by comma in outputs and are sorted by key in ascend order. |
+| **[sum_where](/openmldb_sql/Files/udfs_8h.md#function-sum-where)**()|
Compute sum of values that match the specified condition. |
+| **[tan](/openmldb_sql/Files/udfs_8h.md#function-tan)**()|
Return the tangent of expr. |
+| **[timestamp](/openmldb_sql/Files/udfs_8h.md#function-timestamp)**()|
Cast int64, date or string expression to timestamp. |
+| **[top](/openmldb_sql/Files/udfs_8h.md#function-top)**()|
Compute top k of values and output string separated by comma. The outputs are sorted in desc order. |
+| **[top1_ratio](/openmldb_sql/Files/udfs_8h.md#function-top1-ratio)**()|
Compute the top1 occurring value's ratio. |
+| **[top_n_key_avg_cate_where](/openmldb_sql/Files/udfs_8h.md#function-top-n-key-avg-cate-where)**()|
Compute average of values matching specified condition grouped by category key. Output string for top N category keys in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
+| **[top_n_key_count_cate_where](/openmldb_sql/Files/udfs_8h.md#function-top-n-key-count-cate-where)**()|
Compute count of values matching specified condition grouped by category key. Output string for top N category keys in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
+| **[top_n_key_max_cate_where](/openmldb_sql/Files/udfs_8h.md#function-top-n-key-max-cate-where)**()|
Compute maximum of values matching specified condition grouped by category key. Output string for top N category keys in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
+| **[top_n_key_min_cate_where](/openmldb_sql/Files/udfs_8h.md#function-top-n-key-min-cate-where)**()|
Compute minimum of values matching specified condition grouped by category key. Output string for top N category keys in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
+| **[top_n_key_ratio_cate](/openmldb_sql/Files/udfs_8h.md#function-top-n-key-ratio-cate)**()|
Ratios (cond match cnt / total cnt) for groups. |
+| **[top_n_key_sum_cate_where](/openmldb_sql/Files/udfs_8h.md#function-top-n-key-sum-cate-where)**()|
Compute sum of values matching specified condition grouped by category key. Output string for top N category keys in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
+| **[top_n_value_avg_cate_where](/openmldb_sql/Files/udfs_8h.md#function-top-n-value-avg-cate-where)**()|
Compute average of values matching specified condition grouped by category key. Output string for top N aggregate values in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
+| **[top_n_value_count_cate_where](/openmldb_sql/Files/udfs_8h.md#function-top-n-value-count-cate-where)**()|
Compute count of values matching specified condition grouped by category key. Output string for top N aggregate values in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
+| **[top_n_value_max_cate_where](/openmldb_sql/Files/udfs_8h.md#function-top-n-value-max-cate-where)**()|
Compute maximum of values matching specified condition grouped by category key. Output string for top N aggregate values in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
+| **[top_n_value_min_cate_where](/openmldb_sql/Files/udfs_8h.md#function-top-n-value-min-cate-where)**()|
Compute minimum of values matching specified condition grouped by category key. Output string for top N aggregate values in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
+| **[top_n_value_ratio_cate](/openmldb_sql/Files/udfs_8h.md#function-top-n-value-ratio-cate)**()|
Ratios (cond match cnt / total cnt) for groups. |
+| **[top_n_value_sum_cate_where](/openmldb_sql/Files/udfs_8h.md#function-top-n-value-sum-cate-where)**()|
Compute sum of values matching specified condition grouped by category key. Output string for top N aggregate values in descend order. Each group is represented as 'K:V' and separated by comma(,). Empty string returned if no rows selected. |
+| **[topn_frequency](/openmldb_sql/Files/udfs_8h.md#function-topn-frequency)**()|
Return the topN keys sorted by their frequency. |
+| **[truncate](/openmldb_sql/Files/udfs_8h.md#function-truncate)**()|
Return the nearest integer that is not greater in magnitude than the expr. |
+| **[ucase](/openmldb_sql/Files/udfs_8h.md#function-ucase)**()|
Convert all the characters to uppercase. Note that characters with values > 127 are simply returned. |
+| **[unhex](/openmldb_sql/Files/udfs_8h.md#function-unhex)**()|
Convert hexadecimal to binary string. |
+| **[unix_timestamp](/openmldb_sql/Files/udfs_8h.md#function-unix-timestamp)**()|
Cast date or string expression to unix_timestamp. If empty string or NULL is provided, return current timestamp. |
+| **[upper](/openmldb_sql/Files/udfs_8h.md#function-upper)**()| |
+| **[var_pop](/openmldb_sql/Files/udfs_8h.md#function-var-pop)**()|
Compute population variance of values, i.e., `sum((x_i - avg)^2) / n`|
+| **[var_samp](/openmldb_sql/Files/udfs_8h.md#function-var-samp)**()|
Compute sample variance of values, i.e., `sum((x_i - avg)^2) / (n-1)`|
+| **[variance](/openmldb_sql/Files/udfs_8h.md#function-variance)**()| |
+| **[week](/openmldb_sql/Files/udfs_8h.md#function-week)**()| |
+| **[weekofyear](/openmldb_sql/Files/udfs_8h.md#function-weekofyear)**()|
Return the week of year for a timestamp or date. |
+| **[window_split](/openmldb_sql/Files/udfs_8h.md#function-window-split)**()|
For each string value from specified column of window, split by delimiter and add segment to output list. Null values are skipped. |
+| **[window_split_by_key](/openmldb_sql/Files/udfs_8h.md#function-window-split-by-key)**()|
For each string value from specified column of window, split by delimiter and then split each segment as kv pair, then add each key to output list. Null and illegal segments are skipped. |
+| **[window_split_by_value](/openmldb_sql/Files/udfs_8h.md#function-window-split-by-value)**()|
For each string value from specified column of window, split by delimiter and then split each segment as kv pair, then add each value to output list. Null and illegal segments are skipped. |
+| **[year](/openmldb_sql/Files/udfs_8h.md#function-year)**()|
Return the year part of a timestamp or date. |
## Functions Documentation
@@ -501,13 +501,13 @@ Compute average of values.
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 0 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -541,13 +541,13 @@ Compute average of values grouped by category key and output string. Each group
Example:
-| value | catagory |
+| value | catagory |
| -------- | -------- |
-| 0 | x |
-| 1 | y |
-| 2 | x |
-| 3 | y |
-| 4 | x |
+| 0 | x |
+| 1 | y |
+| 2 | x |
+| 3 | y |
+| 4 | x |
```sql
@@ -586,13 +586,13 @@ Compute average of values matching specified condition grouped by category key a
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | false | y |
-| 2 | false | x |
-| 3 | true | y |
-| 4 | true | x |
+| 0 | true | x |
+| 1 | false | y |
+| 2 | false | x |
+| 3 | true | y |
+| 4 | true | x |
```sql
@@ -634,13 +634,13 @@ Compute average of values match specified condition.
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 0 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -884,7 +884,7 @@ SELECT COS(0);
-* The value returned by [cos()](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-cos) is always in the range: -1 to 1.
+* The value returned by [cos()](/openmldb_sql/Files/udfs_8h.md#function-cos) is always in the range: -1 to 1.
**Supported Types**:
@@ -946,13 +946,13 @@ Compute number of values.
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 0 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -987,13 +987,13 @@ Compute count of values grouped by category key and output string. Each group is
Example:
-| value | catagory |
+| value | catagory |
| -------- | -------- |
-| 0 | x |
-| 1 | y |
-| 2 | x |
-| 3 | y |
-| 4 | x |
+| 0 | x |
+| 1 | y |
+| 2 | x |
+| 3 | y |
+| 4 | x |
```sql
@@ -1032,13 +1032,13 @@ Compute count of values matching specified condition grouped by category key and
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | false | y |
-| 2 | false | x |
-| 3 | true | y |
-| 4 | true | x |
+| 0 | true | x |
+| 1 | false | y |
+| 2 | false | x |
+| 3 | true | y |
+| 4 | true | x |
```sql
@@ -1080,13 +1080,13 @@ Compute number of values match specified condition.
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 0 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -1230,7 +1230,7 @@ Return the day of the month for a timestamp or date.
0.1.0
-Note: This function equals the `[day()](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-day)` function.
+Note: This function equals the `[day()](/openmldb_sql/Files/udfs_8h.md#function-day)` function.
Example:
@@ -1264,7 +1264,7 @@ Return the day of week for a timestamp or date.
0.4.0
-Note: This function equals the `[week()](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-week)` function.
+Note: This function equals the `[week()](/openmldb_sql/Files/udfs_8h.md#function-week)` function.
Example:
@@ -1374,13 +1374,13 @@ Compute number of distinct values.
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 0 |
-| 2 |
-| 2 |
-| 4 |
+| 0 |
+| 0 |
+| 2 |
+| 2 |
+| 4 |
```sql
@@ -1450,14 +1450,14 @@ It requires that all values are non-negative. Negative values will be ignored.
Example:
-| value |
+| value |
| -------- |
-| 1 |
-| 8 |
-| 5 |
-| 2 |
-| 10 |
-| 4 |
+| 1 |
+| 8 |
+| 5 |
+| 2 |
+| 10 |
+| 4 |
```sql
@@ -1568,13 +1568,13 @@ It requires that values are ordered so that it can only be used with WINDOW (PAR
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 0 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -1652,11 +1652,11 @@ window w as (partition by gp order by ts rows between 3 preceding and current ro
```
-| id | gp | ts | agg |
+| id | gp | ts | agg |
| -------- | -------- | -------- | -------- |
-| 1 | 100 | 98 | 98 |
-| 2 | 100 | 99 | 99 |
-| 3 | 100 | 100 | 100 |
+| 1 | 100 | 98 | 98 |
+| 2 | 100 | 99 | 99 |
+| 3 | 100 | 100 | 100 |
@@ -2251,21 +2251,21 @@ Returns value evaluated at the row that is offset rows before the current row wi
* **offset** The number of rows forwarded from the current row, must not negative
-Note: This function equals the `[at()](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-at)` function.
+Note: This function equals the `[at()](/openmldb_sql/Files/udfs_8h.md#function-at)` function.
-The offset in window is `nth_value()`, not `[lag()](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-lag)/at()`. The old `[at()](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-at)`(version < 0.5.0) is start from the last row of window(may not be the current row), it's more like `nth_value()`
+The offset in window is `nth_value()`, not `[lag()](/openmldb_sql/Files/udfs_8h.md#function-lag)/at()`. The old `[at()](/openmldb_sql/Files/udfs_8h.md#function-at)`(version < 0.5.0) starts from the last row of window(may not be the current row), it's more like `nth_value()`
Example:
-| c1 | c2 |
+| c1 | c2 |
| -------- | -------- |
-| 0 | 1 |
-| 1 | 1 |
-| 2 | 2 |
-| 3 | 2 |
-| 4 | 2 |
+| 0 | 1 |
+| 1 | 1 |
+| 2 | 2 |
+| 3 | 2 |
+| 4 | 2 |
```sql
@@ -2653,13 +2653,13 @@ Compute maximum of values.
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 0 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -2696,13 +2696,13 @@ Compute maximum of values grouped by category key and output string. Each group
Example:
-| value | catagory |
+| value | catagory |
| -------- | -------- |
-| 0 | x |
-| 1 | y |
-| 2 | x |
-| 3 | y |
-| 4 | x |
+| 0 | x |
+| 1 | y |
+| 2 | x |
+| 3 | y |
+| 4 | x |
```sql
@@ -2741,13 +2741,13 @@ Compute maximum of values matching specified condition grouped by category key a
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | false | y |
-| 2 | false | x |
-| 3 | true | y |
-| 4 | true | x |
+| 0 | true | x |
+| 1 | false | y |
+| 2 | false | x |
+| 3 | true | y |
+| 4 | true | x |
```sql
@@ -2789,13 +2789,13 @@ Compute maximum of values match specified condition.
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 0 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -2861,12 +2861,12 @@ Compute the median of values.
Example:
-| value |
+| value |
| -------- |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -2903,13 +2903,13 @@ Compute minimum of values.
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 0 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -2946,13 +2946,13 @@ Compute minimum of values grouped by category key and output string. Each group
Example:
-| value | catagory |
+| value | catagory |
| -------- | -------- |
-| 0 | x |
-| 1 | y |
-| 2 | x |
-| 3 | y |
-| 4 | x |
+| 0 | x |
+| 1 | y |
+| 2 | x |
+| 3 | y |
+| 4 | x |
```sql
@@ -2991,14 +2991,14 @@ Compute minimum of values matching specified condition grouped by category key a
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | false | y |
-| 2 | false | x |
-| 1 | true | y |
-| 4 | true | x |
-| 3 | true | y |
+| 0 | true | x |
+| 1 | false | y |
+| 2 | false | x |
+| 1 | true | y |
+| 4 | true | x |
+| 3 | true | y |
```sql
@@ -3040,13 +3040,13 @@ Compute minimum of values match specified condition.
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 0 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -3176,12 +3176,12 @@ select col1, cond, gp, nth_value_where(col1, 2, cond) over (partition by gp orde
```
-| col1 | cond | gp | agg |
+| col1 | cond | gp | agg |
| -------- | -------- | -------- | -------- |
-| 1 | true | 100 | NULL |
-| 2 | false | 100 | NULL |
-| 3 | NULL | 100 | NULL |
-| 4 | true | 100 | 4 |
+| 1 | true | 100 | NULL |
+| 2 | false | 100 | NULL |
+| 3 | NULL | 100 | NULL |
+| 4 | true | 100 | 4 |
@@ -3568,7 +3568,7 @@ SELECT SIN(0);
-* The value returned by [sin()](/openmldb_sql/functions_and_operators/Files/udfs_8h.md#function-sin) is always in the range: -1 to 1.
+* The value returned by [sin()](/openmldb_sql/Files/udfs_8h.md#function-sin) is always in the range: -1 to 1.
**Supported Types**:
@@ -3810,12 +3810,12 @@ Alias function: `std`, `stddev_samp`
Example:
-| value |
+| value |
| -------- |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -3852,12 +3852,12 @@ Compute population standard deviation of values, i.e., `sqrt( sum((x_i - avg)^2)
Example:
-| value |
+| value |
| -------- |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -4013,13 +4013,13 @@ Compute sum of values.
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 0 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -4053,13 +4053,13 @@ Compute sum of values grouped by category key and output string. Each group is r
Example:
-| value | catagory |
+| value | catagory |
| -------- | -------- |
-| 0 | x |
-| 1 | y |
-| 2 | x |
-| 3 | y |
-| 4 | x |
+| 0 | x |
+| 1 | y |
+| 2 | x |
+| 3 | y |
+| 4 | x |
```sql
@@ -4098,13 +4098,13 @@ Compute sum of values matching specified condition grouped by category key and o
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | false | y |
-| 2 | false | x |
-| 3 | true | y |
-| 4 | true | x |
+| 0 | true | x |
+| 1 | false | y |
+| 2 | false | x |
+| 3 | true | y |
+| 4 | true | x |
```sql
@@ -4146,13 +4146,13 @@ Compute sum of values match specified condition.
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
+| 0 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
```sql
@@ -4262,13 +4262,13 @@ Compute top k of values and output string separated by comma. The outputs are so
Example:
-| value |
+| value |
| -------- |
-| 1 |
-| 2 |
-| 3 |
-| 4 |
-| 4 |
+| 1 |
+| 2 |
+| 3 |
+| 4 |
+| 4 |
```sql
@@ -4319,11 +4319,11 @@ SELECT key, top1_ratio(key) over () as ratio FROM t1;
```
-| key | ratio |
+| key | ratio |
| -------- | -------- |
-| 1 | 1.0 |
-| 2 | 0.5 |
-| NULL | 0.5 |
+| 1 | 1.0 |
+| 2 | 0.5 |
+| NULL | 0.5 |
@@ -4360,15 +4360,15 @@ Compute average of values matching specified condition grouped by category key.
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | false | y |
-| 2 | false | x |
-| 3 | true | y |
-| 4 | true | x |
-| 5 | true | z |
-| 6 | false | z |
+| 0 | true | x |
+| 1 | false | y |
+| 2 | false | x |
+| 3 | true | y |
+| 4 | true | x |
+| 5 | true | z |
+| 6 | false | z |
```sql
@@ -4420,15 +4420,15 @@ Compute count of values matching specified condition grouped by category key. Ou
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | true | y |
-| 2 | false | x |
-| 3 | true | y |
-| 4 | false | x |
-| 5 | true | z |
-| 6 | true | z |
+| 0 | true | x |
+| 1 | true | y |
+| 2 | false | x |
+| 3 | true | y |
+| 4 | false | x |
+| 5 | true | z |
+| 6 | true | z |
```sql
@@ -4480,15 +4480,15 @@ Compute maximum of values matching specified condition grouped by category key.
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | false | y |
-| 2 | false | x |
-| 3 | true | y |
-| 4 | true | x |
-| 5 | true | z |
-| 6 | false | z |
+| 0 | true | x |
+| 1 | false | y |
+| 2 | false | x |
+| 3 | true | y |
+| 4 | true | x |
+| 5 | true | z |
+| 6 | false | z |
```sql
@@ -4540,15 +4540,15 @@ Compute minimum of values matching specified condition grouped by category key.
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | true | y |
-| 2 | false | x |
-| 3 | true | y |
-| 4 | false | x |
-| 5 | true | z |
-| 6 | true | z |
+| 0 | true | x |
+| 1 | true | y |
+| 2 | false | x |
+| 3 | true | y |
+| 4 | false | x |
+| 5 | true | z |
+| 6 | true | z |
```sql
@@ -4602,15 +4602,15 @@ For each group, ratio value is `value` expr count matches condtion divide total
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 2 | true | x |
-| 4 | true | x |
-| 1 | true | y |
-| 3 | false | y |
-| 5 | true | z |
-| 6 | true | z |
+| 0 | true | x |
+| 2 | true | x |
+| 4 | true | x |
+| 1 | true | y |
+| 3 | false | y |
+| 5 | true | z |
+| 6 | true | z |
```sql
@@ -4661,15 +4661,15 @@ Compute sum of values matching specified condition grouped by category key. Outp
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | true | y |
-| 2 | false | x |
-| 3 | true | y |
-| 4 | false | x |
-| 5 | true | z |
-| 6 | true | z |
+| 0 | true | x |
+| 1 | true | y |
+| 2 | false | x |
+| 3 | true | y |
+| 4 | false | x |
+| 5 | true | z |
+| 6 | true | z |
```sql
@@ -4721,15 +4721,15 @@ Compute average of values matching specified condition grouped by category key.
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | false | y |
-| 2 | false | x |
-| 3 | false | y |
-| 4 | true | x |
-| 5 | true | z |
-| 6 | false | z |
+| 0 | true | x |
+| 1 | false | y |
+| 2 | false | x |
+| 3 | false | y |
+| 4 | true | x |
+| 5 | true | z |
+| 6 | false | z |
```sql
@@ -4781,15 +4781,15 @@ Compute count of values matching specified condition grouped by category key. Ou
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | true | y |
-| 2 | true | x |
-| 3 | false | y |
-| 4 | true | x |
-| 5 | true | z |
-| 6 | true | z |
+| 0 | true | x |
+| 1 | true | y |
+| 2 | true | x |
+| 3 | false | y |
+| 4 | true | x |
+| 5 | true | z |
+| 6 | true | z |
```sql
@@ -4841,15 +4841,15 @@ Compute maximum of values matching specified condition grouped by category key.
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | false | y |
-| 2 | false | x |
-| 3 | true | y |
-| 4 | true | x |
-| 5 | true | z |
-| 6 | false | z |
+| 0 | true | x |
+| 1 | false | y |
+| 2 | false | x |
+| 3 | true | y |
+| 4 | true | x |
+| 5 | true | z |
+| 6 | false | z |
```sql
@@ -4901,15 +4901,15 @@ Compute minimum of values matching specified condition grouped by category key.
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | true | y |
-| 2 | true | x |
-| 3 | true | y |
-| 4 | false | x |
-| 5 | true | z |
-| 6 | true | z |
+| 0 | true | x |
+| 1 | true | y |
+| 2 | true | x |
+| 3 | true | y |
+| 4 | false | x |
+| 5 | true | z |
+| 6 | true | z |
```sql
@@ -4963,15 +4963,15 @@ For each group, ratio value is `value` expr count matches condtion divide total
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 2 | true | x |
-| 4 | true | x |
-| 1 | true | y |
-| 3 | false | y |
-| 5 | true | z |
-| 6 | true | z |
+| 0 | true | x |
+| 2 | true | x |
+| 4 | true | x |
+| 1 | true | y |
+| 3 | false | y |
+| 5 | true | z |
+| 6 | true | z |
```sql
@@ -5022,15 +5022,15 @@ Compute sum of values matching specified condition grouped by category key. Outp
Example:
-| value | condition | catagory |
+| value | condition | catagory |
| -------- | -------- | -------- |
-| 0 | true | x |
-| 1 | true | y |
-| 2 | false | x |
-| 3 | false | y |
-| 4 | true | x |
-| 5 | true | z |
-| 6 | true | z |
+| 0 | true | x |
+| 1 | true | y |
+| 2 | false | x |
+| 3 | false | y |
+| 4 | true | x |
+| 5 | true | z |
+| 6 | true | z |
```sql
@@ -5245,11 +5245,11 @@ Compute population variance of values, i.e., `sum((x_i - avg)^2) / n`
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 3 |
-| 6 |
+| 0 |
+| 3 |
+| 6 |
```sql
@@ -5286,11 +5286,11 @@ Compute population variance of values, i.e., `sum((x_i - avg)^2) / (n-1)`
Example:
-| value |
+| value |
| -------- |
-| 0 |
-| 3 |
-| 6 |
+| 0 |
+| 3 |
+| 6 |
```sql
diff --git a/docs/zh/quickstart/beginner_must_read.md b/docs/zh/quickstart/beginner_must_read.md
index def0e3728d1..117ad6fedb7 100644
--- a/docs/zh/quickstart/beginner_must_read.md
+++ b/docs/zh/quickstart/beginner_must_read.md
@@ -1,6 +1,6 @@
# 上手必读
-由于OpenMLDB是分布式系统,多种模式,客户端丰富,初次使用可能会有很多疑问,或者遇到一些运行、使用问题,本文从新手使用的角度,讲解如何进行诊断调试,需求帮助时如何提供有效信息给技术人员等等。
+由于OpenMLDB是分布式系统,多种模式,客户端丰富,初次使用可能会有很多疑问,或者遇到一些运行、使用问题,本文从新手使用的角度,讲解如何进行诊断调试,需要帮助时如何提供有效信息给技术人员等等。
## 错误诊断
@@ -22,7 +22,7 @@ openmldb_tool inspect [-c=0.0.0.0:2181/openmldb]
docker创建OpenMLDB见[快速上手](./openmldb_quickstart.md),请注意文档中有两个版本,单机版和集群版。请清楚自己要创建哪个版本,不要混合使用。
-启动成功的标准是可以使用CLI连接上OpenMLDB服务端(即使用`/work/openmldb/bin/openmldb`连接OpenMLDB,单机或集群均可以通过CLI连接),并且执行`show components;`可以看到OpenMLDB服务端组件的运行情况。
+启动成功的标准是可以使用CLI连接上OpenMLDB服务端(即使用`/work/openmldb/bin/openmldb`连接OpenMLDB,单机或集群均可以通过CLI连接),并且执行`show components;`可以看到OpenMLDB服务端组件的运行情况。推荐使用[诊断工具](../maintain/diagnose.md),执行status和inspect,可以得到更可靠的诊断结果。
如果CLI无法连接OpenMLDB,请先确认进程是否运行正常,可以通过`ps f|grep bin/openmldb`确认nameserver和tabletserver进程,集群版还需要通过`ps f | grep zoo.cfg`来确认zk服务,`ps f | grep TaskManagerServer`来确认taskmanager进程。
@@ -32,6 +32,20 @@ docker创建OpenMLDB见[快速上手](./openmldb_quickstart.md),请注意文
如果我们还需要OpenMLDB服务端的配置和日志,可以使用诊断工具获取,见[下文](#提供配置与日志获得技术支持)。
```
+### 运维
+
+集群各组件进程启动后,在使用过程中可能遇到各种变化,比如服务进程意外退出,需要重启服务进程,或者需要扩容服务进程。
+
+如果你需要保留已有的在线表,**不要主动地kill全部Tablet再重启**,保证Tablet只有单台在上下线。`stop-all.sh`和`start-all.sh`脚本是给快速重建集群用的,可能会导致在线表数据恢复失败,**不保证能修复**。
+
+当你发现进程变化或者主动操作其变化后,需要使用诊断工具进行诊断,确认集群状态是否正常:
+```bash
+openmldb_tool inspect # 主要命令
+openmldb_tool status --diff hosts # 可检查TaskManager等是否掉线,当然,你也可以手动判断
+```
+
+如果诊断出server offline,或是TaskManager等掉线,需要先启动回来。如果启动失败,请查看对应日志,提供错误信息。如果诊断结果提示需要recoverdata,请参考[OpenMLDB运维工具](../maintain/openmldb_ops.md)执行recoverdata。如果recoverdata脚本提示recover失败,或recover成功后再次inspect的结果仍然不正常,请提供日志给我们。
+
## 源数据
### LOAD DATA
@@ -56,15 +70,51 @@ docker创建OpenMLDB见[快速上手](./openmldb_quickstart.md),请注意文
csv文件格式有诸多不便,更推荐使用parquet格式,需要OpenMLDB集群版并启动taskmanager组件。
```
-## SQL限制
+## OpenMLDB SQL 开发和调试
OpenMLDB并不完全兼容标准SQL。所以,部分SQL执行会得不到预期结果。如果发现SQL执行不符合预期,请先查看下SQL是否满足[功能边界](./function_boundary.md)。
-## SQL执行
+为了方便使用 OpenMLDB SQL 进行开发、调试、验证,我们强烈推荐使用社区工具 [OpenMLDB SQL Emulator](https://github.com/vagetablechicken/OpenMLDBSQLEmulator) 来进行 SQL 模拟开发,可以节省大量的部署、编译、索引构建、任务运行等待时间,详见该项目 README https://github.com/vagetablechicken/OpenMLDBSQLEmulator
+
+### OpenMLDB SQL语法指南
+
+基于 OpenMLDB SQL 的特征计算,一般比较常使用`WINDOW`(包括`WINDOW UNION`),`LAST JOIN` 等子句来完成计算逻辑,它们能保证在任何模式下使用。可以跟随教程"基于 SQL 的特征开发"[(上)](../tutorial/tutorial_sql_1.md)[(下)](../tutorial/tutorial_sql_2.md)进行学习。
+
+如果使用`WHERE`,`WITH`,`HAVING`等子句,需要注意限制条件。在每个子句的详细文档中都有具体的说明,比如[`HAVING`子句](../openmldb_sql/dql/HAVING_CLAUSE.md)在在线请求模式中不支持。翻阅OpenMLDB SQL的DQL目录,或使用搜索功能,可以快速找到子句的详细文档。
+
+在不熟悉OpenMLDB SQL的情况下,我们建议从子句开始编写SQL,确保每个子句都能通过,再逐步组合成完整的SQL。
+
+推荐使用[OpenMLDB SQL Emulator](https://github.com/vagetablechicken/OpenMLDBSQLEmulator)进行SQL探索和验证,SQL验证完成后再去真实集群进行上线,可以避免浪费大量时间在索引构建、数据导入、任务等待等过程上。 Emulator 可以不依赖真实OpenMLDB集群,在一个交互式虚拟环境中,快速创建表、校验SQL、导出当前环境等等,详情参考该项目的 README 。使用 Emulator 不需要操作集群,也就不需要测试后清理集群,还可通过少量的数据进行SQL运行测试,比较适合SQL探索时期。
+
+### OpenMLDB SQL 语法错误提示
+
+当发现SQL编译报错时,需要查看错误信息。例如`Syntax error: Expected XXX but got keyword YYY`错误,它说明SQL不符合语法,通常是某些关键字写错了位置,或并没有这种写法。详情需要查询错误的子句文档,可注意子句的`Syntax`章节,它详细说明了每个部分的组成,请检查SQL是否符合要求。
+
+比如,[`WINDOW`子句](../openmldb_sql/dql/WINDOW_CLAUSE.md#syntax)中`WindowFrameClause (WindowAttribute)*`部分,我们再拆解它就是`WindowFrameUnits WindowFrameBounds [WindowFrameMaxSize] (WindowAttribute)*`。那么,`WindowFrameUnits WindowFrameBounds MAXSIZE 10 EXCLUDE CURRENT_TIME`就是符合语法的,`WindowFrameUnits WindowFrameBounds EXCLUDE CURRENT_TIME MAXSIZE 10`就是不符合语法的,不能把`WindowFrameMaxSize`放到`WindowFrameClause`外面。
-OpenMLDB所有命令均为SQL,如果SQL执行失败或交互有问题(不知道命令是否执行成功),请先确认SQL书写是否有误,命令并未执行,还是命令进入了执行阶段。
+### OpenMLDB SQL 计算正确性调试
-例如,下面提示Syntax error的是SQL书写有误,请参考[sql reference](../../openmldb_sql/)纠正错误。
+SQL编译通过以后,可以基于数据进行计算。如果计算结果不符合预期,请逐步检查:
+- 无论SQL是一列还是多列计算结果不符合预期,都建议先选择**其中一列**进行调试。
+- 如果你的表数据较多,建议使用小数据量(几行,几十行的量级)来测试,也可以使用OpenMLDB SQL Emulator的[运行toydb](https://github.com/vagetablechicken/OpenMLDBSQLEmulator#run-in-toydb)功能,构造case进行测试。
+- 该列是不是表示了自己想表达的意思,是否使用了不符合预期的函数,或者函数参数错误。
+- 该列如果是窗口聚合的结果,是不是WINDOW定义错误,导致窗口范围不对。参考[推断窗口](../openmldb_sql/dql/WINDOW_CLAUSE.md#如何推断窗口是什么样的)进行检查,使用小数据进行验证测试。
+
+如果你仍然无法解决问题,可以提供 OpenMLDB SQL Emulator 的 yaml case 。如果是在集群中进行的测试,请[提供复现脚本](#构造-openmldb-sql-复现脚本)。
+
+### 在线请求模式测试
+
+SQL上线,等价于`DEPLOY `成功。但`DEPLOY`操作是一个很“重”的操作,SQL如果可以上线,将会创建或修改索引并复制数据到新索引。所以,在SQL探索期使用`DEPLOY`测试SQL是否能上线,是比较浪费资源的,尤其是某些SQL可能需要多次修改才能上线,多次的`DEPLOY`可能产生很多无用的索引。在探索期间,可能还会修改表Schema,又需要删除和再创建。这些操作都是只能手动处理,比较繁琐。
+
+如果你对OpenMLDB SQL较熟悉,一些场景下可以用“在线预览模式”进行测试,但“在线预览模式”不等于“在线请求模式”,不能保证一定可以上线。如果你对索引较为熟悉,可以通过`EXPLAIN `来确认SQL是否可以上线,但`EXPLAIN`的检查较为严格,可能因为当前表没有匹配的索引,而判定SQL无法在“在线请求模式”中执行(因为无索引而无法保证实时性能,所以被拒绝)。
+
+目前只有Java SDK可以使用[validateSQLInRequest](./sdk/java_sdk.md#sql-校验)方法来检验,使用上稍麻烦。我们推荐使用 OpenMLDB SQL Emulator 来测试。在 Emulator 中,通过简单语法创建表,再使用`valreq `可以判断是否能上线。
+
+## OpenMLDB SQL 执行
+
+OpenMLDB 所有命令均为 SQL,如果 SQL 执行失败或交互有问题(不知道命令是否执行成功),请先确认 SQL 书写是否有误,命令并未执行,还是命令进入了执行阶段。
+
+例如,下面提示Syntax error的是SQL书写有误,请参考[OpenMLDB SQL语法指南](#openmldb-sql语法指南)纠正错误。
```
127.0.0.1:7527/db> create table t1(c1 int;
Error: Syntax error: Expected ")" or "," but got ";" [at 1:23]
@@ -79,9 +129,7 @@ create table t1(c1 int;
我们需要特别注意集群版的一些使用逻辑。
-### 集群版SQL执行
-
-#### 离线
+### 集群版离线 SQL 执行注意事项
如果是集群离线命令,默认异步模式下,发送命令会得到job id的返回。可使用`show job `来查询job执行情况。
@@ -95,13 +143,13 @@ create table t1(c1 int;
如果你无法通过show joblog获得日志,或者想要直接拿到日志文件,可以直接在TaskManager机器上获取。日志地址由taskmanager.properties的`job.log.path`配置,如果你改变了此配置项,需要到配置的目录中寻找日志。stdout查询结果默认在`/work/openmldb/taskmanager/bin/logs/job_x.log`,stderr job运行日志默认在`/work/openmldb/taskmanager/bin/logs/job_x_error.log`(注意有error后缀)。
```
-#### 在线
+### 集群版在线 SQL 执行注意事项
-集群版在线模式下,我们通常只推荐使用`DEPLOY`创建deployment,HTTP访问APIServer执行deployment做实时特征计算。在CLI或其他客户端中,直接在在线中进行SELECT查询,称为“在线预览”。在线预览有诸多限制,详情请参考[功能边界-集群版在线预览模式](../function_boundary.md#集群版在线预览模式),请不要执行不支持的SQL。
+集群版在线模式下,我们通常只推荐两种使用方式:`DEPLOY`创建deployment,执行deployment做实时特征计算(SDK请求deployment,或HTTP访问APIServer请求deployment)。在CLI或其他客户端中,可以直接在“在线”中进行SELECT查询,称为“在线预览”。在线预览有诸多限制,详情请参考[功能边界-集群版在线预览模式](./function_boundary.md#集群版在线预览模式),请不要执行不支持的SQL。
-### 提供复现脚本
+### 构造 OpenMLDB SQL 复现脚本
-如果你通过自主诊断,无法解决问题,请向我们提供复现脚本。一个完整的复现脚本,如下所示:
+如果你的 SQL 执行不符合预期,通过自主诊断仍无法解决问题,请向我们提供一个完整的复现脚本。如果仅涉及在线SQL计算或校验SQL,推荐使用[OpenMLDB SQL Emulator](https://github.com/vagetablechicken/OpenMLDBSQLEmulator#run-in-toydb) 构造可复现的 yaml case。如果涉及到数据导入等必须使用 OpenMLDB 集群的操作,请提供可复现脚本,其结构如下所示:
```
create database db;
@@ -134,7 +182,7 @@ set @@execute_mode='';
请注意离线job默认为异步。如果你需要离线导入再查询,请设置为同步模式,详情见[离线命令配置详情](../openmldb_sql/ddl/SET_STATEMENT.md#离线命令配置详情)。否则导入还未完成就进行查询,是无意义的。
```
-## 提供配置与日志,获得技术支持
+### 提供配置与日志,获得技术支持
如果你的SQL执行问题无法通过复现脚本复现,或者并非SQL执行问题而是集群管理问题,那么请提供客户端和服务端的配置与日志,以便我们调查。
@@ -151,3 +199,11 @@ openmldb_tool --env=onebox --dist_conf=standalone_dist.yml
如果是分布式的集群,需要配置ssh免密才能顺利使用诊断工具,参考文档[诊断工具](../maintain/diagnose.md)。
如果你的环境无法做到,请手动获取配置与日志。
+
+## 性能统计
+
+deployment耗时统计需要开启:
+```
+SET GLOBAL deploy_stats = 'on';
+```
+开启后的Deployment执行都将被统计,之前的不会被统计,表中的数据不包含集群外部的网络耗时,仅统计deployment在server端从开始执行到结束的时间。
diff --git a/docs/zh/quickstart/sdk/rest_api.md b/docs/zh/quickstart/sdk/rest_api.md
index 0526127cd29..0a225e444f6 100644
--- a/docs/zh/quickstart/sdk/rest_api.md
+++ b/docs/zh/quickstart/sdk/rest_api.md
@@ -5,6 +5,18 @@
- REST APIs 通过 APIServer 和 OpenMLDB 的服务进行交互,因此 APIServer 模块必须被正确部署才能有效使用。APISever 在安装部署时是可选模块,参照 [APIServer 部署文档](../../deploy/install_deploy.md#部署-apiserver)。
- 现阶段,APIServer 主要用来做功能测试使用,并不推荐用来测试性能,也不推荐在生产环境使用。APIServer 的默认部署目前并没有高可用机制,并且引入了额外的网络和编解码开销。生产环境推荐使用 Java SDK,功能覆盖最完善,并且在功能、性能上都经过了充分测试。
+## JSON Body
+
+与APIServer的交互中,请求体均为JSON格式,并支持一定的扩展格式。注意以下几点:
+
+- 传入超过整型或浮点数最大值的数值,将会解析失败,比如,double类型传入`1e1000`。
+- 非数值浮点数:在传入数据时,支持传入`NaN`、`Infinity`、`-Infinity`,与缩写`Inf`、`-Inf`(注意是unquoted的,并非字符串,也不支持其他变种写法)。在返回数据时,支持返回`NaN`、`Infinity`、`-Infinity`(不支持变种写法)。如果你需要将三者转换为null,可以配置 `write_nan_and_inf_null`。
+- 可以传入整型数字到浮点数,比如,`1`可被读取为double。
+- float浮点数可能有精度损失,比如,`0.3`读取后将不会严格等于`0.3`,而是`0.30000000000000004`。我们不拒绝精度损失,请从业务层面考虑是否需要对此进行处理。传入超过float max但不超过double max的值,在读取后将成为`Inf`。
+- `true/false`、`null`并不支持大写,只支持小写。
+- timestamp类型暂不支持传入年月日字符串,只支持传入数值,比如`1635247427000`。
+- date类型请传入**年月日字符串**,中间不要包含任何空格。
+
## 数据插入
请求地址:http://ip:port/dbs/{db_name}/tables/{table_name}
@@ -55,7 +67,8 @@ curl http://127.0.0.1:8080/dbs/db/tables/trans -X PUT -d '{
```JSON
{
"input": [["row0_value0", "row0_value1", "row0_value2"], ["row1_value0", "row1_value1", "row1_value2"], ...],
- "need_schema": false
+ "need_schema": false,
+ "write_nan_and_inf_null": false
}
```
@@ -73,6 +86,7 @@ curl http://127.0.0.1:8080/dbs/db/tables/trans -X PUT -d '{
- 可以支持多行,其结果与返回的 response 中的 data.data 字段的数组一一对应。
- need_schema 可以设置为 true, 返回就会有输出结果的 schema。可选参数,默认为 false。
+- write_nan_and_inf_null 可以设置为 true,可选参数,默认为false。如果设置为 true,当输出数据中有 NaN、Inf、-Inf 时,会将其转换为 null。
- input 为 array 格式/JSON 格式时候返回结果也是 array 格式/JSON 格式,一次请求的 input 只支持一种格式,请不要混合格式。
- JSON 格式的 input 数据可以有多余列。
@@ -131,7 +145,8 @@ curl http://127.0.0.1:8080/dbs/demo_db/deployments/demo_data_service -X POST -d'
"input": {
"schema": [],
"data": []
- }
+ },
+ "write_nan_and_inf_null": false
}
```
diff --git a/docs/zh/tutorial/index.rst b/docs/zh/tutorial/index.rst
index cce68996ded..7406fda41a9 100644
--- a/docs/zh/tutorial/index.rst
+++ b/docs/zh/tutorial/index.rst
@@ -9,7 +9,6 @@
data_import_guide
tutorial_sql_1
tutorial_sql_2
- modes
openmldbspark_distribution
data_import
data_export
diff --git a/hybridse/examples/toydb/src/storage/table_iterator.cc b/hybridse/examples/toydb/src/storage/table_iterator.cc
index 45561cd52a1..8ea4a3e0349 100644
--- a/hybridse/examples/toydb/src/storage/table_iterator.cc
+++ b/hybridse/examples/toydb/src/storage/table_iterator.cc
@@ -62,7 +62,7 @@ WindowTableIterator::WindowTableIterator(Segment*** segments, uint32_t seg_cnt,
seg_idx_(0),
pk_it_(),
table_(table) {
- GoToStart();
+ SeekToFirst();
}
WindowTableIterator::~WindowTableIterator() {}
@@ -80,7 +80,7 @@ void WindowTableIterator::Seek(const std::string& key) {
pk_it_->Seek(pk);
}
-void WindowTableIterator::SeekToFirst() {}
+void WindowTableIterator::SeekToFirst() { GoToStart(); }
std::unique_ptr WindowTableIterator::GetValue() {
if (!pk_it_)
diff --git a/hybridse/examples/toydb/src/tablet/tablet_catalog.cc b/hybridse/examples/toydb/src/tablet/tablet_catalog.cc
index feeb750ab6f..81764df9da6 100644
--- a/hybridse/examples/toydb/src/tablet/tablet_catalog.cc
+++ b/hybridse/examples/toydb/src/tablet/tablet_catalog.cc
@@ -19,7 +19,6 @@
#include
#include
#include
-#include "codec/list_iterator_codec.h"
#include "glog/logging.h"
#include "storage/table_iterator.h"
@@ -99,13 +98,6 @@ bool TabletTableHandler::Init() {
return true;
}
-std::unique_ptr TabletTableHandler::GetIterator() {
- std::unique_ptr it(
- new storage::FullTableIterator(table_->GetSegments(),
- table_->GetSegCnt(), table_));
- return std::move(it);
-}
-
std::unique_ptr TabletTableHandler::GetWindowIterator(
const std::string& idx_name) {
auto iter = index_hint_.find(idx_name);
@@ -136,22 +128,6 @@ RowIterator* TabletTableHandler::GetRawIterator() {
return new storage::FullTableIterator(table_->GetSegments(),
table_->GetSegCnt(), table_);
}
-const uint64_t TabletTableHandler::GetCount() {
- auto iter = GetIterator();
- uint64_t cnt = 0;
- while (iter->Valid()) {
- iter->Next();
- cnt++;
- }
- return cnt;
-}
-Row TabletTableHandler::At(uint64_t pos) {
- auto iter = GetIterator();
- while (pos-- > 0 && iter->Valid()) {
- iter->Next();
- }
- return iter->Valid() ? iter->GetValue() : Row();
-}
TabletCatalog::TabletCatalog() : tables_(), db_() {}
@@ -249,22 +225,6 @@ std::unique_ptr TabletSegmentHandler::GetWindowIterator(
const std::string& idx_name) {
return std::unique_ptr();
}
-const uint64_t TabletSegmentHandler::GetCount() {
- auto iter = GetIterator();
- uint64_t cnt = 0;
- while (iter->Valid()) {
- cnt++;
- iter->Next();
- }
- return cnt;
-}
-Row TabletSegmentHandler::At(uint64_t pos) {
- auto iter = GetIterator();
- while (pos-- > 0 && iter->Valid()) {
- iter->Next();
- }
- return iter->Valid() ? iter->GetValue() : Row();
-}
const uint64_t TabletPartitionHandler::GetCount() {
auto iter = GetWindowIterator();
@@ -275,5 +235,6 @@ const uint64_t TabletPartitionHandler::GetCount() {
}
return cnt;
}
+
} // namespace tablet
} // namespace hybridse
diff --git a/hybridse/examples/toydb/src/tablet/tablet_catalog.h b/hybridse/examples/toydb/src/tablet/tablet_catalog.h
index fa41140a495..9d2e8b907e5 100644
--- a/hybridse/examples/toydb/src/tablet/tablet_catalog.h
+++ b/hybridse/examples/toydb/src/tablet/tablet_catalog.h
@@ -21,7 +21,6 @@
#include
#include
#include
-#include "base/spin_lock.h"
#include "storage/table_impl.h"
#include "vm/catalog.h"
@@ -68,8 +67,6 @@ class TabletSegmentHandler : public TableHandler {
std::unique_ptr GetIterator() override;
RowIterator* GetRawIterator() override;
std::unique_ptr GetWindowIterator(const std::string& idx_name) override;
- const uint64_t GetCount() override;
- Row At(uint64_t pos) override;
const std::string GetHandlerTypeName() override {
return "TabletSegmentHandler";
}
@@ -79,7 +76,7 @@ class TabletSegmentHandler : public TableHandler {
std::string key_;
};
-class TabletPartitionHandler
+class TabletPartitionHandler final
: public PartitionHandler,
public std::enable_shared_from_this {
public:
@@ -91,6 +88,8 @@ class TabletPartitionHandler
~TabletPartitionHandler() {}
+ RowIterator* GetRawIterator() override { return table_handler_->GetRawIterator(); }
+
const OrderType GetOrderType() const override { return OrderType::kDescOrder; }
const vm::Schema* GetSchema() override { return table_handler_->GetSchema(); }
@@ -104,6 +103,7 @@ class TabletPartitionHandler
std::unique_ptr GetWindowIterator() override {
return table_handler_->GetWindowIterator(index_name_);
}
+
const uint64_t GetCount() override;
std::shared_ptr GetSegment(const std::string& key) override {
@@ -119,7 +119,7 @@ class TabletPartitionHandler
vm::IndexHint index_hint_;
};
-class TabletTableHandler
+class TabletTableHandler final
: public vm::TableHandler,
public std::enable_shared_from_this {
public:
@@ -135,28 +135,23 @@ class TabletTableHandler
bool Init();
- inline const vm::Schema* GetSchema() { return &schema_; }
+ const vm::Schema* GetSchema() override { return &schema_; }
- inline const std::string& GetName() { return name_; }
+ const std::string& GetName() override { return name_; }
- inline const std::string& GetDatabase() { return db_; }
+ const std::string& GetDatabase() override { return db_; }
- inline const vm::Types& GetTypes() { return types_; }
+ const vm::Types& GetTypes() override { return types_; }
- inline const vm::IndexHint& GetIndex() { return index_hint_; }
+ const vm::IndexHint& GetIndex() override { return index_hint_; }
const Row Get(int32_t pos);
- inline std::shared_ptr GetTable() { return table_; }
- std::unique_ptr GetIterator();
+ std::shared_ptr GetTable() { return table_; }
RowIterator* GetRawIterator() override;
- std::unique_ptr GetWindowIterator(
- const std::string& idx_name);
- virtual const uint64_t GetCount();
- Row At(uint64_t pos) override;
+ std::unique_ptr GetWindowIterator(const std::string& idx_name) override;
- virtual std::shared_ptr GetPartition(
- const std::string& index_name) {
+ std::shared_ptr GetPartition(const std::string& index_name) override {
if (index_hint_.find(index_name) == index_hint_.cend()) {
LOG(WARNING)
<< "fail to get partition for tablet table handler, index name "
@@ -169,12 +164,12 @@ class TabletTableHandler
const std::string GetHandlerTypeName() override {
return "TabletTableHandler";
}
- virtual std::shared_ptr GetTablet(
- const std::string& index_name, const std::string& pk) {
+ std::shared_ptr GetTablet(const std::string& index_name,
+ const std::string& pk) override {
return tablet_;
}
- virtual std::shared_ptr GetTablet(
- const std::string& index_name, const std::vector& pks) {
+ std::shared_ptr GetTablet(const std::string& index_name,
+ const std::vector& pks) override {
return tablet_;
}
diff --git a/hybridse/examples/toydb/src/testing/toydb_engine_test_base.cc b/hybridse/examples/toydb/src/testing/toydb_engine_test_base.cc
index fcaa71d8373..35a595b431e 100644
--- a/hybridse/examples/toydb/src/testing/toydb_engine_test_base.cc
+++ b/hybridse/examples/toydb/src/testing/toydb_engine_test_base.cc
@@ -15,8 +15,9 @@
*/
#include "testing/toydb_engine_test_base.h"
+
+#include "absl/strings/str_join.h"
#include "gtest/gtest.h"
-#include "gtest/internal/gtest-param-util.h"
using namespace llvm; // NOLINT (build/namespaces)
using namespace llvm::orc; // NOLINT (build/namespaces)
@@ -141,18 +142,12 @@ std::shared_ptr BuildOnePkTableStorage(
}
return catalog;
}
-void BatchRequestEngineCheckWithCommonColumnIndices(
- const SqlCase& sql_case, const EngineOptions options,
- const std::set& common_column_indices) {
- std::ostringstream oss;
- for (size_t index : common_column_indices) {
- oss << index << ",";
- }
- LOG(INFO) << "BatchRequestEngineCheckWithCommonColumnIndices: "
- "common_column_indices = ["
- << oss.str() << "]";
- ToydbBatchRequestEngineTestRunner engine_test(sql_case, options,
- common_column_indices);
+// Run check with common column index info
+void BatchRequestEngineCheckWithCommonColumnIndices(const SqlCase& sql_case, const EngineOptions options,
+ const std::set& common_column_indices) {
+ LOG(INFO) << "BatchRequestEngineCheckWithCommonColumnIndices: common_column_indices = ["
+ << absl::StrJoin(common_column_indices, ",") << "]";
+ ToydbBatchRequestEngineTestRunner engine_test(sql_case, options, common_column_indices);
engine_test.RunCheck();
}
diff --git a/hybridse/include/codec/fe_row_codec.h b/hybridse/include/codec/fe_row_codec.h
index 1e0e5b1badc..0e0b153f5a5 100644
--- a/hybridse/include/codec/fe_row_codec.h
+++ b/hybridse/include/codec/fe_row_codec.h
@@ -157,6 +157,9 @@ class RowView {
const Schema* GetSchema() const { return &schema_; }
inline bool IsNULL(const int8_t* row, uint32_t idx) const {
+ if (row == nullptr) {
+ return true;
+ }
const int8_t* ptr = row + HEADER_LENGTH + (idx >> 3);
return *(reinterpret_cast(ptr)) & (1 << (idx & 0x07));
}
diff --git a/hybridse/include/codec/row.h b/hybridse/include/codec/row.h
index cd6abb0a3a1..69158d41e85 100644
--- a/hybridse/include/codec/row.h
+++ b/hybridse/include/codec/row.h
@@ -54,7 +54,7 @@ class Row {
inline int32_t size() const { return slice_.size(); }
inline int32_t size(int32_t pos) const {
- return 0 == pos ? slice_.size() : slices_[pos - 1].size();
+ return 0 == pos ? slice_.size() : slices_.at(pos - 1).size();
}
// Return true if the length of the referenced data is zero
diff --git a/hybridse/include/codec/row_iterator.h b/hybridse/include/codec/row_iterator.h
index 2075918666c..fa60d21a37e 100644
--- a/hybridse/include/codec/row_iterator.h
+++ b/hybridse/include/codec/row_iterator.h
@@ -71,7 +71,14 @@ class WindowIterator {
virtual bool Valid() = 0;
/// Return the RowIterator of current segment
/// of dataset if Valid() return `true`.
- virtual std::unique_ptr GetValue() = 0;
+ virtual std::unique_ptr GetValue() {
+ auto p = GetRawValue();
+ if (!p) {
+ return nullptr;
+ }
+
+ return std::unique_ptr(p);
+ }
/// Return the RowIterator of current segment
/// of dataset if Valid() return `true`.
virtual RowIterator *GetRawValue() = 0;
diff --git a/hybridse/include/codec/row_list.h b/hybridse/include/codec/row_list.h
index b32ad24c3eb..f601b207b9c 100644
--- a/hybridse/include/codec/row_list.h
+++ b/hybridse/include/codec/row_list.h
@@ -65,7 +65,13 @@ class ListV {
ListV() {}
virtual ~ListV() {}
/// \brief Return the const iterator
- virtual std::unique_ptr> GetIterator() = 0;
+ virtual std::unique_ptr> GetIterator() {
+ auto raw = GetRawIterator();
+ if (raw == nullptr) {
+ return {};
+ }
+ return std::unique_ptr>(raw);
+ }
/// \brief Return the const iterator raw pointer
virtual ConstIterator *GetRawIterator() = 0;
@@ -76,7 +82,7 @@ class ListV {
virtual const uint64_t GetCount() {
auto iter = GetIterator();
uint64_t cnt = 0;
- while (iter->Valid()) {
+ while (iter && iter->Valid()) {
iter->Next();
cnt++;
}
diff --git a/hybridse/include/node/node_enum.h b/hybridse/include/node/node_enum.h
index 16e18291478..baa3bdb2afe 100644
--- a/hybridse/include/node/node_enum.h
+++ b/hybridse/include/node/node_enum.h
@@ -97,6 +97,7 @@ enum SqlNodeType {
kWithClauseEntry,
kAlterTableStmt,
kShowStmt,
+ kCompressType,
kSqlNodeTypeLast, // debug type
};
@@ -251,7 +252,7 @@ enum JoinType {
kJoinTypeRight,
kJoinTypeInner,
kJoinTypeConcat,
- kJoinTypeComma
+ kJoinTypeCross, // AKA comma join
};
enum UnionType { kUnionTypeDistinct, kUnionTypeAll };
@@ -284,6 +285,7 @@ enum CmdType {
kCmdDropFunction,
kCmdShowJobLog,
kCmdShowCreateTable,
+ kCmdTruncate,
kCmdFake, // not a real cmd, for testing purpose only
kLastCmd = kCmdFake,
};
@@ -342,6 +344,11 @@ enum StorageMode {
kHDD = 3,
};
+enum CompressType {
+ kNoCompress = 0,
+ kSnappy = 1,
+};
+
// batch plan node type
enum BatchPlanNodeType { kBatchDataset, kBatchPartition, kBatchMap };
diff --git a/hybridse/include/node/node_manager.h b/hybridse/include/node/node_manager.h
index ab87e588a53..e70f0a59564 100644
--- a/hybridse/include/node/node_manager.h
+++ b/hybridse/include/node/node_manager.h
@@ -399,8 +399,6 @@ class NodeManager {
SqlNode *MakeReplicaNumNode(int num);
- SqlNode *MakeStorageModeNode(StorageMode storage_mode);
-
SqlNode *MakePartitionNumNode(int num);
SqlNode *MakeDistributionsNode(const NodePointVector& distribution_list);
diff --git a/hybridse/include/node/sql_node.h b/hybridse/include/node/sql_node.h
index bbdfc83313f..30f7a6cc34a 100644
--- a/hybridse/include/node/sql_node.h
+++ b/hybridse/include/node/sql_node.h
@@ -25,6 +25,7 @@
#include
#include "absl/status/statusor.h"
+#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "boost/algorithm/string.hpp"
@@ -309,17 +310,26 @@ inline const std::string StorageModeName(StorageMode mode) {
}
inline const StorageMode NameToStorageMode(const std::string& name) {
- if (boost::iequals(name, "memory")) {
+ if (absl::EqualsIgnoreCase(name, "memory")) {
return kMemory;
- } else if (boost::iequals(name, "hdd")) {
+ } else if (absl::EqualsIgnoreCase(name, "hdd")) {
return kHDD;
- } else if (boost::iequals(name, "ssd")) {
+ } else if (absl::EqualsIgnoreCase(name, "ssd")) {
return kSSD;
} else {
return kUnknown;
}
}
+inline absl::StatusOr NameToCompressType(const std::string& name) {
+ if (absl::EqualsIgnoreCase(name, "snappy")) {
+ return CompressType::kSnappy;
+ } else if (absl::EqualsIgnoreCase(name, "nocompress")) {
+ return CompressType::kNoCompress;
+ }
+ return absl::Status(absl::StatusCode::kInvalidArgument, absl::StrCat("invalid compress type: ", name));
+}
+
inline const std::string RoleTypeName(RoleType type) {
switch (type) {
case kLeader:
@@ -1166,6 +1176,9 @@ class FrameBound : public SqlNode {
int64_t GetOffset() const { return offset_; }
void SetOffset(int64_t v) { offset_ = v; }
+ // is offset [OPEN] PRECEDING/FOLLOWING
+ bool is_offset_bound() const;
+
/// \brief get the inclusive frame bound offset value that has signed symbol
///
@@ -1881,6 +1894,23 @@ class StorageModeNode : public SqlNode {
StorageMode storage_mode_;
};
+class CompressTypeNode : public SqlNode {
+ public:
+ CompressTypeNode() : SqlNode(kCompressType, 0, 0), compress_type_(kNoCompress) {}
+
+ explicit CompressTypeNode(CompressType compress_type)
+ : SqlNode(kCompressType, 0, 0), compress_type_(compress_type) {}
+
+ ~CompressTypeNode() {}
+
+ CompressType GetCompressType() const { return compress_type_; }
+
+ void Print(std::ostream &output, const std::string &org_tab) const;
+
+ private:
+ CompressType compress_type_;
+};
+
class CreateTableLikeClause {
public:
CreateTableLikeClause() = default;
diff --git a/hybridse/include/vm/catalog.h b/hybridse/include/vm/catalog.h
index 30e68316606..4bd007645bd 100644
--- a/hybridse/include/vm/catalog.h
+++ b/hybridse/include/vm/catalog.h
@@ -217,6 +217,7 @@ class TableHandler : public DataHandler {
virtual ~TableHandler() {}
/// Return table column Types information.
+ /// TODO: rm it, never used
virtual const Types& GetTypes() = 0;
/// Return the index information
@@ -224,8 +225,7 @@ class TableHandler : public DataHandler {
/// Return WindowIterator
/// so that user can use it to iterate datasets segment by segment.
- virtual std::unique_ptr GetWindowIterator(
- const std::string& idx_name) = 0;
+ virtual std::unique_ptr GetWindowIterator(const std::string& idx_name) { return nullptr; }
/// Return the HandlerType of the dataset.
/// Return HandlerType::kTableHandler by default
@@ -254,8 +254,7 @@ class TableHandler : public DataHandler {
/// Return Tablet binding to specify index and keys.
/// Return `null` by default.
- virtual std::shared_ptr GetTablet(
- const std::string& index_name, const std::vector& pks) {
+ virtual std::shared_ptr GetTablet(const std::string& index_name, const std::vector& pks) {
return std::shared_ptr();
}
};
@@ -286,27 +285,19 @@ class ErrorTableHandler : public TableHandler {
/// Return empty column Types.
const Types& GetTypes() override { return types_; }
/// Return empty table Schema.
- inline const Schema* GetSchema() override { return schema_; }
+ const Schema* GetSchema() override { return schema_; }
/// Return empty table name
- inline const std::string& GetName() override { return table_name_; }
+ const std::string& GetName() override { return table_name_; }
/// Return empty indexn information
- inline const IndexHint& GetIndex() override { return index_hint_; }
+ const IndexHint& GetIndex() override { return index_hint_; }
/// Return name of database
- inline const std::string& GetDatabase() override { return db_; }
+ const std::string& GetDatabase() override { return db_; }
/// Return null iterator
- std::unique_ptr GetIterator() {
- return std::unique_ptr();
- }
- /// Return null iterator
- RowIterator* GetRawIterator() { return nullptr; }
- /// Return null window iterator
- std::unique_ptr GetWindowIterator(
- const std::string& idx_name) {
- return std::unique_ptr();
- }
+ RowIterator* GetRawIterator() override { return nullptr; }
+
/// Return empty row
- virtual Row At(uint64_t pos) { return Row(); }
+ Row At(uint64_t pos) override { return Row(); }
/// Return 0
const uint64_t GetCount() override { return 0; }
@@ -317,7 +308,7 @@ class ErrorTableHandler : public TableHandler {
}
/// Return status
- virtual base::Status GetStatus() { return status_; }
+ base::Status GetStatus() override { return status_; }
protected:
base::Status status_;
@@ -340,16 +331,11 @@ class PartitionHandler : public TableHandler {
PartitionHandler() : TableHandler() {}
~PartitionHandler() {}
- /// Return the iterator of row iterator.
- /// Return null by default
- virtual std::unique_ptr GetIterator() {
- return std::unique_ptr();
- }
- /// Return the iterator of row iterator
- /// Return null by default
- RowIterator* GetRawIterator() { return nullptr; }
- virtual std::unique_ptr GetWindowIterator(
- const std::string& idx_name) {
+ // Return the iterator of row iterator
+ // Return null by default
+ RowIterator* GetRawIterator() override { return nullptr; }
+
+ std::unique_ptr GetWindowIterator(const std::string& idx_name) override {
return std::unique_ptr();
}
@@ -361,18 +347,15 @@ class PartitionHandler : public TableHandler {
const HandlerType GetHandlerType() override { return kPartitionHandler; }
/// Return empty row, cause partition dataset does not support At operation.
- virtual Row At(uint64_t pos) { return Row(); }
+ // virtual Row At(uint64_t pos) { return Row(); }
/// Return Return table handler of specific segment binding to given key.
/// Return `null` by default.
- virtual std::shared_ptr GetSegment(const std::string& key) {
- return std::shared_ptr();
- }
+ virtual std::shared_ptr GetSegment(const std::string& key) = 0;
/// Return a sequence of table handles of specify segments binding to given
/// keys set.
- virtual std::vector> GetSegments(
- const std::vector& keys) {
+ virtual std::vector> GetSegments(const std::vector& keys) {
std::vector> segments;
for (auto key : keys) {
segments.push_back(GetSegment(key));
@@ -383,9 +366,6 @@ class PartitionHandler : public TableHandler {
const std::string GetHandlerTypeName() override {
return "PartitionHandler";
}
- /// Return order type of the dataset,
- /// and return kNoneOrder by default.
- const OrderType GetOrderType() const { return kNoneOrder; }
};
/// \brief A wrapper of table handler which is used as a asynchronous row
diff --git a/hybridse/include/vm/mem_catalog.h b/hybridse/include/vm/mem_catalog.h
index 2fc5df4960c..6237edd1d43 100644
--- a/hybridse/include/vm/mem_catalog.h
+++ b/hybridse/include/vm/mem_catalog.h
@@ -25,8 +25,6 @@
#include
#include
#include
-#include "base/fe_slice.h"
-#include "codec/list_iterator_codec.h"
#include "glog/logging.h"
#include "vm/catalog.h"
@@ -66,11 +64,11 @@ class MemTimeTableIterator : public RowIterator {
MemTimeTableIterator(const MemTimeTable* table, const vm::Schema* schema,
int32_t start, int32_t end);
~MemTimeTableIterator();
- void Seek(const uint64_t& ts);
- void SeekToFirst();
- const uint64_t& GetKey() const;
- void Next();
- bool Valid() const;
+ void Seek(const uint64_t& ts) override;
+ void SeekToFirst() override;
+ const uint64_t& GetKey() const override;
+ void Next() override;
+ bool Valid() const override;
const Row& GetValue() override;
bool IsSeekable() const override;
@@ -88,12 +86,12 @@ class MemTableIterator : public RowIterator {
MemTableIterator(const MemTable* table, const vm::Schema* schema,
int32_t start, int32_t end);
~MemTableIterator();
- void Seek(const uint64_t& ts);
- void SeekToFirst();
- const uint64_t& GetKey() const;
- const Row& GetValue();
- void Next();
- bool Valid() const;
+ void Seek(const uint64_t& ts) override;
+ void SeekToFirst() override;
+ const uint64_t& GetKey() const override;
+ const Row& GetValue() override;
+ void Next() override;
+ bool Valid() const override;
bool IsSeekable() const override;
private:
@@ -115,7 +113,6 @@ class MemWindowIterator : public WindowIterator {
void SeekToFirst();
void Next();
bool Valid();
- std::unique_ptr GetValue();
RowIterator* GetRawValue();
const Row GetKey();
@@ -157,24 +154,21 @@ class MemTableHandler : public TableHandler {
~MemTableHandler() override;
const Types& GetTypes() override { return types_; }
- inline const Schema* GetSchema() { return schema_; }
- inline const std::string& GetName() { return table_name_; }
- inline const IndexHint& GetIndex() { return index_hint_; }
- inline const std::string& GetDatabase() { return db_; }
+ const Schema* GetSchema() override { return schema_; }
+ const std::string& GetName() override { return table_name_; }
+ const IndexHint& GetIndex() override { return index_hint_; }
+ const std::string& GetDatabase() override { return db_; }
- std::unique_ptr GetIterator() override;
RowIterator* GetRawIterator() override;
- std::unique_ptr GetWindowIterator(
- const std::string& idx_name);
void AddRow(const Row& row);
void Reverse();
- virtual const uint64_t GetCount() { return table_.size(); }
- virtual Row At(uint64_t pos) {
+ const uint64_t GetCount() override { return table_.size(); }
+ Row At(uint64_t pos) override {
return pos < table_.size() ? table_.at(pos) : Row();
}
- const OrderType GetOrderType() const { return order_type_; }
+ const OrderType GetOrderType() const override { return order_type_; }
void SetOrderType(const OrderType order_type) { order_type_ = order_type; }
const std::string GetHandlerTypeName() override {
return "MemTableHandler";
@@ -200,14 +194,11 @@ class MemTimeTableHandler : public TableHandler {
const Schema* schema);
const Types& GetTypes() override;
~MemTimeTableHandler() override;
- inline const Schema* GetSchema() { return schema_; }
- inline const std::string& GetName() { return table_name_; }
- inline const IndexHint& GetIndex() { return index_hint_; }
- std::unique_ptr GetIterator();
- RowIterator* GetRawIterator();
- inline const std::string& GetDatabase() { return db_; }
- std::unique_ptr GetWindowIterator(
- const std::string& idx_name);
+ const Schema* GetSchema() override { return schema_; }
+ const std::string& GetName() override { return table_name_; }
+ const IndexHint& GetIndex() override { return index_hint_; }
+ RowIterator* GetRawIterator() override;
+ const std::string& GetDatabase() override { return db_; }
void AddRow(const uint64_t key, const Row& v);
void AddFrontRow(const uint64_t key, const Row& v);
void PopBackRow();
@@ -220,12 +211,12 @@ class MemTimeTableHandler : public TableHandler {
}
void Sort(const bool is_asc);
void Reverse();
- virtual const uint64_t GetCount() { return table_.size(); }
- virtual Row At(uint64_t pos) {
+ const uint64_t GetCount() override { return table_.size(); }
+ Row At(uint64_t pos) override {
return pos < table_.size() ? table_.at(pos).second : Row();
}
void SetOrderType(const OrderType order_type) { order_type_ = order_type; }
- const OrderType GetOrderType() const { return order_type_; }
+ const OrderType GetOrderType() const override { return order_type_; }
const std::string GetHandlerTypeName() override {
return "MemTimeTableHandler";
}
@@ -254,21 +245,11 @@ class Window : public MemTimeTableHandler {
return std::make_unique(&table_, schema_);
}
- RowIterator* GetRawIterator() {
- return new vm::MemTimeTableIterator(&table_, schema_);
- }
+ RowIterator* GetRawIterator() override { return new vm::MemTimeTableIterator(&table_, schema_); }
virtual bool BufferData(uint64_t key, const Row& row) = 0;
virtual void PopBackData() { PopBackRow(); }
virtual void PopFrontData() = 0;
- virtual const uint64_t GetCount() { return table_.size(); }
- virtual Row At(uint64_t pos) {
- if (pos >= table_.size()) {
- return Row();
- } else {
- return table_[pos].second;
- }
- }
const std::string GetHandlerTypeName() override { return "Window"; }
bool instance_not_in_window() const { return instance_not_in_window_; }
@@ -322,7 +303,7 @@ class WindowRange {
return WindowRange(Window::kFrameRowsMergeRowsRange, start_offset, 0,
rows_preceding, max_size);
}
- inline const WindowPositionStatus GetWindowPositionStatus(
+ const WindowPositionStatus GetWindowPositionStatus(
bool out_of_rows, bool before_window, bool exceed_window) const {
switch (frame_type_) {
case Window::WindowFrameType::kFrameRows:
@@ -531,7 +512,7 @@ class CurrentHistoryWindow : public HistoryWindow {
void PopFrontData() override { PopFrontRow(); }
- bool BufferData(uint64_t key, const Row& row) {
+ bool BufferData(uint64_t key, const Row& row) override {
if (!table_.empty() && GetFrontRow().first > key) {
DLOG(WARNING) << "Fail BufferData: buffer key less than latest key";
return false;
@@ -560,34 +541,25 @@ class MemSegmentHandler : public TableHandler {
virtual ~MemSegmentHandler() {}
- inline const vm::Schema* GetSchema() {
+ const vm::Schema* GetSchema() override {
return partition_hander_->GetSchema();
}
- inline const std::string& GetName() { return partition_hander_->GetName(); }
+ const std::string& GetName() override { return partition_hander_->GetName(); }
- inline const std::string& GetDatabase() {
+ const std::string& GetDatabase() override {
return partition_hander_->GetDatabase();
}
- inline const vm::Types& GetTypes() { return partition_hander_->GetTypes(); }
+ const vm::Types& GetTypes() override { return partition_hander_->GetTypes(); }
- inline const vm::IndexHint& GetIndex() {
+ const vm::IndexHint& GetIndex() override {
return partition_hander_->GetIndex();
}
- const OrderType GetOrderType() const {
+ const OrderType GetOrderType() const override {
return partition_hander_->GetOrderType();
}
- std::unique_ptr GetIterator() {
- auto iter = partition_hander_->GetWindowIterator();
- if (iter) {
- iter->Seek(key_);
- return iter->Valid() ? iter->GetValue()
- : std::unique_ptr();
- }
- return std::unique_ptr();
- }
RowIterator* GetRawIterator() override {
auto iter = partition_hander_->GetWindowIterator();
if (iter) {
@@ -596,12 +568,11 @@ class MemSegmentHandler : public TableHandler {
}
return nullptr;
}
- std::unique_ptr GetWindowIterator(
- const std::string& idx_name) {
+ std::unique_ptr GetWindowIterator(const std::string& idx_name) override {
LOG(WARNING) << "SegmentHandler can't support window iterator";
return std::unique_ptr();
}
- virtual const uint64_t GetCount() {
+ const uint64_t GetCount() override {
auto iter = GetIterator();
if (!iter) {
return 0;
@@ -634,9 +605,7 @@ class MemSegmentHandler : public TableHandler {
std::string key_;
};
-class MemPartitionHandler
- : public PartitionHandler,
- public std::enable_shared_from_this {
+class MemPartitionHandler : public PartitionHandler, public std::enable_shared_from_this {
public:
MemPartitionHandler();
explicit MemPartitionHandler(const Schema* schema);
@@ -649,18 +618,19 @@ class MemPartitionHandler
const Schema* GetSchema() override;
const std::string& GetName() override;
const std::string& GetDatabase() override;
- virtual std::unique_ptr GetWindowIterator();
+ RowIterator* GetRawIterator() override { return nullptr; }
+ std::unique_ptr GetWindowIterator() override;
bool AddRow(const std::string& key, uint64_t ts, const Row& row);
void Sort(const bool is_asc);
void Reverse();
void Print();
- virtual const uint64_t GetCount() { return partitions_.size(); }
- virtual std::shared_ptr GetSegment(const std::string& key) {
+ const uint64_t GetCount() override { return partitions_.size(); }
+ std::shared_ptr GetSegment(const std::string& key) override {
return std::shared_ptr(
new MemSegmentHandler(shared_from_this(), key));
}
void SetOrderType(const OrderType order_type) { order_type_ = order_type; }
- const OrderType GetOrderType() const { return order_type_; }
+ const OrderType GetOrderType() const override { return order_type_; }
const std::string GetHandlerTypeName() override {
return "MemPartitionHandler";
}
@@ -674,6 +644,7 @@ class MemPartitionHandler
IndexHint index_hint_;
OrderType order_type_;
};
+
class ConcatTableHandler : public MemTimeTableHandler {
public:
ConcatTableHandler(std::shared_ptr left, size_t left_slices,
@@ -692,19 +663,13 @@ class ConcatTableHandler : public MemTimeTableHandler {
status_ = SyncValue();
return MemTimeTableHandler::At(pos);
}
- std::unique_ptr GetIterator() {
- if (status_.isRunning()) {
- status_ = SyncValue();
- }
- return MemTimeTableHandler::GetIterator();
- }
- RowIterator* GetRawIterator() {
+ RowIterator* GetRawIterator() override {
if (status_.isRunning()) {
status_ = SyncValue();
}
return MemTimeTableHandler::GetRawIterator();
}
- virtual const uint64_t GetCount() {
+ const uint64_t GetCount() override {
if (status_.isRunning()) {
status_ = SyncValue();
}
@@ -757,11 +722,11 @@ class MemCatalog : public Catalog {
bool Init();
- std::shared_ptr GetDatabase(const std::string& db) {
+ std::shared_ptr GetDatabase(const std::string& db) override {
return dbs_[db];
}
std::shared_ptr GetTable(const std::string& db,
- const std::string& table_name) {
+ const std::string& table_name) override {
return tables_[db][table_name];
}
bool IndexSupport() override { return true; }
@@ -783,17 +748,11 @@ class RequestUnionTableHandler : public TableHandler {
: request_ts_(request_ts), request_row_(request_row), window_(window) {}
~RequestUnionTableHandler() {}
- std::unique_ptr GetIterator() override {
- return std::unique_ptr(GetRawIterator());
- }
RowIterator* GetRawIterator() override;
const Types& GetTypes() override { return window_->GetTypes(); }
const IndexHint& GetIndex() override { return window_->GetIndex(); }
- std::unique_ptr GetWindowIterator(const std::string&) {
- return nullptr;
- }
- const OrderType GetOrderType() const { return window_->GetOrderType(); }
+ const OrderType GetOrderType() const override { return window_->GetOrderType(); }
const Schema* GetSchema() override { return window_->GetSchema(); }
const std::string& GetName() override { return window_->GetName(); }
const std::string& GetDatabase() override { return window_->GetDatabase(); }
diff --git a/hybridse/include/vm/physical_op.h b/hybridse/include/vm/physical_op.h
index ee3634615c8..dd51c73bfd1 100644
--- a/hybridse/include/vm/physical_op.h
+++ b/hybridse/include/vm/physical_op.h
@@ -200,9 +200,9 @@ class Range : public FnComponent {
const bool Valid() const { return nullptr != range_key_; }
const std::string ToString() const {
std::ostringstream oss;
- if (nullptr != range_key_ && nullptr != frame_) {
+ if (nullptr != frame_) {
if (nullptr != frame_->frame_range()) {
- oss << "range=(" << range_key_->GetExprString() << ", "
+ oss << "range=(" << node::ExprString(range_key_) << ", "
<< frame_->frame_range()->start()->GetExprString() << ", "
<< frame_->frame_range()->end()->GetExprString();
@@ -216,7 +216,7 @@ class Range : public FnComponent {
if (nullptr != frame_->frame_range()) {
oss << ", ";
}
- oss << "rows=(" << range_key_->GetExprString() << ", "
+ oss << "rows=(" << node::ExprString(range_key_) << ", "
<< frame_->frame_rows()->start()->GetExprString() << ", "
<< frame_->frame_rows()->end()->GetExprString() << ")";
}
@@ -578,7 +578,7 @@ class PhysicalRequestProviderNode : public PhysicalDataProviderNode {
PhysicalOpNode **out) override;
virtual ~PhysicalRequestProviderNode() {}
- virtual void Print(std::ostream &output, const std::string &tab) const;
+ void Print(std::ostream &output, const std::string &tab) const override;
};
class PhysicalRequestProviderNodeWithCommonColumn
@@ -731,6 +731,7 @@ class PhysicalConstProjectNode : public PhysicalOpNode {
public:
explicit PhysicalConstProjectNode(const ColumnProjects &project)
: PhysicalOpNode(kPhysicalOpConstProject, true), project_(project) {
+ output_type_ = kSchemaTypeRow;
fn_infos_.push_back(&project_.fn_info());
}
virtual ~PhysicalConstProjectNode() {}
@@ -785,7 +786,11 @@ class PhysicalAggregationNode : public PhysicalProjectNode {
public:
PhysicalAggregationNode(PhysicalOpNode *node, const ColumnProjects &project, const node::ExprNode *condition)
: PhysicalProjectNode(node, kAggregation, project, true), having_condition_(condition) {
- output_type_ = kSchemaTypeRow;
+ if (node->GetOutputType() == kSchemaTypeGroup) {
+ output_type_ = kSchemaTypeGroup;
+ } else {
+ output_type_ = kSchemaTypeRow;
+ }
fn_infos_.push_back(&having_condition_.fn_info());
}
virtual ~PhysicalAggregationNode() {}
@@ -846,9 +851,7 @@ class WindowOp {
std::ostringstream oss;
oss << "partition_" << partition_.ToString();
oss << ", " << sort_.ToString();
- if (range_.Valid()) {
- oss << ", " << range_.ToString();
- }
+ oss << ", " << range_.ToString();
return oss.str();
}
const std::string FnDetail() const {
@@ -1067,7 +1070,7 @@ class RequestWindowUnionList {
RequestWindowUnionList() : window_unions_() {}
virtual ~RequestWindowUnionList() {}
void AddWindowUnion(PhysicalOpNode *node, const RequestWindowOp &window) {
- window_unions_.push_back(std::make_pair(node, window));
+ window_unions_.emplace_back(node, window);
}
const PhysicalOpNode *GetKey(uint32_t index) {
auto iter = window_unions_.begin();
@@ -1181,23 +1184,25 @@ class PhysicalWindowAggrerationNode : public PhysicalProjectNode {
class PhysicalJoinNode : public PhysicalBinaryNode {
public:
+ static constexpr PhysicalOpType kConcreteNodeKind = kPhysicalOpJoin;
+
PhysicalJoinNode(PhysicalOpNode *left, PhysicalOpNode *right,
const node::JoinType join_type)
- : PhysicalBinaryNode(left, right, kPhysicalOpJoin, false),
+ : PhysicalBinaryNode(left, right, kConcreteNodeKind, false),
join_(join_type),
joined_schemas_ctx_(this),
output_right_only_(false) {
- output_type_ = left->GetOutputType();
+ InitOuptput();
}
PhysicalJoinNode(PhysicalOpNode *left, PhysicalOpNode *right,
const node::JoinType join_type,
const node::OrderByNode *orders,
const node::ExprNode *condition)
- : PhysicalBinaryNode(left, right, kPhysicalOpJoin, false),
+ : PhysicalBinaryNode(left, right, kConcreteNodeKind, false),
join_(join_type, orders, condition),
joined_schemas_ctx_(this),
output_right_only_(false) {
- output_type_ = left->GetOutputType();
+ InitOuptput();
RegisterFunctionInfo();
}
@@ -1206,11 +1211,11 @@ class PhysicalJoinNode : public PhysicalBinaryNode {
const node::ExprNode *condition,
const node::ExprListNode *left_keys,
const node::ExprListNode *right_keys)
- : PhysicalBinaryNode(left, right, kPhysicalOpJoin, false),
+ : PhysicalBinaryNode(left, right, kConcreteNodeKind, false),
join_(join_type, condition, left_keys, right_keys),
joined_schemas_ctx_(this),
output_right_only_(false) {
- output_type_ = left->GetOutputType();
+ InitOuptput();
RegisterFunctionInfo();
}
@@ -1220,31 +1225,31 @@ class PhysicalJoinNode : public PhysicalBinaryNode {
const node::ExprNode *condition,
const node::ExprListNode *left_keys,
const node::ExprListNode *right_keys)
- : PhysicalBinaryNode(left, right, kPhysicalOpJoin, false),
+ : PhysicalBinaryNode(left, right, kConcreteNodeKind, false),
join_(join_type, orders, condition, left_keys, right_keys),
joined_schemas_ctx_(this),
output_right_only_(false) {
- output_type_ = left->GetOutputType();
+ InitOuptput();
RegisterFunctionInfo();
}
PhysicalJoinNode(PhysicalOpNode *left, PhysicalOpNode *right,
const Join &join)
- : PhysicalBinaryNode(left, right, kPhysicalOpJoin, false),
+ : PhysicalBinaryNode(left, right, kConcreteNodeKind, false),
join_(join),
joined_schemas_ctx_(this),
output_right_only_(false) {
- output_type_ = left->GetOutputType();
+ InitOuptput();
RegisterFunctionInfo();
}
PhysicalJoinNode(PhysicalOpNode *left, PhysicalOpNode *right,
const Join &join, const bool output_right_only)
- : PhysicalBinaryNode(left, right, kPhysicalOpJoin, false),
+ : PhysicalBinaryNode(left, right, kConcreteNodeKind, false),
join_(join),
joined_schemas_ctx_(this),
output_right_only_(output_right_only) {
- output_type_ = left->GetOutputType();
+ InitOuptput();
RegisterFunctionInfo();
}
@@ -1273,37 +1278,59 @@ class PhysicalJoinNode : public PhysicalBinaryNode {
Join join_;
SchemasContext joined_schemas_ctx_;
const bool output_right_only_;
+
+ private:
+ void InitOuptput() {  // Sets output_type_ from join_.join_type_ and producer 0. NOTE(review): name is misspelled ("Ouptput"); a rename must also update every constructor call site.
+ switch (join_.join_type_) {
+ case node::kJoinTypeLast:
+ case node::kJoinTypeConcat: {
+ output_type_ = GetProducer(0)->GetOutputType();  // LAST / CONCAT join: inherit the output type of producer 0 unchanged
+ break;
+ }
+ default: {
+ // standard SQL JOINs: output is a table, except that a grouped producer 0 keeps the output grouped
+ if (GetProducer(0)->GetOutputType() == kSchemaTypeGroup) {
+ output_type_ = kSchemaTypeGroup;
+ } else {
+ output_type_ = kSchemaTypeTable;
+ }
+ break;
+ }
+ }
+ }
};
class PhysicalRequestJoinNode : public PhysicalBinaryNode {
public:
+ static constexpr PhysicalOpType kConcreteNodeKind = kPhysicalOpRequestJoin;
+
PhysicalRequestJoinNode(PhysicalOpNode *left, PhysicalOpNode *right,
const node::JoinType join_type)
- : PhysicalBinaryNode(left, right, kPhysicalOpRequestJoin, false),
+ : PhysicalBinaryNode(left, right, kConcreteNodeKind, false),
join_(join_type),
joined_schemas_ctx_(this),
output_right_only_(false) {
- output_type_ = left->GetOutputType();
+ InitOuptput();
RegisterFunctionInfo();
}
PhysicalRequestJoinNode(PhysicalOpNode *left, PhysicalOpNode *right,
const node::JoinType join_type,
const node::OrderByNode *orders,
const node::ExprNode *condition)
- : PhysicalBinaryNode(left, right, kPhysicalOpRequestJoin, false),
+ : PhysicalBinaryNode(left, right, kConcreteNodeKind, false),
join_(join_type, orders, condition),
joined_schemas_ctx_(this),
output_right_only_(false) {
- output_type_ = left->GetOutputType();
+ InitOuptput();
RegisterFunctionInfo();
}
PhysicalRequestJoinNode(PhysicalOpNode *left, PhysicalOpNode *right,
const Join &join, const bool output_right_only)
- : PhysicalBinaryNode(left, right, kPhysicalOpRequestJoin, false),
+ : PhysicalBinaryNode(left, right, kConcreteNodeKind, false),
join_(join),
joined_schemas_ctx_(this),
output_right_only_(output_right_only) {
- output_type_ = left->GetOutputType();
+ InitOuptput();
RegisterFunctionInfo();
}
@@ -1313,11 +1340,11 @@ class PhysicalRequestJoinNode : public PhysicalBinaryNode {
const node::ExprNode *condition,
const node::ExprListNode *left_keys,
const node::ExprListNode *right_keys)
- : PhysicalBinaryNode(left, right, kPhysicalOpRequestJoin, false),
+ : PhysicalBinaryNode(left, right, kConcreteNodeKind, false),
join_(join_type, condition, left_keys, right_keys),
joined_schemas_ctx_(this),
output_right_only_(false) {
- output_type_ = left->GetOutputType();
+ InitOuptput();
RegisterFunctionInfo();
}
PhysicalRequestJoinNode(PhysicalOpNode *left, PhysicalOpNode *right,
@@ -1326,11 +1353,11 @@ class PhysicalRequestJoinNode : public PhysicalBinaryNode {
const node::ExprNode *condition,
const node::ExprListNode *left_keys,
const node::ExprListNode *right_keys)
- : PhysicalBinaryNode(left, right, kPhysicalOpRequestJoin, false),
+ : PhysicalBinaryNode(left, right, kConcreteNodeKind, false),
join_(join_type, orders, condition, left_keys, right_keys),
joined_schemas_ctx_(this),
output_right_only_(false) {
- output_type_ = left->GetOutputType();
+ InitOuptput();
RegisterFunctionInfo();
}
@@ -1361,6 +1388,26 @@ class PhysicalRequestJoinNode : public PhysicalBinaryNode {
Join join_;
SchemasContext joined_schemas_ctx_;
const bool output_right_only_;
+
+ private:
+ void InitOuptput() {  // Sets output_type_ from join_.join_type_ and producer 0. NOTE(review): name is misspelled ("Ouptput"); a rename must also update every constructor call site.
+ switch (join_.join_type_) {
+ case node::kJoinTypeLast:
+ case node::kJoinTypeConcat: {
+ output_type_ = GetProducer(0)->GetOutputType();  // LAST / CONCAT join: inherit the output type of producer 0 unchanged
+ break;
+ }
+ default: {
+ // standard SQL JOINs: output is a table, except that a grouped producer 0 keeps the output grouped
+ if (GetProducer(0)->GetOutputType() == kSchemaTypeGroup) {
+ output_type_ = kSchemaTypeGroup;
+ } else {
+ output_type_ = kSchemaTypeTable;
+ }
+ break;
+ }
+ }
+ }
};
class PhysicalUnionNode : public PhysicalBinaryNode {
@@ -1417,7 +1464,7 @@ class PhysicalRequestUnionNode : public PhysicalBinaryNode {
instance_not_in_window_(false),
exclude_current_time_(false),
output_request_row_(true) {
- output_type_ = kSchemaTypeTable;
+ InitOuptput();
fn_infos_.push_back(&window_.partition_.fn_info());
fn_infos_.push_back(&window_.index_key_.fn_info());
@@ -1429,7 +1476,7 @@ class PhysicalRequestUnionNode : public PhysicalBinaryNode {
instance_not_in_window_(w_ptr->instance_not_in_window()),
exclude_current_time_(w_ptr->exclude_current_time()),
output_request_row_(true) {
- output_type_ = kSchemaTypeTable;
+ InitOuptput();
fn_infos_.push_back(&window_.partition_.fn_info());
fn_infos_.push_back(&window_.sort_.fn_info());
@@ -1445,7 +1492,7 @@ class PhysicalRequestUnionNode : public PhysicalBinaryNode {
instance_not_in_window_(instance_not_in_window),
exclude_current_time_(exclude_current_time),
output_request_row_(output_request_row) {
- output_type_ = kSchemaTypeTable;
+ InitOuptput();
fn_infos_.push_back(&window_.partition_.fn_info());
fn_infos_.push_back(&window_.sort_.fn_info());
@@ -1457,7 +1504,8 @@ class PhysicalRequestUnionNode : public PhysicalBinaryNode {
virtual void Print(std::ostream &output, const std::string &tab) const;
const bool Valid() { return true; }
static PhysicalRequestUnionNode *CastFrom(PhysicalOpNode *node);
- bool AddWindowUnion(PhysicalOpNode *node) {
+ bool AddWindowUnion(PhysicalOpNode *node) { return AddWindowUnion(node, window_); }
+ bool AddWindowUnion(PhysicalOpNode *node, const RequestWindowOp& window) {
if (nullptr == node) {
LOG(WARNING) << "Fail to add window union : table is null";
return false;
@@ -1474,9 +1522,8 @@ class PhysicalRequestUnionNode : public PhysicalBinaryNode {
<< "Union Table and window input schema aren't consistent";
return false;
}
- window_unions_.AddWindowUnion(node, window_);
- RequestWindowOp &window_union =
- window_unions_.window_unions_.back().second;
+ window_unions_.AddWindowUnion(node, window);
+ RequestWindowOp &window_union = window_unions_.window_unions_.back().second;
fn_infos_.push_back(&window_union.partition_.fn_info());
fn_infos_.push_back(&window_union.sort_.fn_info());
fn_infos_.push_back(&window_union.range_.fn_info());
@@ -1486,11 +1533,10 @@ class PhysicalRequestUnionNode : public PhysicalBinaryNode {
std::vector GetDependents() const override;
- const bool instance_not_in_window() const {
- return instance_not_in_window_;
- }
- const bool exclude_current_time() const { return exclude_current_time_; }
- const bool output_request_row() const { return output_request_row_; }
+ bool instance_not_in_window() const { return instance_not_in_window_; }
+ bool exclude_current_time() const { return exclude_current_time_; }
+ bool output_request_row() const { return output_request_row_; }
+ void set_output_request_row(bool flag) { output_request_row_ = flag; }
const RequestWindowOp &window() const { return window_; }
const RequestWindowUnionList &window_unions() const {
return window_unions_;
@@ -1508,10 +1554,20 @@ class PhysicalRequestUnionNode : public PhysicalBinaryNode {
}
RequestWindowOp window_;
- const bool instance_not_in_window_;
- const bool exclude_current_time_;
- const bool output_request_row_;
+ bool instance_not_in_window_;
+ bool exclude_current_time_;
+ bool output_request_row_;
RequestWindowUnionList window_unions_;
+
+ private:
+ void InitOuptput() {  // Sets output_type_ from the output type of producer 0. NOTE(review): name is misspelled ("Ouptput"); a rename must also update the constructor call sites.
+ auto left = GetProducer(0);
+ if (left->GetOutputType() == kSchemaTypeRow) {
+ output_type_ = kSchemaTypeTable;  // row input: the union outputs a table
+ } else {
+ output_type_ = kSchemaTypeGroup;  // every non-row input type: the union outputs a group
+ }
+ }
};
class PhysicalRequestAggUnionNode : public PhysicalOpNode {
@@ -1622,14 +1678,22 @@ class PhysicalFilterNode : public PhysicalUnaryNode {
public:
PhysicalFilterNode(PhysicalOpNode *node, const node::ExprNode *condition)
: PhysicalUnaryNode(node, kPhysicalOpFilter, true), filter_(condition) {
- output_type_ = node->GetOutputType();
+ if (node->GetOutputType() == kSchemaTypeGroup && filter_.index_key_.ValidKey()) {
+ output_type_ = kSchemaTypeTable;
+ } else {
+ output_type_ = node->GetOutputType();
+ }
fn_infos_.push_back(&filter_.condition_.fn_info());
fn_infos_.push_back(&filter_.index_key_.fn_info());
}
PhysicalFilterNode(PhysicalOpNode *node, Filter filter)
: PhysicalUnaryNode(node, kPhysicalOpFilter, true), filter_(filter) {
- output_type_ = node->GetOutputType();
+ if (node->GetOutputType() == kSchemaTypeGroup && filter_.index_key_.ValidKey()) {
+ output_type_ = kSchemaTypeTable;
+ } else {
+ output_type_ = node->GetOutputType();
+ }
fn_infos_.push_back(&filter_.condition_.fn_info());
fn_infos_.push_back(&filter_.index_key_.fn_info());
diff --git a/hybridse/include/vm/simple_catalog.h b/hybridse/include/vm/simple_catalog.h
index 1e1cd78a2f6..fd7c2f3b952 100644
--- a/hybridse/include/vm/simple_catalog.h
+++ b/hybridse/include/vm/simple_catalog.h
@@ -22,7 +22,6 @@
#include
#include
-#include "glog/logging.h"
#include "proto/fe_type.pb.h"
#include "vm/catalog.h"
#include "vm/mem_catalog.h"
diff --git a/hybridse/src/base/fe_slice.cc b/hybridse/src/base/fe_slice.cc
index 9f41c6016ca..c2ca3560741 100644
--- a/hybridse/src/base/fe_slice.cc
+++ b/hybridse/src/base/fe_slice.cc
@@ -25,7 +25,7 @@ void RefCountedSlice::Release() {
if (this->ref_cnt_ != nullptr) {
auto& cnt = *this->ref_cnt_;
cnt -= 1;
- if (cnt == 0) {
+ if (cnt == 0 && buf() != nullptr) {
// memset in case the buf is still used after free
memset(buf(), 0, size());
free(buf());
diff --git a/hybridse/src/node/node_manager.cc b/hybridse/src/node/node_manager.cc
index 8f6f80d7517..f60ba20d6b2 100644
--- a/hybridse/src/node/node_manager.cc
+++ b/hybridse/src/node/node_manager.cc
@@ -1031,11 +1031,6 @@ SqlNode *NodeManager::MakeReplicaNumNode(int num) {
return RegisterNode(node_ptr);
}
-SqlNode *NodeManager::MakeStorageModeNode(StorageMode storage_mode) {
- SqlNode *node_ptr = new StorageModeNode(storage_mode);
- return RegisterNode(node_ptr);
-}
-
SqlNode *NodeManager::MakePartitionNumNode(int num) {
SqlNode *node_ptr = new PartitionNumNode(num);
return RegisterNode(node_ptr);
diff --git a/hybridse/src/node/plan_node_test.cc b/hybridse/src/node/plan_node_test.cc
index 4f0d55d0166..5ffb76142a7 100644
--- a/hybridse/src/node/plan_node_test.cc
+++ b/hybridse/src/node/plan_node_test.cc
@@ -239,7 +239,8 @@ TEST_F(PlanNodeTest, ExtractColumnsAndIndexsTest) {
manager_->MakeColumnDescNode("col3", node::kFloat, true),
manager_->MakeColumnDescNode("col4", node::kVarchar, true),
manager_->MakeColumnDescNode("col5", node::kTimestamp, true), index_node},
- {manager_->MakeReplicaNumNode(3), manager_->MakePartitionNumNode(8), manager_->MakeStorageModeNode(kMemory)},
+ {manager_->MakeReplicaNumNode(3), manager_->MakePartitionNumNode(8),
+ manager_->MakeNode(kMemory)},
false);
ASSERT_TRUE(nullptr != node);
std::vector columns;
diff --git a/hybridse/src/node/sql_node.cc b/hybridse/src/node/sql_node.cc
index 16b88cd51ba..a0e8e0bec8f 100644
--- a/hybridse/src/node/sql_node.cc
+++ b/hybridse/src/node/sql_node.cc
@@ -76,6 +76,7 @@ static absl::flat_hash_map CreateCmdTypeNamesMap() {
{CmdType::kCmdDropFunction, "drop function"},
{CmdType::kCmdShowFunctions, "show functions"},
{CmdType::kCmdShowJobLog, "show joblog"},
+ {CmdType::kCmdTruncate, "truncate table"},
};
for (auto kind = 0; kind < CmdType::kLastCmd; ++kind) {
DCHECK(map.find(static_cast(kind)) != map.end());
@@ -1168,6 +1169,7 @@ static absl::flat_hash_map CreateSqlNodeTypeToNa
{kReplicaNum, "kReplicaNum"},
{kPartitionNum, "kPartitionNum"},
{kStorageMode, "kStorageMode"},
+ {kCompressType, "kCompressType"},
{kFn, "kFn"},
{kFnParaList, "kFnParaList"},
{kCreateSpStmt, "kCreateSpStmt"},
@@ -2100,6 +2102,11 @@ void FrameBound::Print(std::ostream &output, const std::string &org_tab) const {
}
}
+bool FrameBound::is_offset_bound() const {  // True when bound_type_ is kPreceding, kOpenPreceding, kFollowing or kOpenFollowing; false for any other bound type.
+ return bound_type_ == kPreceding || bound_type_ == kOpenPreceding || bound_type_ == kFollowing ||
+ bound_type_ == kOpenFollowing;
+}
+
int FrameBound::Compare(const FrameBound *bound1, const FrameBound *bound2) {
if (SqlEquals(bound1, bound2)) {
return 0;
@@ -2598,6 +2605,17 @@ void StorageModeNode::Print(std::ostream &output, const std::string &org_tab) co
PrintValue(output, tab, StorageModeName(storage_mode_), "storage_mode", true);
}
+void CompressTypeNode::Print(std::ostream &output, const std::string &org_tab) const {  // Writes the base SqlNode output, a newline, then the "compress_type" value indented one level below org_tab.
+ SqlNode::Print(output, org_tab);
+ const std::string tab = org_tab + INDENT + SPACE_ED;
+ output << "\n";
+ if (compress_type_ == CompressType::kSnappy) {
+ PrintValue(output, tab, "snappy", "compress_type", true);
+ } else {
+ PrintValue(output, tab, "nocompress", "compress_type", true);  // every value other than kSnappy is printed as "nocompress"
+ }
+}
+
void PartitionNumNode::Print(std::ostream &output, const std::string &org_tab) const {
SqlNode::Print(output, org_tab);
const std::string tab = org_tab + INDENT + SPACE_ED;
diff --git a/hybridse/src/node/sql_node_test.cc b/hybridse/src/node/sql_node_test.cc
index 545d9b647fd..227cb80dcea 100644
--- a/hybridse/src/node/sql_node_test.cc
+++ b/hybridse/src/node/sql_node_test.cc
@@ -676,7 +676,7 @@ TEST_F(SqlNodeTest, CreateIndexNodeTest) {
node_manager_->MakeColumnDescNode("col4", node::kVarchar, true),
node_manager_->MakeColumnDescNode("col5", node::kTimestamp, true), index_node},
{node_manager_->MakeReplicaNumNode(3), node_manager_->MakePartitionNumNode(8),
- node_manager_->MakeStorageModeNode(kMemory)},
+ node_manager_->MakeNode(kMemory)},
false);
ASSERT_TRUE(nullptr != node);
std::vector columns;
diff --git a/hybridse/src/passes/physical/batch_request_optimize.cc b/hybridse/src/passes/physical/batch_request_optimize.cc
index 52488e6a981..86fdfee92c5 100644
--- a/hybridse/src/passes/physical/batch_request_optimize.cc
+++ b/hybridse/src/passes/physical/batch_request_optimize.cc
@@ -269,6 +269,7 @@ static Status UpdateProjectExpr(
return replacer.Replace(expr->DeepCopy(ctx->node_manager()), output);
}
+// Simplify a simple project and remove orphaned descendant producer nodes.
static Status CreateSimplifiedProject(PhysicalPlanContext* ctx,
PhysicalOpNode* input,
const ColumnProjects& projects,
@@ -279,8 +280,7 @@ static Status CreateSimplifiedProject(PhysicalPlanContext* ctx,
can_project = false;
for (size_t i = 0; i < cur_input->producers().size(); ++i) {
auto cand_input = cur_input->GetProducer(i);
- if (cand_input->GetOutputType() !=
- PhysicalSchemaType::kSchemaTypeRow) {
+ if (cand_input->GetOutputType() != PhysicalSchemaType::kSchemaTypeRow) {
continue;
}
bool is_valid = true;
@@ -949,21 +949,16 @@ Status CommonColumnOptimize::ProcessJoin(PhysicalPlanContext* ctx,
}
} else if (is_non_common_join) {
// join only depend on non-common left part
- if (left_state->non_common_op == join_op->GetProducer(0) &&
- right == join_op->GetProducer(1)) {
+ if (left_state->non_common_op == join_op->GetProducer(0) && right == join_op->GetProducer(1)) {
state->common_op = nullptr;
state->non_common_op = join_op;
} else {
PhysicalRequestJoinNode* new_join = nullptr;
- CHECK_STATUS(ctx->CreateOp(
- &new_join, left_state->non_common_op, right, join_op->join(),
- join_op->output_right_only()));
- CHECK_STATUS(ReplaceComponentExpr(
- join_op->join(), join_op->joined_schemas_ctx(),
- new_join->joined_schemas_ctx(), ctx->node_manager(),
- &new_join->join_));
- state->common_op =
- join_op->output_right_only() ? nullptr : left_state->common_op;
+ CHECK_STATUS(ctx->CreateOp(&new_join, left_state->non_common_op, right,
+ join_op->join(), join_op->output_right_only()));
+ CHECK_STATUS(ReplaceComponentExpr(join_op->join(), join_op->joined_schemas_ctx(),
+ new_join->joined_schemas_ctx(), ctx->node_manager(), &new_join->join_));
+ state->common_op = join_op->output_right_only() ? nullptr : left_state->common_op;
state->non_common_op = new_join;
if (!join_op->output_right_only()) {
for (size_t left_idx : left_state->common_column_indices) {
diff --git a/hybridse/src/passes/physical/batch_request_optimize_test.cc b/hybridse/src/passes/physical/batch_request_optimize_test.cc
index e53b7c377e2..48259b68ed4 100644
--- a/hybridse/src/passes/physical/batch_request_optimize_test.cc
+++ b/hybridse/src/passes/physical/batch_request_optimize_test.cc
@@ -54,6 +54,9 @@ INSTANTIATE_TEST_SUITE_P(
INSTANTIATE_TEST_SUITE_P(
BatchRequestLastJoinQuery, BatchRequestOptimizeTest,
testing::ValuesIn(sqlcase::InitCases("cases/query/last_join_query.yaml")));
+INSTANTIATE_TEST_SUITE_P(
+ BatchRequestLeftJoin, BatchRequestOptimizeTest,
+ testing::ValuesIn(sqlcase::InitCases("cases/query/left_join.yml")));
INSTANTIATE_TEST_SUITE_P(
BatchRequestLastJoinWindowQuery, BatchRequestOptimizeTest,
testing::ValuesIn(sqlcase::InitCases("cases/query/last_join_window_query.yaml")));
diff --git a/hybridse/src/passes/physical/group_and_sort_optimized.cc b/hybridse/src/passes/physical/group_and_sort_optimized.cc
index ae333b6af47..2d51b336167 100644
--- a/hybridse/src/passes/physical/group_and_sort_optimized.cc
+++ b/hybridse/src/passes/physical/group_and_sort_optimized.cc
@@ -25,6 +25,7 @@
#include "absl/cleanup/cleanup.h"
#include "absl/status/status.h"
#include "absl/strings/string_view.h"
+#include "node/node_enum.h"
#include "vm/physical_op.h"
namespace hybridse {
@@ -294,6 +295,7 @@ bool GroupAndSortOptimized::KeysOptimized(const SchemasContext* root_schemas_ctx
absl::Cleanup clean = [&]() {
expr_cache_.clear();
+ optimize_info_ = nullptr;
};
auto s = BuildExprCache(left_key->keys(), root_schemas_ctx);
@@ -347,6 +349,18 @@ bool GroupAndSortOptimized::KeysOptimizedImpl(const SchemasContext* root_schemas
if (DataProviderType::kProviderTypeTable == scan_op->provider_type_ ||
DataProviderType::kProviderTypePartition == scan_op->provider_type_) {
+ auto* table_node = dynamic_cast(scan_op);
+ if (optimize_info_) {
+ if (optimize_info_->left_key == left_key && optimize_info_->index_key == index_key &&
+ optimize_info_->right_key == right_key && optimize_info_->sort_key == sort) {
+ if (optimize_info_->optimized != nullptr &&
+ table_node->GetDb() == optimize_info_->optimized->GetDb() &&
+ table_node->GetName() == optimize_info_->optimized->GetName()) {
+ *new_in = optimize_info_->optimized;
+ return true;
+ }
+ }
+ }
const node::ExprListNode* right_partition =
right_key == nullptr ? left_key->keys() : right_key->keys();
@@ -453,13 +467,15 @@ bool GroupAndSortOptimized::KeysOptimizedImpl(const SchemasContext* root_schemas
dynamic_cast(node_manager_->MakeOrderByNode(node_manager_->MakeExprList(
node_manager_->MakeOrderExpression(nullptr, first_order_expression->is_asc())))));
}
+
+ optimize_info_.reset(new OptimizeInfo(left_key, index_key, right_key, sort, partition_op));
*new_in = partition_op;
return true;
}
} else if (PhysicalOpType::kPhysicalOpSimpleProject == in->GetOpType()) {
PhysicalOpNode* new_depend;
- if (!KeysOptimizedImpl(in->GetProducer(0)->schemas_ctx(), in->GetProducer(0), left_key, index_key, right_key, sort,
- &new_depend)) {
+ if (!KeysOptimizedImpl(in->GetProducer(0)->schemas_ctx(), in->GetProducer(0), left_key, index_key, right_key,
+ sort, &new_depend)) {
return false;
}
@@ -493,7 +509,8 @@ bool GroupAndSortOptimized::KeysOptimizedImpl(const SchemasContext* root_schemas
PhysicalFilterNode* filter_op = dynamic_cast(in);
PhysicalOpNode* new_depend;
- if (!KeysOptimizedImpl(root_schemas_ctx, in->producers()[0], left_key, index_key, right_key, sort, &new_depend)) {
+ if (!KeysOptimizedImpl(root_schemas_ctx, in->producers()[0], left_key, index_key, right_key, sort,
+ &new_depend)) {
return false;
}
PhysicalFilterNode* new_filter = nullptr;
@@ -515,8 +532,16 @@ bool GroupAndSortOptimized::KeysOptimizedImpl(const SchemasContext* root_schemas
&new_depend)) {
return false;
}
+ PhysicalOpNode* new_right = in->GetProducer(1);
+ if (request_join->join_.join_type_ == node::kJoinTypeConcat) {
+ // concat join: only acceptable if both inputs (necessarily the same table) are optimized by the same index
+ auto* rebase_sc = in->GetProducer(1)->schemas_ctx();
+ if (!KeysOptimizedImpl(rebase_sc, in->GetProducer(1), left_key, index_key, right_key, sort, &new_right)) {
+ return false;
+ }
+ }
PhysicalRequestJoinNode* new_join = nullptr;
- auto s = plan_ctx_->CreateOp(&new_join, new_depend, request_join->GetProducer(1),
+ auto s = plan_ctx_->CreateOp(&new_join, new_depend, new_right,
request_join->join(), request_join->output_right_only());
if (!s.isOK()) {
LOG(WARNING) << "Fail to create new request join op: " << s;
@@ -545,6 +570,57 @@ bool GroupAndSortOptimized::KeysOptimizedImpl(const SchemasContext* root_schemas
*new_in = new_join;
return true;
+ } else if (PhysicalOpType::kPhysicalOpProject == in->GetOpType()) {
+ auto * project = dynamic_cast(in);
+ if (project == nullptr || project->project_type_ != vm::kAggregation) {
+ return false;
+ }
+
+ auto * agg_project = dynamic_cast(in);
+
+ PhysicalOpNode* new_depend = nullptr;
+ auto* rebase_sc = in->GetProducer(0)->schemas_ctx();
+ if (!KeysOptimizedImpl(rebase_sc, in->GetProducer(0), left_key, index_key, right_key, sort,
+ &new_depend)) {
+ return false;
+ }
+
+ vm::PhysicalAggregationNode* new_agg = nullptr;
+ if (!plan_ctx_
+ ->CreateOp(&new_agg, new_depend, agg_project->project(),
+ agg_project->having_condition_.condition())
+ .isOK()) {
+ return false;
+ }
+ *new_in = new_agg;
+ return true;
+ } else if (PhysicalOpType::kPhysicalOpRequestUnion == in->GetOpType()) {
+ // JOIN (..., AGG(REQUEST_UNION(left, ...))): the JOIN condition optimizes the left input of the request union
+ PhysicalOpNode* new_left_depend = nullptr;
+ auto* rebase_sc = in->GetProducer(0)->schemas_ctx();
+ if (!KeysOptimizedImpl(rebase_sc, in->GetProducer(0), left_key, index_key, right_key, sort,
+ &new_left_depend)) {
+ return false;
+ }
+
+ auto * request_union = dynamic_cast(in);
+
+ vm::PhysicalRequestUnionNode* new_union = nullptr;
+ if (!plan_ctx_
+ ->CreateOp(
+ &new_union, new_left_depend, in->GetProducer(1), request_union->window(),
+ request_union->instance_not_in_window(), request_union->exclude_current_time(),
+ request_union->output_request_row())
+ .isOK()) {
+ return false;
+ }
+ for (auto& pair : request_union->window_unions().window_unions_) {
+ if (!new_union->AddWindowUnion(pair.first, pair.second)) {
+ return false;
+ }
+ }
+ *new_in = new_union;
+ return true;
}
return false;
}
diff --git a/hybridse/src/passes/physical/group_and_sort_optimized.h b/hybridse/src/passes/physical/group_and_sort_optimized.h
index 1d410f2b8e8..2e50571b29d 100644
--- a/hybridse/src/passes/physical/group_and_sort_optimized.h
+++ b/hybridse/src/passes/physical/group_and_sort_optimized.h
@@ -93,6 +93,17 @@ class GroupAndSortOptimized : public TransformUpPysicalPass {
std::string db_name;
};
+ struct OptimizeInfo {  // record of one successful table/partition rewrite made by KeysOptimizedImpl, kept for reuse
+ OptimizeInfo(const Key* left_key, const Key* index_key, const Key* right_key, const Sort* s,
+ vm::PhysicalPartitionProviderNode* optimized)
+ : left_key(left_key), index_key(index_key), right_key(right_key), sort_key(s), optimized(optimized) {}
+ const Key* left_key;  // key arguments of the recorded call; matched by pointer equality on reuse
+ const Key* index_key;  // may be compared against the current index_key pointer, never dereferenced here
+ const Key* right_key;  // same pointer-identity matching as left_key / index_key
+ const Sort* sort_key;  // sort argument of the recorded call, matched by pointer equality
+ vm::PhysicalPartitionProviderNode* optimized;  // partition provider produced by the recorded rewrite; raw pointer
+ };
+
private:
bool Transform(PhysicalOpNode* in, PhysicalOpNode** output);
@@ -149,6 +160,8 @@ class GroupAndSortOptimized : public TransformUpPysicalPass {
// A source column name is the column name in string that refers to a physical table,
// only one table got optimized each time
std::unordered_map expr_cache_;
+
+ std::unique_ptr optimize_info_;
};
} // namespace passes
} // namespace hybridse
diff --git a/hybridse/src/passes/physical/transform_up_physical_pass.h b/hybridse/src/passes/physical/transform_up_physical_pass.h
index fed721d4c66..a9a80bd90b4 100644
--- a/hybridse/src/passes/physical/transform_up_physical_pass.h
+++ b/hybridse/src/passes/physical/transform_up_physical_pass.h
@@ -17,7 +17,6 @@
#define HYBRIDSE_SRC_PASSES_PHYSICAL_TRANSFORM_UP_PHYSICAL_PASS_H_
#include
-#include
#include
#include
diff --git a/hybridse/src/plan/planner.cc b/hybridse/src/plan/planner.cc
index c0a68e3104e..fc350d1ffb6 100644
--- a/hybridse/src/plan/planner.cc
+++ b/hybridse/src/plan/planner.cc
@@ -18,7 +18,6 @@
#include
#include