diff --git a/.asf.yaml b/.asf.yaml
index dcab78f6fd9..693bef54556 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -33,6 +33,8 @@ github:
squash: true
merge: false
rebase: false
+ collaborators:
+ - arshadmohammad
protected_branches:
dev:
required_status_checks:
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index d4a3001e523..dab838235c9 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -7,13 +7,9 @@ remember to adjust the documentation.
Feel free to ping committers for the review!
## Contribution Checklist
-
- Make sure that the pull request corresponds to a [GITHUB issue](https://github.com/apache/seatunnel/issues).
-
- Name the pull request in the form "[Feature] [component] Title of the pull request", where *Feature* can be replaced by `Hotfix`, `Bug`, etc.
-
- Minor fixes should be named following this pattern: `[hotfix] [docs] Fix typo in README.md doc`.
-
-->
### Purpose of this pull request
@@ -22,6 +18,7 @@ Feel free to ping committers for the review!
### Does this PR introduce _any_ user-facing change?
+
--->
+## Set Up With Docker In Local Mode
+
+### Zeta Engine
+
+#### Download
+
+```shell
+docker pull apache/seatunnel:<version_tag>
+```
+
+How to submit a job in local mode:
+
+```shell
+# Run the fake source to console sink example
+docker run --rm -it apache/seatunnel:<version_tag> ./bin/seatunnel.sh -m local -c config/v2.batch.config.template
+
+# Run a job with a custom config file
+docker run --rm -it -v /<your_config_dir>:/config apache/seatunnel:<version_tag> ./bin/seatunnel.sh -m local -c /config/fake_to_console.conf
+
+# Example
+# If your config file is in /tmp/job/fake_to_console.conf
+docker run --rm -it -v /tmp/job/:/config apache/seatunnel:<version_tag> ./bin/seatunnel.sh -m local -c /config/fake_to_console.conf
+
+# Set JVM options when running
+docker run --rm -it -v /tmp/job/:/config apache/seatunnel:<version_tag> ./bin/seatunnel.sh -DJvmOption="-Xms4G -Xmx4G" -m local -c /config/fake_to_console.conf
+```
+
+#### Build Image By Yourself
+
+You can build the image from source code. Getting the source code works the same way as getting the binary package:
+download it from the [download page](https://seatunnel.apache.org/download/) or clone it from the [GitHub repository](https://github.com/apache/seatunnel/releases).
+
+##### Build With One Command
+```shell
+cd seatunnel
+# Use the predefined Maven profile
+sh ./mvnw -B clean install -Dmaven.test.skip=true -Dmaven.javadoc.skip=true -Dlicense.skipAddThirdParty=true -D"docker.build.skip"=false -D"docker.verify.skip"=false -D"docker.push.skip"=true -D"docker.tag"=2.3.9 -Dmaven.deploy.skip -D"skip.spotless"=true --no-snapshot-updates -Pdocker,seatunnel
+
+# Check the docker image
+docker images | grep apache/seatunnel
+```
+
+##### Build Step By Step
+```shell
+# Build binary package from source code
+sh ./mvnw clean package -DskipTests -Dskip.spotless=true
+
+# Build docker image
+cd seatunnel-dist
+docker build -f src/main/docker/Dockerfile --build-arg VERSION=2.3.9 -t apache/seatunnel:2.3.9 .
+
+# If you build from dev branch, you should add SNAPSHOT suffix to the version
+docker build -f src/main/docker/Dockerfile --build-arg VERSION=2.3.9-SNAPSHOT -t apache/seatunnel:2.3.9-SNAPSHOT .
+
+# Check the docker image
+docker images | grep apache/seatunnel
+```
+
+The Dockerfile looks like this:
+```dockerfile
+FROM openjdk:8
+
+ARG VERSION
+# Build from source code and copy it into the image
+COPY ./target/apache-seatunnel-${VERSION}-bin.tar.gz /opt/
+
+# Download From Internet
+# Please note this file only includes the fake/console connectors; you'll need to download the other connectors manually
+# wget -P /opt https://dlcdn.apache.org/seatunnel/2.3.6/apache-seatunnel-${VERSION}-bin.tar.gz
+
+RUN cd /opt && \
+ tar -zxvf apache-seatunnel-${VERSION}-bin.tar.gz && \
+ mv apache-seatunnel-${VERSION} seatunnel && \
+ rm apache-seatunnel-${VERSION}-bin.tar.gz && \
+ cp seatunnel/config/log4j2_client.properties seatunnel/config/log4j2.properties && \
+ cp seatunnel/config/hazelcast-master.yaml seatunnel/config/hazelcast-worker.yaml
+
+WORKDIR /opt/seatunnel
+```
+
+### Spark or Flink Engine
+
+
+#### Mount Spark/Flink library
+
+By default, the Spark home is `/opt/spark` and the Flink home is `/opt/flink`.
+If you need to run with Spark/Flink, you can mount the related libraries to `/opt/spark` or `/opt/flink`.
+
+```shell
+docker run \
+  -v <your_spark_home>:/opt/spark \
+  -v <your_flink_home>:/opt/flink \
+ ...
+```
+
+Or you can change the `SPARK_HOME`/`FLINK_HOME` environment variables in the Dockerfile, re-build your image, and mount Spark/Flink to the related path.
+
+```dockerfile
+FROM apache/seatunnel
+
+ENV SPARK_HOME=<your_customized_spark_path>
+
+...
+
+```
+
+```shell
+docker run \
+  -v <your_spark_home>:<your_customized_spark_path> \
+ ...
+```
+
+### Submit job
+
+The command differs between engines and between versions of the same engine, so please choose the correct command.
+
+- Spark
+
+```shell
+# spark2
+docker run --rm -it apache/seatunnel bash ./bin/start-seatunnel-spark-2-connector-v2.sh -c config/v2.batch.config.template
+
+# spark3
+docker run --rm -it apache/seatunnel bash ./bin/start-seatunnel-spark-3-connector-v2.sh -c config/v2.batch.config.template
+```
+
+- Flink
+  Before you submit a job, you need to start the Flink cluster first.
+
+```shell
+# flink version between `1.12.x` and `1.14.x`
+docker run --rm -it apache/seatunnel bash -c '/bin/start-cluster.sh && ./bin/start-seatunnel-flink-13-connector-v2.sh -c config/v2.streaming.conf.template'
+# flink version between `1.15.x` and `1.16.x`
+docker run --rm -it apache/seatunnel bash -c '/bin/start-cluster.sh && ./bin/start-seatunnel-flink-15-connector-v2.sh -c config/v2.streaming.conf.template'
+```
+
+
+
+## Set Up With Docker In Cluster Mode
+
+There are two ways to create a cluster with Docker.
+
+### Use Docker Directly
+
+#### Create A Network
+```shell
+docker network create seatunnel-network
+```
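+
+You can optionally confirm that the network was created before starting any nodes, for example:
+
+```shell
+# List Docker networks and check that `seatunnel-network` is present
+docker network ls --filter name=seatunnel-network
+```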
+
+#### Start The Nodes
+- Start the master node
+```shell
+## Start the master and expose port 5801
+docker run -d --name seatunnel_master \
+ --network seatunnel-network \
+ --rm \
+ -p 5801:5801 \
+ apache/seatunnel \
+ ./bin/seatunnel-cluster.sh -r master
+```
+
+- Get the created container's IP
+```shell
+docker inspect seatunnel_master
+```
+Run this command to get the container IP.
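+
+If you only need the IP address itself, you can use an inspect format string, for example:
+
+```shell
+# Print only the container's IP address on the attached network
+docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' seatunnel_master
+```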
+
+- start worker node
+```shell
+# You need to set your master container IP in `ST_DOCKER_MEMBER_LIST`
+docker run -d --name seatunnel_worker_1 \
+ --network seatunnel-network \
+ --rm \
+ -e ST_DOCKER_MEMBER_LIST=172.18.0.2:5801 \
+ apache/seatunnel \
+ ./bin/seatunnel-cluster.sh -r worker
+
+## Start worker 2
+# You need to set your master container IP in `ST_DOCKER_MEMBER_LIST`
+docker run -d --name seatunnel_worker_2 \
+ --network seatunnel-network \
+ --rm \
+ -e ST_DOCKER_MEMBER_LIST=172.18.0.2:5801 \
+ apache/seatunnel \
+ ./bin/seatunnel-cluster.sh -r worker
+
+```
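+
+You can check that the master and both workers are running before going further, for example:
+
+```shell
+# List the containers attached to the seatunnel-network
+docker ps --filter network=seatunnel-network
+```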
+
+#### Scale your Cluster
+
+Run this command to start an additional master node.
+```shell
+# You need to set your master container IP in `ST_DOCKER_MEMBER_LIST`
+docker run -d --name seatunnel_master_2 \
+ --network seatunnel-network \
+ --rm \
+ -e ST_DOCKER_MEMBER_LIST=172.18.0.2:5801 \
+ apache/seatunnel \
+ ./bin/seatunnel-cluster.sh -r master
+```
+
+Run this command to start an additional worker node.
+```shell
+# You need to set your master container IP in `ST_DOCKER_MEMBER_LIST`
+docker run -d --name seatunnel_worker_3 \
+ --network seatunnel-network \
+ --rm \
+ -e ST_DOCKER_MEMBER_LIST=172.18.0.2:5801 \
+ apache/seatunnel \
+ ./bin/seatunnel-cluster.sh -r worker
+```
+
+
+### Use Docker-compose
+
+> Docker cluster mode only supports the Zeta engine.
+
+The `docker-compose.yaml` file is:
+```yaml
+version: '3.8'
+
+services:
+ master:
+ image: apache/seatunnel
+ container_name: seatunnel_master
+ environment:
+ - ST_DOCKER_MEMBER_LIST=172.16.0.2,172.16.0.3,172.16.0.4
+ entrypoint: >
+ /bin/sh -c "
+ /opt/seatunnel/bin/seatunnel-cluster.sh -r master
+ "
+ ports:
+ - "5801:5801"
+ networks:
+ seatunnel_network:
+ ipv4_address: 172.16.0.2
+
+ worker1:
+ image: apache/seatunnel
+ container_name: seatunnel_worker_1
+ environment:
+ - ST_DOCKER_MEMBER_LIST=172.16.0.2,172.16.0.3,172.16.0.4
+ entrypoint: >
+ /bin/sh -c "
+ /opt/seatunnel/bin/seatunnel-cluster.sh -r worker
+ "
+ depends_on:
+ - master
+ networks:
+ seatunnel_network:
+ ipv4_address: 172.16.0.3
+
+ worker2:
+ image: apache/seatunnel
+ container_name: seatunnel_worker_2
+ environment:
+ - ST_DOCKER_MEMBER_LIST=172.16.0.2,172.16.0.3,172.16.0.4
+ entrypoint: >
+ /bin/sh -c "
+ /opt/seatunnel/bin/seatunnel-cluster.sh -r worker
+ "
+ depends_on:
+ - master
+ networks:
+ seatunnel_network:
+ ipv4_address: 172.16.0.4
+
+networks:
+ seatunnel_network:
+ driver: bridge
+ ipam:
+ config:
+ - subnet: 172.16.0.0/24
+
+```
+
+Run the `docker-compose up -d` command to start the cluster.
+
+
+You can use `docker logs -f seatunnel_master` and `docker logs -f seatunnel_worker_1` to check the node logs.
+And when you call `http://localhost:5801/hazelcast/rest/maps/system-monitoring-information`, you will see the 3 nodes (1 master and 2 workers) as expected.
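+
+For example, you can query that endpoint with `curl` from the host:
+
+```shell
+# The response lists every member of the cluster
+curl http://localhost:5801/hazelcast/rest/maps/system-monitoring-information
+```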
+
+After that, you can use the client or the REST API to submit jobs to this cluster.
+
+#### Scale your Cluster
+
+If you want to add more nodes to the cluster, for example a new worker node, extend the `docker-compose.yaml` as follows:
+
+```yaml
+version: '3.8'
+
+services:
+ master:
+ image: apache/seatunnel
+ container_name: seatunnel_master
+ environment:
+ - ST_DOCKER_MEMBER_LIST=172.16.0.2,172.16.0.3,172.16.0.4
+ entrypoint: >
+ /bin/sh -c "
+ /opt/seatunnel/bin/seatunnel-cluster.sh -r master
+ "
+ ports:
+ - "5801:5801"
+ networks:
+ seatunnel_network:
+ ipv4_address: 172.16.0.2
+
+ worker1:
+ image: apache/seatunnel
+ container_name: seatunnel_worker_1
+ environment:
+ - ST_DOCKER_MEMBER_LIST=172.16.0.2,172.16.0.3,172.16.0.4
+ entrypoint: >
+ /bin/sh -c "
+ /opt/seatunnel/bin/seatunnel-cluster.sh -r worker
+ "
+ depends_on:
+ - master
+ networks:
+ seatunnel_network:
+ ipv4_address: 172.16.0.3
+
+ worker2:
+ image: apache/seatunnel
+ container_name: seatunnel_worker_2
+ environment:
+ - ST_DOCKER_MEMBER_LIST=172.16.0.2,172.16.0.3,172.16.0.4
+ entrypoint: >
+ /bin/sh -c "
+ /opt/seatunnel/bin/seatunnel-cluster.sh -r worker
+ "
+ depends_on:
+ - master
+ networks:
+ seatunnel_network:
+ ipv4_address: 172.16.0.4
+ ####
+ ## add new worker node
+ ####
+ worker3:
+ image: apache/seatunnel
+ container_name: seatunnel_worker_3
+ environment:
+      - ST_DOCKER_MEMBER_LIST=172.16.0.2,172.16.0.3,172.16.0.4,172.16.0.5 # add the new node's IP here
+ entrypoint: >
+ /bin/sh -c "
+ /opt/seatunnel/bin/seatunnel-cluster.sh -r worker
+ "
+ depends_on:
+ - master
+ networks:
+ seatunnel_network:
+        ipv4_address: 172.16.0.5 # use an unused IP
+
+networks:
+ seatunnel_network:
+ driver: bridge
+ ipam:
+ config:
+ - subnet: 172.16.0.0/24
+
+```
+
+Then run `docker-compose up -d`; the new worker node will start, and the existing nodes won't restart.
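+
+You can verify the result with, for example:
+
+```shell
+# All services defined in docker-compose.yaml should be listed as Up,
+# including the newly added worker
+docker-compose ps
+
+# Follow the new worker's log
+docker logs -f seatunnel_worker_3
+```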
+
+
+### Job Operation On Cluster
+
+#### Use Docker As A Client
+- Submit a job:
+```shell
+# You need to set your master container IP in `ST_DOCKER_MEMBER_LIST`
+docker run --name seatunnel_client \
+ --network seatunnel-network \
+ -e ST_DOCKER_MEMBER_LIST=172.18.0.2:5801 \
+ --rm \
+ apache/seatunnel \
+ ./bin/seatunnel.sh -c config/v2.batch.config.template
+```
+
+- List jobs
+```shell
+# You need to set your master container IP in `ST_DOCKER_MEMBER_LIST`
+docker run --name seatunnel_client \
+ --network seatunnel-network \
+ -e ST_DOCKER_MEMBER_LIST=172.18.0.2:5801 \
+ --rm \
+ apache/seatunnel \
+ ./bin/seatunnel.sh -l
+```
+
+For more commands, please refer to [user-command](../../seatunnel-engine/user-command.md).
+
+
+
+#### Use REST API
+
+Please refer to [Submit A Job](../../seatunnel-engine/rest-api-v2.md#submit-a-job).
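+
+As a rough sketch, a REST submission could look like the following; the port `8080` and the `/submit-job` path are assumptions based on the default REST API v2 settings, so check the linked document for the authoritative endpoint and request body:
+
+```shell
+# Submit a job config (JSON body) to the cluster's REST endpoint (assumed defaults)
+curl -X POST "http://localhost:8080/submit-job?jobName=fake_to_console" \
+  -H "Content-Type: application/json" \
+  -d @fake_to_console.json
+```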
+
diff --git a/docs/en/start-v2/kubernetes/kubernetes.mdx b/docs/en/start-v2/kubernetes/kubernetes.mdx
index b40e561ec72..f3cc9e6b0d5 100644
--- a/docs/en/start-v2/kubernetes/kubernetes.mdx
+++ b/docs/en/start-v2/kubernetes/kubernetes.mdx
@@ -44,7 +44,7 @@ To run the image with SeaTunnel, first create a `Dockerfile`:
```Dockerfile
FROM flink:1.13
-ENV SEATUNNEL_VERSION="2.3.6"
+ENV SEATUNNEL_VERSION="2.3.9"
ENV SEATUNNEL_HOME="/opt/seatunnel"
RUN wget https://dlcdn.apache.org/seatunnel/${SEATUNNEL_VERSION}/apache-seatunnel-${SEATUNNEL_VERSION}-bin.tar.gz
@@ -56,13 +56,13 @@ RUN cd ${SEATUNNEL_HOME} && sh bin/install-plugin.sh ${SEATUNNEL_VERSION}
Then run the following commands to build the image:
```bash
-docker build -t seatunnel:2.3.6-flink-1.13 -f Dockerfile .
+docker build -t seatunnel:2.3.9-flink-1.13 -f Dockerfile .
```
-Image `seatunnel:2.3.6-flink-1.13` needs to be present in the host (minikube) so that the deployment can take place.
+Image `seatunnel:2.3.9-flink-1.13` needs to be present in the host (minikube) so that the deployment can take place.
Load image to minikube via:
```bash
-minikube image load seatunnel:2.3.6-flink-1.13
+minikube image load seatunnel:2.3.9-flink-1.13
```
@@ -72,7 +72,7 @@ minikube image load seatunnel:2.3.6-flink-1.13
```Dockerfile
FROM openjdk:8
-ENV SEATUNNEL_VERSION="2.3.6"
+ENV SEATUNNEL_VERSION="2.3.9"
ENV SEATUNNEL_HOME="/opt/seatunnel"
RUN wget https://dlcdn.apache.org/seatunnel/${SEATUNNEL_VERSION}/apache-seatunnel-${SEATUNNEL_VERSION}-bin.tar.gz
@@ -84,13 +84,13 @@ RUN cd ${SEATUNNEL_HOME} && sh bin/install-plugin.sh ${SEATUNNEL_VERSION}
Then run the following commands to build the image:
```bash
-docker build -t seatunnel:2.3.6 -f Dockerfile .
+docker build -t seatunnel:2.3.9 -f Dockerfile .
```
-Image `seatunnel:2.3.6` need to be present in the host (minikube) so that the deployment can take place.
+Image `seatunnel:2.3.9` need to be present in the host (minikube) so that the deployment can take place.
Load image to minikube via:
```bash
-minikube image load seatunnel:2.3.6
+minikube image load seatunnel:2.3.9
```
@@ -100,7 +100,7 @@ minikube image load seatunnel:2.3.6
```Dockerfile
FROM openjdk:8
-ENV SEATUNNEL_VERSION="2.3.6"
+ENV SEATUNNEL_VERSION="2.3.9"
ENV SEATUNNEL_HOME="/opt/seatunnel"
RUN wget https://dlcdn.apache.org/seatunnel/${SEATUNNEL_VERSION}/apache-seatunnel-${SEATUNNEL_VERSION}-bin.tar.gz
@@ -112,13 +112,13 @@ RUN cd ${SEATUNNEL_HOME} && sh bin/install-plugin.sh ${SEATUNNEL_VERSION}
Then run the following commands to build the image:
```bash
-docker build -t seatunnel:2.3.6 -f Dockerfile .
+docker build -t seatunnel:2.3.9 -f Dockerfile .
```
-Image `seatunnel:2.3.6` needs to be present in the host (minikube) so that the deployment can take place.
+Image `seatunnel:2.3.9` needs to be present in the host (minikube) so that the deployment can take place.
Load image to minikube via:
```bash
-minikube image load seatunnel:2.3.6
+minikube image load seatunnel:2.3.9
```
@@ -191,7 +191,7 @@ none
]}>
-In this guide we will use [seatunnel.streaming.conf](https://github.com/apache/seatunnel/blob/2.3.6-release/config/v2.streaming.conf.template):
+In this guide we will use [seatunnel.streaming.conf](https://github.com/apache/seatunnel/blob/2.3.9-release/config/v2.streaming.conf.template):
```conf
env {
@@ -202,7 +202,7 @@ env {
source {
FakeSource {
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 160000
schema = {
fields {
@@ -215,8 +215,8 @@ source {
transform {
FieldMapper {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
field_mapper = {
age = age
name = new_name
@@ -226,7 +226,7 @@ transform {
sink {
Console {
- source_table_name = "fake1"
+ plugin_input = "fake1"
}
}
```
@@ -245,7 +245,7 @@ kind: FlinkDeployment
metadata:
name: seatunnel-flink-streaming-example
spec:
- image: seatunnel:2.3.6-flink-1.13
+ image: seatunnel:2.3.9-flink-1.13
flinkVersion: v1_13
flinkConfiguration:
taskmanager.numberOfTaskSlots: "2"
@@ -291,7 +291,7 @@ kubectl apply -f seatunnel-flink.yaml
-In this guide we will use [seatunnel.streaming.conf](https://github.com/apache/seatunnel/blob/2.3.6-release/config/v2.streaming.conf.template):
+In this guide we will use [seatunnel.streaming.conf](https://github.com/apache/seatunnel/blob/2.3.9-release/config/v2.streaming.conf.template):
```conf
env {
@@ -303,7 +303,7 @@ env {
source {
FakeSource {
parallelism = 2
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 16
schema = {
fields {
@@ -334,7 +334,7 @@ metadata:
spec:
containers:
- name: seatunnel
- image: seatunnel:2.3.6
+ image: seatunnel:2.3.9
command: ["/bin/sh","-c","/opt/seatunnel/bin/seatunnel.sh --config /data/seatunnel.streaming.conf -e local"]
resources:
limits:
@@ -366,7 +366,7 @@ kubectl apply -f seatunnel.yaml
-In this guide we will use [seatunnel.streaming.conf](https://github.com/apache/seatunnel/blob/2.3.6-release/config/v2.streaming.conf.template):
+In this guide we will use [seatunnel.streaming.conf](https://github.com/apache/seatunnel/blob/2.3.9-release/config/v2.streaming.conf.template):
```conf
env {
@@ -378,7 +378,7 @@ env {
source {
FakeSource {
parallelism = 2
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 16
schema = {
fields {
@@ -524,7 +524,7 @@ spec:
spec:
containers:
- name: seatunnel
- image: seatunnel:2.3.6
+ image: seatunnel:2.3.9
imagePullPolicy: IfNotPresent
ports:
- containerPort: 5801
diff --git a/docs/en/start-v2/locally/deployment.md b/docs/en/start-v2/locally/deployment.md
index 69cf5164e95..4684871acb0 100644
--- a/docs/en/start-v2/locally/deployment.md
+++ b/docs/en/start-v2/locally/deployment.md
@@ -1,54 +1,47 @@
---
-
sidebar_position: 2
--------------------
+---
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
-# Download and Make Installation Packages
+# Deployment
-## Step 1: Preparation
+## Preparation
Before starting to download SeaTunnel, you need to ensure that you have installed the following software required by SeaTunnel:
* Install [Java](https://www.java.com/en/download/) (Java 8 or 11, and other versions higher than Java 8 can theoretically work) and set `JAVA_HOME`.
-## Step 2: Download SeaTunnel
+## Download SeaTunnel Release Package
+
+### Download The Binary Package
Visit the [SeaTunnel Download Page](https://seatunnel.apache.org/download) to download the latest binary package `seatunnel--bin.tar.gz`.
Or you can also download it through the terminal:
```shell
-export version="2.3.6"
+export version="2.3.9"
wget "https://archive.apache.org/dist/seatunnel/${version}/apache-seatunnel-${version}-bin.tar.gz"
tar -xzvf "apache-seatunnel-${version}-bin.tar.gz"
```
-## Step 3: Download The Connector Plugins
+### Download The Connector Plugins
-Starting from the 2.2.0-beta version, the binary package no longer provides the connector dependencies by default. Therefore, when using it for the first time, you need to execute the following command to install the connectors (Of course, you can also manually download the connector from the [Apache Maven Repository](https://repo.maven.apache.org/maven2/org/apache/seatunnel/), and then move it to the `connectors/seatunnel` directory) :
+Starting from version 2.2.0-beta, the binary package no longer provides connector dependencies by default. Therefore, the first time you use it, you need to run the following command to install the connectors (Alternatively, you can manually download the connectors from the [Apache Maven Repository](https://repo.maven.apache.org/maven2/org/apache/seatunnel/) and move them to the `connectors/` directory. For versions before 2.3.5, place them in the `connectors/seatunnel` directory):
```bash
sh bin/install-plugin.sh
```
-If you need a specific connector version, taking 2.3.6 as an example, you need to execute the following command:
+If you need a specific connector version, taking 2.3.9 as an example, you need to execute the following command:
```bash
-sh bin/install-plugin.sh 2.3.6
+sh bin/install-plugin.sh 2.3.9
```
-Usually you don't need all connector plugins, so you can specify the plugins you need through configuring `config/plugin_config`. For example, if you only need the `connector-console` plugin, you can modify the plugin.properties configuration file as follows:
-
-```plugin_config
---seatunnel-connectors--
-connector-console
---end--
-```
-
-If you want the example application to work properly, you need to add the following plugins.
+Typically, you do not need all the connector plugins. You can specify the required plugins by configuring `config/plugin_config`. For example, if you want the sample application to work properly, you will need the `connector-console` and `connector-fake` plugins. You can modify the `plugin_config` configuration file as follows:
```plugin_config
--seatunnel-connectors--
@@ -65,10 +58,33 @@ If you want to install connector plugins by manually downloading connectors, you
:::
+## Build SeaTunnel From Source Code
+
+### Download The Source Code
+
+Getting the source code works the same way as getting the binary package:
+download it from the [download page](https://seatunnel.apache.org/download/) or clone it from the [GitHub repository](https://github.com/apache/seatunnel/releases).
+
+### Build The Source Code
+
+```shell
+cd seatunnel
+sh ./mvnw clean install -DskipTests -Dskip.spotless=true
+# get the binary package
+cp seatunnel-dist/target/apache-seatunnel-2.3.9-bin.tar.gz /The-Path-You-Want-To-Copy
+
+cd /The-Path-You-Want-To-Copy
+tar -xzvf "apache-seatunnel-2.3.9-bin.tar.gz"
+```
+
+When built from the source code, all the connector plugins and some necessary dependencies (e.g., the MySQL driver) are included in the binary package. You can use the connector plugins directly without installing them separately.
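+
+For example, after extracting the package you can confirm that the connectors are bundled:
+
+```shell
+cd apache-seatunnel-2.3.9
+# Connector jars are already present after a source build
+ls connectors/ | grep connector-
+```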
+
+## Run SeaTunnel
+
Now you have downloaded the SeaTunnel binary package and the connector plugins. Next, you can choose different engine option to run synchronization tasks.
-If you use Flink to run the synchronization task, there is no need to deploy the SeaTunnel Engine service cluster. You can refer to [Quick Start of SeaTunnel Flink Engine](quick-start-flink.md) to run your synchronization task.
+If you use Flink to run the synchronization task, there is no need to deploy the SeaTunnel Engine service cluster. You can refer to [Quick Start With Flink](quick-start-flink.md) to run your synchronization task.
-If you use Spark to run the synchronization task, there is no need to deploy the SeaTunnel Engine service cluster. You can refer to [Quick Start of SeaTunnel Spark Engine](quick-start-spark.md) to run your synchronization task.
+If you use Spark to run the synchronization task, there is no need to deploy the SeaTunnel Engine service cluster. You can refer to [Quick Start With Spark](quick-start-spark.md) to run your synchronization task.
-If you use the builtin SeaTunnel Engine (Zeta) to run tasks, you need to deploy the SeaTunnel Engine service first. Refer to [Deployment of SeaTunnel Engine (Zeta) Service](quick-start-seatunnel-engine.md).
+If you use the builtin SeaTunnel Engine (Zeta) to run tasks, you need to deploy the SeaTunnel Engine service first. Refer to [Quick Start With SeaTunnel Engine](quick-start-seatunnel-engine.md).
diff --git a/docs/en/start-v2/locally/quick-start-flink.md b/docs/en/start-v2/locally/quick-start-flink.md
index fcb5ab40930..fbfc945fc7c 100644
--- a/docs/en/start-v2/locally/quick-start-flink.md
+++ b/docs/en/start-v2/locally/quick-start-flink.md
@@ -1,7 +1,6 @@
---
-
sidebar_position: 3
--------------------
+---
# Quick Start With Flink
@@ -28,7 +27,7 @@ env {
source {
FakeSource {
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 16
schema = {
fields {
@@ -41,8 +40,8 @@ source {
transform {
FieldMapper {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
field_mapper = {
age = age
name = new_name
@@ -52,7 +51,7 @@ transform {
sink {
Console {
- source_table_name = "fake1"
+ plugin_input = "fake1"
}
}
@@ -71,7 +70,7 @@ cd "apache-seatunnel-${version}"
./bin/start-seatunnel-flink-13-connector-v2.sh --config ./config/v2.streaming.conf.template
```
-Flink version between `1.15.x` and `1.16.x`
+Flink version between `1.15.x` and `1.18.x`
```shell
cd "apache-seatunnel-${version}"
@@ -106,7 +105,7 @@ row=16 : SGZCr, 94186144
## What's More
-For now, you have taken a quick look about SeaTunnel with Flink, and you can see [Connector](/docs/category/connector-v2) to find all
-sources and sinks SeaTunnel supported. Or see [SeaTunnel With Flink](../../other-engine/flink.md) if you want to know more about SeaTunnel With Flink.
+- Start writing your own config file now: choose the [connector](../../connector-v2/source) you want to use and configure its parameters according to the connector's documentation.
+- See [SeaTunnel With Flink](../../other-engine/flink.md) if you want to know more about SeaTunnel With Flink.
+- SeaTunnel has a built-in engine named `Zeta`, which is the default engine of SeaTunnel. You can follow [Quick Start](quick-start-seatunnel-engine.md) to configure and run a data synchronization job.
-SeaTunnel have a builtin engine named `Zeta`, and it's the default engine of SeaTunnel. You can follow [Quick Start](quick-start-seatunnel-engine.md) to configure and run a data synchronization job.
diff --git a/docs/en/start-v2/locally/quick-start-seatunnel-engine.md b/docs/en/start-v2/locally/quick-start-seatunnel-engine.md
index 10814f0050f..fe9d8ee7983 100644
--- a/docs/en/start-v2/locally/quick-start-seatunnel-engine.md
+++ b/docs/en/start-v2/locally/quick-start-seatunnel-engine.md
@@ -1,7 +1,6 @@
---
-
sidebar_position: 2
--------------------
+---
# Quick Start With SeaTunnel Engine
@@ -22,7 +21,7 @@ env {
source {
FakeSource {
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 16
schema = {
fields {
@@ -35,8 +34,8 @@ source {
transform {
FieldMapper {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
field_mapper = {
age = age
name = new_name
@@ -46,7 +45,7 @@ transform {
sink {
Console {
- source_table_name = "fake1"
+ plugin_input = "fake1"
}
}
@@ -95,7 +94,107 @@ The SeaTunnel console will print some logs as below:
2022-12-19 11:01:46,491 INFO org.apache.seatunnel.connectors.seatunnel.console.sink.ConsoleSinkWriter - subtaskIndex=0 rowIndex=16: SeaTunnelRow#tableId=-1 SeaTunnelRow#kind=INSERT: mIJDt, 995616438
```
+## Extended Example: Batch Mode from MySQL to Doris
+
+### Step 1: Download the Connector
+
+First, you need to add the connector name to the `${SEATUNNEL_HOME}/config/plugin_config` file. Then, execute the command to install the connector (of course, you can also manually download the connector from the [Apache Maven Repository](https://repo.maven.apache.org/maven2/org/apache/seatunnel/) and move it to the `connectors/` directory). Finally, make sure that the `connector-jdbc` and `connector-doris` connectors are in the `${SEATUNNEL_HOME}/connectors/` directory.
+
+```bash
+# Configure the connector name.
+--seatunnel-connectors--
+connector-jdbc
+connector-doris
+--end--
+```
+
+```bash
+# Install the connector.
+sh bin/install-plugin.sh
+```
+
+### Step 2: Place the MySQL Driver
+
+You need to download the [JDBC driver JAR package](https://mvnrepository.com/artifact/mysql/mysql-connector-java) and place it in the `${SEATUNNEL_HOME}/lib/` directory.
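+
+For example, you could fetch the driver from Maven Central (the driver version below is only an example; pick one that matches your MySQL server):
+
+```shell
+# Download the MySQL JDBC driver into SeaTunnel's lib directory
+wget -P "${SEATUNNEL_HOME}/lib/" \
+  https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.28/mysql-connector-java-8.0.28.jar
+```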
+
+### Step 3: Add Job Configuration File to Define the Job
+
+```bash
+cd seatunnel/job/
+
+vim st.conf
+
+env {
+ parallelism = 2
+ job.mode = "BATCH"
+}
+source {
+ Jdbc {
+ url = "jdbc:mysql://localhost:3306/test"
+ driver = "com.mysql.cj.jdbc.Driver"
+ connection_check_timeout_sec = 100
+ user = "user"
+ password = "pwd"
+ table_path = "test.table_name"
+ query = "select * from test.table_name"
+ }
+}
+
+sink {
+ Doris {
+ fenodes = "doris_ip:8030"
+ username = "user"
+ password = "pwd"
+ database = "test_db"
+ table = "table_name"
+ sink.enable-2pc = "true"
+ sink.label-prefix = "test-cdc"
+ doris.config = {
+ format = "json"
+ read_json_by_line="true"
+ }
+ }
+}
+```
+
+For more information about the configuration, please refer to [Basic Concepts of Configuration](../../concept/config.md).
+
+### Step 4: Run the SeaTunnel Application
+
+You can start the application using the following command:
+
+```shell
+cd seatunnel/
+./bin/seatunnel.sh --config ./job/st.conf -m local
+
+```
+
+**Check the Output**: When you run the command, you can see its output in the console. You can consider this as an indicator of whether the command has succeeded or failed.
+
+The SeaTunnel console will print some log information like the following:
+
+```shell
+***********************************************
+ Job Statistic Information
+***********************************************
+Start Time : 2024-08-13 10:21:49
+End Time : 2024-08-13 10:21:53
+Total Time(s) : 4
+Total Read Count : 1000
+Total Write Count : 1000
+Total Failed Count : 0
+***********************************************
+```
+
+:::tip
+
+If you want to optimize your job, refer to the connector documentation for [Source-MySQL](../../connector-v2/source/Mysql.md) and [Sink-Doris](../../connector-v2/sink/Doris.md).
+
+:::
+
+
## What's More
-For now, you have taken a quick look about SeaTunnel, and you can see [connector](../../connector-v2/source/FakeSource.md) to find all
-sources and sinks SeaTunnel supported. Or see [SeaTunnel Engine(Zeta)](../../seatunnel-engine/about.md) if you want to know more about SeaTunnel Engine. Here you will learn how to deploy SeaTunnel Engine and how to use it in cluster mode.
+- Start writing your own config file now: choose the [connector](../../connector-v2/source) you want to use and configure its parameters according to the connector's documentation.
+- See [SeaTunnel Engine(Zeta)](../../seatunnel-engine/about.md) if you want to know more about SeaTunnel Engine. There you will learn how to deploy SeaTunnel Engine and how to use it in cluster mode.
+
diff --git a/docs/en/start-v2/locally/quick-start-spark.md b/docs/en/start-v2/locally/quick-start-spark.md
index 160da9498cb..e490f238b3d 100644
--- a/docs/en/start-v2/locally/quick-start-spark.md
+++ b/docs/en/start-v2/locally/quick-start-spark.md
@@ -1,7 +1,6 @@
---
-
sidebar_position: 4
--------------------
+---
# Quick Start With Spark
@@ -29,7 +28,7 @@ env {
source {
FakeSource {
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 16
schema = {
fields {
@@ -42,8 +41,8 @@ source {
transform {
FieldMapper {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
field_mapper = {
age = age
name = new_name
@@ -53,7 +52,7 @@ transform {
sink {
Console {
- source_table_name = "fake1"
+ plugin_input = "fake1"
}
}
@@ -113,7 +112,7 @@ row=16 : SGZCr, 94186144
## What's More
-For now, you have taken a quick look about SeaTunnel with Spark, and you can see [Connector](/docs/category/connector-v2) to find all
-sources and sinks SeaTunnel supported. Or see [SeaTunnel With Spark](../../other-engine/spark.md) if you want to know more about SeaTunnel With Spark.
+- Start writing your own config file now: choose the [connector](../../connector-v2/source) you want to use and configure its parameters according to the connector's documentation.
+- See [SeaTunnel With Spark](../../other-engine/spark.md) if you want to know more about SeaTunnel With Spark.
+- SeaTunnel has a built-in engine named `Zeta`, which is the default engine of SeaTunnel. You can follow [Quick Start](quick-start-seatunnel-engine.md) to configure and run a data synchronization job.
-SeaTunnel have a builtin engine named `Zeta`, and it's the default engine of SeaTunnel. You can follow [Quick Start](quick-start-seatunnel-engine.md) to configure and run a data synchronization job.
diff --git a/docs/en/transform-v2/common-options.md b/docs/en/transform-v2/common-options.md
index 7c13bac4f00..32e91bf8243 100644
--- a/docs/en/transform-v2/common-options.md
+++ b/docs/en/transform-v2/common-options.md
@@ -1,11 +1,21 @@
+---
+sidebar_position: 1
+---
+
# Transform Common Options
> This is a process of intermediate conversion between the source and sink terminals,You can use sql statements to smoothly complete the conversion process
-| Name | Type | Required | Default | Description |
-|-------------------|--------|----------|---------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| result_table_name | String | No | - | When `source_table_name` is not specified, the current plugin processes the data set `(dataset)` output by the previous plugin in the configuration file; When `source_table_name` is specified, the current plugin is processing the data set corresponding to this parameter. |
-| source_table_name | String | No | - | When `result_table_name` is not specified, the data processed by this plugin will not be registered as a data set that can be directly accessed by other plugins, or called a temporary table `(table)`; When `result_table_name` is specified, the data processed by this plugin will be registered as a data set `(dataset)` that can be directly accessed by other plugins, or called a temporary table `(table)` . The dataset registered here can be directly accessed by other plugins by specifying `source_table_name` . |
+:::warning
+
+The old configuration names `source_table_name`/`result_table_name` are deprecated; please migrate to the new names `plugin_input`/`plugin_output` as soon as possible.
+
+:::
+
+| Name          | Type   | Required | Default | Description |
+|---------------|--------|----------|---------|-------------|
+| plugin_output | String | No       | -       | When `plugin_output` is not specified, the data processed by this plugin will not be registered as a data set `(dataset)` that can be directly accessed by other plugins, or called a temporary table `(table)`; when `plugin_output` is specified, the data processed by this plugin will be registered as a data set `(dataset)` that can be directly accessed by other plugins, or called a temporary table `(table)`. The data set registered here can be directly accessed by other plugins by specifying `plugin_input`. |
+| plugin_input  | String | No       | -       | When `plugin_input` is not specified, the current plugin processes the data set `(dataset)` output by the previous plugin in the configuration file; when `plugin_input` is specified, the current plugin processes the data set corresponding to this parameter. |
## Task Example
@@ -20,7 +30,7 @@ env {
source {
FakeSource {
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 100
schema = {
fields {
@@ -44,9 +54,9 @@ source {
transform {
Sql {
- source_table_name = "fake"
- result_table_name = "fake1"
- # the query table name must same as field 'source_table_name'
+ plugin_input = "fake"
+ plugin_output = "fake1"
+      # the query table name must be the same as the value of 'plugin_input'
query = "select id, regexp_replace(name, '.+', 'b') as name, age+1 as age, pi() as pi, c_timestamp, c_date, c_map, c_array, c_decimal, c_row from fake"
}
# The SQL transform support base function and criteria operation
@@ -55,10 +65,10 @@ transform {
sink {
Console {
- source_table_name = "fake1"
+ plugin_input = "fake1"
}
Console {
- source_table_name = "fake"
+ plugin_input = "fake"
}
}
```
diff --git a/docs/en/transform-v2/copy.md b/docs/en/transform-v2/copy.md
index 7a0e73f44be..eede3f7d077 100644
--- a/docs/en/transform-v2/copy.md
+++ b/docs/en/transform-v2/copy.md
@@ -36,8 +36,8 @@ We want copy fields `name`、`age` to a new fields `name1`、`name2`、`age1`, w
```
transform {
Copy {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
fields {
name1 = name
name2 = name
diff --git a/docs/en/transform-v2/dynamic-compile.md b/docs/en/transform-v2/dynamic-compile.md
index 5bfbbadbe08..d5f21f2708d 100644
--- a/docs/en/transform-v2/dynamic-compile.md
+++ b/docs/en/transform-v2/dynamic-compile.md
@@ -4,6 +4,13 @@
## Description
+:::tip
+
+Important notice: you need to ensure the security of your service and prevent attackers from uploading destructive code.
+
+:::
+
Provide a programmable way to process rows, allowing users to customize any business behavior, even RPC requests based on existing row fields as parameters, or to expand fields by retrieving associated data from other data sources. To distinguish businesses, you can also define multiple transforms to combine,
If the conversion is too complex, it may affect performance
@@ -11,24 +18,52 @@ If the conversion is too complex, it may affect performance
| name | type | required | default value |
|------------------|--------|----------|---------------|
-| source_code | string | yes | |
-| compile_language | string | yes | |
-
-### source_code [string]
+| source_code | string | no | |
+| compile_language | Enum | yes | |
+| compile_pattern | Enum | no | SOURCE_CODE |
+| absolute_path | string | no | |
-The code must implement two methods: getInlineOutputColumns and getInlineOutputFieldValues. getInlineOutputColumns determines the columns you want to add or convert, and the original column structure can be obtained from CatalogTable
-GetInlineOutputFieldValues determines your column values. You can fulfill any of your requirements, and even complete RPC requests to obtain new values based on the original columns
-If there are third-party dependency packages, please place them in ${SEATUNNEL_HOME}/lib, if you use spark or flink, you need to put it under the libs of the corresponding service.
### common options [string]
Transform plugin common parameters, please refer to [Transform Plugin](common-options.md) for details
-### compile_language [string]
+### compile_language [Enum]
Some syntax in Java may not be supported, please refer https://github.com/janino-compiler/janino
GROOVY,JAVA
+### compile_pattern [Enum]
+
+SOURCE_CODE, ABSOLUTE_PATH
+If set to SOURCE_CODE, the `source_code` option is required; if set to ABSOLUTE_PATH, the `absolute_path` option is required.
+
+### absolute_path [string]
+
+The absolute path of Java or Groovy files on the server
+
+### source_code [string]
+
+The source code.
+
+#### Details about the source code
+
+In the source code, you must implement two methods:
+- `Column[] getInlineOutputColumns(CatalogTable inputCatalogTable)`
+- `Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow)`
+
+The `getInlineOutputColumns` method takes a `CatalogTable` as input and returns `Column[]`.
+You can get the current table's schema from the `CatalogTable`.
+If a returned column already exists in the current schema, it will be overwritten by the returned value (field type, comment, ...); if it is a new column, it will be added to the current schema.
+
+The `getInlineOutputFieldValues` method takes a `SeaTunnelRowAccessor` as input and returns `Object[]`.
+You can get the record from the `SeaTunnelRowAccessor` and apply your own customized data processing logic.
+The length of the returned `Object[]` array must match the length of the `getInlineOutputColumns` result, and the order must match as well.
+
+If there are third-party dependency packages, please place them in `${SEATUNNEL_HOME}/lib`; if you use Spark or Flink, you need to put them under the libs directory of the corresponding service.
+You need to restart the server to load the library files.
+
+
## Example
The data read from source is a table like this:
@@ -37,92 +72,159 @@ The data read from source is a table like this:
|----------|-----|------|
| Joy Ding | 20 | 123 |
| May Ding | 20 | 123 |
-| Kin Dom | 20 | 123 |
-| Joy Dom | 20 | 123 |
+| Kin Dom | 30 | 123 |
+| Joy Dom | 30 | 123 |
-```
+Use DynamicCompile to add a new column `compile_language` and update the `age` field based on its original value (if age = 20, update it to 40).
+
+
+- Use Groovy
+```hocon
transform {
DynamicCompile {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "groovy_out"
compile_language="GROOVY"
+ compile_pattern="SOURCE_CODE"
source_code="""
import org.apache.seatunnel.api.table.catalog.Column
- import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor
+ import org.apache.seatunnel.api.table.type.SeaTunnelRowAccessor
import org.apache.seatunnel.api.table.catalog.CatalogTable
import org.apache.seatunnel.api.table.catalog.PhysicalColumn;
import org.apache.seatunnel.api.table.type.*;
import java.util.ArrayList;
class demo {
- public Column[] getInlineOutputColumns(CatalogTable inputCatalogTable) {
- List columns = new ArrayList<>();
- PhysicalColumn destColumn =
- PhysicalColumn.of(
- "aa",
- BasicType.STRING_TYPE,
- 10,
- true,
- "",
- "");
- columns.add(destColumn);
- return columns.toArray(new Column[0]);
- }
- public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) {
- Object[] fieldValues = new Object[1];
- fieldValues[0]="AA"
- return fieldValues;
- }
+ public Column[] getInlineOutputColumns(CatalogTable inputCatalogTable) {
+ PhysicalColumn col1 =
+ PhysicalColumn.of(
+ "compile_language",
+ BasicType.STRING_TYPE,
+ 10L,
+ true,
+ "",
+ "");
+ PhysicalColumn col2 =
+ PhysicalColumn.of(
+ "age",
+ BasicType.INT_TYPE,
+ 0L,
+ false,
+ false,
+ ""
+ );
+ return new Column[]{
+ col1, col2
+ };
+ }
+
+
+ public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) {
+ Object[] fieldValues = new Object[2];
+ // get age
+ Object ageField = inputRow.getField(1);
+ fieldValues[0] = "GROOVY";
+ if (Integer.parseInt(ageField.toString()) == 20) {
+ fieldValues[1] = 40;
+ } else {
+ fieldValues[1] = ageField;
+ }
+ return fieldValues;
+ }
};"""
}
}
+```
+- Use Java
+```hocon
transform {
DynamicCompile {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "java_out"
compile_language="JAVA"
+ compile_pattern="SOURCE_CODE"
source_code="""
import org.apache.seatunnel.api.table.catalog.Column;
- import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor;
+ import org.apache.seatunnel.api.table.type.SeaTunnelRowAccessor;
import org.apache.seatunnel.api.table.catalog.*;
import org.apache.seatunnel.api.table.type.*;
import java.util.ArrayList;
- public Column[] getInlineOutputColumns(CatalogTable inputCatalogTable) {
-
- ArrayList columns = new ArrayList();
- PhysicalColumn destColumn =
- PhysicalColumn.of(
- "aa",
- BasicType.STRING_TYPE,
- 10,
- true,
- "",
- "");
- return new Column[]{
- destColumn
- };
-
- }
- public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) {
- Object[] fieldValues = new Object[1];
- fieldValues[0]="AA";
- return fieldValues;
- }
+ public Column[] getInlineOutputColumns(CatalogTable inputCatalogTable) {
+ PhysicalColumn col1 =
+ PhysicalColumn.of(
+ "compile_language",
+ BasicType.STRING_TYPE,
+ 10L,
+ true,
+ "",
+ "");
+ PhysicalColumn col2 =
+ PhysicalColumn.of(
+ "age",
+ BasicType.INT_TYPE,
+ 0L,
+ false,
+ false,
+ ""
+ );
+ return new Column[]{
+ col1, col2
+ };
+ }
+
+
+ public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) {
+ Object[] fieldValues = new Object[2];
+ // get age
+ Object ageField = inputRow.getField(1);
+ fieldValues[0] = "JAVA";
+ if (Integer.parseInt(ageField.toString()) == 20) {
+ fieldValues[1] = 40;
+ } else {
+ fieldValues[1] = ageField;
+ }
+ return fieldValues;
+ }
"""
}
}
+```
+- Use an absolute path to read the code
+```hocon
+transform {
+  DynamicCompile {
+    plugin_input = "fake"
+    plugin_output = "groovy_out"
+    compile_language="GROOVY"
+    compile_pattern="ABSOLUTE_PATH"
+    absolute_path="""/tmp/GroovyFile"""
+  }
+}
```
-Then the data in result table `fake1` will like this
+Then the data in result table `groovy_out` will look like this:
+
+| name | age | card | compile_language |
+|----------|-----|------|------------------|
+| Joy Ding | 40 | 123 | GROOVY |
+| May Ding | 40 | 123 | GROOVY |
+| Kin Dom | 30 | 123 | GROOVY |
+| Joy Dom | 30 | 123 | GROOVY |
+
+Then the data in result table `java_out` will look like this:
+
+| name | age | card | compile_language |
+|----------|-----|------|------------------|
+| Joy Ding | 40 | 123 | JAVA |
+| May Ding | 40 | 123 | JAVA |
+| Kin Dom | 30 | 123 | JAVA |
+| Joy Dom | 30 | 123 | JAVA |
-| name | age | card | aa |
-|----------|-----|------|----|
-| Joy Ding | 20 | 123 | AA |
-| May Ding | 20 | 123 | AA |
-| Kin Dom | 20 | 123 | AA |
-| Joy Dom | 20 | 123 | AA |
+More complex examples can be found at
+https://github.com/apache/seatunnel/tree/dev/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf
## Changelog
diff --git a/docs/en/transform-v2/embedding.md b/docs/en/transform-v2/embedding.md
new file mode 100644
index 00000000000..350a23fc555
--- /dev/null
+++ b/docs/en/transform-v2/embedding.md
@@ -0,0 +1,392 @@
+# Embedding
+
+> Embedding Transform Plugin
+
+## Description
+
+The `Embedding` transform plugin leverages embedding models to convert text data into vectorized representations. This
+transformation can be applied to various fields. The plugin supports multiple model providers and can be integrated with
+different API endpoints.
+
+## Options
+
+| Name | Type | Required | Default Value | Description |
+|--------------------------------|--------|----------|---------------|-------------------------------------------------------------------------------------------------------------|
+| model_provider | enum | yes | - | The model provider for embedding. Options may include `QIANFAN`, `OPENAI`, etc. |
+| api_key | string | yes | - | The API key required to authenticate with the embedding service. |
+| secret_key | string | yes | - | The secret key required for additional authentication with the embedding service. |
+| single_vectorized_input_number | int | no | 1 | The number of inputs vectorized in one request. Default is 1. |
+| vectorization_fields | map | yes | - | A mapping between input fields and their corresponding output vector fields. |
+| model                          | string | yes      | -             | The specific model to use for embedding (e.g., `text-embedding-3-small` for OPENAI).                         |
+| api_path | string | no | - | The API endpoint for the embedding service. Typically provided by the model provider. |
+| oauth_path | string | no | - | The API endpoint for the oauth service. |
+| custom_config | map | no | | Custom configurations for the model. |
+| custom_response_parse | string | no | | Specifies how to parse the response from the model using JsonPath. Example: `$.choices[*].message.content`. |
+| custom_request_headers | map | no | | Custom headers for the request to the model. |
+| custom_request_body | map | no | | Custom body for the request. Supports placeholders like `${model}`, `${input}`. |
+
+### model_provider
+
+The providers for generating embeddings include common options such as `DOUBAO`, `QIANFAN`, and `OPENAI`. Additionally,
+you can choose `CUSTOM` to implement requests and retrievals for custom embedding models.
+
+### api_key
+
+The API key for authenticating requests to the embedding service. This is typically provided by the model provider when
+you register for their service.
+
+### secret_key
+
+The secret key used for additional authentication. Some providers may require this for secure API requests.
+
+### single_vectorized_input_number
+
+Specifies how many inputs are processed in a single vectorization request. The default is 1. Adjust based on your
+processing
+capacity and the model provider's API limitations.
+
+### vectorization_fields
+
+A mapping between input fields and their respective output vector fields. This allows the plugin to understand which
+text fields to vectorize and how to store the resulting vectors.
+
+```hocon
+vectorization_fields {
+ book_intro_vector = book_intro
+ author_biography_vector = author_biography
+}
+```
+
+### model
+
+The specific embedding model to use. This depends on the `embedding_model_provider`. For example, if using OPENAI, you
+might specify `text-embedding-3-small`.
+
+### api_path
+
+The API endpoint to use for making requests to the embedding service. This might vary based on the provider and model
+used. Generally, this is provided by the model provider.
+
+### oauth_path
+
+The API endpoint of the OAuth service, used to obtain authentication credentials. This might vary based on the provider and model
+used. Generally, this is provided by the model provider.
+
+### custom_config
+
+The `custom_config` option allows you to provide additional custom configurations for the model. This is a map where you
+can define various settings that might be required by the specific model you're using.
+
+### custom_response_parse
+
+The `custom_response_parse` option allows you to specify how to parse the model's response. You can use JsonPath to
+extract the specific data you need from the response. For example, by using `$.data[*].embedding`, you can extract
+the `embedding` field values from the following JSON and obtain a `List` of nested `List` results. For more details on
+using JsonPath, please refer to
+the [JsonPath Getting Started guide](https://github.com/json-path/JsonPath?tab=readme-ov-file#getting-started).
+
+```json
+{
+ "object": "list",
+ "data": [
+ {
+ "object": "embedding",
+ "index": 0,
+ "embedding": [
+ -0.006929283495992422,
+ -0.005336422007530928,
+ -0.00004547132266452536,
+ -0.024047505110502243
+ ]
+ }
+ ],
+ "model": "text-embedding-3-small",
+ "usage": {
+ "prompt_tokens": 5,
+ "total_tokens": 5
+ }
+}
+```
+
+### custom_request_headers
+
+The `custom_request_headers` option allows you to define custom headers that should be included in the request sent to
+the model's API. This is useful if the API requires additional headers beyond the standard ones, such as authorization
+tokens, content types, etc.
+
+### custom_request_body
+
+The `custom_request_body` option supports placeholders:
+
+- `${model}`: Placeholder for the model name.
+- `${input}`: Placeholder for the input value; the type of the body value also determines the request body type.
+  Example: `["${input}"]` -> `["input"]` (list)
+
+### common options
+
+Transform plugin common parameters, please refer to [Transform Plugin](common-options.md) for details.
+
+## Example Configuration
+
+```hocon
+env {
+ job.mode = "BATCH"
+}
+
+source {
+ FakeSource {
+ row.num = 5
+ schema = {
+ fields {
+ book_id = "int"
+ book_name = "string"
+ book_intro = "string"
+ author_biography = "string"
+ }
+ }
+ rows = [
+ {fields = [1, "To Kill a Mockingbird",
+ "Set in the American South during the 1930s, To Kill a Mockingbird tells the story of young Scout Finch and her brother, Jem, who are growing up in a world of racial inequality and injustice. Their father, Atticus Finch, is a lawyer who defends a black man falsely accused of raping a white woman, teaching his children valuable lessons about morality, courage, and empathy.",
+ "Harper Lee (1926–2016) was an American novelist best known for To Kill a Mockingbird, which won the Pulitzer Prize in 1961. Lee was born in Monroeville, Alabama, and the town served as inspiration for the fictional Maycomb in her novel. Despite the success of her book, Lee remained a private person and published only one other novel, Go Set a Watchman, which was written before To Kill a Mockingbird but released in 2015 as a sequel."
+ ], kind = INSERT}
+ {fields = [2, "1984",
+ "1984 is a dystopian novel set in a totalitarian society governed by Big Brother. The story follows Winston Smith, a man who works for the Party rewriting history. Winston begins to question the Party’s control and seeks truth and freedom in a society where individuality is crushed. The novel explores themes of surveillance, propaganda, and the loss of personal autonomy.",
+ "George Orwell (1903–1950) was the pen name of Eric Arthur Blair, an English novelist, essayist, journalist, and critic. Orwell is best known for his works 1984 and Animal Farm, both of which are critiques of totalitarian regimes. His writing is characterized by lucid prose, awareness of social injustice, opposition to totalitarianism, and support of democratic socialism. Orwell’s work remains influential, and his ideas have shaped contemporary discussions on politics and society."
+ ], kind = INSERT}
+ {fields = [3, "Pride and Prejudice",
+ "Pride and Prejudice is a romantic novel that explores the complex relationships between different social classes in early 19th century England. The story centers on Elizabeth Bennet, a young woman with strong opinions, and Mr. Darcy, a wealthy but reserved gentleman. The novel deals with themes of love, marriage, and societal expectations, offering keen insights into human behavior.",
+ "Jane Austen (1775–1817) was an English novelist known for her sharp social commentary and keen observations of the British landed gentry. Her works, including Sense and Sensibility, Emma, and Pride and Prejudice, are celebrated for their wit, realism, and biting critique of the social class structure of her time. Despite her relatively modest life, Austen’s novels have gained immense popularity, and she is considered one of the greatest novelists in the English language."
+ ], kind = INSERT}
+      {fields = [4, "The Great Gatsby",
+ "The Great Gatsby is a novel about the American Dream and the disillusionment that can come with it. Set in the 1920s, the story follows Nick Carraway as he becomes entangled in the lives of his mysterious neighbor, Jay Gatsby, and the wealthy elite of Long Island. Gatsby's obsession with the beautiful Daisy Buchanan drives the narrative, exploring themes of wealth, love, and the decay of the American Dream.",
+ "F. Scott Fitzgerald (1896–1940) was an American novelist and short story writer, widely regarded as one of the greatest American writers of the 20th century. Born in St. Paul, Minnesota, Fitzgerald is best known for his novel The Great Gatsby, which is often considered the quintessential work of the Jazz Age. His works often explore themes of youth, wealth, and the American Dream, reflecting the turbulence and excesses of the 1920s."
+ ], kind = INSERT}
+ {fields = [5, "Moby-Dick",
+ "Moby-Dick is an epic tale of obsession and revenge. The novel follows the journey of Captain Ahab, who is on a relentless quest to kill the white whale, Moby Dick, that once maimed him. Narrated by Ishmael, a sailor aboard Ahab’s ship, the story delves into themes of fate, humanity, and the struggle between man and nature. The novel is also rich with symbolism and philosophical musings.",
+ "Herman Melville (1819–1891) was an American novelist, short story writer, and poet of the American Renaissance period. Born in New York City, Melville gained initial fame with novels such as Typee and Omoo, but it was Moby-Dick, published in 1851, that would later be recognized as his masterpiece. Melville’s work is known for its complexity, symbolism, and exploration of themes such as man’s place in the universe, the nature of evil, and the quest for meaning. Despite facing financial difficulties and critical neglect during his lifetime, Melville’s reputation soared posthumously, and he is now considered one of the great American authors."
+ ], kind = INSERT}
+ ]
+ plugin_output = "fake"
+ }
+}
+
+transform {
+ Embedding {
+ plugin_input = "fake"
+ embedding_model_provider = QIANFAN
+ model = bge_large_en
+ api_key = xxxxxxxxxx
+ secret_key = xxxxxxxxxx
+ api_path = xxxxxxxxxx
+ vectorization_fields {
+ book_intro_vector = book_intro
+ author_biography_vector = author_biography
+ }
+ plugin_output = "embedding_output"
+ }
+}
+
+sink {
+ Assert {
+ plugin_input = "embedding_output"
+ rules =
+ {
+ field_rules = [
+ {
+ field_name = book_id
+ field_type = int
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ },
+ {
+ field_name = book_name
+ field_type = string
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ },
+ {
+ field_name = book_intro
+ field_type = string
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ },
+ {
+ field_name = author_biography
+ field_type = string
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ },
+ {
+ field_name = book_intro_vector
+ field_type = float_vector
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ },
+ {
+ field_name = author_biography_vector
+ field_type = float_vector
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ }
+ ]
+ }
+ }
+}
+```
+
+### Customize the embedding model
+
+```hocon
+
+env {
+ job.mode = "BATCH"
+}
+
+source {
+ FakeSource {
+ row.num = 5
+ schema = {
+ fields {
+ book_id = "int"
+ book_name = "string"
+ book_intro = "string"
+ author_biography = "string"
+ }
+ }
+ rows = [
+ {fields = [1, "To Kill a Mockingbird",
+ "Set in the American South during the 1930s, To Kill a Mockingbird tells the story of young Scout Finch and her brother, Jem, who are growing up in a world of racial inequality and injustice. Their father, Atticus Finch, is a lawyer who defends a black man falsely accused of raping a white woman, teaching his children valuable lessons about morality, courage, and empathy.",
+ "Harper Lee (1926–2016) was an American novelist best known for To Kill a Mockingbird, which won the Pulitzer Prize in 1961. Lee was born in Monroeville, Alabama, and the town served as inspiration for the fictional Maycomb in her novel. Despite the success of her book, Lee remained a private person and published only one other novel, Go Set a Watchman, which was written before To Kill a Mockingbird but released in 2015 as a sequel."
+ ], kind = INSERT}
+ {fields = [2, "1984",
+ "1984 is a dystopian novel set in a totalitarian society governed by Big Brother. The story follows Winston Smith, a man who works for the Party rewriting history. Winston begins to question the Party’s control and seeks truth and freedom in a society where individuality is crushed. The novel explores themes of surveillance, propaganda, and the loss of personal autonomy.",
+ "George Orwell (1903–1950) was the pen name of Eric Arthur Blair, an English novelist, essayist, journalist, and critic. Orwell is best known for his works 1984 and Animal Farm, both of which are critiques of totalitarian regimes. His writing is characterized by lucid prose, awareness of social injustice, opposition to totalitarianism, and support of democratic socialism. Orwell’s work remains influential, and his ideas have shaped contemporary discussions on politics and society."
+ ], kind = INSERT}
+ {fields = [3, "Pride and Prejudice",
+ "Pride and Prejudice is a romantic novel that explores the complex relationships between different social classes in early 19th century England. The story centers on Elizabeth Bennet, a young woman with strong opinions, and Mr. Darcy, a wealthy but reserved gentleman. The novel deals with themes of love, marriage, and societal expectations, offering keen insights into human behavior.",
+ "Jane Austen (1775–1817) was an English novelist known for her sharp social commentary and keen observations of the British landed gentry. Her works, including Sense and Sensibility, Emma, and Pride and Prejudice, are celebrated for their wit, realism, and biting critique of the social class structure of her time. Despite her relatively modest life, Austen’s novels have gained immense popularity, and she is considered one of the greatest novelists in the English language."
+ ], kind = INSERT}
+      {fields = [4, "The Great Gatsby",
+ "The Great Gatsby is a novel about the American Dream and the disillusionment that can come with it. Set in the 1920s, the story follows Nick Carraway as he becomes entangled in the lives of his mysterious neighbor, Jay Gatsby, and the wealthy elite of Long Island. Gatsby's obsession with the beautiful Daisy Buchanan drives the narrative, exploring themes of wealth, love, and the decay of the American Dream.",
+ "F. Scott Fitzgerald (1896–1940) was an American novelist and short story writer, widely regarded as one of the greatest American writers of the 20th century. Born in St. Paul, Minnesota, Fitzgerald is best known for his novel The Great Gatsby, which is often considered the quintessential work of the Jazz Age. His works often explore themes of youth, wealth, and the American Dream, reflecting the turbulence and excesses of the 1920s."
+ ], kind = INSERT}
+ {fields = [5, "Moby-Dick",
+ "Moby-Dick is an epic tale of obsession and revenge. The novel follows the journey of Captain Ahab, who is on a relentless quest to kill the white whale, Moby Dick, that once maimed him. Narrated by Ishmael, a sailor aboard Ahab’s ship, the story delves into themes of fate, humanity, and the struggle between man and nature. The novel is also rich with symbolism and philosophical musings.",
+ "Herman Melville (1819–1891) was an American novelist, short story writer, and poet of the American Renaissance period. Born in New York City, Melville gained initial fame with novels such as Typee and Omoo, but it was Moby-Dick, published in 1851, that would later be recognized as his masterpiece. Melville’s work is known for its complexity, symbolism, and exploration of themes such as man’s place in the universe, the nature of evil, and the quest for meaning. Despite facing financial difficulties and critical neglect during his lifetime, Melville’s reputation soared posthumously, and he is now considered one of the great American authors."
+ ], kind = INSERT}
+ ]
+ plugin_output = "fake"
+ }
+}
+
+transform {
+ Embedding {
+ plugin_input = "fake"
+ model_provider = CUSTOM
+ model = text-embedding-3-small
+ api_key = xxxxxxxx
+ api_path = "http://mockserver:1080/v1/doubao/embedding"
+ single_vectorized_input_number = 2
+ vectorization_fields {
+ book_intro_vector = book_intro
+ author_biography_vector = author_biography
+ }
+ custom_config={
+ custom_response_parse = "$.data[*].embedding"
+ custom_request_headers = {
+ "Content-Type"= "application/json"
+ "Authorization"= "Bearer xxxxxxx
+ }
+ custom_request_body ={
+ modelx = "${model}"
+ inputx = ["${input}"]
+ }
+ }
+ plugin_output = "embedding_output_1"
+ }
+}
+
+sink {
+ Assert {
+ plugin_input = "embedding_output_1"
+ rules =
+ {
+ field_rules = [
+ {
+ field_name = book_id
+ field_type = int
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ },
+ {
+ field_name = book_name
+ field_type = string
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ },
+ {
+ field_name = book_intro
+ field_type = string
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ },
+ {
+ field_name = author_biography
+ field_type = string
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ },
+ {
+ field_name = book_intro_vector
+ field_type = float_vector
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ },
+ {
+ field_name = author_biography_vector
+ field_type = float_vector
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ }
+ ]
+ }
+ }
+}
+
+```
diff --git a/docs/en/transform-v2/field-mapper.md b/docs/en/transform-v2/field-mapper.md
index e0bd32e1492..fa54ced741e 100644
--- a/docs/en/transform-v2/field-mapper.md
+++ b/docs/en/transform-v2/field-mapper.md
@@ -36,8 +36,8 @@ We want to delete `age` field and update the filed order to `id`, `card`, `name`
```
transform {
FieldMapper {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
field_mapper = {
id = id
card = card
diff --git a/docs/en/transform-v2/filter-rowkind.md b/docs/en/transform-v2/filter-rowkind.md
index e6ef5ba98cd..68aab44b973 100644
--- a/docs/en/transform-v2/filter-rowkind.md
+++ b/docs/en/transform-v2/filter-rowkind.md
@@ -39,7 +39,7 @@ env {
source {
FakeSource {
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 100
schema = {
fields {
@@ -53,15 +53,15 @@ source {
transform {
FilterRowKind {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
exclude_kinds = ["INSERT"]
}
}
sink {
Console {
- source_table_name = "fake1"
+ plugin_input = "fake1"
}
}
```
diff --git a/docs/en/transform-v2/filter.md b/docs/en/transform-v2/filter.md
index f9f28b8398a..748934e621a 100644
--- a/docs/en/transform-v2/filter.md
+++ b/docs/en/transform-v2/filter.md
@@ -43,8 +43,8 @@ we want to keep the field named `name`, `card`, we can add a `Filter` Transform
```
transform {
Filter {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
include_fields = [name, card]
}
}
@@ -55,8 +55,8 @@ Or we can delete the field named `age` by adding a `Filter` Transform with `excl
```
transform {
Filter {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
exclude_fields = [age]
}
}
diff --git a/docs/en/transform-v2/jsonpath.md b/docs/en/transform-v2/jsonpath.md
index 3baf5853b70..f787487069e 100644
--- a/docs/en/transform-v2/jsonpath.md
+++ b/docs/en/transform-v2/jsonpath.md
@@ -8,24 +8,33 @@
## Options
-| name | type | required | default value |
-|---------|-------|----------|---------------|
-| Columns | Array | Yes | |
+| name | type | required | default value |
+|----------------------|-------|----------|---------------|
+| columns | Array | Yes | |
+| row_error_handle_way | Enum | No | FAIL |
### common options [string]
Transform plugin common parameters, please refer to [Transform Plugin](common-options.md) for details
-### fields[array]
+### row_error_handle_way [Enum]
+
+This option specifies how a row is handled when a data error occurs. The default value is `FAIL`.
+
+- FAIL: When `FAIL` is selected, a data format error fails the job and an exception is thrown.
+- SKIP: When `SKIP` is selected, the row containing the data format error is skipped.
+
+### columns[array]
#### option
-| name | type | required | default value |
-|------------|--------|----------|---------------|
-| src_field | String | Yes | |
-| dest_field | String | Yes | |
-| path | String | Yes | |
-| dest_type | String | No | String |
+| name | type | required | default value |
+|-------------------------|--------|----------|---------------|
+| src_field | String | Yes | |
+| dest_field | String | Yes | |
+| path | String | Yes | |
+| dest_type | String | No | String |
+| column_error_handle_way | Enum | No | |
#### src_field
@@ -51,6 +60,14 @@ Support SeatunnelDateType
> Jsonpath
+#### column_error_handle_way [Enum]
+
+This option specifies how a column is handled when a data error occurs.
+
+- FAIL: When `FAIL` is selected, a data format error fails the job and an exception is thrown.
+- SKIP: When `SKIP` is selected, the column containing the data format error is skipped (filled with a null value).
+- SKIP_ROW: When `SKIP_ROW` is selected, the whole row containing the data format error is skipped.
+
## Read Json Example
The data read from source is a table like this json:
@@ -76,8 +93,8 @@ Assuming we want to use JsonPath to extract properties.
```json
transform {
JsonPath {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
columns = [
{
"src_field" = "data"
@@ -155,23 +172,25 @@ Suppose a column in a row of data is of type SeatunnelRow and that the name of t
The JsonPath transform converts the values of seatunnel into an array,
-```json
+```hocon
transform {
JsonPath {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
+
+ row_error_handle_way = FAIL
columns = [
{
"src_field" = "col"
"path" = "$[0]"
"dest_field" = "name"
- "dest_type" = "string"
+ "dest_type" = "string"
},
- {
+ {
"src_field" = "col"
"path" = "$[1]"
"dest_field" = "age"
- "dest_type" = "int"
+ "dest_type" = "int"
}
]
}
@@ -184,6 +203,97 @@ Then the data result table `fake1` will like this
|------|-----|----------|-------|
| a | 18 | ["a",18] | ... |
+
+## Configure error data handle way
+
+You can configure `row_error_handle_way` and `column_error_handle_way` to handle abnormal data. Both are optional.
+
+`row_error_handle_way` handles data anomalies across the whole row, while `column_error_handle_way` handles anomalies in a single column and takes precedence over `row_error_handle_way`.
+
+### Skip error data rows
+
+Configure the job to skip a row when any of its columns contains abnormal data
+
+```hocon
+transform {
+ JsonPath {
+
+ row_error_handle_way = SKIP
+
+ columns = [
+ {
+ "src_field" = "json_data"
+ "path" = "$.f1"
+ "dest_field" = "json_data_f1"
+ },
+ {
+ "src_field" = "json_data"
+ "path" = "$.f2"
+ "dest_field" = "json_data_f2"
+ }
+ ]
+ }
+}
+```
+
+### Skip error data column
+
+Configure only the `json_data_f1` column to be skipped and filled with a null value on data errors; errors in other columns still throw an exception and interrupt the job
+
+
+```hocon
+transform {
+ JsonPath {
+
+ row_error_handle_way = FAIL
+
+ columns = [
+ {
+ "src_field" = "json_data"
+ "path" = "$.f1"
+ "dest_field" = "json_data_f1"
+
+ "column_error_handle_way" = "SKIP"
+ },
+ {
+ "src_field" = "json_data"
+ "path" = "$.f2"
+ "dest_field" = "json_data_f2"
+ }
+ ]
+ }
+}
+```
+
+### Skip the row for specified column error
+
+Configure the whole row to be skipped only when the `json_data_f1` column has a data error; errors in other columns still throw an exception and interrupt the job
+
+
+```hocon
+transform {
+ JsonPath {
+
+ row_error_handle_way = FAIL
+
+ columns = [
+ {
+ "src_field" = "json_data"
+ "path" = "$.f1"
+ "dest_field" = "json_data_f1"
+
+ "column_error_handle_way" = "SKIP_ROW"
+ },
+ {
+ "src_field" = "json_data"
+ "path" = "$.f2"
+ "dest_field" = "json_data_f2"
+ }
+ ]
+ }
+}
+```
+
## Changelog
* Add JsonPath Transform
diff --git a/docs/en/transform-v2/llm.md b/docs/en/transform-v2/llm.md
new file mode 100644
index 00000000000..c1c9798abe3
--- /dev/null
+++ b/docs/en/transform-v2/llm.md
@@ -0,0 +1,337 @@
+# LLM
+
+> LLM transform plugin
+
+## Description
+
+Leverage the power of a large language model (LLM) to process data by sending it to the LLM and receiving the
+generated results. Utilize the LLM's capabilities to label, clean, enrich data, perform data inference, and
+more.
+
+## Options
+
+| name | type | required | default value |
+|------------------------|--------|----------|---------------|
+| model_provider | enum | yes | |
+| output_data_type | enum | no | String |
+| output_column_name | string | no | llm_output |
+| prompt | string | yes | |
+| inference_columns | list | no | |
+| model | string | yes | |
+| api_key | string | yes | |
+| api_path | string | no | |
+| custom_config | map | no | |
+| custom_response_parse | string | no | |
+| custom_request_headers | map | no | |
+| custom_request_body | map | no | |
+
+### model_provider
+
+The model provider to use. The available options are:
+OPENAI, DOUBAO, KIMIAI, MICROSOFT, CUSTOM
+
+> Tip: If you use MICROSOFT, please make sure `api_path` is not empty.
+
+### output_data_type
+
+The data type of the output data. The available options are:
+STRING, INT, BIGINT, DOUBLE, BOOLEAN.
+Default value is STRING.
+
+### output_column_name
+
+Custom name of the output field. If the custom field name is the same as an existing field name, it is replaced with 'llm_output'.
+
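+As a hedged sketch (provider, model, key, and the `is_emperor` column name are illustrative), the two output options above can be combined like this:
+
+```hocon
+transform {
+  LLM {
+    model_provider = OPENAI
+    model = gpt-4o-mini
+    api_key = sk-xxx
+    prompt = "Determine whether a person is a historical emperor of China"
+    # Store the answer as a boolean instead of the default STRING
+    output_data_type = boolean
+    # Illustrative custom column name instead of the default llm_output
+    output_column_name = "is_emperor"
+  }
+}
+```
+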
+### prompt
+
+The prompt to send to the LLM. This parameter defines how LLM will process and return data, eg:
+
+The data read from source is a table like this:
+
+| name | age |
+|---------------|-----|
+| Jia Fan | 20 |
+| Hailin Wang | 20 |
+| Eric | 20 |
+| Guangdong Liu | 20 |
+
+The prompt can be:
+
+```
+Determine whether someone is Chinese or American by their name
+```
+
+The result will be:
+
+| name | age | llm_output |
+|---------------|-----|------------|
+| Jia Fan | 20 | Chinese |
+| Hailin Wang | 20 | Chinese |
+| Eric | 20 | American |
+| Guangdong Liu | 20 | Chinese |
+
+### inference_columns
+
+The `inference_columns` option allows you to specify which columns from the input data should be used as inputs for the LLM. By default, all columns will be used as inputs.
+
+For example:
+```hocon
+transform {
+ LLM {
+ model_provider = OPENAI
+ model = gpt-4o-mini
+ api_key = sk-xxx
+ inference_columns = ["name", "age"]
+ prompt = "Determine whether someone is Chinese or American by their name"
+ }
+}
+```
+
+### model
+
+The model to use. Different model providers have different models. For example, the OpenAI model can be `gpt-4o-mini`.
+If you use an OpenAI model, please refer to https://platform.openai.com/docs/models/model-endpoint-compatibility
+for the models supported by the `/v1/chat/completions` endpoint.
+
+### api_key
+
+The API key to use for the model provider.
+If you use an OpenAI model, please refer to https://platform.openai.com/docs/api-reference/api-keys for how to get the API key.
+
+### api_path
+
+The API path to use for the model provider. In most cases, you do not need to change this configuration. If you
+are using an API agent's service, you may need to configure it to the agent's API address.
+
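+A minimal sketch, assuming a self-hosted proxy (the URL below is illustrative, not a real endpoint):
+
+```hocon
+transform {
+  LLM {
+    model_provider = OPENAI
+    model = gpt-4o-mini
+    api_key = sk-xxx
+    # Only needed when the default provider endpoint is not used
+    api_path = "http://my-llm-proxy:8080/v1/chat/completions"
+    prompt = "Determine whether someone is Chinese or American by their name"
+  }
+}
+```
+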
+### custom_config
+
+The `custom_config` option allows you to provide additional custom configurations for the model. This is a map where you
+can define various settings that might be required by the specific model you're using.
+
+### custom_response_parse
+
+The `custom_response_parse` option allows you to specify how to parse the model's response. You can use JsonPath to
+extract the specific data you need from the response. For example, by using `$.choices[*].message.content`, you can
+extract the `content` field values from the following JSON. For more details on using JsonPath, please refer to
+the [JsonPath Getting Started guide](https://github.com/json-path/JsonPath?tab=readme-ov-file#getting-started).
+
+```json
+{
+ "id": "chatcmpl-9s4hoBNGV0d9Mudkhvgzg64DAWPnx",
+ "object": "chat.completion",
+ "created": 1722674828,
+ "model": "gpt-4o-mini",
+ "choices": [
+ {
+ "index": 0,
+ "message": {
+ "role": "assistant",
+ "content": "[\"Chinese\"]"
+ },
+ "logprobs": null,
+ "finish_reason": "stop"
+ }
+ ],
+ "usage": {
+ "prompt_tokens": 107,
+ "completion_tokens": 3,
+ "total_tokens": 110
+ },
+ "system_fingerprint": "fp_0f03d4f0ee",
+ "code": 0,
+ "msg": "ok"
+}
+```
+
+### custom_request_headers
+
+The `custom_request_headers` option allows you to define custom headers that should be included in the request sent to
+the model's API. This is useful if the API requires additional headers beyond the standard ones, such as authorization
+tokens, content types, etc.
+
+### custom_request_body
+
+The `custom_request_body` option supports placeholders:
+
+- `${model}`: Placeholder for the model name.
+- `${input}`: Placeholder for the input value; the request body type is derived from the type of the body
+  value. Example: `"${input}"` -> "input"
+- `${prompt}`: Placeholder for the LLM prompt.
+
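+For instance, a request body sketch that uses all three placeholders (the message layout follows the OpenAI-style chat format shown in the full example below) might look like:
+
+```hocon
+custom_request_body = {
+  # Replaced with the configured model name
+  model = "${model}"
+  messages = [
+    # Replaced with the configured prompt and the row's input value respectively
+    {role = "system", content = "${prompt}"},
+    {role = "user", content = "${input}"}
+  ]
+}
+```
+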
+### common options [string]
+
+Transform plugin common parameters, please refer to [Transform Plugin](common-options.md) for details
+
+## Tips
+
+LLM APIs usually enforce a rate limit. You can combine it with SeaTunnel's speed limit settings to keep the task running smoothly.
+For details about SeaTunnel speed limit settings, please refer to [speed-limit](../concept/speed-limit.md).
+
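+For instance, the examples below cap reads at 10 rows per second in the `env` block; a byte-based cap can be set the same way (the values here are illustrative):
+
+```hocon
+env {
+  parallelism = 1
+  job.mode = "BATCH"
+  # Limit the number of rows read per second
+  read_limit.rows_per_second = 10
+  # Optional byte-based limit
+  read_limit.bytes_per_second = 7000000
+}
+```
+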
+## Example OPENAI
+
+Determine the user's country through a LLM.
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "BATCH"
+ read_limit.rows_per_second = 10
+}
+
+source {
+ FakeSource {
+ row.num = 5
+ schema = {
+ fields {
+ id = "int"
+ name = "string"
+ }
+ }
+ rows = [
+ {fields = [1, "Jia Fan"], kind = INSERT}
+ {fields = [2, "Hailin Wang"], kind = INSERT}
+ {fields = [3, "Tomas"], kind = INSERT}
+ {fields = [4, "Eric"], kind = INSERT}
+ {fields = [5, "Guangdong Liu"], kind = INSERT}
+ ]
+ }
+}
+
+transform {
+ LLM {
+ model_provider = OPENAI
+ model = gpt-4o-mini
+ api_key = sk-xxx
+ prompt = "Determine whether someone is Chinese or American by their name"
+ }
+}
+
+sink {
+ console {
+ }
+}
+```
+
+## Example KIMIAI
+
+Determine whether a person is a historical emperor of China.
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "BATCH"
+ read_limit.rows_per_second = 10
+}
+
+source {
+ FakeSource {
+ row.num = 5
+ schema = {
+ fields {
+ id = "int"
+ name = "string"
+ }
+ }
+ rows = [
+ {fields = [1, "Zhuge Liang"], kind = INSERT}
+ {fields = [2, "Li Shimin"], kind = INSERT}
+ {fields = [3, "Sun Wukong"], kind = INSERT}
+ {fields = [4, "Zhu Yuanzhuang"], kind = INSERT}
+ {fields = [5, "George Washington"], kind = INSERT}
+ ]
+ }
+}
+
+transform {
+ LLM {
+ model_provider = KIMIAI
+ model = moonshot-v1-8k
+ api_key = sk-xxx
+ prompt = "Determine whether a person is a historical emperor of China"
+ output_data_type = boolean
+ }
+}
+
+sink {
+ console {
+ }
+}
+```
+
+### Customize the LLM model
+
+```hocon
+env {
+ job.mode = "BATCH"
+}
+
+source {
+ FakeSource {
+ row.num = 5
+ schema = {
+ fields {
+ id = "int"
+ name = "string"
+ }
+ }
+ rows = [
+ {fields = [1, "Jia Fan"], kind = INSERT}
+ {fields = [2, "Hailin Wang"], kind = INSERT}
+ {fields = [3, "Tomas"], kind = INSERT}
+ {fields = [4, "Eric"], kind = INSERT}
+ {fields = [5, "Guangdong Liu"], kind = INSERT}
+ ]
+ plugin_output = "fake"
+ }
+}
+
+transform {
+ LLM {
+ plugin_input = "fake"
+ model_provider = CUSTOM
+ model = gpt-4o-mini
+ api_key = sk-xxx
+ prompt = "Determine whether someone is Chinese or American by their name"
+ openai.api_path = "http://mockserver:1080/v1/chat/completions"
+ custom_config={
+ custom_response_parse = "$.choices[*].message.content"
+ custom_request_headers = {
+ Content-Type = "application/json"
+ Authorization = "Bearer xxxxxxxx"
+ }
+ custom_request_body ={
+ model = "${model}"
+ messages = [
+ {
+ role = "system"
+ content = "${prompt}"
+ },
+ {
+ role = "user"
+ content = "${input}"
+ }]
+ }
+ }
+ plugin_output = "llm_output"
+ }
+}
+
+sink {
+ Assert {
+ plugin_input = "llm_output"
+ rules =
+ {
+ field_rules = [
+ {
+ field_name = llm_output
+ field_type = string
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ }
+ ]
+ }
+ }
+}
+```
diff --git a/docs/en/transform-v2/metadata.md b/docs/en/transform-v2/metadata.md
new file mode 100644
index 00000000000..abae10e4483
--- /dev/null
+++ b/docs/en/transform-v2/metadata.md
@@ -0,0 +1,85 @@
+# Metadata
+
+> Metadata transform plugin
+
+## Description
+Metadata transform plugin for adding metadata fields to data
+
+## Available Metadata
+
+| Key | DataType | Description |
+|:---------:|:--------:|:---------------------------------------------------------------------------------------------------|
+| Database  | string   | Name of the database that contains the row.                                                         |
+| Table     | string   | Name of the table that contains the row.                                                             |
+| RowKind   | string   | The type of operation.                                                                                |
+| EventTime | Long     | The time at which the connector processed the event.                                                  |
+| Delay     | Long     | The difference between the data extraction time and the database change time.                         |
+| Partition | string   | The partition fields of the table that contains the row; multiple fields are joined with `,`.         |
+
+### Note
+
+`Delay` and `Partition` currently only work with CDC series connectors, except TiDB-CDC.
+
+## Options
+
+| name | type | required | default value | Description |
+|:---------------:|------|----------|---------------|---------------------------------------------------------------------------|
+| metadata_fields | map  | yes      |               | A mapping between metadata fields and their corresponding output fields.   |
+
+### metadata_fields [map]
+
+A mapping between metadata fields and their respective output fields.
+
+```hocon
+metadata_fields {
+ Database = c_database
+ Table = c_table
+ RowKind = c_rowKind
+ EventTime = c_ts_ms
+ Delay = c_delay
+}
+```
+
+## Examples
+
+```hocon
+
+env {
+ parallelism = 1
+ job.mode = "STREAMING"
+ checkpoint.interval = 5000
+ read_limit.bytes_per_second = 7000000
+ read_limit.rows_per_second = 400
+}
+
+source {
+ MySQL-CDC {
+ plugin_output = "customers_mysql_cdc"
+ server-id = 5652
+ username = "root"
+ password = "zdyk_Dev@2024"
+ table-names = ["source.user"]
+ base-url = "jdbc:mysql://172.16.17.123:3306/source"
+ }
+}
+
+transform {
+ Metadata {
+ metadata_fields {
+ Database = database
+ Table = table
+ RowKind = rowKind
+ EventTime = ts_ms
+ Delay = delay
+ }
+ plugin_output = "trans_result"
+ }
+}
+
+sink {
+ Console {
+ plugin_input = "custom_name"
+ }
+}
+
+```
+
diff --git a/docs/en/transform-v2/replace.md b/docs/en/transform-v2/replace.md
index 1cc99c0ace7..ebb15a9c8ba 100644
--- a/docs/en/transform-v2/replace.md
+++ b/docs/en/transform-v2/replace.md
@@ -56,8 +56,8 @@ We want to replace the char ` ` to `_` at the `name` field. Then we can add a `R
```
transform {
Replace {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
replace_field = "name"
pattern = " "
replacement = "_"
@@ -84,7 +84,7 @@ env {
source {
FakeSource {
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 100
schema = {
fields {
@@ -97,8 +97,8 @@ source {
transform {
Replace {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
replace_field = "name"
pattern = ".+"
replacement = "b"
@@ -108,7 +108,7 @@ transform {
sink {
Console {
- source_table_name = "fake1"
+ plugin_input = "fake1"
}
}
```
diff --git a/docs/en/transform-v2/rowkind-extractor.md b/docs/en/transform-v2/rowkind-extractor.md
new file mode 100644
index 00000000000..a2ee384c347
--- /dev/null
+++ b/docs/en/transform-v2/rowkind-extractor.md
@@ -0,0 +1,113 @@
+# RowKindExtractor
+
+> RowKindExtractor transform plugin
+
+## Description
+
+Transform a CDC row into an append-only row that contains the CDC RowKind.
+Example:
+CDC row: -D 1, test1, test2
+Transformed row: +I 1, test1, test2, DELETE
+
+## Options
+
+| name | type | required | default value |
+|-------------------|--------|----------|---------------|
+| custom_field_name | string | yes | row_kind |
+| transform_type | enum | yes | SHORT |
+
+### custom_field_name [string]
+
+Custom field name of the RowKind field
+
+### transform_type [enum]
+
+The formatting of the RowKind field value. The option can be `SHORT` or `FULL`.
+
+`SHORT` : +I, -U, +U, -D
+`FULL` : INSERT, UPDATE_BEFORE, UPDATE_AFTER, DELETE
+
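+A minimal sketch of the `SHORT` variant (the field name is illustrative); the full example below uses `FULL`:
+
+```hocon
+transform {
+  RowKindExtractor {
+    # Illustrative field name that will hold +I / -U / +U / -D
+    custom_field_name = "op_kind"
+    transform_type = SHORT
+    plugin_output = "trans_result"
+  }
+}
+```
+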
+## Examples
+
+
+```hocon
+
+env {
+ parallelism = 1
+ job.mode = "BATCH"
+}
+
+source {
+ FakeSource {
+ schema = {
+ fields {
+ pk_id = bigint
+ name = string
+ score = int
+ }
+ primaryKey {
+ name = "pk_id"
+ columnNames = [pk_id]
+ }
+ }
+ rows = [
+ {
+ kind = INSERT
+ fields = [1, "A", 100]
+ },
+ {
+ kind = INSERT
+ fields = [2, "B", 100]
+ },
+ {
+ kind = INSERT
+ fields = [3, "C", 100]
+ },
+ {
+ kind = INSERT
+ fields = [4, "D", 100]
+ },
+ {
+ kind = UPDATE_BEFORE
+ fields = [1, "A", 100]
+ },
+ {
+ kind = UPDATE_AFTER
+ fields = [1, "F", 100]
+ }
+ {
+ kind = UPDATE_BEFORE
+ fields = [2, "B", 100]
+ },
+ {
+ kind = UPDATE_AFTER
+ fields = [2, "G", 100]
+ },
+ {
+ kind = DELETE
+ fields = [3, "C", 100]
+ },
+ {
+ kind = DELETE
+ fields = [4, "D", 100]
+ }
+ ]
+ }
+}
+
+transform {
+ RowKindExtractor {
+ custom_field_name = "custom_name"
+ transform_type = FULL
+ plugin_output = "trans_result"
+ }
+}
+
+sink {
+ Console {
+ plugin_input = "custom_name"
+ }
+}
+
+```
+
diff --git a/docs/en/transform-v2/split.md b/docs/en/transform-v2/split.md
index ecfe94c854b..0df9afbdef2 100644
--- a/docs/en/transform-v2/split.md
+++ b/docs/en/transform-v2/split.md
@@ -46,8 +46,8 @@ We want split `name` field to `first_name` and `second name`, we can add `Split`
```
transform {
Split {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
separator = " "
split_field = "name"
output_fields = [first_name, second_name]
diff --git a/docs/en/transform-v2/sql-functions.md b/docs/en/transform-v2/sql-functions.md
index e1c541ef1c9..31a33989375 100644
--- a/docs/en/transform-v2/sql-functions.md
+++ b/docs/en/transform-v2/sql-functions.md
@@ -302,6 +302,14 @@ Example:
REPLACE(NAME, ' ')
+### SPLIT
+
+Split a string into an array.
+
+Example:
+
+select SPLIT(test,';') as arrays
+
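+As a hedged sketch, SPLIT can also be used inside an SQL transform (the table and field names are illustrative):
+
+```hocon
+transform {
+  Sql {
+    plugin_input = "fake"
+    plugin_output = "fake1"
+    # Split the comma-separated `name` field into an array column
+    query = "select id, SPLIT(name, ',') as name_parts from fake"
+  }
+}
+```
+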
### SOUNDEX
```SOUNDEX(string)```
@@ -889,7 +897,7 @@ CALL FROM_UNIXTIME(1672502400, 'yyyy-MM-dd HH:mm:ss','UTC+6')
Converts a value to another data type.
-Supported data types: STRING | VARCHAR, INT | INTEGER, LONG | BIGINT, BYTE, FLOAT, DOUBLE, DECIMAL(p,s), TIMESTAMP, DATE, TIME
+Supported data types: STRING | VARCHAR, INT | INTEGER, LONG | BIGINT, BYTE, FLOAT, DOUBLE, DECIMAL(p,s), TIMESTAMP, DATE, TIME, BYTES
Example:
@@ -973,3 +981,37 @@ It is used to determine whether the condition is valid and return different valu
Example:
case when c_string in ('c_string') then 1 else 0 end
+
+### UUID
+
+```UUID()```
+
+Generate a UUID through a Java function.
+
+Example:
+
+select UUID() as seatunnel_uuid
+
+### ARRAY
+
+Generate an array.
+
+Example:
+
+select ARRAY('test1','test2','test3') as arrays
+
+
+### LATERAL VIEW
+#### EXPLODE
+
+Explode an array column into rows.
+OUTER EXPLODE returns NULL when the array is NULL or empty.
+EXPLODE(SPLIT(FIELD_NAME, separator)) is used to split a string column: the first parameter of the SPLIT function is the field name, the second parameter is the separator.
+EXPLODE(ARRAY(value1, value2)) is used to build a custom array.
+```
+SELECT * FROM fake
+ LATERAL VIEW EXPLODE ( SPLIT ( NAME, ',' ) ) AS NAME
+ LATERAL VIEW EXPLODE ( SPLIT ( pk_id, ';' ) ) AS pk_id
+ LATERAL VIEW OUTER EXPLODE ( age ) AS age
+ LATERAL VIEW OUTER EXPLODE ( ARRAY(1,1) ) AS num
+```
diff --git a/docs/en/transform-v2/sql-udf.md b/docs/en/transform-v2/sql-udf.md
index df5d3b93fe5..a857fe4c51f 100644
--- a/docs/en/transform-v2/sql-udf.md
+++ b/docs/en/transform-v2/sql-udf.md
@@ -110,8 +110,8 @@ We use UDF of SQL query to transform the source data like this:
```
transform {
Sql {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
query = "select id, example(name) as name, age from fake"
}
}
diff --git a/docs/en/transform-v2/sql.md b/docs/en/transform-v2/sql.md
index a3bdb9bbfc1..a8f12568d53 100644
--- a/docs/en/transform-v2/sql.md
+++ b/docs/en/transform-v2/sql.md
@@ -12,11 +12,11 @@ SQL transform use memory SQL engine, we can via SQL functions and ability of SQL
| name | type | required | default value |
|-------------------|--------|----------|---------------|
-| source_table_name | string | yes | - |
-| result_table_name | string | yes | - |
+| plugin_input | string | yes | - |
+| plugin_output | string | yes | - |
| query | string | yes | - |
-### source_table_name [string]
+### plugin_input [string]
The source table name, the query SQL table name must match this field.
@@ -43,8 +43,8 @@ We use SQL query to transform the source data like this:
```
transform {
Sql {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
query = "select id, concat(name, '_') as name, age+1 as age from fake where id>0"
}
}
@@ -66,7 +66,7 @@ if your upstream data schema is like this:
```hacon
source {
FakeSource {
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 100
string.template = ["innerQuery"]
schema = {
@@ -123,7 +123,7 @@ env {
source {
FakeSource {
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 100
schema = {
fields {
@@ -137,15 +137,15 @@ source {
transform {
Sql {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
query = "select id, concat(name, '_') as name, age+1 as age from fake where id>0"
}
}
sink {
Console {
- source_table_name = "fake1"
+ plugin_input = "fake1"
}
}
```
diff --git a/docs/en/transform-v2/transform-multi-table.md b/docs/en/transform-v2/transform-multi-table.md
new file mode 100644
index 00000000000..e642ec9cd2d
--- /dev/null
+++ b/docs/en/transform-v2/transform-multi-table.md
@@ -0,0 +1,128 @@
+---
+sidebar_position: 2
+---
+
+# Multi-Table Transform in SeaTunnel
+
+SeaTunnel’s transform feature supports multi-table transformations, which is especially useful when the upstream plugin outputs multiple tables. This allows you to complete all necessary transformation operations within a single transform configuration. Currently, many connectors in SeaTunnel support multi-table outputs, such as `JDBCSource` and `MySQL-CDC`. All transforms can be configured for multi-table transform as described below.
+
+:::tip
+
+Multi-table Transform has no limitations on Transform capabilities; any Transform configuration can be used in a multi-table Transform. The purpose of multi-table Transform is to handle multiple tables in the data stream individually and merge the Transform configurations of multiple tables into one Transform for easier management.
+
+:::
+
+## Properties
+
+| Name | Type | Required | Default | Description |
+|----------------------------|--------|----------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| table_match_regex          | String | No       | .*      | A regular expression to match the tables that require transformation. By default, it matches all tables. Note that this table name refers to the actual upstream table name, not `plugin_output`.                                                                |
+| table_transform | List | No | - | You can use a list in `table_transform` to specify rules for individual tables. If a transformation rule is configured for a specific table in `table_transform`, the outer rules will not apply to that table. The rules in `table_transform` take precedence. |
+| table_transform.table_path | String | No | - | When configuring a transformation rule for a table in `table_transform`, you need to specify the table path using the `table_path` field. The table path should include `databaseName[.schemaName].tableName`. |
+
+## Matching Logic
+
+Suppose we read five tables from upstream: `test.abc`, `test.abcd`, `test.xyz`, `test.xyzxyz`, and `test.www`. They share the same structure, each having three fields: `id`, `name`, and `age`.
+
+| id | name | age |
+
+Now, let's say we want to copy the data from these five tables using the Copy transform with the following specific requirements:
+- For tables `test.abc` and `test.abcd`, we need to copy the `name` field to a new field `name1`.
+- For `test.xyz`, we want to copy the `name` field to `name2`.
+- For `test.xyzxyz`, we want to copy the `name` field to `name3`.
+- For `test.www`, no changes are needed.
+
+We can configure this as follows:
+
+```hocon
+transform {
+ Copy {
+ source_table_name = "fake" // Optional dataset name to read from
+ result_table_name = "fake1" // Optional dataset name for output
+
+ table_match_regex = "test.a.*" // 1. Matches tables needing transformation, here matching `test.abc` and `test.abcd`
+ src_field = "name" // Source field
+ dest_field = "name1" // Destination field
+
+ table_transform = [{
+ table_path = "test.xyz" // 2. Specifies the table name for transformation
+ src_field = "name" // Source field
+ dest_field = "name2" // Destination field
+ }, {
+ table_path = "test.xyzxyz"
+ src_field = "name"
+ dest_field = "name3"
+ }]
+ }
+}
+```
+
+### Explanation
+
+1. With the regular expression and corresponding Copy transform options, we match tables `test.abc` and `test.abcd` and copy the `name` field to `name1`.
+2. Using the `table_transform` configuration, we specify that for table `test.xyz`, the `name` field should be copied to `name2`.
+
+This allows us to handle transformations for multiple tables within a single transform configuration.
+
+For each table, the priority of configuration is: `table_transform` > `table_match_regex`. If no rules match a table, no transformation will be applied.
+
+Below are the transform configurations for each table:
+
+- **test.abc** and **test.abcd**
+
+```hocon
+transform {
+ Copy {
+ src_field = "name"
+ dest_field = "name1"
+ }
+}
+```
+
+Output structure:
+
+| id | name | age | name1 |
+
+- **test.xyz**
+
+```hocon
+transform {
+ Copy {
+ src_field = "name"
+ dest_field = "name2"
+ }
+}
+```
+
+Output structure:
+
+| id | name | age | name2 |
+
+- **test.xyzxyz**
+
+```hocon
+transform {
+ Copy {
+ src_field = "name"
+ dest_field = "name3"
+ }
+}
+```
+
+Output structure:
+
+| id | name | age | name3 |
+
+- **test.www**
+
+```hocon
+transform {
+ // No transformation needed
+}
+```
+
+Output structure:
+
+| id | name | age |
+
+In this example, we used the Copy transform, but all transforms in SeaTunnel support multi-table transformations, and you can configure them similarly within the corresponding transform block.
\ No newline at end of file
diff --git a/docs/images/grafana.png b/docs/images/grafana.png
new file mode 100644
index 00000000000..4aca529a05b
Binary files /dev/null and b/docs/images/grafana.png differ
diff --git a/docs/images/icons/AmazonDynamoDB.svg b/docs/images/icons/AmazonDynamoDB.svg
new file mode 100644
index 00000000000..bd4f2c30f50
--- /dev/null
+++ b/docs/images/icons/AmazonDynamoDB.svg
@@ -0,0 +1,18 @@
+
+
+
+ Icon-Architecture/64/Arch_Amazon-DynamoDB_64
+ Created with Sketch.
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/docs/images/icons/Apache Iceberg.svg b/docs/images/icons/Apache Iceberg.svg
new file mode 100644
index 00000000000..d04e866a0f6
--- /dev/null
+++ b/docs/images/icons/Apache Iceberg.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Cassandra.png b/docs/images/icons/Cassandra.png
new file mode 100644
index 00000000000..180c3110fcb
Binary files /dev/null and b/docs/images/icons/Cassandra.png differ
diff --git a/docs/images/icons/Clickhouse.png b/docs/images/icons/Clickhouse.png
new file mode 100644
index 00000000000..f9ede1c3dc5
Binary files /dev/null and b/docs/images/icons/Clickhouse.png differ
diff --git a/docs/images/icons/Doris.svg b/docs/images/icons/Doris.svg
new file mode 100644
index 00000000000..2729c9a6985
--- /dev/null
+++ b/docs/images/icons/Doris.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Elasticsearch.png b/docs/images/icons/Elasticsearch.png
new file mode 100644
index 00000000000..24bcdf28b20
Binary files /dev/null and b/docs/images/icons/Elasticsearch.png differ
diff --git a/docs/images/icons/FtpFile.svg b/docs/images/icons/FtpFile.svg
new file mode 100644
index 00000000000..4cf14476e97
--- /dev/null
+++ b/docs/images/icons/FtpFile.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Github.png b/docs/images/icons/Github.png
new file mode 100644
index 00000000000..fa94a5d635e
Binary files /dev/null and b/docs/images/icons/Github.png differ
diff --git a/docs/images/icons/Gitlab.svg b/docs/images/icons/Gitlab.svg
new file mode 100644
index 00000000000..e7645b37344
--- /dev/null
+++ b/docs/images/icons/Gitlab.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Greenplum.svg b/docs/images/icons/Greenplum.svg
new file mode 100644
index 00000000000..ead7dc6bfeb
--- /dev/null
+++ b/docs/images/icons/Greenplum.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Hbase.svg b/docs/images/icons/Hbase.svg
new file mode 100644
index 00000000000..43130873136
--- /dev/null
+++ b/docs/images/icons/Hbase.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git "a/docs/images/icons/Hdfs\346\226\207\344\273\266.svg" "b/docs/images/icons/Hdfs\346\226\207\344\273\266.svg"
new file mode 100644
index 00000000000..7bc4a938f74
--- /dev/null
+++ "b/docs/images/icons/Hdfs\346\226\207\344\273\266.svg"
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Hive.svg b/docs/images/icons/Hive.svg
new file mode 100644
index 00000000000..70859e23b97
--- /dev/null
+++ b/docs/images/icons/Hive.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/HiveJdbc.svg b/docs/images/icons/HiveJdbc.svg
new file mode 100644
index 00000000000..70859e23b97
--- /dev/null
+++ b/docs/images/icons/HiveJdbc.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Http.svg b/docs/images/icons/Http.svg
new file mode 100644
index 00000000000..e9fcaf50aca
--- /dev/null
+++ b/docs/images/icons/Http.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/InfluxDB.svg b/docs/images/icons/InfluxDB.svg
new file mode 100644
index 00000000000..a0bd1c639b6
--- /dev/null
+++ b/docs/images/icons/InfluxDB.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/IoTDB.svg b/docs/images/icons/IoTDB.svg
new file mode 100644
index 00000000000..1aad0988b75
--- /dev/null
+++ b/docs/images/icons/IoTDB.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/JDBC.svg b/docs/images/icons/JDBC.svg
new file mode 100644
index 00000000000..00365006920
--- /dev/null
+++ b/docs/images/icons/JDBC.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Jira.svg b/docs/images/icons/Jira.svg
new file mode 100644
index 00000000000..e49c6d768f9
--- /dev/null
+++ b/docs/images/icons/Jira.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Kafka.svg b/docs/images/icons/Kafka.svg
new file mode 100644
index 00000000000..094d598c4c2
--- /dev/null
+++ b/docs/images/icons/Kafka.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Kingbase.svg b/docs/images/icons/Kingbase.svg
new file mode 100644
index 00000000000..65a72ff2122
--- /dev/null
+++ b/docs/images/icons/Kingbase.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Klaviyo.svg b/docs/images/icons/Klaviyo.svg
new file mode 100644
index 00000000000..77f75c139fa
--- /dev/null
+++ b/docs/images/icons/Klaviyo.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Kudu.png b/docs/images/icons/Kudu.png
new file mode 100644
index 00000000000..1afca89769c
Binary files /dev/null and b/docs/images/icons/Kudu.png differ
diff --git a/docs/images/icons/LocalFile.svg b/docs/images/icons/LocalFile.svg
new file mode 100644
index 00000000000..414c3dde3b9
--- /dev/null
+++ b/docs/images/icons/LocalFile.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Maxcompute.svg b/docs/images/icons/Maxcompute.svg
new file mode 100644
index 00000000000..dca95d03c36
--- /dev/null
+++ b/docs/images/icons/Maxcompute.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Milvus.svg b/docs/images/icons/Milvus.svg
new file mode 100644
index 00000000000..a057c16e418
--- /dev/null
+++ b/docs/images/icons/Milvus.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/MongoDB.svg b/docs/images/icons/MongoDB.svg
new file mode 100644
index 00000000000..5bb74629c6b
--- /dev/null
+++ b/docs/images/icons/MongoDB.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/MySQL CDC.svg b/docs/images/icons/MySQL CDC.svg
new file mode 100644
index 00000000000..92cca4e38d0
--- /dev/null
+++ b/docs/images/icons/MySQL CDC.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/MySQL.svg b/docs/images/icons/MySQL.svg
new file mode 100644
index 00000000000..ab6addf3781
--- /dev/null
+++ b/docs/images/icons/MySQL.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Neo4j.svg b/docs/images/icons/Neo4j.svg
new file mode 100644
index 00000000000..264ddea47e0
--- /dev/null
+++ b/docs/images/icons/Neo4j.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Notion.svg b/docs/images/icons/Notion.svg
new file mode 100644
index 00000000000..3c6e3b0f72f
--- /dev/null
+++ b/docs/images/icons/Notion.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/ObsFile.png b/docs/images/icons/ObsFile.png
new file mode 100644
index 00000000000..be943c607ac
Binary files /dev/null and b/docs/images/icons/ObsFile.png differ
diff --git a/docs/images/icons/OceanBase.svg b/docs/images/icons/OceanBase.svg
new file mode 100644
index 00000000000..e4589987ea6
--- /dev/null
+++ b/docs/images/icons/OceanBase.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/OneSignal.svg b/docs/images/icons/OneSignal.svg
new file mode 100644
index 00000000000..8f0c26700da
--- /dev/null
+++ b/docs/images/icons/OneSignal.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/OpenMldb.png b/docs/images/icons/OpenMldb.png
new file mode 100644
index 00000000000..b66e8dedef4
Binary files /dev/null and b/docs/images/icons/OpenMldb.png differ
diff --git a/docs/images/icons/Oracle CDC.svg b/docs/images/icons/Oracle CDC.svg
new file mode 100644
index 00000000000..9f739d77862
--- /dev/null
+++ b/docs/images/icons/Oracle CDC.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Oracle.svg b/docs/images/icons/Oracle.svg
new file mode 100644
index 00000000000..c4865624c3e
--- /dev/null
+++ b/docs/images/icons/Oracle.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Paimon.svg b/docs/images/icons/Paimon.svg
new file mode 100644
index 00000000000..9dac157fdb6
--- /dev/null
+++ b/docs/images/icons/Paimon.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Persistiq.svg b/docs/images/icons/Persistiq.svg
new file mode 100644
index 00000000000..2ab14f08a78
--- /dev/null
+++ b/docs/images/icons/Persistiq.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Phoenix.svg b/docs/images/icons/Phoenix.svg
new file mode 100644
index 00000000000..6fa6e48a403
--- /dev/null
+++ b/docs/images/icons/Phoenix.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/PostgreSQL CDC.svg b/docs/images/icons/PostgreSQL CDC.svg
new file mode 100644
index 00000000000..38547f16078
--- /dev/null
+++ b/docs/images/icons/PostgreSQL CDC.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/PostgreSQL.svg b/docs/images/icons/PostgreSQL.svg
new file mode 100644
index 00000000000..38547f16078
--- /dev/null
+++ b/docs/images/icons/PostgreSQL.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Pulsar.svg b/docs/images/icons/Pulsar.svg
new file mode 100644
index 00000000000..cabedf1e022
--- /dev/null
+++ b/docs/images/icons/Pulsar.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Qdrant.svg b/docs/images/icons/Qdrant.svg
new file mode 100644
index 00000000000..b431d111a6a
--- /dev/null
+++ b/docs/images/icons/Qdrant.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Rabbitmq.svg b/docs/images/icons/Rabbitmq.svg
new file mode 100644
index 00000000000..a4ecbc6cfbf
--- /dev/null
+++ b/docs/images/icons/Rabbitmq.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Redis.svg b/docs/images/icons/Redis.svg
new file mode 100644
index 00000000000..4cbd41cada9
--- /dev/null
+++ b/docs/images/icons/Redis.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/RocketMQ.svg b/docs/images/icons/RocketMQ.svg
new file mode 100644
index 00000000000..3fd2c1adba9
--- /dev/null
+++ b/docs/images/icons/RocketMQ.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/S3File.svg b/docs/images/icons/S3File.svg
new file mode 100644
index 00000000000..ddd50aeff00
--- /dev/null
+++ b/docs/images/icons/S3File.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/SQL Server.svg b/docs/images/icons/SQL Server.svg
new file mode 100644
index 00000000000..db4b76ca740
--- /dev/null
+++ b/docs/images/icons/SQL Server.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Sftp.svg b/docs/images/icons/Sftp.svg
new file mode 100644
index 00000000000..2a8015eb504
--- /dev/null
+++ b/docs/images/icons/Sftp.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Snowflake.svg b/docs/images/icons/Snowflake.svg
new file mode 100644
index 00000000000..fb4c2868fba
--- /dev/null
+++ b/docs/images/icons/Snowflake.svg
@@ -0,0 +1,3 @@
+
+
+
diff --git a/docs/images/icons/StarRocks.svg b/docs/images/icons/StarRocks.svg
new file mode 100644
index 00000000000..10a52bbf355
--- /dev/null
+++ b/docs/images/icons/StarRocks.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/TDengine.svg b/docs/images/icons/TDengine.svg
new file mode 100644
index 00000000000..588347b3727
--- /dev/null
+++ b/docs/images/icons/TDengine.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Tablestore.svg b/docs/images/icons/Tablestore.svg
new file mode 100644
index 00000000000..24526c988b9
--- /dev/null
+++ b/docs/images/icons/Tablestore.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/images/icons/Typesense.png b/docs/images/icons/Typesense.png
new file mode 100644
index 00000000000..f25cc7e9e71
Binary files /dev/null and b/docs/images/icons/Typesense.png differ
diff --git a/docs/images/icons/Web3j.png b/docs/images/icons/Web3j.png
new file mode 100644
index 00000000000..ec031cb3280
Binary files /dev/null and b/docs/images/icons/Web3j.png differ
diff --git a/docs/images/ui/detail.png b/docs/images/ui/detail.png
new file mode 100644
index 00000000000..a376b6e4880
Binary files /dev/null and b/docs/images/ui/detail.png differ
diff --git a/docs/images/ui/finished.png b/docs/images/ui/finished.png
new file mode 100644
index 00000000000..fa800bd6029
Binary files /dev/null and b/docs/images/ui/finished.png differ
diff --git a/docs/images/ui/master.png b/docs/images/ui/master.png
new file mode 100644
index 00000000000..5e42d2854ee
Binary files /dev/null and b/docs/images/ui/master.png differ
diff --git a/docs/images/ui/overview.png b/docs/images/ui/overview.png
new file mode 100644
index 00000000000..67123532499
Binary files /dev/null and b/docs/images/ui/overview.png differ
diff --git a/docs/images/ui/running.png b/docs/images/ui/running.png
new file mode 100644
index 00000000000..889edb303b1
Binary files /dev/null and b/docs/images/ui/running.png differ
diff --git a/docs/images/ui/workers.png b/docs/images/ui/workers.png
new file mode 100644
index 00000000000..a2bf39ec218
Binary files /dev/null and b/docs/images/ui/workers.png differ
diff --git a/docs/sidebars.js b/docs/sidebars.js
index 1a9a1cf6ec5..3257181b11a 100644
--- a/docs/sidebars.js
+++ b/docs/sidebars.js
@@ -84,18 +84,19 @@ const sidebars = {
},
{
"type": "category",
- "label": "Concept",
+ "label": "Concepts",
"items": [
"concept/config",
"concept/connector-v2-features",
'concept/schema-feature',
'concept/JobEnvConfig',
+ 'concept/sink-options-placeholders',
'concept/sql-config',
'concept/speed-limit',
- 'concept/event-listener'
+ 'concept/event-listener',
+ 'concept/schema-evolution'
]
},
- "Connector-v2-release-state",
{
"type": "category",
"label": "Connector-V2",
@@ -106,8 +107,8 @@ const sidebars = {
"link": {
"type": "generated-index",
"title": "Source(V2) of SeaTunnel",
- "description": "List all source(v2) supported Apache SeaTunnel for now.",
- "slug": "/category/source-v2",
+ "description": "List all source(v2) supported by Apache SeaTunnel for now.",
+ "slug": "/connector-v2/source",
"keywords": ["source"],
"image": "/img/favicon.ico"
},
@@ -124,8 +125,8 @@ const sidebars = {
"link": {
"type": "generated-index",
"title": "Sink(V2) of SeaTunnel",
- "description": "List all sink(v2) supported Apache SeaTunnel for now.",
- "slug": "/category/sink-v2",
+ "description": "List all sink(v2) supported by Apache SeaTunnel for now.",
+ "slug": "/connector-v2/sink",
"keywords": ["sink"],
"image": "/img/favicon.ico"
},
@@ -136,6 +137,26 @@ const sidebars = {
}
]
},
+ {
+ "type": "category",
+ "label": "Formats",
+ "link": {
+ "type": "generated-index",
+ "title": "Formats",
+ "description": "List some special formats (not all) supported by Apache SeaTunnel for now.",
+ "slug": "/connector-v2/formats",
+ "keywords": ["formats"],
+ "image": "/img/favicon.ico"
+ },
+ "items": [
+ {
+ "type": "autogenerated",
+ "dirName": "connector-v2/formats"
+ }
+ ]
+ },
+ "connector-v2/source-common-options",
+ "connector-v2/sink-common-options",
"connector-v2/Error-Quick-Reference-Manual",
"connector-v2/Config-Encryption-Decryption"
]
@@ -147,7 +168,7 @@ const sidebars = {
"type": "generated-index",
"title": "Transform V2 of SeaTunnel",
"description": "List all transform v2 supported Apache SeaTunnel for now.",
- "slug": "/category/transform-v2",
+ "slug": "/transform-v2",
"keywords": ["transform-v2"],
"image": "/img/favicon.ico"
},
@@ -181,8 +202,12 @@ const sidebars = {
"seatunnel-engine/engine-jar-storage-mode",
"seatunnel-engine/tcp",
"seatunnel-engine/resource-isolation",
- "seatunnel-engine/rest-api",
- "seatunnel-engine/user-command"
+ "seatunnel-engine/rest-api-v1",
+ "seatunnel-engine/rest-api-v2",
+ "seatunnel-engine/user-command",
+ "seatunnel-engine/logging",
+ "seatunnel-engine/telemetry",
+ "seatunnel-engine/web-ui"
]
},
{
@@ -201,6 +226,7 @@ const sidebars = {
'contribution/new-license',
'contribution/coding-guide',
'contribution/contribute-transform-v2-guide',
+ 'contribution/how-to-create-your-connector'
],
},
"faq"
diff --git a/docs/zh/Connector-v2-release-state.md b/docs/zh/Connector-v2-release-state.md
deleted file mode 100644
index 779394b7035..00000000000
--- a/docs/zh/Connector-v2-release-state.md
+++ /dev/null
@@ -1,85 +0,0 @@
-# 连接器发布状态
-
-SeaTunnel 使用连接器分级系统来帮助您了解连接器的期望:
-
-| | Alpha | Beta | General Availability (GA) |
-|----------------------|------------------------------------------------------------------------------|----------------------------------------------------------------------------|--------------------------------------------------------------|
-| Expectations | alpha 连接器表示正在开发的连接器,可帮助 SeaTunnel 收集早期采用者报告的早期反馈和问题。 我们强烈反对在生产用例中使用 alpha 版本 | Beta 连接器被认为稳定可靠,没有向后不兼容的更改,但尚未得到更广泛的用户群体的验证。 我们希望在正式发布之前找到并修复该版本中的一些问题和错误。 | 普遍可用的连接器已被认为可以在生产环境中使用,并得到 SeaTunnel 的正式支持。 它的文档被认为足以支持广泛采用。 |
-| | | | |
-| Production Readiness | No | Yes | Yes |
-
-## Connector V2 Health
-
-| Connector Name | Type | Status | Support Version |
-|-------------------------------------------------------------------|--------|--------|-----------------|
-| [AmazonDynamoDB](../en/connector-v2/sink/AmazonDynamoDB.md) | Sink | Beta | 2.3.0 |
-| [AmazonDynamoDB](../en/connector-v2/source/AmazonDynamoDB.md) | Source | Beta | 2.3.0 |
-| [Asset](../en/connector-v2/sink/Assert.md) | Sink | Beta | 2.2.0-beta |
-| [Cassandra](../en/connector-v2/sink/Cassandra.md) | Sink | Beta | 2.3.0 |
-| [Cassandra](../en/connector-v2/source/Cassandra.md) | Source | Beta | 2.3.0 |
-| [ClickHouse](../en/connector-v2/source/Clickhouse.md) | Source | GA | 2.2.0-beta |
-| [ClickHouse](../en/connector-v2/sink/Clickhouse.md) | Sink | GA | 2.2.0-beta |
-| [ClickHouseFile](../en/connector-v2/sink/ClickhouseFile.md) | Sink | GA | 2.2.0-beta |
-| [Console](connector-v2/sink/Console.md) | Sink | GA | 2.2.0-beta |
-| [DataHub](../en/connector-v2/sink/Datahub.md) | Sink | Alpha | 2.2.0-beta |
-| [Doris](../en/connector-v2/sink/Doris.md) | Sink | Beta | 2.3.0 |
-| [DingTalk](../en/connector-v2/sink/DingTalk.md) | Sink | Alpha | 2.2.0-beta |
-| [Elasticsearch](connector-v2/sink/Elasticsearch.md) | Sink | GA | 2.2.0-beta |
-| [Email](connector-v2/sink/Email.md) | Sink | Alpha | 2.2.0-beta |
-| [Enterprise WeChat](../en/connector-v2/sink/Enterprise-WeChat.md) | Sink | Alpha | 2.2.0-beta |
-| [FeiShu](connector-v2/sink/Feishu.md) | Sink | Alpha | 2.2.0-beta |
-| [Fake](../en/connector-v2/source/FakeSource.md) | Source | GA | 2.2.0-beta |
-| [FtpFile](../en/connector-v2/sink/FtpFile.md) | Sink | Beta | 2.2.0-beta |
-| [Greenplum](../en/connector-v2/sink/Greenplum.md) | Sink | Beta | 2.2.0-beta |
-| [Greenplum](../en/connector-v2/source/Greenplum.md) | Source | Beta | 2.2.0-beta |
-| [HdfsFile](connector-v2/sink/HdfsFile.md) | Sink | GA | 2.2.0-beta |
-| [HdfsFile](connector-v2/source/HdfsFile.md) | Source | GA | 2.2.0-beta |
-| [Hive](../en/connector-v2/sink/Hive.md) | Sink | GA | 2.2.0-beta |
-| [Hive](../en/connector-v2/source/Hive.md) | Source | GA | 2.2.0-beta |
-| [Http](connector-v2/sink/Http.md) | Sink | Beta | 2.2.0-beta |
-| [Http](../en/connector-v2/source/Http.md) | Source | Beta | 2.2.0-beta |
-| [Iceberg](../en/connector-v2/source/Iceberg.md) | Source | Beta | 2.2.0-beta |
-| [InfluxDB](../en/connector-v2/sink/InfluxDB.md) | Sink | Beta | 2.3.0 |
-| [InfluxDB](../en/connector-v2/source/InfluxDB.md) | Source | Beta | 2.3.0-beta |
-| [IoTDB](../en/connector-v2/source/IoTDB.md) | Source | GA | 2.2.0-beta |
-| [IoTDB](../en/connector-v2/sink/IoTDB.md) | Sink | GA | 2.2.0-beta |
-| [Jdbc](../en/connector-v2/source/Jdbc.md) | Source | GA | 2.2.0-beta |
-| [Jdbc](connector-v2/sink/Jdbc.md) | Sink | GA | 2.2.0-beta |
-| [Kafka](../en/connector-v2/source/kafka.md) | Source | GA | 2.3.0 |
-| [Kafka](connector-v2/sink/Kafka.md) | Sink | GA | 2.2.0-beta |
-| [Kudu](../en/connector-v2/source/Kudu.md) | Source | Beta | 2.2.0-beta |
-| [Kudu](../en/connector-v2/sink/Kudu.md) | Sink | Beta | 2.2.0-beta |
-| [Lemlist](../en/connector-v2/source/Lemlist.md) | Source | Beta | 2.3.0 |
-| [LocalFile](../en/connector-v2/sink/LocalFile.md) | Sink | GA | 2.2.0-beta |
-| [LocalFile](../en/connector-v2/source/LocalFile.md) | Source | GA | 2.2.0-beta |
-| [Maxcompute]../en/(connector-v2/source/Maxcompute.md) | Source | Alpha | 2.3.0 |
-| [Maxcompute](../en/connector-v2/sink/Maxcompute.md) | Sink | Alpha | 2.3.0 |
-| [MongoDB](../en/connector-v2/source/MongoDB.md) | Source | Beta | 2.2.0-beta |
-| [MongoDB](../en/connector-v2/sink/MongoDB.md) | Sink | Beta | 2.2.0-beta |
-| [MyHours](../en/connector-v2/source/MyHours.md) | Source | Alpha | 2.2.0-beta |
-| [MySqlCDC](../en/connector-v2/source/MySQL-CDC.md) | Source | GA | 2.3.0 |
-| [Neo4j](../en/connector-v2/sink/Neo4j.md) | Sink | Beta | 2.2.0-beta |
-| [Notion](../en/connector-v2/source/Notion.md) | Source | Alpha | 2.3.0 |
-| [OneSignal](../en/connector-v2/source/OneSignal.md) | Source | Beta | 2.3.0 |
-| [OpenMldb](../en/connector-v2/source/OpenMldb.md) | Source | Beta | 2.3.0 |
-| [OssFile](../en/connector-v2/sink/OssFile.md) | Sink | Beta | 2.2.0-beta |
-| [OssFile](../en/connector-v2/source/OssFile.md) | Source | Beta | 2.2.0-beta |
-| [Phoenix](../en/connector-v2/sink/Phoenix.md) | Sink | Beta | 2.2.0-beta |
-| [Phoenix](../en/connector-v2/source/Phoenix.md) | Source | Beta | 2.2.0-beta |
-| [Pulsar](../en/connector-v2/source/Pulsar.md) | Source | Beta | 2.2.0-beta |
-| [RabbitMQ](../en/connector-v2/sink/Rabbitmq.md) | Sink | Beta | 2.3.0 |
-| [RabbitMQ](../en/connector-v2/source/Rabbitmq.md) | Source | Beta | 2.3.0 |
-| [Redis](../en/connector-v2/sink/Redis.md) | Sink | Beta | 2.2.0-beta |
-| [Redis](../en/connector-v2/source/Redis.md) | Source | Beta | 2.2.0-beta |
-| [S3Redshift](../en/connector-v2/sink/S3-Redshift.md) | Sink | GA | 2.3.0-beta |
-| [S3File](../en/connector-v2/source/S3File.md) | Source | GA | 2.3.0-beta |
-| [S3File](../en/connector-v2/sink/S3File.md) | Sink | GA | 2.3.0-beta |
-| [Sentry](../en/connector-v2/sink/Sentry.md) | Sink | Alpha | 2.2.0-beta |
-| [SFtpFile](../en/connector-v2/sink/SftpFile.md) | Sink | Beta | 2.3.0 |
-| [SFtpFile](../en/connector-v2/source/SftpFile.md) | Source | Beta | 2.3.0 |
-| [Slack](../en/connector-v2/sink/Slack.md) | Sink | Beta | 2.3.0 |
-| [Socket](../en/connector-v2/sink/Socket.md) | Sink | Beta | 2.2.0-beta |
-| [Socket](../en/connector-v2/source/Socket.md) | Source | Beta | 2.2.0-beta |
-| [StarRocks](../en/connector-v2/sink/StarRocks.md) | Sink | Alpha | 2.3.0 |
-| [Tablestore](../en/connector-v2/sink/Tablestore.md) | Sink | Alpha | 2.3.0 |
-
diff --git a/docs/zh/about.md b/docs/zh/about.md
index 93c7f877168..244b27af1ae 100644
--- a/docs/zh/about.md
+++ b/docs/zh/about.md
@@ -21,7 +21,7 @@ SeaTunnel专注于数据集成和数据同步,主要旨在解决数据集成
## SeaTunnel 相关特性
- 丰富且可扩展的Connector:SeaTunnel提供了不依赖于特定执行引擎的Connector API。 基于该API开发的Connector(Source、Transform、Sink)可以运行在很多不同的引擎上,例如目前支持的SeaTunnel引擎(Zeta)、Flink、Spark等。
-- Connector插件:插件式设计让用户可以轻松开发自己的Connector并将其集成到SeaTunnel项目中。 目前,SeaTunnel 支持超过 100 个连接器,并且数量正在激增。 这是[当前支持的连接器]的列表(Connector-v2-release-state.md)
+- Connector插件:插件式设计让用户可以轻松开发自己的Connector并将其集成到SeaTunnel项目中。 目前,SeaTunnel 支持超过 100 个连接器,并且数量正在激增。
- 批流集成:基于SeaTunnel Connector API开发的Connector完美兼容离线同步、实时同步、全量同步、增量同步等场景。 它们大大降低了管理数据集成任务的难度。
- 支持分布式快照算法,保证数据一致性。
- 多引擎支持:SeaTunnel默认使用SeaTunnel引擎(Zeta)进行数据同步。 SeaTunnel还支持使用Flink或Spark作为Connector的执行引擎,以适应企业现有的技术组件。 SeaTunnel 支持 Spark 和 Flink 的多个版本。
@@ -62,7 +62,7 @@ SeaTunnel 拥有大量用户。 您可以在[用户](https://seatunnel.apache.or
-SeaTunnel 丰富了CNCF 云原生景观 。
+SeaTunnel 丰富了CNCF 云原生景观 。
## 了解更多
diff --git a/docs/zh/concept/JobEnvConfig.md b/docs/zh/concept/JobEnvConfig.md
index c20797604f3..4dddb6e94a0 100644
--- a/docs/zh/concept/JobEnvConfig.md
+++ b/docs/zh/concept/JobEnvConfig.md
@@ -21,14 +21,26 @@
### checkpoint.interval
-获取定时调度检查点的时间间隔。
+获取定时调度检查点的时间间隔(毫秒)。
-在`STREAMING`模式下,检查点是必须的,如果不设置,将从应用程序配置文件`seatunnel.yaml`中获取。 在`BATCH`模式下,您可以通过不设置此参数来禁用检查点。
+在`STREAMING`模式下,检查点是必须的,如果不设置,将从应用程序配置文件`seatunnel.yaml`中获取。 在`BATCH`模式下,您可以通过不设置此参数来禁用检查点。在Zeta `STREAMING`模式下,默认值为30000毫秒。
+
+### checkpoint.timeout
+
+检查点的超时时间(毫秒)。如果检查点在超时之前没有完成,作业将失败。在Zeta中,默认值为30000毫秒。
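+
+例如,可以在 `env` 块中同时设置这两个参数(下面只是一个示意片段,数值为假设值):
+
+```hocon
+env {
+  job.mode = "STREAMING"
+  # 每 10 秒触发一次检查点
+  checkpoint.interval = 10000
+  # 检查点超过 60 秒未完成则视为失败
+  checkpoint.timeout = 60000
+}
+```
+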
### parallelism
该参数配置source和sink的并行度。
+### shade.identifier
+
+指定加密方式,如果您没有加密或解密配置文件的需求,此选项可以忽略。
+
+更多详细信息,您可以参考文档 [Config Encryption Decryption](../../en/connector-v2/Config-Encryption-Decryption.md)
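+
+例如,若配置文件中的敏感字段使用了 Base64 方式加密,可以在 `env` 中这样声明(示意片段):
+
+```hocon
+env {
+  # 使用 base64 方式解析配置中的加密字段
+  shade.identifier = "base64"
+}
+```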
+
+## Zeta 引擎参数
+
### job.retry.times
用于控制作业失败时的默认重试次数。默认值为3,并且仅适用于Zeta引擎。
@@ -44,12 +56,6 @@
当值为`CLIENT`时,SaveMode操作在作业提交的过程中执行,使用shell脚本提交作业时,该过程在提交作业的shell进程中执行。使用rest api提交作业时,该过程在http请求的处理线程中执行。
请尽量使用`CLUSTER`模式,因为当`CLUSTER`模式没有问题时,我们将删除`CLIENT`模式。
-### shade.identifier
-
-指定加密方式,如果您没有加密或解密配置文件的需求,此选项可以忽略。
-
-更多详细信息,您可以参考文档 [Config Encryption Decryption](../../en/connector-v2/Config-Encryption-Decryption.md)
-
## Flink 引擎参数
这里列出了一些与 Flink 中名称相对应的 SeaTunnel 参数名称,并非全部,更多内容请参考官方 [Flink Documentation](https://flink.apache.org/) for more.
diff --git a/docs/zh/concept/config.md b/docs/zh/concept/config.md
index 72c14bafcec..98bf85735fe 100644
--- a/docs/zh/concept/config.md
+++ b/docs/zh/concept/config.md
@@ -1,8 +1,3 @@
----
-
-sidebar_position: 2
--------------------
-
# 配置文件简介
在SeaTunnel中,最重要的事情就是配置文件,尽管用户可以自定义他们自己的数据同步需求以发挥SeaTunnel最大的潜力。那么接下来我将会向你介绍如何设置配置文件。
@@ -20,6 +15,12 @@ sidebar_position: 2
配置文件类似下面这个例子:
+:::warn
+
+旧的配置名称 `result_table_name`/`source_table_name` 已经过时,请尽快迁移到新名称 `plugin_output`/`plugin_input`。
+
+:::
+
### hocon
```hocon
@@ -29,7 +30,7 @@ env {
source {
FakeSource {
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 100
schema = {
fields {
@@ -43,8 +44,8 @@ source {
transform {
Filter {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
fields = [name, card]
}
}
@@ -57,70 +58,11 @@ sink {
fields = ["name", "card"]
username = "default"
password = ""
- source_table_name = "fake1"
+ plugin_input = "fake1"
}
}
```
-#### 多行文本支持
-
-`hocon`支持多行字符串,这样就可以包含较长的文本段落,而不必担心换行符或特殊格式。这可以通过将文本括在三层引号 **`"""`** 中来实现。例如:
-
-```
-var = """
-Apache SeaTunnel is a
-next-generation high-performance,
-distributed, massive data integration tool.
-"""
-sql = """ select * from "table" """
-```
-
-### json
-
-```json
-
-{
- "env": {
- "job.mode": "batch"
- },
- "source": [
- {
- "plugin_name": "FakeSource",
- "result_table_name": "fake",
- "row.num": 100,
- "schema": {
- "fields": {
- "name": "string",
- "age": "int",
- "card": "int"
- }
- }
- }
- ],
- "transform": [
- {
- "plugin_name": "Filter",
- "source_table_name": "fake",
- "result_table_name": "fake1",
- "fields": ["name", "card"]
- }
- ],
- "sink": [
- {
- "plugin_name": "Clickhouse",
- "host": "clickhouse:8123",
- "database": "default",
- "table": "seatunnel_console",
- "fields": ["name", "card"],
- "username": "default",
- "password": "",
- "source_table_name": "fake1"
- }
- ]
-}
-
-```
-
正如你看到的,配置文件包括几个部分:env, source, transform, sink。不同的模块具有不同的功能。
当你了解了这些模块后,你就会懂得SeaTunnel到底是如何工作的。
@@ -136,7 +78,7 @@ sql = """ select * from "table" """
source用于定义SeaTunnel在哪儿检索数据,并将检索的数据用于下一步。
可以同时定义多个source。目前支持的source请看[Source of SeaTunnel](../../en/connector-v2/source)。每种source都有自己特定的参数用来
-定义如何检索数据,SeaTunnel也抽象了每种source所使用的参数,例如 `result_table_name` 参数,用于指定当前source生成的数据的名称,
+定义如何检索数据,SeaTunnel也抽象了每种source所使用的参数,例如 `plugin_output` 参数,用于指定当前source生成的数据的名称,
方便后续其他模块使用。
### transform
@@ -151,7 +93,7 @@ env {
source {
FakeSource {
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 100
schema = {
fields {
@@ -171,7 +113,7 @@ sink {
fields = ["name", "age", "card"]
username = "default"
password = ""
- source_table_name = "fake1"
+ plugin_input = "fake1"
}
}
```
@@ -187,16 +129,92 @@ sink模块,你可以快速高效地完成这个操作。Sink和source非常相
### 其它
-你会疑惑当定义了多个source和多个sink时,每个sink读取哪些数据,每个transform读取哪些数据?我们使用`result_table_name` 和
-`source_table_name` 两个配置。每个source模块都会配置一个`result_table_name`来指示数据源生成的数据源名称,其它transform和sink
-模块可以使用`source_table_name` 引用相应的数据源名称,表示要读取数据进行处理。然后transform,作为一个中间的处理模块,可以同时使用
-`result_table_name` 和 `source_table_name` 配置。但你会发现在上面的配置例子中,不是每个模块都配置了这些参数,因为在SeaTunnel中,
+你会疑惑当定义了多个source和多个sink时,每个sink读取哪些数据,每个transform读取哪些数据?我们使用`plugin_output` 和
+`plugin_input` 两个配置。每个source模块都会配置一个`plugin_output`来指示数据源生成的数据源名称,其它transform和sink
+模块可以使用`plugin_input` 引用相应的数据源名称,表示要读取数据进行处理。然后transform,作为一个中间的处理模块,可以同时使用
+`plugin_output` 和 `plugin_input` 配置。但你会发现在上面的配置例子中,不是每个模块都配置了这些参数,因为在SeaTunnel中,
有一个默认的约定,如果这两个参数没有配置,则使用上一个节点的最后一个模块生成的数据。当只有一个source时这是非常方便的。
+## 多行文本支持
+
+`hocon`支持多行字符串,这样就可以包含较长的文本段落,而不必担心换行符或特殊格式。这可以通过将文本括在三层引号 **`"""`** 中来实现。例如:
+
+```
+var = """
+Apache SeaTunnel is a
+next-generation high-performance,
+distributed, massive data integration tool.
+"""
+sql = """ select * from "table" """
+```
+
+## Json格式支持
+
+在编写配置文件之前,请确保配置文件的名称以 `.json` 结尾。
+
+```json
+
+{
+ "env": {
+ "job.mode": "batch"
+ },
+ "source": [
+ {
+ "plugin_name": "FakeSource",
+ "plugin_output": "fake",
+ "row.num": 100,
+ "schema": {
+ "fields": {
+ "name": "string",
+ "age": "int",
+ "card": "int"
+ }
+ }
+ }
+ ],
+ "transform": [
+ {
+ "plugin_name": "Filter",
+ "plugin_input": "fake",
+ "plugin_output": "fake1",
+ "fields": ["name", "card"]
+ }
+ ],
+ "sink": [
+ {
+ "plugin_name": "Clickhouse",
+ "host": "clickhouse:8123",
+ "database": "default",
+ "table": "seatunnel_console",
+ "fields": ["name", "card"],
+ "username": "default",
+ "password": "",
+ "plugin_input": "fake1"
+ }
+ ]
+}
+
+```
+
## 配置变量替换
在配置文件中,我们可以定义一些变量并在运行时替换它们。但是注意仅支持 hocon 格式的文件。
+变量使用方法:
+ - `${varName}`,如果变量未传值,则抛出异常。
+ - `${varName:default}`,如果变量未传值,则使用默认值。如果设置默认值则变量需要写在双引号中。
+ - `${varName:}`,如果变量未传值,则使用空字符串。
+
+如果您不通过`-i`传入变量值,也可以通过设置系统环境变量的方式传值,变量替换支持从环境变量中获取变量值。
+例如,您可以在shell脚本中设置环境变量如下:
+```shell
+export varName="value with space"
+```
+然后您可以在配置文件中使用变量。
+
+如果您在配置文件中设置了没有默认值的变量,但在执行过程中未传递该变量,则该变量会按原样保留,系统不会抛出异常,但您需要确保其他流程能够正确解析该变量值。例如,ElasticSearch的索引需要支持`${xxx}`这样的格式来动态指定索引。若其他流程不支持,程序可能无法正常运行。
+
+具体样例:
```hocon
env {
job.mode = "BATCH"
@@ -206,14 +224,14 @@ env {
source {
FakeSource {
- result_table_name = ${resName}
- row.num = ${rowNum}
+ plugin_output = "${resName:fake_test}_table"
+ row.num = "${rowNum:50}"
string.template = ${strTemplate}
int.template = [20, 21]
schema = {
fields {
- name = ${nameType}
- age = "int"
+ name = "${nameType:string}"
+ age = ${ageType}
}
}
}
@@ -221,21 +239,20 @@ source {
transform {
sql {
- source_table_name = "fake"
- result_table_name = "sql"
- query = "select * from "${resName}" where name = '"${nameVal}"' "
+ plugin_input = "${resName:fake_test}_table"
+ plugin_output = "sql"
+ query = "select * from ${resName:fake_test}_table where name = '${nameVal}' "
}
}
sink {
Console {
- source_table_name = "sql"
+ plugin_input = "sql"
username = ${username}
password = ${password}
}
}
-
```
在上述配置中,我们定义了一些变量,如 ${rowNum}、${resName}。
@@ -244,16 +261,17 @@ sink {
```shell
./bin/seatunnel.sh -c
-i jobName='this_is_a_job_name'
--i resName=fake
--i rowNum=10
-i strTemplate=['abc','d~f','hi']
--i nameType=string
+-i ageType=int
-i nameVal=abc
-i username=seatunnel=2.3.1
-i password='$a^b%c.d~e0*9('
--e local
+-m local
```
+其中 `resName`、`rowNum`、`nameType` 我们未设置,它们将使用默认值。
+
+
然后最终提交的配置是:
```hocon
@@ -265,8 +283,8 @@ env {
source {
FakeSource {
- result_table_name = "fake"
- row.num = 10
+ plugin_output = "fake_test_table"
+ row.num = 50
string.template = ['abc','d~f','hi']
int.template = [20, 21]
schema = {
@@ -280,18 +298,18 @@ source {
transform {
sql {
- source_table_name = "fake"
- result_table_name = "sql"
- query = "select * from "fake" where name = 'abc' "
+ plugin_input = "fake_test_table"
+ plugin_output = "sql"
+ query = "select * from fake_test_table where name = 'abc' "
}
}
sink {
Console {
- source_table_name = "sql"
+ plugin_input = "sql"
username = "seatunnel=2.3.1"
- password = "$a^b%c.d~e0*9("
+ password = "$a^b%c.d~e0*9("
}
}
@@ -301,9 +319,9 @@ sink {
- 如果值包含特殊字符,如`(`,请使用`'`引号将其括起来。
- 如果替换变量包含`"`或`'`(如`"resName"`和`"nameVal"`),需要添加`"`。
-- 值不能包含空格`' '`。例如, `-i jobName='this is a job name'`将被替换为`job.name = "this"`。
+- 值不能包含空格`' '`。例如, `-i jobName='this is a job name'`将被替换为`job.name = "this"`。 你可以使用环境变量传递带有空格的值。
- 如果要使用动态参数,可以使用以下格式: `-i date=$(date +"%Y%m%d")`。
-
+- 不能使用系统保留的占位符,它们不会被`-i`替换,如:`${database_name}`、`${schema_name}`、`${table_name}`、`${schema_full_name}`、`${table_full_name}`、`${primary_key}`、`${unique_key}`、`${field_names}`。具体可参考[Sink参数占位符](sink-options-placeholders.md)
## 此外
如果你想了解更多关于格式配置的详细信息,请查看 [HOCON](https://github.com/lightbend/config/blob/main/HOCON.md)。
diff --git a/docs/zh/concept/event-listener.md b/docs/zh/concept/event-listener.md
new file mode 100644
index 00000000000..69972cbfc56
--- /dev/null
+++ b/docs/zh/concept/event-listener.md
@@ -0,0 +1,114 @@
+# 事件监听器
+
+## 介绍
+
+SeaTunnel提供了丰富的事件监听器功能,用于管理数据同步时的状态。此功能在需要监听任务运行状态时十分重要(`org.apache.seatunnel.api.event`)。本文档将指导您如何使用这些参数并有效地利用它们。
+
+## 支持的引擎
+
+> SeaTunnel Zeta
+> Flink
+> Spark
+
+## API
+
+事件(event)API的定义在 `org.apache.seatunnel.api.event`包中。
+
+### Event Data API
+
+- `org.apache.seatunnel.api.event.Event` - 事件数据的接口。
+- `org.apache.seatunnel.api.event.EventType` - 事件数据的枚举值。
+
+### Event Listener API
+
+您可以自定义事件处理器,例如将事件发送到外部系统。
+
+- `org.apache.seatunnel.api.event.EventHandler` - 事件处理器的接口,SPI将会自动从类路径中加载子类。
+
+### Event Collect API
+
+- `org.apache.seatunnel.api.source.SourceSplitEnumerator` - 在`SourceSplitEnumerator`加载事件监听器。
+
+```java
+package org.apache.seatunnel.api.source;
+
+public interface SourceSplitEnumerator {
+
+ interface Context {
+
+ /**
+ * Get the {@link org.apache.seatunnel.api.event.EventListener} of this enumerator.
+ *
+ * @return
+ */
+ EventListener getEventListener();
+ }
+}
+```
+
+- `org.apache.seatunnel.api.source.SourceReader` - 在`SourceReader`加载事件监听器。
+
+```java
+package org.apache.seatunnel.api.source;
+
+public interface SourceReader {
+
+ interface Context {
+
+ /**
+ * Get the {@link org.apache.seatunnel.api.event.EventListener} of this reader.
+ *
+ * @return
+ */
+ EventListener getEventListener();
+ }
+}
+```
+
+- `org.apache.seatunnel.api.sink.SinkWriter` - 在`SinkWriter`加载事件监听器。
+
+```java
+package org.apache.seatunnel.api.sink;
+
+public interface SinkWriter {
+
+ interface Context {
+
+ /**
+ * Get the {@link org.apache.seatunnel.api.event.EventListener} of this writer.
+ *
+ * @return
+ */
+ EventListener getEventListener();
+ }
+}
+```
+
+## 设置监听器
+
+您需要设置引擎配置以使用事件监听器功能。
+
+### Zeta 引擎
+
+配置样例(seatunnel.yaml):
+
+```
+seatunnel:
+ engine:
+ event-report-http:
+ url: "http://example.com:1024/event/report"
+ headers:
+ Content-Type: application/json
+```
+
+### Flink 引擎
+
+您可以定义 `org.apache.seatunnel.api.event.EventHandler` 接口并添加到类路径,SPI会自动加载。
+
+支持的flink版本: 1.14.0+
+
+样例: `org.apache.seatunnel.api.event.LoggingEventHandler`
+
+### Spark 引擎
+
+您可以定义 `org.apache.seatunnel.api.event.EventHandler` 接口并添加到类路径,SPI会自动加载。
diff --git a/docs/zh/concept/schema-evolution.md b/docs/zh/concept/schema-evolution.md
new file mode 100644
index 00000000000..57d562946ff
--- /dev/null
+++ b/docs/zh/concept/schema-evolution.md
@@ -0,0 +1,152 @@
+# 模式演进
+模式演进是指数据表的 Schema 可以发生变化,数据同步任务能够自动适应新的表结构而无需其他操作。
+现在我们只支持对 CDC 源中的表进行“添加列”、“删除列”、“重命名列”和“修改列”的操作。目前这个功能只支持 Zeta 引擎。
+
+## 已支持的连接器
+
+### 源
+[Mysql-CDC](https://github.com/apache/seatunnel/blob/dev/docs/en/connector-v2/source/MySQL-CDC.md)
+[Oracle-CDC](https://github.com/apache/seatunnel/blob/dev/docs/en/connector-v2/source/Oracle-CDC.md)
+
+### 目标
+[Jdbc-Mysql](https://github.com/apache/seatunnel/blob/dev/docs/zh/connector-v2/sink/Jdbc.md)
+[Jdbc-Oracle](https://github.com/apache/seatunnel/blob/dev/docs/en/connector-v2/sink/Jdbc.md)
+
+注意: 目前模式演进不支持transform。不同类型数据库(Oracle-CDC -> Jdbc-Mysql)的模式演进目前不支持ddl中列的默认值。
+
+当你使用Oracle-CDC时,你不能使用用户名`SYS`或`SYSTEM`来修改表结构,否则ddl事件将被过滤,这可能导致模式演进不起作用;
+另外,如果你的表名以`ORA_TEMP_`开头,也会有相同的问题。
+
+## 启用Schema evolution功能
+在CDC源连接器中模式演进默认是关闭的。你需要在CDC连接器中配置`debezium.include.schema.changes = true`来启用它。当你使用Oracle-CDC并且启用schema-evolution时,你必须将`debezium`属性中的`log.mining.strategy`指定为`redo_log_catalog`。
+
+## 示例
+
+### Mysql-CDC -> Jdbc-Mysql
+```
+env {
+ # You can set engine configuration here
+ parallelism = 5
+ job.mode = "STREAMING"
+ checkpoint.interval = 5000
+ read_limit.bytes_per_second=7000000
+ read_limit.rows_per_second=400
+}
+
+source {
+ MySQL-CDC {
+ server-id = 5652-5657
+ username = "st_user_source"
+ password = "mysqlpw"
+ table-names = ["shop.products"]
+ base-url = "jdbc:mysql://mysql_cdc_e2e:3306/shop"
+ debezium = {
+ include.schema.changes = true
+ }
+ }
+}
+
+sink {
+ jdbc {
+ url = "jdbc:mysql://mysql_cdc_e2e:3306/shop"
+ driver = "com.mysql.cj.jdbc.Driver"
+ user = "st_user_sink"
+ password = "mysqlpw"
+ generate_sink_sql = true
+ database = shop
+ table = mysql_cdc_e2e_sink_table_with_schema_change_exactly_once
+ primary_keys = ["id"]
+ is_exactly_once = true
+ xa_data_source_class_name = "com.mysql.cj.jdbc.MysqlXADataSource"
+ }
+}
+```
+
+### Oracle-cdc -> Jdbc-Oracle
+```
+env {
+ # You can set engine configuration here
+ parallelism = 1
+ job.mode = "STREAMING"
+ checkpoint.interval = 5000
+}
+
+source {
+ # This is an example source plugin **only for testing and demonstrating the source plugin feature**
+ Oracle-CDC {
+ plugin_output = "customers"
+ username = "dbzuser"
+ password = "dbz"
+ database-names = ["ORCLCDB"]
+ schema-names = ["DEBEZIUM"]
+ table-names = ["ORCLCDB.DEBEZIUM.FULL_TYPES"]
+ base-url = "jdbc:oracle:thin:@oracle-host:1521/ORCLCDB"
+ source.reader.close.timeout = 120000
+ connection.pool.size = 1
+ debezium {
+ include.schema.changes = true
+ log.mining.strategy = redo_log_catalog
+ }
+ }
+}
+
+sink {
+ Jdbc {
+ plugin_input = "customers"
+ driver = "oracle.jdbc.driver.OracleDriver"
+ url = "jdbc:oracle:thin:@oracle-host:1521/ORCLCDB"
+ user = "dbzuser"
+ password = "dbz"
+ generate_sink_sql = true
+ database = "ORCLCDB"
+ table = "DEBEZIUM.FULL_TYPES_SINK"
+ batch_size = 1
+ primary_keys = ["ID"]
+ connection.pool.size = 1
+ }
+}
+```
+
+### Oracle-cdc -> Jdbc-Mysql
+```
+env {
+ # You can set engine configuration here
+ parallelism = 1
+ job.mode = "STREAMING"
+ checkpoint.interval = 5000
+}
+
+source {
+ # This is an example source plugin **only for testing and demonstrating the source plugin feature**
+ Oracle-CDC {
+ plugin_output = "customers"
+ username = "dbzuser"
+ password = "dbz"
+ database-names = ["ORCLCDB"]
+ schema-names = ["DEBEZIUM"]
+ table-names = ["ORCLCDB.DEBEZIUM.FULL_TYPES"]
+ base-url = "jdbc:oracle:thin:@oracle-host:1521/ORCLCDB"
+ source.reader.close.timeout = 120000
+ connection.pool.size = 1
+ debezium {
+ include.schema.changes = true
+ log.mining.strategy = redo_log_catalog
+ }
+ }
+}
+
+sink {
+ jdbc {
+ plugin_input = "customers"
+ url = "jdbc:mysql://oracle-host:3306/oracle_sink"
+ driver = "com.mysql.cj.jdbc.Driver"
+ user = "st_user_sink"
+ password = "mysqlpw"
+ generate_sink_sql = true
+ # You need to configure both database and table
+ database = oracle_sink
+ table = oracle_cdc_2_mysql_sink_table
+ primary_keys = ["ID"]
+ }
+}
+```
diff --git a/docs/zh/concept/schema-feature.md b/docs/zh/concept/schema-feature.md
index d719a7953e5..b504d264f83 100644
--- a/docs/zh/concept/schema-feature.md
+++ b/docs/zh/concept/schema-feature.md
@@ -172,6 +172,46 @@ constraintKeys = [
| INDEX_KEY | 键 |
| UNIQUE_KEY | 唯一键 |
+## 多表Schema
+
+```
+tables_configs = [
+ {
+ schema {
+ table = "database.schema.table1"
+ schema_first = false
+ comment = "comment"
+ columns = [
+ ...
+ ]
+ primaryKey {
+ ...
+ }
+ constraintKeys {
+ ...
+ }
+ }
+ },
+ {
+ schema = {
+ table = "database.schema.table2"
+ schema_first = false
+ comment = "comment"
+ columns = [
+ ...
+ ]
+ primaryKey {
+ ...
+ }
+ constraintKeys {
+ ...
+ }
+ }
+ }
+]
+
+```
+
## 如何使用schema
### 推荐
@@ -180,7 +220,7 @@ constraintKeys = [
source {
FakeSource {
parallelism = 2
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 16
schema {
table = "FakeDatabase.FakeTable"
@@ -234,7 +274,7 @@ source {
source {
FakeSource {
parallelism = 2
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 16
schema = {
fields {
diff --git a/docs/zh/concept/sql-config.md b/docs/zh/concept/sql-config.md
index 7defa0010b2..2d87b95ac83 100644
--- a/docs/zh/concept/sql-config.md
+++ b/docs/zh/concept/sql-config.md
@@ -1,5 +1,7 @@
# SQL配置文件
+在编写`SQL`配置文件之前,请确保配置文件的名称以`.sql`结尾。
+
## SQL配置文件结构
`SQL`配置文件类似下面这样:
@@ -120,7 +122,10 @@ CREATE TABLE sink_table WITH (
INSERT INTO sink_table SELECT id, name, age, email FROM source_table;
```
-* `SELECT FROM` 部分为源端映射表的表名,`SELECT` 部分的语法参考:[SQL-transform](../transform-v2/sql.md) `query` 配置项
+* `SELECT FROM` 部分为源端映射表的表名,`SELECT` 部分的语法参考:[SQL-transform](../transform-v2/sql.md) `query` 配置项。如果select的字段是关键字([参考](https://github.com/JSQLParser/JSqlParser/blob/master/src/main/jjtree/net/sf/jsqlparser/parser/JSqlParserCC.jjt)),你应该像这样使用\`fieldName\`
+```sql
+INSERT INTO sink_table SELECT id, name, age, email,`output` FROM source_table;
+```
* `INSERT INTO` 部分为目标端映射表的表名
* 注意:该语法**不支持**在 `INSERT` 中指定字段,如:`INSERT INTO sink_table (id, name, age, email) SELECT id, name, age, email FROM source_table;`
diff --git a/docs/zh/connector-v2/Config-Encryption-Decryption.md b/docs/zh/connector-v2/Config-Encryption-Decryption.md
index e7b13aea86a..c2b7ced0ab6 100644
--- a/docs/zh/connector-v2/Config-Encryption-Decryption.md
+++ b/docs/zh/connector-v2/Config-Encryption-Decryption.md
@@ -43,7 +43,7 @@ Base64编码支持加密以下参数:
source {
MySQL-CDC {
- result_table_name = "fake"
+ plugin_output = "fake"
parallelism = 1
server-id = 5656
port = 56725
@@ -97,7 +97,7 @@ Base64编码支持加密以下参数:
"port" : 56725,
"database-name" : "inventory_vwyw0n",
"parallelism" : 1,
- "result_table_name" : "fake",
+ "plugin_output" : "fake",
"table-name" : "products",
"plugin_name" : "MySQL-CDC",
"server-id" : 5656,
diff --git a/docs/zh/connector-v2/formats/avro.md b/docs/zh/connector-v2/formats/avro.md
index 7176f4e507f..826fc27b448 100644
--- a/docs/zh/connector-v2/formats/avro.md
+++ b/docs/zh/connector-v2/formats/avro.md
@@ -51,7 +51,7 @@ source {
}
}
}
- result_table_name = "fake"
+ plugin_output = "fake"
}
}
@@ -76,7 +76,7 @@ source {
Kafka {
bootstrap.servers = "kafkaCluster:9092"
topic = "test_avro_topic"
- result_table_name = "kafka_table"
+ plugin_output = "kafka_table"
start_mode = "earliest"
format = avro
format_error_handle_way = skip
@@ -104,7 +104,7 @@ source {
sink {
Console {
- source_table_name = "kafka_table"
+ plugin_input = "kafka_table"
}
}
```
diff --git a/docs/zh/connector-v2/formats/canal-json.md b/docs/zh/connector-v2/formats/canal-json.md
index 92c4338eb56..fc3344d963c 100644
--- a/docs/zh/connector-v2/formats/canal-json.md
+++ b/docs/zh/connector-v2/formats/canal-json.md
@@ -86,7 +86,7 @@ source {
Kafka {
bootstrap.servers = "kafkaCluster:9092"
topic = "products_binlog"
- result_table_name = "kafka_name"
+ plugin_output = "kafka_name"
start_mode = earliest
schema = {
fields {
diff --git a/docs/zh/connector-v2/formats/cdc-compatible-debezium-json.md b/docs/zh/connector-v2/formats/cdc-compatible-debezium-json.md
index e34a5b39a22..999ab200186 100644
--- a/docs/zh/connector-v2/formats/cdc-compatible-debezium-json.md
+++ b/docs/zh/connector-v2/formats/cdc-compatible-debezium-json.md
@@ -17,7 +17,7 @@ env {
source {
MySQL-CDC {
- result_table_name = "table1"
+ plugin_output = "table1"
base-url="jdbc:mysql://localhost:3306/test"
"startup.mode"=INITIAL
@@ -43,7 +43,7 @@ source {
sink {
Kafka {
- source_table_name = "table1"
+ plugin_input = "table1"
bootstrap.servers = "localhost:9092"
diff --git a/docs/zh/connector-v2/formats/debezium-json.md b/docs/zh/connector-v2/formats/debezium-json.md
index 3e70a5d31ed..88b32540395 100644
--- a/docs/zh/connector-v2/formats/debezium-json.md
+++ b/docs/zh/connector-v2/formats/debezium-json.md
@@ -85,7 +85,7 @@ source {
Kafka {
bootstrap.servers = "kafkaCluster:9092"
topic = "products_binlog"
- result_table_name = "kafka_name"
+ plugin_output = "kafka_name"
start_mode = earliest
schema = {
fields {
diff --git a/docs/zh/connector-v2/formats/kafka-compatible-kafkaconnect-json.md b/docs/zh/connector-v2/formats/kafka-compatible-kafkaconnect-json.md
index d0ceb58ac6c..027d90ded07 100644
--- a/docs/zh/connector-v2/formats/kafka-compatible-kafkaconnect-json.md
+++ b/docs/zh/connector-v2/formats/kafka-compatible-kafkaconnect-json.md
@@ -16,7 +16,7 @@ source {
Kafka {
bootstrap.servers = "localhost:9092"
topic = "jdbc_source_record"
- result_table_name = "kafka_table"
+ plugin_output = "kafka_table"
start_mode = earliest
schema = {
fields {
diff --git a/docs/zh/connector-v2/formats/ogg-json.md b/docs/zh/connector-v2/formats/ogg-json.md
index 7b64f5b5e41..80c88e6ac13 100644
--- a/docs/zh/connector-v2/formats/ogg-json.md
+++ b/docs/zh/connector-v2/formats/ogg-json.md
@@ -66,7 +66,7 @@ source {
Kafka {
bootstrap.servers = "127.0.0.1:9092"
topic = "ogg"
- result_table_name = "kafka_name"
+ plugin_output = "kafka_name"
start_mode = earliest
schema = {
fields {
diff --git a/docs/zh/connector-v2/formats/protobuf.md b/docs/zh/connector-v2/formats/protobuf.md
new file mode 100644
index 00000000000..5fac7f93211
--- /dev/null
+++ b/docs/zh/connector-v2/formats/protobuf.md
@@ -0,0 +1,164 @@
+# Protobuf 格式
+
+Protobuf(Protocol Buffers)是一种由Google开发的语言中立、平台无关的数据序列化格式。它提供了一种高效的方式来编码结构化数据,同时支持多种编程语言和平台。
+
+目前支持在 Kafka 中使用 protobuf 格式。
+
+## Kafka 使用示例
+
+- 将模拟数据源随机生成的数据以 protobuf 格式写入 Kafka 的示例
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "BATCH"
+}
+
+source {
+ FakeSource {
+ parallelism = 1
+ plugin_output = "fake"
+ row.num = 16
+ schema = {
+ fields {
+ c_int32 = int
+ c_int64 = long
+ c_float = float
+ c_double = double
+ c_bool = boolean
+ c_string = string
+ c_bytes = bytes
+
+ Address {
+ city = string
+ state = string
+ street = string
+ }
+ attributes = "map<string, float>"
+ phone_numbers = "array<string>"
+ }
+ }
+ }
+}
+
+sink {
+ kafka {
+ topic = "test_protobuf_topic_fake_source"
+ bootstrap.servers = "kafkaCluster:9092"
+ format = protobuf
+ kafka.request.timeout.ms = 60000
+ kafka.config = {
+ acks = "all"
+ request.timeout.ms = 60000
+ buffer.memory = 33554432
+ }
+ protobuf_message_name = Person
+ protobuf_schema = """
+ syntax = "proto3";
+
+ package org.apache.seatunnel.format.protobuf;
+
+ option java_outer_classname = "ProtobufE2E";
+
+ message Person {
+ int32 c_int32 = 1;
+ int64 c_int64 = 2;
+ float c_float = 3;
+ double c_double = 4;
+ bool c_bool = 5;
+ string c_string = 6;
+ bytes c_bytes = 7;
+
+ message Address {
+ string street = 1;
+ string city = 2;
+ string state = 3;
+ string zip = 4;
+ }
+
+ Address address = 8;
+
+ map<string, float> attributes = 9;
+
+ repeated string phone_numbers = 10;
+ }
+ """
+ }
+}
+```
+
+- 从 kafka 读取 protobuf 格式的数据并打印到控制台的示例
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "BATCH"
+}
+
+source {
+ Kafka {
+ topic = "test_protobuf_topic_fake_source"
+ format = protobuf
+ protobuf_message_name = Person
+ protobuf_schema = """
+ syntax = "proto3";
+
+ package org.apache.seatunnel.format.protobuf;
+
+ option java_outer_classname = "ProtobufE2E";
+
+ message Person {
+ int32 c_int32 = 1;
+ int64 c_int64 = 2;
+ float c_float = 3;
+ double c_double = 4;
+ bool c_bool = 5;
+ string c_string = 6;
+ bytes c_bytes = 7;
+
+ message Address {
+ string street = 1;
+ string city = 2;
+ string state = 3;
+ string zip = 4;
+ }
+
+ Address address = 8;
+
+ map<string, float> attributes = 9;
+
+ repeated string phone_numbers = 10;
+ }
+ """
+ schema = {
+ fields {
+ c_int32 = int
+ c_int64 = long
+ c_float = float
+ c_double = double
+ c_bool = boolean
+ c_string = string
+ c_bytes = bytes
+
+ Address {
+ city = string
+ state = string
+ street = string
+ }
+ attributes = "map<string, float>"
+ phone_numbers = "array<string>"
+ }
+ }
+ bootstrap.servers = "kafkaCluster:9092"
+ start_mode = "earliest"
+ plugin_output = "kafka_table"
+ }
+}
+
+sink {
+ Console {
+ plugin_input = "kafka_table"
+ }
+}
+```
+
diff --git a/docs/zh/connector-v2/sink-common-options.md b/docs/zh/connector-v2/sink-common-options.md
new file mode 100644
index 00000000000..2944181fb8e
--- /dev/null
+++ b/docs/zh/connector-v2/sink-common-options.md
@@ -0,0 +1,68 @@
+---
+sidebar_position: 4
+---
+
+# Sink 常用选项
+
+> Sink 连接器常用参数
+
+:::warn
+
+旧的配置名称 `source_table_name` 已经过时,请尽快迁移到新名称 `plugin_input`。
+
+:::
+
+| 名称 | 类型 | 是否需要 | 默认值 |
+|--------------|--------|------|-----|
+| plugin_input | string | 否 | - |
+| parallelism | int | 否 | - |
+
+### plugin_input [string]
+
+当不指定 `plugin_input` 时,当前插件处理配置文件中上一个插件输出的数据集(`dataset`)。
+
+当指定了 `plugin_input` 时,当前插件处理的是该参数所对应的数据集。
+
+### parallelism [int]
+
+当没有指定`parallelism`时,默认使用 env 中的 `parallelism`。
+
+当指定 `parallelism` 时,它将覆盖 env 中的 `parallelism`。
+
+## Examples
+
+```hocon
+source {
+ FakeSourceStream {
+ parallelism = 2
+ plugin_output = "fake"
+ field_name = "name,age"
+ }
+}
+
+transform {
+ Filter {
+ plugin_input = "fake"
+ fields = [name]
+ plugin_output = "fake_name"
+ }
+ Filter {
+ plugin_input = "fake"
+ fields = [age]
+ plugin_output = "fake_age"
+ }
+}
+
+sink {
+ Console {
+ plugin_input = "fake_name"
+ }
+ Console {
+ plugin_input = "fake_age"
+ }
+}
+```
+
+> 如果作业只有一个 source、一个(或零个)transform 和一个 sink,则不需要为连接器指定 `plugin_input` 和 `plugin_output`。
+> 如果 source、transform 和 sink 中任意算子的数量大于 1,则必须为作业中的每个连接器指定 `plugin_input` 和 `plugin_output`。
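+
+下面的片段演示了 `parallelism` 的覆盖行为(仅为示意,沿用上文示例中的 `fake_name` 数据集):
+
+```hocon
+env {
+  parallelism = 1
+}
+
+sink {
+  Console {
+    plugin_input = "fake_name"
+    # sink 级别的 parallelism 会覆盖 env 中的设置
+    parallelism = 2
+  }
+}
+```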
+
diff --git a/docs/zh/connector-v2/sink.md b/docs/zh/connector-v2/sink.md
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/docs/zh/connector-v2/sink/Assert.md b/docs/zh/connector-v2/sink/Assert.md
new file mode 100644
index 00000000000..af94a45ba83
--- /dev/null
+++ b/docs/zh/connector-v2/sink/Assert.md
@@ -0,0 +1,610 @@
+# Assert
+
+> Assert 数据接收器
+
+## 描述
+
+Assert 是一个用于校验数据是否符合用户定义规则的数据接收器。用户可以通过配置规则来断言数据是否符合预期,如果数据不符合规则,将会抛出异常。
+
+## 核心特性
+
+- [ ] [精准一次](../../concept/connector-v2-features.md)
+
+## 配置
+
+| Name | Type | Required | Default |
+|------------------------------------------------------------------------------------------------|-------------------------------------------------|----------|---------|
+| rules | ConfigMap | yes | - |
+| rules.field_rules | string | yes | - |
+| rules.field_rules.field_name                                                                     | string                                            | yes      | -       |
+| rules.field_rules.field_type                                                                     | string\|ConfigMap                                 | no       | -       |
+| rules.field_rules.field_value | ConfigList | no | - |
+| rules.field_rules.field_value.rule_type | string | no | - |
+| rules.field_rules.field_value.rule_value | numeric | no | - |
+| rules.field_rules.field_value.equals_to | boolean\|numeric\|string\|ConfigList\|ConfigMap | no | - |
+| rules.row_rules | string | yes | - |
+| rules.row_rules.rule_type | string | no | - |
+| rules.row_rules.rule_value | string | no | - |
+| rules.catalog_table_rule | ConfigMap | no | - |
+| rules.catalog_table_rule.primary_key_rule | ConfigMap | no | - |
+| rules.catalog_table_rule.primary_key_rule.primary_key_name | string | no | - |
+| rules.catalog_table_rule.primary_key_rule.primary_key_columns | ConfigList | no | - |
+| rules.catalog_table_rule.constraint_key_rule | ConfigList | no | - |
+| rules.catalog_table_rule.constraint_key_rule.constraint_key_name | string | no | - |
+| rules.catalog_table_rule.constraint_key_rule.constraint_key_type | string | no | - |
+| rules.catalog_table_rule.constraint_key_rule.constraint_key_columns | ConfigList | no | - |
+| rules.catalog_table_rule.constraint_key_rule.constraint_key_columns.constraint_key_column_name | string | no | - |
+| rules.catalog_table_rule.constraint_key_rule.constraint_key_columns.constraint_key_sort_type | string | no | - |
+| rules.catalog_table_rule.column_rule | ConfigList | no | - |
+| rules.catalog_table_rule.column_rule.name | string | no | - |
+| rules.catalog_table_rule.column_rule.type | string | no | - |
+| rules.catalog_table_rule.column_rule.column_length | int | no | - |
+| rules.catalog_table_rule.column_rule.nullable | boolean | no | - |
+| rules.catalog_table_rule.column_rule.default_value | string | no | - |
+| rules.catalog_table_rule.column_rule.comment                                                     | string                                            | no       | -       |
+| rules.table-names | ConfigList | no | - |
+| rules.tables_configs | ConfigList | no | - |
+| rules.tables_configs.table_path | String | no | - |
+| common-options | | no | - |
+
+### rules [ConfigMap]
+
+用于定义数据的校验规则。每条规则代表一个字段验证或行数量验证。
+
+### field_rules [ConfigList]
+
+字段规则用于字段验证
+
+### field_name [string]
+
+字段名
+
+### field_type [string | ConfigMap]
+
+字段类型。字段类型应符合此[指南](../../concept/schema-feature.md#如何声明支持的类型)。
+
+### field_value [ConfigList]
+
+字段值规则定义数据值验证
+
+### rule_type [string]
+
+规则类型。目前支持以下规则
+- NOT_NULL `值不能为空`
+- NULL `值可以为空`
+- MIN `定义数据的最小值`
+- MAX `定义数据的最大值`
+- MIN_LENGTH `定义字符串数据的最小长度`
+- MAX_LENGTH `定义字符串数据的最大长度`
+- MIN_ROW `定义最小行数`
+- MAX_ROW `定义最大行数`
+
+### rule_value [numeric]
+
+与规则类型相关的值。当`rule_type`为`MIN`、`MAX`、`MIN_LENGTH`、`MAX_LENGTH`、`MIN_ROW`或`MAX_ROW`时,用户需要为`rule_value`分配一个值。
+
+### equals_to [boolean | numeric | string | ConfigList | ConfigMap]
+
+`equals_to`用于比较字段值是否等于配置的预期值。用户可以将所有类型的值分配给`equals_to`。这些类型在[这里](../../concept/schema-feature.md#目前支持哪些类型)有详细说明。
+例如,如果一个字段是一个包含三个字段的行,行类型的声明是`{a = array, b = map, c={c_0 = int, b = string}}`,用户可以将值`[["a", "b"], { k0 = 9999.99, k1 = 111.11 }, [123, "abcd"]]`分配给`equals_to`。
+
+> 定义字段值的方式与[FakeSource](../../../en/connector-v2/source/FakeSource.md#customize-the-data-content-simple)一致。
+>
+> `equals_to`不能应用于`null`类型字段。但是,用户可以使用规则类型`NULL`进行验证,例如`{rule_type = NULL}`。
+
+### catalog_table_rule [ConfigMap]
+
+catalog_table_rule用于断言Catalog表是否与用户定义的表相同。
+
+### table-names [ConfigList]
+
+用于断言表是否在数据中。
+
+### tables_configs [ConfigList]
+
+用于断言多个表是否在数据中。
+
+### table_path [String]
+
+表的路径。
+
+### common options
+
+Sink 插件的通用参数,请参考 [Sink Common Options](../sink-common-options.md) 了解详情
+
+## 示例
+
+### 简单
+整个Config遵循`hocon`风格
+
+```hocon
+Assert {
+ rules =
+ {
+ row_rules = [
+ {
+ rule_type = MAX_ROW
+ rule_value = 10
+ },
+ {
+ rule_type = MIN_ROW
+ rule_value = 5
+ }
+ ],
+ field_rules = [{
+ field_name = name
+ field_type = string
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ },
+ {
+ rule_type = MIN_LENGTH
+ rule_value = 5
+ },
+ {
+ rule_type = MAX_LENGTH
+ rule_value = 10
+ }
+ ]
+ }, {
+ field_name = age
+ field_type = int
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ equals_to = 23
+ },
+ {
+ rule_type = MIN
+ rule_value = 32767
+ },
+ {
+ rule_type = MAX
+ rule_value = 2147483647
+ }
+ ]
+ }
+ ]
+ catalog_table_rule {
+ primary_key_rule = {
+ primary_key_name = "primary key"
+ primary_key_columns = ["id"]
+ }
+ constraint_key_rule = [
+ {
+ constraint_key_name = "unique_name"
+ constraint_key_type = UNIQUE_KEY
+ constraint_key_columns = [
+ {
+ constraint_key_column_name = "id"
+ constraint_key_sort_type = ASC
+ }
+ ]
+ }
+ ]
+ column_rule = [
+ {
+ name = "id"
+ type = bigint
+ },
+ {
+ name = "name"
+ type = string
+ },
+ {
+ name = "age"
+ type = int
+ }
+ ]
+ }
+ }
+
+ }
+```
+
+### 复杂
+
+这里有一个更复杂的例子,涉及到`equals_to`。
+
+```hocon
+source {
+ FakeSource {
+ row.num = 1
+ schema = {
+ fields {
+ c_null = "null"
+ c_string = string
+ c_boolean = boolean
+ c_tinyint = tinyint
+ c_smallint = smallint
+ c_int = int
+ c_bigint = bigint
+ c_float = float
+ c_double = double
+ c_decimal = "decimal(30, 8)"
+ c_date = date
+ c_timestamp = timestamp
+ c_time = time
+ c_bytes = bytes
+ c_array = "array"
+ c_map = "map"
+ c_map_nest = "map"
+ c_row = {
+ c_null = "null"
+ c_string = string
+ c_boolean = boolean
+ c_tinyint = tinyint
+ c_smallint = smallint
+ c_int = int
+ c_bigint = bigint
+ c_float = float
+ c_double = double
+ c_decimal = "decimal(30, 8)"
+ c_date = date
+ c_timestamp = timestamp
+ c_time = time
+ c_bytes = bytes
+ c_array = "array"
+ c_map = "map"
+ }
+ }
+ }
+ rows = [
+ {
+ kind = INSERT
+ fields = [
+ null, "AAA", false, 1, 1, 333, 323232, 3.1, 9.33333, 99999.99999999, "2012-12-21", "2012-12-21T12:34:56", "12:34:56",
+ "bWlJWmo=",
+ [0, 1, 2],
+ "{ 12:01:26 = v0 }",
+ { k1 = [123, "BBB-BB"]},
+ [
+ null, "AAA", false, 1, 1, 333, 323232, 3.1, 9.33333, 99999.99999999, "2012-12-21", "2012-12-21T12:34:56", "12:34:56",
+ "bWlJWmo=",
+ [0, 1, 2],
+ { k0 = v0 }
+ ]
+ ]
+ }
+ ]
+ plugin_output = "fake"
+ }
+}
+
+sink{
+ Assert {
+ plugin_input = "fake"
+ rules =
+ {
+ row_rules = [
+ {
+ rule_type = MAX_ROW
+ rule_value = 1
+ },
+ {
+ rule_type = MIN_ROW
+ rule_value = 1
+ }
+ ],
+ field_rules = [
+ {
+ field_name = c_null
+ field_type = "null"
+ field_value = [
+ {
+ rule_type = NULL
+ }
+ ]
+ },
+ {
+ field_name = c_string
+ field_type = string
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ equals_to = "AAA"
+ }
+ ]
+ },
+ {
+ field_name = c_boolean
+ field_type = boolean
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ equals_to = false
+ }
+ ]
+ },
+ {
+ field_name = c_tinyint
+ field_type = tinyint
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ equals_to = 1
+ }
+ ]
+ },
+ {
+ field_name = c_smallint
+ field_type = smallint
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ equals_to = 1
+ }
+ ]
+ },
+ {
+ field_name = c_int
+ field_type = int
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ equals_to = 333
+ }
+ ]
+ },
+ {
+ field_name = c_bigint
+ field_type = bigint
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ equals_to = 323232
+ }
+ ]
+ },
+ {
+ field_name = c_float
+ field_type = float
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ equals_to = 3.1
+ }
+ ]
+ },
+ {
+ field_name = c_double
+ field_type = double
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ equals_to = 9.33333
+ }
+ ]
+ },
+ {
+ field_name = c_decimal
+ field_type = "decimal(30, 8)"
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ equals_to = 99999.99999999
+ }
+ ]
+ },
+ {
+ field_name = c_date
+ field_type = date
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ equals_to = "2012-12-21"
+ }
+ ]
+ },
+ {
+ field_name = c_timestamp
+ field_type = timestamp
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ equals_to = "2012-12-21T12:34:56"
+ }
+ ]
+ },
+ {
+ field_name = c_time
+ field_type = time
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ equals_to = "12:34:56"
+ }
+ ]
+ },
+ {
+ field_name = c_bytes
+ field_type = bytes
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ equals_to = "bWlJWmo="
+ }
+ ]
+ },
+ {
+ field_name = c_array
+ field_type = "array"
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ equals_to = [0, 1, 2]
+ }
+ ]
+ },
+ {
+ field_name = c_map
+ field_type = "map"
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ equals_to = "{ 12:01:26 = v0 }"
+ }
+ ]
+ },
+ {
+ field_name = c_map_nest
+ field_type = "map"
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ equals_to = { k1 = [123, "BBB-BB"] }
+ }
+ ]
+ },
+ {
+ field_name = c_row
+ field_type = {
+ c_null = "null"
+ c_string = string
+ c_boolean = boolean
+ c_tinyint = tinyint
+ c_smallint = smallint
+ c_int = int
+ c_bigint = bigint
+ c_float = float
+ c_double = double
+ c_decimal = "decimal(30, 8)"
+ c_date = date
+ c_timestamp = timestamp
+ c_time = time
+ c_bytes = bytes
+ c_array = "array"
+ c_map = "map"
+ }
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ equals_to = [
+ null, "AAA", false, 1, 1, 333, 323232, 3.1, 9.33333, 99999.99999999, "2012-12-21", "2012-12-21T12:34:56", "12:34:56",
+ "bWlJWmo=",
+ [0, 1, 2],
+ { k0 = v0 }
+ ]
+ }
+ ]
+ }
+ ]
+ }
+ }
+}
+```
+
+### 验证多表
+
+验证多个表
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = BATCH
+}
+
+source {
+ FakeSource {
+ tables_configs = [
+ {
+ row.num = 16
+ schema {
+ table = "test.table1"
+ fields {
+ c_int = int
+ c_bigint = bigint
+ }
+ }
+ },
+ {
+ row.num = 17
+ schema {
+ table = "test.table2"
+ fields {
+ c_string = string
+ c_tinyint = tinyint
+ }
+ }
+ }
+ ]
+ }
+}
+
+transform {
+}
+
+sink {
+ Assert {
+ rules =
+ {
+ tables_configs = [
+ {
+ table_path = "test.table1"
+ row_rules = [
+ {
+ rule_type = MAX_ROW
+ rule_value = 16
+ },
+ {
+ rule_type = MIN_ROW
+ rule_value = 16
+ }
+ ],
+ field_rules = [{
+ field_name = c_int
+ field_type = int
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ }, {
+ field_name = c_bigint
+ field_type = bigint
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ }]
+ },
+ {
+ table_path = "test.table2"
+ row_rules = [
+ {
+ rule_type = MAX_ROW
+ rule_value = 17
+ },
+ {
+ rule_type = MIN_ROW
+ rule_value = 17
+ }
+ ],
+ field_rules = [{
+ field_name = c_string
+ field_type = string
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ }, {
+ field_name = c_tinyint
+ field_type = tinyint
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ }]
+ }
+ ]
+
+ }
+ }
+}
+
+```
+
diff --git a/docs/zh/connector-v2/sink/Clickhouse.md b/docs/zh/connector-v2/sink/Clickhouse.md
index 2b5e23d5568..61a359f5c0b 100644
--- a/docs/zh/connector-v2/sink/Clickhouse.md
+++ b/docs/zh/connector-v2/sink/Clickhouse.md
@@ -23,9 +23,9 @@
为了使用 Clickhouse 连接器,需要以下依赖项。它们可以通过 install-plugin.sh 或从 Maven 中央存储库下载。
-| 数据源 | 支持的版本 | 依赖 |
-|------------|-----------|------------------------------------------------------------------------------------------------------------|
-| Clickhouse | universal | [下载](https://mvnrepository.com/artifact/org.apache.seatunnel/seatunnel-connectors-v2/connector-clickhouse) |
+| 数据源 | 支持的版本 | 依赖 |
+|------------|-----------|------------------------------------------------------------------------------------|
+| Clickhouse | universal | [下载](https://mvnrepository.com/artifact/org.apache.seatunnel/connector-clickhouse) |
## 数据类型映射
@@ -58,7 +58,7 @@
| primary_key | String | No | - | 标记`clickhouse`表中的主键列,并根据主键执行INSERT/UPDATE/DELETE到`clickhouse`表. |
| support_upsert | Boolean | No | false | 支持按查询主键更新插入行. |
| allow_experimental_lightweight_delete | Boolean | No | false | 允许基于`MergeTree`表引擎实验性轻量级删除. |
-| common-options | | No | - | Sink插件查用参数,详见[Sink常用选项](common-options.md). |
+| common-options | | No | - | Sink插件常用参数,详见[Sink常用选项](../sink-common-options.md). |
## 如何创建一个clickhouse 同步任务
diff --git a/docs/zh/connector-v2/sink/ClickhouseFile.md b/docs/zh/connector-v2/sink/ClickhouseFile.md
index b36a2982f53..eb06bfeff2d 100644
--- a/docs/zh/connector-v2/sink/ClickhouseFile.md
+++ b/docs/zh/connector-v2/sink/ClickhouseFile.md
@@ -104,7 +104,7 @@ ClickhouseFile本地存储临时文件的目录。
### common options
-Sink插件常用参数,请参考[Sink常用选项](common-options.md)获取更多细节信息。
+Sink插件常用参数,请参考[Sink常用选项](../sink-common-options.md)获取更多细节信息。
## 示例
diff --git a/docs/zh/connector-v2/sink/Console.md b/docs/zh/connector-v2/sink/Console.md
index 43dff335132..19702acb3d1 100644
--- a/docs/zh/connector-v2/sink/Console.md
+++ b/docs/zh/connector-v2/sink/Console.md
@@ -24,11 +24,11 @@
## 接收器选项
-| 名称 | 类型 | 是否必须 | 默认值 | 描述 |
-|--------------------|---------|------|-----|---------------------------------------------------|
-| common-options | | 否 | - | Sink插件常用参数,请参考 [Sink常用选项](common-options.md) 了解详情 |
-| log.print.data | boolean | 否 | - | 确定是否应在日志中打印数据的标志。默认值为`true` |
-| log.print.delay.ms | int | 否 | - | 将每个数据项打印到日志之间的延迟(以毫秒为单位)。默认值为`0` |
+| 名称 | 类型 | 是否必须 | 默认值 | 描述 |
+|--------------------|---------|------|-----|-----------------------------------------------------------|
+| common-options | | 否 | - | Sink插件常用参数,请参考 [Sink常用选项](../sink-common-options.md) 了解详情 |
+| log.print.data | boolean | 否 | - | 确定是否应在日志中打印数据的标志。默认值为`true` |
+| log.print.delay.ms | int | 否 | - | 将每个数据项打印到日志之间的延迟(以毫秒为单位)。默认值为`0` |
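+
+例如,下面的片段同时设置了这两个可选项(数值仅为示意):
+
+```hocon
+sink {
+  Console {
+    # 在日志中打印数据
+    log.print.data = true
+    # 每条数据打印之间延迟 100 毫秒
+    log.print.delay.ms = 100
+  }
+}
+```
+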
## 任务示例
@@ -44,7 +44,7 @@ env {
source {
FakeSource {
- result_table_name = "fake"
+ plugin_output = "fake"
schema = {
fields {
name = "string"
@@ -56,7 +56,7 @@ source {
sink {
Console {
- source_table_name = "fake"
+ plugin_input = "fake"
}
}
```
@@ -73,7 +73,7 @@ env {
source {
FakeSource {
- result_table_name = "fake1"
+ plugin_output = "fake1"
schema = {
fields {
id = "int"
@@ -84,7 +84,7 @@ source {
}
}
FakeSource {
- result_table_name = "fake2"
+ plugin_output = "fake2"
schema = {
fields {
name = "string"
@@ -96,10 +96,10 @@ source {
sink {
Console {
- source_table_name = "fake1"
+ plugin_input = "fake1"
}
Console {
- source_table_name = "fake2"
+ plugin_input = "fake2"
}
}
```
diff --git a/docs/zh/connector-v2/sink/DingTalk.md b/docs/zh/connector-v2/sink/DingTalk.md
index 8f584fc7ad7..aa4de3a209c 100644
--- a/docs/zh/connector-v2/sink/DingTalk.md
+++ b/docs/zh/connector-v2/sink/DingTalk.md
@@ -34,7 +34,7 @@
### common options
-Sink插件的通用参数,请参考 [Sink Common Options](common-options.md) 了解详情
+Sink插件的通用参数,请参考 [Sink Common Options](../sink-common-options.md) 了解详情
## 任务示例
diff --git a/docs/zh/connector-v2/sink/Doris.md b/docs/zh/connector-v2/sink/Doris.md
index afc470326f5..d2176237be0 100644
--- a/docs/zh/connector-v2/sink/Doris.md
+++ b/docs/zh/connector-v2/sink/Doris.md
@@ -53,18 +53,19 @@ Doris Sink连接器的内部实现是通过stream load批量缓存和导入的
### schema_save_mode[Enum]
在开启同步任务之前,针对现有的表结构选择不同的处理方案。
-选项介绍:
+选项介绍:
`RECREATE_SCHEMA` :表不存在时创建,表保存时删除并重建。
-`CREATE_SCHEMA_WHEN_NOT_EXIST` :表不存在时会创建,表存在时跳过。
-`ERROR_WHEN_SCHEMA_NOT_EXIST` :表不存在时会报错。
+`CREATE_SCHEMA_WHEN_NOT_EXIST` :表不存在时会创建,表存在时跳过。
+`ERROR_WHEN_SCHEMA_NOT_EXIST` :表不存在时会报错。
+`IGNORE` :忽略对表的处理。
### data_save_mode[Enum]
在开启同步任务之前,针对目标端已有的数据选择不同的处理方案。
-选项介绍:
-`DROP_DATA`: 保留数据库结构并删除数据。
-`APPEND_DATA`:保留数据库结构,保留数据。
-`CUSTOM_PROCESSING`:用户自定义处理。
+选项介绍:
+`DROP_DATA`: 保留数据库结构并删除数据。
+`APPEND_DATA`:保留数据库结构,保留数据。
+`CUSTOM_PROCESSING`:用户自定义处理。
`ERROR_WHEN_DATA_EXISTS`:有数据时报错。
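+
+结合上述两个选项,一个示意配置片段如下(其余 Doris 连接与写入参数此处省略):
+
+```hocon
+sink {
+  Doris {
+    # 表不存在时自动创建,已存在则跳过
+    schema_save_mode = "CREATE_SCHEMA_WHEN_NOT_EXIST"
+    # 保留目标表中已有的数据
+    data_save_mode = "APPEND_DATA"
+    # 其余连接与写入参数省略
+  }
+}
+```
+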
### save_mode_create_template
@@ -146,6 +147,15 @@ CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}`
支持的格式包括 CSV 和 JSON。
+## 调优指南
+适当增加`sink.buffer-size`和`doris.batch.size`的值可以提高写性能。
+
+在流模式下,如果`doris.batch.size`和`checkpoint.interval`都配置为较大的值,最后到达的数据可能会有较大的延迟(延迟的时间就是检查点间隔的时间)。
+
+这是因为最后到达的数据总量可能不会超过 `doris.batch.size` 指定的阈值。因此,在接收到的数据量未超过该阈值之前,只有检查点才会触发提交操作,所以需要选择一个合适的检查点间隔。
+
+此外,如果你通过 `sink.enable-2pc=true` 属性启用了 2PC,`sink.buffer-size` 将不再生效,只有检查点才能触发提交。
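+
+结合上面的说明,一个批量写入相关参数的示意片段如下(连接参数等必填项请参考下方任务示例,数值仅为示意):
+
+```hocon
+env {
+  # 2PC 模式下,检查点间隔决定了提交频率
+  checkpoint.interval = 10000
+}
+
+sink {
+  Doris {
+    # 其余连接参数省略
+    sink.enable-2pc = "true"
+    sink.buffer-size = 5242880
+    doris.batch.size = 10000
+  }
+}
+```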
+
## 任务示例
### 简单示例:
diff --git a/docs/zh/connector-v2/sink/Elasticsearch.md b/docs/zh/connector-v2/sink/Elasticsearch.md
index edf974d8fba..8682d262274 100644
--- a/docs/zh/connector-v2/sink/Elasticsearch.md
+++ b/docs/zh/connector-v2/sink/Elasticsearch.md
@@ -102,7 +102,7 @@ x-pack 密码
### common options
-Sink插件常用参数,请参考 [Sink常用选项](common-options.md) 了解详情
+Sink插件常用参数,请参考 [Sink常用选项](../sink-common-options.md) 了解详情
### schema_save_mode
@@ -111,6 +111,7 @@ Sink插件常用参数,请参考 [Sink常用选项](common-options.md) 了解
`RECREATE_SCHEMA` :当表不存在时会创建,当表已存在时会删除并重建
`CREATE_SCHEMA_WHEN_NOT_EXIST` :当表不存在时会创建,当表已存在时则跳过创建
`ERROR_WHEN_SCHEMA_NOT_EXIST` :当表不存在时将抛出错误
+`IGNORE` :忽略对表的处理
### data_save_mode
diff --git a/docs/zh/connector-v2/sink/Email.md b/docs/zh/connector-v2/sink/Email.md
index cc3999c580c..9c78fc18f16 100644
--- a/docs/zh/connector-v2/sink/Email.md
+++ b/docs/zh/connector-v2/sink/Email.md
@@ -16,17 +16,18 @@
## 选项
-| 名称 | 类型 | 是否必须 | 默认值 |
-|--------------------------|--------|------|-----|
-| email_from_address | string | 是 | - |
-| email_to_address | string | 是 | - |
-| email_host | string | 是 | - |
-| email_transport_protocol | string | 是 | - |
-| email_smtp_auth | string | 是 | - |
-| email_authorization_code | string | 是 | - |
-| email_message_headline | string | 是 | - |
-| email_message_content | string | 是 | - |
-| common-options | | 否 | - |
+| 名称 | 类型 | 是否必须 | 默认值 |
+|--------------------------|---------|------|-----|
+| email_from_address | string | 是 | - |
+| email_to_address | string | 是 | - |
+| email_host | string | 是 | - |
+| email_transport_protocol | string | 是 | - |
+| email_smtp_auth | boolean | 是 | - |
+| email_smtp_port | int | 否 | 465 |
+| email_authorization_code | string | 否 | - |
+| email_message_headline | string | 是 | - |
+| email_message_content | string | 是 | - |
+| common-options | | 否 | - |
### email_from_address [string]
@@ -34,7 +35,7 @@
### email_to_address [string]
-接收邮件的地址
+接收邮件的地址,支持多个邮箱地址,以逗号(,)分隔。
### email_host [string]
@@ -44,10 +45,14 @@
加载会话的协议
-### email_smtp_auth [string]
+### email_smtp_auth [boolean]
是否对客户进行认证
+### email_smtp_port [int]
+
+选择用于身份验证的端口。
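+
+例如,下面的配置片段展示了多收件人与自定义 SMTP 端口的写法(地址与端口仅为示意值):
+
+```hocon
+# Email sink 中的相关配置片段
+email_from_address = "alarm@example.com"
+# 多个收件人以逗号分隔
+email_to_address = "userA@example.com,userB@example.com"
+email_smtp_auth = true
+# 自定义用于认证的 SMTP 端口
+email_smtp_port = 465
+```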
+
### email_authorization_code [string]
授权码,您可以从邮箱设置中获取授权码
@@ -62,7 +67,7 @@
### common options
-Sink插件常用参数,请参考 [Sink常用选项](common-options.md) 了解详情.
+Sink插件常用参数,请参考 [Sink常用选项](../sink-common-options.md) 了解详情.
## 示例
diff --git a/docs/zh/connector-v2/sink/Feishu.md b/docs/zh/connector-v2/sink/Feishu.md
index c561e50a971..2d9705b3c44 100644
--- a/docs/zh/connector-v2/sink/Feishu.md
+++ b/docs/zh/connector-v2/sink/Feishu.md
@@ -42,11 +42,11 @@
## 接收器选项
-| 名称 | 类型 | 是否必需 | 默认值 | 描述 |
-|----------------|--------|------|-----|----------------------------------------------------|
-| url | String | 是 | - | 飞书web hook URL |
-| headers | Map | 否 | - | HTTP 请求头 |
-| common-options | | 否 | - | 接收器插件常见参数,请参阅 [接收器通用选项](common-options.md) 以获取详细信息 |
+| 名称 | 类型 | 是否必需 | 默认值 | 描述 |
+|----------------|--------|------|-----|------------------------------------------------------------|
+| url | String | 是 | - | 飞书web hook URL |
+| headers | Map | 否 | - | HTTP 请求头 |
+| common-options | | 否 | - | 接收器插件常见参数,请参阅 [接收器通用选项](../sink-common-options.md) 以获取详细信息 |
## 任务示例
diff --git a/docs/zh/connector-v2/sink/Hbase.md b/docs/zh/connector-v2/sink/Hbase.md
index 871cad206c6..f028a8c93ee 100644
--- a/docs/zh/connector-v2/sink/Hbase.md
+++ b/docs/zh/connector-v2/sink/Hbase.md
@@ -102,7 +102,7 @@ hbase 写入数据 TTL 时间,默认以表设置的TTL为准,单位毫秒
### 常见选项
-Sink 插件常用参数,详见 Sink 常用选项 [Sink Common Options](common-options.md)
+Sink 插件常用参数,详见 Sink 常用选项 [Sink Common Options](../sink-common-options.md)
## 案例
@@ -119,6 +119,78 @@ Hbase {
```
+### 写入多表
+
+```hocon
+env {
+ # You can set engine configuration here
+ execution.parallelism = 1
+ job.mode = "BATCH"
+}
+
+source {
+ FakeSource {
+ tables_configs = [
+ {
+ schema = {
+ table = "hbase_sink_1"
+ fields {
+ name = STRING
+ c_string = STRING
+ c_double = DOUBLE
+ c_bigint = BIGINT
+ c_float = FLOAT
+ c_int = INT
+ c_smallint = SMALLINT
+ c_boolean = BOOLEAN
+ time = BIGINT
+ }
+ }
+ rows = [
+ {
+ kind = INSERT
+ fields = ["label_1", "sink_1", 4.3, 200, 2.5, 2, 5, true, 1627529632356]
+ }
+ ]
+ },
+ {
+ schema = {
+ table = "hbase_sink_2"
+ fields {
+ name = STRING
+ c_string = STRING
+ c_double = DOUBLE
+ c_bigint = BIGINT
+ c_float = FLOAT
+ c_int = INT
+ c_smallint = SMALLINT
+ c_boolean = BOOLEAN
+ time = BIGINT
+ }
+ }
+ rows = [
+ {
+ kind = INSERT
+ fields = ["label_2", "sink_2", 4.3, 200, 2.5, 2, 5, true, 1627529632357]
+ }
+ ]
+ }
+ ]
+ }
+}
+
+sink {
+ Hbase {
+ zookeeper_quorum = "hadoop001:2181,hadoop002:2181,hadoop003:2181"
+ table = "${table_name}"
+ rowkey_column = ["name"]
+ family_name {
+ all_columns = info
+ }
+ }
+}
+```
+
## 写入指定列族
```hocon
diff --git a/docs/zh/connector-v2/sink/HdfsFile.md b/docs/zh/connector-v2/sink/HdfsFile.md
index dee466770e6..81081bad94d 100644
--- a/docs/zh/connector-v2/sink/HdfsFile.md
+++ b/docs/zh/connector-v2/sink/HdfsFile.md
@@ -60,7 +60,7 @@
| kerberos_principal | string | 否 | - | kerberos 的主体 |
| kerberos_keytab_path | string | 否 | - | kerberos 的 keytab 路径 |
| compress_codec | string | 否 | none | 压缩编解码器 |
-| common-options | object | 否 | - | 接收器插件通用参数,请参阅 [接收器通用选项](common-options.md) 了解详情 |
+| common-options | object | 否 | - | 接收器插件通用参数,请参阅 [接收器通用选项](../sink-common-options.md) 了解详情 |
| max_rows_in_memory | int | 否 | - | 仅当 file_format 为 excel 时使用。当文件格式为 Excel 时,可以缓存在内存中的最大数据项数。 |
| sheet_name | string | 否 | Sheet${Random number} | 仅当 file_format 为 excel 时使用。将工作簿的表写入指定的表名 |
@@ -87,7 +87,7 @@ source {
# 这是一个示例源插件 **仅用于测试和演示功能源插件**
FakeSource {
parallelism = 1
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 16
schema = {
fields {
@@ -109,7 +109,7 @@ source {
}
}
# 如果您想获取有关如何配置 seatunnel 的更多信息和查看完整的源端插件列表,
- # 请访问 https://seatunnel.apache.org/docs/category/source-v2
+ # 请访问 https://seatunnel.apache.org/docs/connector-v2/source
}
transform {
@@ -124,7 +124,7 @@ sink {
file_format_type = "orc"
}
# 如果您想获取有关如何配置 seatunnel 的更多信息和查看完整的接收器插件列表,
- # 请访问 https://seatunnel.apache.org/docs/category/sink-v2
+ # 请访问 https://seatunnel.apache.org/docs/connector-v2/sink
}
```
diff --git a/docs/zh/connector-v2/sink/Http.md b/docs/zh/connector-v2/sink/Http.md
index f837380efdd..12ce90614ff 100644
--- a/docs/zh/connector-v2/sink/Http.md
+++ b/docs/zh/connector-v2/sink/Http.md
@@ -25,22 +25,22 @@
想使用 Http 连接器,需要安装以下必要的依赖。可以通过运行 install-plugin.sh 脚本或者从 Maven 中央仓库下载这些依赖
-| 数据源 | 支持版本 | 依赖 |
-|------|------|------------------------------------------------------------------------------------------------------|
-| Http | 通用 | [下载](https://mvnrepository.com/artifact/org.apache.seatunnel/seatunnel-connectors-v2/connector-http) |
+| 数据源 | 支持版本 | 依赖 |
+|------|------|------------------------------------------------------------------------------|
+| Http | 通用 | [下载](https://mvnrepository.com/artifact/org.apache.seatunnel/connector-http) |
## 接收器选项
-| 名称 | 类型 | 是否必须 | 默认值 | 描述 |
-|-----------------------------|--------|------|-------|----------------------------------------------------|
-| url | String | 是 | - | Http 请求链接 |
-| headers | Map | 否 | - | Http 标头 |
-| retry | Int | 否 | - | 如果请求http返回`IOException`的最大重试次数 |
-| retry_backoff_multiplier_ms | Int | 否 | 100 | http请求失败,重试回退次数(毫秒)乘数 |
-| retry_backoff_max_ms | Int | 否 | 10000 | http请求失败,最大重试回退时间(毫秒) |
-| connect_timeout_ms | Int | 否 | 12000 | 连接超时设置,默认12s |
-| socket_timeout_ms | Int | 否 | 60000 | 套接字超时设置,默认为60s |
-| common-options | | 否 | - | Sink插件常用参数,请参考 [Sink常用选项 ](common-options.md) 了解详情 |
+| 名称 | 类型 | 是否必须 | 默认值 | 描述 |
+|-----------------------------|--------|------|-------|------------------------------------------------------------|
+| url | String | 是 | - | Http 请求链接 |
+| headers | Map | 否 | - | Http 标头 |
+| retry | Int | 否 | - | 如果请求http返回`IOException`的最大重试次数 |
+| retry_backoff_multiplier_ms | Int | 否 | 100 | http请求失败,重试回退次数(毫秒)乘数 |
+| retry_backoff_max_ms | Int | 否 | 10000 | http请求失败,最大重试回退时间(毫秒) |
+| connect_timeout_ms | Int | 否 | 12000 | 连接超时设置,默认12s |
+| socket_timeout_ms | Int | 否 | 60000 | 套接字超时设置,默认为60s |
+| common-options | | 否 | - | Sink插件常用参数,请参考 [Sink常用选项 ](../sink-common-options.md) 了解详情 |
## 示例
diff --git a/docs/zh/connector-v2/sink/Hudi.md b/docs/zh/connector-v2/sink/Hudi.md
index ab1fc43603f..7d8007f6b03 100644
--- a/docs/zh/connector-v2/sink/Hudi.md
+++ b/docs/zh/connector-v2/sink/Hudi.md
@@ -8,51 +8,97 @@
## 主要特点
-- [x] [exactly-once](../../concept/connector-v2-features.md)
+- [ ] [exactly-once](../../concept/connector-v2-features.md)
- [x] [cdc](../../concept/connector-v2-features.md)
+- [x] [support multiple table write](../../concept/connector-v2-features.md)
## 选项
-| 名称 | 类型 | 是否必需 | 默认值 |
-|----------------------------|--------|------|---------------|
-| table_name | string | 是 | - |
-| table_dfs_path | string | 是 | - |
-| conf_files_path | string | 否 | - |
-| record_key_fields | string | 否 | - |
-| partition_fields | string | 否 | - |
-| table_type | enum | 否 | copy_on_write |
-| op_type | enum | 否 | insert |
-| batch_interval_ms | Int | 否 | 1000 |
-| insert_shuffle_parallelism | Int | 否 | 2 |
-| upsert_shuffle_parallelism | Int | 否 | 2 |
-| min_commits_to_keep | Int | 否 | 20 |
-| max_commits_to_keep | Int | 否 | 30 |
-| common-options | config | 否 | - |
+基础配置:
+
+| 名称                       | 类型   | 是否必需 | 默认值                        |
+|----------------------------|--------|----------|------------------------------|
+| table_dfs_path | string | 是 | - |
+| conf_files_path | string | 否 | - |
+| table_list | string | 否 | - |
+| schema_save_mode | enum | 否 | CREATE_SCHEMA_WHEN_NOT_EXIST |
+| common-options | config | 否 | - |
+
+表清单配置:
+
+| 名称 | 类型 | 是否必需 | 默认值 |
+|----------------------------|--------|----------|---------------|
+| table_name                 | string  | 是       | -             |
+| database                   | string  | 否       | default       |
+| table_type                 | enum    | 否       | COPY_ON_WRITE |
+| op_type                    | enum    | 否       | insert        |
+| record_key_fields          | string  | 否       | -             |
+| partition_fields           | string  | 否       | -             |
+| batch_interval_ms          | Int     | 否       | 1000          |
+| batch_size                 | Int     | 否       | 1000          |
+| insert_shuffle_parallelism | Int     | 否       | 2             |
+| upsert_shuffle_parallelism | Int     | 否       | 2             |
+| min_commits_to_keep        | Int     | 否       | 20            |
+| max_commits_to_keep        | Int     | 否       | 30            |
+| index_type                 | enum    | 否       | BLOOM         |
+| index_class_name           | string  | 否       | -             |
+| record_byte_size           | Int     | 否       | 1024          |
+| cdc_enabled                | boolean | 否       | false         |
+
+注意: 当此配置对应于单个表时,您可以将table_list中的配置项展平到外层。
### table_name [string]
`table_name` Hudi 表的名称。
+### database [string]
+
+`database` Hudi 表所属的 database。
+
### table_dfs_path [string]
-`table_dfs_path` Hudi 表的 DFS 根路径,例如 "hdfs://nameservice/data/hudi/hudi_table/"。
+`table_dfs_path` Hudi 表的 DFS 根路径,例如 "hdfs://nameservice/data/hudi/"。
### table_type [enum]
`table_type` Hudi 表的类型。
+### record_key_fields [string]
+
+`record_key_fields` Hudi 表的记录键字段,当 op_type 是 `UPSERT` 类型时,必须配置该项。
+
+### partition_fields [string]
+
+`partition_fields` Hudi 表的分区字段。
+
+### index_type [string]
+
+`index_type` Hudi 表的索引类型。当前只支持 `BLOOM`、`SIMPLE`、`GLOBAL SIMPLE` 三种类型。
+
+### index_class_name [string]
+
+`index_class_name` Hudi 表自定义索引名称,例如:`org.apache.seatunnel.connectors.seatunnel.hudi.index.CustomHudiIndex`。
+
+### record_byte_size [Int]
+
+`record_byte_size` Hudi 表单行记录的大小,该值可用于预估每个 Hudi 数据文件中记录的大致数量。调整此参数与 `batch_size` 可以有效减少 Hudi 数据文件的写放大次数。
+
### conf_files_path [string]
`conf_files_path` 环境配置文件路径列表(本地路径),用于初始化 HDFS 客户端以读取 Hudi 表文件。示例:"/home/test/hdfs-site.xml;/home/test/core-site.xml;/home/test/yarn-site.xml"。
### op_type [enum]
-`op_type` Hudi 表的操作类型。值可以是 'insert'、'upsert' 或 'bulk_insert'。
+`op_type` Hudi 表的操作类型。值可以是 `insert`、`upsert` 或 `bulk_insert`。
### batch_interval_ms [Int]
`batch_interval_ms` 批量写入 Hudi 表的时间间隔。
+### batch_size [Int]
+
+`batch_size` 批量写入 Hudi 表的记录条数。
+
### insert_shuffle_parallelism [Int]
`insert_shuffle_parallelism` 插入数据到 Hudi 表的并行度。
@@ -69,24 +115,92 @@
`max_commits_to_keep` Hudi 表保留的最多提交数。
+### cdc_enabled [boolean]
+
+`cdc_enabled` 是否持久化 Hudi 表的 CDC 变更日志。启用后,会在必要时持久化变更数据,表可以以 CDC 模式进行查询。
+
+### schema_save_mode [Enum]
+
+在启动同步任务之前,针对目标侧已有的表结构选择不同的处理方案
+选项介绍:
+`RECREATE_SCHEMA`:当表不存在时会创建,当表已存在时会删除并重建
+`CREATE_SCHEMA_WHEN_NOT_EXIST`:当表不存在时会创建,当表已存在时则跳过创建
+`ERROR_WHEN_SCHEMA_NOT_EXIST`:当表不存在时将抛出错误
+`IGNORE` :忽略对表的处理
+
### 通用选项
-数据源插件的通用参数,请参考 [Source Common Options](common-options.md) 了解详细信息。
+Sink 插件的通用参数,请参考 [Sink Common Options](../sink-common-options.md) 了解详细信息。
## 示例
+### 单表
```hocon
-source {
-
+sink {
Hudi {
- table_dfs_path = "hdfs://nameserivce/data/hudi/hudi_table/"
- table_type = "cow"
+    table_dfs_path = "hdfs://nameservice/data/"
+ database = "st"
+ table_name = "test_table"
+ table_type = "COPY_ON_WRITE"
conf_files_path = "/home/test/hdfs-site.xml;/home/test/core-site.xml;/home/test/yarn-site.xml"
+ batch_size = 10000
use.kerberos = true
kerberos.principal = "test_user@xxx"
kerberos.principal.file = "/home/test/test_user.keytab"
}
+}
+```
+### 多表
+```hocon
+env {
+ parallelism = 1
+ job.mode = "STREAMING"
+ checkpoint.interval = 5000
+}
+
+source {
+ Mysql-CDC {
+ base-url = "jdbc:mysql://127.0.0.1:3306/seatunnel"
+ username = "root"
+ password = "******"
+
+ table-names = ["seatunnel.role","seatunnel.user","galileo.Bucket"]
+ }
+}
+
+transform {
+}
+
+sink {
+ Hudi {
+    table_dfs_path = "hdfs://nameservice/data/"
+ conf_files_path = "/home/test/hdfs-site.xml;/home/test/core-site.xml;/home/test/yarn-site.xml"
+ table_list = [
+ {
+ database = "st1"
+ table_name = "role"
+ table_type = "COPY_ON_WRITE"
+ op_type="INSERT"
+ batch_size = 10000
+ },
+ {
+ database = "st1"
+ table_name = "user"
+ table_type = "COPY_ON_WRITE"
+ op_type="UPSERT"
+        # op_type 为 'UPSERT' 时,必须配置 record_key_fields
+ record_key_fields = "user_id"
+ batch_size = 10000
+ },
+ {
+ database = "st1"
+ table_name = "Bucket"
+ table_type = "MERGE_ON_READ"
+ }
+ ]
+ ...
+ }
}
```
diff --git a/docs/zh/connector-v2/sink/Jdbc.md b/docs/zh/connector-v2/sink/Jdbc.md
index d61292cb921..4370af20026 100644
--- a/docs/zh/connector-v2/sink/Jdbc.md
+++ b/docs/zh/connector-v2/sink/Jdbc.md
@@ -79,7 +79,9 @@ JDBC 连接的 URL。参考案例:`jdbc:postgresql://localhost/test`
### compatible_mode [string]
-数据库的兼容模式,当数据库支持多种兼容模式时需要。例如,使用 OceanBase 数据库时,需要将其设置为 'mysql' 或 'oracle' 。
+数据库的兼容模式,当数据库支持多种兼容模式时需要。
+
+例如,使用 OceanBase 数据库时,需要将其设置为 'mysql' 或 'oracle' 。使用StarRocks时,需要将其设置为`starrocks`。
Postgres 9.5及以下版本,请设置为 `postgresLow` 来支持 CDC
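+
+下面给出一个仅供参考的配置片段(连接信息与表名均为假设值),演示如何为 OceanBase 显式设置 `compatible_mode`:
+
+```hocon
+sink {
+  Jdbc {
+    url    = "jdbc:oceanbase://localhost:2881/test"
+    driver = "com.oceanbase.jdbc.Driver"
+    user   = "root"
+    password = "******"
+    # OceanBase 支持多种兼容模式,这里假设以 MySQL 模式运行
+    compatible_mode = "mysql"
+    query = "insert into test_table(name, age) values(?, ?)"
+  }
+}
+```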
@@ -167,7 +169,7 @@ Tip: 如果目标数据库有 SCHEMA 的概念,则表参数必须写成 `xxx.x
### common options
-Sink插件常用参数,请参考 [Sink常用选项](common-options.md) 了解详情
+Sink插件常用参数,请参考 [Sink常用选项](../sink-common-options.md) 了解详情
### schema_save_mode [Enum]
@@ -176,6 +178,7 @@ Sink插件常用参数,请参考 [Sink常用选项](common-options.md) 了解
`RECREATE_SCHEMA`:当表不存在时会创建,当表已存在时会删除并重建
`CREATE_SCHEMA_WHEN_NOT_EXIST`:当表不存在时会创建,当表已存在时则跳过创建
`ERROR_WHEN_SCHEMA_NOT_EXIST`:当表不存在时将抛出错误
+`IGNORE` :忽略对表的处理
### data_save_mode [Enum]
@@ -213,26 +216,27 @@ Sink插件常用参数,请参考 [Sink常用选项](common-options.md) 了解
附录参数仅提供参考
-| 数据源 | driver | url | xa_data_source_class_name | maven |
-|------------|----------------------------------------------|--------------------------------------------------------------------|----------------------------------------------------|-------------------------------------------------------------------------------------------------------------|
-| MySQL | com.mysql.cj.jdbc.Driver | jdbc:mysql://localhost:3306/test | com.mysql.cj.jdbc.MysqlXADataSource | https://mvnrepository.com/artifact/mysql/mysql-connector-java |
-| PostgreSQL | org.postgresql.Driver | jdbc:postgresql://localhost:5432/postgres | org.postgresql.xa.PGXADataSource | https://mvnrepository.com/artifact/org.postgresql/postgresql |
-| DM | dm.jdbc.driver.DmDriver | jdbc:dm://localhost:5236 | dm.jdbc.driver.DmdbXADataSource | https://mvnrepository.com/artifact/com.dameng/DmJdbcDriver18 |
-| Phoenix | org.apache.phoenix.queryserver.client.Driver | jdbc:phoenix:thin:url=http://localhost:8765;serialization=PROTOBUF | / | https://mvnrepository.com/artifact/com.aliyun.phoenix/ali-phoenix-shaded-thin-client |
-| SQL Server | com.microsoft.sqlserver.jdbc.SQLServerDriver | jdbc:sqlserver://localhost:1433 | com.microsoft.sqlserver.jdbc.SQLServerXADataSource | https://mvnrepository.com/artifact/com.microsoft.sqlserver/mssql-jdbc |
-| Oracle | oracle.jdbc.OracleDriver | jdbc:oracle:thin:@localhost:1521/xepdb1 | oracle.jdbc.xa.OracleXADataSource | https://mvnrepository.com/artifact/com.oracle.database.jdbc/ojdbc8 |
-| sqlite | org.sqlite.JDBC | jdbc:sqlite:test.db | / | https://mvnrepository.com/artifact/org.xerial/sqlite-jdbc |
-| GBase8a | com.gbase.jdbc.Driver | jdbc:gbase://e2e_gbase8aDb:5258/test | / | https://www.gbase8.cn/wp-content/uploads/2020/10/gbase-connector-java-8.3.81.53-build55.5.7-bin_min_mix.jar |
-| StarRocks | com.mysql.cj.jdbc.Driver | jdbc:mysql://localhost:3306/test | / | https://mvnrepository.com/artifact/mysql/mysql-connector-java |
-| db2 | com.ibm.db2.jcc.DB2Driver | jdbc:db2://localhost:50000/testdb | com.ibm.db2.jcc.DB2XADataSource | https://mvnrepository.com/artifact/com.ibm.db2.jcc/db2jcc/db2jcc4 |
-| saphana | com.sap.db.jdbc.Driver | jdbc:sap://localhost:39015 | / | https://mvnrepository.com/artifact/com.sap.cloud.db.jdbc/ngdbc |
-| Doris | com.mysql.cj.jdbc.Driver | jdbc:mysql://localhost:3306/test | / | https://mvnrepository.com/artifact/mysql/mysql-connector-java |
-| teradata | com.teradata.jdbc.TeraDriver | jdbc:teradata://localhost/DBS_PORT=1025,DATABASE=test | / | https://mvnrepository.com/artifact/com.teradata.jdbc/terajdbc |
-| Redshift | com.amazon.redshift.jdbc42.Driver | jdbc:redshift://localhost:5439/testdb | com.amazon.redshift.xa.RedshiftXADataSource | https://mvnrepository.com/artifact/com.amazon.redshift/redshift-jdbc42 |
-| Snowflake | net.snowflake.client.jdbc.SnowflakeDriver | jdbc:snowflake://.snowflakecomputing.com | / | https://mvnrepository.com/artifact/net.snowflake/snowflake-jdbc |
-| Vertica | com.vertica.jdbc.Driver | jdbc:vertica://localhost:5433 | / | https://repo1.maven.org/maven2/com/vertica/jdbc/vertica-jdbc/12.0.3-0/vertica-jdbc-12.0.3-0.jar |
-| Kingbase | com.kingbase8.Driver | jdbc:kingbase8://localhost:54321/db_test | / | https://repo1.maven.org/maven2/cn/com/kingbase/kingbase8/8.6.0/kingbase8-8.6.0.jar |
-| OceanBase | com.oceanbase.jdbc.Driver | jdbc:oceanbase://localhost:2881 | / | https://repo1.maven.org/maven2/com/oceanbase/oceanbase-client/2.4.3/oceanbase-client-2.4.3.jar |
+| 数据源 | driver | url | xa_data_source_class_name | maven |
+|------------|----------------------------------------------|--------------------------------------------------------------------|----------------------------------------------------|------------------------------------------------------------------------------------------------------|
+| MySQL | com.mysql.cj.jdbc.Driver | jdbc:mysql://localhost:3306/test | com.mysql.cj.jdbc.MysqlXADataSource | https://mvnrepository.com/artifact/mysql/mysql-connector-java |
+| PostgreSQL | org.postgresql.Driver | jdbc:postgresql://localhost:5432/postgres | org.postgresql.xa.PGXADataSource | https://mvnrepository.com/artifact/org.postgresql/postgresql |
+| DM | dm.jdbc.driver.DmDriver | jdbc:dm://localhost:5236 | dm.jdbc.driver.DmdbXADataSource | https://mvnrepository.com/artifact/com.dameng/DmJdbcDriver18 |
+| Phoenix | org.apache.phoenix.queryserver.client.Driver | jdbc:phoenix:thin:url=http://localhost:8765;serialization=PROTOBUF | / | https://mvnrepository.com/artifact/com.aliyun.phoenix/ali-phoenix-shaded-thin-client |
+| SQL Server | com.microsoft.sqlserver.jdbc.SQLServerDriver | jdbc:sqlserver://localhost:1433 | com.microsoft.sqlserver.jdbc.SQLServerXADataSource | https://mvnrepository.com/artifact/com.microsoft.sqlserver/mssql-jdbc |
+| Oracle | oracle.jdbc.OracleDriver | jdbc:oracle:thin:@localhost:1521/xepdb1 | oracle.jdbc.xa.OracleXADataSource | https://mvnrepository.com/artifact/com.oracle.database.jdbc/ojdbc8 |
+| sqlite | org.sqlite.JDBC | jdbc:sqlite:test.db | / | https://mvnrepository.com/artifact/org.xerial/sqlite-jdbc |
+| GBase8a | com.gbase.jdbc.Driver | jdbc:gbase://e2e_gbase8aDb:5258/test | / | https://cdn.gbase.cn/products/30/p5CiVwXBKQYIUGN8ecHvk/gbase-connector-java-9.5.0.7-build1-bin.jar |
+| StarRocks | com.mysql.cj.jdbc.Driver | jdbc:mysql://localhost:3306/test | / | https://mvnrepository.com/artifact/mysql/mysql-connector-java |
+| db2 | com.ibm.db2.jcc.DB2Driver | jdbc:db2://localhost:50000/testdb | com.ibm.db2.jcc.DB2XADataSource | https://mvnrepository.com/artifact/com.ibm.db2.jcc/db2jcc/db2jcc4 |
+| saphana | com.sap.db.jdbc.Driver | jdbc:sap://localhost:39015 | / | https://mvnrepository.com/artifact/com.sap.cloud.db.jdbc/ngdbc |
+| Doris | com.mysql.cj.jdbc.Driver | jdbc:mysql://localhost:3306/test | / | https://mvnrepository.com/artifact/mysql/mysql-connector-java |
+| teradata | com.teradata.jdbc.TeraDriver | jdbc:teradata://localhost/DBS_PORT=1025,DATABASE=test | / | https://mvnrepository.com/artifact/com.teradata.jdbc/terajdbc |
+| Redshift | com.amazon.redshift.jdbc42.Driver | jdbc:redshift://localhost:5439/testdb | com.amazon.redshift.xa.RedshiftXADataSource | https://mvnrepository.com/artifact/com.amazon.redshift/redshift-jdbc42 |
+| Snowflake | net.snowflake.client.jdbc.SnowflakeDriver | jdbc:snowflake://.snowflakecomputing.com | / | https://mvnrepository.com/artifact/net.snowflake/snowflake-jdbc |
+| Vertica | com.vertica.jdbc.Driver | jdbc:vertica://localhost:5433 | / | https://repo1.maven.org/maven2/com/vertica/jdbc/vertica-jdbc/12.0.3-0/vertica-jdbc-12.0.3-0.jar |
+| Kingbase | com.kingbase8.Driver | jdbc:kingbase8://localhost:54321/db_test | / | https://repo1.maven.org/maven2/cn/com/kingbase/kingbase8/8.6.0/kingbase8-8.6.0.jar |
+| OceanBase | com.oceanbase.jdbc.Driver | jdbc:oceanbase://localhost:2881 | / | https://repo1.maven.org/maven2/com/oceanbase/oceanbase-client/2.4.12/oceanbase-client-2.4.12.jar |
+| opengauss | org.opengauss.Driver | jdbc:opengauss://localhost:5432/postgres | / | https://repo1.maven.org/maven2/org/opengauss/opengauss-jdbc/5.1.0-og/opengauss-jdbc-5.1.0-og.jar |
## 示例
diff --git a/docs/zh/connector-v2/sink/Kafka.md b/docs/zh/connector-v2/sink/Kafka.md
index c0ce9338708..c43b0d41664 100644
--- a/docs/zh/connector-v2/sink/Kafka.md
+++ b/docs/zh/connector-v2/sink/Kafka.md
@@ -24,25 +24,27 @@
为了使用 Kafka 连接器,需要以下依赖项
可以通过 install-plugin.sh 或从 Maven 中央存储库下载
-| 数据源 | 支持版本 | Maven |
-|-------|------|-------------------------------------------------------------------------------------------------------|
-| Kafka | 通用 | [下载](https://mvnrepository.com/artifact/org.apache.seatunnel/seatunnel-connectors-v2/connector-kafka) |
+| 数据源 | 支持版本 | Maven |
+|-------|------|-------------------------------------------------------------------------------|
+| Kafka | 通用 | [下载](https://mvnrepository.com/artifact/org.apache.seatunnel/connector-kafka) |
## 接收器选项
-| 名称 | 类型 | 是否需要 | 默认值 | 描述 |
-|----------------------|--------|------|------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| topic | String | 是 | - | 当表用作接收器时,topic 名称是要写入数据的 topic |
-| bootstrap.servers | String | 是 | - | Kafka brokers 使用逗号分隔 |
-| kafka.config | Map | 否 | - | 除了上述 Kafka Producer 客户端必须指定的参数外,用户还可以为 Producer 客户端指定多个非强制参数,涵盖 [Kafka官方文档中指定的所有生产者参数](https://kafka.apache.org/documentation.html#producerconfigs) |
-| semantics | String | 否 | NON | 可以选择的语义是 EXACTLY_ONCE/AT_LEAST_ONCE/NON,默认 NON。 |
-| partition_key_fields | Array | 否 | - | 配置字段用作 kafka 消息的key |
-| partition | Int | 否 | - | 可以指定分区,所有消息都会发送到此分区 |
-| assign_partitions | Array | 否 | - | 可以根据消息的内容决定发送哪个分区,该参数的作用是分发信息 |
-| transaction_prefix | String | 否 | - | 如果语义指定为EXACTLY_ONCE,生产者将把所有消息写入一个 Kafka 事务中,kafka 通过不同的 transactionId 来区分不同的事务。该参数是kafka transactionId的前缀,确保不同的作业使用不同的前缀 |
-| format | String | 否 | json | 数据格式。默认格式是json。可选文本格式,canal-json、debezium-json 和 avro。如果使用 json 或文本格式。默认字段分隔符是`,`。如果自定义分隔符,请添加`field_delimiter`选项。如果使用canal格式,请参考[canal-json](../formats/canal-json.md)。如果使用debezium格式,请参阅 [debezium-json](../formats/debezium-json.md) 了解详细信息 |
-| field_delimiter | String | 否 | , | 自定义数据格式的字段分隔符 |
-| common-options | | 否 | - | Sink插件常用参数,请参考 [Sink常用选项 ](common-options.md) 了解详情 |
+| 名称 | 类型 | 是否需要 | 默认值 | 描述 |
+|----------------------|--------|------|------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| topic | String | 是 | - | 当表用作接收器时,topic 名称是要写入数据的 topic |
+| bootstrap.servers | String | 是 | - | Kafka brokers 使用逗号分隔 |
+| kafka.config | Map | 否 | - | 除了上述 Kafka Producer 客户端必须指定的参数外,用户还可以为 Producer 客户端指定多个非强制参数,涵盖 [Kafka官方文档中指定的所有生产者参数](https://kafka.apache.org/documentation.html#producerconfigs) |
+| semantics | String | 否 | NON | 可以选择的语义是 EXACTLY_ONCE/AT_LEAST_ONCE/NON,默认 NON。 |
+| partition_key_fields | Array | 否 | - | 配置字段用作 kafka 消息的key |
+| partition | Int | 否 | - | 可以指定分区,所有消息都会发送到此分区 |
+| assign_partitions | Array | 否 | - | 可以根据消息的内容决定发送哪个分区,该参数的作用是分发信息 |
+| transaction_prefix | String | 否 | - | 如果语义指定为EXACTLY_ONCE,生产者将把所有消息写入一个 Kafka 事务中,kafka 通过不同的 transactionId 来区分不同的事务。该参数是kafka transactionId的前缀,确保不同的作业使用不同的前缀 |
+| format               | String | 否    | json | 数据格式。默认格式是 json,可选格式有 text、canal-json、debezium-json、avro 和 protobuf。如果使用 json 或 text 格式,默认字段分隔符是 `,`;如果自定义分隔符,请添加 `field_delimiter` 选项。如果使用 canal 格式,请参考 [canal-json](../formats/canal-json.md);如果使用 debezium 格式,请参阅 [debezium-json](../formats/debezium-json.md) 了解详细信息 |
+| field_delimiter | String | 否 | , | 自定义数据格式的字段分隔符 |
+| common-options | | 否 | - | Sink插件常用参数,请参考 [Sink常用选项 ](../sink-common-options.md) 了解详情 |
+| protobuf_message_name | String | 否 | - | format 配置为 protobuf 时生效,取 Message 名称 |
+| protobuf_schema       | String | 否 | - | format 配置为 protobuf 时生效,取 Schema 定义内容 |
## 参数解释
@@ -110,7 +112,7 @@ env {
source {
FakeSource {
parallelism = 1
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 16
schema = {
fields {
@@ -194,3 +196,56 @@ sink {
}
```
+### Protobuf配置
+
+`format` 设置为 `protobuf`,配置`protobuf`数据结构,`protobuf_message_name`和`protobuf_schema`参数
+
+使用样例:
+
+```hocon
+sink {
+ kafka {
+ topic = "test_protobuf_topic_fake_source"
+ bootstrap.servers = "kafkaCluster:9092"
+ format = protobuf
+ kafka.request.timeout.ms = 60000
+ kafka.config = {
+ acks = "all"
+ request.timeout.ms = 60000
+ buffer.memory = 33554432
+ }
+ protobuf_message_name = Person
+ protobuf_schema = """
+ syntax = "proto3";
+
+ package org.apache.seatunnel.format.protobuf;
+
+ option java_outer_classname = "ProtobufE2E";
+
+ message Person {
+ int32 c_int32 = 1;
+ int64 c_int64 = 2;
+ float c_float = 3;
+ double c_double = 4;
+ bool c_bool = 5;
+ string c_string = 6;
+ bytes c_bytes = 7;
+
+ message Address {
+ string street = 1;
+ string city = 2;
+ string state = 3;
+ string zip = 4;
+ }
+
+ Address address = 8;
+
+        map<string, float> attributes = 9;
+
+ repeated string phone_numbers = 10;
+ }
+ """
+ }
+}
+```
+
diff --git a/docs/zh/connector-v2/sink/LocalFile.md b/docs/zh/connector-v2/sink/LocalFile.md
index 53aa0cb480e..419963ec55b 100644
--- a/docs/zh/connector-v2/sink/LocalFile.md
+++ b/docs/zh/connector-v2/sink/LocalFile.md
@@ -164,7 +164,7 @@
### 常见选项
-Sink 插件的常见参数,请参阅 [Sink 常见选项](common-options.md) 获取详细信息。
+Sink 插件的常见参数,请参阅 [Sink 常见选项](../sink-common-options.md) 获取详细信息。
### max_rows_in_memory [int]
diff --git a/docs/zh/connector-v2/sink/Paimon.md b/docs/zh/connector-v2/sink/Paimon.md
index 50f88731d3e..09f4e63fbfc 100644
--- a/docs/zh/connector-v2/sink/Paimon.md
+++ b/docs/zh/connector-v2/sink/Paimon.md
@@ -30,21 +30,40 @@ libfb303-xxx.jar
## 连接器选项
-| 名称 | 类型 | 是否必须 | 默认值 | 描述 |
-|-----------------------------|-----|------|------------------------------|---------------------------------------------------------------------------------------------------|---|
-| warehouse | 字符串 | 是 | - | Paimon warehouse路径 |
-| catalog_type | 字符串 | 否 | filesystem | Paimon的catalog类型,目前支持filesystem和hive |
-| catalog_uri | 字符串 | 否 | - | Paimon catalog的uri,仅当catalog_type为hive时需要配置 | |
-| database | 字符串 | 是 | - | 数据库名称 |
-| table | 字符串 | 是 | - | 表名 |
-| hdfs_site_path | 字符串 | 否 | - | hdfs-site.xml文件路径 |
-| schema_save_mode | 枚举 | 否 | CREATE_SCHEMA_WHEN_NOT_EXIST | Schema保存模式 |
-| data_save_mode | 枚举 | 否 | APPEND_DATA | 数据保存模式 |
-| paimon.table.primary-keys | 字符串 | 否 | - | 主键字段列表,联合主键使用逗号分隔(注意:分区字段需要包含在主键字段中) |
-| paimon.table.partition-keys | 字符串 | 否 | - | 分区字段列表,多字段使用逗号分隔 |
-| paimon.table.write-props | Map | 否 | - | Paimon表初始化指定的属性, [参考](https://paimon.apache.org/docs/0.6/maintenance/configurations/#coreoptions) |
-| paimon.hadoop.conf | Map | 否 | - | Hadoop配置文件属性信息 |
-| paimon.hadoop.conf-path | 字符串 | 否 | - | Hadoop配置文件目录,用于加载'core-site.xml', 'hdfs-site.xml', 'hive-site.xml'文件配置 |
+| 名称 | 类型 | 是否必须 | 默认值 | 描述 |
+|-----------------------------|------|------|------------------------------|-------------------------------------------------------------------------------------------------------|
+| warehouse | 字符串 | 是 | - | Paimon warehouse路径 |
+| catalog_type | 字符串 | 否 | filesystem | Paimon的catalog类型,目前支持filesystem和hive |
+| catalog_uri | 字符串 | 否 | - | Paimon catalog的uri,仅当catalog_type为hive时需要配置 |
+| database | 字符串 | 是 | - | 数据库名称 |
+| table | 字符串 | 是 | - | 表名 |
+| hdfs_site_path | 字符串 | 否 | - | hdfs-site.xml文件路径 |
+| schema_save_mode | 枚举 | 否 | CREATE_SCHEMA_WHEN_NOT_EXIST | Schema保存模式 |
+| data_save_mode | 枚举 | 否 | APPEND_DATA | 数据保存模式 |
+| paimon.table.primary-keys | 字符串 | 否 | - | 主键字段列表,联合主键使用逗号分隔(注意:分区字段需要包含在主键字段中) |
+| paimon.table.partition-keys | 字符串 | 否 | - | 分区字段列表,多字段使用逗号分隔 |
+| paimon.table.write-props | Map | 否 | - | Paimon表初始化指定的属性, [参考](https://paimon.apache.org/docs/master/maintenance/configurations/#coreoptions) |
+| paimon.hadoop.conf | Map | 否 | - | Hadoop配置文件属性信息 |
+| paimon.hadoop.conf-path | 字符串 | 否 | - | Hadoop配置文件目录,用于加载'core-site.xml', 'hdfs-site.xml', 'hive-site.xml'文件配置 |
+
+## 更新日志
+你必须配置`changelog-producer=input`来启用paimon表的changelog产生模式。如果你使用了paimon sink的自动建表功能,你可以在`paimon.table.write-props`中指定这个属性。
+
+Paimon表的changelog产生模式有[四种](https://paimon.apache.org/docs/master/primary-key-table/changelog-producer/),分别是`none`、`input`、`lookup` 和 `full-compaction`。
+
+目前支持全部`changelog-producer`模式。默认是`none`模式。
+
+* [`none`](https://paimon.apache.org/docs/master/primary-key-table/changelog-producer/#none)
+* [`input`](https://paimon.apache.org/docs/master/primary-key-table/changelog-producer/#input)
+* [`lookup`](https://paimon.apache.org/docs/master/primary-key-table/changelog-producer/#lookup)
+* [`full-compaction`](https://paimon.apache.org/docs/master/primary-key-table/changelog-producer/#full-compaction)
+> 注意:
+> 当你使用流模式去读paimon表的数据时,不同模式将会产生[不同的结果](https://github.com/apache/seatunnel/blob/dev/docs/en/connector-v2/source/Paimon.md#changelog)。
+
+## 文件系统
+Paimon连接器支持向多种文件系统写入数据,目前支持的文件系统有 hdfs 和 s3。
+如果使用 s3 文件系统,可以在 `paimon.hadoop.conf` 选项中配置 `fs.s3a.access-key`、`fs.s3a.secret-key`、`fs.s3a.endpoint`、`fs.s3a.path.style.access`、`fs.s3a.aws.credentials.provider` 等属性。
+除此之外,warehouse 地址应该以 `s3a://` 开头。
## 示例
@@ -79,6 +98,53 @@ sink {
}
```
+### 单表(基于S3文件系统)
+
+```hocon
+env {
+ execution.parallelism = 1
+ job.mode = "BATCH"
+}
+
+source {
+ FakeSource {
+ schema = {
+ fields {
+        c_map = "map<string, string>"
+        c_array = "array<int>"
+ c_string = string
+ c_boolean = boolean
+ c_tinyint = tinyint
+ c_smallint = smallint
+ c_int = int
+ c_bigint = bigint
+ c_float = float
+ c_double = double
+ c_bytes = bytes
+ c_date = date
+ c_decimal = "decimal(38, 18)"
+ c_timestamp = timestamp
+ }
+ }
+ }
+}
+
+sink {
+ Paimon {
+ warehouse = "s3a://test/"
+ database = "seatunnel_namespace11"
+ table = "st_test"
+ paimon.hadoop.conf = {
+ fs.s3a.access-key=G52pnxg67819khOZ9ezX
+ fs.s3a.secret-key=SHJuAQqHsLrgZWikvMa3lJf5T0NfM5LMFliJh9HF
+ fs.s3a.endpoint="http://minio4:9000"
+ fs.s3a.path.style.access=true
+ fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider
+ }
+ }
+}
+```
+
### 单表(指定hadoop HA配置和kerberos配置)
```hocon
@@ -239,9 +305,85 @@ sink {
}
}
```
+#### 使用`changelog-producer`属性写入
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "STREAMING"
+ checkpoint.interval = 5000
+}
+
+source {
+ Mysql-CDC {
+ base-url = "jdbc:mysql://127.0.0.1:3306/seatunnel"
+ username = "root"
+ password = "******"
+ table-names = ["seatunnel.role"]
+ }
+}
+
+sink {
+ Paimon {
+ catalog_name = "seatunnel_test"
+ warehouse = "file:///tmp/seatunnel/paimon/hadoop-sink/"
+ database = "seatunnel"
+ table = "role"
+ paimon.table.write-props = {
+ changelog-producer = full-compaction
+ changelog-tmp-path = /tmp/paimon/changelog
+ }
+ }
+}
+```
+
+### 动态分桶paimon单表
+
+只有在主键表并指定bucket = -1时才会生效
+
+#### 核心参数:[参考官网](https://paimon.apache.org/docs/master/primary-key-table/data-distribution/#dynamic-bucket)
+
+| 名称 | 类型 | 是否必须 | 默认值 | 描述 |
+|--------------------------------|------|------|----------|------------------|
+| dynamic-bucket.target-row-num | long | 是 | 2000000L | 控制一个bucket的写入的行数 |
+| dynamic-bucket.initial-buckets | int | 否 | | 控制初始化桶的数量 |
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "STREAMING"
+ checkpoint.interval = 5000
+}
+
+source {
+ Mysql-CDC {
+ base-url = "jdbc:mysql://127.0.0.1:3306/seatunnel"
+ username = "root"
+ password = "******"
+ table-names = ["seatunnel.role"]
+ }
+}
+
+sink {
+ Paimon {
+ catalog_name="seatunnel_test"
+ warehouse="file:///tmp/seatunnel/paimon/hadoop-sink/"
+ database="seatunnel"
+ table="role"
+ paimon.table.write-props = {
+ bucket = -1
+ dynamic-bucket.target-row-num = 50000
+ }
+ paimon.table.partition-keys = "dt"
+ paimon.table.primary-keys = "pk_id,dt"
+ }
+}
+```
### 多表
+#### 示例1
+
```hocon
env {
parallelism = 1
@@ -271,3 +413,41 @@ sink {
}
```
+#### 示例2
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "BATCH"
+}
+
+source {
+ Jdbc {
+ driver = oracle.jdbc.driver.OracleDriver
+ url = "jdbc:oracle:thin:@localhost:1521/XE"
+ user = testUser
+ password = testPassword
+
+ table_list = [
+ {
+ table_path = "TESTSCHEMA.TABLE_1"
+ },
+ {
+ table_path = "TESTSCHEMA.TABLE_2"
+ }
+ ]
+ }
+}
+
+transform {
+}
+
+sink {
+ Paimon {
+ catalog_name="seatunnel_test"
+ warehouse="file:///tmp/seatunnel/paimon/hadoop-sink/"
+ database="${schema_name}_test"
+ table="${table_name}_test"
+ }
+}
+```
diff --git a/docs/zh/connector-v2/sink/Phoenix.md b/docs/zh/connector-v2/sink/Phoenix.md
index 9a3adc14e5c..67598ebb450 100644
--- a/docs/zh/connector-v2/sink/Phoenix.md
+++ b/docs/zh/connector-v2/sink/Phoenix.md
@@ -30,7 +30,7 @@ phoenix(thin)驱动:`jdbc:phoenix:thin:url=http://localhost:8765;serializa
### common options
-Sink插件常用参数,请参考[Sink常用选项](common-options.md)获取更多细节信息。
+Sink插件常用参数,请参考[Sink常用选项](../sink-common-options.md)获取更多细节信息。
## 示例
diff --git a/docs/zh/connector-v2/sink/Prometheus.md b/docs/zh/connector-v2/sink/Prometheus.md
new file mode 100644
index 00000000000..834d8128aa4
--- /dev/null
+++ b/docs/zh/connector-v2/sink/Prometheus.md
@@ -0,0 +1,101 @@
+# Prometheus
+
+> Prometheus 数据接收器
+
+## 引擎支持
+
+> Spark
+> Flink
+> SeaTunnel Zeta
+
+## 主要特性
+
+- [ ] [exactly-once](../../concept/connector-v2-features.md)
+- [ ] [cdc](../../concept/connector-v2-features.md)
+- [x] [support multiple table write](../../concept/connector-v2-features.md)
+
+## 描述
+
+接收Source端传入的数据,利用数据触发 web hooks。
+
+> 例如,来自上游的数据为 [`label: {"__name__": "test1"}, value: 1.23, time: 2024-08-15T17:00:00`],则 body 内容如下:`{"label":{"__name__": "test1"}, "value":"1.23", "time":"2024-08-15T17:00:00"}`
+
+**Tips: Prometheus 数据接收器仅支持 `post json` 类型的 web hook,source 数据将被视为 webhook 中的 body 内容,并且不支持写入时间过于久远的数据**
+
+## 支持的数据源信息
+
+想使用 Prometheus 连接器,需要安装以下必要的依赖。可以通过运行 install-plugin.sh 脚本或者从 Maven 中央仓库下载这些依赖
+
+| 数据源 | 支持版本 | 依赖 |
+|------|-----------|------------------------------------------------------------------------------------------------------------------|
+| Http | universal | [Download](https://mvnrepository.com/artifact/org.apache.seatunnel/seatunnel-connectors-v2/connector-prometheus) |
+
+## 接收器选项
+
+| Name | Type | Required | Default | Description |
+|-----------------------------|--------|----------|---------|-------------------------------------------------------------------|
+| url | String | Yes | - | Http 请求链接 |
+| headers | Map | No | - | Http 标头 |
+| retry                       | Int    | No       | -       | 当 http 请求返回 `IOException` 时的最大重试次数                      |
+| retry_backoff_multiplier_ms | Int    | No       | 100     | http 请求失败时,重试退避时间(毫秒)的乘数                           |
+| retry_backoff_max_ms        | Int    | No       | 10000   | http 请求失败时,最大重试退避时间(毫秒)                             |
+| connect_timeout_ms          | Int    | No       | 12000   | 连接超时设置,默认12s                                                |
+| socket_timeout_ms           | Int    | No       | 60000   | 套接字超时设置,默认为60s                                            |
+| key_timestamp               | Int    | No       | -       | prometheus 时间戳的 key                                             |
+| key_label                   | String | Yes      | -       | prometheus 标签的 key                                               |
+| key_value                   | Double | Yes      | -       | prometheus 值的 key                                                 |
+| batch_size                  | Int    | No       | 1024    | prometheus 批量写入大小                                             |
+| flush_interval              | Long   | No       | 300000L | prometheus 定时写入的时间间隔                                       |
+| common-options | | No | - | Sink插件常用参数,请参考 [Sink常用选项 ](../sink-common-options.md) 了解详情 |
+
+## 示例
+
+简单示例:
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "BATCH"
+}
+
+source {
+ FakeSource {
+ schema = {
+ fields {
+        c_map = "map<string, string>"
+ c_double = double
+ c_timestamp = timestamp
+ }
+ }
+ plugin_output = "fake"
+ rows = [
+ {
+ kind = INSERT
+ fields = [{"__name__": "test1"}, 1.23, "2024-08-15T17:00:00"]
+ },
+ {
+ kind = INSERT
+ fields = [{"__name__": "test2"}, 1.23, "2024-08-15T17:00:00"]
+ }
+ ]
+ }
+}
+
+
+sink {
+ Prometheus {
+ url = "http://prometheus:9090/api/v1/write"
+ key_label = "c_map"
+ key_value = "c_double"
+ key_timestamp = "c_timestamp"
+ batch_size = 1
+ }
+}
+```
+
+## Changelog
+
+### 2.3.8-beta 2024-08-22
+
+- 添加prometheus接收连接器
+
diff --git a/docs/zh/connector-v2/sink/Pulsar.md b/docs/zh/connector-v2/sink/Pulsar.md
index b85a41ae9df..9f965dcf7ba 100644
--- a/docs/zh/connector-v2/sink/Pulsar.md
+++ b/docs/zh/connector-v2/sink/Pulsar.md
@@ -24,21 +24,21 @@ Apache Pulsar 的接收连接器。
## 输出选项
-| 名称 | 类型 | 是否必须 | 默认值 | 描述 |
-|----------------------|--------|------|---------------------|-----------------------------------------|
-| topic | String | Yes | - | 输出到Pulsar主题名称. |
-| client.service-url | String | Yes | - | Pulsar 服务的服务 URL 提供者. |
-| admin.service-url | String | Yes | - | 管理端点的 Pulsar 服务 HTTP URL. |
-| auth.plugin-class | String | No | - | 身份验证插件的名称. |
-| auth.params | String | No | - | 身份验证插件的参数. |
-| format | String | No | json | 数据格式。默认格式为 json。可选的文本格式. |
-| field_delimiter | String | No | , | 自定义数据格式的字段分隔符. |
-| semantics | Enum | No | AT_LEAST_ONCE | 写入 pulsar 的一致性语义. |
-| transaction_timeout | Int | No | 600 | 默认情况下,事务超时指定为 10 分钟. |
-| pulsar.config | Map | No | - | 除了上述必须由 Pulsar 生产者客户端指定的参数外. |
-| message.routing.mode | Enum | No | RoundRobinPartition | 要分区的消息的默认路由模式. |
-| partition_key_fields | array | No | - | 配置哪些字段用作 pulsar 消息的键. |
-| common-options | config | no | - | 源插件常用参数,详见源码 [常用选项](common-options.md). |
+| 名称 | 类型 | 是否必须 | 默认值 | 描述 |
+|----------------------|--------|------|---------------------|-------------------------------------------------|
+| topic | String | Yes | - | 输出到Pulsar主题名称. |
+| client.service-url | String | Yes | - | Pulsar 服务的服务 URL 提供者. |
+| admin.service-url | String | Yes | - | 管理端点的 Pulsar 服务 HTTP URL. |
+| auth.plugin-class | String | No | - | 身份验证插件的名称. |
+| auth.params | String | No | - | 身份验证插件的参数. |
+| format | String | No | json | 数据格式。默认格式为 json。可选的文本格式. |
+| field_delimiter | String | No | , | 自定义数据格式的字段分隔符. |
+| semantics | Enum | No | AT_LEAST_ONCE | 写入 pulsar 的一致性语义. |
+| transaction_timeout | Int | No | 600 | 默认情况下,事务超时指定为 10 分钟. |
+| pulsar.config | Map | No | - | 除了上述必须由 Pulsar 生产者客户端指定的参数外. |
+| message.routing.mode | Enum | No | RoundRobinPartition | 要分区的消息的默认路由模式. |
+| partition_key_fields | array | No | - | 配置哪些字段用作 pulsar 消息的键. |
+| common-options       | config | no   | -                   | Sink 插件常用参数,详见 [Sink 常用选项](../sink-common-options.md)   |
## 参数解释
@@ -117,7 +117,7 @@ Pulsar 服务的 Service URL 提供程序。要使用客户端库连接到 Pulsa
### 常见选项
-源插件常用参数,详见源码[常用选项](common-options.md) .
+Sink 插件常用参数,详见 [Sink 常用选项](../sink-common-options.md)。
## 任务示例
@@ -136,7 +136,7 @@ env {
source {
FakeSource {
parallelism = 1
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 16
schema = {
fields {
@@ -152,7 +152,7 @@ sink {
topic = "example"
client.service-url = "localhost:pulsar://localhost:6650"
admin.service-url = "http://my-broker.example.com:8080"
- result_table_name = "test"
+ plugin_output = "test"
pulsar.config = {
sendTimeoutMs = 30000
}
diff --git a/docs/zh/connector-v2/sink/Qdrant.md b/docs/zh/connector-v2/sink/Qdrant.md
new file mode 100644
index 00000000000..7394eb85414
--- /dev/null
+++ b/docs/zh/connector-v2/sink/Qdrant.md
@@ -0,0 +1,68 @@
+# Qdrant
+
+> Qdrant 数据连接器
+
+[Qdrant](https://qdrant.tech/) 是一个高性能的向量搜索引擎和向量数据库。
+
+该连接器可用于将数据写入 Qdrant 集合。
+
+## 数据类型映射
+
+| SeaTunnel 数据类型 | Qdrant 数据类型 |
+|---------------------|---------------|
+| TINYINT | INTEGER |
+| SMALLINT | INTEGER |
+| INT | INTEGER |
+| BIGINT | INTEGER |
+| FLOAT | DOUBLE |
+| DOUBLE | DOUBLE |
+| BOOLEAN | BOOL |
+| STRING | STRING |
+| ARRAY | LIST |
+| FLOAT_VECTOR | DENSE_VECTOR |
+| BINARY_VECTOR | DENSE_VECTOR |
+| FLOAT16_VECTOR | DENSE_VECTOR |
+| BFLOAT16_VECTOR | DENSE_VECTOR |
+| SPARSE_FLOAT_VECTOR | SPARSE_VECTOR |
+
+主键列的值将用作 Qdrant 中的点 ID。如果没有主键,则将使用随机 UUID。
+
+## 选项
+
+| 名称 | 类型 | 必填 | 默认值 |
+|-----------------|--------|----|-----------|
+| collection_name | string | 是 | - |
+| batch_size | int | 否 | 64 |
+| host | string | 否 | localhost |
+| port | int | 否 | 6334 |
+| api_key | string | 否 | - |
+| use_tls | bool | 否 | false |
+| common-options | | 否 | - |
+
+### collection_name [string]
+
+要写入数据的 Qdrant 集合的名称。
+
+### batch_size [int]
+
+每个 upsert 请求到 Qdrant 的批量大小。
+
+### host [string]
+
+Qdrant 实例的主机名。默认为 "localhost"。
+
+### port [int]
+
+Qdrant 实例的 gRPC 端口。
+
+### api_key [string]
+
+用于身份验证的 API 密钥(如果设置)。
+
+### use_tls [bool]
+
+是否使用 TLS(SSL)连接。如果使用 Qdrant Cloud(https),则需要开启。
+
+### 通用选项
+
+Sink 插件的通用参数,请参考 [Sink 常用选项](../sink-common-options.md) 了解详情。
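+
+## 示例
+
+下面是一个仅供参考的最小写入示例(集合名、主机与 API 密钥均为假设值),实际任务中还需配合上游 source 一起使用:
+
+```hocon
+sink {
+  Qdrant {
+    collection_name = "seatunnel_collection"
+    host = "localhost"
+    port = 6334
+    api_key = "xxx"
+    batch_size = 64
+    # 如果连接 Qdrant Cloud(https),需要开启 use_tls
+    use_tls = false
+  }
+}
+```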
diff --git a/docs/zh/connector-v2/sink/Rabbitmq.md b/docs/zh/connector-v2/sink/Rabbitmq.md
index 6562dd2fdc5..02d2b5c17d2 100644
--- a/docs/zh/connector-v2/sink/Rabbitmq.md
+++ b/docs/zh/connector-v2/sink/Rabbitmq.md
@@ -90,7 +90,7 @@ In addition to the above parameters that must be specified by the RabbitMQ clien
### common options
-Sink插件常用参数,请参考[Sink常用选项](common-options.md)获取更多细节信息。
+Sink插件常用参数,请参考[Sink常用选项](../sink-common-options.md)获取更多细节信息。
## 示例
diff --git a/docs/zh/connector-v2/sink/Redis.md b/docs/zh/connector-v2/sink/Redis.md
index ac09849b7eb..d4bb13cd888 100644
--- a/docs/zh/connector-v2/sink/Redis.md
+++ b/docs/zh/connector-v2/sink/Redis.md
@@ -12,20 +12,25 @@
## 选项
-| 名称 | 类型 | 是否必须 | 默认值 |
-|----------------|--------|---------------------|--------|
-| host | string | 是 | - |
-| port | int | 是 | - |
-| key | string | 是 | - |
-| data_type | string | 是 | - |
-| user | string | 否 | - |
-| auth | string | 否 | - |
-| db_num | int | 否 | 0 |
-| mode | string | 否 | single |
-| nodes | list | 当 mode=cluster 时为:是 | - |
-| format | string | 否 | json |
-| expire | long | 否 | -1 |
-| common-options | | 否 | - |
+| 名称               | 类型    | 是否必须                | 默认值  |
+|--------------------|---------|------------------------|--------|
+| host               | string  | 是                     | -      |
+| port               | int     | 是                     | -      |
+| key                | string  | 是                     | -      |
+| data_type          | string  | 是                     | -      |
+| batch_size         | int     | 否                     | 10     |
+| user               | string  | 否                     | -      |
+| auth               | string  | 否                     | -      |
+| db_num             | int     | 否                     | 0      |
+| mode               | string  | 否                     | single |
+| nodes              | list    | 当 mode=cluster 时为:是 | -      |
+| format             | string  | 否                     | json   |
+| expire             | long    | 否                     | -1     |
+| support_custom_key | boolean | 否                     | false  |
+| value_field        | string  | 否                     | -      |
+| hash_key_field     | string  | 否                     | -      |
+| hash_value_field   | string  | 否                     | -      |
+| common-options     |         | 否                     | -      |
### host [string]
@@ -48,13 +53,12 @@ Redis 端口
| 200 | 获取成功 | true |
| 500 | 内部错误 | false |
-如果将字段名称指定为 `code` 并将 data_type 设置为 `key`,将有两个数据写入 Redis:
-1. `200 -> {code: 200, message: true, data: 获取成功}`
-2. `500 -> {code: 500, message: false, data: 内部错误}`
-
-如果将字段名称指定为 `value` 并将 data_type 设置为 `key`,则由于上游数据的字段中没有 `value` 字段,将只有一个数据写入 Redis:
-
-1. `value -> {code: 500, message: false, data: 内部错误}`
+如果将字段名称指定为 code 并将 data_type 设置为 key,将有两个数据写入 Redis:
+1. `200 -> {code: 200, data: 获取成功, success: true}`
+2. `500 -> {code: 500, data: 内部错误, success: false}`
+
+如果将字段名称指定为 value 并将 data_type 设置为 key,则由于上游数据的字段中没有 value 字段,将只有一个数据写入 Redis:
+1. `value -> {code: 500, data: 内部错误, success: false}`
请参见 data_type 部分以了解具体的写入规则。
@@ -128,9 +132,62 @@ Redis 节点信息,在集群模式下使用,必须按如下格式:
设置 Redis 的过期时间,单位为秒。默认值为 -1,表示键不会自动过期。
+### support_custom_key [boolean]
+
+设置为true,表示启用自定义Key。
+
+上游数据如下:
+
+| code | data | success |
+|------|------|---------|
+| 200 | 获取成功 | true |
+| 500 | 内部错误 | false |
+
+可以使用`{`和`}`符号自定义Redis键名,`{}`中的字段名会被解析替换为上游数据中的某个字段值,例如:将字段名称指定为 `{code}` 并将 data_type 设置为 `key`,将有两个数据写入 Redis:
+1. `200 -> {code: 200, data: 获取成功, success: true}`
+2. `500 -> {code: 500, data: 内部错误, success: false}`
+
+Redis键名可以由固定部分和变化部分组成,通过Redis分组符号:连接,例如:将字段名称指定为 `code:{code}` 并将 data_type 设置为 `key`,将有两个数据写入 Redis:
+1. `code:200 -> {code: 200, data: 获取成功, success: true}`
+2. `code:500 -> {code: 500, data: 内部错误, success: false}`
+
+### value_field [string]
+
+要写入Redis的值的字段, `data_type` 支持 `key` `list` `set` `zset`.
+
+当你将 Redis 键名字段 `key` 指定为 `value`,值字段 `value_field` 指定为 `data`,并将 `data_type` 指定为 `key` 时,
+
+上游数据如下:
+
+| code | data | success |
+|------|------|---------|
+| 200 | 获取成功 | true |
+
+如下的数据会被写入Redis:
+1. `value -> 获取成功`
+
+### hash_key_field [string]
+
+要写入Redis的hash键字段, `data_type` 支持 `hash`
+
+### hash_value_field [string]
+
+要写入Redis的hash值字段, `data_type` 支持 `hash`
+
+当你将 Redis 键名字段 `key` 指定为 `value`,hash 键字段 `hash_key_field` 指定为 `data`,hash 值字段 `hash_value_field` 指定为 `success`,并将 `data_type` 指定为 `hash` 时,
+
+上游数据如下:
+
+| code | data | success |
+|------|------|---------|
+| 200 | 获取成功 | true |
+
+如下的数据会被写入Redis:
+1. `value -> 获取成功 | true`
+
### common options
-Sink 插件通用参数,请参考 [Sink Common Options](common-options.md) 获取详情
+Sink 插件通用参数,请参考 [Sink Common Options](../sink-common-options.md) 获取详情
## 示例
@@ -145,6 +202,43 @@ Redis {
}
```
+自定义Key示例:
+
+```hocon
+Redis {
+ host = localhost
+ port = 6379
+ key = "name:{name}"
+ support_custom_key = true
+ data_type = key
+}
+```
+
+自定义Value示例:
+
+```hocon
+Redis {
+ host = localhost
+ port = 6379
+ key = person
+ value_field = "name"
+ data_type = key
+}
+```
+
+自定义HashKey和HashValue示例:
+
+```hocon
+Redis {
+ host = localhost
+ port = 6379
+ key = person
+ hash_key_field = "name"
+ hash_value_field = "age"
+ data_type = hash
+}
+```
+
## 更新日志
### 2.2.0-beta 2022-09-26
diff --git a/docs/zh/connector-v2/sink/Sls.md b/docs/zh/connector-v2/sink/Sls.md
new file mode 100644
index 00000000000..94e4f3c07a8
--- /dev/null
+++ b/docs/zh/connector-v2/sink/Sls.md
@@ -0,0 +1,84 @@
+# Sls
+
+> Sls 数据接收器
+
+## 支持的引擎
+
+> Spark
+> Flink
+> SeaTunnel Zeta
+
+## 主要特性
+
+- [ ] [exactly-once](../../concept/connector-v2-features.md)
+- [ ] [cdc](../../concept/connector-v2-features.md)
+
+## 描述
+
+用于阿里云 Sls 日志服务的 Sink 连接器。
+
+将数据写入阿里云 Sls 日志服务。
+
+为了使用Sls连接器,需要以下依赖关系。
+它们可以通过install-plugin.sh或Maven中央存储库下载。
+
+| 数据源 | 支持版本 | Maven |
+|------------|--------------------|-----------------------------------------------------------------------------------|
+| Sls | Universal | [Download](https://mvnrepository.com/artifact/org.apache.seatunnel/connector-sls) |
+
+## 接收器选项
+
+| 名称 | 类型 | 是否必须 | 默认值 | 描述 |
+|-------------------------------------|----------|----------|-------------------|------------------------------------------------------------------------------------------------------------------------------------|
+| project | String | Yes | - | [阿里云 Sls 项目](https://help.aliyun.com/zh/sls/user-guide/manage-a-project?spm=a2c4g.11186623.0.0.6f9755ebyfaYSl) |
+| logstore | String | Yes | - | [阿里云 Sls 日志库](https://help.aliyun.com/zh/sls/user-guide/manage-a-logstore?spm=a2c4g.11186623.0.0.13137c08nfuiBC) |
+| endpoint | String | Yes | - | [阿里云访问服务点](https://help.aliyun.com/zh/sls/developer-reference/api-sls-2020-12-30-endpoint?spm=a2c4g.11186623.0.0.548945a8UyJULa) |
+| access_key_id | String | Yes | - | [阿里云访问用户ID](https://help.aliyun.com/zh/ram/user-guide/create-an-accesskey-pair?spm=a2c4g.11186623.0.0.4a6e4e554CKhSc#task-2245479) |
+| access_key_secret | String | Yes | - | [阿里云访问用户密码](https://help.aliyun.com/zh/ram/user-guide/create-an-accesskey-pair?spm=a2c4g.11186623.0.0.4a6e4e554CKhSc#task-2245479) |
+| source | String | No | SeaTunnel-Source | 在sls中数据来源标记 |
+| topic | String | No | SeaTunnel-Topic | 在sls中数据主题标记 |
+
+## 任务示例
+
+### 简单示例
+
+> 此示例写入 Sls 的 logstore1 的数据。如果您尚未安装和部署 SeaTunnel,则需要按照安装 SeaTunnel 中的说明安装和部署 SeaTunnel。然后按照[快速启动 SeaTunnel 引擎](../../start-v2/locally/quick-start-seatunnel-engine.md)中的说明运行此作业。
+
+[创建RAM用户及授权](https://help.aliyun.com/zh/sls/create-a-ram-user-and-authorize-the-ram-user-to-access-log-service?spm=a2c4g.11186623.0.i4), 请确认RAM用户有足够的权限来读取及管理数据,参考:[RAM自定义授权示例](https://help.aliyun.com/zh/sls/use-custom-policies-to-grant-permissions-to-a-ram-user?spm=a2c4g.11186623.0.0.4a6e4e554CKhSc#reference-s3z-m1l-z2b)
+
+```hocon
+# Defining the runtime environment
+env {
+ parallelism = 2
+ job.mode = "STREAMING"
+ checkpoint.interval = 30000
+}
+source {
+ FakeSource {
+ row.num = 10
+ map.size = 10
+ array.size = 10
+ bytes.length = 10
+ string.length = 10
+ schema = {
+ fields = {
+ id = "int"
+ name = "string"
+ description = "string"
+ weight = "string"
+ }
+ }
+ }
+}
+
+sink {
+ Sls {
+ endpoint = "cn-hangzhou-intranet.log.aliyuncs.com"
+ project = "project1"
+ logstore = "logstore1"
+ access_key_id = "xxxxxxxxxxxxxxxxxxxxxxxx"
+ access_key_secret = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+ }
+}
+```
+
diff --git a/docs/zh/connector-v2/sink/StarRocks.md b/docs/zh/connector-v2/sink/StarRocks.md
index 6be7ff7e8e0..1b699f2c4c4 100644
--- a/docs/zh/connector-v2/sink/StarRocks.md
+++ b/docs/zh/connector-v2/sink/StarRocks.md
@@ -99,10 +99,11 @@ table选项参数可以填入一任意表名,这个名字最终会被用作目
### schema_save_mode[Enum]
-在同步任务打开之前,针对目标端已存在的表结构选择不同的处理方法。可选值有:
-`RECREATE_SCHEMA` :不存在的表会直接创建,已存在的表会删除并根据参数重新创建
-`CREATE_SCHEMA_WHEN_NOT_EXIST` :忽略已存在的表,不存在的表会直接创建
-`ERROR_WHEN_SCHEMA_NOT_EXIST` :当有不存在的表时会直接报错
+在同步任务打开之前,针对目标端已存在的表结构选择不同的处理方法。可选值有:
+`RECREATE_SCHEMA` :不存在的表会直接创建,已存在的表会删除并根据参数重新创建
+`CREATE_SCHEMA_WHEN_NOT_EXIST` :忽略已存在的表,不存在的表会直接创建
+`ERROR_WHEN_SCHEMA_NOT_EXIST` :当有不存在的表时会直接报错
+`IGNORE` :忽略对表的处理
### data_save_mode[Enum]
@@ -185,6 +186,7 @@ source {
sink {
StarRocks {
nodeUrls = ["e2e_starRocksdb:8030"]
+ base-url = "jdbc:mysql://e2e_starRocksdb:9030/"
username = root
password = ""
database = "test"
@@ -204,6 +206,7 @@ sink {
sink {
StarRocks {
nodeUrls = ["e2e_starRocksdb:8030"]
+ base-url = "jdbc:mysql://e2e_starRocksdb:9030/"
username = root
password = ""
database = "test"
@@ -222,6 +225,7 @@ sink {
sink {
StarRocks {
nodeUrls = ["e2e_starRocksdb:8030"]
+ base-url = "jdbc:mysql://e2e_starRocksdb:9030/"
username = root
password = ""
database = "test"
@@ -242,6 +246,7 @@ sink {
sink {
StarRocks {
nodeUrls = ["e2e_starRocksdb:8030"]
+ base-url = "jdbc:mysql://e2e_starRocksdb:9030/"
username = root
password = ""
database = "test"
@@ -262,6 +267,7 @@ sink {
sink {
StarRocks {
nodeUrls = ["e2e_starRocksdb:8030"]
+ base-url = "jdbc:mysql://e2e_starRocksdb:9030/"
username = root
password = ""
database = "test"
diff --git a/docs/zh/connector-v2/sink/Typesense.md b/docs/zh/connector-v2/sink/Typesense.md
new file mode 100644
index 00000000000..f6c06e5f2b5
--- /dev/null
+++ b/docs/zh/connector-v2/sink/Typesense.md
@@ -0,0 +1,95 @@
+# Typesense
+
+## 描述
+
+输出数据到 `Typesense`
+
+## 主要特性
+
+- [ ] [精确一次](../../concept/connector-v2-features.md)
+- [x] [cdc](../../concept/connector-v2-features.md)
+
+## 选项
+
+| 名称 | 类型 | 是否必须 | 默认值 |
+|------------------|--------|------|------------------------------|
+| hosts | array | 是 | - |
+| collection | string | 是 | - |
+| schema_save_mode | string | 是 | CREATE_SCHEMA_WHEN_NOT_EXIST |
+| data_save_mode | string | 是 | APPEND_DATA |
+| primary_keys | array | 否 | |
+| key_delimiter | string | 否 | `_` |
+| api_key | string | 否 | |
+| max_retry_count | int | 否 | 3 |
+| max_batch_size | int | 否 | 10 |
+| common-options | | 否 | - |
+
+### hosts [array]
+
+Typesense的访问地址,格式为 `host:port`,例如:["typesense-01:8108"]
+
+### collection [string]
+
+要写入的集合名,例如:“seatunnel”
+
+### primary_keys [array]
+
+主键字段用于生成文档 `id`。
+
+### key_delimiter [string]
+
+设定复合键的分隔符(默认为 `_`)。
+
+### api_key [config]
+
+typesense 安全认证的 api_key。
+
+### max_retry_count [int]
+
+批量请求失败时的最大重试次数
+
+### max_batch_size [int]
+
+单次批量写入的最大文档数
+
+### common options
+
+Sink插件常用参数,请参考 [Sink常用选项](../sink-common-options.md) 了解详情
+
+### schema_save_mode
+
+在启动同步任务之前,针对目标侧已有的表结构选择不同的处理方案
+选项介绍:
+`RECREATE_SCHEMA` :当表不存在时会创建,当表已存在时会删除并重建
+`CREATE_SCHEMA_WHEN_NOT_EXIST` :当表不存在时会创建,当表已存在时则跳过创建
+`ERROR_WHEN_SCHEMA_NOT_EXIST` :当表不存在时将抛出错误
+
+### data_save_mode
+
+在启动同步任务之前,针对目标侧已存在的数据选择不同的处理方案
+选项介绍:
+`DROP_DATA`: 保留数据库结构,删除数据
+`APPEND_DATA`:保留数据库结构,保留数据
+`ERROR_WHEN_DATA_EXISTS`:当有数据时抛出错误
+
+## 示例
+
+简单示例
+
+```bash
+sink {
+ Typesense {
+ plugin_input = "typesense_test_table"
+ hosts = ["localhost:8108"]
+ collection = "typesense_to_typesense_sink_with_query"
+ max_retry_count = 3
+ max_batch_size = 10
+ api_key = "xyz"
+ primary_keys = ["num_employees","id"]
+ key_delimiter = "="
+ schema_save_mode = "CREATE_SCHEMA_WHEN_NOT_EXIST"
+ data_save_mode = "APPEND_DATA"
+ }
+}
+```
+
diff --git a/docs/zh/connector-v2/sink/common-options.md b/docs/zh/connector-v2/sink/common-options.md
deleted file mode 100644
index 8569b46da0e..00000000000
--- a/docs/zh/connector-v2/sink/common-options.md
+++ /dev/null
@@ -1,58 +0,0 @@
-# Sink 常用选项
-
-> Sink 连接器常用参数
-
-| 名称 | 类型 | 是否需要 | 默认值 |
-|-------------------|--------|------|-----|
-| source_table_name | string | 否 | - |
-| parallelism | int | 否 | - |
-
-### source_table_name [string]
-
-当不指定 `source_table_name` 时,当前插件处理配置文件中上一个插件输出的数据集 `dataset`
-
-当指定了 `source_table_name` 时,当前插件正在处理该参数对应的数据集
-
-### parallelism [int]
-
-当没有指定`parallelism`时,默认使用 env 中的 `parallelism`。
-
-当指定 `parallelism` 时,它将覆盖 env 中的 `parallelism`。
-
-## Examples
-
-```bash
-source {
- FakeSourceStream {
- parallelism = 2
- result_table_name = "fake"
- field_name = "name,age"
- }
-}
-
-transform {
- Filter {
- source_table_name = "fake"
- fields = [name]
- result_table_name = "fake_name"
- }
- Filter {
- source_table_name = "fake"
- fields = [age]
- result_table_name = "fake_age"
- }
-}
-
-sink {
- Console {
- source_table_name = "fake_name"
- }
- Console {
- source_table_name = "fake_age"
- }
-}
-```
-
-> 如果作业只有一个 source 和一个(或零个)transform 和一个 sink ,则不需要为连接器指定 `source_table_name` 和 `result_table_name`。
-> 如果 source 、transform 和 sink 中任意运算符的数量大于 1,则必须为作业中的每个连接器指定 `source_table_name` 和 `result_table_name`
-
diff --git a/docs/zh/connector-v2/source-common-options.md b/docs/zh/connector-v2/source-common-options.md
new file mode 100644
index 00000000000..9a95c163390
--- /dev/null
+++ b/docs/zh/connector-v2/source-common-options.md
@@ -0,0 +1,91 @@
+---
+sidebar_position: 3
+---
+
+# Source Common Options
+
+> Source connector 的常用参数
+
+:::warn
+
+旧的配置名称 `result_table_name` 已经过时,请尽快迁移到新名称 `plugin_output`。
+
+:::
+
+| 名称 | 类型 | 必填 | 默认值 | 描述 |
+|---------------|--------|----|-----|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| plugin_output | String | 否 | - | 当未指定 `plugin_output` 时,此插件处理的数据将不会被注册为可由其他插件直接访问的数据集 `(dataStream/dataset)`,或称为临时表 `(table)`。 当指定了 `plugin_output` 时,此插件处理的数据将被注册为可由其他插件直接访问的数据集 `(dataStream/dataset)`,或称为临时表 `(table)`。此处注册的数据集 `(dataStream/dataset)` 可通过指定 `plugin_input` 直接被其他插件访问。 |
+| parallelism | Int | 否 | - | 当未指定 `parallelism` 时,默认使用环境中的 `parallelism`。 当指定了 `parallelism` 时,将覆盖环境中的 `parallelism` 设置。 |
+
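+下面是一个仅供参考的片段(FakeSource 仅作示意),演示如何通过 `plugin_output` 注册临时表,并在单个 source 上用 `parallelism` 覆盖 env 中的全局并行度:
+
+```hocon
+env {
+  parallelism = 1
+}
+
+source {
+  FakeSource {
+    # 覆盖 env 中的 parallelism = 1
+    parallelism = 2
+    plugin_output = "fake_table"
+  }
+}
+```
+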
+## 重要提示
+
+在作业配置中使用 `plugin_output` 时,必须设置 `plugin_input` 参数。
+
+## 任务示例
+
+### 简单示例
+
+> 注册一个流或批处理数据源,并在注册时返回表名 `fake_table`
+
+```bash
+source {
+ FakeSourceStream {
+ plugin_output = "fake_table"
+ }
+}
+```
+
+### 复杂示例
+
+> 这是将Fake数据源转换并写入到两个不同的目标中
+
+```bash
+env {
+ job.mode = "BATCH"
+}
+
+source {
+ FakeSource {
+ plugin_output = "fake"
+ row.num = 100
+ schema = {
+ fields {
+ id = "int"
+ name = "string"
+ age = "int"
+ c_timestamp = "timestamp"
+ c_date = "date"
+        c_map = "map<string, string>"
+        c_array = "array<int>"
+ c_decimal = "decimal(30, 8)"
+ c_row = {
+ c_row = {
+ c_int = int
+ }
+ }
+ }
+ }
+ }
+}
+
+transform {
+ Sql {
+ plugin_input = "fake"
+ plugin_output = "fake1"
+ # 查询表名必须与字段 'plugin_input' 相同
+ query = "select id, regexp_replace(name, '.+', 'b') as name, age+1 as age, pi() as pi, c_timestamp, c_date, c_map, c_array, c_decimal, c_row from fake"
+ }
+ # SQL 转换支持基本函数和条件操作
+ # 但不支持复杂的 SQL 操作,包括:多源表/行 JOIN 和聚合操作等
+}
+
+sink {
+ Console {
+ plugin_input = "fake1"
+ }
+ Console {
+ plugin_input = "fake"
+ }
+}
+```
+
diff --git a/docs/zh/connector-v2/source.md b/docs/zh/connector-v2/source.md
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/docs/zh/connector-v2/source/Doris.md b/docs/zh/connector-v2/source/Doris.md
new file mode 100644
index 00000000000..ba3549473a5
--- /dev/null
+++ b/docs/zh/connector-v2/source/Doris.md
@@ -0,0 +1,212 @@
+# Doris
+
+> Doris 源连接器
+
+## 支持的引擎
+
+> Spark
+> Flink
+> SeaTunnel Zeta
+
+## 主要功能
+
+- [x] [批处理](../../concept/connector-v2-features.md)
+- [ ] [流处理](../../concept/connector-v2-features.md)
+- [ ] [精确一次](../../concept/connector-v2-features.md)
+- [x] [列投影](../../concept/connector-v2-features.md)
+- [x] [并行度](../../concept/connector-v2-features.md)
+- [x] [支持用户自定义分片](../../concept/connector-v2-features.md)
+- [x] [支持多表读](../../concept/connector-v2-features.md)
+
+## 描述
+
+用于 Apache Doris 的源连接器。
+
+## 支持的数据源信息
+
+| 数据源 | 支持版本 | 驱动 | Url | Maven |
+|------------|--------------------------------------|--------|-----|-------|
+| Doris      | 仅支持 Doris 2.0 及以上版本           | -      | -   | -     |
+
+## 数据类型映射
+
+| Doris 数据类型 | SeaTunnel 数据类型 |
+|--------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------|
+| INT | INT |
+| TINYINT | TINYINT |
+| SMALLINT | SMALLINT |
+| BIGINT | BIGINT |
+| LARGEINT | STRING |
+| BOOLEAN | BOOLEAN |
+| DECIMAL                              | DECIMAL(指定列的精度+1, 指定列的小数位数)                                                                                                        |
+| FLOAT | FLOAT |
+| DOUBLE | DOUBLE |
+| CHAR VARCHAR STRING TEXT | STRING |
+| DATE | DATE |
+| DATETIME DATETIME(p) | TIMESTAMP |
+| ARRAY | ARRAY |
+
+## 源选项
+
+基础配置:
+
+| 名称 | 类型 | 是否必须 | 默认值 | 描述 |
+|----------------------------------|--------|----------|------------|-----------------------------------------------------------------------------------------------------|
+| fenodes | string | yes | - | FE 地址, 格式:`"fe_host:fe_http_port"` |
+| username | string | yes | - | 用户名 |
+| password | string | yes | - | 密码 |
+| doris.request.retries | int | no | 3 | 请求Doris FE的重试次数 |
+| doris.request.read.timeout.ms    | int    | no       | 30000      | 请求Doris FE的读取超时时间(毫秒)                                                                      |
+| doris.request.connect.timeout.ms | int    | no       | 30000      | 请求Doris FE的连接超时时间(毫秒)                                                                      |
+| query-port | string | no | 9030 | Doris查询端口 |
+| doris.request.query.timeout.s | int | no | 3600 | Doris扫描数据的超时时间,单位秒 |
+| table_list                       | string | no       | -          | 表清单                                                                                                |
+
+表清单配置:
+
+| 名称 | 类型 | 是否必须 | 默认值 | 描述 |
+|----------------------------------|--------|----------|------------|-----------------------------------------------------------------------------------------------------|
+| database | string | yes | - | 数据库 |
+| table | string | yes | - | 表名 |
+| doris.read.field | string | no | - | 选择要读取的Doris表字段 |
+| doris.filter.query | string | no | - | 数据过滤. 格式:"字段 = 值", 例如:doris.filter.query = "F_ID > 2" |
+| doris.batch.size | int | no | 1024 | 每次能够从BE中读取到的最大行数 |
+| doris.exec.mem.limit | long | no | 2147483648 | 单个be扫描请求可以使用的最大内存。默认内存为2G(2147483648) |
+
+注意: 当此配置对应于单个表时,您可以将table_list中的配置项展平到外层。
+
+### 提示
+
+> 不建议随意修改高级参数
+
+## 例子
+
+### 单表
+> 这是一个从doris读取数据后,输出到控制台的例子:
+
+```
+env {
+ parallelism = 2
+ job.mode = "BATCH"
+}
+source{
+ Doris {
+ fenodes = "doris_e2e:8030"
+ username = root
+ password = ""
+ database = "e2e_source"
+ table = "doris_e2e_table"
+ }
+}
+
+transform {
+ # If you would like to get more information about how to configure seatunnel and see full list of transform plugins,
+ # please go to https://seatunnel.apache.org/docs/transform/sql
+}
+
+sink {
+ Console {}
+}
+```
+
+使用`doris.read.field`参数来选择需要读取的Doris表字段:
+
+```
+env {
+ parallelism = 2
+ job.mode = "BATCH"
+}
+source{
+ Doris {
+ fenodes = "doris_e2e:8030"
+ username = root
+ password = ""
+ database = "e2e_source"
+ table = "doris_e2e_table"
+ doris.read.field = "F_ID,F_INT,F_BIGINT,F_TINYINT,F_SMALLINT"
+ }
+}
+
+transform {
+ # If you would like to get more information about how to configure seatunnel and see full list of transform plugins,
+ # please go to https://seatunnel.apache.org/docs/transform/sql
+}
+
+sink {
+ Console {}
+}
+```
+
+使用`doris.filter.query`来过滤数据,参数值将作为过滤条件直接传递到doris:
+
+```
+env {
+ parallelism = 2
+ job.mode = "BATCH"
+}
+source{
+ Doris {
+ fenodes = "doris_e2e:8030"
+ username = root
+ password = ""
+ database = "e2e_source"
+ table = "doris_e2e_table"
+ doris.filter.query = "F_ID > 2"
+ }
+}
+
+transform {
+ # If you would like to get more information about how to configure seatunnel and see full list of transform plugins,
+ # please go to https://seatunnel.apache.org/docs/transform/sql
+}
+
+sink {
+ Console {}
+}
+```
+### 多表
+```
+env{
+ parallelism = 1
+ job.mode = "BATCH"
+}
+
+source{
+ Doris {
+ fenodes = "xxxx:8030"
+ username = root
+ password = ""
+ table_list = [
+ {
+ database = "st_source_0"
+ table = "doris_table_0"
+ doris.read.field = "F_ID,F_INT,F_BIGINT,F_TINYINT"
+ doris.filter.query = "F_ID >= 50"
+ },
+ {
+ database = "st_source_1"
+ table = "doris_table_1"
+ }
+ ]
+ }
+}
+
+transform {}
+
+sink{
+ Doris {
+ fenodes = "xxxx:8030"
+ schema_save_mode = "RECREATE_SCHEMA"
+ username = root
+ password = ""
+ database = "st_sink"
+ table = "${table_name}"
+ sink.enable-2pc = "true"
+ sink.label-prefix = "test_json"
+ doris.config = {
+ format="json"
+ read_json_by_line="true"
+ }
+ }
+}
+```
diff --git a/docs/zh/connector-v2/source/Elasticsearch.md b/docs/zh/connector-v2/source/Elasticsearch.md
new file mode 100644
index 00000000000..7a27f2b9371
--- /dev/null
+++ b/docs/zh/connector-v2/source/Elasticsearch.md
@@ -0,0 +1,247 @@
+# Elasticsearch
+
+> Elasticsearch source 连接器
+
+## 简介
+
+支持读取 Elasticsearch2.x 版本和 8.x 版本之间的数据
+
+## 主要特性
+
+- [x] [批处理](../../concept/connector-v2-features.md)
+- [ ] [流处理](../../concept/connector-v2-features.md)
+- [ ] [精准一次](../../concept/connector-v2-features.md)
+- [x] [列投影](../../concept/connector-v2-features.md)
+- [ ] [并行度](../../concept/connector-v2-features.md)
+- [ ] [支持用户自定义的分片](../../concept/connector-v2-features.md)
+
+## 配置参数选项
+
+| 参数名称 | 类型 | 是否必须 | 默认值或者描述 |
+| ----------------------- | ------- | -------- | ------------------------------------------------------- |
+| hosts                   | 数组    | yes      | -                                                        |
+| username | string | no | - |
+| password | string | no | - |
+| index | string | No | 单索引同步配置,如果index_list没有配置,则必须配置index |
+| index_list | array | no | 用来定义多索引同步任务 |
+| source | array | no | - |
+| query | json | no | {"match_all": {}} |
+| scroll_time | string | no | 1m |
+| scroll_size | int | no | 100 |
+| tls_verify_certificate | boolean | no | true |
+| tls_verify_hostname     | boolean | no       | true                                                     |
+| array_column | map | no | |
+| tls_keystore_path | string | no | - |
+| tls_keystore_password | string | no | - |
+| tls_truststore_path | string | no | - |
+| tls_truststore_password | string | no | - |
+| common-options | | no | - |
+
+### hosts [array]
+
+Elasticsearch 集群的 HTTP 地址,格式为 `host:port`,允许指定多个主机。例如:`["host1:9200", "host2:9200"]`。
+
+### username [string]
+
+用户名
+
+### password [string]
+
+密码
+
+### index [string]
+
+Elasticsearch 索引名称,支持 * 模糊匹配。比如存在索引index1,index2,可以指定index*同时读取两个索引的数据。
+
+### source [array]
+
+索引的字段
+
+你可以通过指定字段 `_id` 来获取文档 ID。如果将 `_id` 写入到其他索引,由于 Elasticsearch 的限制,你需要为 `_id` 指定一个别名。
+
+如果你没有配置 `source`,它将自动从索引的映射中获取。
+
+### array_column [array]
+
+由于 Elasticsearch 中没有数组索引,因此需要指定数组类型。
+
+假设tags和phones是数组类型:
+
+```hocon
+array_column = {tags = "array",phones = "array"}
+```
+
+### query [json]
+
+ElasticsSearch的原生查询语句,用于控制读取哪些数据写入到其他数据源。
+
+### scroll_time [String]
+
+`Seatunnel`底层会使用滚动查询来查询数据,所以需要使用这个参数控制搜索上下文的时间长度。
+
+### scroll_size [int]
+
+滚动查询的最大文档数量。
+
+### index_list [array]
+
+`index_list` 用于定义多索引同步任务。它是一个数组,包含单表同步所需的参数,如 `query`、`source/schema`、`scroll_size` 和 `scroll_time`。建议不要将 `index_list` 和 `query` 配置在同一层级。有关更多详细信息,请参考后面的多表同步示例。
+
+### tls_verify_certificate [boolean]
+
+启用 HTTPS 端点的证书验证
+
+### tls_verify_hostname [boolean]
+
+启用 HTTPS 端点的主机名验证
+
+### tls_keystore_path [string]
+
+PEM 或 JKS 密钥库的路径。该文件必须对运行 SeaTunnel 的操作系统用户可读。
+
+### tls_keystore_password [string]
+
+指定密钥库的密钥密码。
+
+### tls_truststore_path [string]
+
+PEM 或 JKS 信任库的路径。该文件必须对运行 SeaTunnel 的操作系统用户可读。
+
+### tls_truststore_password [string]
+
+指定信任库的密钥密码。
+
+### common options
+
+Source 插件常用参数,具体请参考 [Source 常用选项](../source-common-options.md)
+
+## 使用案例
+
+案例一
+
+> 案例一会从满足 seatunnel-* 匹配的索引中按照 query 读取数据,查询只会返回文档的 `_id`、`name`、`age`、`tags`、`phones` 五个字段。在这个例子中,使用了 source 字段配置应该读取哪些字段,使用 `array_column` 指定了 `tags`、`phones` 应该被当做数组处理。
+
+```hocon
+Elasticsearch {
+ hosts = ["localhost:9200"]
+ index = "seatunnel-*"
+ array_column = {tags = "array",phones = "array"}
+ source = ["_id","name","age","tags","phones"]
+ query = {"range":{"firstPacket":{"gte":1669225429990,"lte":1669225429990}}}
+}
+```
+
+案例二:多索引同步
+
+> 此示例演示了如何从 `read_index1` 和 `read_index2` 中读取不同的数据,并将其分别写入 `read_index1_copy`、`read_index2_copy` 索引。
+> 在 `read_index1` 中,我使用 `source` 来指定要读取的字段,并使用`array_column`指明哪些字段是数组字段。
+
+```hocon
+source {
+ Elasticsearch {
+ hosts = ["https://elasticsearch:9200"]
+ username = "elastic"
+ password = "elasticsearch"
+ tls_verify_certificate = false
+ tls_verify_hostname = false
+ index_list = [
+ {
+ index = "read_index1"
+ query = {"range": {"c_int": {"gte": 10, "lte": 20}}}
+ source = [
+ c_map,
+ c_array,
+ c_string,
+ c_boolean,
+ c_tinyint,
+ c_smallint,
+ c_bigint,
+ c_float,
+ c_double,
+ c_decimal,
+ c_bytes,
+ c_int,
+ c_date,
+ c_timestamp
+ ]
+ array_column = {
+ c_array = "array"
+ }
+ }
+ {
+ index = "read_index2"
+ query = {"match_all": {}}
+ source = [
+ c_int2,
+ c_date2,
+ c_null
+ ]
+
+ }
+
+ ]
+
+ }
+}
+
+transform {
+}
+
+sink {
+ Elasticsearch {
+ hosts = ["https://elasticsearch:9200"]
+ username = "elastic"
+ password = "elasticsearch"
+ tls_verify_certificate = false
+ tls_verify_hostname = false
+
+ index = "multi_source_write_test_index"
+ index_type = "st"
+ "schema_save_mode"="CREATE_SCHEMA_WHEN_NOT_EXIST"
+ "data_save_mode"="APPEND_DATA"
+ }
+}
+```
+
+案例三:SSL(禁用证书验证)
+
+```hocon
+source {
+ Elasticsearch {
+ hosts = ["https://localhost:9200"]
+ username = "elastic"
+ password = "elasticsearch"
+
+ tls_verify_certificate = false
+ }
+}
+```
+
+案例四:SSL(禁用主机名验证)
+
+```hocon
+source {
+ Elasticsearch {
+ hosts = ["https://localhost:9200"]
+ username = "elastic"
+ password = "elasticsearch"
+
+ tls_verify_hostname = false
+ }
+}
+```
+
+案例五:SSL(启用证书验证)
+
+```hocon
+source {
+ Elasticsearch {
+ hosts = ["https://localhost:9200"]
+ username = "elastic"
+ password = "elasticsearch"
+
+ tls_keystore_path = "${your elasticsearch home}/config/certs/http.p12"
+ tls_keystore_password = "${your password}"
+ }
+}
+```
\ No newline at end of file
diff --git a/docs/zh/connector-v2/source/Hbase.md b/docs/zh/connector-v2/source/Hbase.md
new file mode 100644
index 00000000000..4c4481815e7
--- /dev/null
+++ b/docs/zh/connector-v2/source/Hbase.md
@@ -0,0 +1,96 @@
+# Hbase
+
+> Hbase 源连接器
+
+## 描述
+
+从 Apache Hbase 读取数据。
+
+## 主要功能
+
+- [x] [批处理](../../concept/connector-v2-features.md)
+- [ ] [流处理](../../concept/connector-v2-features.md)
+- [ ] [精确一次](../../concept/connector-v2-features.md)
+- [x] [Schema](../../concept/connector-v2-features.md)
+- [x] [并行度](../../concept/connector-v2-features.md)
+- [ ] [支持用户定义的拆分](../../concept/connector-v2-features.md)
+
+## 选项
+
+| 名称 | 类型 | 必填 | 默认值 |
+|--------------------|---------|----|-------|
+| zookeeper_quorum | string | 是 | - |
+| table | string | 是 | - |
+| schema | config | 是 | - |
+| hbase_extra_config | string | 否 | - |
+| caching | int | 否 | -1 |
+| batch | int | 否 | -1 |
+| cache_blocks | boolean | 否 | false |
+| common-options | | 否 | - |
+
+### zookeeper_quorum [string]
+
+hbase的zookeeper集群主机,例如:“hadoop001:2181,hadoop002:2181,hadoop003:2181”
+
+### table [string]
+
+要读取的表名,例如:“seatunnel”
+
+### schema [config]
+
+Hbase 使用字节数组进行存储。因此,您需要为表中的每一列配置数据类型。有关更多信息,请参阅:[guide](../../concept/schema-feature.md#how-to-declare-type-supported)。
+
+### hbase_extra_config [config]
+
+hbase 的额外配置
+
+### caching
+
+caching 参数用于设置在扫描过程中一次从服务器端获取的行数。这可以减少客户端与服务器之间的往返次数,从而提高扫描效率。默认值:-1
+
+### batch
+
+batch 参数用于设置在扫描过程中每次返回的最大列数。这对于处理有很多列的行特别有用,可以避免一次性返回过多数据,从而节省内存并提高性能。
+
+### cache_blocks
+
+cache_blocks 参数用于设置在扫描过程中是否缓存数据块。默认情况下,HBase 会在扫描时将数据块缓存到块缓存中。如果设置为 false,则在扫描过程中不会缓存数据块,从而减少内存的使用。在SeaTunnel中默认值为: false
+
+### 常用选项
+
+Source 插件常用参数,具体请参考 [Source 常用选项](../source-common-options.md)
+
+## 示例
+
+```hocon
+source {
+ Hbase {
+ zookeeper_quorum = "hadoop001:2181,hadoop002:2181,hadoop003:2181"
+ table = "seatunnel_test"
+ caching = 1000
+ batch = 100
+ cache_blocks = false
+ schema = {
+ columns = [
+ {
+ name = "rowkey"
+ type = string
+ },
+ {
+ name = "columnFamily1:column1"
+ type = boolean
+ },
+ {
+ name = "columnFamily1:column2"
+ type = double
+ },
+ {
+ name = "columnFamily2:column1"
+ type = bigint
+ }
+ ]
+ }
+ }
+}
+```
+
diff --git a/docs/zh/connector-v2/source/HdfsFile.md b/docs/zh/connector-v2/source/HdfsFile.md
index efce1d14017..4de3014f5c0 100644
--- a/docs/zh/connector-v2/source/HdfsFile.md
+++ b/docs/zh/connector-v2/source/HdfsFile.md
@@ -39,7 +39,7 @@
## 源选项
-| 名称 | 类型 | 是否必须 | 默认值 | 描述 |
+| 名称 | 类型 | 是否必须 | 默认值 | 描述 |
|---------------------------|---------|------|----------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| path | string | 是 | - | 源文件路径。 |
| file_format_type | string | 是 | - | 我们支持以下文件类型:`text` `json` `csv` `orc` `parquet` `excel`。请注意,最终文件名将以文件格式的后缀结束,文本文件的后缀是 `txt`。 |
@@ -55,15 +55,71 @@
| kerberos_principal | string | 否 | - | kerberos 的 principal。 |
| kerberos_keytab_path | string | 否 | - | kerberos 的 keytab 路径。 |
| skip_header_row_number | long | 否 | 0 | 跳过前几行,但仅适用于 txt 和 csv。例如,设置如下:`skip_header_row_number = 2`。然后 Seatunnel 将跳过源文件中的前两行。 |
+| file_filter_pattern | string | 否 | - | 过滤模式,用于过滤文件。 |
+| null_format | string | 否 | - | 定义哪些字符串可以表示为 null,但仅适用于 txt 和 csv. 例如: `\N` |
| schema | config | 否 | - | 上游数据的模式字段。 |
| sheet_name | string | 否 | - | 读取工作簿的表格,仅在文件格式为 excel 时使用。 |
| compress_codec | string | 否 | none | 文件的压缩编解码器。 |
-| common-options | | 否 | - | 源插件通用参数,请参阅 [源通用选项](../../../en/connector-v2/source/common-options.md) 获取详细信息。 |
+| common-options | | 否 | - | 源插件通用参数,请参阅 [源通用选项](../../../en/connector-v2/source-common-options.md) 获取详细信息。 |
### delimiter/field_delimiter [string]
**delimiter** 参数在版本 2.3.5 后将被弃用,请改用 **field_delimiter**。
+### file_filter_pattern [string]
+
+过滤模式,用于过滤文件。
+
+过滤规则遵循标准正则表达式语法,详情请参考 https://en.wikipedia.org/wiki/Regular_expression 。
+
+下面是一些示例。
+
+文件清单:
+```
+/data/seatunnel/20241001/report.txt
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+/data/seatunnel/20241005/old_data.csv
+/data/seatunnel/20241012/logo.png
+```
+匹配规则:
+
+**例子 1**: *匹配所有txt为后缀名的文件*,匹配正则为:
+```
+/data/seatunnel/20241001/.*\.txt
+```
+匹配的结果是:
+```
+/data/seatunnel/20241001/report.txt
+```
+**例子 2**: *匹配所有文件名以abc开头的文件*,匹配正则为:
+```
+/data/seatunnel/202410\d*/abc.*
+```
+匹配的结果是:
+```
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+```
+**例子 3**: *匹配所有文件名以abc开头,并且文件第四个字母是 h 或者 g 的文件*, 匹配正则为:
+```
+/data/seatunnel/20241007/abc[h,g].*
+```
+匹配的结果是:
+```
+/data/seatunnel/20241007/abch202410.csv
+```
+**例子 4**: *匹配所有文件夹第三级以 202410 开头并且文件后缀名是.csv的文件*, 匹配正则为:
+```
+/data/seatunnel/202410\d*/.*\.csv
+```
+匹配的结果是:
+```
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+/data/seatunnel/20241005/old_data.csv
+```
+
### compress_codec [string]
文件的压缩编解码器及支持的详细信息如下所示:
@@ -106,7 +162,7 @@ source {
fs.defaultFS = "hdfs://namenode001"
}
# 如果您想获取有关如何配置 seatunnel 和查看源插件完整列表的更多信息,
- # 请访问 https://seatunnel.apache.org/docs/category/source-v2
+ # 请访问 https://seatunnel.apache.org/docs/connector-v2/source
}
transform {
@@ -121,7 +177,29 @@ sink {
file_format = "orc"
}
# 如果您想获取有关如何配置 seatunnel 和查看接收器插件完整列表的更多信息,
- # 请访问 https://seatunnel.apache.org/docs/category/sink-v2
+ # 请访问 https://seatunnel.apache.org/docs/connector-v2/sink
}
```
+### Filter File
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "BATCH"
+}
+
+source {
+ HdfsFile {
+ path = "/apps/hive/demo/student"
+ file_format_type = "json"
+ fs.defaultFS = "hdfs://namenode001"
+ file_filter_pattern = "abc[DX]*.*"
+ }
+}
+
+sink {
+ Console {
+ }
+}
+```
\ No newline at end of file
diff --git a/docs/zh/connector-v2/source/Kafka.md b/docs/zh/connector-v2/source/Kafka.md
new file mode 100644
index 00000000000..04820cc7c13
--- /dev/null
+++ b/docs/zh/connector-v2/source/Kafka.md
@@ -0,0 +1,348 @@
+# Kafka
+
+> Kafka 源连接器
+
+## 支持以下引擎
+
+> Spark
+> Flink
+> SeaTunnel Zeta
+
+## 主要功能
+
+- [x] [批处理](../../concept/connector-v2-features.md)
+- [x] [流处理](../../concept/connector-v2-features.md)
+- [x] [精确一次](../../concept/connector-v2-features.md)
+- [ ] [列投影](../../concept/connector-v2-features.md)
+- [x] [并行度](../../concept/connector-v2-features.md)
+- [ ] [支持用户定义拆分](../../concept/connector-v2-features.md)
+
+## 描述
+
+用于 Apache Kafka 的源连接器。
+
+## 支持的数据源信息
+
+使用 Kafka 连接器需要以下依赖项。
+可以通过 install-plugin.sh 下载或从 Maven 中央仓库获取。
+
+| 数据源 | 支持的版本 | Maven 下载链接 |
+|-------|-------|-------------------------------------------------------------------------------|
+| Kafka | 通用版本 | [下载](https://mvnrepository.com/artifact/org.apache.seatunnel/connector-kafka) |
+
+## 源选项
+
+| 名称 | 类型 | 是否必填 | 默认值 | 描述 |
+|-------------------------------------|-------------------------------------|------|--------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| topic | String | 是 | - | 使用表作为数据源时要读取数据的主题名称。它也支持通过逗号分隔的多个主题列表,例如 'topic-1,topic-2'。 |
+| table_list | Map | 否 | - | 主题列表配置,你可以同时配置一个 `table_list` 和一个 `topic`。 |
+| bootstrap.servers | String | 是 | - | 逗号分隔的 Kafka brokers 列表。 |
+| pattern | Boolean | 否 | false | 如果 `pattern` 设置为 `true`,则会使用指定的正则表达式匹配并订阅主题。 |
+| consumer.group | String | 否 | SeaTunnel-Consumer-Group | `Kafka 消费者组 ID`,用于区分不同的消费者组。 |
+| commit_on_checkpoint | Boolean | 否 | true | 如果为 true,消费者的偏移量将会定期在后台提交。 |
+| poll.timeout | Long | 否 | 10000 | kafka主动拉取时间间隔(毫秒)。 |
+| kafka.config | Map | 否 | - | 除了上述必要参数外,用户还可以指定多个非强制的消费者客户端参数,覆盖 [Kafka 官方文档](https://kafka.apache.org/documentation.html#consumerconfigs) 中指定的所有消费者参数。 |
+| schema | Config | 否 | - | 数据结构,包括字段名称和字段类型。 |
+| format | String | 否 | json | 数据格式。默认格式为 json。可选格式包括 text, canal_json, debezium_json, ogg_json, maxwell_json, avro 和 protobuf。默认字段分隔符为 ", "。如果自定义分隔符,添加 "field_delimiter" 选项。如果使用 canal 格式,请参考 [canal-json](../formats/canal-json.md) 了解详细信息。如果使用 debezium 格式,请参考 [debezium-json](../formats/debezium-json.md)。一些Format的详细信息请参考 [formats](../formats) |
+| format_error_handle_way | String | 否 | fail | 数据格式错误的处理方式。默认值为 fail,可选值为 fail 和 skip。当选择 fail 时,数据格式错误将阻塞并抛出异常。当选择 skip 时,数据格式错误将跳过此行数据。 |
+| field_delimiter | String | 否 | , | 自定义数据格式的字段分隔符。 |
+| start_mode                          | StartMode[earliest],[group_offsets],[latest],[specific_offsets],[timestamp] | 否    | group_offsets            | 消费者的初始消费模式。 |
+| start_mode.offsets | Config | 否 | - | 用于 specific_offsets 消费模式的偏移量。 |
+| start_mode.timestamp | Long | 否 | - | 用于 "timestamp" 消费模式的时间。 |
+| partition-discovery.interval-millis | Long | 否 | -1 | 动态发现主题和分区的间隔时间。 |
+| common-options | | 否 | - | 源插件的常见参数,详情请参考 [Source Common Options](../source-common-options.md)。 |
+| protobuf_message_name | String | 否 | - | 当格式设置为 protobuf 时有效,指定消息名称。 |
+| protobuf_schema | String | 否 | - | 当格式设置为 protobuf 时有效,指定 Schema 定义。 |
+
+## 任务示例
+
+### 简单示例
+
+> 此示例读取 Kafka 的 topic_1、topic_2 和 topic_3 的数据并将其打印到客户端。如果尚未安装和部署 SeaTunnel,请按照 [安装指南](../../start-v2/locally/deployment.md) 进行安装和部署。然后,按照 [快速开始](../../start-v2/locally/quick-start-seatunnel-engine.md) 运行此任务。
+
+```hocon
+# 定义运行环境
+env {
+ parallelism = 2
+ job.mode = "BATCH"
+}
+source {
+ Kafka {
+ schema = {
+ fields {
+ name = "string"
+ age = "int"
+ }
+ }
+ format = text
+ field_delimiter = "#"
+ topic = "topic_1,topic_2,topic_3"
+ bootstrap.servers = "localhost:9092"
+ kafka.config = {
+ client.id = client_1
+ max.poll.records = 500
+ auto.offset.reset = "earliest"
+ enable.auto.commit = "false"
+ }
+ }
+}
+sink {
+ Console {}
+}
+```
+
+### 正则表达式主题
+
+```hocon
+source {
+ Kafka {
+    topic = ".*seatunnel.*"
+ pattern = "true"
+ bootstrap.servers = "localhost:9092"
+ consumer.group = "seatunnel_group"
+ }
+}
+```
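+
+### 指定偏移量消费(示意)
+
+下面是一个示意片段,演示如何配合 `start_mode = specific_offsets` 与 `start_mode.offsets` 从指定偏移量开始消费;其中主题名 `info` 及各分区偏移量均为假设值,偏移量键的形式假定为 `主题-分区号`:
+
+```hocon
+source {
+  Kafka {
+    topic = "info"
+    bootstrap.servers = "localhost:9092"
+    start_mode = "specific_offsets"
+    # 分别指定 info 主题 0、1、2 号分区的起始偏移量
+    start_mode.offsets = {
+      info-0 = 70
+      info-1 = 10
+      info-2 = 10
+    }
+  }
+}
+```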
+
+### AWS MSK SASL/SCRAM
+
+将以下 `${username}` 和 `${password}` 替换为 AWS MSK 中的配置值。
+
+```hocon
+source {
+ Kafka {
+ topic = "seatunnel"
+ bootstrap.servers = "xx.amazonaws.com.cn:9096,xxx.amazonaws.com.cn:9096,xxxx.amazonaws.com.cn:9096"
+ consumer.group = "seatunnel_group"
+ kafka.config = {
+ security.protocol=SASL_SSL
+ sasl.mechanism=SCRAM-SHA-512
+ sasl.jaas.config="org.apache.kafka.common.security.scram.ScramLoginModule required username=\"username\" password=\"password\";"
+ }
+ }
+}
+```
+
+### AWS MSK IAM
+
+从 [此处](https://github.com/aws/aws-msk-iam-auth/releases) 下载 `aws-msk-iam-auth-1.1.5.jar` 并将其放在 `$SEATUNNEL_HOME/plugin/kafka/lib` 目录下。
+
+确保 IAM 策略中包含 `"kafka-cluster:Connect"` 权限,如下所示:
+
+```hocon
+"Effect": "Allow",
+"Action": [
+ "kafka-cluster:Connect",
+ "kafka-cluster:AlterCluster",
+ "kafka-cluster:DescribeCluster"
+],
+```
+
+源配置示例:
+
+```hocon
+source {
+ Kafka {
+ topic = "seatunnel"
+ bootstrap.servers = "xx.amazonaws.com.cn:9098,xxx.amazonaws.com.cn:9098,xxxx.amazonaws.com.cn:9098"
+ consumer.group = "seatunnel_group"
+ kafka.config = {
+ security.protocol=SASL_SSL
+ sasl.mechanism=AWS_MSK_IAM
+ sasl.jaas.config="software.amazon.msk.auth.iam.IAMLoginModule required;"
+ sasl.client.callback.handler.class="software.amazon.msk.auth.iam.IAMClientCallbackHandler"
+ }
+ }
+}
+```
+
+### Kerberos 认证示例
+
+源配置示例:
+
+```hocon
+source {
+ Kafka {
+ topic = "seatunnel"
+ bootstrap.servers = "127.0.0.1:9092"
+ consumer.group = "seatunnel_group"
+ kafka.config = {
+ security.protocol=SASL_PLAINTEXT
+ sasl.kerberos.service.name=kafka
+ sasl.mechanism=GSSAPI
+ java.security.krb5.conf="/etc/krb5.conf"
+ sasl.jaas.config="com.sun.security.auth.module.Krb5LoginModule required \n useKeyTab=true \n storeKey=true \n keyTab=\"/path/to/xxx.keytab\" \n principal=\"user@xxx.com\";"
+ }
+ }
+}
+```
+
+### 多 Kafka 源示例
+
+> 根据不同的 Kafka 主题和格式解析数据,并基于 ID 执行 upsert 操作。
+
+> 注意:Kafka 是非结构化数据源,推荐使用 `tables_configs`;`table_list` 将在未来版本中移除。
+
+```hocon
+
+env {
+ execution.parallelism = 1
+ job.mode = "BATCH"
+}
+
+source {
+ Kafka {
+ bootstrap.servers = "kafka_e2e:9092"
+ tables_configs = [
+ {
+ topic = "^test-ogg-sou.*"
+ pattern = "true"
+ consumer.group = "ogg_multi_group"
+ start_mode = earliest
+ schema = {
+ fields {
+ id = "int"
+ name = "string"
+ description = "string"
+ weight = "string"
+ }
+ },
+ format = ogg_json
+ },
+ {
+ topic = "test-cdc_mds"
+ start_mode = earliest
+ schema = {
+ fields {
+ id = "int"
+ name = "string"
+ description = "string"
+ weight = "string"
+ }
+ },
+ format = canal_json
+ }
+ ]
+ }
+}
+
+sink {
+ Jdbc {
+ driver = org.postgresql.Driver
+ url = "jdbc:postgresql://postgresql:5432/test?loggerLevel=OFF"
+ user = test
+ password = test
+ generate_sink_sql = true
+ database = test
+ table = public.sink
+ primary_keys = ["id"]
+ }
+}
+```
+
+```hocon
+env {
+ execution.parallelism = 1
+ job.mode = "BATCH"
+}
+
+source {
+ Kafka {
+ bootstrap.servers = "kafka_e2e:9092"
+ table_list = [
+ {
+ topic = "^test-ogg-sou.*"
+ pattern = "true"
+ consumer.group = "ogg_multi_group"
+ start_mode = earliest
+ schema = {
+ fields {
+ id = "int"
+ name = "string"
+ description = "string"
+ weight = "string"
+ }
+ },
+ format = ogg_json
+ },
+ {
+ topic = "test-cdc_mds"
+ start_mode = earliest
+ schema = {
+ fields {
+ id = "int"
+ name = "string"
+ description = "string"
+ weight = "string"
+ }
+ },
+ format = canal_json
+ }
+ ]
+ }
+}
+
+sink {
+ Jdbc {
+ driver = org.postgresql.Driver
+ url = "jdbc:postgresql://postgresql:5432/test?loggerLevel=OFF"
+ user = test
+ password = test
+ generate_sink_sql = true
+ database = test
+ table = public.sink
+ primary_keys = ["id"]
+ }
+}
+```
+
+### Protobuf配置
+
+`format` 设置为 `protobuf`,配置`protobuf`数据结构,`protobuf_message_name`和`protobuf_schema`参数
+
+使用样例:
+
+```hocon
+source {
+ Kafka {
+ topic = "test_protobuf_topic_fake_source"
+ format = protobuf
+ protobuf_message_name = Person
+ protobuf_schema = """
+ syntax = "proto3";
+
+ package org.apache.seatunnel.format.protobuf;
+
+ option java_outer_classname = "ProtobufE2E";
+
+ message Person {
+ int32 c_int32 = 1;
+ int64 c_int64 = 2;
+ float c_float = 3;
+ double c_double = 4;
+ bool c_bool = 5;
+ string c_string = 6;
+ bytes c_bytes = 7;
+
+ message Address {
+ string street = 1;
+ string city = 2;
+ string state = 3;
+ string zip = 4;
+ }
+
+ Address address = 8;
+
+              map<string, float> attributes = 9;
+
+ repeated string phone_numbers = 10;
+ }
+ """
+ bootstrap.servers = "kafkaCluster:9092"
+ start_mode = "earliest"
+ plugin_output = "kafka_table"
+ }
+}
+```
diff --git a/docs/zh/connector-v2/source/Opengauss-CDC.md b/docs/zh/connector-v2/source/Opengauss-CDC.md
new file mode 100644
index 00000000000..b175f611ecb
--- /dev/null
+++ b/docs/zh/connector-v2/source/Opengauss-CDC.md
@@ -0,0 +1,169 @@
+# Opengauss CDC
+
+> Opengauss CDC源连接器
+
+## 支持这些引擎
+
+> SeaTunnel Zeta
+> Flink
+
+## 主要功能
+
+- [ ] [批处理](../../concept/connector-v2-features.md)
+- [x] [流处理](../../concept/connector-v2-features.md)
+- [x] [精确一次](../../concept/connector-v2-features.md)
+- [ ] [列投影](../../concept/connector-v2-features.md)
+- [x] [并行度](../../concept/connector-v2-features.md)
+- [x] [支持用户定义的拆分](../../concept/connector-v2-features.md)
+
+## 描述
+
+Opengauss CDC连接器允许从Opengauss数据库读取快照数据和增量数据。本文档描述了如何设置Opengauss CDC连接器以捕获Opengauss数据库中的数据变更。
+
+## 使用步骤
+
+> 这里是启用Opengauss CDC的步骤:
+
+1. 确保wal_level被设置为logical, 你可以直接使用SQL命令来修改这个配置:
+
+```sql
+ALTER SYSTEM SET wal_level TO 'logical';
+SELECT pg_reload_conf();
+```
+
+2. 改变指定表的REPLICA策略为FULL
+
+```sql
+ALTER TABLE your_table_name REPLICA IDENTITY FULL;
+```
+
+如果你有很多表,你可以使用下面SQL的结果集来改变所有表的REPLICA策略
+
+```sql
+select 'ALTER TABLE ' || schemaname || '.' || tablename || ' REPLICA IDENTITY FULL;' from pg_tables where schemaname = 'YourTableSchema'
+```
+
+## 数据类型映射
+
+| Opengauss Data type | SeaTunnel Data type |
+|-----------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------|
+| BOOL | BOOLEAN |
+| BYTEA | BYTES |
+| INT2 SMALLSERIAL INT4 SERIAL | INT |
+| INT8 BIGSERIAL | BIGINT |
+| FLOAT4 | FLOAT |
+| FLOAT8 | DOUBLE |
+| NUMERIC(Get the designated column's specified column size>0) | DECIMAL(Get the designated column's specified column size,Gets the number of digits in the specified column to the right of the decimal point) |
+| NUMERIC(Get the designated column's specified column size<0) | DECIMAL(38, 18) |
+| BPCHAR CHARACTER VARCHAR TEXT GEOMETRY GEOGRAPHY JSON JSONB | STRING |
+| TIMESTAMP | TIMESTAMP |
+| TIME | TIME |
+| DATE | DATE |
+| OTHER DATA TYPES | NOT SUPPORTED YET |
+
+## 源端可选项
+
+| Name | Type | Required | Default | Description |
+|------------------------------------------------|------|----------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| base-url | 字符串 | 是 | - | JDBC连接的URL. 参考: `jdbc:postgresql://localhost:5432/postgres_cdc?loggerLevel=OFF`. |
+| username | 字符串 | 是 | - | 连接数据库的用户名 |
+| password | 字符串 | 是 | - | 连接数据库的密码 |
+| database-names | 列表 | 否 | - | 监控的数据库名称 |
+| table-names | 列表 | 是 | - | 监控的数据表名称. 表名需要包含数据库名称, 例如: `database_name.table_name` |
+| table-names-config | 列表 | 否 | - | 表配置的列表集合. 例如: [{"table": "db1.schema1.table1","primaryKeys":["key1"]}] |
+| startup.mode | 枚举 | 否 | INITIAL | Opengauss CDC消费者的可选启动模式, 有效的枚举是`initial`, `earliest`, `latest`. `initial`: 启动时同步历史数据,然后同步增量数据 `earliest`: 从可能的最早偏移量启动 `latest`: 从最近的偏移量启动 |
+| snapshot.split.size | 整型 | 否 | 8096 | 表快照的分割大小(行数),在读取表的快照时,捕获的表被分割成多个split |
+| snapshot.fetch.size | 整型 | 否 | 1024 | 读取表快照时,每次轮询的最大读取大小 |
+| slot.name | 字符串 | 否 | - | Opengauss逻辑解码插槽的名称,该插槽是为特定数据库/模式的特定插件的流式更改而创建的。服务器使用此插槽将事件流传输到正在配置的连接器。默认值为seatunnel |
+| decoding.plugin.name | 字符串 | 否 | pgoutput | 安装在服务器上的Postgres逻辑解码插件的名称,支持的值是decoderbufs、wal2json、wal2json_rds、wal2json_streaming、wal2json_rds_streaming和pgoutput |
+| server-time-zone | 字符串 | 否 | UTC | 数据库服务器中的会话时区。如果没有设置,则使用ZoneId.systemDefault()来确定服务器的时区 |
+| connect.timeout.ms | 时间间隔 | 否 | 30000 | 在尝试连接数据库服务器之后,连接器在超时之前应该等待的最大时间 |
+| connect.max-retries | 整型 | 否 | 3 | 连接器在建立数据库服务器连接时应该重试的最大次数 |
+| connection.pool.size | 整型 | 否 | 20 | jdbc连接池的大小 |
+| chunk-key.even-distribution.factor.upper-bound | 双浮点型 | 否 | 100 | chunk的key分布因子的上界。该因子用于确定表数据是否均匀分布。如果分布因子被计算为小于或等于这个上界(即(MAX(id) - MIN(id) + 1) /行数),表的所有chunk将被优化以达到均匀分布。否则,如果分布因子更大,则认为表分布不均匀,如果估计的分片数量超过`sample-sharding.threshold`指定的值,则将使用基于采样的分片策略。默认值为100.0。 |
+| chunk-key.even-distribution.factor.lower-bound | 双浮点型 | 否 | 0.05 | chunk的key分布因子的下界。该因子用于确定表数据是否均匀分布。如果分布因子的计算结果大于或等于这个下界(即(MAX(id) - MIN(id) + 1) /行数),那么表的所有块将被优化以达到均匀分布。否则,如果分布因子较小,则认为表分布不均匀,如果估计的分片数量超过`sample-sharding.threshold`指定的值,则使用基于采样的分片策略。缺省值为0.05。 |
+| sample-sharding.threshold | 整型 | 否 | 1000 | 此配置指定了用于触发采样分片策略的估计分片数的阈值。当分布因子超出了由`chunk-key.even-distribution.factor.upper-bound `和`chunk-key.even-distribution.factor.lower-bound`,并且估计的分片计数(以近似的行数/块大小计算)超过此阈值,则将使用样本分片策略。这有助于更有效地处理大型数据集。默认值为1000个分片。 |
+| inverse-sampling.rate | 整型 | 否 | 1000 | 采样分片策略中使用的采样率的倒数。例如,如果该值设置为1000,则意味着在采样过程中应用了1/1000的采样率。该选项提供了控制采样粒度的灵活性,从而影响最终的分片数量。当处理非常大的数据集时,它特别有用,其中首选较低的采样率。缺省值为1000。 |
+| exactly_once | 布尔 | 否 | false | 启用exactly once语义 |
+| format | 枚举 | 否 | DEFAULT | Opengauss CDC可选的输出格式, 有效的枚举是`DEFAULT`, `COMPATIBLE_DEBEZIUM_JSON`. |
+| debezium | 配置 | 否 | - | 将 [Debezium的属性](https://github.com/debezium/debezium/blob/v1.9.8.Final/documentation/modules/ROOT/pages/connectors/postgresql.adoc#connector-configuration-properties) 传递到Debezium嵌入式引擎,该引擎用于捕获来自Opengauss服务的数据更改 |
+| common-options | | 否 | - | 源码插件通用参数, 请参考[Source Common Options](../source-common-options.md)获取详情 |
+
+## 任务示例
+
+### 简单
+
+> 支持多表读
+
+```
+
+env {
+ # You can set engine configuration here
+ execution.parallelism = 1
+ job.mode = "STREAMING"
+ checkpoint.interval = 5000
+ read_limit.bytes_per_second=7000000
+ read_limit.rows_per_second=400
+}
+
+source {
+ Opengauss-CDC {
+ plugin_output = "customers_opengauss_cdc"
+ username = "gaussdb"
+ password = "openGauss@123"
+ database-names = ["opengauss_cdc"]
+ schema-names = ["inventory"]
+ table-names = ["opengauss_cdc.inventory.opengauss_cdc_table_1","opengauss_cdc.inventory.opengauss_cdc_table_2"]
+ base-url = "jdbc:postgresql://opengauss_cdc_e2e:5432/opengauss_cdc"
+ decoding.plugin.name = "pgoutput"
+ }
+}
+
+transform {
+
+}
+
+sink {
+ jdbc {
+ plugin_input = "customers_opengauss_cdc"
+ url = "jdbc:postgresql://opengauss_cdc_e2e:5432/opengauss_cdc"
+ driver = "org.postgresql.Driver"
+ user = "dailai"
+ password = "openGauss@123"
+
+ compatible_mode="postgresLow"
+ generate_sink_sql = true
+ # You need to configure both database and table
+ database = "opengauss_cdc"
+ schema = "inventory"
+ tablePrefix = "sink_"
+ primary_keys = ["id"]
+ }
+}
+
+```
+
+### 支持自定义主键
+
+```
+source {
+ Opengauss-CDC {
+ plugin_output = "customers_opengauss_cdc"
+ username = "gaussdb"
+ password = "openGauss@123"
+ database-names = ["opengauss_cdc"]
+ schema-names = ["inventory"]
+ table-names = ["opengauss_cdc.inventory.full_types_no_primary_key"]
+ base-url = "jdbc:postgresql://opengauss_cdc_e2e:5432/opengauss_cdc?loggerLevel=OFF"
+ decoding.plugin.name = "pgoutput"
+ exactly_once = true
+ table-names-config = [
+ {
+ table = "opengauss_cdc.inventory.full_types_no_primary_key"
+ primaryKeys = ["id"]
+ }
+ ]
+ }
+}
+```
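+
+### 指定启动模式与透传 Debezium 属性(示意)
+
+下面是一个示意片段,演示如何设置 `startup.mode` 并通过 `debezium` 选项向 Debezium 嵌入式引擎透传属性;其中 `decimal.handling.mode` 仅作为示例属性,实际可用属性请以 Debezium PostgreSQL 连接器文档为准,连接地址与库表名均为假设值:
+
+```hocon
+source {
+  Opengauss-CDC {
+    plugin_output = "customers_opengauss_cdc"
+    username = "gaussdb"
+    password = "openGauss@123"
+    database-names = ["opengauss_cdc"]
+    schema-names = ["inventory"]
+    table-names = ["opengauss_cdc.inventory.opengauss_cdc_table_1"]
+    base-url = "jdbc:postgresql://opengauss_cdc_e2e:5432/opengauss_cdc"
+    decoding.plugin.name = "pgoutput"
+    # 启动时先同步历史数据,再同步增量数据
+    startup.mode = "initial"
+    # 透传给 Debezium 嵌入式引擎的属性(示例)
+    debezium = {
+      decimal.handling.mode = "string"
+    }
+  }
+}
+```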
+
diff --git a/docs/zh/connector-v2/source/Prometheus.md b/docs/zh/connector-v2/source/Prometheus.md
new file mode 100644
index 00000000000..1dca6b463cb
--- /dev/null
+++ b/docs/zh/connector-v2/source/Prometheus.md
@@ -0,0 +1,152 @@
+# Prometheus
+
+> Prometheus 数据源连接器
+
+## 描述
+
+用于读取prometheus数据。
+
+## 主要特性
+
+- [x] [批处理](../../concept/connector-v2-features.md)
+- [ ] [流处理](../../concept/connector-v2-features.md)
+- [ ] [并行](../../concept/connector-v2-features.md)
+
+## 源选项
+
+| 名称 | 类型 | 是否必填 | 默认值 |
+|-----------------------------|---------|------|-----------------|
+| url | String | Yes | - |
+| query | String | Yes | - |
+| query_type | String | Yes | Instant |
+| content_field | String | Yes | $.data.result.* |
+| schema.fields | Config | Yes | - |
+| format | String | No | json |
+| params | Map | Yes | - |
+| poll_interval_millis | int | No | - |
+| retry | int | No | - |
+| retry_backoff_multiplier_ms | int | No | 100 |
+| retry_backoff_max_ms | int | No | 10000 |
+| enable_multi_lines | boolean | No | false |
+| common-options | config | No | |
+
+### url [String]
+
+http 请求路径。
+
+### query [String]
+
+Prometheus 表达式查询字符串
+
+### query_type [String]
+
+Instant/Range
+
+1. Instant : 简单指标的即时查询。
+2. Range : 一段时间内指标数据。
+
+https://prometheus.io/docs/prometheus/latest/querying/api/
+
+### params [Map]
+
+http 请求参数
+
+### poll_interval_millis [int]
+
+流模式下请求HTTP API间隔(毫秒)
+
+### retry [int]
+
+请求 HTTP 发生 `IOException` 时的最大重试次数。
+
+### retry_backoff_multiplier_ms [int]
+
+HTTP 请求失败时,重试回退时间(毫秒)的乘数。
+
+### retry_backoff_max_ms [int]
+
+HTTP 请求失败时的最大重试回退时间(毫秒)。
+
+### format [String]
+
+上游数据的格式,默认为json。
+
+### schema [Config]
+
+按照如下填写一个固定值
+
+```hocon
+ schema = {
+ fields {
+      metric = "map<string, string>"
+ value = double
+ time = long
+ }
+ }
+
+```
+
+#### fields [Config]
+
+上游数据的模式字段
+
+### common options
+
+源插件常用参数,请参考[Source Common Options](../source-common-options.md) 了解详细信息
+
+## 示例
+
+### Instant:
+
+```hocon
+source {
+ Prometheus {
+ plugin_output = "http"
+ url = "http://mockserver:1080"
+ query = "up"
+ query_type = "Instant"
+ content_field = "$.data.result.*"
+ format = "json"
+ schema = {
+ fields {
+          metric = "map<string, string>"
+ value = double
+ time = long
+ }
+ }
+ }
+}
+```
+
+### Range
+
+```hocon
+source {
+ Prometheus {
+ plugin_output = "http"
+ url = "http://mockserver:1080"
+ query = "up"
+ query_type = "Range"
+ content_field = "$.data.result.*"
+ format = "json"
+ start = "2024-07-22T20:10:30.781Z"
+ end = "2024-07-22T20:11:00.781Z"
+ step = "15s"
+ schema = {
+ fields {
+          metric = "map<string, string>"
+ value = double
+ time = long
+ }
+ }
+ }
+ }
+```
+
+## Changelog
+
+### next version
+
+- 添加Prometheus源连接器
+- 减少配置项
+
diff --git a/docs/zh/connector-v2/source/Qdrant.md b/docs/zh/connector-v2/source/Qdrant.md
new file mode 100644
index 00000000000..140ff36a395
--- /dev/null
+++ b/docs/zh/connector-v2/source/Qdrant.md
@@ -0,0 +1,79 @@
+# Qdrant
+
+> Qdrant 数据源连接器
+
+[Qdrant](https://qdrant.tech/) 是一个高性能的向量搜索引擎和向量数据库。
+
+该连接器可用于从 Qdrant 集合中读取数据。
+
+## 选项
+
+| 名称 | 类型 | 必填 | 默认值 |
+|-----------------|--------|----|-----------|
+| collection_name | string | 是 | - |
+| schema | config | 是 | - |
+| host | string | 否 | localhost |
+| port | int | 否 | 6334 |
+| api_key | string | 否 | - |
+| use_tls | bool | 否 | false |
+| common-options | | 否 | - |
+
+### collection_name [string]
+
+要从中读取数据的 Qdrant 集合的名称。
+
+### schema [config]
+
+要将数据读取到的表的模式。
+
+例如:
+
+```hocon
+schema = {
+ fields {
+ age = int
+ address = string
+ some_vector = float_vector
+ }
+}
+```
+
+Qdrant 中的每个条目称为一个点。
+
+`float_vector` 类型的列从每个点的向量中读取,其他列从与该点关联的 JSON 有效负载中读取。
+
+如果某一列被标记为主键,Qdrant 点的 ID 将写入该列。该列可以是 `"string"` 或 `"int"` 类型,因为 Qdrant 仅[允许](https://qdrant.tech/documentation/concepts/points/#point-ids)使用正整数和 UUID 作为点 ID。
+
+如果集合是用单个默认/未命名向量创建的,请使用 `default_vector` 作为向量名称。
+
+```hocon
+schema = {
+ fields {
+ age = int
+ address = string
+ default_vector = float_vector
+ }
+}
+```
+
+Qdrant 中点的 ID 将写入标记为主键的列中。它可以是 `int` 或 `string` 类型。
+
+### host [string]
+
+Qdrant 实例的主机名。默认为 "localhost"。
+
+### port [int]
+
+Qdrant 实例的 gRPC 端口。
+
+### api_key [string]
+
+用于身份验证的 API 密钥(如果设置)。
+
+### use_tls [bool]
+
+是否使用 TLS(SSL)连接。如果使用 Qdrant 云(https),则需要。
+
+### 通用选项
+
+源插件的通用参数,请参考[源通用选项](../source-common-options.md)了解详情。
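+
+## 示例
+
+下面给出一个示意性的完整源配置,其中集合名、主机、API 密钥与字段均为假设值,仅用于说明各选项如何组合使用:
+
+```hocon
+source {
+  Qdrant {
+    collection_name = "test_collection"
+    host = "localhost"
+    port = 6334
+    api_key = "xxx"
+    schema = {
+      fields {
+        age = int
+        address = string
+        some_vector = float_vector
+      }
+    }
+  }
+}
+```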
diff --git a/docs/zh/connector-v2/source/Sls.md b/docs/zh/connector-v2/source/Sls.md
new file mode 100644
index 00000000000..59b00df0850
--- /dev/null
+++ b/docs/zh/connector-v2/source/Sls.md
@@ -0,0 +1,87 @@
+# Sls
+
+> Sls 源连接器
+
+## 支持的引擎
+
+> Spark
+> Flink
+> SeaTunnel Zeta
+
+## 主要特性
+
+- [x] [batch](../../concept/connector-v2-features.md)
+- [x] [stream](../../concept/connector-v2-features.md)
+- [x] [exactly-once](../../concept/connector-v2-features.md)
+- [ ] [column projection](../../concept/connector-v2-features.md)
+- [x] [parallelism](../../concept/connector-v2-features.md)
+- [ ] [support user-defined split](../../concept/connector-v2-features.md)
+
+## 描述
+
+从阿里云Sls日志服务中读取数据。
+
+## 支持的数据源信息
+
+为了使用Sls连接器,需要以下依赖关系。
+它们可以通过install-plugin.sh或Maven中央存储库下载。
+
+| 数据源 | 支持的版本 | Maven |
+|-----|-----------|-----------------------------------------------------------------------------------|
+| Sls | Universal | [Download](https://mvnrepository.com/artifact/org.apache.seatunnel/connector-sls) |
+
+## Source Options
+
+| Name | Type | Required | Default | Description |
+|-------------------------------------|---------------------------------------------|----------|--------------------------|------------------------------------------------------------------------------------------------------------------------------------|
+| project | String | Yes | - | [阿里云 Sls 项目](https://help.aliyun.com/zh/sls/user-guide/manage-a-project?spm=a2c4g.11186623.0.0.6f9755ebyfaYSl) |
+| logstore | String | Yes | - | [阿里云 Sls 日志库](https://help.aliyun.com/zh/sls/user-guide/manage-a-logstore?spm=a2c4g.11186623.0.0.13137c08nfuiBC) |
+| endpoint | String | Yes | - | [阿里云访问服务点](https://help.aliyun.com/zh/sls/developer-reference/api-sls-2020-12-30-endpoint?spm=a2c4g.11186623.0.0.548945a8UyJULa) |
+| access_key_id | String | Yes | - | [阿里云访问用户ID](https://help.aliyun.com/zh/ram/user-guide/create-an-accesskey-pair?spm=a2c4g.11186623.0.0.4a6e4e554CKhSc#task-2245479) |
+| access_key_secret | String | Yes | - | [阿里云访问用户密码](https://help.aliyun.com/zh/ram/user-guide/create-an-accesskey-pair?spm=a2c4g.11186623.0.0.4a6e4e554CKhSc#task-2245479) |
+| start_mode | StartMode[earliest],[group_cursor],[latest] | No | group_cursor | 消费者的初始消费模式 |
+| consumer_group | String | No | SeaTunnel-Consumer-Group | Sls消费者组id,用于区分不同的消费者组 |
+| auto_cursor_reset | CursorMode[begin],[end] | No | end | 当消费者组中没有记录读取游标时,初始化读取游标 |
+| batch_size | Int | No | 1000 | 每次从SLS中读取的数据量 |
+| partition-discovery.interval-millis | Long | No | -1 | 动态发现主题和分区的间隔 |
+
+## 任务示例
+
+### 简单示例
+
+> 此示例读取 Sls 的 logstore1 的数据并将其打印到客户端。如果您尚未安装和部署 SeaTunnel,请参考 [安装指南](../../start-v2/locally/deployment.md) 进行安装和部署,然后参考 [快速开始使用 SeaTunnel Engine](../../start-v2/locally/quick-start-seatunnel-engine.md) 运行此作业。
+
+[创建RAM用户及授权](https://help.aliyun.com/zh/sls/create-a-ram-user-and-authorize-the-ram-user-to-access-log-service?spm=a2c4g.11186623.0.i4), 请确认RAM用户有足够的权限来读取及管理数据,参考:[RAM自定义授权示例](https://help.aliyun.com/zh/sls/use-custom-policies-to-grant-permissions-to-a-ram-user?spm=a2c4g.11186623.0.0.4a6e4e554CKhSc#reference-s3z-m1l-z2b)
+
+```hocon
+# Defining the runtime environment
+env {
+ parallelism = 2
+ job.mode = "STREAMING"
+ checkpoint.interval = 30000
+}
+
+source {
+ Sls {
+ endpoint = "cn-hangzhou-intranet.log.aliyuncs.com"
+ project = "project1"
+ logstore = "logstore1"
+ access_key_id = "xxxxxxxxxxxxxxxxxxxxxxxx"
+ access_key_secret = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+ schema = {
+ fields = {
+ id = "int"
+ name = "string"
+ description = "string"
+ weight = "string"
+ }
+ }
+ }
+}
+
+sink {
+ Console {
+ }
+}
+```
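+
+### 指定消费起始位置(示意)
+
+下面是一个示意片段,演示如何组合使用 `start_mode`、`consumer_group` 与 `auto_cursor_reset` 等选项;其中的取值仅为示例,可选值请以上方选项表为准:
+
+```hocon
+source {
+  Sls {
+    endpoint = "cn-hangzhou-intranet.log.aliyuncs.com"
+    project = "project1"
+    logstore = "logstore1"
+    access_key_id = "xxxxxxxxxxxxxxxxxxxxxxxx"
+    access_key_secret = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+    # 从最早可读位置开始消费
+    start_mode = "earliest"
+    consumer_group = "seatunnel-consumer-group"
+    # 消费组中没有游标记录时,从日志库起始位置初始化游标
+    auto_cursor_reset = "begin"
+    batch_size = 1000
+  }
+}
+```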
+
diff --git a/docs/zh/connector-v2/source/TiDB-CDC.md b/docs/zh/connector-v2/source/TiDB-CDC.md
new file mode 100644
index 00000000000..a2f4ba21af4
--- /dev/null
+++ b/docs/zh/connector-v2/source/TiDB-CDC.md
@@ -0,0 +1,121 @@
+# TiDB CDC
+
+> TiDB CDC 源连接器
+
+## 支持的引擎
+
+> SeaTunnel Zeta
+> Flink
+
+## 主要功能
+
+- [ ] [批处理](../../concept/connector-v2-features.md)
+- [x] [流处理](../../concept/connector-v2-features.md)
+- [x] [精确一次](../../concept/connector-v2-features.md)
+- [ ] [column projection](../../concept/connector-v2-features.md)
+- [x] [并行度](../../concept/connector-v2-features.md)
+- [ ] [支持用户定义的拆分](../../concept/connector-v2-features.md)
+
+## Description
+
+TiDB-CDC连接器允许从 TiDB 数据库读取快照数据和增量数据。本文将介绍如何设置 TiDB-CDC 连接器,在 TiDB 数据库中对数据进行快照和捕获流事件。
+
+## 支持的数据源信息
+
+| 数据源 | 支持的版本 | 驱动 | Maven |
+|------------------|------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------|----------------------------------------------------------------------|
+| MySQL | [MySQL](https://dev.mysql.com/doc): 5.5, 5.6, 5.7, 8.0.x [RDS MySQL](https://www.aliyun.com/product/rds/mysql): 5.6, 5.7, 8.0.x | com.mysql.cj.jdbc.Driver | https://mvnrepository.com/artifact/mysql/mysql-connector-java/8.0.28 |
+| tikv-client-java | 3.2.0 | - | https://mvnrepository.com/artifact/org.tikv/tikv-client-java/3.2.0 |
+
+## Using Dependency
+
+### 安装驱动
+
+#### 在 Flink 引擎下
+
+> 1. 你需要确保 [jdbc 驱动 jar 包](https://mvnrepository.com/artifact/mysql/mysql-connector-java) 和 [tikv-client-java jar 包](https://mvnrepository.com/artifact/org.tikv/tikv-client-java/3.2.0) 已经放在目录 `${SEATUNNEL_HOME}/plugins/` 下。
+
+#### 在 SeaTunnel Zeta 引擎下
+
+> 1. 你需要确保 [jdbc 驱动 jar 包](https://mvnrepository.com/artifact/mysql/mysql-connector-java) 和 [tikv-client-java jar 包](https://mvnrepository.com/artifact/org.tikv/tikv-client-java/3.2.0) 已经放在目录 `${SEATUNNEL_HOME}/lib/` 下。
+
+请下载 MySQL 驱动和 tikv-client-java 并将其放在 `${SEATUNNEL_HOME}/lib/` 目录中,例如:`cp mysql-connector-java-xxx.jar $SEATUNNEL_HOME/lib/`
+
+## 数据类型映射
+
+| Mysql Data Type | SeaTunnel Data Type |
+|------------------------------------------------------------------------------------------------|---------------------|
+| BIT(1) TINYINT(1) | BOOLEAN |
+| TINYINT | TINYINT |
+| TINYINT UNSIGNED SMALLINT | SMALLINT |
+| SMALLINT UNSIGNED MEDIUMINT MEDIUMINT UNSIGNED INT INTEGER YEAR | INT |
+| INT UNSIGNED INTEGER UNSIGNED BIGINT | BIGINT |
+| BIGINT UNSIGNED | DECIMAL(20,0) |
+| DECIMAL(p, s) DECIMAL(p, s) UNSIGNED NUMERIC(p, s) NUMERIC(p, s) UNSIGNED | DECIMAL(p,s) |
+| FLOAT FLOAT UNSIGNED | FLOAT |
+| DOUBLE DOUBLE UNSIGNED REAL REAL UNSIGNED | DOUBLE |
+| CHAR VARCHAR TINYTEXT MEDIUMTEXT TEXT LONGTEXT ENUM JSON ENUM | STRING |
+| DATE | DATE |
+| TIME(s) | TIME(s) |
+| DATETIME TIMESTAMP(s) | TIMESTAMP(s) |
+| BINARY VARBINARY BIT(p) TINYBLOB MEDIUMBLOB BLOB LONGBLOB GEOMETRY                              | BYTES               |
+
+## Source Options
+
+| Name | Type | Required | Default | Description |
+|------------------------------|---------|----------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| base-url | String | Yes | - | The URL of the JDBC connection. Refer to a case: `jdbc:mysql://tidb0:4000/inventory`. |
+| username | String | Yes | - | Name of the database to use when connecting to the database server. |
+| password | String | Yes | - | Password to use when connecting to the database server. |
+| pd-addresses | String | Yes | - | TiKV cluster's PD address |
+| database-name | String | Yes | - | Database name of the database to monitor. |
+| table-name | String | Yes | - | Table name of the database to monitor. The table name needs to include the database name. |
+| startup.mode | Enum | No | INITIAL | Optional startup mode for TiDB CDC consumer, valid enumerations are `initial`, `earliest`, `latest` and `specific`. `initial`: Synchronize historical data at startup, and then synchronize incremental data. `earliest`: Startup from the earliest offset possible. `latest`: Startup from the latest offset. `specific`: Startup from user-supplied specific offsets. |
+| tikv.grpc.timeout_in_ms | Long | No | - | TiKV GRPC timeout in ms. |
+| tikv.grpc.scan_timeout_in_ms | Long | No | - | TiKV GRPC scan timeout in ms. |
+| tikv.batch_get_concurrency | Integer | No | - | TiKV GRPC batch get concurrency |
+| tikv.batch_scan_concurrency | Integer | No | - | TiKV GRPC batch scan concurrency |
+
+## 任务示例
+
+### 简单示例
+
+```
+env {
+ parallelism = 1
+ job.mode = "STREAMING"
+ checkpoint.interval = 5000
+}
+
+source {
+ TiDB-CDC {
+ plugin_output = "products_tidb_cdc"
+ base-url = "jdbc:mysql://tidb0:4000/inventory"
+ driver = "com.mysql.cj.jdbc.Driver"
+ tikv.grpc.timeout_in_ms = 20000
+ pd-addresses = "pd0:2379"
+ username = "root"
+ password = ""
+ database-name = "inventory"
+ table-name = "products"
+ }
+}
+
+transform {
+}
+
+sink {
+ jdbc {
+ plugin_input = "products_tidb_cdc"
+ url = "jdbc:mysql://tidb0:4000/inventory"
+ driver = "com.mysql.cj.jdbc.Driver"
+ user = "root"
+ password = ""
+ database = "inventory"
+ table = "products_sink"
+ generate_sink_sql = true
+ primary_keys = ["id"]
+ }
+}
+```
+
diff --git a/docs/zh/connector-v2/source/Typesense.md b/docs/zh/connector-v2/source/Typesense.md
new file mode 100644
index 00000000000..35f04e23a27
--- /dev/null
+++ b/docs/zh/connector-v2/source/Typesense.md
@@ -0,0 +1,79 @@
+# Typesense
+
+> Typesense 源连接器
+
+## 描述
+
+从 Typesense 读取数据。
+
+## 主要功能
+
+- [x] [批处理](../../concept/connector-v2-features.md)
+- [ ] [流处理](../../concept/connector-v2-features.md)
+- [ ] [精确一次](../../concept/connector-v2-features.md)
+- [x] [Schema](../../concept/connector-v2-features.md)
+- [x] [并行度](../../concept/connector-v2-features.md)
+- [ ] [支持用户定义的拆分](../../concept/connector-v2-features.md)
+
+## 选项
+
+| 名称 | 类型 | 必填 | 默认值 |
+|------------|--------|----|-----|
+| hosts | array | 是 | - |
+| collection | string | 是 | - |
+| schema | config | 是 | - |
+| api_key | string | 否 | - |
+| query | string | 否 | - |
+| batch_size | int | 否 | 100 |
+
+### hosts [array]
+
+Typesense的访问地址,格式为 `host:port`,例如:["typesense-01:8108"]
+
+### collection [string]
+
+要读取的集合名,例如:“seatunnel”
+
+### schema [config]
+
+typesense 需要读取的列。有关更多信息,请参阅:[guide](../../concept/schema-feature.md#how-to-declare-type-supported)。
+
+### api_key [string]
+
+typesense 安全认证的 api_key。
+
+### batch_size
+
+读取数据时,每批次查询数量
+
+### 常用选项
+
+Source 插件常用参数,具体请参考 [Source 常用选项](../source-common-options.md)
+
+## 示例
+
+```hocon
+source {
+ Typesense {
+ hosts = ["localhost:8108"]
+ collection = "companies"
+ api_key = "xyz"
+ query = "q=*&filter_by=num_employees:>9000"
+ schema = {
+ fields {
+        company_name_list = array<string>
+ company_name = string
+ num_employees = long
+ country = string
+ id = string
+ c_row = {
+ c_int = int
+ c_string = string
+          c_array_int = array<int>
+ }
+ }
+ }
+ }
+}
+```
+
diff --git a/docs/zh/contribution/how-to-create-your-connector.md b/docs/zh/contribution/how-to-create-your-connector.md
index 3aef1b140c2..c8157fbb992 100644
--- a/docs/zh/contribution/how-to-create-your-connector.md
+++ b/docs/zh/contribution/how-to-create-your-connector.md
@@ -1,4 +1,4 @@
-## 开发自己的Connector
+# 开发自己的Connector
如果你想针对SeaTunnel新的连接器API开发自己的连接器(Connector V2),请查看[这里](https://github.com/apache/seatunnel/blob/dev/seatunnel-connectors-v2/README.zh.md) 。
diff --git a/docs/zh/contribution/setup.md b/docs/zh/contribution/setup.md
index 3527f72c1dc..662663a4961 100644
--- a/docs/zh/contribution/setup.md
+++ b/docs/zh/contribution/setup.md
@@ -75,39 +75,47 @@ Apache SeaTunnel 使用 `Spotless` 来统一代码风格和格式检查。可以
完成上面所有的工作后,环境搭建已经完成, 可以直接运行我们的示例了。 所有的示例在 `seatunnel-examples` 模块里, 你可以随意选择进行编译和调试,参考 [running or debugging
it in IDEA](https://www.jetbrains.com/help/idea/run-debug-configuration.html)。
-我们使用 `seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/java/org/apache/seatunnel/example/flink/v2/SeaTunnelApiExample.java`
+我们使用 `seatunnel-examples/seatunnel-engine-examples/src/main/java/org/apache/seatunnel/example/engine/SeaTunnelEngineLocalExample.java`
作为示例, 运行成功后的输出如下:
```log
-+I[Ricky Huo, 71]
-+I[Gary, 12]
-+I[Ricky Huo, 93]
-...
-...
-+I[Ricky Huo, 83]
+2024-08-10 11:45:32,839 INFO org.apache.seatunnel.core.starter.seatunnel.command.ClientExecuteCommand -
+***********************************************
+ Job Statistic Information
+***********************************************
+Start Time : 2024-08-10 11:45:30
+End Time : 2024-08-10 11:45:32
+Total Time(s) : 2
+Total Read Count : 5
+Total Write Count : 5
+Total Failed Count : 0
+***********************************************
```
## 更多信息
所有的实例都用了简单的 source 和 sink, 这样可以使得运行更独立和更简单。
你可以修改 `resources/examples` 中的示例的配置。 例如下面的配置使用 PostgreSQL 作为源,并且输出到控制台。
+请注意引用FakeSource 和 Console 以外的连接器时,需要修改seatunnel-example对应子模块下的`pom.xml`文件中的依赖。
```conf
env {
parallelism = 1
+ job.mode = "BATCH"
}
-
source {
- JdbcSource {
- driver = org.postgresql.Driver
- url = "jdbc:postgresql://host:port/database"
- username = postgres
- query = "select * from test"
- }
+ Jdbc {
+ driver = org.postgresql.Driver
+ url = "jdbc:postgresql://host:port/database"
+ username = postgres
+ password = "123456"
+ query = "select * from test"
+ table_path = "database.test"
+ }
}
sink {
- ConsoleSink {}
+ Console {}
}
```
diff --git a/docs/zh/faq.md b/docs/zh/faq.md
index 8f0a3a83fef..aa84d420509 100644
--- a/docs/zh/faq.md
+++ b/docs/zh/faq.md
@@ -1,56 +1,108 @@
# 常见问题解答
-## 为什么要安装Spark或者Flink这样的计算引擎?
-
-SeaTunnel现在使用Spark、Flink等计算引擎来完成资源调度和节点通信,因此我们可以专注于数据同步的易用性和高性能组件的开发。 但这只是暂时的。
+## SeaTunnel 支持哪些数据来源和数据目的地?
+SeaTunnel 支持多种数据来源和数据目的地,您可以在官网找到详细的列表:
+SeaTunnel 支持的数据来源(Source)列表:https://seatunnel.apache.org/docs/connector-v2/source
+SeaTunnel 支持的数据目的地(Sink)列表:https://seatunnel.apache.org/docs/connector-v2/sink
+
+## SeaTunnel 是否支持批处理和流处理?
+SeaTunnel 支持批流一体,SeaTunnel 可以设置批处理和流处理两种模式。您可以根据具体的业务场景和需求选择合适的处理模式。批处理适合定时数据同步场景,而流处理适合实时同步和数据变更捕获 (CDC) 场景。
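+
+处理模式通过作业配置中 `env` 块的 `job.mode` 参数指定,下面是一个简单示例:
+
+```hocon
+env {
+  parallelism = 1
+  # 批处理模式;如需流处理,将其改为 "STREAMING"
+  job.mode = "BATCH"
+}
+```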
+
+## 使用 SeaTunnel 需要安装 Spark 或者 Flink 这样的引擎么?
+不需要。SeaTunnel 支持 Zeta、Spark 和 Flink 三种同步引擎,任选其一即可。社区尤其推荐使用 Zeta,这是专为数据同步场景打造的新一代高性能同步引擎,被社区用户亲切地称为“泽塔奥特曼”!
+社区对 Zeta 的支持力度最大,功能也更丰富。
+
+## SeaTunnel 支持的数据转换功能有哪些?
+SeaTunnel 支持多种数据转换功能,包括字段映射、数据过滤、数据格式转换等。可以通过在配置文件中定义 `transform` 模块来实现数据转换。详情请参考 SeaTunnel [Transform 文档](https://seatunnel.apache.org/docs/transform-v2)。
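+
+例如,下面是一个使用 SQL Transform 进行字段映射与数据过滤的示意片段,其中表名与字段均为假设值:
+
+```hocon
+transform {
+  Sql {
+    # 将 name 转为大写,并过滤掉 age 小于等于 18 的记录
+    query = "select id, upper(name) as name, age from fake_table where age > 18"
+  }
+}
+```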
+
+## SeaTunnel 是否可以自定义数据清洗规则?
+SeaTunnel 支持自定义数据清洗规则。可以在 `transform` 模块中配置自定义规则,例如清理脏数据、删除无效记录或字段转换。
+
+## SeaTunnel 是否支持实时增量同步?
+SeaTunnel 支持增量数据同步。例如通过 CDC 连接器实现对数据库的增量同步,适用于需要实时捕获数据变更的场景。
+
+## SeaTunnel 目前支持哪些数据源的 CDC ?
+目前支持 MongoDB CDC、MySQL CDC、Opengauss CDC、Oracle CDC、PostgreSQL CDC、Sql Server CDC、TiDB CDC等,更多请查阅[Source](https://seatunnel.apache.org/docs/connector-v2/source)。
+
+## SeaTunnel CDC 同步需要的权限如何开启?
+各连接器所需的 CDC 权限开启步骤在官网对应的连接器文档中均有说明,请参照官网文档操作即可。
+
+## SeaTunnel 支持从 MySQL 备库进行 CDC 么?日志如何拉取?
+支持。SeaTunnel 通过订阅 MySQL binlog 并在同步服务器上解析 binlog 的方式实现从备库进行 CDC 同步。
+
+## SeaTunnel 是否支持无主键表的 CDC 同步?
+不支持无主键表的 CDC 同步。原因如下:
+假设上游有 2 条完全相同的数据,随后上游删除或修改了其中一条,下游由于无法区分到底是哪一条需要删除或修改,可能会把这 2 条都删除或修改掉。
+没有主键却要实现类似去重的效果,本身就有些自相矛盾,就像辨别西游记里的真假悟空,很难判断哪个才是真的。
+
+## SeaTunnel 是否支持自动建表?
+在同步任务启动之前,可以为目标端已有的表结构选择不同的处理方案。是通过 `schema_save_mode` 参数来控制的。
+`schema_save_mode` 有以下几种方式可选:
+- **`RECREATE_SCHEMA`**:当表不存在时会创建,若表已存在则删除并重新创建。
+- **`CREATE_SCHEMA_WHEN_NOT_EXIST`**:当表不存在时会创建,若表已存在则跳过创建。
+- **`ERROR_WHEN_SCHEMA_NOT_EXIST`**:当表不存在时会报错。
+- **`IGNORE`**:忽略对表的处理。
+ 目前很多 connector 已经支持了自动建表,请参考对应的 connector 文档,这里拿 Jdbc 举例,请参考 [Jdbc sink](https://seatunnel.apache.org/docs/connector-v2/sink/Jdbc#schema_save_mode-enum)
+
+## SeaTunnel 是否支持数据同步任务开始前对已有数据进行处理?
+在同步任务启动之前,可以为目标端已有的数据选择不同的处理方案。是通过 `data_save_mode` 参数来控制的。
+`data_save_mode` 有以下几种可选项:
+- **`DROP_DATA`**:保留数据库结构,删除数据。
+- **`APPEND_DATA`**:保留数据库结构,保留数据。
+- **`CUSTOM_PROCESSING`**:用户自定义处理。
+- **`ERROR_WHEN_DATA_EXISTS`**:当存在数据时,报错。
+ 目前很多 connector 已经支持了对已有数据进行处理,请参考对应的 connector 文档,这里拿 Jdbc 举例,请参考 [Jdbc sink](https://seatunnel.apache.org/docs/connector-v2/sink/Jdbc#data_save_mode-enum)
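+
+下面是一个示意性的 Jdbc sink 配置片段,演示如何同时设置 `schema_save_mode` 与 `data_save_mode`;其中连接地址、库表名等均为假设值:
+
+```hocon
+sink {
+  Jdbc {
+    url = "jdbc:mysql://localhost:3306/test"
+    driver = "com.mysql.cj.jdbc.Driver"
+    user = "root"
+    password = "123456"
+    generate_sink_sql = true
+    database = "test"
+    table = "sink_table"
+    primary_keys = ["id"]
+    # 表不存在时自动建表,已存在则跳过
+    schema_save_mode = "CREATE_SCHEMA_WHEN_NOT_EXIST"
+    # 保留目标表中已有数据,追加写入
+    data_save_mode = "APPEND_DATA"
+  }
+}
+```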
+
+## SeaTunnel 是否支持精确一致性管理?
+SeaTunnel 对部分数据源支持精确一致性,例如 MySQL、PostgreSQL 等数据库的事务写入,以确保数据在同步过程中的一致性;此外,精确一致性也取决于数据库本身是否支持。
+
+## SeaTunnel 可以定期执行任务吗?
+您可以使用 Linux 自带的 cron 定时任务实现定时数据同步,也可以结合 DolphinScheduler 等调度工具实现复杂的定时任务管理。
## 我有一个问题,我自己无法解决
-
-我在使用SeaTunnel时遇到了问题,无法自行解决。 我应该怎么办? 首先,在[问题列表](https://github.com/apache/seatunnel/issues)或[邮件列表](https://lists.apache.org/list.html?dev@seatunnel.apache.org)中搜索 )看看是否有人已经问过同样的问题并得到答案。 如果您找不到问题的答案,您可以通过[这些方式](https://github.com/apache/seatunnel#contact-us)联系社区成员寻求帮助。
+我在使用 SeaTunnel 时遇到了问题,无法自行解决。 我应该怎么办?有以下几种方式
+1、在[问题列表](https://github.com/apache/seatunnel/issues)或[邮件列表](https://lists.apache.org/list.html?dev@seatunnel.apache.org)中搜索看看是否有人已经问过同样的问题并得到答案。
+2、如果您找不到问题的答案,您可以通过[这些方式](https://github.com/apache/seatunnel#contact-us)联系社区成员寻求帮助。
+3、中国用户可以添加微信群助手:seatunnel1,加入社区交流群,也欢迎大家关注微信公众号:seatunnel。
## 如何声明变量?
-
-您想知道如何在 SeaTunnel 的配置中声明一个变量,然后在运行时动态替换该变量的值吗?
-
-从“v1.2.4”开始,SeaTunnel 支持配置中的变量替换。 该功能常用于定时或非定时离线处理,以替代时间、日期等变量。 用法如下:
-
+您想知道如何在 SeaTunnel 的配置中声明一个变量,然后在运行时动态替换该变量的值吗? 该功能常用于定时或非定时离线处理,以替代时间、日期等变量。 用法如下:
在配置中配置变量名称。 下面是一个sql转换的例子(实际上,配置文件中任何地方“key = value”中的值都可以使用变量替换):
-
```
...
transform {
- sql {
- query = "select * from user_view where city ='"${city}"' and dt = '"${date}"'"
+ Sql {
+ query = "select * from user_view where city ='${city}' and dt = '${date}'"
}
}
...
```
-以Spark Local模式为例,启动命令如下:
+以使用 SeaTunnel Zeta Local模式为例,启动命令如下:
```bash
-./bin/start-seatunnel-spark.sh \
--c ./config/your_app.conf \
--e client \
+$SEATUNNEL_HOME/bin/seatunnel.sh \
+-c $SEATUNNEL_HOME/config/your_app.conf \
-m local[2] \
--i city=shanghai \
--i date=20190319
+-i city=Singapore \
+-i date=20231110
```
-您可以使用参数“-i”或“--variable”后跟“key=value”来指定变量的值,其中key需要与配置中的变量名称相同。
+您可以使用参数“-i”或“--variable”后跟“key=value”来指定变量的值,其中key需要与配置中的变量名称相同。详情可以参考:https://seatunnel.apache.org/docs/concept/config
## 如何在配置文件中写入多行文本的配置项?
-
-当配置的文本很长并且想要将其换行时,可以使用三个双引号来指示其开始和结束:
+当配置的文本很长并且想要将其换行时,您可以使用三个双引号来指示其开始和结束:
```
var = """
- whatever you want
+Apache SeaTunnel is a
+next-generation high-performance,
+distributed, massive data integration tool.
"""
```
## 如何实现多行文本的变量替换?
-
在多行文本中进行变量替换有点麻烦,因为变量不能包含在三个双引号中:
```
@@ -61,297 +113,15 @@ your string 1
请参阅:[lightbend/config#456](https://github.com/lightbend/config/issues/456)。
-## Azkaban、Oozie、DolphinScheduler 是否支持 SeaTunnel?
-
-当然! 请参阅下面的屏幕截图:
-
-![工作流程.png](../images/workflow.png)
-
-![azkaban.png](../images/azkaban.png)
-
-## SeaTunnel是否有配置多个源的情况,例如同时在源中配置elasticsearch和hdfs?
-
-```
-env {
- ...
-}
-
-source {
- hdfs { ... }
- elasticsearch { ... }
- jdbc {...}
-}
-
-transform {
- ...
-}
-
-sink {
- elasticsearch { ... }
-}
-```
-
-## 有 HBase 插件吗?
-
-有一个 HBase 输入插件。 您可以从这里下载:https://github.com/garyelephant/waterdrop-input-hbase
-
-## 如何使用SeaTunnel将数据写入Hive?
-
-```
-env {
- spark.sql.catalogImplementation = "hive"
- spark.hadoop.hive.exec.dynamic.partition = "true"
- spark.hadoop.hive.exec.dynamic.partition.mode = "nonstrict"
-}
-
-source {
- sql = "insert into ..."
-}
-
-sink {
- // The data has been written to hive through the sql source. This is just a placeholder, it does not actually work.
- stdout {
- limit = 1
- }
-}
-```
-
-此外,SeaTunnel 在 `1.5.7` 版本之后在 `1.x` 分支中实现了 `Hive` 输出插件; 在“2.x”分支中。 Spark 引擎的 Hive 插件已从版本“2.0.5”开始支持:https://github.com/apache/seatunnel/issues/910。
-
-## SeaTunnel如何编写ClickHouse的多个实例来实现负载均衡?
-
-1.直接写分布式表(不推荐)
-
-2.在ClickHouse的多个实例前面添加代理或域名(DNS):
-
-```
-{
- output {
- clickhouse {
- host = "ck-proxy.xx.xx:8123"
- # Local table
- table = "table_name"
- }
- }
-}
-```
-
-3. 在配置文件中配置多个ClickHouse实例:
-
- ```
- {
- output {
- clickhouse {
- host = "ck1:8123,ck2:8123,ck3:8123"
- # Local table
- table = "table_name"
- }
- }
- }
- ```
-4. 使用集群模式:
-
- ```
- {
- output {
- clickhouse {
- # Configure only one host
- host = "ck1:8123"
- cluster = "clickhouse_cluster_name"
- # Local table
- table = "table_name"
- }
- }
- }
- ```
-
-## SeaTunnel 消费 Kafka 时如何解决 OOM?
-
-大多数情况下,OOM是由于没有对消费进行速率限制而导致的。 解决方法如下:
-
-对于目前Kafka的Spark消费限制:
-
-1. 假设您使用 KafkaStream 消费的 Kafka `Topic 1` 的分区数量 = N。
-
-2. 假设“Topic 1”的消息生产者(Producer)的生产速度为K条消息/秒,则向分区写入消息的速度必须一致。
-
-3、假设经过测试发现Spark Executor每核每秒的处理能力为M。
-
-可以得出以下结论:
-
-1、如果想让Spark对`Topic 1`的消耗跟上它的生产速度,那么需要 `spark.executor.cores` * `spark.executor.instances` >= K / M
-
-2、当出现数据延迟时,如果希望消耗速度不要太快,导致spark执行器OOM,那么需要配置 `spark.streaming.kafka.maxRatePerPartition` <= (`spark.executor.cores` * `spark.executor.instances`) * M / N
-
-3、一般来说,M和N都确定了,从2可以得出结论:`spark.streaming.kafka.maxRatePerPartition`的大小与`spark.executor.cores` * `spark的大小正相关 .executor.instances`,可以在增加资源`maxRatePerPartition`的同时增加,以加快消耗。
-
-![Kafka](../images/kafka.png)
-
-## 如何解决错误 `Exception in thread "main" java.lang.NoSuchFieldError: INSTANCE`?
-
-原因是Spark的CDH版本自带的httpclient.jar版本较低,而ClickHouse JDBC基于的httpclient版本是4.5.2,包版本冲突。 解决办法是将CDH自带的jar包替换为httpclient-4.5.2版本。
-
-## 我的Spark集群默认的JDK是JDK7。 安装JDK8后,如何指定SeaTunnel以JDK8启动?
-
-在 SeaTunnel 的配置文件中,指定以下配置:
-
-```shell
-spark {
- ...
- spark.executorEnv.JAVA_HOME="/your/java_8_home/directory"
- spark.yarn.appMasterEnv.JAVA_HOME="/your/java_8_home/directory"
- ...
-}
-```
-
-## 如何为 YARN 上的 SeaTunnel 指定不同的 JDK 版本?
-
-例如要设置JDK版本为JDK8,有两种情况:
-
-- YARN集群已部署JDK8,但默认JDK不是JDK8。 在 SeaTunnel 配置文件中添加两个配置:
-
- ```
- env {
- ...
- spark.executorEnv.JAVA_HOME="/your/java_8_home/directory"
- spark.yarn.appMasterEnv.JAVA_HOME="/your/java_8_home/directory"
- ...
- }
- ```
-- YARN集群未部署JDK8。 此时,启动附带JDK8的SeaTunnel。 详细操作参见:
- https://www.cnblogs.com/jasondan/p/spark-specific-jdk-version.html
-
-## Spark local[*]模式运行SeaTunnel时总是出现OOM怎么办?
-
-如果以本地模式运行,则需要修改`start-seatunnel.sh`启动脚本。 在 `spark-submit` 之后添加参数 `--driver-memory 4g` 。 一般情况下,生产环境中不使用本地模式。 因此,On YARN时一般不需要设置该参数。 有关详细信息,请参阅:[应用程序属性](https://spark.apache.org/docs/latest/configuration.html#application-properties)。
-
-## 我可以在哪里放置自己编写的插件或第三方 jdbc.jar 以供 SeaTunnel 加载?
-
-将Jar包放置在plugins目录指定结构下:
-
-```bash
-cd SeaTunnel
-mkdir -p plugins/my_plugins/lib
-cp third-part.jar plugins/my_plugins/lib
-```
-
-`my_plugins` 可以是任何字符串。
-
-## 如何在 SeaTunnel-V1(Spark) 中配置日志记录相关参数?
-
-可以通过三种方式配置日志相关参数(例如日志级别):
-
-- [不推荐] 更改默认的`$SPARK_HOME/conf/log4j.properties`。
- - 这将影响通过 `$SPARK_HOME/bin/spark-submit` 提交的所有程序。
-- [不推荐]直接在SeaTunnel的Spark代码中修改日志相关参数。
- - 这相当于写死了,每次改变都需要重新编译。
-- [推荐] 使用以下方法更改 SeaTunnel 配置文件中的日志记录配置(更改仅在 SeaTunnel >= 1.5.5 时生效):
-
- ```
- env {
- spark.driver.extraJavaOptions = "-Dlog4j.configuration=file:/log4j.properties"
- spark.executor.extraJavaOptions = "-Dlog4j.configuration=file:/log4j.properties"
- }
- source {
- ...
- }
- transform {
- ...
- }
- sink {
- ...
- }
- ```
-
-可供参考的log4j配置文件内容如下:
-
-```
-$ cat log4j.properties
-log4j.rootLogger=ERROR, console
-
-# set the log level for these components
-log4j.logger.org=ERROR
-log4j.logger.org.apache.spark=ERROR
-log4j.logger.org.spark-project=ERROR
-log4j.logger.org.apache.hadoop=ERROR
-log4j.logger.io.netty=ERROR
-log4j.logger.org.apache.zookeeper=ERROR
-
-# add a ConsoleAppender to the logger stdout to write to the console
-log4j.appender.console=org.apache.log4j.ConsoleAppender
-log4j.appender.console.layout=org.apache.log4j.PatternLayout
-# use a simple message format
-log4j.appender.console.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
-```
-
-## 如何在 SeaTunnel-V2(Spark、Flink) 中配置日志记录相关参数?
-
-目前,无法直接设置它们。 您需要修改SeaTunnel启动脚本。 相关参数在任务提交命令中指定。 具体参数请参考官方文档:
-
-- Spark官方文档:http://spark.apache.org/docs/latest/configuration.html#configuring-logging
-- Flink 官方文档:https://ci.apache.org/projects/flink/flink-docs-stable/monitoring/logging.html
-
-参考:
-
-https://stackoverflow.com/questions/27781187/how-to-stop-info-messages-displaying-on-spark-console
-
-http://spark.apache.org/docs/latest/configuration.html#configuring-logging
-
-https://medium.com/@iacomini.riccardo/spark-logging-configuration-in-yarn-faf5ba5fdb01
-
-## 如何配置SeaTunnel-E2E Test的日志记录相关参数?
-
-`seatunnel-e2e` 的 log4j 配置文件位于 `seatunnel-e2e/seatunnel-e2e-common/src/test/resources/log4j2.properties` 中。 您可以直接在配置文件中修改日志记录相关参数。
-
-例如,如果您想输出更详细的E2E Test日志,只需将配置文件中的“rootLogger.level”降级即可。
-
-## 写入 ClickHouse 时出错:ClassCastException
-
-在SeaTunnel中,不会主动转换数据类型。 Input读取数据后,对应的
-架构。 编写ClickHouse时,需要严格匹配字段类型,不匹配的情况需要解决。
-
-数据转换可以通过以下两个插件实现:
-
-1.过滤器转换插件
-2.过滤Sql插件
-
-详细数据类型转换参考:[ClickHouse数据类型检查列表](https://interestinglab.github.io/seatunnel-docs/#/en/configuration/output-plugins/Clickhouse?id=clickhouse-data-type-check-list)
-
-请参阅问题:[#488](https://github.com/apache/seatunnel/issues/488)[#382](https://github.com/apache/seatunnel/issues/382)。
-
-## SeaTunnel 如何访问经过 kerberos 验证的 HDFS、YARN、Hive 等资源?
-
-请参考:[#590](https://github.com/apache/seatunnel/issues/590)。
-
-## 如何排查 NoClassDefFoundError、ClassNotFoundException 等问题?
-
-有很大概率是Java类路径中加载了多个不同版本的对应Jar包类,是因为加载顺序冲突,而不是因为Jar确实丢失了。 修改这条SeaTunnel启动命令,在spark-submit提交部分添加如下参数,通过输出日志进行详细调试。
-
-```
-spark-submit --verbose
- ...
- --conf 'spark.driver.extraJavaOptions=-verbose:class'
- --conf 'spark.executor.extraJavaOptions=-verbose:class'
- ...
-```
-
-## 如何使用SeaTunnel跨HDFS集群同步数据?
-
-只需正确配置 hdfs-site.xml 即可。 参考:https://www.cnblogs.com/suanec/p/7828139.html。
-
-## 我想学习SeaTunnel的源代码。 我应该从哪里开始?
-
-SeaTunnel 拥有完全抽象、结构化的代码实现,很多人都选择 SeaTunnel 作为学习 Spark 的方式。 您可以从主程序入口了解源代码:SeaTunnel.java
-
-## SeaTunnel开发者开发自己的插件时,是否需要了解SeaTunnel代码? 这些插件是否应该集成到 SeaTunnel 项目中?
-
-开发者开发的插件与SeaTunnel项目无关,不需要包含您的插件代码。
-
-该插件可以完全独立于 SeaTunnel 项目,因此您可以使用 Java、Scala、Maven、sbt、Gradle 或任何您想要的方式编写它。 这也是我们推荐开发者开发插件的方式。
-## 当我导入项目时,编译器出现异常“找不到类`org.apache.seatunnel.shade.com.typesafe.config.Config`”
+## 如果想学习 SeaTunnel 的源代码,应该从哪里开始?
+SeaTunnel 拥有高度抽象、结构清晰的架构设计和代码实现,很多用户选择 SeaTunnel 作为学习大数据架构的方式。 您可以从`seatunnel-examples`模块开始了解和调试源代码:SeaTunnelEngineLocalExample.java
+具体参考:https://seatunnel.apache.org/docs/contribution/setup
+针对中国用户,如果有伙伴想贡献自己的一份力量让 SeaTunnel 更好,特别欢迎加入社区贡献者种子群,欢迎添加微信:davidzollo,添加时请注明 "参与开源共建", 群仅仅用于技术交流, 重要的事情讨论还请发到 dev@seatunnel.apache.org 邮件里进行讨论。
-首先运行“mvn install”。 在 `seatunnel-config/seatunnel-config-base` 子项目中,包 `com.typesafe.config` 已重新定位到 `org.apache.seatunnel.shade.com.typesafe.config` 并安装到 maven 本地存储库 在子项目 `seatunnel-config/seatunnel-config-shade` 中。
+## 如果想开发自己的 source、sink、transform 时,是否需要了解 SeaTunnel 所有源代码?
+不需要,您只需要关注 source、sink、transform 对应的接口即可。
+如果你想针对 SeaTunnel API 开发自己的连接器(Connector V2),请查看**[Connector Development Guide](https://github.com/apache/seatunnel/blob/dev/seatunnel-connectors-v2/README.zh.md)** 。
## JDK17环境下,提示java.lang.NoClassDefFoundError: org/apache/hadoop/shaded/com/ctc/wstx/io/InputBootstrapper怎么处理?
diff --git a/docs/zh/other-engine/flink.md b/docs/zh/other-engine/flink.md
index 856aeb78101..06f51a82b46 100644
--- a/docs/zh/other-engine/flink.md
+++ b/docs/zh/other-engine/flink.md
@@ -36,7 +36,7 @@ env {
source {
FakeSource {
row.num = 16
- result_table_name = "fake_table"
+ plugin_output = "fake_table"
schema = {
fields {
c_map = "map"
diff --git a/docs/zh/seatunnel-engine/about.md b/docs/zh/seatunnel-engine/about.md
index 9deeec82f98..09f836dc41e 100644
--- a/docs/zh/seatunnel-engine/about.md
+++ b/docs/zh/seatunnel-engine/about.md
@@ -1,7 +1,6 @@
---
-
sidebar_position: 1
--------------------
+---
# SeaTunnel Engine 简介
diff --git a/docs/zh/seatunnel-engine/checkpoint-storage.md b/docs/zh/seatunnel-engine/checkpoint-storage.md
index f0c506fdbf8..a60fdff5ae0 100644
--- a/docs/zh/seatunnel-engine/checkpoint-storage.md
+++ b/docs/zh/seatunnel-engine/checkpoint-storage.md
@@ -1,7 +1,6 @@
---
-
sidebar_position: 7
--------------------
+---
# 检查点存储
@@ -13,7 +12,7 @@ sidebar_position: 7
SeaTunnel Engine支持以下检查点存储类型:
-- HDFS (OSS,S3,HDFS,LocalFile)
+- HDFS (OSS,COS,S3,HDFS,LocalFile)
- LocalFile (本地),(已弃用: 使用HDFS(LocalFile)替代).
我们使用微内核设计模式将检查点存储模块从引擎中分离出来。这允许用户实现他们自己的检查点存储模块。
@@ -66,13 +65,48 @@ seatunnel:
fs.oss.accessKeyId: your-access-key
fs.oss.accessKeySecret: your-secret-key
fs.oss.endpoint: endpoint address
- fs.oss.credentials.provider: org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider
```
有关Hadoop Credential Provider API的更多信息,请参见: [Credential Provider API](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/CredentialProviderAPI.html).
阿里云OSS凭证提供程序实现见: [验证凭证提供](https://github.com/aliyun/aliyun-oss-java-sdk/tree/master/src/main/java/com/aliyun/oss/common/auth)
+#### COS
+
+腾讯云COS基于hdfs-file,所以你可以参考[Hadoop COS文档](https://hadoop.apache.org/docs/stable/hadoop-cos/cloud-storage/)来配置COS.
+
+除了访问公共 COS bucket 之外,访问其他 bucket 时 COS 客户端还需要提供相应的凭据。
+客户端支持多种身份验证机制,并且可以配置使用哪种机制及其使用顺序,也可以使用 com.qcloud.cos.auth.COSCredentialsProvider 的自定义实现。
+如果您使用 SimpleCredentialsProvider,则需要提供从腾讯云 API 密钥管理中获取的 secretId 和 secretKey。
+您可以这样配置:
+
+```yaml
+seatunnel:
+ engine:
+ checkpoint:
+ interval: 6000
+ timeout: 7000
+ storage:
+ type: hdfs
+ max-retained: 3
+ plugin-config:
+ storage.type: cos
+ cos.bucket: cosn://your-bucket
+ fs.cosn.credentials.provider: org.apache.hadoop.fs.cosn.auth.SimpleCredentialsProvider
+ fs.cosn.userinfo.secretId: your-secretId
+ fs.cosn.userinfo.secretKey: your-secretKey
+ fs.cosn.bucket.region: your-region
+```
+
+有关Hadoop Credential Provider API的更多信息,请参见: [Credential Provider API](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/CredentialProviderAPI.html).
+
+腾讯云COS相关配置可参考:[Tencent Hadoop-COS文档](https://doc.fincloud.tencent.cn/tcloud/Storage/COS/846365/hadoop)
+
+使用前请将如下jar添加到lib目录下:
+- [hadoop-cos-3.4.1.jar](https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-cos/3.4.1)
+- [cos_api-bundle-5.6.69.jar](https://mvnrepository.com/artifact/com.qcloud/cos_api-bundle/5.6.69)
+- [hadoop-shaded-guava-1.1.1.jar](https://mvnrepository.com/artifact/org.apache.hadoop.thirdparty/hadoop-shaded-guava/1.1.1)
+
#### S3
S3基于hdfs-file,所以你可以参考[Hadoop s3文档](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html)来配置s3。
diff --git a/docs/zh/seatunnel-engine/deployment.md b/docs/zh/seatunnel-engine/deployment.md
index d9dc8d3b39c..3ff895f47ba 100644
--- a/docs/zh/seatunnel-engine/deployment.md
+++ b/docs/zh/seatunnel-engine/deployment.md
@@ -1,7 +1,6 @@
---
-
sidebar_position: 3
--------------------
+---
# SeaTunnel Engine(Zeta) 安装部署
diff --git a/docs/zh/seatunnel-engine/download-seatunnel.md b/docs/zh/seatunnel-engine/download-seatunnel.md
index c108f4812a3..8d06a2e4f78 100644
--- a/docs/zh/seatunnel-engine/download-seatunnel.md
+++ b/docs/zh/seatunnel-engine/download-seatunnel.md
@@ -1,7 +1,6 @@
---
-
sidebar_position: 2
--------------------
+---
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
@@ -21,7 +20,7 @@ import TabItem from '@theme/TabItem';
或者您也可以通过终端下载
```shell
-export version="2.3.6"
+export version="2.3.9"
wget "https://archive.apache.org/dist/seatunnel/${version}/apache-seatunnel-${version}-bin.tar.gz"
tar -xzvf "apache-seatunnel-${version}-bin.tar.gz"
```
@@ -31,13 +30,13 @@ tar -xzvf "apache-seatunnel-${version}-bin.tar.gz"
从2.2.0-beta版本开始,二进制包不再默认提供连接器依赖,因此在第一次使用它时,您需要执行以下命令来安装连接器:(当然,您也可以从 [Apache Maven Repository](https://repo.maven.apache.org/maven2/org/apache/seatunnel/) 手动下载连接器,然后将其移动至`connectors/seatunnel`目录下)。
```bash
-sh bin/install-plugin.sh 2.3.6
+sh bin/install-plugin.sh 2.3.9
```
-如果您需要指定的连接器版本,以2.3.6为例,您需要执行如下命令
+如果您需要指定的连接器版本,以2.3.9为例,您需要执行如下命令
```bash
-sh bin/install-plugin.sh 2.3.6
+sh bin/install-plugin.sh 2.3.9
```
通常您并不需要所有的连接器插件,所以您可以通过配置`config/plugin_config`来指定您所需要的插件,例如,您只需要`connector-console`插件,那么您可以修改plugin.properties配置文件如下
diff --git a/docs/zh/seatunnel-engine/engine-jar-storage-mode.md b/docs/zh/seatunnel-engine/engine-jar-storage-mode.md
index 81dc0cacb34..b087f0a63f5 100644
--- a/docs/zh/seatunnel-engine/engine-jar-storage-mode.md
+++ b/docs/zh/seatunnel-engine/engine-jar-storage-mode.md
@@ -1,7 +1,6 @@
---
-
sidebar_position: 9
--------------------
+---
# 配置引擎 Jar 存储模式
diff --git a/docs/zh/seatunnel-engine/hybrid-cluster-deployment.md b/docs/zh/seatunnel-engine/hybrid-cluster-deployment.md
index f1deba3dec1..77805273452 100644
--- a/docs/zh/seatunnel-engine/hybrid-cluster-deployment.md
+++ b/docs/zh/seatunnel-engine/hybrid-cluster-deployment.md
@@ -1,7 +1,6 @@
---
-
sidebar_position: 5
--------------------
+---
# 部署 SeaTunnel Engine 混合模式集群
@@ -44,7 +43,7 @@ SeaTunnel Engine 基于 [Hazelcast IMDG](https://docs.hazelcast.com/imdg/4.1/)
`backup count` 是定义同步备份数量的参数。例如,如果设置为 1,则分区的备份将放置在一个其他成员上。如果设置为 2,则将放置在两个其他成员上。
-我们建议 `backup-count` 的值为 `min(1, max(5, N/2))`。 `N` 是集群节点的数量。
+我们建议 `backup-count` 的值为 `max(1, min(5, N/2))`。 `N` 是集群节点的数量。
```yaml
seatunnel:
@@ -137,6 +136,23 @@ seatunnel:
classloader-cache-mode: true
```
+### 4.6 作业调度策略
+
+当资源不足时,作业调度策略可以配置为以下两种模式:
+
+1. `WAIT`:等待资源可用。
+2. `REJECT`:拒绝作业,默认值。
+
+示例
+
+```yaml
+seatunnel:
+ engine:
+ job-schedule-strategy: WAIT
+```
+
+当`dynamic-slot: true`时,`job-schedule-strategy: WAIT` 配置会失效,将被强制修改为`job-schedule-strategy: REJECT`,因为使用动态 Slot 时该参数没有意义,作业可以直接提交。
+
## 5. 配置 SeaTunnel Engine 网络服务
所有 SeaTunnel Engine 网络相关的配置都在 `hazelcast.yaml` 文件中.
@@ -258,7 +274,6 @@ map:
fs.oss.accessKeyId: OSS access key id
fs.oss.accessKeySecret: OSS access key secret
fs.oss.endpoint: OSS endpoint
- fs.oss.credentials.provider: org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider
```
注意:使用OSS 时,确保 lib目录下有这几个jar.
@@ -307,10 +322,18 @@ mkdir -p $SEATUNNEL_HOME/logs
日志将写入 `$SEATUNNEL_HOME/logs/seatunnel-engine-server.log`
-## 8. 安装 SeaTunnel Engine 客户端
+## 8. 提交作业和管理作业
+
+### 8.1 使用 SeaTunnel Engine 客户端提交作业
+
+#### 安装 SeaTunnel Engine 客户端
您只需将 SeaTunnel Engine 节点上的 `$SEATUNNEL_HOME` 目录复制到客户端节点,并像 SeaTunnel Engine 服务器节点一样配置 `SEATUNNEL_HOME`。
-## 9. 提交作业和管理作业
+#### 提交作业和管理作业
现在集群部署完成了,您可以通过以下教程完成作业的提交和管理:[提交和管理作业](user-command.md)
+
+### 8.2 使用 REST API 提交作业
+
+SeaTunnel Engine 提供了 REST API 用于提交作业。有关详细信息,请参阅 [REST API V2](rest-api-v2.md)
\ No newline at end of file
diff --git a/docs/zh/seatunnel-engine/local-mode-deployment.md b/docs/zh/seatunnel-engine/local-mode-deployment.md
index 0230cfcca1a..e69bf426d8a 100644
--- a/docs/zh/seatunnel-engine/local-mode-deployment.md
+++ b/docs/zh/seatunnel-engine/local-mode-deployment.md
@@ -1,7 +1,6 @@
---
-
sidebar_position: 4
--------------------
+---
# 以Local模式运行作业
@@ -28,6 +27,16 @@ Local模式下每个任务都会启动一个独立的进程,任务运行完成
$SEATUNNEL_HOME/bin/seatunnel.sh --config $SEATUNNEL_HOME/config/v2.batch.config.template -e local
```
+### 配置本地模式的JVM参数
+
+本地模式支持两种设置JVM参数的方式:
+
+1. 添加JVM参数到`$SEATUNNEL_HOME/config/jvm_client_options`文件中。
+
+ 修改`$SEATUNNEL_HOME/config/jvm_client_options`文件中的JVM参数。 请注意,该文件中的JVM参数会应用到所有使用`seatunnel.sh`提交的作业。包括Local模式和集群模式。
+
+2. 在启动Local模式时添加JVM参数。例如,`$SEATUNNEL_HOME/bin/seatunnel.sh --config $SEATUNNEL_HOME/config/v2.batch.config.template -m local -DJvmOption="-Xms2G -Xmx2G"`
+
## 作业运维
Local模式下提交的作业会在提交作业的进程中运行,作业完成后进程会退出,如果要中止作业只需要退出提交作业的进程即可。作业的运行日志会输出到提交作业的进程的标准输出中。
diff --git a/docs/zh/seatunnel-engine/logging.md b/docs/zh/seatunnel-engine/logging.md
new file mode 100644
index 00000000000..f97ea572e8c
--- /dev/null
+++ b/docs/zh/seatunnel-engine/logging.md
@@ -0,0 +1,149 @@
+---
+sidebar_position: 14
+---
+
+# 日志
+
+每个 SeaTunnel Engine 进程都会创建一个日志文件,其中包含该进程中发生的各种事件的消息。这些日志提供了对 SeaTunnel Engine 内部工作原理的深入了解,可用于检测问题(以 WARN/ERROR 消息的形式)并有助于调试问题。
+
+SeaTunnel Engine 中的日志记录使用 SLF4J 日志记录接口。这允许您使用任何支持 SLF4J 的日志记录框架,而无需修改 SeaTunnel Engine 源代码。
+
+默认情况下,Log4j2 用作底层日志记录框架。
+
+## 结构化信息
+
+SeaTunnel Engine 向大多数相关日志消息的 MDC 添加了以下字段(实验性功能):
+
+- Job ID
+ - key: ST-JID
+ - format: string
+
+这在具有结构化日志记录的环境中最为有用,允许您快速过滤相关日志。
+
+MDC 由 slf4j 传播到日志后端,后者通常会自动将其添加到日志记录中(例如,在 log4j json 布局中)。或者,也可以明确配置 - log4j 模式布局可能如下所示:
+
+```properties
+[%X{ST-JID}] %c{0} %m%n
+```
+
+## 配置 Log4j2
+
+Log4j2 使用属性文件进行控制。
+
+SeaTunnel Engine 发行版在 `config` 目录中附带以下 log4j 属性文件,如果启用了 Log4j2,则会自动使用这些文件:
+
+- `log4j2_client.properties`: 由命令行客户端使用 (例如, `seatunnel.sh`)
+- `log4j2.properties`: 由 SeaTunnel 引擎服务使用 (例如, `seatunnel-cluster.sh`)
+
+默认情况下,日志文件输出到 `logs` 目录。
+
+Log4j 会定期扫描上述文件以查找更改,并根据需要调整日志记录行为。默认情况下,此检查每 60 秒进行一次,由 Log4j 属性文件中的 monitorInterval 设置控制。
+
+### 配置作业生成单独的日志文件
+
+要为每个作业输出单独的日志文件,您可以更新 `log4j2.properties` 文件中的以下配置:
+
+```properties
+...
+rootLogger.appenderRef.file.ref = routingAppender
+...
+
+appender.file.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%-30.30c{1.}] [%t] - %m%n
+...
+```
+
+此配置为每个作业生成单独的日志文件,例如:
+
+```
+job-xxx1.log
+job-xxx2.log
+job-xxx3.log
+...
+```
+
+### 配置混合日志文件
+
+*默认已采用此配置模式。*
+
+要将所有作业日志输出到 SeaTunnel Engine 系统日志文件中,您可以在 `log4j2.properties` 文件中更新以下配置:
+
+```properties
+...
+rootLogger.appenderRef.file.ref = fileAppender
+...
+
+appender.file.layout.pattern = [%X{ST-JID}] %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%-30.30c{1.}] [%t] - %m%n
+...
+```
+
+### 兼容 Log4j1/Logback
+
+SeaTunnel Engine 自动集成了大多数 Log 桥接器,允许针对 Log4j1/Logback 类工作的现有应用程序继续工作。
+
+### REST-API方式查询日志
+
+SeaTunnel 提供了一个 API,用于查询日志。
+
+**使用样例:**
+- 获取所有节点jobId为`733584788375666689`的日志信息:`http://localhost:8080/logs/733584788375666689`
+- 获取所有节点日志列表:`http://localhost:8080/logs`
+- 获取所有节点日志列表以JSON格式返回:`http://localhost:8080/logs?format=json`
+- 获取日志文件内容:`http://localhost:8080/logs/job-898380162133917698.log`
+
+有关详细信息,请参阅 [REST-API](rest-api-v2.md)。
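+
+下面给出一个最小的查询示例(假设 REST 服务端口为 8080,使用 `curl`;日志文件名仅沿用上文示例):
+
+```shell
+# 以 JSON 格式列出所有节点的日志文件
+curl "http://localhost:8080/logs?format=json"
+
+# 查看某个日志文件的内容
+curl "http://localhost:8080/logs/job-898380162133917698.log"
+```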
+
+## SeaTunnel 日志配置
+
+### 定时删除旧日志
+
+SeaTunnel 支持定时删除旧日志文件,以避免磁盘空间不足。您可以在 `seatunnel.yml` 文件中添加以下配置:
+
+```yaml
+seatunnel:
+ engine:
+ history-job-expire-minutes: 1440
+ telemetry:
+ logs:
+ scheduled-deletion-enable: true
+```
+
+- `history-job-expire-minutes`: 设置历史作业和日志的保留时间(单位:分钟)。系统将在指定的时间后自动清除过期的作业信息和日志文件。
+- `scheduled-deletion-enable`: 启用定时清理功能,默认为 `true`。系统将在作业达到 `history-job-expire-minutes` 设置的过期时间后自动删除相关日志文件。关闭该功能后,日志将永久保留在磁盘上,需要用户自行管理,否则可能占用大量磁盘空间。建议根据需求合理配置。
+
+
+## 开发人员最佳实践
+
+您可以通过调用 `org.slf4j.LoggerFactory#getLogger` 并以您的类(Class 对象)作为参数来创建 SLF4J 记录器。
+
+当然您也可以使用 lombok 注解 `@Slf4j` 来实现同样的效果
+
+```java
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TestConnector {
+ private static final Logger LOG = LoggerFactory.getLogger(TestConnector.class);
+
+ public static void main(String[] args) {
+ LOG.info("Hello world!");
+ }
+}
+```
+
+为了最大限度地利用 SLF4J,建议使用其占位符机制。使用占位符可以避免不必要的字符串构造,以防日志级别设置得太高而导致消息无法记录。
+
+占位符的语法如下:
+
+```java
+LOG.info("This message contains {} placeholders. {}", 1, "key1");
+```
+
+占位符还可以与需要记录的异常结合使用
+
+```java
+try {
+ // some code
+} catch (Exception e) {
+ LOG.error("An {} occurred", "error", e);
+}
+```
\ No newline at end of file
diff --git a/docs/zh/seatunnel-engine/resource-isolation.md b/docs/zh/seatunnel-engine/resource-isolation.md
index ea09d6a8929..5cb3db9cf35 100644
--- a/docs/zh/seatunnel-engine/resource-isolation.md
+++ b/docs/zh/seatunnel-engine/resource-isolation.md
@@ -1,48 +1,49 @@
---
-
sidebar_position: 9
--------------------
+---
-在2.3.6版本之后, SeaTunnel支持对每个实例添加`tag`, 然后在提交任务时可以在配置文件中使用`tag_filter`来选择任务将要运行的节点.
+# 资源隔离
-# 如何实现改功能
+SeaTunnel支持对每个实例添加`tag`, 然后在提交任务时可以在配置文件中使用`tag_filter`来选择任务将要运行的节点.
+
+## 配置
1. 更新`hazelcast.yaml`文件
-```yaml
-hazelcast:
- cluster-name: seatunnel
- network:
- rest-api:
- enabled: true
- endpoint-groups:
- CLUSTER_WRITE:
- enabled: true
- DATA:
+ ```yaml
+ hazelcast:
+ cluster-name: seatunnel
+ network:
+ rest-api:
enabled: true
- join:
- tcp-ip:
- enabled: true
- member-list:
- - localhost
- port:
- auto-increment: false
- port: 5801
- properties:
- hazelcast.invocation.max.retry.count: 20
- hazelcast.tcp.join.port.try.count: 30
- hazelcast.logging.type: log4j2
- hazelcast.operation.generic.thread.count: 50
- member-attributes:
- group:
- type: string
- value: platform
- team:
- type: string
- value: team1
-```
-
-在这个配置中, 我们通过`member-attributes`设置了`group=platform, team=team1`这样两个`tag`
+ endpoint-groups:
+ CLUSTER_WRITE:
+ enabled: true
+ DATA:
+ enabled: true
+ join:
+ tcp-ip:
+ enabled: true
+ member-list:
+ - localhost
+ port:
+ auto-increment: false
+ port: 5801
+ properties:
+ hazelcast.invocation.max.retry.count: 20
+ hazelcast.tcp.join.port.try.count: 30
+ hazelcast.logging.type: log4j2
+ hazelcast.operation.generic.thread.count: 50
+ member-attributes:
+ group:
+ type: string
+ value: platform
+ team:
+ type: string
+ value: team1
+ ```
+
+ 在这个配置中, 我们通过`member-attributes`设置了`group=platform, team=team1`这样两个`tag`
2. 在任务的配置中添加`tag_filter`来选择你需要运行该任务的节点
@@ -57,7 +58,7 @@ env {
}
source {
FakeSource {
- result_table_name = "fake"
+ plugin_output = "fake"
parallelism = 1
schema = {
fields {
@@ -70,14 +71,18 @@ transform {
}
sink {
console {
- source_table_name="fake"
+ plugin_input="fake"
}
}
```
-**注意:**
-- 当在任务的配置中, 没有添加`tag_filter`时, 会从所有节点中随机选择节点来运行任务.
-- 当`tag_filter`中存在多个过滤条件时, 会根据key存在以及value相等的全部匹配的节点, 当没有找到匹配的节点时, 会抛出 `NoEnoughResourceException`异常.
+ **注意:**
+ - 当在任务的配置中, 没有添加`tag_filter`时, 会从所有节点中随机选择节点来运行任务.
+ - 当`tag_filter`中存在多个过滤条件时, 会根据key存在以及value相等的全部匹配的节点, 当没有找到匹配的节点时, 会抛出 `NoEnoughResourceException`异常.
+
+ ![img.png](../../images/resource-isolation.png)
+
+3. 更新运行中node的tags (可选)
-![img.png](../../images/resource-isolation.png)
+ 获取具体的使用信息,请参考 [更新运行节点的tags](rest-api-v2.md)
diff --git a/docs/zh/seatunnel-engine/rest-api.md b/docs/zh/seatunnel-engine/rest-api-v1.md
similarity index 57%
rename from docs/zh/seatunnel-engine/rest-api.md
rename to docs/zh/seatunnel-engine/rest-api-v1.md
index 1b0166425ba..15b0cf0545d 100644
--- a/docs/zh/seatunnel-engine/rest-api.md
+++ b/docs/zh/seatunnel-engine/rest-api-v1.md
@@ -1,9 +1,14 @@
---
-
sidebar_position: 11
---------------------
+---
+
+# RESTful API V1
+
+:::caution warn
-# RESTful API
+推荐使用 v2 版本的 Rest API。v1 版本已弃用,并将在未来版本中移除。
+
+:::
SeaTunnel有一个用于监控的API,可用于查询运行作业的状态和统计信息,以及最近完成的作业。监控API是RESTful风格的,它接受HTTP请求并使用JSON数据格式进行响应。
@@ -69,7 +74,33 @@ network:
------------------------------------------------------------------------------------------
-### 返回所有作业及其当前状态的概览。
+### 返回当前节点的线程堆栈信息
+
+
+ GET
/hazelcast/rest/maps/thread-dump
(返回当前节点的线程堆栈信息。)
+
+#### 参数
+
+
+#### 响应
+
+```json
+[
+ {
+ "threadName": "",
+ "threadId": 0,
+ "threadState": "",
+ "stackTrace": ""
+ }
+]
+```
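+
+作为参考,可以用 `curl` 调用该接口(假设节点 REST 端口为 5801):
+
+```shell
+curl "http://localhost:5801/hazelcast/rest/maps/thread-dump"
+```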
+
+
+
+------------------------------------------------------------------------------------------
+
+
+### 返回所有作业及其当前状态的概览
GET
/hazelcast/rest/maps/running-jobs
(返回所有作业及其当前状态的概览。)
@@ -88,10 +119,19 @@ network:
},
"createTime": "",
"jobDag": {
- "vertices": [
+ "jobId": "",
+ "envOptions": [],
+ "vertexInfoMap": [
+ {
+ "vertexId": 1,
+ "type": "",
+ "vertexName": "",
+ "tablePaths": [
+ ""
+ ]
+ }
],
- "edges": [
- ]
+ "pipelineEdges": {}
},
"pluginJarsUrls": [
],
@@ -108,7 +148,7 @@ network:
------------------------------------------------------------------------------------------
-### 返回作业的详细信息。
+### 返回作业的详细信息
GET
/hazelcast/rest/maps/job-info/:jobId
(返回作业的详细信息。)
@@ -128,14 +168,37 @@ network:
"jobStatus": "",
"createTime": "",
"jobDag": {
- "vertices": [
+ "jobId": "",
+ "envOptions": [],
+ "vertexInfoMap": [
+ {
+ "vertexId": 1,
+ "type": "",
+ "vertexName": "",
+ "tablePaths": [
+ ""
+ ]
+ }
],
- "edges": [
- ]
+ "pipelineEdges": {}
},
"metrics": {
- "sourceReceivedCount": "",
- "sinkWriteCount": ""
+ "SourceReceivedCount": "",
+ "SourceReceivedQPS": "",
+ "SourceReceivedBytes": "",
+ "SourceReceivedBytesPerSeconds": "",
+ "SinkWriteCount": "",
+ "SinkWriteQPS": "",
+ "SinkWriteBytes": "",
+ "SinkWriteBytesPerSeconds": "",
+ "TableSourceReceivedCount": {},
+ "TableSourceReceivedBytes": {},
+ "TableSourceReceivedBytesPerSeconds": {},
+ "TableSourceReceivedQPS": {},
+ "TableSinkWriteCount": {},
+ "TableSinkWriteQPS": {},
+ "TableSinkWriteBytes": {},
+ "TableSinkWriteBytesPerSeconds": {}
},
"finishedTime": "",
"errorMsg": null,
@@ -185,10 +248,19 @@ network:
"jobStatus": "",
"createTime": "",
"jobDag": {
- "vertices": [
+ "jobId": "",
+ "envOptions": [],
+ "vertexInfoMap": [
+ {
+ "vertexId": 1,
+ "type": "",
+ "vertexName": "",
+ "tablePaths": [
+ ""
+ ]
+ }
],
- "edges": [
- ]
+ "pipelineEdges": {}
},
"metrics": {
"sourceReceivedCount": "",
@@ -220,7 +292,7 @@ network:
------------------------------------------------------------------------------------------
-### 返回所有已完成的作业信息。
+### 返回所有已完成的作业信息
GET
/hazelcast/rest/maps/finished-jobs/:state
(返回所有已完成的作业信息。)
@@ -242,7 +314,21 @@ network:
"errorMsg": null,
"createTime": "",
"finishTime": "",
- "jobDag": "",
+ "jobDag": {
+ "jobId": "",
+ "envOptions": [],
+ "vertexInfoMap": [
+ {
+ "vertexId": 1,
+ "type": "",
+ "vertexName": "",
+ "tablePaths": [
+ ""
+ ]
+ }
+ ],
+ "pipelineEdges": {}
+ },
"metrics": ""
}
]
@@ -252,7 +338,7 @@ network:
------------------------------------------------------------------------------------------
-### 返回系统监控信息。
+### 返回系统监控信息
GET
/hazelcast/rest/maps/system-monitoring-information
(返回系统监控信息。)
@@ -264,6 +350,9 @@ network:
```json
[
{
+ "isMaster": "true",
+ "host": "localhost",
+ "port": "5801",
"processors":"8",
"physical.memory.total":"16.0G",
"physical.memory.free":"16.3M",
@@ -317,7 +406,7 @@ network:
------------------------------------------------------------------------------------------
-### 提交作业。
+### 提交作业
POST
/hazelcast/rest/maps/submit-job
(如果作业提交成功,返回jobId和jobName。)
@@ -340,7 +429,7 @@ network:
"source": [
{
"plugin_name": "FakeSource",
- "result_table_name": "fake",
+ "plugin_output": "fake",
"row.num": 100,
"schema": {
"fields": {
@@ -356,7 +445,7 @@ network:
"sink": [
{
"plugin_name": "Console",
- "source_table_name": ["fake"]
+ "plugin_input": ["fake"]
}
]
}
@@ -375,7 +464,110 @@ network:
------------------------------------------------------------------------------------------
-### 停止作业。
+
+### 批量提交作业
+
+
+POST
/hazelcast/rest/maps/submit-jobs
(如果作业提交成功,返回jobId和jobName。)
+
+#### 参数(在请求体中params字段中添加)
+
+> | 参数名称 | 是否必传 | 参数类型 | 参数描述 |
+> |----------------------|----------|--------|-----------------------------------|
+> | jobId | optional | string | job id |
+> | jobName | optional | string | job name |
+> | isStartWithSavePoint | optional | string | if job is started with save point |
+
+
+
+#### 请求体
+
+```json
+[
+ {
+ "params":{
+ "jobId":"123456",
+ "jobName":"SeaTunnel-01"
+ },
+ "env": {
+ "job.mode": "batch"
+ },
+ "source": [
+ {
+ "plugin_name": "FakeSource",
+ "plugin_output": "fake",
+ "row.num": 1000,
+ "schema": {
+ "fields": {
+ "name": "string",
+ "age": "int",
+ "card": "int"
+ }
+ }
+ }
+ ],
+ "transform": [
+ ],
+ "sink": [
+ {
+ "plugin_name": "Console",
+ "plugin_input": ["fake"]
+ }
+ ]
+ },
+ {
+ "params":{
+ "jobId":"1234567",
+ "jobName":"SeaTunnel-02"
+ },
+ "env": {
+ "job.mode": "batch"
+ },
+ "source": [
+ {
+ "plugin_name": "FakeSource",
+ "plugin_output": "fake",
+ "row.num": 1000,
+ "schema": {
+ "fields": {
+ "name": "string",
+ "age": "int",
+ "card": "int"
+ }
+ }
+ }
+ ],
+ "transform": [
+ ],
+ "sink": [
+ {
+ "plugin_name": "Console",
+ "plugin_input": ["fake"]
+ }
+ ]
+ }
+]
+```
+
+#### 响应
+
+```json
+[
+ {
+ "jobId": "123456",
+ "jobName": "SeaTunnel-01"
+ },{
+ "jobId": "1234567",
+ "jobName": "SeaTunnel-02"
+ }
+]
+```
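+
+作为参考,下面是一个假设性的 `curl` 提交示例(假设将上述请求体保存为 jobs.json,节点 REST 端口为 5801):
+
+```shell
+curl -X POST -H "Content-Type: application/json" \
+  -d @jobs.json \
+  "http://localhost:5801/hazelcast/rest/maps/submit-jobs"
+```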
+
+
+
+------------------------------------------------------------------------------------------
+
+### 停止作业
POST
/hazelcast/rest/maps/stop-job
(如果作业成功停止,返回jobId。)
@@ -399,9 +591,47 @@ network:
+
+------------------------------------------------------------------------------------------
+
+### 批量停止作业
+
+
+POST
/hazelcast/rest/maps/stop-jobs
(如果作业成功停止,返回jobId。)
+
+#### 请求体
+
+```json
+[
+ {
+ "jobId": 881432421482889220,
+ "isStopWithSavePoint": false
+ },
+ {
+ "jobId": 881432456517910529,
+ "isStopWithSavePoint": false
+ }
+]
+```
+
+#### 响应
+
+```json
+[
+ {
+ "jobId": 881432421482889220
+ },
+ {
+ "jobId": 881432456517910529
+ }
+]
+```
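+
+作为参考,可以用 `curl` 批量停止作业(jobId 沿用上文示例,假设节点 REST 端口为 5801):
+
+```shell
+curl -X POST -H "Content-Type: application/json" \
+  -d '[{"jobId": 881432421482889220, "isStopWithSavePoint": false}, {"jobId": 881432456517910529, "isStopWithSavePoint": false}]' \
+  "http://localhost:5801/hazelcast/rest/maps/stop-jobs"
+```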
+
+
+
------------------------------------------------------------------------------------------
-### 加密配置。
+### 加密配置
POST
/hazelcast/rest/maps/encrypt-config
(如果配置加密成功,则返回加密后的配置。)
@@ -424,7 +654,7 @@ network:
"age": "int"
}
},
- "result_table_name": "fake",
+ "plugin_output": "fake",
"parallelism": 1,
"hostname": "127.0.0.1",
"username": "seatunnel",
@@ -464,7 +694,7 @@ network:
"age": "int"
}
},
- "result_table_name": "fake",
+ "plugin_output": "fake",
"parallelism": 1,
"hostname": "127.0.0.1",
"username": "c2VhdHVubmVs",
@@ -488,3 +718,134 @@ network:
+------------------------------------------------------------------------------------------
+
+### 更新运行节点的tags
+
+
+POST
/hazelcast/rest/maps/update-tags
因为更新只能针对于某个节点,因此需要用当前节点ip:port用于更新
(如果更新成功,则返回"success"信息)
+
+
+#### 更新节点tags
+##### 请求体
+如果请求参数是`Map`对象,表示要更新当前节点的tags
+```json
+{
+ "tag1": "dev_1",
+ "tag2": "dev_2"
+}
+```
+##### 响应
+
+```json
+{
+ "status": "success",
+ "message": "update node tags done."
+}
+```
+#### 移除节点tags
+##### 请求体
+如果参数为空`Map`对象,表示要清除当前节点的tags
+```json
+{}
+```
+##### 响应
+响应体将为:
+```json
+{
+ "status": "success",
+ "message": "update node tags done."
+}
+```
+
+#### 请求参数异常
+- 如果请求参数为空
+
+##### 响应
+
+```json
+{
+ "status": "fail",
+ "message": "Request body is empty."
+}
+```
+- 如果参数不是`Map`对象
+##### 响应
+
+```json
+{
+ "status": "fail",
+ "message": "Invalid JSON format in request body."
+}
+```
+
+
+
+------------------------------------------------------------------------------------------
+
+### 获取所有节点日志内容
+
+
+ GET
/hazelcast/rest/maps/logs/:jobId
(返回日志列表。)
+
+#### 请求参数
+
+#### 参数(在请求体中params字段中添加)
+
+> | 参数名称 | 是否必传 | 参数类型 | 参数描述 |
+> |----------------------|----------|--------|-----------------------------------|
+> | jobId | optional | string | job id |
+
+当`jobId`为空时,返回所有节点的日志信息,否则返回指定`jobId`在所有节点的日志列表。
+
+#### 响应
+
+返回请求节点的日志列表、内容
+
+#### 返回所有日志文件列表
+
+如果你想先查看日志列表,可以通过`GET`请求获取日志列表,`http://localhost:5801/hazelcast/rest/maps/logs?format=json`
+
+```json
+[
+ {
+ "node": "localhost:5801",
+ "logLink": "http://localhost:5801/hazelcast/rest/maps/logs/job-899485770241277953.log",
+ "logName": "job-899485770241277953.log"
+ },
+ {
+ "node": "localhost:5801",
+ "logLink": "http://localhost:5801/hazelcast/rest/maps/logs/job-899470314109468673.log",
+ "logName": "job-899470314109468673.log"
+ }
+]
+```
+
+当前支持的格式有`json`和`html`,默认为`html`。
+
+#### 例子
+
+获取所有节点jobId为`733584788375666689`的日志信息:`http://localhost:5801/hazelcast/rest/maps/logs/733584788375666689`
+获取所有节点日志列表:`http://localhost:5801/hazelcast/rest/maps/logs`
+获取所有节点日志列表以JSON格式返回:`http://localhost:5801/hazelcast/rest/maps/logs?format=json`
+获取日志文件内容:`http://localhost:5801/hazelcast/rest/maps/logs/job-898380162133917698.log`
+
+
+
+
+
+### 获取单节点日志内容
+
+
+ GET
/hazelcast/rest/maps/log
(返回日志列表。)
+
+#### 响应
+
+返回请求节点的日志列表
+
+#### 例子
+
+获取当前节点的日志列表:`http://localhost:5801/hazelcast/rest/maps/log`
+获取日志文件内容:`http://localhost:5801/hazelcast/rest/maps/log/job-898380162133917698.log`
+
+
diff --git a/docs/zh/seatunnel-engine/rest-api-v2.md b/docs/zh/seatunnel-engine/rest-api-v2.md
new file mode 100644
index 00000000000..0e3b3e2657d
--- /dev/null
+++ b/docs/zh/seatunnel-engine/rest-api-v2.md
@@ -0,0 +1,852 @@
+---
+sidebar_position: 12
+---
+
+# RESTful API V2
+
+SeaTunnel有一个用于监控的API,可用于查询运行作业的状态和统计信息,以及最近完成的作业。监控API是RESTful风格的,它接受HTTP请求并使用JSON数据格式进行响应。
+
+## 概述
+
+v2 版本的 API 基于 Jetty 实现,接口规范与 v1 版本相同。可以通过修改`seatunnel.yaml`中的配置项来指定端口和 context-path;
+同时可以配置 `enable-dynamic-port` 开启动态端口(默认从 `port` 开始累加),该功能默认关闭。
+如果`enable-dynamic-port`为`true`,将使用`port`到`port`+`port-range`范围内未被占用的端口,默认范围是100。
+
+```yaml
+
+seatunnel:
+ engine:
+ http:
+ enable-http: true
+ port: 8080
+ enable-dynamic-port: false
+ port-range: 100
+```
+
+同时也可以配置context-path,配置如下:
+
+```yaml
+
+seatunnel:
+ engine:
+ http:
+ enable-http: true
+ port: 8080
+ context-path: /seatunnel
+```
+
+## API参考
+
+### 返回Zeta集群的概览
+
+
+ GET
/overview?tag1=value1&tag2=value2
(Returns an overview over the Zeta engine cluster.)
+
+#### 参数
+
+> | 参数名称 | 是否必传 | 参数类型 | 参数描述 |
+> |--------|------|------|--------------------------|
+> | tag键值对 | 否 | 字符串 | 一组标签值, 通过该标签值过滤满足条件的节点信息 |
+
+#### 响应
+
+```json
+{
+ "projectVersion":"2.3.5-SNAPSHOT",
+ "gitCommitAbbrev":"DeadD0d0",
+ "totalSlot":"0",
+ "unassignedSlot":"0",
+ "works":"1",
+ "runningJobs":"0",
+ "finishedJobs":"0",
+ "failedJobs":"0",
+ "cancelledJobs":"0"
+}
+```
+
+**注意:**
+- 当你使用`dynamic-slot`时, 返回结果中的`totalSlot`和`unassignedSlot`将始终为0. 设置为固定的slot值后, 将正确返回集群中总共的slot数量以及未分配的slot数量.
+- 当添加标签过滤后, `works`, `totalSlot`, `unassignedSlot`将返回满足条件的节点的相关指标. 注意`runningJobs`等job相关指标为集群级别结果, 无法根据标签进行过滤.
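+
+作为参考,可以用 `curl` 查询集群概览(假设 REST 端口为 8080,标签键值仅为示例):
+
+```shell
+# 查询整个集群的概览
+curl "http://localhost:8080/overview"
+
+# 按标签过滤节点相关指标
+curl "http://localhost:8080/overview?tag1=value1&tag2=value2"
+```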
+
+
+
+------------------------------------------------------------------------------------------
+
+### 返回所有作业及其当前状态的概览
+
+
+ GET
/running-jobs
(返回所有作业及其当前状态的概览。)
+
+#### 参数
+
+#### 响应
+
+```json
+[
+ {
+ "jobId": "",
+ "jobName": "",
+ "jobStatus": "",
+ "createTime": "",
+ "jobDag": {
+ "jobId": "",
+ "envOptions": [],
+ "vertexInfoMap": [
+ {
+ "vertexId": 1,
+ "type": "",
+ "vertexName": "",
+ "tablePaths": [
+ ""
+ ]
+ }
+ ],
+ "pipelineEdges": {}
+ },
+ "pluginJarsUrls": [
+ ],
+ "isStartWithSavePoint": false,
+ "metrics": {
+ "sourceReceivedCount": "",
+ "sinkWriteCount": ""
+ }
+ }
+]
+```
+
+
+
+------------------------------------------------------------------------------------------
+
+### 返回作业的详细信息
+
+
+ GET
/job-info/:jobId
(返回作业的详细信息。)
+
+#### 参数
+
+> | 参数名称 | 是否必传 | 参数类型 | 参数描述 |
+> |-------|------|------|--------|
+> | jobId | 是 | long | job id |
+
+#### 响应
+
+```json
+{
+ "jobId": "",
+ "jobName": "",
+ "jobStatus": "",
+ "createTime": "",
+ "jobDag": {
+ "jobId": "",
+ "envOptions": [],
+ "vertexInfoMap": [
+ {
+ "vertexId": 1,
+ "type": "",
+ "vertexName": "",
+ "tablePaths": [
+ ""
+ ]
+ }
+ ],
+ "pipelineEdges": {}
+ },
+ "metrics": {
+ "SourceReceivedCount": "",
+ "SourceReceivedQPS": "",
+ "SourceReceivedBytes": "",
+ "SourceReceivedBytesPerSeconds": "",
+ "SinkWriteCount": "",
+ "SinkWriteQPS": "",
+ "SinkWriteBytes": "",
+ "SinkWriteBytesPerSeconds": "",
+ "TableSourceReceivedCount": {},
+ "TableSourceReceivedBytes": {},
+ "TableSourceReceivedBytesPerSeconds": {},
+ "TableSourceReceivedQPS": {},
+ "TableSinkWriteCount": {},
+ "TableSinkWriteQPS": {},
+ "TableSinkWriteBytes": {},
+ "TableSinkWriteBytesPerSeconds": {}
+ },
+ "finishedTime": "",
+ "errorMsg": null,
+ "envOptions": {
+ },
+ "pluginJarsUrls": [
+ ],
+ "isStartWithSavePoint": false
+}
+```
+
+`jobId`, `jobName`, `jobStatus`, `createTime`, `jobDag`, `metrics` 字段总会返回.
+`envOptions`, `pluginJarsUrls`, `isStartWithSavePoint` 字段在Job处于RUNNING状态时会返回
+`finishedTime`, `errorMsg` 字段在Job结束时会返回,结束状态不为RUNNING,可能为FINISHED或CANCELED
+
+当我们查询不到这个Job时,返回结果为:
+
+```json
+{
+ "jobId" : ""
+}
+```
+
+
+
+------------------------------------------------------------------------------------------
+
+### 返回作业的详细信息
+
+此API已经弃用,请使用/job-info/:jobId替代。
+
+
+ GET
/running-job/:jobId
(返回作业的详细信息。)
+
+#### 参数
+
+> | 参数名称 | 是否必传 | 参数类型 | 参数描述 |
+> |-------|------|------|--------|
+> | jobId | 是 | long | job id |
+
+#### 响应
+
+```json
+{
+ "jobId": "",
+ "jobName": "",
+ "jobStatus": "",
+ "createTime": "",
+ "jobDag": {
+ "jobId": "",
+ "envOptions": [],
+ "vertexInfoMap": [
+ {
+ "vertexId": 1,
+ "type": "",
+ "vertexName": "",
+ "tablePaths": [
+ ""
+ ]
+ }
+ ],
+ "pipelineEdges": {}
+ },
+ "metrics": {
+ "sourceReceivedCount": "",
+ "sinkWriteCount": ""
+ },
+ "finishedTime": "",
+ "errorMsg": null,
+ "envOptions": {
+ },
+ "pluginJarsUrls": [
+ ],
+ "isStartWithSavePoint": false
+}
+```
+
+`jobId`, `jobName`, `jobStatus`, `createTime`, `jobDag`, `metrics` 字段总会返回.
+`envOptions`, `pluginJarsUrls`, `isStartWithSavePoint` 字段在Job处于RUNNING状态时会返回
+`finishedTime`, `errorMsg` 字段在Job结束时会返回,结束状态不为RUNNING,可能为FINISHED或CANCELED
+
+当我们查询不到这个Job时,返回结果为:
+
+```json
+{
+ "jobId" : ""
+}
+```
+
+
+
+------------------------------------------------------------------------------------------
+
+### 返回所有已完成的作业信息
+
+
+ GET
/finished-jobs/:state
(返回所有已完成的作业信息。)
+
+#### 参数
+
+> | 参数名称 | 是否必传 | 参数类型 | 参数描述 |
+> |-------|----------|--------|------------------------------------------------------------------|
+> | state | optional | string | finished job status. `FINISHED`,`CANCELED`,`FAILED`,`UNKNOWABLE` |
+
+#### 响应
+
+```json
+[
+ {
+ "jobId": "",
+ "jobName": "",
+ "jobStatus": "",
+ "errorMsg": null,
+ "createTime": "",
+ "finishTime": "",
+ "jobDag": {
+ "jobId": "",
+ "envOptions": [],
+ "vertexInfoMap": [
+ {
+ "vertexId": 1,
+ "type": "",
+ "vertexName": "",
+ "tablePaths": [
+ ""
+ ]
+ }
+ ],
+ "pipelineEdges": {}
+ },
+ "metrics": ""
+ }
+]
+```
+
+
+
+------------------------------------------------------------------------------------------
+
+### 返回系统监控信息
+
+
+ GET
/system-monitoring-information
(返回系统监控信息。)
+
+#### 参数
+
+#### 响应
+
+```json
+[
+ {
+ "processors":"8",
+ "physical.memory.total":"16.0G",
+ "physical.memory.free":"16.3M",
+ "swap.space.total":"0",
+ "swap.space.free":"0",
+ "heap.memory.used":"135.7M",
+ "heap.memory.free":"440.8M",
+ "heap.memory.total":"576.5M",
+ "heap.memory.max":"3.6G",
+ "heap.memory.used/total":"23.54%",
+ "heap.memory.used/max":"3.73%",
+ "minor.gc.count":"6",
+ "minor.gc.time":"110ms",
+ "major.gc.count":"2",
+ "major.gc.time":"73ms",
+ "load.process":"24.78%",
+ "load.system":"60.00%",
+ "load.systemAverage":"2.07",
+ "thread.count":"117",
+ "thread.peakCount":"118",
+ "cluster.timeDiff":"0",
+ "event.q.size":"0",
+ "executor.q.async.size":"0",
+ "executor.q.client.size":"0",
+ "executor.q.client.query.size":"0",
+ "executor.q.client.blocking.size":"0",
+ "executor.q.query.size":"0",
+ "executor.q.scheduled.size":"0",
+ "executor.q.io.size":"0",
+ "executor.q.system.size":"0",
+ "executor.q.operations.size":"0",
+ "executor.q.priorityOperation.size":"0",
+ "operations.completed.count":"10",
+ "executor.q.mapLoad.size":"0",
+ "executor.q.mapLoadAllKeys.size":"0",
+ "executor.q.cluster.size":"0",
+ "executor.q.response.size":"0",
+ "operations.running.count":"0",
+ "operations.pending.invocations.percentage":"0.00%",
+ "operations.pending.invocations.count":"0",
+ "proxy.count":"8",
+ "clientEndpoint.count":"0",
+ "connection.active.count":"2",
+ "client.connection.count":"0",
+ "connection.count":"0"
+ }
+]
+```
+
+
+
+------------------------------------------------------------------------------------------
+
+### 提交作业
+
+
+POST
/submit-job
(如果作业提交成功,返回jobId和jobName。)
+
+#### 参数
+
+> | 参数名称 | 是否必传 | 参数类型 | 参数描述 |
+> |----------------------|----------|-----------------------------------|-----------------------------------|
+> | jobId | optional | string | job id |
+> | jobName | optional | string | job name |
+> | isStartWithSavePoint | optional | string | if job is started with save point |
+> | format | optional | string | 配置风格,支持json和hocon,默认 json |
+
+#### 请求体
+
+你可以选择用json或者hocon的方式来传递请求体。
+Json请求示例:
+```json
+{
+ "env": {
+ "job.mode": "batch"
+ },
+ "source": [
+ {
+ "plugin_name": "FakeSource",
+ "plugin_output": "fake",
+ "row.num": 100,
+ "schema": {
+ "fields": {
+ "name": "string",
+ "age": "int",
+ "card": "int"
+ }
+ }
+ }
+ ],
+ "transform": [
+ ],
+ "sink": [
+ {
+ "plugin_name": "Console",
+ "plugin_input": ["fake"]
+ }
+ ]
+}
+```
+
+Hocon请求示例:
+```hocon
+env {
+ job.mode = "batch"
+}
+
+source {
+ FakeSource {
+ result_table_name = "fake"
+ row.num = 100
+ schema = {
+ fields {
+ name = "string"
+ age = "int"
+ card = "int"
+ }
+ }
+ }
+}
+
+transform {
+}
+
+sink {
+ Console {
+ source_table_name = "fake"
+ }
+}
+
+```
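+
+作为参考,下面是一个假设性的 `curl` 提交示例(假设将上述 JSON 请求体保存为 job.json,REST 端口为 8080):
+
+```shell
+# 以 json 格式请求体提交作业,jobName 参数可选
+curl -X POST -H "Content-Type: application/json" \
+  -d @job.json \
+  "http://localhost:8080/submit-job?jobName=rest_api_test"
+```
+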
+#### 响应
+
+```json
+{
+ "jobId": 733584788375666689,
+ "jobName": "rest_api_test"
+}
+```
+
+
+
+------------------------------------------------------------------------------------------
+
+
+### 批量提交作业
+
+
+POST
/submit-jobs
(如果作业提交成功,返回jobId和jobName。)
+
+#### 参数(在请求体中params字段中添加)
+
+> | 参数名称 | 是否必传 | 参数类型 | 参数描述 |
+> |----------------------|----------|--------|-----------------------------------|
+> | jobId | optional | string | job id |
+> | jobName | optional | string | job name |
+> | isStartWithSavePoint | optional | string | if job is started with save point |
+
+
+
+#### 请求体
+
+```json
+[
+ {
+ "params":{
+ "jobId":"123456",
+ "jobName":"SeaTunnel-01"
+ },
+ "env": {
+ "job.mode": "batch"
+ },
+ "source": [
+ {
+ "plugin_name": "FakeSource",
+ "plugin_output": "fake",
+ "row.num": 1000,
+ "schema": {
+ "fields": {
+ "name": "string",
+ "age": "int",
+ "card": "int"
+ }
+ }
+ }
+ ],
+ "transform": [
+ ],
+ "sink": [
+ {
+ "plugin_name": "Console",
+ "plugin_input": ["fake"]
+ }
+ ]
+ },
+ {
+ "params":{
+ "jobId":"1234567",
+ "jobName":"SeaTunnel-02"
+ },
+ "env": {
+ "job.mode": "batch"
+ },
+ "source": [
+ {
+ "plugin_name": "FakeSource",
+ "plugin_output": "fake",
+ "row.num": 1000,
+ "schema": {
+ "fields": {
+ "name": "string",
+ "age": "int",
+ "card": "int"
+ }
+ }
+ }
+ ],
+ "transform": [
+ ],
+ "sink": [
+ {
+ "plugin_name": "Console",
+ "plugin_input": ["fake"]
+ }
+ ]
+ }
+]
+```
+
+#### 响应
+
+```json
+[
+ {
+ "jobId": "123456",
+ "jobName": "SeaTunnel-01"
+ },{
+ "jobId": "1234567",
+ "jobName": "SeaTunnel-02"
+ }
+]
+```
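+
+作为参考,批量提交同样可以通过 `curl` 完成(假设将上述请求体保存为 jobs.json,REST 端口为 8080):
+
+```shell
+curl -X POST -H "Content-Type: application/json" \
+  -d @jobs.json \
+  "http://localhost:8080/submit-jobs"
+```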
+
+
+
+------------------------------------------------------------------------------------------
+
+### 停止作业
+
+
+POST
/stop-job
(如果作业成功停止,返回jobId。)
+
+#### 请求体
+
+```json
+{
+ "jobId": 733584788375666689,
+ "isStopWithSavePoint": false # if job is stopped with save point
+}
+```
+
+#### 响应
+
+```json
+{
+"jobId": 733584788375666689
+}
+```
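+
+作为参考,可以用 `curl` 停止作业(jobId 沿用上文示例,假设 REST 端口为 8080):
+
+```shell
+curl -X POST -H "Content-Type: application/json" \
+  -d '{"jobId": 733584788375666689, "isStopWithSavePoint": false}' \
+  "http://localhost:8080/stop-job"
+```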
+
+
+
+
+------------------------------------------------------------------------------------------
+
+### 批量停止作业
+
+
+POST
/stop-jobs
(如果作业成功停止,返回jobId。)
+
+#### 请求体
+
+```json
+[
+ {
+ "jobId": 881432421482889220,
+ "isStopWithSavePoint": false
+ },
+ {
+ "jobId": 881432456517910529,
+ "isStopWithSavePoint": false
+ }
+]
+```
+
+#### 响应
+
+```json
+[
+ {
+ "jobId": 881432421482889220
+ },
+ {
+ "jobId": 881432456517910529
+ }
+]
+```
+
+
+
+------------------------------------------------------------------------------------------
+
+### 加密配置
+
+
+POST
/encrypt-config
(如果配置加密成功,则返回加密后的配置。)
+有关自定义加密的更多信息,请参阅文档[配置-加密-解密](../connector-v2/Config-Encryption-Decryption.md).
+
+#### 请求体
+
+```json
+{
+ "env": {
+ "parallelism": 1,
+ "shade.identifier":"base64"
+ },
+ "source": [
+ {
+ "plugin_name": "MySQL-CDC",
+ "schema" : {
+ "fields": {
+ "name": "string",
+ "age": "int"
+ }
+ },
+ "plugin_output": "fake",
+ "parallelism": 1,
+ "hostname": "127.0.0.1",
+ "username": "seatunnel",
+ "password": "seatunnel_password",
+ "table-name": "inventory_vwyw0n"
+ }
+ ],
+ "transform": [
+ ],
+ "sink": [
+ {
+ "plugin_name": "Clickhouse",
+ "host": "localhost:8123",
+ "database": "default",
+ "table": "fake_all",
+ "username": "seatunnel",
+ "password": "seatunnel_password"
+ }
+ ]
+}
+```
+
+#### 响应
+
+```json
+{
+ "env": {
+ "parallelism": 1,
+ "shade.identifier": "base64"
+ },
+ "source": [
+ {
+ "plugin_name": "MySQL-CDC",
+ "schema": {
+ "fields": {
+ "name": "string",
+ "age": "int"
+ }
+ },
+ "plugin_output": "fake",
+ "parallelism": 1,
+ "hostname": "127.0.0.1",
+ "username": "c2VhdHVubmVs",
+ "password": "c2VhdHVubmVsX3Bhc3N3b3Jk",
+ "table-name": "inventory_vwyw0n"
+ }
+ ],
+ "transform": [],
+ "sink": [
+ {
+ "plugin_name": "Clickhouse",
+ "host": "localhost:8123",
+ "database": "default",
+ "table": "fake_all",
+ "username": "c2VhdHVubmVs",
+ "password": "c2VhdHVubmVsX3Bhc3N3b3Jk"
+ }
+ ]
+}
+```
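+
+作为参考,可以用 `curl` 调用加密接口(假设将上述请求体保存为 config.json,REST 端口为 8080):
+
+```shell
+curl -X POST -H "Content-Type: application/json" \
+  -d @config.json \
+  "http://localhost:8080/encrypt-config"
+```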
+
+
+
+------------------------------------------------------------------------------------------
+
+### 更新运行节点的tags
+
+
+POST
/update-tags
因为更新只能针对于某个节点,因此需要用当前节点ip:port用于更新
(如果更新成功,则返回"success"信息)
+
+
+#### 更新节点tags
+##### 请求体
+如果请求参数是`Map`对象,表示要更新当前节点的tags
+```json
+{
+ "tag1": "dev_1",
+ "tag2": "dev_2"
+}
+```
+##### 响应
+
+```json
+{
+ "status": "success",
+ "message": "update node tags done."
+}
+```
+#### 移除节点tags
+##### 请求体
+如果参数为空`Map`对象,表示要清除当前节点的tags
+```json
+{}
+```
+##### 响应
+响应体将为:
+```json
+{
+ "status": "success",
+ "message": "update node tags done."
+}
+```
+
+#### 请求参数异常
+- 如果请求参数为空
+
+##### 响应
+
+```json
+{
+ "status": "fail",
+ "message": "Request body is empty."
+}
+```
+- 如果参数不是`Map`对象
+##### 响应
+
+```json
+{
+ "status": "fail",
+ "message": "Invalid JSON format in request body."
+}
+```
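+
+作为参考,可以用 `curl` 更新当前节点的 tags(tag 键值沿用上文示例,假设该节点 REST 端口为 8080):
+
+```shell
+# 更新节点 tags
+curl -X POST -H "Content-Type: application/json" \
+  -d '{"tag1": "dev_1", "tag2": "dev_2"}' \
+  "http://localhost:8080/update-tags"
+
+# 传入空 Map 以清除节点 tags
+curl -X POST -H "Content-Type: application/json" \
+  -d '{}' \
+  "http://localhost:8080/update-tags"
+```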
+
+
+
+------------------------------------------------------------------------------------------
+
+### 获取所有节点日志内容
+
+
+ GET
/logs/:jobId
(返回日志列表。)
+
+#### 请求参数
+
+#### 参数(在请求体中params字段中添加)
+
+> | 参数名称 | 是否必传 | 参数类型 | 参数描述 |
+> |----------------------|----------|--------|-----------------------------------|
+> | jobId | optional | string | job id |
+
+当`jobId`为空时,返回所有节点的日志信息,否则返回指定`jobId`在所有节点的日志列表。
+
+#### 响应
+
+返回请求节点的日志列表、内容
+
+#### 返回所有日志文件列表
+
+如果你想先查看日志列表,可以通过`GET`请求获取日志列表,`http://localhost:8080/logs?format=json`
+
+```json
+[
+ {
+ "node": "localhost:8080",
+ "logLink": "http://localhost:8080/logs/job-899485770241277953.log",
+ "logName": "job-899485770241277953.log"
+ },
+ {
+ "node": "localhost:8080",
+ "logLink": "http://localhost:8080/logs/job-899470314109468673.log",
+ "logName": "job-899470314109468673.log"
+ }
+]
+```
+
+当前支持的格式有`json`和`html`,默认为`html`。
+
+
+#### 例子
+
+获取所有节点jobId为`733584788375666689`的日志信息:`http://localhost:8080/logs/733584788375666689`
+获取所有节点日志列表:`http://localhost:8080/logs`
+获取所有节点日志列表以JSON格式返回:`http://localhost:8080/logs?format=json`
+获取日志文件内容:`http://localhost:8080/logs/job-898380162133917698.log`
+
+
+
+
+
+### 获取单节点日志内容
+
+
+ GET
/log
(返回日志列表。)
+
+#### 响应
+
+返回请求节点的日志列表
+
+#### 例子
+
+获取当前节点的日志列表:`http://localhost:5801/log`
+获取日志文件内容:`http://localhost:5801/log/job-898380162133917698.log`
+
+
diff --git a/docs/zh/seatunnel-engine/savepoint.md b/docs/zh/seatunnel-engine/savepoint.md
index b1bab640e5e..a6097b497e3 100644
--- a/docs/zh/seatunnel-engine/savepoint.md
+++ b/docs/zh/seatunnel-engine/savepoint.md
@@ -1,7 +1,6 @@
---
-
sidebar_position: 8
--------------------
+---
# 使用保存点和通过保存点恢复
diff --git a/docs/zh/seatunnel-engine/separated-cluster-deployment.md b/docs/zh/seatunnel-engine/separated-cluster-deployment.md
index 807fb8d28c7..bdc369ff8c0 100644
--- a/docs/zh/seatunnel-engine/separated-cluster-deployment.md
+++ b/docs/zh/seatunnel-engine/separated-cluster-deployment.md
@@ -1,7 +1,6 @@
---
-
sidebar_position: 6
--------------------
+---
# 部署 SeaTunnel Engine 分离模式集群
@@ -76,7 +75,7 @@ SeaTunnel Engine 基于 [Hazelcast IMDG](https://docs.hazelcast.com/imdg/4.1/)
`backup count` 是定义同步备份数量的参数。例如,如果设置为 1,则分区的备份将放置在一个其他成员上。如果设置为 2,则将放置在两个其他成员上。
-我们建议 `backup-count` 的值为 `min(1, max(5, N/2))`。 `N` 是集群节点的数量。
+我们建议 `backup-count` 的值为 `max(1, min(5, N/2))`。 `N` 是集群节点的数量。
```yaml
seatunnel:
@@ -272,7 +271,6 @@ map:
fs.oss.accessKeyId: OSS access key id
fs.oss.accessKeySecret: OSS access key secret
fs.oss.endpoint: OSS endpoint
- fs.oss.credentials.provider: org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider
```
注意:使用OSS 时,确保 lib目录下有这几个jar.
@@ -286,6 +284,23 @@ netty-common-4.1.89.Final.jar
seatunnel-hadoop3-3.1.4-uber.jar
```
+### 4.7 作业调度策略
+
+当资源不足时,作业调度策略可以配置为以下两种模式:
+
+1. `WAIT`:等待资源可用。
+2. `REJECT`:拒绝作业,默认值。
+
+示例
+
+```yaml
+seatunnel:
+ engine:
+ job-schedule-strategy: WAIT
+```
+
+当`dynamic-slot: true`时,`job-schedule-strategy: WAIT` 配置会失效,将被强制修改为`job-schedule-strategy: REJECT`,因为动态Slot模式下该参数没有意义,作业可以直接提交。
+
## 5. 配置 SeaTunnel Engine 网络服务
所有 SeaTunnel Engine 网络相关的配置都在 `hazelcast-master.yaml`和`hazelcast-worker.yaml` 文件中.
@@ -406,7 +421,22 @@ export SEATUNNEL_HOME=${seatunnel install path}
export PATH=$PATH:$SEATUNNEL_HOME/bin
```
-### 8.2 配置 SeaTunnel Engine 客户端
+## 8. 提交作业和管理作业
+
+### 8.1 使用 SeaTunnel Engine 客户端提交作业
+
+#### 安装 SeaTunnel Engine 客户端
+
+##### 设置和服务器一样的`SEATUNNEL_HOME`
+
+您可以通过添加 `/etc/profile.d/seatunnel.sh` 文件来配置 `SEATUNNEL_HOME` 。`/etc/profile.d/seatunnel.sh` 的内容如下:
+
+```
+export SEATUNNEL_HOME=${seatunnel install path}
+export PATH=$PATH:$SEATUNNEL_HOME/bin
+```
+
+##### 配置 SeaTunnel Engine 客户端
所有 SeaTunnel Engine 客户端的配置都在 `hazelcast-client.yaml` 里。
@@ -429,6 +459,10 @@ hazelcast-client:
- master-node-2:5801
```
-## 9. 提交作业和管理作业
+#### 提交作业和管理作业
现在集群部署完成了,您可以通过以下教程完成作业的提交和管理:[提交和管理作业](user-command.md)
+
+### 8.2 使用 REST API 提交作业
+
+SeaTunnel Engine 提供了 REST API 用于提交作业。有关详细信息,请参阅 [REST API V2](rest-api-v2.md)
diff --git a/docs/zh/seatunnel-engine/tcp.md b/docs/zh/seatunnel-engine/tcp.md
index 256bb01fe6b..1adf5c867ce 100644
--- a/docs/zh/seatunnel-engine/tcp.md
+++ b/docs/zh/seatunnel-engine/tcp.md
@@ -1,7 +1,6 @@
---
-
sidebar_position: 10
---------------------
+---
# TCP NetWork
diff --git a/docs/zh/seatunnel-engine/telemetry.md b/docs/zh/seatunnel-engine/telemetry.md
new file mode 100644
index 00000000000..b65a3d0225b
--- /dev/null
+++ b/docs/zh/seatunnel-engine/telemetry.md
@@ -0,0 +1,151 @@
+---
+sidebar_position: 14
+---
+
+# Telemetry
+
+通过 `Prometheus-exports` 集成 `Metrics`,可以更好地与相关的监控平台(如 Prometheus 和 Grafana)无缝衔接,提高对 SeaTunnel
+集群的监控和告警能力。
+
+您可以在 `seatunnel.yaml` 文件中配置监控的相关设置。
+
+以下是一个声明式配置的示例。
+
+```yaml
+seatunnel:
+ engine:
+ telemetry:
+ metric:
+ enabled: true
+```
+
+## 指标
+
+Prometheus 的[指标文本](./telemetry/metrics.txt),获取方式为 `http://{instanceHost}:5801/hazelcast/rest/instance/metrics`。
+
+OpenMetrics 的[指标文本](./telemetry/openmetrics.txt),获取方式为 `http://{instanceHost}:5801/hazelcast/rest/instance/openmetrics`。
+
+可用的指标包括以下类别。
+
+注意:所有指标都有相同的标签名 `cluster`,其值为 `hazelcast.cluster-name` 的配置。
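+
+例如,可以直接用 `curl` 拉取指标文本做快速验证(假设实例运行在本机、端口为 5801):
+
+```shell
+# Prometheus 格式
+curl "http://localhost:5801/hazelcast/rest/instance/metrics"
+
+# OpenMetrics 格式
+curl "http://localhost:5801/hazelcast/rest/instance/openmetrics"
+```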
+
+### 节点指标
+
+| MetricName | Type | Labels | 描述 |
+|-------------------------------------------|-------|------------------------------------------------------------------------------------------------------------|-------------------------------------|
+| cluster_info | Gauge | **hazelcastVersion**,hazelcast 的版本。**master**,seatunnel 主地址。 | 集群信息 |
+| cluster_time | Gauge | **hazelcastVersion**,hazelcast 的版本。 | 集群时间 |
+| node_count | Gauge | - | 集群节点总数 |
+| node_state | Gauge | **address**,服务器实例地址,例如:"127.0.0.1:5801" | seatunnel 节点是否正常 |
+| hazelcast_executor_executedCount | Gauge | **type**,执行器的类型,包括:"async" "client" "clientBlocking" "clientQuery" "io" "offloadable" "scheduled" "system" | seatunnel 集群节点的 hazelcast 执行器执行次数 |
+| hazelcast_executor_isShutdown | Gauge | **type**,执行器的类型,包括:"async" "client" "clientBlocking" "clientQuery" "io" "offloadable" "scheduled" "system" | seatunnel 集群节点的 hazelcast 执行器是否关闭 |
+| hazelcast_executor_isTerminated | Gauge | **type**,执行器的类型,包括:"async" "client" "clientBlocking" "clientQuery" "io" "offloadable" "scheduled" "system" | seatunnel 集群节点的 hazelcast 执行器是否终止 |
+| hazelcast_executor_maxPoolSize | Gauge | **type**,执行器的类型,包括:"async" "client" "clientBlocking" "clientQuery" "io" "offloadable" "scheduled" "system" | seatunnel 集群节点的 hazelcast 执行器最大池大小 |
+| hazelcast_executor_poolSize | Gauge | **type**,执行器的类型,包括:"async" "client" "clientBlocking" "clientQuery" "io" "offloadable" "scheduled" "system" | seatunnel 集群节点的 hazelcast 执行器当前池大小 |
+| hazelcast_executor_queueRemainingCapacity | Gauge | **type**,执行器的类型,包括:"async" "client" "clientBlocking" "clientQuery" "io" "offloadable" "scheduled" "system" | seatunnel 集群节点的 hazelcast 执行器剩余队列容量 |
+| hazelcast_executor_queueSize | Gauge | **type**,执行器的类型,包括:"async" "client" "clientBlocking" "clientQuery" "io" "offloadable" "scheduled" "system" | seatunnel 集群节点的 hazelcast 执行器当前队列大小 |
+| hazelcast_partition_partitionCount | Gauge | - | seatunnel 集群节点的分区数量 |
+| hazelcast_partition_activePartition | Gauge | - | seatunnel 集群节点的活跃分区数量 |
+| hazelcast_partition_isClusterSafe | Gauge | - | 分区是否安全 |
+| hazelcast_partition_isLocalMemberSafe | Gauge | - | 本地成员是否安全 |
+
+### 线程池状态
+
+| MetricName | Type | Labels | 描述 |
+|-------------------------------------|---------|-----------------------------------------|--------------------------------|
+| job_thread_pool_activeCount | Gauge | **address**,服务器实例地址,例如:"127.0.0.1:5801" | seatunnel 协调器作业执行器缓存线程池的活动线程数 |
+| job_thread_pool_corePoolSize | Gauge | **address**,服务器实例地址,例如:"127.0.0.1:5801" | seatunnel 协调器作业执行器缓存线程池的核心池大小 |
+| job_thread_pool_maximumPoolSize | Gauge | **address**,服务器实例地址,例如:"127.0.0.1:5801" | seatunnel 协调器作业执行器缓存线程池的最大池大小 |
+| job_thread_pool_poolSize | Gauge | **address**,服务器实例地址,例如:"127.0.0.1:5801" | seatunnel 协调器作业执行器缓存线程池的当前池大小 |
+| job_thread_pool_queueTaskCount | Gauge | **address**,服务器实例地址,例如:"127.0.0.1:5801" | seatunnel 协调器作业执行器缓存线程池的队列任务数 |
+| job_thread_pool_completedTask_total | Counter | **address**,服务器实例地址,例如:"127.0.0.1:5801" | seatunnel 协调器作业执行器缓存线程池的完成任务数 |
+| job_thread_pool_task_total | Counter | **address**,服务器实例地址,例如:"127.0.0.1:5801" | seatunnel 协调器作业执行器缓存线程池的总任务数 |
+| job_thread_pool_rejection_total | Counter | **address**,服务器实例地址,例如:"127.0.0.1:5801" | seatunnel 协调器作业执行器缓存线程池的拒绝任务总数 |
+
+### 作业信息详细
+
+| MetricName | Type | Labels | 描述 |
+|------------|-------|---------------------------------------------------------------------------------------------------------|---------------------|
+| job_count | Gauge | **type**,作业的类型,包括:"canceled" "cancelling" "created" "failed" "failing" "finished" "running" "scheduled" | seatunnel 集群的所有作业计数 |
+
+### JVM 指标
+
+| MetricName | Type | Labels | 描述 |
+|--------------------------------------------|---------|---------------------------------------------------------------------------------------------------------------|----------------------------------------|
+| jvm_threads_current | Gauge | - | JVM 的当前线程数 |
+| jvm_threads_daemon | Gauge | - | JVM 的守护线程数 |
+| jvm_threads_peak | Gauge | - | JVM 的峰值线程数 |
+| jvm_threads_started_total | Counter | - | JVM 启动的线程总数 |
+| jvm_threads_deadlocked | Gauge | - | JVM 线程在等待获取对象监视器或拥有的可拥有同步器时处于死锁状态的周期数 |
+| jvm_threads_deadlocked_monitor | Gauge | - | JVM 线程在等待获取对象监视器时处于死锁状态的周期数 |
+| jvm_threads_state | Gauge | **state**,JVM 线程的状态,包括:"NEW" "TERMINATED" "RUNNABLE" "BLOCKED" "WAITING" "TIMED_WAITING" "UNKNOWN" | 按状态分类的线程当前计数 |
+| jvm_classes_currently_loaded | Gauge | - | JVM 中当前加载的类的数量 |
+| jvm_classes_loaded_total | Counter | - | 自 JVM 开始执行以来加载的类的总数 |
+| jvm_classes_unloaded_total | Counter | - | 自 JVM 开始执行以来卸载的类的总数 |
+| jvm_memory_pool_allocated_bytes_total | Counter | **pool**,包括:"Code Cache" "PS Eden Space" "PS Old Gen" "PS Survivor Space" "Compressed Class Space" "Metaspace" | 在给定 JVM 内存池中分配的总字节数。仅在垃圾收集后更新,而不是持续更新。 |
+| jvm_gc_collection_seconds_count | Summary | **gc**,包括:"PS Scavenge" "PS MarkSweep" | 在给定 JVM 垃圾收集器中花费的时间(以秒为单位) |
+| jvm_gc_collection_seconds_sum | Summary | **gc**,包括:"PS Scavenge" "PS MarkSweep" | 在给定 JVM 垃圾收集器中花费的时间(以秒为单位) |
+| jvm_info | Gauge | **runtime**,例如:“Java(TM) SE Runtime Environment”。**vendor**,例如:“Oracle Corporation”。**version**,例如:“1.8.0_212-b10” | VM 版本信息 |
+| process_cpu_seconds_total | Counter | - | 用户和系统 CPU 时间总计,以秒为单位 |
+| process_start_time_seconds | Gauge | - | 进程自 Unix 纪元以来的启动时间,以秒为单位 |
+| process_open_fds | Gauge | - | 打开的文件描述符数量 |
+| process_max_fds | Gauge | - | 最大打开的文件描述符数量 |
+| jvm_memory_objects_pending_finalization | Gauge | - | 等待最终化队列中的对象数量 |
+| jvm_memory_bytes_used | Gauge | **area**,包括: "heap" "noheap" | 给定 JVM 内存区域使用的字节数 |
+| jvm_memory_bytes_committed | Gauge | **area**,包括: "heap" "noheap" | 给定 JVM 内存区域的提交字节数 |
+| jvm_memory_bytes_max | Gauge | **area**,包括: "heap" "noheap" | 给定 JVM 内存区域的最大字节数 |
+| jvm_memory_bytes_init | Gauge | **area**,包括: "heap" "noheap" | 给定 JVM 内存区域的初始字节数 |
+| jvm_memory_pool_bytes_used | Gauge | **pool**,包括:"Code Cache" "PS Eden Space" "PS Old Gen" "PS Survivor Space" "Compressed Class Space" "Metaspace" | 给定 JVM 内存池使用的字节数 |
+| jvm_memory_pool_bytes_committed | Gauge | **pool**,包括:"Code Cache" "PS Eden Space" "PS Old Gen" "PS Survivor Space" "Compressed Class Space" "Metaspace" | 给定 JVM 内存池的提交字节数 |
+| jvm_memory_pool_bytes_max | Gauge | **pool**,包括:"Code Cache" "PS Eden Space" "PS Old Gen" "PS Survivor Space" "Compressed Class Space" "Metaspace" | 给定 JVM 内存池的最大字节数 |
+| jvm_memory_pool_bytes_init | Gauge | **pool**,包括:"Code Cache" "PS Eden Space" "PS Old Gen" "PS Survivor Space" "Compressed Class Space" "Metaspace" | 给定 JVM 内存池的初始字节数 |
+| jvm_memory_pool_allocated_bytes_created | Gauge | **pool**,包括:"Code Cache" "PS Eden Space" "PS Old Gen" "PS Survivor Space" "Compressed Class Space" "Metaspace" | 给定 JVM 内存池中创建的总字节数。仅在 GC 后更新,而不是持续更新 |
+| jvm_memory_pool_collection_used_bytes | Gauge | **pool**,包括:"PS Eden Space" "PS Old Gen" "PS Survivor Space" | 给定 JVM 内存池在最后一次回收后的使用字节数 |
+| jvm_memory_pool_collection_committed_bytes | Gauge | **pool**,包括:"PS Eden Space" "PS Old Gen" "PS Survivor Space" | 给定 JVM 内存池在最后一次回收后的提交字节数 |
+| jvm_memory_pool_collection_max_bytes | Gauge | **pool**,包括:"PS Eden Space" "PS Old Gen" "PS Survivor Space" | 给定 JVM 内存池在最后一次回收后的最大字节数 |
+| jvm_memory_pool_collection_init_bytes | Gauge | **pool**,包括:"PS Eden Space" "PS Old Gen" "PS Survivor Space" | 给定 JVM 内存池在最后一次回收后的初始字节数 |
+| jvm_buffer_pool_used_bytes | Gauge | **pool**,包括:"direct" "mapped" | 给定 JVM 缓冲池使用的字节数 |
+| jvm_buffer_pool_capacity_bytes | Gauge | **pool**,包括:"direct" "mapped" | 给定 JVM 缓冲池的字节容量 |
+| jvm_buffer_pool_used_buffers | Gauge | **pool**,包括:"direct" "mapped" | 给定 JVM 缓冲池使用的缓冲区 |
+
+## 通过 Prometheus 和 Grafana 进行集群监控
+
+### 安装 Prometheus
+
+有关如何设置 Prometheus 服务器的指南,请访问
+[安装](https://prometheus.io/docs/prometheus/latest/installation)
+
+### 配置 Prometheus
+
+将 SeaTunnel 实例指标导出添加到 `/etc/prometheus/prometheus.yaml` 中。例如:
+
+```yaml
+global:
+ # 从此作业中抓取目标的频率。
+ scrape_interval: 15s
+scrape_configs:
+ # 默认分配给抓取指标的作业名称。
+ - job_name: 'seatunnel'
+ scrape_interval: 5s
+ # 指标导出路径
+ metrics_path: /hazelcast/rest/instance/metrics
+ # 此作业静态配置的目标列表。
+ static_configs:
+ # 静态配置中指定的目标。
+ - targets: [ 'localhost:5801' ]
+ # 为从目标抓取的所有指标分配的标签。
+ # labels: [:]
+```
+
+### 安装 Grafana
+
+有关如何设置 Grafana 服务器的指南,请访问
+[安装](https://grafana.com/docs/grafana/latest/setup-grafana/installation)
+
+#### 监控仪表板
+
+- 在 Grafana 中添加 Prometheus 数据源。
+- 将 `Seatunnel Cluster` 监控仪表板导入到 Grafana 中,使用 [仪表板 JSON](./telemetry/grafana-dashboard.json)。
+
+监控[效果图](../../images/grafana.png)
\ No newline at end of file
diff --git a/docs/zh/seatunnel-engine/telemetry/grafana-dashboard.json b/docs/zh/seatunnel-engine/telemetry/grafana-dashboard.json
new file mode 100644
index 00000000000..7a87e47ff38
--- /dev/null
+++ b/docs/zh/seatunnel-engine/telemetry/grafana-dashboard.json
@@ -0,0 +1 @@
+{"annotations":{"list":[{"builtIn":1,"datasource":{"type":"prometheus","uid":"edwo9tknxxgqof"},"enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","target":{"limit":100,"matchAny":false,"tags":[],"type":"dashboard"},"type":"dashboard"}]},"editable":true,"fiscalYearStartMonth":0,"graphTooltip":0,"id":8,"links":[],"liveNow":false,"panels":[{"datasource":{"default":true,"type":"prometheus","uid":"edwo9tknxxgqof"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"super-light-blue","value":null},{"color":"red","value":100000}]}},"overrides":[]},"gridPos":{"h":4,"w":12,"x":0,"y":0},"id":17,"options":{"colorMode":"background","graphMode":"none","justifyMode":"center","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showPercentChange":false,"textMode":"auto","wideLayout":true},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"node_count{instance=~\"$instance\"}","interval":"","legendFormat":"","range":true,"refId":"A"}],"title":"Total Node Count","type":"stat"},{"datasource":{"default":true,"type":"prometheus","uid":"edwo9tknxxgqof"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":4,"w":12,"x":12,"y":0},"id":18,"options":{"colorMode":"background","graphMode":"none","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showPercentChange":false,"text":{"titleSize":1},"textMode":"auto","wideLayout":true},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"sum(node_state{instance=~\"$instance\"})","interval":"","legendFormat":"__auto","range":true,"refId":"A"}],"title":"UP Node Count","type":"stat"},{"collapsed":false,"datasource":{"type":"prometheus","uid":"edwo9tknxxgqof"},"gridPos":{"h":1,"w":24,"x":0,"y":4},"id":22,"panels":[],"targets":[{"datasource":{"type":"prometheus","uid":"edwo9tknxxgqof"},"refId":"A"}],"title":"Hazelcast 
Partition","type":"row"},{"datasource":{"default":true,"type":"prometheus","uid":"edwo9tknxxgqof"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":12,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"smooth","lineWidth":1,"pointSize":1,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":7,"w":12,"x":0,"y":5},"id":32,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"single","sort":"none"}},"targets":[{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"hazelcast_partition_partitionCount{instance=~\"$instance\"}","interval":"","legendFormat":"{{instance}}","range":true,"refId":"A"}],"title":"partitionCount","type":"timeseries"},{"datasource":{"default":true,"type":"prometheus","uid":"edwo9tknxxgqof"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":12,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":1,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":7,"w":12,"x":12,"y":5},"id":33,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"single","sort":"none"}},"targets":[{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"hazelcast_partition_activePartition{instance=~\"$instance\"}","interval":"","legendFormat":"{{instance}}","range":true,"refId":"A"}],"title":"activePartition","type":"timeseries"},{"datasource":{"default":true,"type":"prometheus","uid":"edwo9tknxxgqof"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":12,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"smooth","lineWidth":1,"pointSize":1,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":7,"w":12,"x":0,"y":12},"id":34,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"single","sort":"none"}},"pluginVersion":"8.3.3","targets":[{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"co
de","exemplar":true,"expr":"hazelcast_partition_isClusterSafe{instance=~\"$instance\"}","interval":"","legendFormat":"{{instance}}","range":true,"refId":"A"}],"title":"isClusterSafe","type":"timeseries"},{"datasource":{"default":true,"type":"prometheus","uid":"edwo9tknxxgqof"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":12,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"smooth","lineWidth":1,"pointSize":1,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":7,"w":12,"x":12,"y":12},"id":35,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"single","sort":"none"}},"targets":[{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"hazelcast_partition_isLocalMemberSafe{instance=~\"$instance\"}","interval":"","legendFormat":"{{instance}}","range":true,"refId":"A"}],"title":"isLocalMemberSafe","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"edwo9tknxxgqof"},"gridPos":{"h":1,"w":24,"x":0,"y":19},"id":20,"targets":[{"datasource":{"type":"prometheus","uid":"edwo9tknxxgqof"},"refId":"A"}],"title":"Hazelcast Executor","type":"row"},{"datasource":{"default":true,"type":"prometheus","uid":"edwo9tknxxgqof"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":12,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"smooth","lineWidth":1,"pointSize":1,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":6,"w":24,"x":0,"y":20},"id":24,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"right","showLegend":true},"tooltip":{"mode":"single","sort":"none"}},"targets":[{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"hazelcast_executor_executedCount{instance=~\"$instance\"}","interval":"","legendFormat":"{{instance}}-{{type}}","range":true,"refId":"A"}],"title":"executedCount","type":"timeseries"},{"datasource":{"default":true,"type":"prometheus","uid":"edwo9tknxxgqof"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":12,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"smooth","lineWidth":1,"pointSize":1,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"
}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":26},"id":26,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"right","showLegend":true},"tooltip":{"mode":"single","sort":"none"}},"targets":[{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"hazelcast_executor_isTerminated{instance=~\"$instance\"}","interval":"","legendFormat":"{{instance}}-{{type}}","range":true,"refId":"A"}],"title":"isTerminated","type":"timeseries"},{"datasource":{"default":true,"type":"prometheus","uid":"edwo9tknxxgqof"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":12,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"smooth","lineWidth":1,"pointSize":1,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":26},"id":25,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"right","showLegend":true},"tooltip":{"mode":"single","sort":"none"}},"targets":[{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"hazelcast_executor_isShutdown{instance=~\"$instance\"}","interval":"","legendFormat":"{{instance}}-{{type}}","range":true,"refId":"A"}],"title":"isShutdown","type":"timeseries"},{"datasource":{"default":true,"type":"prometheus","uid":"edwo9tknxxgqof"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":12,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"smooth","lineWidth":1,"pointSize":1,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":34},"id":28,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"right","showLegend":true},"tooltip":{"mode":"single","sort":"none"}},"targets":[{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"hazelcast_executor_poolSize{instance=~\"$instance\"}","interval":"","legendFormat":"{{instance}}-{{type}}","range":true,"refId":"A"}],"title":"poolSize","type":"timeseries"},{"datasource":{"default":true,"type":"prometheus","uid":"edwo9tknxxgqof"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":12,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"smooth","lineWidth":1,"point
Size":1,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":34},"id":27,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"right","showLegend":true},"tooltip":{"mode":"single","sort":"none"}},"targets":[{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"hazelcast_executor_maxPoolSize{instance=~\"$instance\"}","interval":"","legendFormat":"{{instance}}-{{type}}","range":true,"refId":"A"}],"title":"maxPoolSize","type":"timeseries"},{"datasource":{"default":true,"type":"prometheus","uid":"edwo9tknxxgqof"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":1,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":42},"id":30,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"right","showLegend":true},"tooltip":{"mode":"single","sort":"none"}},"targets":[{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"hazelcast_executor_queueRemainingCapacity{instance=~\"$instance\"}","interval":"","legendFormat":"{{instance}}-{{type}}","range":true,"refId":"A"}],"title":"queueRemainingCapacity","type":"timeseries"},{"datasource":{"default":true,"type":"prometheus","uid":"edwo9tknxxgqof"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":0,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":1,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":42},"id":29,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"right","showLegend":true},"tooltip":{"mode":"single","sort":"none"}},"targets":[{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"hazelcast_executor_queueSize{instance=~\"$instance\"}","interval":"","legendFormat":"{{instance}}-{{type}}","range":true,"refId":"A"}],"title":"queueSize","type":"timeseries"},{"collapsed":false,"datasource":{"type":"prometheus","uid":"edwo9tknxxgqof"},"gridPos":{"h":1,"w":24,"x":0,"y":50},"id":7,"panels":[],"targets":[{"datasource":{"type":"prometheus","uid":"edwo9tknxxgqof"},"refId":"A"}],"title":"System","type":"row"},{"datasource":{"default":true,"type":"prometheus","uid":"ed
wo9tknxxgqof"},"description":"","fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":18,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"smooth","lineStyle":{"fill":"solid"},"lineWidth":1,"pointSize":1,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":51},"id":9,"interval":"300","options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"single","sort":"none"}},"targets":[{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"rate(process_cpu_seconds_total{instance=~\"$instance\"}[$__interval])*100","interval":"","legendFormat":"{{instance}}","range":true,"refId":"A"}],"title":"Cpu Usage","type":"timeseries"},{"datasource":{"default":true,"type":"prometheus","uid":"edwo9tknxxgqof"},"description":"","fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":22,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"smooth","lineWidth":1,"pointSize":1,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":51},"id":10,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"single","sort":"none"}},"targets":[{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"100 * (jvm_memory_bytes_used{instance=~\"$instance\",area=\"heap\"} / jvm_memory_bytes_max{instance=~\"$instance\",area=\"heap\"})","interval":"","legendFormat":"{{instance}}","range":true,"refId":"A"}],"title":"Heap Memory 
Usage","type":"timeseries"},{"datasource":{"default":true,"type":"prometheus","uid":"edwo9tknxxgqof"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":12,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"smooth","lineWidth":1,"pointSize":1,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":59},"id":12,"interval":"300","options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"single","sort":"none"}},"targets":[{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"increase(jvm_gc_collection_seconds_count[$__interval])","interval":"","legendFormat":"{{instance}}-{{gc}}","range":true,"refId":"A"}],"title":"GC Count","type":"timeseries"},{"datasource":{"default":true,"type":"prometheus","uid":"edwo9tknxxgqof"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":12,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"smooth","lineWidth":1,"pointSize":1,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":59},"id":13,"interval":"300","options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"single","sort":"none"}},"targets":[{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"increase(jvm_gc_collection_seconds_sum{instance=~\"$instance\"}[$__interval])*1000","interval":"","legendFormat":"{{instance}}-{{gc}}","range":true,"refId":"A"}],"title":"GC Cost 
Time","type":"timeseries"},{"datasource":{"default":true,"type":"prometheus","uid":"edwo9tknxxgqof"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":12,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":1,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":8,"w":24,"x":0,"y":67},"id":14,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"right","showLegend":true},"tooltip":{"mode":"single","sort":"none"}},"targets":[{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"jvm_threads_current{instance=~\"$instance\"}","interval":"","legendFormat":"{{instance}}-current","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"jvm_threads_daemon{instance=~\"$instance\"}","hide":false,"interval":"","legendFormat":"{{instance}}-daemon","range":true,"refId":"B"},{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"jvm_threads_peak{instance=~\"$instance\"}","hide":false,"interval":"","legendFormat":"{{instance}}-peak","range":true,"refId":"C"},{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"jvm_threads_deadlocked{instance=~\"$instance\"}","hide":false,"interval":"","legendFormat":"{{instance}}-deadlocked","range":true,"refId":"D"}],"title":"Jvm Threads","type":"timeseries"},{"collapsed":false,"datasource":{"type":"prometheus","uid":"edwo9tknxxgqof"},"gridPos":{"h":1,"w":24,"x":0,"y":75},"id":5,"panels":[],"targets":[{"datasource":{"type":"prometheus","uid":"edwo9tknxxgqof"},"refId":"A"}],"title":"Job","type":"row"},{"datasource":{"default":true,"type":"prometheus","uid":"edwo9tknxxgqof"},"fieldConfig":{"defaults":{"color":{"mode":"continuous-YlBl"},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null}]}},"overrides":[]},"gridPos":{"h":6,"w":24,"x":0,"y":76},"id":2,"options":{"displayMode":"basic","maxVizHeight":300,"minVizHeight":16,"minVizWidth":8,"namePlacement":"auto","orientation":"vertical","reduceOptions":{"calcs":["lastNotNull"],"fields":"","values":false},"showUnfilled":true,"sizing":"auto","valueMode":"color"},"pluginVersion":"11.2.0","targets":[{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"sum(job_count) by (type) ","hide":false,"interval":"","legendFormat":"__auto","range":true,"refId":"A"}],"title":"Job 
Count","type":"bargauge"},{"datasource":{"default":true,"type":"prometheus","uid":"edwo9tknxxgqof"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":12,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"smooth","lineWidth":1,"pointSize":1,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":6,"w":12,"x":0,"y":82},"id":3,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"single","sort":"none"}},"targets":[{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"job_thread_pool_activeCount{instance=~\"$instance\"}","interval":"","legendFormat":"{{instance}}-{{type}}","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"job_thread_pool_corePoolSize{instance=~\"$instance\"}","hide":false,"interval":"","legendFormat":"{{instance}}-{{type}}","range":true,"refId":"B"},{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"job_thread_pool_maximumPoolSize{instance=~\"$instance\"}","hide":true,"interval":"","legendFormat":"{{instance}}-{{type}}","range":true,"refId":"C"},{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"job_thread_pool_poolSize{instance=~\"$instance\"}","hide":false,"interval":"","legendFormat":"{{instance}}-{{type}}","range":true,"refId":"D"}],"title":"Job Thread Pool","type":"timeseries"},{"datasource":{"default":true,"type":"prometheus","uid":"edwo9tknxxgqof"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":12,"gradientMode":"opacity","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"smooth","lineWidth":1,"pointSize":1,"scaleDistribution":{"type":"linear"},"showPoints":"auto","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":6,"w":12,"x":12,"y":82},"id":15,"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"single","sort":"none"}},"targets":[{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"job_thread_pool_completedTask_total{instance=~\"$instance\"}","interval":"","legendFormat":"{{instance}}-{{type}}","range":true,"refId":"A"},{"datasource":{"type":"prometheus","uid":"jUi2yaj4k"},"editorMode":"code","exemplar":true,"expr":"job_thread_pool_task_total{instance=~\"$instance\"}","hide":false,"interval":"","legendFormat":"{{instance}}-{{type}}","range":true,"refId":"B"}],"title":"Job Thread Pool 
Total","type":"timeseries"}],"refresh":"30s","schemaVersion":39,"tags":[],"templating":{"list":[{"current":{"selected":true,"text":["All"],"value":["$__all"]},"datasource":{"type":"prometheus","uid":"edwo9tknxxgqof"},"definition":"label_values(cluster_info,instance)","description":"instance","hide":0,"includeAll":true,"label":"","multi":true,"name":"instance","options":[],"query":{"qryType":5,"query":"label_values(cluster_info,instance)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":1,"regex":"","skipUrlSync":false,"sort":0,"type":"query"}]},"time":{"from":"now-15m","to":"now"},"timepicker":{},"timezone":"","title":"Seatunnel","uid":"bdx1j097hmku8d","version":11,"weekStart":""}
\ No newline at end of file
diff --git a/docs/zh/seatunnel-engine/telemetry/metrics.txt b/docs/zh/seatunnel-engine/telemetry/metrics.txt
new file mode 100644
index 00000000000..25dc805602b
--- /dev/null
+++ b/docs/zh/seatunnel-engine/telemetry/metrics.txt
@@ -0,0 +1,296 @@
+# HELP jvm_buffer_pool_used_bytes Used bytes of a given JVM buffer pool.
+# TYPE jvm_buffer_pool_used_bytes gauge
+jvm_buffer_pool_used_bytes{pool="mapped",} 0.0
+jvm_buffer_pool_used_bytes{pool="direct",} 414142.0
+# HELP jvm_buffer_pool_capacity_bytes Bytes capacity of a given JVM buffer pool.
+# TYPE jvm_buffer_pool_capacity_bytes gauge
+jvm_buffer_pool_capacity_bytes{pool="mapped",} 0.0
+jvm_buffer_pool_capacity_bytes{pool="direct",} 414139.0
+# HELP jvm_buffer_pool_used_buffers Used buffers of a given JVM buffer pool.
+# TYPE jvm_buffer_pool_used_buffers gauge
+jvm_buffer_pool_used_buffers{pool="mapped",} 0.0
+jvm_buffer_pool_used_buffers{pool="direct",} 7.0
+# HELP jvm_gc_collection_seconds Time spent in a given JVM garbage collector in seconds.
+# TYPE jvm_gc_collection_seconds summary
+jvm_gc_collection_seconds_count{gc="G1 Young Generation",} 6.0
+jvm_gc_collection_seconds_sum{gc="G1 Young Generation",} 0.047
+jvm_gc_collection_seconds_count{gc="G1 Old Generation",} 0.0
+jvm_gc_collection_seconds_sum{gc="G1 Old Generation",} 0.0
+# HELP jvm_memory_objects_pending_finalization The number of objects waiting in the finalizer queue.
+# TYPE jvm_memory_objects_pending_finalization gauge
+jvm_memory_objects_pending_finalization 0.0
+# HELP jvm_memory_bytes_used Used bytes of a given JVM memory area.
+# TYPE jvm_memory_bytes_used gauge
+jvm_memory_bytes_used{area="heap",} 8.4778896E7
+jvm_memory_bytes_used{area="nonheap",} 7.2728624E7
+# HELP jvm_memory_bytes_committed Committed (bytes) of a given JVM memory area.
+# TYPE jvm_memory_bytes_committed gauge
+jvm_memory_bytes_committed{area="heap",} 5.36870912E8
+jvm_memory_bytes_committed{area="nonheap",} 7.7594624E7
+# HELP jvm_memory_bytes_max Max (bytes) of a given JVM memory area.
+# TYPE jvm_memory_bytes_max gauge
+jvm_memory_bytes_max{area="heap",} 8.589934592E9
+jvm_memory_bytes_max{area="nonheap",} -1.0
+# HELP jvm_memory_bytes_init Initial bytes of a given JVM memory area.
+# TYPE jvm_memory_bytes_init gauge
+jvm_memory_bytes_init{area="heap",} 5.36870912E8
+jvm_memory_bytes_init{area="nonheap",} 7667712.0
+# HELP jvm_memory_pool_bytes_used Used bytes of a given JVM memory pool.
+# TYPE jvm_memory_pool_bytes_used gauge
+jvm_memory_pool_bytes_used{pool="CodeHeap 'non-nmethods'",} 1307520.0
+jvm_memory_pool_bytes_used{pool="Metaspace",} 4.9585376E7
+jvm_memory_pool_bytes_used{pool="CodeHeap 'profiled nmethods'",} 1.2327296E7
+jvm_memory_pool_bytes_used{pool="Compressed Class Space",} 6124368.0
+jvm_memory_pool_bytes_used{pool="G1 Eden Space",} 5.4525952E7
+jvm_memory_pool_bytes_used{pool="G1 Old Gen",} 1.3475728E7
+jvm_memory_pool_bytes_used{pool="G1 Survivor Space",} 1.6777216E7
+jvm_memory_pool_bytes_used{pool="CodeHeap 'non-profiled nmethods'",} 3384064.0
+# HELP jvm_memory_pool_bytes_committed Committed bytes of a given JVM memory pool.
+# TYPE jvm_memory_pool_bytes_committed gauge
+jvm_memory_pool_bytes_committed{pool="CodeHeap 'non-nmethods'",} 2555904.0
+jvm_memory_pool_bytes_committed{pool="Metaspace",} 5.2035584E7
+jvm_memory_pool_bytes_committed{pool="CodeHeap 'profiled nmethods'",} 1.2386304E7
+jvm_memory_pool_bytes_committed{pool="Compressed Class Space",} 7208960.0
+jvm_memory_pool_bytes_committed{pool="G1 Eden Space",} 3.20864256E8
+jvm_memory_pool_bytes_committed{pool="G1 Old Gen",} 1.9922944E8
+jvm_memory_pool_bytes_committed{pool="G1 Survivor Space",} 1.6777216E7
+jvm_memory_pool_bytes_committed{pool="CodeHeap 'non-profiled nmethods'",} 3407872.0
+# HELP jvm_memory_pool_bytes_max Max bytes of a given JVM memory pool.
+# TYPE jvm_memory_pool_bytes_max gauge
+jvm_memory_pool_bytes_max{pool="CodeHeap 'non-nmethods'",} 5849088.0
+jvm_memory_pool_bytes_max{pool="Metaspace",} -1.0
+jvm_memory_pool_bytes_max{pool="CodeHeap 'profiled nmethods'",} 1.22896384E8
+jvm_memory_pool_bytes_max{pool="Compressed Class Space",} 1.073741824E9
+jvm_memory_pool_bytes_max{pool="G1 Eden Space",} -1.0
+jvm_memory_pool_bytes_max{pool="G1 Old Gen",} 8.589934592E9
+jvm_memory_pool_bytes_max{pool="G1 Survivor Space",} -1.0
+jvm_memory_pool_bytes_max{pool="CodeHeap 'non-profiled nmethods'",} 1.22912768E8
+# HELP jvm_memory_pool_bytes_init Initial bytes of a given JVM memory pool.
+# TYPE jvm_memory_pool_bytes_init gauge
+jvm_memory_pool_bytes_init{pool="CodeHeap 'non-nmethods'",} 2555904.0
+jvm_memory_pool_bytes_init{pool="Metaspace",} 0.0
+jvm_memory_pool_bytes_init{pool="CodeHeap 'profiled nmethods'",} 2555904.0
+jvm_memory_pool_bytes_init{pool="Compressed Class Space",} 0.0
+jvm_memory_pool_bytes_init{pool="G1 Eden Space",} 2.7262976E7
+jvm_memory_pool_bytes_init{pool="G1 Old Gen",} 5.09607936E8
+jvm_memory_pool_bytes_init{pool="G1 Survivor Space",} 0.0
+jvm_memory_pool_bytes_init{pool="CodeHeap 'non-profiled nmethods'",} 2555904.0
+# HELP jvm_memory_pool_collection_used_bytes Used bytes after last collection of a given JVM memory pool.
+# TYPE jvm_memory_pool_collection_used_bytes gauge
+jvm_memory_pool_collection_used_bytes{pool="G1 Eden Space",} 0.0
+jvm_memory_pool_collection_used_bytes{pool="G1 Old Gen",} 0.0
+jvm_memory_pool_collection_used_bytes{pool="G1 Survivor Space",} 1.6777216E7
+# HELP jvm_memory_pool_collection_committed_bytes Committed after last collection bytes of a given JVM memory pool.
+# TYPE jvm_memory_pool_collection_committed_bytes gauge
+jvm_memory_pool_collection_committed_bytes{pool="G1 Eden Space",} 3.20864256E8
+jvm_memory_pool_collection_committed_bytes{pool="G1 Old Gen",} 0.0
+jvm_memory_pool_collection_committed_bytes{pool="G1 Survivor Space",} 1.6777216E7
+# HELP jvm_memory_pool_collection_max_bytes Max bytes after last collection of a given JVM memory pool.
+# TYPE jvm_memory_pool_collection_max_bytes gauge
+jvm_memory_pool_collection_max_bytes{pool="G1 Eden Space",} -1.0
+jvm_memory_pool_collection_max_bytes{pool="G1 Old Gen",} 8.589934592E9
+jvm_memory_pool_collection_max_bytes{pool="G1 Survivor Space",} -1.0
+# HELP jvm_memory_pool_collection_init_bytes Initial after last collection bytes of a given JVM memory pool.
+# TYPE jvm_memory_pool_collection_init_bytes gauge
+jvm_memory_pool_collection_init_bytes{pool="G1 Eden Space",} 2.7262976E7
+jvm_memory_pool_collection_init_bytes{pool="G1 Old Gen",} 5.09607936E8
+jvm_memory_pool_collection_init_bytes{pool="G1 Survivor Space",} 0.0
+# HELP job_thread_pool_activeCount The activeCount of seatunnel coordinator job's executor cached thread pool
+# TYPE job_thread_pool_activeCount gauge
+job_thread_pool_activeCount{cluster="seatunnel",address="127.0.0.1:5801",} 0.0
+# HELP job_thread_pool_completedTask_total The completedTask of seatunnel coordinator job's executor cached thread pool
+# TYPE job_thread_pool_completedTask_total counter
+job_thread_pool_completedTask_total{cluster="seatunnel",address="127.0.0.1:5801",} 1.0
+# HELP job_thread_pool_corePoolSize The corePoolSize of seatunnel coordinator job's executor cached thread pool
+# TYPE job_thread_pool_corePoolSize gauge
+job_thread_pool_corePoolSize{cluster="seatunnel",address="127.0.0.1:5801",} 0.0
+# HELP job_thread_pool_maximumPoolSize The maximumPoolSize of seatunnel coordinator job's executor cached thread pool
+# TYPE job_thread_pool_maximumPoolSize gauge
+job_thread_pool_maximumPoolSize{cluster="seatunnel",address="127.0.0.1:5801",} 2.147483647E9
+# HELP job_thread_pool_poolSize The poolSize of seatunnel coordinator job's executor cached thread pool
+# TYPE job_thread_pool_poolSize gauge
+job_thread_pool_poolSize{cluster="seatunnel",address="127.0.0.1:5801",} 0.0
+# HELP job_thread_pool_task_total The taskCount of seatunnel coordinator job's executor cached thread pool
+# TYPE job_thread_pool_task_total counter
+job_thread_pool_task_total{cluster="seatunnel",address="127.0.0.1:5801",} 1.0
+# HELP job_thread_pool_queueTaskCount The queueTaskCount of seatunnel coordinator job's executor cached thread pool
+# TYPE job_thread_pool_queueTaskCount gauge
+job_thread_pool_queueTaskCount{cluster="seatunnel",address="127.0.0.1:5801",} 0.0
+# HELP job_thread_pool_rejection_total The rejectionCount of seatunnel coordinator job's executor cached thread pool
+# TYPE job_thread_pool_rejection_total counter
+job_thread_pool_rejection_total{cluster="seatunnel",address="127.0.0.1:5801",} 0.0
+# HELP jvm_memory_pool_allocated_bytes_total Total bytes allocated in a given JVM memory pool. Only updated after GC, not continuously.
+# TYPE jvm_memory_pool_allocated_bytes_total counter
+jvm_memory_pool_allocated_bytes_total{pool="CodeHeap 'profiled nmethods'",} 1.1970688E7
+jvm_memory_pool_allocated_bytes_total{pool="G1 Old Gen",} 1.3475728E7
+jvm_memory_pool_allocated_bytes_total{pool="G1 Eden Space",} 1.61480704E8
+jvm_memory_pool_allocated_bytes_total{pool="CodeHeap 'non-profiled nmethods'",} 3166720.0
+jvm_memory_pool_allocated_bytes_total{pool="G1 Survivor Space",} 1.6777216E7
+jvm_memory_pool_allocated_bytes_total{pool="Compressed Class Space",} 6084208.0
+jvm_memory_pool_allocated_bytes_total{pool="Metaspace",} 4.927032E7
+jvm_memory_pool_allocated_bytes_total{pool="CodeHeap 'non-nmethods'",} 1303936.0
+# HELP jvm_threads_current Current thread count of a JVM
+# TYPE jvm_threads_current gauge
+jvm_threads_current 114.0
+# HELP jvm_threads_daemon Daemon thread count of a JVM
+# TYPE jvm_threads_daemon gauge
+jvm_threads_daemon 10.0
+# HELP jvm_threads_peak Peak thread count of a JVM
+# TYPE jvm_threads_peak gauge
+jvm_threads_peak 124.0
+# HELP jvm_threads_started_total Started thread count of a JVM
+# TYPE jvm_threads_started_total counter
+jvm_threads_started_total 140.0
+# HELP jvm_threads_deadlocked Cycles of JVM-threads that are in deadlock waiting to acquire object monitors or ownable synchronizers
+# TYPE jvm_threads_deadlocked gauge
+jvm_threads_deadlocked 0.0
+# HELP jvm_threads_deadlocked_monitor Cycles of JVM-threads that are in deadlock waiting to acquire object monitors
+# TYPE jvm_threads_deadlocked_monitor gauge
+jvm_threads_deadlocked_monitor 0.0
+# HELP jvm_threads_state Current count of threads by state
+# TYPE jvm_threads_state gauge
+jvm_threads_state{state="NEW",} 0.0
+jvm_threads_state{state="TERMINATED",} 0.0
+jvm_threads_state{state="RUNNABLE",} 12.0
+jvm_threads_state{state="BLOCKED",} 0.0
+jvm_threads_state{state="WAITING",} 80.0
+jvm_threads_state{state="TIMED_WAITING",} 22.0
+jvm_threads_state{state="UNKNOWN",} 0.0
+# HELP cluster_info Cluster info
+# TYPE cluster_info gauge
+cluster_info{cluster="seatunnel",hazelcastVersion="5.1",master="127.0.0.1:5801",} 1.0
+# HELP cluster_time Cluster start time
+# TYPE cluster_time gauge
+cluster_time{cluster="seatunnel",hazelcastVersion="5.1",} 1.725364524614E12
+# HELP node_count Cluster node total count
+# TYPE node_count gauge
+node_count{cluster="seatunnel",} 1.0
+# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds.
+# TYPE process_cpu_seconds_total counter
+process_cpu_seconds_total 16.511054
+# HELP process_start_time_seconds Start time of the process since unix epoch in seconds.
+# TYPE process_start_time_seconds gauge
+process_start_time_seconds 1.725363614623E9
+# HELP process_open_fds Number of open file descriptors.
+# TYPE process_open_fds gauge
+process_open_fds 162.0
+# HELP process_max_fds Maximum number of open file descriptors.
+# TYPE process_max_fds gauge
+process_max_fds 10240.0
+# HELP job_count All job counts of seatunnel cluster
+# TYPE job_count gauge
+job_count{cluster="seatunnel",type="canceled",} 0.0
+job_count{cluster="seatunnel",type="cancelling",} 0.0
+job_count{cluster="seatunnel",type="created",} 0.0
+job_count{cluster="seatunnel",type="failed",} 0.0
+job_count{cluster="seatunnel",type="failing",} 0.0
+job_count{cluster="seatunnel",type="finished",} 0.0
+job_count{cluster="seatunnel",type="running",} 0.0
+job_count{cluster="seatunnel",type="scheduled",} 0.0
+# HELP node_state Whether is up of seatunnel node
+# TYPE node_state gauge
+node_state{cluster="seatunnel",address="127.0.0.1:5801",} 1.0
+# HELP hazelcast_executor_executedCount The hazelcast executor executedCount of seatunnel cluster node
+# TYPE hazelcast_executor_executedCount gauge
+hazelcast_executor_executedCount{cluster="seatunnel",address="127.0.0.1:5801",type="async",} 0.0
+hazelcast_executor_executedCount{cluster="seatunnel",address="127.0.0.1:5801",type="client",} 0.0
+hazelcast_executor_executedCount{cluster="seatunnel",address="127.0.0.1:5801",type="clientBlocking",} 0.0
+hazelcast_executor_executedCount{cluster="seatunnel",address="127.0.0.1:5801",type="clientQuery",} 0.0
+hazelcast_executor_executedCount{cluster="seatunnel",address="127.0.0.1:5801",type="io",} 224.0
+hazelcast_executor_executedCount{cluster="seatunnel",address="127.0.0.1:5801",type="offloadable",} 0.0
+hazelcast_executor_executedCount{cluster="seatunnel",address="127.0.0.1:5801",type="scheduled",} 16469.0
+hazelcast_executor_executedCount{cluster="seatunnel",address="127.0.0.1:5801",type="system",} 0.0
+# HELP hazelcast_executor_isShutdown The hazelcast executor isShutdown of seatunnel cluster node
+# TYPE hazelcast_executor_isShutdown gauge
+hazelcast_executor_isShutdown{cluster="seatunnel",address="127.0.0.1:5801",type="async",} 0.0
+hazelcast_executor_isShutdown{cluster="seatunnel",address="127.0.0.1:5801",type="client",} 0.0
+hazelcast_executor_isShutdown{cluster="seatunnel",address="127.0.0.1:5801",type="clientBlocking",} 0.0
+hazelcast_executor_isShutdown{cluster="seatunnel",address="127.0.0.1:5801",type="clientQuery",} 0.0
+hazelcast_executor_isShutdown{cluster="seatunnel",address="127.0.0.1:5801",type="io",} 0.0
+hazelcast_executor_isShutdown{cluster="seatunnel",address="127.0.0.1:5801",type="offloadable",} 0.0
+hazelcast_executor_isShutdown{cluster="seatunnel",address="127.0.0.1:5801",type="scheduled",} 0.0
+hazelcast_executor_isShutdown{cluster="seatunnel",address="127.0.0.1:5801",type="system",} 0.0
+# HELP hazelcast_executor_isTerminated The hazelcast executor isTerminated of seatunnel cluster node
+# TYPE hazelcast_executor_isTerminated gauge
+hazelcast_executor_isTerminated{cluster="seatunnel",address="127.0.0.1:5801",type="async",} 0.0
+hazelcast_executor_isTerminated{cluster="seatunnel",address="127.0.0.1:5801",type="client",} 0.0
+hazelcast_executor_isTerminated{cluster="seatunnel",address="127.0.0.1:5801",type="clientBlocking",} 0.0
+hazelcast_executor_isTerminated{cluster="seatunnel",address="127.0.0.1:5801",type="clientQuery",} 0.0
+hazelcast_executor_isTerminated{cluster="seatunnel",address="127.0.0.1:5801",type="io",} 0.0
+hazelcast_executor_isTerminated{cluster="seatunnel",address="127.0.0.1:5801",type="offloadable",} 0.0
+hazelcast_executor_isTerminated{cluster="seatunnel",address="127.0.0.1:5801",type="scheduled",} 0.0
+hazelcast_executor_isTerminated{cluster="seatunnel",address="127.0.0.1:5801",type="system",} 0.0
+# HELP hazelcast_executor_maxPoolSize The hazelcast executor maxPoolSize of seatunnel cluster node
+# TYPE hazelcast_executor_maxPoolSize gauge
+hazelcast_executor_maxPoolSize{cluster="seatunnel",address="127.0.0.1:5801",type="async",} 10.0
+hazelcast_executor_maxPoolSize{cluster="seatunnel",address="127.0.0.1:5801",type="client",} 10.0
+hazelcast_executor_maxPoolSize{cluster="seatunnel",address="127.0.0.1:5801",type="clientBlocking",} 200.0
+hazelcast_executor_maxPoolSize{cluster="seatunnel",address="127.0.0.1:5801",type="clientQuery",} 10.0
+hazelcast_executor_maxPoolSize{cluster="seatunnel",address="127.0.0.1:5801",type="io",} 16.0
+hazelcast_executor_maxPoolSize{cluster="seatunnel",address="127.0.0.1:5801",type="offloadable",} 10.0
+hazelcast_executor_maxPoolSize{cluster="seatunnel",address="127.0.0.1:5801",type="scheduled",} 20.0
+hazelcast_executor_maxPoolSize{cluster="seatunnel",address="127.0.0.1:5801",type="system",} 10.0
+# HELP hazelcast_executor_poolSize The hazelcast executor poolSize of seatunnel cluster node
+# TYPE hazelcast_executor_poolSize gauge
+hazelcast_executor_poolSize{cluster="seatunnel",address="127.0.0.1:5801",type="async",} 0.0
+hazelcast_executor_poolSize{cluster="seatunnel",address="127.0.0.1:5801",type="client",} 0.0
+hazelcast_executor_poolSize{cluster="seatunnel",address="127.0.0.1:5801",type="clientBlocking",} 0.0
+hazelcast_executor_poolSize{cluster="seatunnel",address="127.0.0.1:5801",type="clientQuery",} 0.0
+hazelcast_executor_poolSize{cluster="seatunnel",address="127.0.0.1:5801",type="io",} 0.0
+hazelcast_executor_poolSize{cluster="seatunnel",address="127.0.0.1:5801",type="offloadable",} 0.0
+hazelcast_executor_poolSize{cluster="seatunnel",address="127.0.0.1:5801",type="scheduled",} 0.0
+hazelcast_executor_poolSize{cluster="seatunnel",address="127.0.0.1:5801",type="system",} 0.0
+# HELP hazelcast_executor_queueRemainingCapacity The hazelcast executor queueRemainingCapacity of seatunnel cluster
+# TYPE hazelcast_executor_queueRemainingCapacity gauge
+hazelcast_executor_queueRemainingCapacity{cluster="seatunnel",address="127.0.0.1:5801",type="async",} 100000.0
+hazelcast_executor_queueRemainingCapacity{cluster="seatunnel",address="127.0.0.1:5801",type="client",} 1000000.0
+hazelcast_executor_queueRemainingCapacity{cluster="seatunnel",address="127.0.0.1:5801",type="clientBlocking",} 1000000.0
+hazelcast_executor_queueRemainingCapacity{cluster="seatunnel",address="127.0.0.1:5801",type="clientQuery",} 1000000.0
+hazelcast_executor_queueRemainingCapacity{cluster="seatunnel",address="127.0.0.1:5801",type="io",} 2.147483647E9
+hazelcast_executor_queueRemainingCapacity{cluster="seatunnel",address="127.0.0.1:5801",type="offloadable",} 100000.0
+hazelcast_executor_queueRemainingCapacity{cluster="seatunnel",address="127.0.0.1:5801",type="scheduled",} 1000000.0
+hazelcast_executor_queueRemainingCapacity{cluster="seatunnel",address="127.0.0.1:5801",type="system",} 2.147483647E9
+# HELP hazelcast_executor_queueSize The hazelcast executor queueSize of seatunnel cluster node
+# TYPE hazelcast_executor_queueSize gauge
+hazelcast_executor_queueSize{cluster="seatunnel",address="127.0.0.1:5801",type="async",} 0.0
+hazelcast_executor_queueSize{cluster="seatunnel",address="127.0.0.1:5801",type="client",} 0.0
+hazelcast_executor_queueSize{cluster="seatunnel",address="127.0.0.1:5801",type="clientBlocking",} 0.0
+hazelcast_executor_queueSize{cluster="seatunnel",address="127.0.0.1:5801",type="clientQuery",} 0.0
+hazelcast_executor_queueSize{cluster="seatunnel",address="127.0.0.1:5801",type="io",} 0.0
+hazelcast_executor_queueSize{cluster="seatunnel",address="127.0.0.1:5801",type="offloadable",} 0.0
+hazelcast_executor_queueSize{cluster="seatunnel",address="127.0.0.1:5801",type="scheduled",} 0.0
+hazelcast_executor_queueSize{cluster="seatunnel",address="127.0.0.1:5801",type="system",} 0.0
+# HELP hazelcast_partition_partitionCount The partitionCount of seatunnel cluster node
+# TYPE hazelcast_partition_partitionCount gauge
+hazelcast_partition_partitionCount{cluster="seatunnel",address="127.0.0.1:5801",} 271.0
+# HELP hazelcast_partition_activePartition The activePartition of seatunnel cluster node
+# TYPE hazelcast_partition_activePartition gauge
+hazelcast_partition_activePartition{cluster="seatunnel",address="127.0.0.1:5801",} 271.0
+# HELP hazelcast_partition_isClusterSafe Whether is cluster safe of partition
+# TYPE hazelcast_partition_isClusterSafe gauge
+hazelcast_partition_isClusterSafe{cluster="seatunnel",address="127.0.0.1:5801",} 1.0
+# HELP hazelcast_partition_isLocalMemberSafe Whether is local member safe of partition
+# TYPE hazelcast_partition_isLocalMemberSafe gauge
+hazelcast_partition_isLocalMemberSafe{cluster="seatunnel",address="127.0.0.1:5801",} 1.0
+# HELP jvm_info VM version info
+# TYPE jvm_info gauge
+jvm_info{runtime="OpenJDK Runtime Environment",vendor="Azul Systems, Inc.",version="11.0.13+8-LTS",} 1.0
+# HELP jvm_classes_currently_loaded The number of classes that are currently loaded in the JVM
+# TYPE jvm_classes_currently_loaded gauge
+jvm_classes_currently_loaded 9168.0
+# HELP jvm_classes_loaded_total The total number of classes that have been loaded since the JVM has started execution
+# TYPE jvm_classes_loaded_total counter
+jvm_classes_loaded_total 9168.0
+# HELP jvm_classes_unloaded_total The total number of classes that have been unloaded since the JVM has started execution
+# TYPE jvm_classes_unloaded_total counter
+jvm_classes_unloaded_total 0.0
+# HELP jvm_memory_pool_allocated_bytes_created Total bytes allocated in a given JVM memory pool. Only updated after GC, not continuously.
+# TYPE jvm_memory_pool_allocated_bytes_created gauge
+jvm_memory_pool_allocated_bytes_created{pool="CodeHeap 'profiled nmethods'",} 1.725364266616E9
+jvm_memory_pool_allocated_bytes_created{pool="G1 Old Gen",} 1.725364266619E9
+jvm_memory_pool_allocated_bytes_created{pool="G1 Eden Space",} 1.725364266619E9
+jvm_memory_pool_allocated_bytes_created{pool="CodeHeap 'non-profiled nmethods'",} 1.725364266619E9
+jvm_memory_pool_allocated_bytes_created{pool="G1 Survivor Space",} 1.725364266619E9
+jvm_memory_pool_allocated_bytes_created{pool="Compressed Class Space",} 1.725364266619E9
+jvm_memory_pool_allocated_bytes_created{pool="Metaspace",} 1.725364266619E9
+jvm_memory_pool_allocated_bytes_created{pool="CodeHeap 'non-nmethods'",} 1.725364266619E9
\ No newline at end of file
diff --git a/docs/zh/seatunnel-engine/telemetry/openmetrics.txt b/docs/zh/seatunnel-engine/telemetry/openmetrics.txt
new file mode 100644
index 00000000000..cf34553c51c
--- /dev/null
+++ b/docs/zh/seatunnel-engine/telemetry/openmetrics.txt
@@ -0,0 +1,295 @@
+# TYPE jvm_buffer_pool_used_bytes gauge
+# HELP jvm_buffer_pool_used_bytes Used bytes of a given JVM buffer pool.
+jvm_buffer_pool_used_bytes{pool="mapped"} 0.0
+jvm_buffer_pool_used_bytes{pool="direct"} 414142.0
+# TYPE jvm_buffer_pool_capacity_bytes gauge
+# HELP jvm_buffer_pool_capacity_bytes Bytes capacity of a given JVM buffer pool.
+jvm_buffer_pool_capacity_bytes{pool="mapped"} 0.0
+jvm_buffer_pool_capacity_bytes{pool="direct"} 414139.0
+# TYPE jvm_buffer_pool_used_buffers gauge
+# HELP jvm_buffer_pool_used_buffers Used buffers of a given JVM buffer pool.
+jvm_buffer_pool_used_buffers{pool="mapped"} 0.0
+jvm_buffer_pool_used_buffers{pool="direct"} 7.0
+# TYPE jvm_gc_collection_seconds summary
+# HELP jvm_gc_collection_seconds Time spent in a given JVM garbage collector in seconds.
+jvm_gc_collection_seconds_count{gc="G1 Young Generation"} 6.0
+jvm_gc_collection_seconds_sum{gc="G1 Young Generation"} 0.047
+jvm_gc_collection_seconds_count{gc="G1 Old Generation"} 0.0
+jvm_gc_collection_seconds_sum{gc="G1 Old Generation"} 0.0
+# TYPE jvm_memory_objects_pending_finalization gauge
+# HELP jvm_memory_objects_pending_finalization The number of objects waiting in the finalizer queue.
+jvm_memory_objects_pending_finalization 0.0
+# TYPE jvm_memory_bytes_used gauge
+# HELP jvm_memory_bytes_used Used bytes of a given JVM memory area.
+jvm_memory_bytes_used{area="heap"} 8.0584592E7
+jvm_memory_bytes_used{area="nonheap"} 7.2669072E7
+# TYPE jvm_memory_bytes_committed gauge
+# HELP jvm_memory_bytes_committed Committed (bytes) of a given JVM memory area.
+jvm_memory_bytes_committed{area="heap"} 5.36870912E8
+jvm_memory_bytes_committed{area="nonheap"} 7.7529088E7
+# TYPE jvm_memory_bytes_max gauge
+# HELP jvm_memory_bytes_max Max (bytes) of a given JVM memory area.
+jvm_memory_bytes_max{area="heap"} 8.589934592E9
+jvm_memory_bytes_max{area="nonheap"} -1.0
+# TYPE jvm_memory_bytes_init gauge
+# HELP jvm_memory_bytes_init Initial bytes of a given JVM memory area.
+jvm_memory_bytes_init{area="heap"} 5.36870912E8
+jvm_memory_bytes_init{area="nonheap"} 7667712.0
+# TYPE jvm_memory_pool_bytes_used gauge
+# HELP jvm_memory_pool_bytes_used Used bytes of a given JVM memory pool.
+jvm_memory_pool_bytes_used{pool="CodeHeap 'non-nmethods'"} 1307520.0
+jvm_memory_pool_bytes_used{pool="Metaspace"} 4.9582016E7
+jvm_memory_pool_bytes_used{pool="CodeHeap 'profiled nmethods'"} 1.2312704E7
+jvm_memory_pool_bytes_used{pool="Compressed Class Space"} 6124368.0
+jvm_memory_pool_bytes_used{pool="G1 Eden Space"} 5.0331648E7
+jvm_memory_pool_bytes_used{pool="G1 Old Gen"} 1.3475728E7
+jvm_memory_pool_bytes_used{pool="G1 Survivor Space"} 1.6777216E7
+jvm_memory_pool_bytes_used{pool="CodeHeap 'non-profiled nmethods'"} 3342464.0
+# TYPE jvm_memory_pool_bytes_committed gauge
+# HELP jvm_memory_pool_bytes_committed Committed bytes of a given JVM memory pool.
+jvm_memory_pool_bytes_committed{pool="CodeHeap 'non-nmethods'"} 2555904.0
+jvm_memory_pool_bytes_committed{pool="Metaspace"} 5.2035584E7
+jvm_memory_pool_bytes_committed{pool="CodeHeap 'profiled nmethods'"} 1.2320768E7
+jvm_memory_pool_bytes_committed{pool="Compressed Class Space"} 7208960.0
+jvm_memory_pool_bytes_committed{pool="G1 Eden Space"} 3.20864256E8
+jvm_memory_pool_bytes_committed{pool="G1 Old Gen"} 1.9922944E8
+jvm_memory_pool_bytes_committed{pool="G1 Survivor Space"} 1.6777216E7
+jvm_memory_pool_bytes_committed{pool="CodeHeap 'non-profiled nmethods'"} 3407872.0
+# TYPE jvm_memory_pool_bytes_max gauge
+# HELP jvm_memory_pool_bytes_max Max bytes of a given JVM memory pool.
+jvm_memory_pool_bytes_max{pool="CodeHeap 'non-nmethods'"} 5849088.0
+jvm_memory_pool_bytes_max{pool="Metaspace"} -1.0
+jvm_memory_pool_bytes_max{pool="CodeHeap 'profiled nmethods'"} 1.22896384E8
+jvm_memory_pool_bytes_max{pool="Compressed Class Space"} 1.073741824E9
+jvm_memory_pool_bytes_max{pool="G1 Eden Space"} -1.0
+jvm_memory_pool_bytes_max{pool="G1 Old Gen"} 8.589934592E9
+jvm_memory_pool_bytes_max{pool="G1 Survivor Space"} -1.0
+jvm_memory_pool_bytes_max{pool="CodeHeap 'non-profiled nmethods'"} 1.22912768E8
+# TYPE jvm_memory_pool_bytes_init gauge
+# HELP jvm_memory_pool_bytes_init Initial bytes of a given JVM memory pool.
+jvm_memory_pool_bytes_init{pool="CodeHeap 'non-nmethods'"} 2555904.0
+jvm_memory_pool_bytes_init{pool="Metaspace"} 0.0
+jvm_memory_pool_bytes_init{pool="CodeHeap 'profiled nmethods'"} 2555904.0
+jvm_memory_pool_bytes_init{pool="Compressed Class Space"} 0.0
+jvm_memory_pool_bytes_init{pool="G1 Eden Space"} 2.7262976E7
+jvm_memory_pool_bytes_init{pool="G1 Old Gen"} 5.09607936E8
+jvm_memory_pool_bytes_init{pool="G1 Survivor Space"} 0.0
+jvm_memory_pool_bytes_init{pool="CodeHeap 'non-profiled nmethods'"} 2555904.0
+# TYPE jvm_memory_pool_collection_used_bytes gauge
+# HELP jvm_memory_pool_collection_used_bytes Used bytes after last collection of a given JVM memory pool.
+jvm_memory_pool_collection_used_bytes{pool="G1 Eden Space"} 0.0
+jvm_memory_pool_collection_used_bytes{pool="G1 Old Gen"} 0.0
+jvm_memory_pool_collection_used_bytes{pool="G1 Survivor Space"} 1.6777216E7
+# TYPE jvm_memory_pool_collection_committed_bytes gauge
+# HELP jvm_memory_pool_collection_committed_bytes Committed after last collection bytes of a given JVM memory pool.
+jvm_memory_pool_collection_committed_bytes{pool="G1 Eden Space"} 3.20864256E8
+jvm_memory_pool_collection_committed_bytes{pool="G1 Old Gen"} 0.0
+jvm_memory_pool_collection_committed_bytes{pool="G1 Survivor Space"} 1.6777216E7
+# TYPE jvm_memory_pool_collection_max_bytes gauge
+# HELP jvm_memory_pool_collection_max_bytes Max bytes after last collection of a given JVM memory pool.
+jvm_memory_pool_collection_max_bytes{pool="G1 Eden Space"} -1.0
+jvm_memory_pool_collection_max_bytes{pool="G1 Old Gen"} 8.589934592E9
+jvm_memory_pool_collection_max_bytes{pool="G1 Survivor Space"} -1.0
+# TYPE jvm_memory_pool_collection_init_bytes gauge
+# HELP jvm_memory_pool_collection_init_bytes Initial after last collection bytes of a given JVM memory pool.
+jvm_memory_pool_collection_init_bytes{pool="G1 Eden Space"} 2.7262976E7
+jvm_memory_pool_collection_init_bytes{pool="G1 Old Gen"} 5.09607936E8
+jvm_memory_pool_collection_init_bytes{pool="G1 Survivor Space"} 0.0
+# TYPE job_thread_pool_activeCount gauge
+# HELP job_thread_pool_activeCount The activeCount of seatunnel coordinator job's executor cached thread pool
+job_thread_pool_activeCount{cluster="seatunnel",address="127.0.0.1:5801"} 0.0
+# TYPE job_thread_pool_completedTask counter
+# HELP job_thread_pool_completedTask The completedTask of seatunnel coordinator job's executor cached thread pool
+job_thread_pool_completedTask_total{cluster="seatunnel",address="127.0.0.1:5801"} 1.0
+# TYPE job_thread_pool_corePoolSize gauge
+# HELP job_thread_pool_corePoolSize The corePoolSize of seatunnel coordinator job's executor cached thread pool
+job_thread_pool_corePoolSize{cluster="seatunnel",address="127.0.0.1:5801"} 0.0
+# TYPE job_thread_pool_maximumPoolSize gauge
+# HELP job_thread_pool_maximumPoolSize The maximumPoolSize of seatunnel coordinator job's executor cached thread pool
+job_thread_pool_maximumPoolSize{cluster="seatunnel",address="127.0.0.1:5801"} 2.147483647E9
+# TYPE job_thread_pool_poolSize gauge
+# HELP job_thread_pool_poolSize The poolSize of seatunnel coordinator job's executor cached thread pool
+job_thread_pool_poolSize{cluster="seatunnel",address="127.0.0.1:5801"} 0.0
+# TYPE job_thread_pool_task counter
+# HELP job_thread_pool_task The taskCount of seatunnel coordinator job's executor cached thread pool
+job_thread_pool_task_total{cluster="seatunnel",address="127.0.0.1:5801"} 1.0
+# TYPE job_thread_pool_queueTaskCount gauge
+# HELP job_thread_pool_queueTaskCount The queueTaskCount of seatunnel coordinator job's executor cached thread pool
+job_thread_pool_queueTaskCount{cluster="seatunnel",address="127.0.0.1:5801"} 0.0
+# TYPE job_thread_pool_rejection counter
+# HELP job_thread_pool_rejection The rejectionCount of seatunnel coordinator job's executor cached thread pool
+job_thread_pool_rejection_total{cluster="seatunnel",address="127.0.0.1:5801"} 0.0
+# TYPE jvm_memory_pool_allocated_bytes counter
+# HELP jvm_memory_pool_allocated_bytes Total bytes allocated in a given JVM memory pool. Only updated after GC, not continuously.
+jvm_memory_pool_allocated_bytes_total{pool="CodeHeap 'profiled nmethods'"} 1.1970688E7
+jvm_memory_pool_allocated_bytes_created{pool="CodeHeap 'profiled nmethods'"} 1.725364266616E9
+jvm_memory_pool_allocated_bytes_total{pool="G1 Old Gen"} 1.3475728E7
+jvm_memory_pool_allocated_bytes_created{pool="G1 Old Gen"} 1.725364266619E9
+jvm_memory_pool_allocated_bytes_total{pool="G1 Eden Space"} 1.61480704E8
+jvm_memory_pool_allocated_bytes_created{pool="G1 Eden Space"} 1.725364266619E9
+jvm_memory_pool_allocated_bytes_total{pool="CodeHeap 'non-profiled nmethods'"} 3166720.0
+jvm_memory_pool_allocated_bytes_created{pool="CodeHeap 'non-profiled nmethods'"} 1.725364266619E9
+jvm_memory_pool_allocated_bytes_total{pool="G1 Survivor Space"} 1.6777216E7
+jvm_memory_pool_allocated_bytes_created{pool="G1 Survivor Space"} 1.725364266619E9
+jvm_memory_pool_allocated_bytes_total{pool="Compressed Class Space"} 6084208.0
+jvm_memory_pool_allocated_bytes_created{pool="Compressed Class Space"} 1.725364266619E9
+jvm_memory_pool_allocated_bytes_total{pool="Metaspace"} 4.927032E7
+jvm_memory_pool_allocated_bytes_created{pool="Metaspace"} 1.725364266619E9
+jvm_memory_pool_allocated_bytes_total{pool="CodeHeap 'non-nmethods'"} 1303936.0
+jvm_memory_pool_allocated_bytes_created{pool="CodeHeap 'non-nmethods'"} 1.725364266619E9
+# TYPE jvm_threads_current gauge
+# HELP jvm_threads_current Current thread count of a JVM
+jvm_threads_current 114.0
+# TYPE jvm_threads_daemon gauge
+# HELP jvm_threads_daemon Daemon thread count of a JVM
+jvm_threads_daemon 10.0
+# TYPE jvm_threads_peak gauge
+# HELP jvm_threads_peak Peak thread count of a JVM
+jvm_threads_peak 124.0
+# TYPE jvm_threads_started counter
+# HELP jvm_threads_started Started thread count of a JVM
+jvm_threads_started_total 140.0
+# TYPE jvm_threads_deadlocked gauge
+# HELP jvm_threads_deadlocked Cycles of JVM-threads that are in deadlock waiting to acquire object monitors or ownable synchronizers
+jvm_threads_deadlocked 0.0
+# TYPE jvm_threads_deadlocked_monitor gauge
+# HELP jvm_threads_deadlocked_monitor Cycles of JVM-threads that are in deadlock waiting to acquire object monitors
+jvm_threads_deadlocked_monitor 0.0
+# TYPE jvm_threads_state gauge
+# HELP jvm_threads_state Current count of threads by state
+jvm_threads_state{state="NEW"} 0.0
+jvm_threads_state{state="TERMINATED"} 0.0
+jvm_threads_state{state="RUNNABLE"} 12.0
+jvm_threads_state{state="BLOCKED"} 0.0
+jvm_threads_state{state="WAITING"} 80.0
+jvm_threads_state{state="TIMED_WAITING"} 22.0
+jvm_threads_state{state="UNKNOWN"} 0.0
+# TYPE cluster_info gauge
+# HELP cluster_info Cluster info
+cluster_info{cluster="seatunnel",hazelcastVersion="5.1",master="127.0.0.1:5801"} 1.0
+# TYPE cluster_time gauge
+# HELP cluster_time Cluster start time
+cluster_time{cluster="seatunnel",hazelcastVersion="5.1"} 1.725364506292E12
+# TYPE node_count gauge
+# HELP node_count Cluster node total count
+node_count{cluster="seatunnel"} 1.0
+# TYPE process_cpu_seconds counter
+# HELP process_cpu_seconds Total user and system CPU time spent in seconds.
+process_cpu_seconds_total 16.267225
+# TYPE process_start_time_seconds gauge
+# HELP process_start_time_seconds Start time of the process since unix epoch in seconds.
+process_start_time_seconds 1.725363614623E9
+# TYPE process_open_fds gauge
+# HELP process_open_fds Number of open file descriptors.
+process_open_fds 162.0
+# TYPE process_max_fds gauge
+# HELP process_max_fds Maximum number of open file descriptors.
+process_max_fds 10240.0
+# TYPE job_count gauge
+# HELP job_count All job counts of seatunnel cluster
+job_count{cluster="seatunnel",type="canceled"} 0.0
+job_count{cluster="seatunnel",type="cancelling"} 0.0
+job_count{cluster="seatunnel",type="created"} 0.0
+job_count{cluster="seatunnel",type="failed"} 0.0
+job_count{cluster="seatunnel",type="failing"} 0.0
+job_count{cluster="seatunnel",type="finished"} 0.0
+job_count{cluster="seatunnel",type="running"} 0.0
+job_count{cluster="seatunnel",type="scheduled"} 0.0
+# TYPE node_state gauge
+# HELP node_state Whether is up of seatunnel node
+node_state{cluster="seatunnel",address="127.0.0.1:5801"} 1.0
+# TYPE hazelcast_executor_executedCount gauge
+# HELP hazelcast_executor_executedCount The hazelcast executor executedCount of seatunnel cluster node
+hazelcast_executor_executedCount{cluster="seatunnel",address="127.0.0.1:5801",type="async"} 0.0
+hazelcast_executor_executedCount{cluster="seatunnel",address="127.0.0.1:5801",type="client"} 0.0
+hazelcast_executor_executedCount{cluster="seatunnel",address="127.0.0.1:5801",type="clientBlocking"} 0.0
+hazelcast_executor_executedCount{cluster="seatunnel",address="127.0.0.1:5801",type="clientQuery"} 0.0
+hazelcast_executor_executedCount{cluster="seatunnel",address="127.0.0.1:5801",type="io"} 221.0
+hazelcast_executor_executedCount{cluster="seatunnel",address="127.0.0.1:5801",type="offloadable"} 0.0
+hazelcast_executor_executedCount{cluster="seatunnel",address="127.0.0.1:5801",type="scheduled"} 16133.0
+hazelcast_executor_executedCount{cluster="seatunnel",address="127.0.0.1:5801",type="system"} 0.0
+# TYPE hazelcast_executor_isShutdown gauge
+# HELP hazelcast_executor_isShutdown The hazelcast executor isShutdown of seatunnel cluster node
+hazelcast_executor_isShutdown{cluster="seatunnel",address="127.0.0.1:5801",type="async"} 0.0
+hazelcast_executor_isShutdown{cluster="seatunnel",address="127.0.0.1:5801",type="client"} 0.0
+hazelcast_executor_isShutdown{cluster="seatunnel",address="127.0.0.1:5801",type="clientBlocking"} 0.0
+hazelcast_executor_isShutdown{cluster="seatunnel",address="127.0.0.1:5801",type="clientQuery"} 0.0
+hazelcast_executor_isShutdown{cluster="seatunnel",address="127.0.0.1:5801",type="io"} 0.0
+hazelcast_executor_isShutdown{cluster="seatunnel",address="127.0.0.1:5801",type="offloadable"} 0.0
+hazelcast_executor_isShutdown{cluster="seatunnel",address="127.0.0.1:5801",type="scheduled"} 0.0
+hazelcast_executor_isShutdown{cluster="seatunnel",address="127.0.0.1:5801",type="system"} 0.0
+# TYPE hazelcast_executor_isTerminated gauge
+# HELP hazelcast_executor_isTerminated The hazelcast executor isTerminated of seatunnel cluster node
+hazelcast_executor_isTerminated{cluster="seatunnel",address="127.0.0.1:5801",type="async"} 0.0
+hazelcast_executor_isTerminated{cluster="seatunnel",address="127.0.0.1:5801",type="client"} 0.0
+hazelcast_executor_isTerminated{cluster="seatunnel",address="127.0.0.1:5801",type="clientBlocking"} 0.0
+hazelcast_executor_isTerminated{cluster="seatunnel",address="127.0.0.1:5801",type="clientQuery"} 0.0
+hazelcast_executor_isTerminated{cluster="seatunnel",address="127.0.0.1:5801",type="io"} 0.0
+hazelcast_executor_isTerminated{cluster="seatunnel",address="127.0.0.1:5801",type="offloadable"} 0.0
+hazelcast_executor_isTerminated{cluster="seatunnel",address="127.0.0.1:5801",type="scheduled"} 0.0
+hazelcast_executor_isTerminated{cluster="seatunnel",address="127.0.0.1:5801",type="system"} 0.0
+# TYPE hazelcast_executor_maxPoolSize gauge
+# HELP hazelcast_executor_maxPoolSize The hazelcast executor maxPoolSize of seatunnel cluster node
+hazelcast_executor_maxPoolSize{cluster="seatunnel",address="127.0.0.1:5801",type="async"} 10.0
+hazelcast_executor_maxPoolSize{cluster="seatunnel",address="127.0.0.1:5801",type="client"} 10.0
+hazelcast_executor_maxPoolSize{cluster="seatunnel",address="127.0.0.1:5801",type="clientBlocking"} 200.0
+hazelcast_executor_maxPoolSize{cluster="seatunnel",address="127.0.0.1:5801",type="clientQuery"} 10.0
+hazelcast_executor_maxPoolSize{cluster="seatunnel",address="127.0.0.1:5801",type="io"} 16.0
+hazelcast_executor_maxPoolSize{cluster="seatunnel",address="127.0.0.1:5801",type="offloadable"} 10.0
+hazelcast_executor_maxPoolSize{cluster="seatunnel",address="127.0.0.1:5801",type="scheduled"} 20.0
+hazelcast_executor_maxPoolSize{cluster="seatunnel",address="127.0.0.1:5801",type="system"} 10.0
+# TYPE hazelcast_executor_poolSize gauge
+# HELP hazelcast_executor_poolSize The hazelcast executor poolSize of seatunnel cluster node
+hazelcast_executor_poolSize{cluster="seatunnel",address="127.0.0.1:5801",type="async"} 0.0
+hazelcast_executor_poolSize{cluster="seatunnel",address="127.0.0.1:5801",type="client"} 0.0
+hazelcast_executor_poolSize{cluster="seatunnel",address="127.0.0.1:5801",type="clientBlocking"} 0.0
+hazelcast_executor_poolSize{cluster="seatunnel",address="127.0.0.1:5801",type="clientQuery"} 0.0
+hazelcast_executor_poolSize{cluster="seatunnel",address="127.0.0.1:5801",type="io"} 0.0
+hazelcast_executor_poolSize{cluster="seatunnel",address="127.0.0.1:5801",type="offloadable"} 0.0
+hazelcast_executor_poolSize{cluster="seatunnel",address="127.0.0.1:5801",type="scheduled"} 0.0
+hazelcast_executor_poolSize{cluster="seatunnel",address="127.0.0.1:5801",type="system"} 0.0
+# TYPE hazelcast_executor_queueRemainingCapacity gauge
+# HELP hazelcast_executor_queueRemainingCapacity The hazelcast executor queueRemainingCapacity of seatunnel cluster
+hazelcast_executor_queueRemainingCapacity{cluster="seatunnel",address="127.0.0.1:5801",type="async"} 100000.0
+hazelcast_executor_queueRemainingCapacity{cluster="seatunnel",address="127.0.0.1:5801",type="client"} 1000000.0
+hazelcast_executor_queueRemainingCapacity{cluster="seatunnel",address="127.0.0.1:5801",type="clientBlocking"} 1000000.0
+hazelcast_executor_queueRemainingCapacity{cluster="seatunnel",address="127.0.0.1:5801",type="clientQuery"} 1000000.0
+hazelcast_executor_queueRemainingCapacity{cluster="seatunnel",address="127.0.0.1:5801",type="io"} 2.147483647E9
+hazelcast_executor_queueRemainingCapacity{cluster="seatunnel",address="127.0.0.1:5801",type="offloadable"} 100000.0
+hazelcast_executor_queueRemainingCapacity{cluster="seatunnel",address="127.0.0.1:5801",type="scheduled"} 1000000.0
+hazelcast_executor_queueRemainingCapacity{cluster="seatunnel",address="127.0.0.1:5801",type="system"} 2.147483647E9
+# TYPE hazelcast_executor_queueSize gauge
+# HELP hazelcast_executor_queueSize The hazelcast executor queueSize of seatunnel cluster node
+hazelcast_executor_queueSize{cluster="seatunnel",address="127.0.0.1:5801",type="async"} 0.0
+hazelcast_executor_queueSize{cluster="seatunnel",address="127.0.0.1:5801",type="client"} 0.0
+hazelcast_executor_queueSize{cluster="seatunnel",address="127.0.0.1:5801",type="clientBlocking"} 0.0
+hazelcast_executor_queueSize{cluster="seatunnel",address="127.0.0.1:5801",type="clientQuery"} 0.0
+hazelcast_executor_queueSize{cluster="seatunnel",address="127.0.0.1:5801",type="io"} 0.0
+hazelcast_executor_queueSize{cluster="seatunnel",address="127.0.0.1:5801",type="offloadable"} 0.0
+hazelcast_executor_queueSize{cluster="seatunnel",address="127.0.0.1:5801",type="scheduled"} 0.0
+hazelcast_executor_queueSize{cluster="seatunnel",address="127.0.0.1:5801",type="system"} 0.0
+# TYPE hazelcast_partition_partitionCount gauge
+# HELP hazelcast_partition_partitionCount The partitionCount of seatunnel cluster node
+hazelcast_partition_partitionCount{cluster="seatunnel",address="127.0.0.1:5801"} 271.0
+# TYPE hazelcast_partition_activePartition gauge
+# HELP hazelcast_partition_activePartition The activePartition of seatunnel cluster node
+hazelcast_partition_activePartition{cluster="seatunnel",address="127.0.0.1:5801"} 271.0
+# TYPE hazelcast_partition_isClusterSafe gauge
+# HELP hazelcast_partition_isClusterSafe Whether is cluster safe of partition
+hazelcast_partition_isClusterSafe{cluster="seatunnel",address="127.0.0.1:5801"} 1.0
+# TYPE hazelcast_partition_isLocalMemberSafe gauge
+# HELP hazelcast_partition_isLocalMemberSafe Whether is local member safe of partition
+hazelcast_partition_isLocalMemberSafe{cluster="seatunnel",address="127.0.0.1:5801"} 1.0
+# TYPE jvm info
+# HELP jvm VM version info
+jvm_info{runtime="OpenJDK Runtime Environment",vendor="Azul Systems, Inc.",version="11.0.13+8-LTS"} 1.0
+# TYPE jvm_classes_currently_loaded gauge
+# HELP jvm_classes_currently_loaded The number of classes that are currently loaded in the JVM
+jvm_classes_currently_loaded 9168.0
+# TYPE jvm_classes_loaded counter
+# HELP jvm_classes_loaded The total number of classes that have been loaded since the JVM has started execution
+jvm_classes_loaded_total 9168.0
+# TYPE jvm_classes_unloaded counter
+# HELP jvm_classes_unloaded The total number of classes that have been unloaded since the JVM has started execution
+jvm_classes_unloaded_total 0.0
+# EOF
\ No newline at end of file
diff --git a/docs/zh/seatunnel-engine/user-command.md b/docs/zh/seatunnel-engine/user-command.md
index d4d06d25a78..1ceea35c85d 100644
--- a/docs/zh/seatunnel-engine/user-command.md
+++ b/docs/zh/seatunnel-engine/user-command.md
@@ -1,7 +1,6 @@
---
-
-sidebar_position: 12
---------------------
+sidebar_position: 13
+---
# 命令行工具
@@ -84,7 +83,7 @@ bin/seatunnel.sh --config $SEATUNNEL_HOME/config/v2.batch.config.template
## 查看作业状态
```shell
-./bin/seatunnel.sh -j <jobId>
+./bin/seatunnel.sh -j <jobId>
```
该命令会输出指定作业的状态信息
@@ -102,13 +101,13 @@ bin/seatunnel.sh --config $SEATUNNEL_HOME/config/v2.batch.config.template
--metrics 参数可以获取指定作业的监控信息
```shell
-./bin/seatunnel.sh --metrics <jobId>
+./bin/seatunnel.sh --metrics <jobId>
```
## 暂停作业
```shell
-./bin/seatunnel.sh -s <jobId>
+./bin/seatunnel.sh -s <jobId>
```
该命令会暂停指定作业,注意,只有开启了checkpoint的作业才支持暂停作业(实时同步作业默认开启checkpoint,批处理作业默认不开启checkpoint需要通过在 `env` 中配置checkpoint.interval来开启checkpoint)。
@@ -118,7 +117,7 @@ bin/seatunnel.sh --config $SEATUNNEL_HOME/config/v2.batch.config.template
## 恢复作业
```shell
-./bin/seatunnel.sh -r <jobId> -c $SEATUNNEL_HOME/config/v2.batch.config.template
+./bin/seatunnel.sh -r <jobId> -c $SEATUNNEL_HOME/config/v2.batch.config.template
```
该命令会恢复指定作业,注意,只有开启了checkpoint的作业才支持恢复作业(实时同步作业默认开启checkpoint,批处理作业默认不开启checkpoint需要通过在 `env` 中配置checkpoint.interval来开启checkpoint)。
@@ -130,10 +129,21 @@ bin/seatunnel.sh --config $SEATUNNEL_HOME/config/v2.batch.config.template
## 取消作业
```shell
-./bin/seatunnel.sh -can <jobId>
+./bin/seatunnel.sh -can <jobId1> [<jobId2> <jobId3> ...]
```
该命令会取消指定作业,取消作业后,作业会被停止,作业的状态会变为`CANCELED`。
+支持批量取消作业,可以一次取消多个作业。
+
被cancel的作业的所有断点信息都将被删除,无法通过seatunnel.sh -r <jobId>恢复。
+## 配置JVM参数
+
+我们可以通过以下方式为 SeaTunnel Engine 客户端配置 JVM 参数:
+
+1. 添加JVM参数到`$SEATUNNEL_HOME/config/jvm_client_options`文件中。
+
+ 在 `$SEATUNNEL_HOME/config/jvm_client_options` 文件中修改 JVM 参数。请注意,该文件中的 JVM 参数将应用于使用 `seatunnel.sh` 提交的所有作业,包括 Local 模式和 Cluster 模式(示例见本节末尾)。
+
+2. 在提交作业时添加 JVM 参数。例如,`sh bin/seatunnel.sh --config $SEATUNNEL_HOME/config/v2.batch.config.template -DJvmOption="-Xms2G -Xmx2G"`
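+
+A minimal sketch of option 1; the heap sizes below are only an example and the file path is the one named above:
+
+```shell
+# Persist JVM options for every job submitted through seatunnel.sh
+echo "-Xms2G" >> $SEATUNNEL_HOME/config/jvm_client_options
+echo "-Xmx2G" >> $SEATUNNEL_HOME/config/jvm_client_options
+```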
diff --git a/docs/zh/seatunnel-engine/web-ui.md b/docs/zh/seatunnel-engine/web-ui.md
new file mode 100644
index 00000000000..35b551225ec
--- /dev/null
+++ b/docs/zh/seatunnel-engine/web-ui.md
@@ -0,0 +1,47 @@
+# Web UI
+
+## 访问
+
+在访问 Web UI 之前,我们需要开启 HTTP REST API。首先,需要在 `seatunnel.yaml` 配置文件中进行如下配置:
+
+```yaml
+seatunnel:
+  engine:
+    http:
+      enable-http: true
+      port: 8080
+```
+
+然后访问 `http://ip:8080/#/overview`
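+
+If you just want to confirm that the HTTP port is reachable before opening a browser, a quick probe like the one below is enough; `localhost:8080` is a placeholder for the address and port configured above.
+
+```shell
+# Expect an HTTP response once enable-http is true and the node is running
+curl -I http://localhost:8080/
+```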
+
+## 概述
+
+Apache SeaTunnel 的 Web UI 提供了一个友好的用户界面,用于监控和管理 SeaTunnel 作业。通过 Web UI,用户可以实时查看当前运行的作业、已完成的作业,以及集群中工作节点和管理节点的状态。主要功能模块包括 Jobs、Workers 和 Master,每个模块都提供了详细的状态信息和操作选项,帮助用户高效地管理和优化其数据处理流程。
+![overview.png](../../images/ui/overview.png)
+
+## 作业
+
+### 运行中的作业
+
+“运行中的作业”模块列出了当前正在执行的所有 SeaTunnel 作业。用户可以查看每个作业的基本信息,包括作业 ID、提交时间、状态、执行时间等。点击具体作业可以查看更多详细信息,如任务分布、资源使用情况和日志输出,便于用户实时监控作业进度并及时处理潜在问题。
+![running.png](../../images/ui/running.png)
+![detail.png](../../images/ui/detail.png)
+
+### 已完成的作业
+
+“已完成的作业”模块展示了所有已成功完成或失败的 SeaTunnel 作业。此部分提供了每个作业的执行结果、完成时间、耗时以及失败原因(如果有)。用户可以通过此模块回顾过去的作业记录,分析作业性能,进行故障排查或重复执行某些特定作业。
+![finished.png](../../images/ui/finished.png)
+
+## 工作节点
+
+### 工作节点信息
+
+“工作节点”模块展示了集群中所有工作节点的详细信息,包括每个工作节点的地址、运行状态、CPU 和内存使用情况、正在执行的任务数量等。通过该模块,用户可以监控各个工作节点的健康状况,及时发现和处理资源瓶颈或节点故障,确保 SeaTunnel 集群的稳定运行。
+![workers.png](../../images/ui/workers.png)
+
+## 管理节点
+
+### 管理节点信息
+
+“管理节点”模块提供了 SeaTunnel 集群中主节点的状态和配置信息。用户可以查看 Master 节点的地址、运行状态、负责的作业调度情况以及整体集群的资源分配情况。该模块帮助用户全面了解集群的核心管理部分,便于进行集群配置优化和故障排查。
+![master.png](../../images/ui/master.png)
diff --git a/docs/zh/start-v2/docker/docker.md b/docs/zh/start-v2/docker/docker.md
new file mode 100644
index 00000000000..1c4bc5d4b10
--- /dev/null
+++ b/docs/zh/start-v2/docker/docker.md
@@ -0,0 +1,393 @@
+---
+sidebar_position: 3
+---
+
+# 使用Docker进行部署
+
+## 使用Docker启用本地模式
+
+### Zeta 引擎
+
+#### 下载镜像
+
+```shell
+docker pull apache/seatunnel:<version_tag>
+```
+
+当下载完成后,可以使用如下命令来提交任务
+
+```shell
+# Run fake source to console sink
+docker run --rm -it apache/seatunnel:<version_tag> ./bin/seatunnel.sh -m local -c config/v2.batch.config.template
+
+# Run job with custom config file
+docker run --rm -it -v /<your_config_dir>:/config apache/seatunnel:<version_tag> ./bin/seatunnel.sh -m local -c /config/fake_to_console.conf
+
+# Example
+# If your config file is at /tmp/job/fake_to_console.conf
+docker run --rm -it -v /tmp/job/:/config apache/seatunnel:<version_tag> ./bin/seatunnel.sh -m local -c /config/fake_to_console.conf
+
+# Set JVM options when running
+docker run --rm -it -v /tmp/job/:/config apache/seatunnel:<version_tag> ./bin/seatunnel.sh -DJvmOption="-Xms4G -Xmx4G" -m local -c /config/fake_to_console.conf
+```
+
+#### 自己构建镜像
+
+从源代码构建。下载源码的方式和下载二进制包的方式是一样的。
+你可以从[下载地址](https://seatunnel.apache.org/download/)下载源码, 或者从[GitHub 仓库](https://github.com/apache/seatunnel/releases)克隆源代码
+
+##### 使用一条命令构建镜像
+```shell
+cd seatunnel
+# Use the already set maven profile
+mvn -B clean install -Dmaven.test.skip=true -Dmaven.javadoc.skip=true -Dlicense.skipAddThirdParty=true -D"docker.build.skip"=false -D"docker.verify.skip"=false -D"docker.push.skip"=true -D"docker.tag"=2.3.9 -Dmaven.deploy.skip -D"skip.spotless"=true --no-snapshot-updates -Pdocker,seatunnel
+
+# Check the docker image
+docker images | grep apache/seatunnel
+```
+
+##### 分步骤构建
+```shell
+# Build binary package from source code
+mvn clean package -DskipTests -Dskip.spotless=true
+
+# Build docker image
+cd seatunnel-dist
+docker build -f src/main/docker/Dockerfile --build-arg VERSION=2.3.9 -t apache/seatunnel:2.3.9 .
+
+# If you build from dev branch, you should add SNAPSHOT suffix to the version
+docker build -f src/main/docker/Dockerfile --build-arg VERSION=2.3.9-SNAPSHOT -t apache/seatunnel:2.3.9-SNAPSHOT .
+
+# Check the docker image
+docker images | grep apache/seatunnel
+```
+
+Dockerfile文件内容为:
+```dockerfile
+FROM openjdk:8
+
+ARG VERSION
+# Build from Source Code And Copy it into image
+COPY ./target/apache-seatunnel-${VERSION}-bin.tar.gz /opt/
+
+# Download From Internet
+# Please Note this file only include fake/console connector, You'll need to download the other connectors manually
+# wget -P /opt https://dlcdn.apache.org/seatunnel/2.3.6/apache-seatunnel-${VERSION}-bin.tar.gz
+
+RUN cd /opt && \
+ tar -zxvf apache-seatunnel-${VERSION}-bin.tar.gz && \
+ mv apache-seatunnel-${VERSION} seatunnel && \
+ rm apache-seatunnel-${VERSION}-bin.tar.gz && \
+ cp seatunnel/config/log4j2_client.properties seatunnel/config/log4j2.properties && \
+ cp seatunnel/config/hazelcast-master.yaml seatunnel/config/hazelcast-worker.yaml
+
+WORKDIR /opt/seatunnel
+```
+
+### Spark/Flink引擎
+
+
+#### 挂载 Spark/Flink
+
+默认设置下,Spark 的目录为 `/opt/spark`,Flink 的目录为 `/opt/flink`。
+如果你需要运行 Spark 或 Flink 引擎,需要将相关依赖挂载到 `/opt/spark` 或 `/opt/flink` 目录下。
+
+```shell
+docker run \
+ -v <your_spark_home>:/opt/spark \
+ -v <your_flink_home>:/opt/flink \
+ ...
+```
+
+或者你可以在Dockerfile中修改 `SPARK_HOME`, `FLINK_HOME`环境变量,并且重新构建基础镜像,然后再进行挂载.
+
+```dockerfile
+FROM apache/seatunnel
+
+ENV SPARK_HOME=<your_spark_install_path>
+
+...
+
+```
+
+```shell
+docker run \
+ -v <your_spark_home>:<your_spark_install_path> \
+ ...
+```
+
+### 提交任务
+
+不同引擎和同一引擎的不同版本命令不同,请选择正确的命令。
+
+- Spark
+
+```shell
+# spark2
+docker run --rm -it apache/seatunnel bash ./bin/start-seatunnel-spark-2-connector-v2.sh -c config/v2.batch.config.template
+
+# spark3
+docker run --rm -it apache/seatunnel bash ./bin/start-seatunnel-spark-3-connector-v2.sh -c config/v2.batch.config.template
+```
+
+- Flink
+ 在提交作业之前,您需要先启动 Flink 集群。
+
+```shell
+# flink version between `1.12.x` and `1.14.x`
+docker run --rm -it apache/seatunnel bash -c '/bin/start-cluster.sh && ./bin/start-seatunnel-flink-13-connector-v2.sh -c config/v2.streaming.conf.template'
+# flink version between `1.15.x` and `1.16.x`
+docker run --rm -it apache/seatunnel bash -c '/bin/start-cluster.sh && ./bin/start-seatunnel-flink-15-connector-v2.sh -c config/v2.streaming.conf.template'
+```
+
+
+
+## 使用Docker配置集群模式
+
+docker下的集群模式仅支持Zeta引擎
+
+有两种方式来启动集群
+
+
+### 直接使用Docker
+
+#### 创建一个network
+```shell
+docker network create seatunnel-network
+```
+
+#### 启动节点
+- 启动master节点
+```shell
+## start master and expose port 5801
+docker run -d --name seatunnel_master \
+ --network seatunnel-network \
+ --rm \
+ -p 5801:5801 \
+ apache/seatunnel \
+ ./bin/seatunnel-cluster.sh -r master
+```
+
+- 获取容器的ip
+```shell
+docker inspect seatunnel_master
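+# Alternatively, print only the container IP on this network (standard docker --format template)
+docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' seatunnel_master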
+```
+运行此命令获取master容器的ip
+
+- 启动worker节点
+```shell
+# 将ST_DOCKER_MEMBER_LIST设置为master容器的ip
+docker run -d --name seatunnel_worker_1 \
+ --network seatunnel-network \
+ --rm \
+ -e ST_DOCKER_MEMBER_LIST=172.18.0.2:5801 \
+ apache/seatunnel \
+ ./bin/seatunnel-cluster.sh -r worker
+
+## 启动第二个worker节点
+# 将ST_DOCKER_MEMBER_LIST设置为master容器的ip
+docker run -d --name seatunnel_worker_2 \
+ --network seatunnel-network \
+ --rm \
+ -e ST_DOCKER_MEMBER_LIST=172.18.0.2:5801 \
+ apache/seatunnel \
+ ./bin/seatunnel-cluster.sh -r worker
+
+```
+
+#### 集群扩容
+
+```shell
+# 将ST_DOCKER_MEMBER_LIST设置为已经启动的master容器的ip
+docker run -d --name seatunnel_master \
+ --network seatunnel-network \
+ --rm \
+ -e ST_DOCKER_MEMBER_LIST=172.18.0.2:5801 \
+ apache/seatunnel \
+ ./bin/seatunnel-cluster.sh -r master
+```
+
+运行这个命令创建一个worker节点
+```shell
+# 将ST_DOCKER_MEMBER_LIST设置为master容器的ip
+docker run -d --name seatunnel_worker_1 \
+ --network seatunnel-network \
+ --rm \
+ -e ST_DOCKER_MEMBER_LIST=172.18.0.2:5801 \
+ apache/seatunnel \
+ ./bin/seatunnel-cluster.sh -r worker
+```
+
+### 使用docker-compose
+`docker-compose.yaml` 配置文件为:
+```yaml
+version: '3.8'
+
+services:
+ master:
+ image: apache/seatunnel
+ container_name: seatunnel_master
+ environment:
+ - ST_DOCKER_MEMBER_LIST=172.16.0.2,172.16.0.3,172.16.0.4
+ entrypoint: >
+ /bin/sh -c "
+ /opt/seatunnel/bin/seatunnel-cluster.sh -r master
+ "
+ ports:
+ - "5801:5801"
+ networks:
+ seatunnel_network:
+ ipv4_address: 172.16.0.2
+
+ worker1:
+ image: apache/seatunnel
+ container_name: seatunnel_worker_1
+ environment:
+ - ST_DOCKER_MEMBER_LIST=172.16.0.2,172.16.0.3,172.16.0.4
+ entrypoint: >
+ /bin/sh -c "
+ /opt/seatunnel/bin/seatunnel-cluster.sh -r worker
+ "
+ depends_on:
+ - master
+ networks:
+ seatunnel_network:
+ ipv4_address: 172.16.0.3
+
+ worker2:
+ image: apache/seatunnel
+ container_name: seatunnel_worker_2
+ environment:
+ - ST_DOCKER_MEMBER_LIST=172.16.0.2,172.16.0.3,172.16.0.4
+ entrypoint: >
+ /bin/sh -c "
+ /opt/seatunnel/bin/seatunnel-cluster.sh -r worker
+ "
+ depends_on:
+ - master
+ networks:
+ seatunnel_network:
+ ipv4_address: 172.16.0.4
+
+networks:
+ seatunnel_network:
+ driver: bridge
+ ipam:
+ config:
+ - subnet: 172.16.0.0/24
+
+```
+运行 `docker-compose up`命令来启动集群,该配置会启动一个master节点,2个worker节点
+
+
+启动完成后,可以运行 `docker logs -f seatunnel_master`、`docker logs -f seatunnel_worker_1` 来查看节点的日志
+当你访问`http://localhost:5801/hazelcast/rest/maps/system-monitoring-information` 时,可以看到集群的状态为1个master节点,2个worker节点.
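+
+A compact sketch of the steps above; the service names come from the compose file, and adjust the host if you query from another machine:
+
+```shell
+# Start the cluster defined in docker-compose.yaml
+docker-compose up -d
+
+# Follow the master log
+docker logs -f seatunnel_master
+
+# Check the cluster status (1 master and 2 workers are expected)
+curl http://localhost:5801/hazelcast/rest/maps/system-monitoring-information
+```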
+
+#### 集群扩容
+当你需要对集群扩容, 例如需要添加一个worker节点时
+```yaml
+version: '3.8'
+
+services:
+ master:
+ image: apache/seatunnel
+ container_name: seatunnel_master
+ environment:
+ - ST_DOCKER_MEMBER_LIST=172.16.0.2,172.16.0.3,172.16.0.4
+ entrypoint: >
+ /bin/sh -c "
+ /opt/seatunnel/bin/seatunnel-cluster.sh -r master
+ "
+ ports:
+ - "5801:5801"
+ networks:
+ seatunnel_network:
+ ipv4_address: 172.16.0.2
+
+ worker1:
+ image: apache/seatunnel
+ container_name: seatunnel_worker_1
+ environment:
+ - ST_DOCKER_MEMBER_LIST=172.16.0.2,172.16.0.3,172.16.0.4
+ entrypoint: >
+ /bin/sh -c "
+ /opt/seatunnel/bin/seatunnel-cluster.sh -r worker
+ "
+ depends_on:
+ - master
+ networks:
+ seatunnel_network:
+ ipv4_address: 172.16.0.3
+
+ worker2:
+ image: apache/seatunnel
+ container_name: seatunnel_worker_2
+ environment:
+ - ST_DOCKER_MEMBER_LIST=172.16.0.2,172.16.0.3,172.16.0.4
+ entrypoint: >
+ /bin/sh -c "
+ /opt/seatunnel/bin/seatunnel-cluster.sh -r worker
+ "
+ depends_on:
+ - master
+ networks:
+ seatunnel_network:
+ ipv4_address: 172.16.0.4
+ ####
+ ## 添加新节点配置
+ ####
+ worker3:
+ image: apache/seatunnel
+ container_name: seatunnel_worker_3
+ environment:
+ - ST_DOCKER_MEMBER_LIST=172.16.0.2,172.16.0.3,172.16.0.4,172.16.0.5 # 添加ip到这里
+ entrypoint: >
+ /bin/sh -c "
+ /opt/seatunnel/bin/seatunnel-cluster.sh -r worker
+ "
+ depends_on:
+ - master
+ networks:
+ seatunnel_network:
+ ipv4_address: 172.16.0.5 # 设置新节点ip
+
+networks:
+ seatunnel_network:
+ driver: bridge
+ ipam:
+ config:
+ - subnet: 172.16.0.0/24
+
+```
+
+然后运行`docker-compose up -d`命令, 将会新建一个worker节点, 已有的节点不会重启.
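+
+A short sketch of the scale-out step; compose only creates the new `worker3` service and leaves the existing containers running:
+
+```shell
+# Apply the updated docker-compose.yaml
+docker-compose up -d
+
+# Verify that worker3 joined while the other containers kept their uptime
+docker-compose ps
+```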
+
+### 提交作业到集群
+
+#### 使用docker container作为客户端
+- 提交任务
+```shell
+# 将ST_DOCKER_MEMBER_LIST设置为master容器的ip
+docker run --name seatunnel_client \
+ --network seatunnel-network \
+ -e ST_DOCKER_MEMBER_LIST=172.18.0.2:5801 \
+ --rm \
+ apache/seatunnel \
+ ./bin/seatunnel.sh -c config/v2.batch.config.template
+```
+
+- 查看作业列表
+```shell
+# 将ST_DOCKER_MEMBER_LIST设置为master容器的ip
+docker run --name seatunnel_client \
+ --network seatunnel-network \
+ -e ST_DOCKER_MEMBER_LIST=172.18.0.2:5801 \
+ --rm \
+ apache/seatunnel \
+ ./bin/seatunnel.sh -l
+```
+
+更多其他命令请参考[命令行工具](../../seatunnel-engine/user-command.md)
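+
+The same client image can run any of those commands. The sketch below cancels a job; `<jobId>` is a placeholder and `ST_DOCKER_MEMBER_LIST` must point to your master container's IP:
+
+```shell
+docker run --rm \
+  --network seatunnel-network \
+  -e ST_DOCKER_MEMBER_LIST=172.18.0.2:5801 \
+  apache/seatunnel \
+  ./bin/seatunnel.sh -can <jobId>
+```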
+
+#### 使用RestAPI
+请参考 [提交作业](../../seatunnel-engine/rest-api-v2.md#提交作业)
\ No newline at end of file
diff --git a/docs/zh/start-v2/locally/deployment.md b/docs/zh/start-v2/locally/deployment.md
index 9fa70f16040..927f5476ece 100644
--- a/docs/zh/start-v2/locally/deployment.md
+++ b/docs/zh/start-v2/locally/deployment.md
@@ -1,54 +1,47 @@
---
-
sidebar_position: 1
--------------------
+---
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
-# 本地部署
+# 部署
-## 步骤 1: 准备工作
+## 准备工作
在开始本地运行前,您需要确保您已经安装了SeaTunnel所需要的以下软件:
* 安装[Java](https://www.java.com/en/download/) (Java 8 或 11, 其他高于Java 8的版本理论上也可以工作) 以及设置 `JAVA_HOME`。
-## 步骤 2: 下载 SeaTunnel
+## 下载 SeaTunnel 发行包
+
+### 下载二进制包
进入[SeaTunnel下载页面](https://seatunnel.apache.org/download)下载最新版本的二进制安装包`seatunnel--bin.tar.gz`
或者您也可以通过终端下载:
```shell
-export version="2.3.6"
+export version="2.3.9"
wget "https://archive.apache.org/dist/seatunnel/${version}/apache-seatunnel-${version}-bin.tar.gz"
tar -xzvf "apache-seatunnel-${version}-bin.tar.gz"
```
-## 步骤 3: 下载连接器插件
+### 下载连接器插件
-从2.2.0-beta版本开始,二进制包不再默认提供连接器依赖,因此在第一次使用时,您需要执行以下命令来安装连接器:(当然,您也可以从 [Apache Maven Repository](https://repo.maven.apache.org/maven2/org/apache/seatunnel/) 手动下载连接器,然后将其移动至`connectors/seatunnel`目录下)。
+从2.2.0-beta版本开始,二进制包不再默认提供连接器依赖,因此在第一次使用时,您需要执行以下命令来安装连接器:(当然,您也可以从 [Apache Maven Repository](https://repo.maven.apache.org/maven2/org/apache/seatunnel/) 手动下载连接器,然后将其移动至`connectors/`目录下,如果是2.3.5之前则需要放入`connectors/seatunnel`目录下)。
```bash
sh bin/install-plugin.sh
```
-如果您需要指定的连接器版本,以2.3.6为例,您需要执行如下命令:
+如果您需要指定的连接器版本,以2.3.9为例,您需要执行如下命令:
```bash
-sh bin/install-plugin.sh 2.3.6
-```
-
-通常您并不需要所有的连接器插件,可以通过配置`config/plugin_config`来指定您所需要的插件,例如,您只需要`connector-console`插件,那么您可以修改plugin.properties配置文件如下:
-
-```plugin_config
---seatunnel-connectors--
-connector-console
---end--
+sh bin/install-plugin.sh 2.3.9
```
-如果您希望示例应用程序能正常工作,那么您需要添加以下插件:
+通常情况下,你不需要所有的连接器插件。你可以通过配置`config/plugin_config`来指定所需的插件。例如,如果你想让示例应用程序正常工作,你将需要`connector-console`和`connector-fake`插件。你可以修改`plugin_config`配置文件,如下所示:
```plugin_config
--seatunnel-connectors--
@@ -65,4 +58,33 @@ connector-console
:::
-现在,您已经完成了SeaTunnel部署。您可以按照[快速开始](quick-start-seatunnel-engine.md)来配置并运行数据同步作业了。
+## 从源码构建SeaTunnel
+
+### 下载源码
+
+从源码构建SeaTunnel。下载源码的方式与下载二进制包的方式相同。
+您可以从[下载页面](https://seatunnel.apache.org/download/)下载源码,或者从[GitHub仓库](https://github.com/apache/seatunnel/releases)克隆源码。
+
+### 构建源码
+
+```shell
+cd seatunnel
+sh ./mvnw clean install -DskipTests -Dskip.spotless=true
+# 获取构建好的二进制包
+cp seatunnel-dist/target/apache-seatunnel-2.3.9-bin.tar.gz /The-Path-You-Want-To-Copy
+
+cd /The-Path-You-Want-To-Copy
+tar -xzvf "apache-seatunnel-${version}-bin.tar.gz"
+```
+
+当从源码构建时,所有的连接器插件和一些必要的依赖(例如:mysql驱动)都包含在二进制包中。您可以直接使用连接器插件,而无需单独安装它们。
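+
+If you want to double-check what the self-built package ships with, listing the connectors directory is enough; the path assumes the tarball extracted above:
+
+```shell
+ls "apache-seatunnel-${version}/connectors" | head
+```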
+
+## 启动SeaTunnel
+
+现在您已经下载了SeaTunnel二进制包和连接器插件。接下来,您可以选择不同的引擎选项来运行同步任务。
+
+如果您使用Flink来运行同步任务,则无需部署SeaTunnel引擎服务集群。您可以参考[Flink 引擎快速开始](quick-start-flink.md)来运行您的同步任务。
+
+如果您使用Spark来运行同步任务,则无需部署SeaTunnel引擎服务集群。您可以参考[Spark 引擎快速开始](quick-start-spark.md)来运行您的同步任务。
+
+如果您使用内置的SeaTunnel引擎(Zeta)来运行任务,则需要先部署SeaTunnel引擎服务。请参考[SeaTunnel 引擎快速开始](quick-start-seatunnel-engine.md)。
diff --git a/docs/zh/start-v2/locally/quick-start-flink.md b/docs/zh/start-v2/locally/quick-start-flink.md
index 09189c91dce..efd27343235 100644
--- a/docs/zh/start-v2/locally/quick-start-flink.md
+++ b/docs/zh/start-v2/locally/quick-start-flink.md
@@ -1,9 +1,8 @@
---
-
sidebar_position: 3
--------------------
+---
-# Flink Engine快速开始
+# Flink 引擎快速开始
## 步骤 1: 部署SeaTunnel及连接器
@@ -28,7 +27,7 @@ env {
source {
FakeSource {
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 16
schema = {
fields {
@@ -41,8 +40,8 @@ source {
transform {
FieldMapper {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
field_mapper = {
age = age
name = new_name
@@ -52,7 +51,7 @@ transform {
sink {
Console {
- source_table_name = "fake1"
+ plugin_input = "fake1"
}
}
@@ -71,7 +70,7 @@ cd "apache-seatunnel-${version}"
./bin/start-seatunnel-flink-13-connector-v2.sh --config ./config/v2.streaming.conf.template
```
-Flink版本`1.15.x`到`1.16.x`
+Flink版本`1.15.x`到`1.18.x`
```shell
cd "apache-seatunnel-${version}"
@@ -105,7 +104,7 @@ row=16 : SGZCr, 94186144
## 此外
-现在,您已经快速浏览了SeaTunnel使用Flink引擎的方式,可以通过在[连接器](/docs/category/connector-v2)中找到SeaTunnel所支持的所有sources和sinks。
-如果您想要了解更多关于SeaTunnel运行在Flink上的信息,请参阅[基于Flink的SeaTunnel](../../other-engine/flink.md)。
+- 开始编写您自己的配置文件,选择您想要使用的[连接器](../../connector-v2/source),并根据连接器的文档配置参数。
+- 如果您想要了解更多关于SeaTunnel运行在Flink上的信息,请参阅[基于Flink的SeaTunnel](../../other-engine/flink.md)。
+- SeaTunnel有内置的`Zeta`引擎,它是作为SeaTunnel的默认引擎。您可以参考[快速开始](quick-start-seatunnel-engine.md)配置和运行数据同步作业。
-SeaTunnel有内置的`Zeta`引擎,它是作为SeaTunnel的默认引擎。您可以参考[快速开始](quick-start-seatunnel-engine.md)配置和运行数据同步作业。
diff --git a/docs/zh/start-v2/locally/quick-start-seatunnel-engine.md b/docs/zh/start-v2/locally/quick-start-seatunnel-engine.md
index cd7a9e88e3e..a24baca61d2 100644
--- a/docs/zh/start-v2/locally/quick-start-seatunnel-engine.md
+++ b/docs/zh/start-v2/locally/quick-start-seatunnel-engine.md
@@ -1,9 +1,8 @@
---
-
sidebar_position: 2
--------------------
+---
-# SeaTunnel Engine快速开始
+# SeaTunnel 引擎快速开始
## 步骤 1: 部署SeaTunnel及连接器
@@ -22,7 +21,7 @@ env {
source {
FakeSource {
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 16
schema = {
fields {
@@ -35,8 +34,8 @@ source {
transform {
FieldMapper {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
field_mapper = {
age = age
name = new_name
@@ -46,7 +45,7 @@ transform {
sink {
Console {
- source_table_name = "fake1"
+ plugin_input = "fake1"
}
}
@@ -94,7 +93,106 @@ SeaTunnel控制台将会打印一些如下日志信息:
2022-12-19 11:01:46,491 INFO org.apache.seatunnel.connectors.seatunnel.console.sink.ConsoleSinkWriter - subtaskIndex=0 rowIndex=16: SeaTunnelRow#tableId=-1 SeaTunnelRow#kind=INSERT: mIJDt, 995616438
```
+## 扩展示例:从 MySQL 到 Doris 批处理模式
+
+### 步骤1:下载连接器
+首先,您需要在`${SEATUNNEL_HOME}/config/plugin_config`文件中加入连接器名称,然后,执行命令来安装连接器(当然,您也可以从 [Apache Maven Repository](https://repo.maven.apache.org/maven2/org/apache/seatunnel/) 手动下载连接器,然后将其移动至`connectors/`目录下),最后,确认连接器`connector-jdbc`、`connector-doris`在`${SEATUNNEL_HOME}/connectors/`目录下即可。
+
+```bash
+# 配置连接器名称
+--seatunnel-connectors--
+connector-jdbc
+connector-doris
+--end--
+```
+
+```bash
+# 安装连接器
+sh bin/install-plugin.sh
+```
+
+### 步骤2:放入 MySQL 驱动
+
+您需要下载 [jdbc driver jar package](https://mvnrepository.com/artifact/mysql/mysql-connector-java) 驱动,并放置在 `${SEATUNNEL_HOME}/lib/`目录下
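+
+For example (the driver version below is only illustrative; pick one compatible with your MySQL server):
+
+```shell
+# Download the MySQL JDBC driver into the SeaTunnel lib directory
+wget -P ${SEATUNNEL_HOME}/lib/ \
+  https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.28/mysql-connector-java-8.0.28.jar
+```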
+
+### 步骤3:添加作业配置文件来定义作业
+
+```bash
+cd seatunnel/job/
+
+vim st.conf
+
+env {
+ parallelism = 2
+ job.mode = "BATCH"
+}
+source {
+ Jdbc {
+ url = "jdbc:mysql://localhost:3306/test"
+ driver = "com.mysql.cj.jdbc.Driver"
+ connection_check_timeout_sec = 100
+ user = "user"
+ password = "pwd"
+ table_path = "test.table_name"
+ query = "select * from test.table_name"
+ }
+}
+
+sink {
+ Doris {
+ fenodes = "doris_ip:8030"
+ username = "user"
+ password = "pwd"
+ database = "test_db"
+ table = "table_name"
+ sink.enable-2pc = "true"
+ sink.label-prefix = "test-cdc"
+ doris.config = {
+ format = "json"
+ read_json_by_line="true"
+ }
+ }
+}
+```
+
+关于配置的更多信息请查看[配置的基本概念](../../concept/config.md)
+
+### 步骤 4: 运行SeaTunnel应用程序
+
+您可以通过以下命令启动应用程序:
+
+```shell
+cd seatunnel/
+./bin/seatunnel.sh --config ./job/st.conf -m local
+
+```
+
+**查看输出**: 当您运行该命令时,您可以在控制台中看到它的输出。您可以认为这是命令运行成功或失败的标志。
+
+SeaTunnel控制台将会打印一些如下日志信息:
+
+```shell
+***********************************************
+ Job Statistic Information
+***********************************************
+Start Time : 2024-08-13 10:21:49
+End Time : 2024-08-13 10:21:53
+Total Time(s) : 4
+Total Read Count : 1000
+Total Write Count : 1000
+Total Failed Count : 0
+***********************************************
+```
+
+:::tip
+
+如果您想优化自己的作业,请参照连接器使用文档
+
+:::
+
+
## 此外
-现在,您已经快速浏览了SeaTunnel,可以通过[连接器](../../../en/connector-v2/source/FakeSource.md)来找到SeaTunnel所支持的所有sources和sinks。
-如果您想要了解更多关于信息,请参阅[SeaTunnel引擎](../../seatunnel-engine/about.md). 在这里你将了解如何部署SeaTunnel Engine的集群模式以及如何在集群模式下使用。
+- 开始编写您自己的配置文件,选择您想要使用的[连接器](../../connector-v2/source),并根据连接器的文档配置参数。
+- 如果您想要了解更多信息,请参阅[SeaTunnel引擎](../../seatunnel-engine/about.md)。在这里您将了解如何以集群模式部署 SeaTunnel Engine 以及如何在集群模式下使用。
+
diff --git a/docs/zh/start-v2/locally/quick-start-spark.md b/docs/zh/start-v2/locally/quick-start-spark.md
index fbd0fa15fe5..8e3f8fdeba6 100644
--- a/docs/zh/start-v2/locally/quick-start-spark.md
+++ b/docs/zh/start-v2/locally/quick-start-spark.md
@@ -1,9 +1,8 @@
---
-
sidebar_position: 4
--------------------
+---
-# Spark引擎快速开始
+# Spark 引擎快速开始
## 步骤 1: 部署SeaTunnel及连接器
@@ -29,7 +28,7 @@ env {
source {
FakeSource {
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 16
schema = {
fields {
@@ -42,8 +41,8 @@ source {
transform {
FieldMapper {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
field_mapper = {
age = age
name = new_name
@@ -53,7 +52,7 @@ transform {
sink {
Console {
- source_table_name = "fake1"
+ plugin_input = "fake1"
}
}
@@ -112,7 +111,7 @@ row=16 : SGZCr, 94186144
## 此外
-现在,您已经快速浏览了SeaTunnel使用Spark引擎的方式,可以通过在[连接器](/docs/category/connector-v2)中找到SeaTunnel所支持的所有source和sink。
-如果您想要了解更多关于SeaTunnel运行在Spark上的信息,请参阅[基于Spark的SeaTunnel](../../../en/other-engine/spark.md)。
+- 开始编写您自己的配置文件,选择您想要使用的[连接器](../../connector-v2/source),并根据连接器的文档配置参数。
+- 如果您想要了解更多关于SeaTunnel运行在Spark上的信息,请参阅[基于Spark的SeaTunnel](../../../en/other-engine/spark.md)。
+- SeaTunnel有内置的`Zeta`引擎,它是作为SeaTunnel的默认引擎。您可以参考[快速开始](quick-start-seatunnel-engine.md)配置和运行数据同步作业。
-SeaTunnel有内置的`Zeta`引擎,它是作为SeaTunnel的默认引擎。您可以参考[快速开始](quick-start-seatunnel-engine.md)配置和运行数据同步作业。
diff --git a/docs/zh/transform-v2/common-options.md b/docs/zh/transform-v2/common-options.md
index 9a756760f2c..101283c9ae8 100644
--- a/docs/zh/transform-v2/common-options.md
+++ b/docs/zh/transform-v2/common-options.md
@@ -2,22 +2,28 @@
> 源端连接器的常见参数
-| 参数名称 | 参数类型 | 是否必须 | 默认值 |
-|-------------------|--------|------|-----|
-| result_table_name | string | no | - |
-| source_table_name | string | no | - |
+:::warning
-### source_table_name [string]
+旧的配置名称 `result_table_name`/`source_table_name` 已经过时,请尽快迁移到新名称 `plugin_output`/`plugin_input`。
-当未指定 `source_table_name` 时,当前插件在配置文件中处理由前一个插件输出的数据集 `(dataset)` ;
+:::
-当指定了 `source_table_name` 时,当前插件正在处理与该参数对应的数据集
+| 参数名称 | 参数类型 | 是否必须 | 默认值 |
+|---------------|--------|------|-----|
+| plugin_output | string | no | - |
+| plugin_input | string | no | - |
-### result_table_name [string]
+### plugin_input [string]
-当未指定 `result_table_name` 时,此插件处理的数据不会被注册为其他插件可以直接访问的数据集,也不会被称为临时表 `(table)`;
+当未指定 `plugin_input` 时,当前插件在配置文件中处理由前一个插件输出的数据集 `(dataset)` ;
-当指定了 `result_table_name` 时,此插件处理的数据将被注册为其他插件可以直接访问的数据集 `(dataset)`,或者被称为临时表 `(table)`。在这里注册的数据集可以通过指定 `source_table_name` 被其他插件直接访问。
+当指定了 `plugin_input` 时,当前插件正在处理与该参数对应的数据集
+
+### plugin_output [string]
+
+当未指定 `plugin_output` 时,此插件处理的数据不会被注册为其他插件可以直接访问的数据集,也不会被称为临时表 `(table)`;
+
+当指定了 `plugin_output` 时,此插件处理的数据将被注册为其他插件可以直接访问的数据集 `(dataset)`,或者被称为临时表 `(table)`。在这里注册的数据集可以通过指定 `plugin_input` 被其他插件直接访问。
## 示例
diff --git a/docs/zh/transform-v2/copy.md b/docs/zh/transform-v2/copy.md
index a4ca5c613a7..707bc5233e7 100644
--- a/docs/zh/transform-v2/copy.md
+++ b/docs/zh/transform-v2/copy.md
@@ -36,8 +36,8 @@
```
transform {
Copy {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
fields {
name1 = name
name2 = name
diff --git a/docs/zh/transform-v2/dynamic-compile.md b/docs/zh/transform-v2/dynamic-compile.md
new file mode 100644
index 00000000000..c9cc8708164
--- /dev/null
+++ b/docs/zh/transform-v2/dynamic-compile.md
@@ -0,0 +1,227 @@
+# DynamicCompile
+
+> 动态编译插件
+
+## 描述
+
+:::tip
+
+特别申明
+您需要确保服务的安全性,并防止攻击者上传破坏性代码
+
+:::
+
+提供一种可编程的方式来处理行数据:允许用户自定义任何业务行为,甚至以现有行字段作为参数发起 RPC 请求,或者通过从其他数据源检索相关数据来扩展字段。为了区分业务,您还可以定义多个转换进行组合。
+请注意,如果转换逻辑过于复杂,可能会影响性能。
+
+## 属性
+
+| name | type | required | default value |
+|------------------|--------|----------|---------------|
+| source_code | string | no | |
+| compile_language | Enum | yes | |
+| compile_pattern | Enum | no | SOURCE_CODE |
+| absolute_path | string | no | |
+
+
+### common options [string]
+
+转换插件的常见参数, 请参考 [Transform Plugin](common-options.md) 了解详情。
+
+### compile_language [Enum]
+
+可选值:`GROOVY`、`JAVA`。
+使用 `JAVA` 时,Java 中的某些语法可能不受支持,请参阅 https://github.com/janino-compiler/janino 。
+
+### compile_pattern [Enum]
+
+可选值:`SOURCE_CODE`、`ABSOLUTE_PATH`。
+选择 `SOURCE_CODE` 时,`source_code` 属性必填;选择 `ABSOLUTE_PATH` 时,`absolute_path` 属性必填。
+
+### absolute_path [string]
+
+服务器上Java或Groovy文件的绝对路径
+
+### source_code [string]
+源代码
+
+#### 关于source_code
+在代码中,你必须实现两个方法
+- `Column[] getInlineOutputColumns(CatalogTable inputCatalogTable)`
+- `Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow)`
+
+`getInlineOutputColumns`方法中,入参类型为`CatalogTable`,返回结果为`Column[]`。
+你可以从入参的`CatalogTable`获取当前表的表结构。
+在返回结果中,如果字段已经存在,则会根据返回结果进行覆盖,如果不存在,则会添加到现有表结构中。
+
+`getInlineOutputFieldValues`方法,入参类型为`SeaTunnelRowAccessor`,返回结果为`Object[]`
+你可以从`SeaTunnelRowAccessor`获取到当前行的数据,进行自己的定制化数据处理逻辑。
+返回结果中,数组长度需要与`getInlineOutputColumns`方法返回的长度一致,并且里面的字段值顺序也需要保持一致。
+
+如果有第三方依赖包,请将它们放在${SEATUNNEL_HOME}/lib中,如果您使用spark或flink,则需要将其放在相应服务的libs下。
+你需要重启集群服务,才能重新加载这些依赖。
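+
+A minimal sketch of that step; `my-udf-deps.jar` is a placeholder for your own third-party jar:
+
+```shell
+# Make the third-party jar visible to the dynamically compiled code
+cp my-udf-deps.jar ${SEATUNNEL_HOME}/lib/
+# Then restart the cluster service with the restart procedure of your deployment so the jar is reloaded
+```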
+
+
+## Example
+
+源端数据读取的表格如下:
+
+| name | age | card |
+|----------|-----|------|
+| Joy Ding | 20 | 123 |
+| May Ding | 20 | 123 |
+| Kin Dom | 30 | 123 |
+| Joy Dom | 30 | 123 |
+
+我们将使用`DynamicCompile`对数据进行修改,添加一列`compile_language`字段,并且将`age`字段更新,当`age=20`时将其更新为`40`
+
+- 使用groovy
+```hocon
+transform {
+ DynamicCompile {
+ plugin_input = "fake"
+ plugin_output = "groovy_out"
+ compile_language="GROOVY"
+ compile_pattern="SOURCE_CODE"
+ source_code="""
+ import org.apache.seatunnel.api.table.catalog.Column
+ import org.apache.seatunnel.api.table.type.SeaTunnelRowAccessor
+ import org.apache.seatunnel.api.table.catalog.CatalogTable
+ import org.apache.seatunnel.api.table.catalog.PhysicalColumn;
+ import org.apache.seatunnel.api.table.type.*;
+ import java.util.ArrayList;
+ class demo {
+ public Column[] getInlineOutputColumns(CatalogTable inputCatalogTable) {
+ PhysicalColumn col1 =
+ PhysicalColumn.of(
+ "compile_language",
+ BasicType.STRING_TYPE,
+ 10L,
+ true,
+ "",
+ "");
+ PhysicalColumn col2 =
+ PhysicalColumn.of(
+ "age",
+ BasicType.INT_TYPE,
+ 0L,
+ false,
+ false,
+ ""
+ );
+ return new Column[]{
+ col1, col2
+ };
+ }
+
+
+ public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) {
+ Object[] fieldValues = new Object[2];
+ // get age
+ Object ageField = inputRow.getField(1);
+ fieldValues[0] = "GROOVY";
+ if (Integer.parseInt(ageField.toString()) == 20) {
+ fieldValues[1] = 40;
+ } else {
+ fieldValues[1] = ageField;
+ }
+ return fieldValues;
+ }
+ };"""
+
+ }
+}
+```
+
+- 使用java
+```hocon
+transform {
+ DynamicCompile {
+ plugin_input = "fake"
+ plugin_output = "java_out"
+ compile_language="JAVA"
+ compile_pattern="SOURCE_CODE"
+ source_code="""
+ import org.apache.seatunnel.api.table.catalog.Column;
+ import org.apache.seatunnel.api.table.type.SeaTunnelRowAccessor;
+ import org.apache.seatunnel.api.table.catalog.*;
+ import org.apache.seatunnel.api.table.type.*;
+ import java.util.ArrayList;
+ public Column[] getInlineOutputColumns(CatalogTable inputCatalogTable) {
+ PhysicalColumn col1 =
+ PhysicalColumn.of(
+ "compile_language",
+ BasicType.STRING_TYPE,
+ 10L,
+ true,
+ "",
+ "");
+ PhysicalColumn col2 =
+ PhysicalColumn.of(
+ "age",
+ BasicType.INT_TYPE,
+ 0L,
+ false,
+ false,
+ ""
+ );
+ return new Column[]{
+ col1, col2
+ };
+ }
+
+
+ public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) {
+ Object[] fieldValues = new Object[2];
+ // get age
+ Object ageField = inputRow.getField(1);
+ fieldValues[0] = "JAVA";
+ if (Integer.parseInt(ageField.toString()) == 20) {
+ fieldValues[1] = 40;
+ } else {
+ fieldValues[1] = ageField;
+ }
+ return fieldValues;
+ }
+ """
+
+ }
+ }
+ ```
+- 指定源码文件路径
+```hocon
+ transform {
+ DynamicCompile {
+ plugin_input = "fake"
+ plugin_output = "groovy_out"
+ compile_language="GROOVY"
+ compile_pattern="ABSOLUTE_PATH"
+ absolute_path="""/tmp/GroovyFile"""
+
+ }
+}
+```
+
+那么结果表 `groovy_out` 中的数据将会更新为:
+
+| name | age | card | compile_language |
+|----------|-----|------|------------------|
+| Joy Ding | 40 | 123 | GROOVY |
+| May Ding | 40 | 123 | GROOVY |
+| Kin Dom | 30 | 123 | GROOVY |
+| Joy Dom | 30 | 123 | GROOVY |
+
+那么结果表 `java_out` 中的数据将会更新为:
+
+| name | age | card | compile_language |
+|----------|-----|------|------------------|
+| Joy Ding | 40 | 123 | JAVA |
+| May Ding | 40 | 123 | JAVA |
+| Kin Dom | 30 | 123 | JAVA |
+| Joy Dom | 30 | 123 | JAVA |
+
+更多复杂例子可以参考
+https://github.com/apache/seatunnel/tree/dev/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf
+
+## Changelog
+
diff --git a/docs/zh/transform-v2/embedding.md b/docs/zh/transform-v2/embedding.md
new file mode 100644
index 00000000000..e05c9c24422
--- /dev/null
+++ b/docs/zh/transform-v2/embedding.md
@@ -0,0 +1,382 @@
+# Embedding
+
+> Embedding Transform Plugin
+
+## 描述
+
+`Embedding` 转换插件利用 embedding 模型将文本数据转换为向量化表示。此转换可以应用于各种字段。该插件支持多种模型提供商,并且可以与不同的API集成。
+
+## 配置选项
+
+| 名称 | 类型 | 是否必填 | 默认值 | 描述 |
+|--------------------------------|--------|------|-----|------------------------------------------------------------------|
+| model_provider | enum | 是 | - | embedding模型的提供商。可选项包括 `QIANFAN`、`OPENAI` 等。 |
+| api_key | string | 是 | - | 用于验证embedding服务的API密钥。 |
+| secret_key | string | 是 | - | 用于额外验证的密钥。一些提供商可能需要此密钥进行安全的API请求。 |
+| single_vectorized_input_number | int | 否 | 1 | 单次请求向量化的输入数量。默认值为1。 |
+| vectorization_fields | map | 是 | - | 输入字段和相应的输出向量字段之间的映射。 |
+| model | string | 是 | - | 要使用的具体embedding模型。例如,如果提供商为OPENAI,可以指定 `text-embedding-3-small`。 |
+| api_path | string | 否 | - | embedding服务的API。通常由模型提供商提供。 |
+| oauth_path | string | 否 | - | oauth 服务的 API 。 |
+| custom_config | map | 否 | | 模型的自定义配置。 |
+| custom_response_parse | string | 否 | | 使用 JsonPath 解析模型响应的方式。示例:`$.choices[*].message.content`。 |
+| custom_request_headers | map | 否 | | 发送到模型的请求的自定义头信息。 |
+| custom_request_body | map | 否 | | 请求体的自定义配置。支持占位符如 `${model}`、`${input}`。 |
+
+### embedding_model_provider
+
+用于生成 embedding 的模型提供商。常见选项包括 `DOUBAO`、`QIANFAN`、`OPENAI` 等,同时可选择 `CUSTOM` 实现自定义 embedding
+模型的请求以及获取。
+
+### api_key
+
+用于验证 embedding 服务请求的API密钥。通常由模型提供商在你注册他们的服务时提供。
+
+### secret_key
+
+用于额外验证的密钥。一些提供商可能要求此密钥以确保API请求的安全性。
+
+### single_vectorized_input_number
+
+指定单次请求向量化的输入数量。默认值为1。根据处理能力和模型提供商的API限制进行调整。
+
+### vectorization_fields
+
+输入字段和相应的输出向量字段之间的映射。这使得插件可以理解要向量化的文本字段以及如何存储生成的向量。
+
+```hocon
+vectorization_fields {
+ book_intro_vector = book_intro
+ author_biography_vector = author_biography
+}
+```
+
+### model
+
+要使用的具体 embedding 模型。这取决于`embedding_model_provider`。例如,如果使用 OPENAI ,可以指定 `text-embedding-3-small`。
+
+### api_path
+
+用于向 embedding 服务发送请求的API。根据提供商和所用模型的不同可能有所变化。通常由模型提供商提供。
+
+### oauth_path
+
+用于向oauth服务发送请求的API,获取对应的认证信息。根据提供商和所用模型的不同可能有所变化。通常由模型提供商提供。
+
+### custom_config
+
+`custom_config` 选项允许您为模型提供额外的自定义配置。这是一个映射,您可以在其中定义特定模型可能需要的各种设置。
+
+### custom_response_parse
+
+`custom_response_parse` 选项允许您指定如何解析模型的响应。您可以使用 JsonPath
+从响应中提取所需的特定数据。例如,使用 `$.data[*].embedding` 提取如下json中的 `embedding` 字段
+值,获取 `List` 嵌套 `List` 的结果。JsonPath
+的使用请参考 [JsonPath 快速入门](https://github.com/json-path/JsonPath?tab=readme-ov-file#getting-started)
+
+```json
+{
+ "object": "list",
+ "data": [
+ {
+ "object": "embedding",
+ "index": 0,
+ "embedding": [
+ -0.006929283495992422,
+ -0.005336422007530928,
+ -0.00004547132266452536,
+ -0.024047505110502243
+ ]
+ }
+ ],
+ "model": "text-embedding-3-small",
+ "usage": {
+ "prompt_tokens": 5,
+ "total_tokens": 5
+ }
+}
+```
+
+### custom_request_headers
+
+`custom_request_headers` 选项允许您定义应包含在发送到模型 API 的请求中的自定义头信息。如果 API
+需要标准头信息之外的额外头信息,例如授权令牌、内容类型等,这个选项会非常有用。
+
+### custom_request_body
+
+`custom_request_body` 选项支持占位符:
+
+- `${model}`:用于模型名称的占位符。
+- `${input}`:用于确定输入值的占位符,同时根据 body value 的类型定义请求体请求类型。例如:`["${input}"]` -> `["input"]`(list)。
+
+### common options
+
+转换插件的常见参数, 请参考 [Transform Plugin](common-options.md) 了解详情
+
+## 示例配置
+
+```hocon
+env {
+ job.mode = "BATCH"
+}
+
+source {
+ FakeSource {
+ row.num = 5
+ schema = {
+ fields {
+ book_id = "int"
+ book_name = "string"
+ book_intro = "string"
+ author_biography = "string"
+ }
+ }
+ rows = [
+ {fields = [1, "To Kill a Mockingbird",
+ "Set in the American South during the 1930s, To Kill a Mockingbird tells the story of young Scout Finch and her brother, Jem, who are growing up in a world of racial inequality and injustice. Their father, Atticus Finch, is a lawyer who defends a black man falsely accused of raping a white woman, teaching his children valuable lessons about morality, courage, and empathy.",
+ "Harper Lee (1926–2016) was an American novelist best known for To Kill a Mockingbird, which won the Pulitzer Prize in 1961. Lee was born in Monroeville, Alabama, and the town served as inspiration for the fictional Maycomb in her novel. Despite the success of her book, Lee remained a private person and published only one other novel, Go Set a Watchman, which was written before To Kill a Mockingbird but released in 2015 as a sequel."
+ ], kind = INSERT}
+ {fields = [2, "1984",
+ "1984 is a dystopian novel set in a totalitarian society governed by Big Brother. The story follows Winston Smith, a man who works for the Party rewriting history. Winston begins to question the Party’s control and seeks truth and freedom in a society where individuality is crushed. The novel explores themes of surveillance, propaganda, and the loss of personal autonomy.",
+ "George Orwell (1903–1950) was the pen name of Eric Arthur Blair, an English novelist, essayist, journalist, and critic. Orwell is best known for his works 1984 and Animal Farm, both of which are critiques of totalitarian regimes. His writing is characterized by lucid prose, awareness of social injustice, opposition to totalitarianism, and support of democratic socialism. Orwell’s work remains influential, and his ideas have shaped contemporary discussions on politics and society."
+ ], kind = INSERT}
+ {fields = [3, "Pride and Prejudice",
+ "Pride and Prejudice is a romantic novel that explores the complex relationships between different social classes in early 19th century England. The story centers on Elizabeth Bennet, a young woman with strong opinions, and Mr. Darcy, a wealthy but reserved gentleman. The novel deals with themes of love, marriage, and societal expectations, offering keen insights into human behavior.",
+ "Jane Austen (1775–1817) was an English novelist known for her sharp social commentary and keen observations of the British landed gentry. Her works, including Sense and Sensibility, Emma, and Pride and Prejudice, are celebrated for their wit, realism, and biting critique of the social class structure of her time. Despite her relatively modest life, Austen’s novels have gained immense popularity, and she is considered one of the greatest novelists in the English language."
+ ], kind = INSERT}
+ {fields = [4, "The Great GatsbyThe Great Gatsby",
+ "The Great Gatsby is a novel about the American Dream and the disillusionment that can come with it. Set in the 1920s, the story follows Nick Carraway as he becomes entangled in the lives of his mysterious neighbor, Jay Gatsby, and the wealthy elite of Long Island. Gatsby's obsession with the beautiful Daisy Buchanan drives the narrative, exploring themes of wealth, love, and the decay of the American Dream.",
+ "F. Scott Fitzgerald (1896–1940) was an American novelist and short story writer, widely regarded as one of the greatest American writers of the 20th century. Born in St. Paul, Minnesota, Fitzgerald is best known for his novel The Great Gatsby, which is often considered the quintessential work of the Jazz Age. His works often explore themes of youth, wealth, and the American Dream, reflecting the turbulence and excesses of the 1920s."
+ ], kind = INSERT}
+ {fields = [5, "Moby-Dick",
+ "Moby-Dick is an epic tale of obsession and revenge. The novel follows the journey of Captain Ahab, who is on a relentless quest to kill the white whale, Moby Dick, that once maimed him. Narrated by Ishmael, a sailor aboard Ahab’s ship, the story delves into themes of fate, humanity, and the struggle between man and nature. The novel is also rich with symbolism and philosophical musings.",
+ "Herman Melville (1819–1891) was an American novelist, short story writer, and poet of the American Renaissance period. Born in New York City, Melville gained initial fame with novels such as Typee and Omoo, but it was Moby-Dick, published in 1851, that would later be recognized as his masterpiece. Melville’s work is known for its complexity, symbolism, and exploration of themes such as man’s place in the universe, the nature of evil, and the quest for meaning. Despite facing financial difficulties and critical neglect during his lifetime, Melville’s reputation soared posthumously, and he is now considered one of the great American authors."
+ ], kind = INSERT}
+ ]
+ plugin_output = "fake"
+ }
+}
+
+transform {
+ Embedding {
+ plugin_input = "fake"
+ embedding_model_provider = QIANFAN
+ model = bge_large_en
+ api_key = xxxxxxxxxx
+ secret_key = xxxxxxxxxx
+ api_path = xxxxxxxxxx
+ vectorization_fields {
+ book_intro_vector = book_intro
+ author_biography_vector = author_biography
+ }
+ plugin_output = "embedding_output"
+ }
+}
+
+sink {
+ Assert {
+ plugin_input = "embedding_output"
+
+
+ rules =
+ {
+ field_rules = [
+ {
+ field_name = book_id
+ field_type = int
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ },
+ {
+ field_name = book_name
+ field_type = string
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ },
+ {
+ field_name = book_intro
+ field_type = string
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ },
+ {
+ field_name = author_biography
+ field_type = string
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ },
+ {
+ field_name = book_intro_vector
+ field_type = float_vector
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ },
+ {
+ field_name = author_biography_vector
+ field_type = float_vector
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ }
+ ]
+ }
+ }
+}
+```
+
+### Customize the embedding model
+
+```hocon
+
+env {
+ job.mode = "BATCH"
+}
+
+source {
+ FakeSource {
+ row.num = 5
+ schema = {
+ fields {
+ book_id = "int"
+ book_name = "string"
+ book_intro = "string"
+ author_biography = "string"
+ }
+ }
+ rows = [
+ {fields = [1, "To Kill a Mockingbird",
+ "Set in the American South during the 1930s, To Kill a Mockingbird tells the story of young Scout Finch and her brother, Jem, who are growing up in a world of racial inequality and injustice. Their father, Atticus Finch, is a lawyer who defends a black man falsely accused of raping a white woman, teaching his children valuable lessons about morality, courage, and empathy.",
+ "Harper Lee (1926–2016) was an American novelist best known for To Kill a Mockingbird, which won the Pulitzer Prize in 1961. Lee was born in Monroeville, Alabama, and the town served as inspiration for the fictional Maycomb in her novel. Despite the success of her book, Lee remained a private person and published only one other novel, Go Set a Watchman, which was written before To Kill a Mockingbird but released in 2015 as a sequel."
+ ], kind = INSERT}
+ {fields = [2, "1984",
+ "1984 is a dystopian novel set in a totalitarian society governed by Big Brother. The story follows Winston Smith, a man who works for the Party rewriting history. Winston begins to question the Party’s control and seeks truth and freedom in a society where individuality is crushed. The novel explores themes of surveillance, propaganda, and the loss of personal autonomy.",
+ "George Orwell (1903–1950) was the pen name of Eric Arthur Blair, an English novelist, essayist, journalist, and critic. Orwell is best known for his works 1984 and Animal Farm, both of which are critiques of totalitarian regimes. His writing is characterized by lucid prose, awareness of social injustice, opposition to totalitarianism, and support of democratic socialism. Orwell’s work remains influential, and his ideas have shaped contemporary discussions on politics and society."
+ ], kind = INSERT}
+ {fields = [3, "Pride and Prejudice",
+ "Pride and Prejudice is a romantic novel that explores the complex relationships between different social classes in early 19th century England. The story centers on Elizabeth Bennet, a young woman with strong opinions, and Mr. Darcy, a wealthy but reserved gentleman. The novel deals with themes of love, marriage, and societal expectations, offering keen insights into human behavior.",
+ "Jane Austen (1775–1817) was an English novelist known for her sharp social commentary and keen observations of the British landed gentry. Her works, including Sense and Sensibility, Emma, and Pride and Prejudice, are celebrated for their wit, realism, and biting critique of the social class structure of her time. Despite her relatively modest life, Austen’s novels have gained immense popularity, and she is considered one of the greatest novelists in the English language."
+ ], kind = INSERT}
+ {fields = [4, "The Great GatsbyThe Great Gatsby",
+ "The Great Gatsby is a novel about the American Dream and the disillusionment that can come with it. Set in the 1920s, the story follows Nick Carraway as he becomes entangled in the lives of his mysterious neighbor, Jay Gatsby, and the wealthy elite of Long Island. Gatsby's obsession with the beautiful Daisy Buchanan drives the narrative, exploring themes of wealth, love, and the decay of the American Dream.",
+ "F. Scott Fitzgerald (1896–1940) was an American novelist and short story writer, widely regarded as one of the greatest American writers of the 20th century. Born in St. Paul, Minnesota, Fitzgerald is best known for his novel The Great Gatsby, which is often considered the quintessential work of the Jazz Age. His works often explore themes of youth, wealth, and the American Dream, reflecting the turbulence and excesses of the 1920s."
+ ], kind = INSERT}
+ {fields = [5, "Moby-Dick",
+ "Moby-Dick is an epic tale of obsession and revenge. The novel follows the journey of Captain Ahab, who is on a relentless quest to kill the white whale, Moby Dick, that once maimed him. Narrated by Ishmael, a sailor aboard Ahab’s ship, the story delves into themes of fate, humanity, and the struggle between man and nature. The novel is also rich with symbolism and philosophical musings.",
+ "Herman Melville (1819–1891) was an American novelist, short story writer, and poet of the American Renaissance period. Born in New York City, Melville gained initial fame with novels such as Typee and Omoo, but it was Moby-Dick, published in 1851, that would later be recognized as his masterpiece. Melville’s work is known for its complexity, symbolism, and exploration of themes such as man’s place in the universe, the nature of evil, and the quest for meaning. Despite facing financial difficulties and critical neglect during his lifetime, Melville’s reputation soared posthumously, and he is now considered one of the great American authors."
+ ], kind = INSERT}
+ ]
+ plugin_output = "fake"
+ }
+}
+
+transform {
+ Embedding {
+ plugin_input = "fake"
+ model_provider = CUSTOM
+ model = text-embedding-3-small
+ api_key = xxxxxxxx
+ api_path = "http://mockserver:1080/v1/doubao/embedding"
+ single_vectorized_input_number = 2
+ vectorization_fields {
+ book_intro_vector = book_intro
+ author_biography_vector = author_biography
+ }
+ custom_config={
+ custom_response_parse = "$.data[*].embedding"
+ custom_request_headers = {
+ "Content-Type"= "application/json"
+ "Authorization"= "Bearer xxxxxxx
+ }
+ custom_request_body ={
+ modelx = "${model}"
+ inputx = ["${input}"]
+ }
+ }
+ plugin_output = "embedding_output_1"
+ }
+}
+
+sink {
+ Assert {
+ plugin_input = "embedding_output_1"
+ rules =
+ {
+ field_rules = [
+ {
+ field_name = book_id
+ field_type = int
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ },
+ {
+ field_name = book_name
+ field_type = string
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ },
+ {
+ field_name = book_intro
+ field_type = string
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ },
+ {
+ field_name = author_biography
+ field_type = string
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ },
+ {
+ field_name = book_intro_vector
+ field_type = float_vector
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ },
+ {
+ field_name = author_biography_vector
+ field_type = float_vector
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ }
+ ]
+ }
+ }
+}
+
+```
\ No newline at end of file
diff --git a/docs/zh/transform-v2/field-mapper.md b/docs/zh/transform-v2/field-mapper.md
index 298d3fa72c9..9c2f82dee80 100644
--- a/docs/zh/transform-v2/field-mapper.md
+++ b/docs/zh/transform-v2/field-mapper.md
@@ -36,8 +36,8 @@
```
transform {
FieldMapper {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
field_mapper = {
id = id
card = card
diff --git a/docs/zh/transform-v2/filter-rowkind.md b/docs/zh/transform-v2/filter-rowkind.md
index 74d2b2d5b1e..60bb6208539 100644
--- a/docs/zh/transform-v2/filter-rowkind.md
+++ b/docs/zh/transform-v2/filter-rowkind.md
@@ -39,7 +39,7 @@ env {
source {
FakeSource {
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 100
schema = {
fields {
@@ -53,15 +53,15 @@ source {
transform {
FilterRowKind {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
exclude_kinds = ["INSERT"]
}
}
sink {
Console {
- source_table_name = "fake1"
+ plugin_input = "fake1"
}
}
```
diff --git a/docs/zh/transform-v2/filter.md b/docs/zh/transform-v2/filter.md
index 1f02c999a37..66937b00b9b 100644
--- a/docs/zh/transform-v2/filter.md
+++ b/docs/zh/transform-v2/filter.md
@@ -43,8 +43,8 @@
```
transform {
Filter {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
include_fields = [name, card]
}
}
@@ -55,8 +55,8 @@ transform {
```
transform {
Filter {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
exclude_fields = [age]
}
}
diff --git a/docs/zh/transform-v2/jsonpath.md b/docs/zh/transform-v2/jsonpath.md
index 449f0f6a77f..a83767e0c19 100644
--- a/docs/zh/transform-v2/jsonpath.md
+++ b/docs/zh/transform-v2/jsonpath.md
@@ -8,24 +8,33 @@
## 属性
-| 名称 | 类型 | 是否必须 | 默认值 |
-|---------|-------|------|-----|
-| Columns | Array | Yes | |
+| 名称 | 类型 | 是否必须 | 默认值 |
+|----------------------|-------|------|------|
+| columns | Array | Yes | |
+| row_error_handle_way | Enum | No | FAIL |
### common options [string]
转换插件的常见参数, 请参考 [Transform Plugin](common-options.md) 了解详情
-### fields[array]
+### row_error_handle_way [Enum]
+
+该选项用于指定当该行发生错误时的处理方式,默认值为 `FAIL`。
+
+- FAIL:选择`FAIL`时,数据格式错误会阻塞并抛出异常。
+- SKIP:选择`SKIP`时,数据格式错误会跳过该行数据。
+
+### columns[array]
#### 属性
-| 名称 | 类型 | 是否必须 | 默认值 |
-|------------|--------|------|--------|
-| src_field | String | Yes | |
-| dest_field | String | Yes | |
-| path | String | Yes | |
-| dest_type | String | No | String |
+| 名称 | 类型 | 是否必须 | 默认值 |
+|-------------------------|--------|------|--------|
+| src_field | String | Yes | |
+| dest_field | String | Yes | |
+| path | String | Yes | |
+| dest_type | String | No | String |
+| column_error_handle_way | Enum | No | |
#### src_field
@@ -51,6 +60,14 @@
> Jsonpath
+#### column_error_handle_way [Enum]
+
+该选项用于指定当列发生错误时的处理方式。
+
+- FAIL:选择`FAIL`时,数据格式错误会阻塞并抛出异常。
+- SKIP:选择`SKIP`时,数据格式错误会跳过此列数据。
+- SKIP_ROW:选择`SKIP_ROW`时,数据格式错误会跳过此行数据。
+
## 读取 JSON 示例
从源读取的数据是像这样的 JSON
@@ -76,8 +93,8 @@
```json
transform {
JsonPath {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
columns = [
{
"src_field" = "data"
@@ -155,23 +172,25 @@ transform {
JsonPath 转换将 seatunnel 的值转换为一个数组。
-```json
+```hocon
transform {
JsonPath {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
+
+ row_error_handle_way = FAIL
columns = [
{
"src_field" = "col"
"path" = "$[0]"
"dest_field" = "name"
- "dest_type" = "string"
+ "dest_type" = "string"
},
- {
+ {
"src_field" = "col"
"path" = "$[1]"
"dest_field" = "age"
- "dest_type" = "int"
+ "dest_type" = "int"
}
]
}
@@ -184,6 +203,94 @@ transform {
|------|-----|----------|-------|
| a | 18 | ["a",18] | ... |
+## 配置异常数据处理策略
+
+您可以配置 `row_error_handle_way` 与 `column_error_handle_way` 来处理异常数据,两者都是非必填项。
+
+`row_error_handle_way` 配置对行数据内所有数据异常进行处理,`column_error_handle_way` 配置对某列数据异常进行处理,优先级高于 `row_error_handle_way`。
+
+### 跳过异常数据行
+
+配置跳过任意列有异常的整行数据
+
+```hocon
+transform {
+ JsonPath {
+
+ row_error_handle_way = SKIP
+
+ columns = [
+ {
+ "src_field" = "json_data"
+ "path" = "$.f1"
+ "dest_field" = "json_data_f1"
+ },
+ {
+ "src_field" = "json_data"
+ "path" = "$.f2"
+ "dest_field" = "json_data_f2"
+ }
+ ]
+ }
+}
+```
+
+### 跳过部分异常数据列
+
+配置仅对 `json_data_f1` 列的数据异常进行跳过并填充空值;其他列发生数据异常时仍会抛出异常,中断处理程序
+
+```hocon
+transform {
+ JsonPath {
+
+ row_error_handle_way = FAIL
+
+ columns = [
+ {
+ "src_field" = "json_data"
+ "path" = "$.f1"
+ "dest_field" = "json_data_f1"
+
+ "column_error_handle_way" = "SKIP"
+ },
+ {
+ "src_field" = "json_data"
+ "path" = "$.f2"
+ "dest_field" = "json_data_f2"
+ }
+ ]
+ }
+}
+```
+
+### 部分列异常跳过整行
+
+配置仅在 `json_data_f1` 列数据异常时跳过整行数据;其他列发生数据异常时仍会抛出异常,中断处理程序
+
+```hocon
+transform {
+ JsonPath {
+
+ row_error_handle_way = FAIL
+
+ columns = [
+ {
+ "src_field" = "json_data"
+ "path" = "$.f1"
+ "dest_field" = "json_data_f1"
+
+ "column_error_handle_way" = "SKIP_ROW"
+ },
+ {
+ "src_field" = "json_data"
+ "path" = "$.f2"
+ "dest_field" = "json_data_f2"
+ }
+ ]
+ }
+}
+```
+
## 更新日志
* 添加 JsonPath 转换
diff --git a/docs/zh/transform-v2/llm.md b/docs/zh/transform-v2/llm.md
new file mode 100644
index 00000000000..7b505bde243
--- /dev/null
+++ b/docs/zh/transform-v2/llm.md
@@ -0,0 +1,328 @@
+# LLM
+
+> LLM 转换插件
+
+## 描述
+
+利用大型语言模型 (LLM) 的强大功能来处理数据,方法是将数据发送到 LLM 并接收生成的结果。利用 LLM 的功能来标记、清理、丰富数据、执行数据推理等。
+
+## 属性
+
+| 名称 | 类型 | 是否必须 | 默认值 |
+|------------------------| ------ | -------- |-------------|
+| model_provider | enum | yes | |
+| output_data_type | enum | no | String |
+| output_column_name | string | no | llm_output |
+| prompt | string | yes | |
+| inference_columns | list | no | |
+| model | string | yes | |
+| api_key | string | yes | |
+| api_path | string | no | |
+| custom_config | map | no | |
+| custom_response_parse | string | no | |
+| custom_request_headers | map | no | |
+| custom_request_body | map | no | |
+
+### model_provider
+
+要使用的模型提供者。可用选项为:
+OPENAI、DOUBAO、KIMIAI、MICROSOFT, CUSTOM
+
+> tips: 如果使用 MICROSOFT,请确保 api_path 配置不为空
+
+### output_data_type
+
+输出数据的数据类型。可用选项为:
+STRING,INT,BIGINT,DOUBLE,BOOLEAN.
+默认值为 STRING。
+
+### output_column_name
+
+自定义输出数据字段名称。自定义字段名称与现有字段名称相同时,将替换为`llm_output`。
+
+### prompt
+
+发送到 LLM 的提示。此参数定义 LLM 将如何处理和返回数据,例如:
+
+从源读取的数据是这样的表格:
+
+| name | age |
+|---------------|-----|
+| Jia Fan | 20 |
+| Hailin Wang | 20 |
+| Eric | 20 |
+| Guangdong Liu | 20 |
+
+我们可以使用以下提示:
+
+```
+Determine whether someone is Chinese or American by their name
+```
+
+这将返回:
+
+| name | age | llm_output |
+|---------------|-----|------------|
+| Jia Fan | 20 | Chinese |
+| Hailin Wang | 20 | Chinese |
+| Eric | 20 | American |
+| Guangdong Liu | 20 | Chinese |
+
+### inference_columns
+
+`inference_columns`选项允许您指定应该将输入数据中的哪些列用作LLM的输入。默认情况下,所有列都将用作输入。
+
+For example:
+```hocon
+transform {
+ LLM {
+ model_provider = OPENAI
+ model = gpt-4o-mini
+ api_key = sk-xxx
+ inference_columns = ["name", "age"]
+ prompt = "Determine whether someone is Chinese or American by their name"
+ }
+}
+```
+
+### model
+
+要使用的模型。不同的模型提供者有不同的模型。例如,OpenAI 模型可以是 `gpt-4o-mini`。
+如果使用 OpenAI 模型,请参考 https://platform.openai.com/docs/models/model-endpoint-compatibility 文档的`/v1/chat/completions` 端点。
+
+### api_key
+
+用于模型提供者的 API 密钥。
+如果使用 OpenAI 模型,请参考 https://platform.openai.com/docs/api-reference/api-keys 文档的如何获取 API 密钥。
+
+### api_path
+
+用于模型提供者的 API 路径。在大多数情况下,您不需要更改此配置。如果使用 API 代理的服务,您可能需要将其配置为代理的 API 地址。
+
+### custom_config
+
+`custom_config` 选项允许您为模型提供额外的自定义配置。这是一个 Map,您可以在其中定义特定模型可能需要的各种设置。
+
+### custom_response_parse
+
+`custom_response_parse` 选项允许您指定如何解析模型的响应。您可以使用 JsonPath
+从响应中提取所需的特定数据。例如,使用 `$.choices[*].message.content` 提取如下json中的 `content` 字段
+值。JsonPath 的使用请参考 [JsonPath 快速入门](https://github.com/json-path/JsonPath?tab=readme-ov-file#getting-started)
+
+```json
+{
+ "id": "chatcmpl-9s4hoBNGV0d9Mudkhvgzg64DAWPnx",
+ "object": "chat.completion",
+ "created": 1722674828,
+ "model": "gpt-4o-mini",
+ "choices": [
+ {
+ "index": 0,
+ "message": {
+ "role": "assistant",
+ "content": "[\"Chinese\"]"
+ },
+ "logprobs": null,
+ "finish_reason": "stop"
+ }
+ ],
+ "usage": {
+ "prompt_tokens": 107,
+ "completion_tokens": 3,
+ "total_tokens": 110
+ },
+ "system_fingerprint": "fp_0f03d4f0ee",
+ "code": 0,
+ "msg": "ok"
+}
+```
+
+### custom_request_headers
+
+`custom_request_headers` 选项允许您定义应包含在发送到模型 API 的请求中的自定义头信息。如果 API
+需要标准头信息之外的额外头信息,例如授权令牌、内容类型等,这个选项会非常有用。
+
+### custom_request_body
+
+`custom_request_body` 选项支持占位符:
+
+- `${model}`:用于模型名称的占位符。
+- `${input}`:用于确定输入值的占位符,同时根据 body value 的类型定义请求体请求类型。例如:`"${input}"` -> "input"。
+- `${prompt}`:用于 LLM 模型提示的占位符。
+
+### common options [string]
+
+转换插件的常见参数, 请参考 [Transform Plugin](common-options.md) 了解详情
+
+## tips
+大模型 API 接口通常会有速率限制,可以配合 SeaTunnel 的限速配置,以确保任务顺利运行。
+SeaTunnel 限速配置请参考 [speed-limit](../concept/speed-limit.md) 了解详情。
+
+## 示例 OPENAI
+
+通过 LLM 确定用户所在的国家。
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "BATCH"
+ read_limit.rows_per_second = 10
+}
+
+source {
+ FakeSource {
+ row.num = 5
+ schema = {
+ fields {
+ id = "int"
+ name = "string"
+ }
+ }
+ rows = [
+ {fields = [1, "Jia Fan"], kind = INSERT}
+ {fields = [2, "Hailin Wang"], kind = INSERT}
+ {fields = [3, "Tomas"], kind = INSERT}
+ {fields = [4, "Eric"], kind = INSERT}
+ {fields = [5, "Guangdong Liu"], kind = INSERT}
+ ]
+ }
+}
+
+transform {
+ LLM {
+ model_provider = OPENAI
+ model = gpt-4o-mini
+ api_key = sk-xxx
+ prompt = "Determine whether someone is Chinese or American by their name"
+ }
+}
+
+sink {
+ console {
+ }
+}
+```
+
+## 示例 KIMIAI
+
+通过 LLM 判断人名是否为中国历史上的帝王
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "BATCH"
+ read_limit.rows_per_second = 10
+}
+
+source {
+ FakeSource {
+ row.num = 5
+ schema = {
+ fields {
+ id = "int"
+ name = "string"
+ }
+ }
+ rows = [
+ {fields = [1, "诸葛亮"], kind = INSERT}
+ {fields = [2, "李世民"], kind = INSERT}
+ {fields = [3, "孙悟空"], kind = INSERT}
+ {fields = [4, "朱元璋"], kind = INSERT}
+ {fields = [5, "乔治·华盛顿"], kind = INSERT}
+ ]
+ }
+}
+
+transform {
+ LLM {
+ model_provider = KIMIAI
+ model = moonshot-v1-8k
+ api_key = sk-xxx
+ prompt = "判断是否是中国历史上的帝王"
+ output_data_type = boolean
+ }
+}
+
+sink {
+ console {
+ }
+}
+```
+
+### Customize the LLM model
+
+```hocon
+env {
+ job.mode = "BATCH"
+}
+
+source {
+ FakeSource {
+ row.num = 5
+ schema = {
+ fields {
+ id = "int"
+ name = "string"
+ }
+ }
+ rows = [
+ {fields = [1, "Jia Fan"], kind = INSERT}
+ {fields = [2, "Hailin Wang"], kind = INSERT}
+ {fields = [3, "Tomas"], kind = INSERT}
+ {fields = [4, "Eric"], kind = INSERT}
+ {fields = [5, "Guangdong Liu"], kind = INSERT}
+ ]
+ plugin_output = "fake"
+ }
+}
+
+transform {
+ LLM {
+ plugin_input = "fake"
+ model_provider = CUSTOM
+ model = gpt-4o-mini
+ api_key = sk-xxx
+ prompt = "Determine whether someone is Chinese or American by their name"
+ openai.api_path = "http://mockserver:1080/v1/chat/completions"
+ custom_config={
+ custom_response_parse = "$.choices[*].message.content"
+ custom_request_headers = {
+ Content-Type = "application/json"
+ Authorization = "Bearer xxxxxxxx"
+ }
+ custom_request_body ={
+ model = "${model}"
+ messages = [
+ {
+ role = "system"
+ content = "${prompt}"
+ },
+ {
+ role = "user"
+ content = "${input}"
+ }]
+ }
+ }
+ plugin_output = "llm_output"
+ }
+}
+
+sink {
+ Assert {
+ plugin_input = "llm_output"
+ rules =
+ {
+ field_rules = [
+ {
+ field_name = llm_output
+ field_type = string
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ }
+ ]
+ }
+ }
+}
+```
diff --git a/docs/zh/transform-v2/metadata.md b/docs/zh/transform-v2/metadata.md
new file mode 100644
index 00000000000..f0ff383f6cf
--- /dev/null
+++ b/docs/zh/transform-v2/metadata.md
@@ -0,0 +1,85 @@
+# Metadata
+
+> Metadata transform plugin
+
+## Description
+
+元数据转换插件,用于将元数据字段添加到数据中。
+
+## 支持的元数据
+
+| Key | DataType | Description |
+|:---------:|:--------:|:-----------------------:|
+| Database | string | 包含该行的数据库名 |
+| Table | string | 包含该行的数据表名 |
+| RowKind | string | 行类型 |
+| EventTime | Long | 该行对应数据库变更发生的时间 |
+| Delay | Long | 数据抽取时间与数据库变更时间的差 |
+| Partition | string | 包含该行对应数据表的分区字段,多个使用`,`连接 |
+
+### 注意事项
+
+`Delay` 和 `Partition` 目前只适用于 CDC 系列连接器(TiDB-CDC 除外)。
+
+## 配置选项
+
+| name | type | required | default value | Description |
+|:---------------:|------|:--------:|:-------------:|-------------------|
+| metadata_fields | map | 是 | - | 元数据字段与输出字段之间的映射关系 |
+
+### metadata_fields [map]
+
+元数据字段和相应的输出字段之间的映射关系
+
+```hocon
+metadata_fields {
+ database = c_database
+ table = c_table
+ rowKind = c_rowKind
+ ts_ms = c_ts_ms
+ delay = c_delay
+}
+```
+
+## 示例
+
+```hocon
+
+env {
+ parallelism = 1
+ job.mode = "STREAMING"
+ checkpoint.interval = 5000
+ read_limit.bytes_per_second = 7000000
+ read_limit.rows_per_second = 400
+}
+
+source {
+ MySQL-CDC {
+ plugin_output = "customers_mysql_cdc"
+ server-id = 5652
+ username = "root"
+ password = "zdyk_Dev@2024"
+ table-names = ["source.user"]
+ base-url = "jdbc:mysql://172.16.17.123:3306/source"
+ }
+}
+
+transform {
+ Metadata {
+ metadata_fields {
+ Database = database
+ Table = table
+ RowKind = rowKind
+ EventTime = ts_ms
+ Delay = delay
+ }
+ plugin_output = "trans_result"
+ }
+}
+
+sink {
+ Console {
+    plugin_input = "trans_result"
+ }
+}
+
+```
+
diff --git a/docs/zh/transform-v2/replace.md b/docs/zh/transform-v2/replace.md
index 99eef89a1ab..6f8c15743bd 100644
--- a/docs/zh/transform-v2/replace.md
+++ b/docs/zh/transform-v2/replace.md
@@ -56,8 +56,8 @@
```
transform {
Replace {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
replace_field = "name"
pattern = " "
replacement = "_"
@@ -84,7 +84,7 @@ env {
source {
FakeSource {
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 100
schema = {
fields {
@@ -97,8 +97,8 @@ source {
transform {
Replace {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
replace_field = "name"
pattern = ".+"
replacement = "b"
@@ -108,7 +108,7 @@ transform {
sink {
Console {
- source_table_name = "fake1"
+ plugin_input = "fake1"
}
}
```
diff --git a/docs/zh/transform-v2/rowkind-extractor.md b/docs/zh/transform-v2/rowkind-extractor.md
new file mode 100644
index 00000000000..cfa4d8fd6c4
--- /dev/null
+++ b/docs/zh/transform-v2/rowkind-extractor.md
@@ -0,0 +1,112 @@
+# RowKindExtractor
+
+> RowKindExtractor transform plugin
+
+## Description
+
+将 CDC Row 转换为 Append only Row,转换后的行会追加一个 RowKind 字段。
+
+例如:
+
+CDC row: `-D 1, test1, test2`
+
+转换后的 Row: `+I 1, test1, test2, DELETE`
+
+## Options
+
+| name | type | required | default value |
+|-------------------|--------|----------|---------------|
+| custom_field_name | string | yes | row_kind |
+| transform_type | enum | yes | SHORT |
+
+### custom_field_name [string]
+
+RowKind列的自定义名
+
+### transform_type [enum]
+
+格式化 RowKind 值,可配置为 `SHORT` 或 `FULL`(用法可参考其后的配置草图):
+
+- `SHORT`:+I、-U、+U、-D
+- `FULL`:INSERT、UPDATE_BEFORE、UPDATE_AFTER、DELETE
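+
+下面是一个最小的 transform 配置草图(其中 `op` 为假设的自定义列名):
+
+```hocon
+transform {
+  RowKindExtractor {
+    # 将 RowKind 写入名为 op 的新列
+    custom_field_name = "op"
+    # 以 +I / -U / +U / -D 的短格式输出
+    transform_type = SHORT
+  }
+}
+```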
+
+## Examples
+
+```hocon
+
+env {
+ parallelism = 1
+ job.mode = "BATCH"
+}
+
+source {
+ FakeSource {
+ schema = {
+ fields {
+ pk_id = bigint
+ name = string
+ score = int
+ }
+ primaryKey {
+ name = "pk_id"
+ columnNames = [pk_id]
+ }
+ }
+ rows = [
+ {
+ kind = INSERT
+ fields = [1, "A", 100]
+ },
+ {
+ kind = INSERT
+ fields = [2, "B", 100]
+ },
+ {
+ kind = INSERT
+ fields = [3, "C", 100]
+ },
+ {
+ kind = INSERT
+ fields = [4, "D", 100]
+ },
+ {
+ kind = UPDATE_BEFORE
+ fields = [1, "A", 100]
+ },
+ {
+ kind = UPDATE_AFTER
+ fields = [1, "F", 100]
+ }
+ {
+ kind = UPDATE_BEFORE
+ fields = [2, "B", 100]
+ },
+ {
+ kind = UPDATE_AFTER
+ fields = [2, "G", 100]
+ },
+ {
+ kind = DELETE
+ fields = [3, "C", 100]
+ },
+ {
+ kind = DELETE
+ fields = [4, "D", 100]
+ }
+ ]
+ }
+}
+
+transform {
+ RowKindExtractor {
+ custom_field_name = "custom_name"
+ transform_type = FULL
+ plugin_output = "trans_result"
+ }
+}
+
+sink {
+ Console {
+    plugin_input = "trans_result"
+ }
+}
+
+```
+
diff --git a/docs/zh/transform-v2/split.md b/docs/zh/transform-v2/split.md
index ef8c3f58540..7fba623520a 100644
--- a/docs/zh/transform-v2/split.md
+++ b/docs/zh/transform-v2/split.md
@@ -46,8 +46,8 @@
```
transform {
Split {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
separator = " "
split_field = "name"
output_fields = [first_name, second_name]
diff --git a/docs/zh/transform-v2/sql-functions.md b/docs/zh/transform-v2/sql-functions.md
index cd90b948674..7e3f8454e1d 100644
--- a/docs/zh/transform-v2/sql-functions.md
+++ b/docs/zh/transform-v2/sql-functions.md
@@ -302,6 +302,15 @@ REPEAT(NAME || ' ', 10)
REPLACE(NAME, ' ')
+
+### SPLIT
+
+将字符串切分成数组。
+
+示例:
+
+select SPLIT(test,';') as arrays
+
### SOUNDEX
```SOUNDEX(string)```
@@ -880,7 +889,7 @@ CALL FROM_UNIXTIME(1672502400, 'yyyy-MM-dd HH:mm:ss','UTC+6')
将一个值转换为另一个数据类型。
-支持的数据类型有:STRING | VARCHAR,INT | INTEGER,LONG | BIGINT,BYTE,FLOAT,DOUBLE,DECIMAL(p,s),TIMESTAMP,DATE,TIME
+支持的数据类型有:STRING | VARCHAR,INT | INTEGER,LONG | BIGINT,BYTE,FLOAT,DOUBLE,DECIMAL(p,s),TIMESTAMP,DATE,TIME,BYTES
示例:
@@ -964,3 +973,37 @@ from
示例:
case when c_string in ('c_string') then 1 else 0 end
+
+### UUID
+
+```UUID()```
+
+通过 Java 函数生成 UUID。
+
+示例:
+
+select UUID() as seatunnel_uuid
+
+
+### ARRAY
+
+生成一个数组。
+
+示例:
+
+select ARRAY('test1','test2','test3') as arrays
+
+### LATERAL VIEW
+#### EXPLODE
+
+将 array 列展开成多行。
+
+- OUTER EXPLODE:当 array 为 NULL 或者为空时,返回 NULL。
+- EXPLODE(SPLIT(FIELD_NAME, separator)):用于切分字符串类型,SPLIT 的第一个参数是字段名,第二个参数是分隔符。
+- EXPLODE(ARRAY(value1, value2)):用于自定义数组切分,在原有基础上生成一个新的字段。
+```
+SELECT * FROM fake
+ LATERAL VIEW EXPLODE ( SPLIT ( NAME, ',' ) ) AS NAME
+ LATERAL VIEW EXPLODE ( SPLIT ( pk_id, ';' ) ) AS pk_id
+ LATERAL VIEW OUTER EXPLODE ( age ) AS age
+ LATERAL VIEW OUTER EXPLODE ( ARRAY(1,1) ) AS num
+```
diff --git a/docs/zh/transform-v2/sql-udf.md b/docs/zh/transform-v2/sql-udf.md
index 4c1a3777408..df03b5db5c2 100644
--- a/docs/zh/transform-v2/sql-udf.md
+++ b/docs/zh/transform-v2/sql-udf.md
@@ -109,8 +109,8 @@ public class ExampleUDF implements ZetaUDF {
```
transform {
Sql {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
query = "select id, example(name) as name, age from fake"
}
}
diff --git a/docs/zh/transform-v2/sql.md b/docs/zh/transform-v2/sql.md
index 1b56f1fef3f..87d9380e7a0 100644
--- a/docs/zh/transform-v2/sql.md
+++ b/docs/zh/transform-v2/sql.md
@@ -12,11 +12,11 @@ SQL 转换使用内存中的 SQL 引擎,我们可以通过 SQL 函数和 SQL
| 名称 | 类型 | 是否必须 | 默认值 |
|-------------------|--------|------|-----|
-| source_table_name | string | yes | - |
-| result_table_name | string | yes | - |
+| plugin_input | string | yes | - |
+| plugin_output | string | yes | - |
| query | string | yes | - |
-### source_table_name [string]
+### plugin_input [string]
源表名称,查询 SQL 表名称必须与此字段匹配。
@@ -43,8 +43,8 @@ SQL 转换使用内存中的 SQL 引擎,我们可以通过 SQL 函数和 SQL
```
transform {
Sql {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
query = "select id, concat(name, '_') as name, age+1 as age from fake where id>0"
}
}
@@ -66,7 +66,7 @@ transform {
```hacon
source {
FakeSource {
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 100
string.template = ["innerQuery"]
schema = {
@@ -123,7 +123,7 @@ env {
source {
FakeSource {
- result_table_name = "fake"
+ plugin_output = "fake"
row.num = 100
schema = {
fields {
@@ -137,15 +137,15 @@ source {
transform {
Sql {
- source_table_name = "fake"
- result_table_name = "fake1"
+ plugin_input = "fake"
+ plugin_output = "fake1"
query = "select id, concat(name, '_') as name, age+1 as age from fake where id>0"
}
}
sink {
Console {
- source_table_name = "fake1"
+ plugin_input = "fake1"
}
}
```
diff --git a/docs/zh/transform-v2/transform-multi-table.md b/docs/zh/transform-v2/transform-multi-table.md
new file mode 100644
index 00000000000..2881f319e7a
--- /dev/null
+++ b/docs/zh/transform-v2/transform-multi-table.md
@@ -0,0 +1,124 @@
+---
+sidebar_position: 2
+---
+
+# Transform的多表转换
+
+SeaTunnel transform支持多表转换,在上游插件输出多个表的时候特别有用,能够在一个transform中完成所有的转换操作。目前SeaTunnel很多Connectors支持多表输出,比如`JDBCSource`、`MySQL-CDC`
+等。所有的Transform都可以通过如下配置实现多表转换。
+
+:::tip
+
+多表Transform没有对Transform能力的限制,任何Transform的配置都可以在多表Transform中使用。多表Transform的作用针对数据流中的多个表进行单独的处理,并将多个表的Transform配置合并到一个Transform中,方便用户管理。
+
+:::
+
+## 属性
+
+| Name | Type | Required | Default | Description |
+|----------------------------|--------|----------|---------|--------------------------------------------------------------------------------------------------|
+| table_match_regex | String | No | .* | 表名的正则表达式,通过正则表达式来匹配需要进行转换的表,默认匹配所有的表。注意这个表名是上游的真正表名,不是result_table_name。 |
+| table_transform | List | No | - | 可以通过table_transform列表来指定部分表的规则,当在table_transform中配置某个表的转换规则后,外层针对当前表的规则不会生效,以table_transform中的为准 |
+| table_transform.table_path | String | No | - | 当在table_transform中配置某个表的转换规则后,需要使用table_path字段指定表名,表名需要包含`databaseName[.schemaName].tableName`。 |
+
+## 匹配逻辑
+
+假设我们从上游读取了 5 张表,分别为 `test.abc`、`test.abcd`、`test.xyz`、`test.xyzxyz`、`test.www`。它们的表结构一致,都有 `id`、`name`、`age` 三个字段。
+
+| id | name | age |
+|----|------|-----|
+
+现在我们想通过 Copy transform 对这 5 张表的数据进行复制,具体需求是:`test.abc`、`test.abcd` 表需要将 `name` 复制为 `name1`,
+`test.xyz` 表需要将 `name` 复制为 `name2`,`test.xyzxyz` 表需要将 `name` 复制为 `name3`,`test.www` 数据结构不变。那么我们可以通过如下配置来实现:
+
+```hocon
+transform {
+ Copy {
+ source_table_name = "fake" // 可选的读取数据集名
+ result_table_name = "fake1" // 可选的输出数据集名
+
+ table_match_regex = "test.a.*" // 1. 通过正则表达式匹配需要进行转换的表,test.a.*表示匹配test.abc和test.abcd
+ src_field = "name" // 源字段
+ dest_field = "name1" // 目标字段
+ table_transform = [{
+ table_path = "test.xyz" // 2. 指定表名进行转换
+ src_field = "name" // 源字段
+ dest_field = "name2" // 目标字段
+ }, {
+ table_path = "test.xyzxyz"
+ src_field = "name"
+ dest_field = "name3"
+ }]
+ }
+}
+```
+
+### 解释
+
+1. 通过第一层的正则表达式,和对应的Copy transform options配置,我们可以匹配到`test.abc`和`test.abcd`表,将`name`字段复制为`name1`。
+2. 通过`table_transform`配置,我们可以指定`test.xyz`表,将`name`字段复制为`name2`。
+
+这样我们就可以通过一个transform完成对多个表的转换操作。
+
+对于每个表来说,配置的优先级是:`table_transform` > `table_match_regex`。如果所有的规则都没有匹配到,那么该表将不会进行任何转换操作。
+
+针对每个表来说,它们的 Transform 配置是:
+
+- **test.abc**和**test.abcd**
+
+```hocon
+transform {
+ Copy {
+ src_field = "name"
+ dest_field = "name1"
+ }
+}
+```
+
+输出表结构:
+
+| id | name | age | name1 |
+|----|------|-----|-------|
+
+- **test.xyz**
+
+```hocon
+transform {
+ Copy {
+ src_field = "name"
+ dest_field = "name2"
+ }
+}
+```
+
+输出表结构:
+
+| id | name | age | name2 |
+|----|------|-----|-------|
+
+- **test.xyzxyz**
+
+```hocon
+transform {
+ Copy {
+ src_field = "name"
+ dest_field = "name3"
+ }
+}
+```
+
+输出表结构:
+
+| id | name | age | name3 |
+|----|------|-----|-------|
+
+- **test.www**
+
+```hocon
+transform {
+ // 无需转换
+}
+```
+
+输出表结构:
+
+| id | name | age |
+|----|------|-----|
+
+我们这里使用 Copy Transform 作为示例,实际上所有的 Transform 都支持多表转换,只需要在对应的 Transform 中进行配置即可。
+
diff --git a/plugin-mapping.properties b/plugin-mapping.properties
index 9936afcbaaf..c494686161e 100644
--- a/plugin-mapping.properties
+++ b/plugin-mapping.properties
@@ -85,6 +85,7 @@ seatunnel.sink.InfluxDB = connector-influxdb
seatunnel.source.GoogleSheets = connector-google-sheets
seatunnel.sink.GoogleFirestore = connector-google-firestore
seatunnel.sink.Tablestore = connector-tablestore
+seatunnel.source.Tablestore = connector-tablestore
seatunnel.source.Lemlist = connector-http-lemlist
seatunnel.source.Klaviyo = connector-http-klaviyo
seatunnel.sink.Slack = connector-slack
@@ -103,6 +104,7 @@ seatunnel.source.Maxcompute = connector-maxcompute
seatunnel.sink.Maxcompute = connector-maxcompute
seatunnel.source.MySQL-CDC = connector-cdc-mysql
seatunnel.source.MongoDB-CDC = connector-cdc-mongodb
+seatunnel.source.TiDB-CDC = connector-cdc-tidb
seatunnel.sink.S3Redshift = connector-s3-redshift
seatunnel.source.Web3j = connector-web3j
seatunnel.source.TDengine = connector-tdengine
@@ -129,3 +131,27 @@ seatunnel.source.ObsFile = connector-file-obs
seatunnel.sink.ObsFile = connector-file-obs
seatunnel.source.Milvus = connector-milvus
seatunnel.sink.Milvus = connector-milvus
+seatunnel.sink.ActiveMQ = connector-activemq
+seatunnel.source.Prometheus = connector-prometheus
+seatunnel.sink.Prometheus = connector-prometheus
+seatunnel.source.Qdrant = connector-qdrant
+seatunnel.sink.Qdrant = connector-qdrant
+seatunnel.source.Sls = connector-sls
+seatunnel.sink.Sls = connector-sls
+seatunnel.source.Typesense = connector-typesense
+seatunnel.sink.Typesense = connector-typesense
+seatunnel.source.Opengauss-CDC = connector-cdc-opengauss
+
+seatunnel.transform.Sql = seatunnel-transforms-v2
+seatunnel.transform.FieldMapper = seatunnel-transforms-v2
+seatunnel.transform.Filter = seatunnel-transforms-v2
+seatunnel.transform.FilterRowKind = seatunnel-transforms-v2
+seatunnel.transform.JsonPath = seatunnel-transforms-v2
+seatunnel.transform.Replace = seatunnel-transforms-v2
+seatunnel.transform.Split = seatunnel-transforms-v2
+seatunnel.transform.Copy = seatunnel-transforms-v2
+seatunnel.transform.DynamicCompile = seatunnel-transforms-v2
+seatunnel.transform.LLM = seatunnel-transforms-v2
+seatunnel.transform.Embedding = seatunnel-transforms-v2
+seatunnel.transform.RowKindExtractor = seatunnel-transforms-v2
+seatunnel.transform.Metadata = seatunnel-transforms-v2
diff --git a/pom.xml b/pom.xml
index d12d47bf234..75f598400f9 100644
--- a/pom.xml
+++ b/pom.xml
@@ -56,7 +56,7 @@
- 2.3.6-SNAPSHOT
+ 2.3.9-SNAPSHOT
2.1.1
UTF-8
1.8
@@ -113,13 +113,17 @@
1.13
3.0.0
apache
+ seatunnel
${project.version}
+ true
+ true
+ true
1.81
4.13.2
5.9.0
4.11.0
1.3.3
- 3.3.0
+ 3.4.1
3.2.0
4.0.4
1.3.0
@@ -131,11 +135,13 @@
2.0.0
1.17.6
2.29.0
- 4.5
+ 4.9
2.7.0
4.12.0
4.0.16
+ 9.4.56.v20240826
+ 4.0.4
false
true
@@ -150,6 +156,12 @@
true
false
+
+ 0.16.0
+ true
+
+ 3.1.4
+
@@ -539,6 +551,25 @@
test
+
+
+ io.prometheus
+ simpleclient
+ ${prometheus.simpleclient.version}
+
+
+
+ io.prometheus
+ simpleclient_hotspot
+ ${prometheus.simpleclient.version}
+
+
+
+ io.prometheus
+ simpleclient_httpserver
+ ${prometheus.simpleclient.version}
+
+
@@ -763,6 +794,99 @@
maven-dependency-plugin
${maven-dependency-plugin.version}
+
+ org.codehaus.mojo
+ exec-maven-plugin
+ ${exec-maven-plugin.version}
+
+
+ docker-build
+
+ exec
+
+ package
+
+ ${docker.build.skip}
+
+ 1
+
+ docker
+ ${project.basedir}
+
+ buildx
+ build
+ --load
+ --no-cache
+ -t
+ ${docker.hub}/${docker.repo}:${docker.tag}
+ -t
+ ${docker.hub}/${docker.repo}:latest
+ ${project.basedir}
+ --build-arg
+ VERSION=${project.version}
+ --file=src/main/docker/Dockerfile
+
+
+
+
+ docker-verify
+
+ exec
+
+ verify
+
+ ${docker.verify.skip}
+
+ 1
+
+ docker
+ ${project.basedir}
+
+ run
+ --rm
+ ${docker.hub}/${docker.repo}:${docker.tag}
+ bash
+ ./bin/seatunnel.sh
+ -e
+ local
+ -c
+ config/v2.batch.config.template
+
+
+
+
+ docker-push
+
+ exec
+
+ install
+
+ ${docker.push.skip}
+
+ 1
+
+ docker
+ ${project.basedir}
+
+ buildx
+ build
+ --platform
+ linux/amd64,linux/arm64
+ --no-cache
+ --push
+ -t
+ ${docker.hub}/${docker.repo}:${docker.tag}
+ -t
+ ${docker.hub}/${docker.repo}:latest
+ ${project.basedir}
+ --build-arg
+ VERSION=${project.version}
+ --file=src/main/docker/Dockerfile
+
+
+
+
+
@@ -862,15 +986,16 @@
project
-
-
- docs/**/*.md
-
-
- **/.github/**/*.md
-
-
-
+
+
+
+
+
+
+
+
+
+
true
diff --git a/release-note.md b/release-note.md
index 32067c22dfa..6147093eee7 100644
--- a/release-note.md
+++ b/release-note.md
@@ -58,6 +58,7 @@
- [Connector-v2] [Mongodb] Support to convert to double from numeric type that mongodb saved it as numeric internally (#6997)
- [Connector-v2] [Redis] Using scan replace keys operation command,support batchWrite in single mode(#7030,#7085)
- [Connector-V2] [Clickhouse] Add a new optional configuration `clickhouse.config` to the source connector of ClickHouse (#7143)
+- [Connector-V2] [Redis] Redis scan command supports versions 3, 4, 5, 6, 7 (#7666)
### Zeta(ST-Engine)
@@ -87,6 +88,7 @@
- [zeta] dynamically replace the value of the variable at runtime (#4950)
- [Zeta] Add from_unixtime function (#5462)
- [zeta] Fix CDC task restore throw NPE (#5507)
+- [Zeta] Fix a checkpoint storage document with OSS (#7507)
### E2E
@@ -198,6 +200,7 @@
- [Connector-V2] [Assert] Support field type assert and field value equality assert for full data types (#6275)
- [Connector-V2] [Iceberg] Support iceberg sink #6198
- [Connector-V2] [FILE-OBS] Add Huawei Cloud OBS connector #4578
+- [Connector-V2] [ElasticsSource] Source support multiSource (#6730)
### Zeta(ST-Engine)
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/common/CommonOptions.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/common/CommonOptions.java
index a3227440efe..13f223698f0 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/common/CommonOptions.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/common/CommonOptions.java
@@ -30,28 +30,30 @@ public interface CommonOptions {
.noDefaultValue()
.withDescription("Name of the SPI plugin class.");
- Option RESULT_TABLE_NAME =
- Options.key("result_table_name")
+ Option PLUGIN_OUTPUT =
+ Options.key("plugin_output")
.stringType()
.noDefaultValue()
+ .withFallbackKeys("result_table_name")
.withDescription(
- "When result_table_name is not specified, "
+ "When plugin_output is not specified, "
+ "the data processed by this plugin will not be registered as a data set (dataStream/dataset) "
+ "that can be directly accessed by other plugins, or called a temporary table (table)"
- + "When result_table_name is specified, "
+ + "When plugin_output is specified, "
+ "the data processed by this plugin will be registered as a data set (dataStream/dataset) "
+ "that can be directly accessed by other plugins, or called a temporary table (table) . "
+ "The data set (dataStream/dataset) registered here can be directly accessed by other plugins "
- + "by specifying source_table_name .");
+ + "by specifying plugin_input .");
- Option> SOURCE_TABLE_NAME =
- Options.key("source_table_name")
+ Option> PLUGIN_INPUT =
+ Options.key("plugin_input")
.listType()
.noDefaultValue()
+ .withFallbackKeys("source_table_name")
.withDescription(
- "When source_table_name is not specified, "
+ "When plugin_input is not specified, "
+ "the current plug-in processes the data set dataset output by the previous plugin in the configuration file. "
- + "When source_table_name is specified, the current plug-in is processing the data set corresponding to this parameter.");
+ + "When plugin_input is specified, the current plug-in is processing the data set corresponding to this parameter.");
Option PARALLELISM =
Options.key("parallelism")
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/configuration/util/OptionRule.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/configuration/util/OptionRule.java
index 684620245c1..0d700bdbc11 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/configuration/util/OptionRule.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/configuration/util/OptionRule.java
@@ -165,7 +165,7 @@ public Builder conditional(
@NonNull Option>... requiredOptions) {
verifyConditionalExists(conditionalOption);
- if (expectValues.size() == 0) {
+ if (expectValues.isEmpty()) {
throw new OptionValidationException(
String.format(
"conditional option '%s' must have expect values .",
@@ -187,7 +187,7 @@ public Builder conditional(
RequiredOption.ConditionalRequiredOptions option =
RequiredOption.ConditionalRequiredOptions.of(
expression, new ArrayList<>(Arrays.asList(requiredOptions)));
- verifyRequiredOptionDuplicate(option);
+ verifyRequiredOptionDuplicate(option, true);
this.requiredOptions.add(option);
return this;
}
@@ -204,7 +204,7 @@ public Builder conditional(
RequiredOption.ConditionalRequiredOptions.of(
expression, new ArrayList<>(Arrays.asList(requiredOptions)));
- verifyRequiredOptionDuplicate(conditionalRequiredOption);
+ verifyRequiredOptionDuplicate(conditionalRequiredOption, true);
this.requiredOptions.add(conditionalRequiredOption);
return this;
}
@@ -242,12 +242,30 @@ private void verifyDuplicateWithOptionOptions(
}
private void verifyRequiredOptionDuplicate(@NonNull RequiredOption requiredOption) {
+ verifyRequiredOptionDuplicate(requiredOption, false);
+ }
+
+ /**
+ * Verifies if there are duplicate options within the required options.
+ *
+ * @param requiredOption The required option to be verified
+     * @param ignoreVerifyDuplicateOptions Whether to ignore duplicate option verification. If
+     *     the value is true, the existing items in OptionOptions are ignored. Currently, it
+     *     applies only to conditional options.
+ * @throws OptionValidationException If duplicate options are found
+ */
+ private void verifyRequiredOptionDuplicate(
+ @NonNull RequiredOption requiredOption,
+ @NonNull Boolean ignoreVerifyDuplicateOptions) {
requiredOption
.getOptions()
.forEach(
option -> {
- verifyDuplicateWithOptionOptions(
- option, requiredOption.getClass().getSimpleName());
+ if (!ignoreVerifyDuplicateOptions) {
+ // Check if required option that duplicate with option options
+ verifyDuplicateWithOptionOptions(
+ option, requiredOption.getClass().getSimpleName());
+ }
requiredOptions.forEach(
ro -> {
if (ro
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/kerberos/KerberosConfig.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/kerberos/KerberosConfig.java
new file mode 100644
index 00000000000..d501a3ea49a
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/kerberos/KerberosConfig.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.kerberos;
+
+import org.apache.seatunnel.api.configuration.Option;
+import org.apache.seatunnel.api.configuration.Options;
+
+public class KerberosConfig {
+
+ public static final Option KERBEROS_PRINCIPAL =
+ Options.key("kerberos_principal")
+ .stringType()
+ .noDefaultValue()
+ .withDescription("When use kerberos, we should set kerberos user principal");
+
+ public static final Option KRB5_PATH =
+ Options.key("krb5_path")
+ .stringType()
+ .defaultValue("/etc/krb5.conf")
+ .withDescription(
+ "When use kerberos, we should set krb5 path file path such as '/seatunnel/krb5.conf' or use the default path '/etc/krb5.conf'");
+
+ public static final Option KERBEROS_KEYTAB_PATH =
+ Options.key("kerberos_keytab_path")
+ .stringType()
+ .noDefaultValue()
+ .withDescription("When using kerberos, We should specify the keytab path");
+}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/DefaultSaveModeHandler.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/DefaultSaveModeHandler.java
index bbbe99281b2..269b3181597 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/DefaultSaveModeHandler.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/DefaultSaveModeHandler.java
@@ -59,6 +59,11 @@ public DefaultSaveModeHandler(
customSql);
}
+ @Override
+ public void open() {
+ catalog.open();
+ }
+
@Override
public void handleSchemaSaveMode() {
switch (schemaSaveMode) {
@@ -71,6 +76,8 @@ public void handleSchemaSaveMode() {
case ERROR_WHEN_SCHEMA_NOT_EXIST:
errorWhenSchemaNotExist();
break;
+ case IGNORE:
+ break;
default:
throw new UnsupportedOperationException("Unsupported save mode: " + schemaSaveMode);
}
@@ -151,21 +158,18 @@ protected void dropTable() {
catalog.dropTable(tablePath, true);
}
- protected void createTable() {
+ protected void createTablePreCheck() {
if (!catalog.databaseExists(tablePath.getDatabaseName())) {
- TablePath databasePath = TablePath.of(tablePath.getDatabaseName(), "");
try {
log.info(
"Creating database {} with action {}",
tablePath.getDatabaseName(),
catalog.previewAction(
- Catalog.ActionType.CREATE_DATABASE,
- databasePath,
- Optional.empty()));
+ Catalog.ActionType.CREATE_DATABASE, tablePath, Optional.empty()));
} catch (UnsupportedOperationException ignore) {
log.info("Creating database {}", tablePath.getDatabaseName());
}
- catalog.createDatabase(databasePath, true);
+ catalog.createDatabase(tablePath, true);
}
try {
log.info(
@@ -178,6 +182,10 @@ protected void createTable() {
} catch (UnsupportedOperationException ignore) {
log.info("Creating table {}", tablePath);
}
+ }
+
+ protected void createTable() {
+ createTablePreCheck();
catalog.createTable(tablePath, catalogTable, true);
}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/DefaultSinkWriterContext.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/DefaultSinkWriterContext.java
index 73af75f22ca..74ae4a0eb4d 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/DefaultSinkWriterContext.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/DefaultSinkWriterContext.java
@@ -25,18 +25,21 @@
/** The default {@link SinkWriter.Context} implement class. */
public class DefaultSinkWriterContext implements SinkWriter.Context {
private final int subtask;
+ private final int numberOfParallelSubtasks;
private final EventListener eventListener;
- public DefaultSinkWriterContext(int subtask) {
- this(subtask, new DefaultEventProcessor());
+ public DefaultSinkWriterContext(int subtask, int parallelism) {
+ this(subtask, parallelism, new DefaultEventProcessor());
}
- public DefaultSinkWriterContext(String jobId, int subtask) {
- this(subtask, new DefaultEventProcessor(jobId));
+ public DefaultSinkWriterContext(String jobId, int subtask, int parallelism) {
+ this(subtask, parallelism, new DefaultEventProcessor(jobId));
}
- public DefaultSinkWriterContext(int subtask, EventListener eventListener) {
+ public DefaultSinkWriterContext(
+ int subtask, int numberOfParallelSubtasks, EventListener eventListener) {
this.subtask = subtask;
+ this.numberOfParallelSubtasks = numberOfParallelSubtasks;
this.eventListener = eventListener;
}
@@ -45,6 +48,10 @@ public int getIndexOfSubtask() {
return subtask;
}
+ public int getNumberOfParallelSubtasks() {
+ return numberOfParallelSubtasks;
+ }
+
@Override
public MetricsContext getMetricsContext() {
// TODO Waiting for Flink and Spark to implement MetricsContext
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SaveModeHandler.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SaveModeHandler.java
index e75c2215dda..3eddaf05140 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SaveModeHandler.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SaveModeHandler.java
@@ -22,6 +22,8 @@
public interface SaveModeHandler extends AutoCloseable {
+ void open();
+
void handleSchemaSaveMode();
void handleDataSaveMode();
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SchemaSaveMode.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SchemaSaveMode.java
index f3da320d742..cee39ca8e63 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SchemaSaveMode.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SchemaSaveMode.java
@@ -27,4 +27,7 @@ public enum SchemaSaveMode {
// Error will be reported when the table does not exist
ERROR_WHEN_SCHEMA_NOT_EXIST,
+
+ // Ignore creation
+ IGNORE
}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SeaTunnelSink.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SeaTunnelSink.java
index cd869a3ca8f..954bec748ce 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SeaTunnelSink.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SeaTunnelSink.java
@@ -21,6 +21,7 @@
import org.apache.seatunnel.api.common.SeaTunnelPluginLifeCycle;
import org.apache.seatunnel.api.serialization.Serializer;
import org.apache.seatunnel.api.source.SeaTunnelJobAware;
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
@@ -135,4 +136,13 @@ default Optional> getCommitInfoSerializer() {
default Optional> getAggregatedCommitInfoSerializer() {
return Optional.empty();
}
+
+ /**
+ * Get the catalog table of the sink.
+ *
+ * @return Optional of catalog table.
+ */
+ default Optional getWriteCatalogTable() {
+ return Optional.empty();
+ }
}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkCommonOptions.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkCommonOptions.java
index 598193d695f..9c6538ac87c 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkCommonOptions.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkCommonOptions.java
@@ -28,5 +28,5 @@ public class SinkCommonOptions {
Options.key("multi_table_sink_replica")
.intType()
.defaultValue(1)
- .withDescription("The replica number of multi table sink");
+ .withDescription("The replica number of multi table sink writer");
}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkWriter.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkWriter.java
index 785f1065dd4..103b282a24b 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkWriter.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkWriter.java
@@ -19,7 +19,7 @@
import org.apache.seatunnel.api.common.metrics.MetricsContext;
import org.apache.seatunnel.api.event.EventListener;
-import org.apache.seatunnel.api.table.event.SchemaChangeEvent;
+import org.apache.seatunnel.api.table.schema.event.SchemaChangeEvent;
import java.io.IOException;
import java.io.Serializable;
@@ -46,13 +46,20 @@ public interface SinkWriter {
*/
void write(T element) throws IOException;
+    /** @deprecated replaced by {@link SupportSchemaEvolutionSinkWriter}. TODO: remove this method */
+ @Deprecated
+ default void applySchemaChange(SchemaChangeEvent event) throws IOException {}
+
/**
- * apply schema change to third party data receiver.
+ * prepare the commit, will be called before {@link #snapshotState(long checkpointId)}. If you
+ * need to use 2pc, you can return the commit info in this method, and receive the commit info
+ * in {@link SinkCommitter#commit(List)}. If this method failed (by throw exception), **Only**
+ * Spark engine will call {@link #abortPrepare()}
*
- * @param event
- * @throws IOException
+ * @return the commit info need to commit
*/
- default void applySchemaChange(SchemaChangeEvent event) throws IOException {}
+ @Deprecated
+ Optional prepareCommit() throws IOException;
/**
* prepare the commit, will be called before {@link #snapshotState(long checkpointId)}. If you
@@ -60,9 +67,13 @@ default void applySchemaChange(SchemaChangeEvent event) throws IOException {}
* in {@link SinkCommitter#commit(List)}. If this method failed (by throw exception), **Only**
* Spark engine will call {@link #abortPrepare()}
*
+ * @param checkpointId checkpointId
* @return the commit info need to commit
+ * @throws IOException If fail to prepareCommit
*/
- Optional prepareCommit() throws IOException;
+ default Optional prepareCommit(long checkpointId) throws IOException {
+ return prepareCommit();
+ }
/**
* @return The writer's state.
@@ -92,6 +103,11 @@ interface Context extends Serializable {
/** @return The index of this subtask. */
int getIndexOfSubtask();
+ /** @return parallelism of this writer. */
+ default int getNumberOfParallelSubtasks() {
+ return 1;
+ }
+
/** @return metricsContext of this reader. */
MetricsContext getMetricsContext();
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SupportSchemaEvolutionSink.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SupportSchemaEvolutionSink.java
new file mode 100644
index 00000000000..d5b33763457
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SupportSchemaEvolutionSink.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.sink;
+
+import org.apache.seatunnel.api.table.schema.SchemaChangeType;
+
+import java.util.List;
+
+public interface SupportSchemaEvolutionSink {
+
+ /**
+ * The sink connector supports schema evolution types.
+ *
+ * @return the supported schema change types
+ */
+ List supports();
+}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SupportSchemaEvolutionSinkWriter.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SupportSchemaEvolutionSinkWriter.java
new file mode 100644
index 00000000000..54727ec9505
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SupportSchemaEvolutionSinkWriter.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.sink;
+
+import org.apache.seatunnel.api.table.schema.event.SchemaChangeEvent;
+
+import java.io.IOException;
+
+public interface SupportSchemaEvolutionSinkWriter {
+
+ /**
+ * apply schema change to third party data receiver.
+ *
+ * @param event
+ * @throws IOException
+ */
+ void applySchemaChange(SchemaChangeEvent event) throws IOException;
+}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/TablePlaceholder.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/TablePlaceholder.java
index f599e221350..2f78ce89e39 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/TablePlaceholder.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/TablePlaceholder.java
@@ -17,211 +17,48 @@
package org.apache.seatunnel.api.sink;
-import org.apache.seatunnel.api.configuration.ReadonlyConfig;
-import org.apache.seatunnel.api.table.catalog.CatalogTable;
-import org.apache.seatunnel.api.table.catalog.ConstraintKey;
-import org.apache.seatunnel.api.table.catalog.PrimaryKey;
-import org.apache.seatunnel.api.table.catalog.TableIdentifier;
-import org.apache.seatunnel.api.table.catalog.TableSchema;
-
-import org.apache.commons.lang3.ObjectUtils;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import java.util.Optional;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import java.util.stream.Collectors;
-
-public class TablePlaceholder {
- // Placeholder ${database_name} or ${database_name:default_value}
- public static final String REPLACE_DATABASE_NAME_KEY = "database_name";
- // Placeholder ${schema_name} or ${schema_name:default_value}
- public static final String REPLACE_SCHEMA_NAME_KEY = "schema_name";
- // Placeholder ${schema_full_name} or ${schema_full_name:default_value}
- public static final String REPLACE_SCHEMA_FULL_NAME_KEY = "schema_full_name";
- // Placeholder ${table_name} or ${table_name:default_value}
- public static final String REPLACE_TABLE_NAME_KEY = "table_name";
- // Placeholder ${table_full_name} or ${table_full_name:default_value}
- public static final String REPLACE_TABLE_FULL_NAME_KEY = "table_full_name";
- // Placeholder ${primary_key} or ${primary_key:default_value}
- public static final String REPLACE_PRIMARY_KEY = "primary_key";
- // Placeholder ${unique_key} or ${unique_key:default_value}
- public static final String REPLACE_UNIQUE_KEY = "unique_key";
- // Placeholder ${field_names} or ${field_names:default_value}
- public static final String REPLACE_FIELD_NAMES_KEY = "field_names";
- public static final String NAME_DELIMITER = ".";
- public static final String FIELD_DELIMITER = ",";
-
- private static String replacePlaceholders(String input, String placeholderName, String value) {
- return replacePlaceholders(input, placeholderName, value, null);
- }
-
- private static String replacePlaceholders(
- String input, String placeholderName, String value, String defaultValue) {
- String placeholderRegex = "\\$\\{" + Pattern.quote(placeholderName) + "(:[^}]*)?\\}";
- Pattern pattern = Pattern.compile(placeholderRegex);
- Matcher matcher = pattern.matcher(input);
-
- StringBuffer result = new StringBuffer();
- while (matcher.find()) {
- String replacement =
- value != null && !value.isEmpty()
- ? value
- : (matcher.group(1) != null
- ? matcher.group(1).substring(1).trim()
- : defaultValue);
- if (replacement == null) {
- continue;
- }
- matcher.appendReplacement(result, Matcher.quoteReplacement(replacement));
- }
- matcher.appendTail(result);
- return result.toString();
- }
-
- private static String replaceTableIdentifier(
- String placeholder, TableIdentifier identifier, String defaultValue) {
- placeholder =
- replacePlaceholders(
- placeholder,
- REPLACE_DATABASE_NAME_KEY,
- identifier.getDatabaseName(),
- defaultValue);
- placeholder =
- replacePlaceholders(
- placeholder,
- REPLACE_SCHEMA_NAME_KEY,
- identifier.getSchemaName(),
- defaultValue);
- placeholder =
- replacePlaceholders(
- placeholder,
- REPLACE_TABLE_NAME_KEY,
- identifier.getTableName(),
- defaultValue);
-
- List fullPath = new ArrayList<>();
- if (identifier.getDatabaseName() != null) {
- fullPath.add(identifier.getDatabaseName());
- }
- if (identifier.getSchemaName() != null) {
- fullPath.add(identifier.getSchemaName());
+import java.util.HashSet;
+import java.util.Set;
+
+public enum TablePlaceholder {
+
+ // Placeholder ${database_name} or${database_name:default_value}
+ REPLACE_DATABASE_NAME_KEY("database_name"),
+ // Placeholder ${schema_name} or${schema_name:default_value}
+ REPLACE_SCHEMA_NAME_KEY("schema_name"),
+ // Placeholder ${schema_full_name} or${schema_full_name:default_value}
+ REPLACE_SCHEMA_FULL_NAME_KEY("schema_full_name"),
+ // Placeholder ${table_name} or${table_name:default_value}
+ REPLACE_TABLE_NAME_KEY("table_name"),
+ // Placeholder ${table_full_name} or${table_full_name:default_value}
+ REPLACE_TABLE_FULL_NAME_KEY("table_full_name"),
+ // Placeholder ${primary_key} or${primary_key:default_value}
+ REPLACE_PRIMARY_KEY("primary_key"),
+ // Placeholder ${unique_key} or${unique_key:default_value}
+ REPLACE_UNIQUE_KEY("unique_key"),
+ // Placeholder ${field_names} or${field_names:default_value}
+ REPLACE_FIELD_NAMES_KEY("field_names");
+
+ private static Set PLACEHOLDER_KEYS = new HashSet<>();
+
+ static {
+ // O(1) complexity, using static to load all system placeholders
+ for (TablePlaceholder placeholder : TablePlaceholder.values()) {
+ PLACEHOLDER_KEYS.add(placeholder.getPlaceholder());
}
- if (!fullPath.isEmpty()) {
- placeholder =
- replacePlaceholders(
- placeholder,
- REPLACE_SCHEMA_FULL_NAME_KEY,
- String.join(NAME_DELIMITER, fullPath),
- defaultValue);
- }
-
- if (identifier.getTableName() != null) {
- fullPath.add(identifier.getTableName());
- }
- if (!fullPath.isEmpty()) {
- placeholder =
- replacePlaceholders(
- placeholder,
- REPLACE_TABLE_FULL_NAME_KEY,
- String.join(NAME_DELIMITER, fullPath),
- defaultValue);
- }
- return placeholder;
- }
-
- public static String replaceTableIdentifier(String placeholder, TableIdentifier identifier) {
- return replaceTableIdentifier(placeholder, identifier, "");
}
- public static String replaceTablePrimaryKey(String placeholder, PrimaryKey primaryKey) {
- if (primaryKey != null && !primaryKey.getColumnNames().isEmpty()) {
- String pkFieldsString = String.join(FIELD_DELIMITER, primaryKey.getColumnNames());
- return replacePlaceholders(placeholder, REPLACE_PRIMARY_KEY, pkFieldsString);
- }
- return placeholder;
- }
-
- public static String replaceTableUniqueKey(
- String placeholder, List constraintKeys) {
- Optional ukFieldsString =
- constraintKeys.stream()
- .filter(
- e ->
- e.getConstraintType()
- .equals(ConstraintKey.ConstraintType.UNIQUE_KEY))
- .findFirst()
- .map(
- e ->
- e.getColumnNames().stream()
- .map(f -> f.getColumnName())
- .collect(Collectors.joining(FIELD_DELIMITER)));
- if (ukFieldsString.isPresent()) {
- return replacePlaceholders(placeholder, REPLACE_UNIQUE_KEY, ukFieldsString.get());
- }
- return placeholder;
- }
+ private final String key;
- public static String replaceTableFieldNames(String placeholder, TableSchema schema) {
- return replacePlaceholders(
- placeholder,
- REPLACE_FIELD_NAMES_KEY,
- String.join(FIELD_DELIMITER, schema.getFieldNames()));
+ TablePlaceholder(String placeholder) {
+ this.key = placeholder;
}
- public static ReadonlyConfig replaceTablePlaceholder(
- ReadonlyConfig config, CatalogTable table) {
- return replaceTablePlaceholder(config, table, Collections.emptyList());
+ public String getPlaceholder() {
+ return key;
}
- public static ReadonlyConfig replaceTablePlaceholder(
- ReadonlyConfig config, CatalogTable table, Collection excludeKeys) {
- Map copyOnWriteData = ObjectUtils.clone(config.getSourceMap());
- for (String key : copyOnWriteData.keySet()) {
- if (excludeKeys.contains(key)) {
- continue;
- }
- Object value = copyOnWriteData.get(key);
- if (value != null) {
- if (value instanceof String) {
- String strValue = (String) value;
- strValue = replaceTableIdentifier(strValue, table.getTableId());
- strValue =
- replaceTablePrimaryKey(
- strValue, table.getTableSchema().getPrimaryKey());
- strValue =
- replaceTableUniqueKey(
- strValue, table.getTableSchema().getConstraintKeys());
- strValue = replaceTableFieldNames(strValue, table.getTableSchema());
- copyOnWriteData.put(key, strValue);
- } else if (value instanceof List) {
- List listValue = (List) value;
- if (listValue.size() == 1 && listValue.get(0) instanceof String) {
- String strValue = (String) listValue.get(0);
- if (strValue.equals("${" + REPLACE_PRIMARY_KEY + "}")) {
- strValue =
- replaceTablePrimaryKey(
- strValue, table.getTableSchema().getPrimaryKey());
- listValue = Arrays.asList(strValue.split(FIELD_DELIMITER));
- } else if (strValue.equals("${" + REPLACE_UNIQUE_KEY + "}")) {
- strValue =
- replaceTableUniqueKey(
- strValue, table.getTableSchema().getConstraintKeys());
- listValue = Arrays.asList(strValue.split(FIELD_DELIMITER));
- } else if (strValue.equals("${" + REPLACE_FIELD_NAMES_KEY + "}")) {
- strValue = replaceTableFieldNames(strValue, table.getTableSchema());
- listValue = Arrays.asList(strValue.split(FIELD_DELIMITER));
- }
- copyOnWriteData.put(key, listValue);
- }
- }
- }
- }
- return ReadonlyConfig.fromMap(copyOnWriteData);
+ public static boolean isSystemPlaceholder(String str) {
+ return PLACEHOLDER_KEYS.contains(str);
}
}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/TablePlaceholderProcessor.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/TablePlaceholderProcessor.java
new file mode 100644
index 00000000000..4b7f9df3aff
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/TablePlaceholderProcessor.java
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.sink;
+
+import org.apache.seatunnel.api.configuration.ReadonlyConfig;
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import org.apache.seatunnel.api.table.catalog.ConstraintKey;
+import org.apache.seatunnel.api.table.catalog.PrimaryKey;
+import org.apache.seatunnel.api.table.catalog.TableIdentifier;
+import org.apache.seatunnel.api.table.catalog.TableSchema;
+
+import org.apache.commons.lang3.ObjectUtils;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+import static org.apache.seatunnel.common.utils.PlaceholderUtils.replacePlaceholders;
+
+public class TablePlaceholderProcessor {
+
+ public static final String NAME_DELIMITER = ".";
+
+ public static final String FIELD_DELIMITER = ",";
+
+ private static String replaceTableIdentifier(
+ String placeholder, TableIdentifier identifier, String defaultValue) {
+ placeholder =
+ replacePlaceholders(
+ placeholder,
+ TablePlaceholder.REPLACE_DATABASE_NAME_KEY.getPlaceholder(),
+ identifier.getDatabaseName(),
+ defaultValue);
+ placeholder =
+ replacePlaceholders(
+ placeholder,
+ TablePlaceholder.REPLACE_SCHEMA_NAME_KEY.getPlaceholder(),
+ identifier.getSchemaName(),
+ defaultValue);
+ placeholder =
+ replacePlaceholders(
+ placeholder,
+ TablePlaceholder.REPLACE_TABLE_NAME_KEY.getPlaceholder(),
+ identifier.getTableName(),
+ defaultValue);
+
+ List fullPath = new ArrayList<>();
+ if (identifier.getDatabaseName() != null) {
+ fullPath.add(identifier.getDatabaseName());
+ }
+ if (identifier.getSchemaName() != null) {
+ fullPath.add(identifier.getSchemaName());
+ }
+ if (!fullPath.isEmpty()) {
+ placeholder =
+ replacePlaceholders(
+ placeholder,
+ TablePlaceholder.REPLACE_SCHEMA_FULL_NAME_KEY.getPlaceholder(),
+ String.join(NAME_DELIMITER, fullPath),
+ defaultValue);
+ }
+
+ if (identifier.getTableName() != null) {
+ fullPath.add(identifier.getTableName());
+ }
+ if (!fullPath.isEmpty()) {
+ placeholder =
+ replacePlaceholders(
+ placeholder,
+ TablePlaceholder.REPLACE_TABLE_FULL_NAME_KEY.getPlaceholder(),
+ String.join(NAME_DELIMITER, fullPath),
+ defaultValue);
+ }
+ return placeholder;
+ }
+
+ public static String replaceTableIdentifier(String placeholder, TableIdentifier identifier) {
+ return replaceTableIdentifier(placeholder, identifier, "");
+ }
+
+ public static String replaceTablePrimaryKey(String placeholder, PrimaryKey primaryKey) {
+ if (primaryKey != null && !primaryKey.getColumnNames().isEmpty()) {
+ String pkFieldsString = String.join(FIELD_DELIMITER, primaryKey.getColumnNames());
+ return replacePlaceholders(
+ placeholder,
+ TablePlaceholder.REPLACE_PRIMARY_KEY.getPlaceholder(),
+ pkFieldsString);
+ }
+ return placeholder;
+ }
+
+ public static String replaceTableUniqueKey(
+ String placeholder, List constraintKeys) {
+ Optional ukFieldsString =
+ constraintKeys.stream()
+ .filter(
+ e ->
+ e.getConstraintType()
+ .equals(ConstraintKey.ConstraintType.UNIQUE_KEY))
+ .findFirst()
+ .map(
+ e ->
+ e.getColumnNames().stream()
+ .map(f -> f.getColumnName())
+ .collect(Collectors.joining(FIELD_DELIMITER)));
+ if (ukFieldsString.isPresent()) {
+ return replacePlaceholders(
+ placeholder,
+ TablePlaceholder.REPLACE_UNIQUE_KEY.getPlaceholder(),
+ ukFieldsString.get());
+ }
+ return placeholder;
+ }
+
+ public static String replaceTableFieldNames(String placeholder, TableSchema schema) {
+ return replacePlaceholders(
+ placeholder,
+ TablePlaceholder.REPLACE_FIELD_NAMES_KEY.getPlaceholder(),
+ String.join(FIELD_DELIMITER, schema.getFieldNames()));
+ }
+
+ public static ReadonlyConfig replaceTablePlaceholder(
+ ReadonlyConfig config, CatalogTable table) {
+ return replaceTablePlaceholder(config, table, Collections.emptyList());
+ }
+
+ public static ReadonlyConfig replaceTablePlaceholder(
+ ReadonlyConfig config, CatalogTable table, Collection excludeKeys) {
+ Map copyOnWriteData = ObjectUtils.clone(config.getSourceMap());
+ for (String key : copyOnWriteData.keySet()) {
+ if (excludeKeys.contains(key)) {
+ continue;
+ }
+ Object value = copyOnWriteData.get(key);
+ if (value != null) {
+ if (value instanceof String) {
+ String strValue = (String) value;
+ strValue = replaceTableIdentifier(strValue, table.getTableId());
+ strValue =
+ replaceTablePrimaryKey(
+ strValue, table.getTableSchema().getPrimaryKey());
+ strValue =
+ replaceTableUniqueKey(
+ strValue, table.getTableSchema().getConstraintKeys());
+ strValue = replaceTableFieldNames(strValue, table.getTableSchema());
+ copyOnWriteData.put(key, strValue);
+ } else if (value instanceof List) {
+ List listValue = (List) value;
+ if (listValue.size() == 1 && listValue.get(0) instanceof String) {
+ String strValue = (String) listValue.get(0);
+ if (strValue.equals(
+ "${"
+ + TablePlaceholder.REPLACE_PRIMARY_KEY.getPlaceholder()
+ + "}")) {
+ strValue =
+ replaceTablePrimaryKey(
+ strValue, table.getTableSchema().getPrimaryKey());
+ listValue = Arrays.asList(strValue.split(FIELD_DELIMITER));
+ } else if (strValue.equals(
+ "${"
+ + TablePlaceholder.REPLACE_UNIQUE_KEY.getPlaceholder()
+ + "}")) {
+ strValue =
+ replaceTableUniqueKey(
+ strValue, table.getTableSchema().getConstraintKeys());
+ listValue = Arrays.asList(strValue.split(FIELD_DELIMITER));
+ } else if (strValue.equals(
+ "${"
+ + TablePlaceholder.REPLACE_FIELD_NAMES_KEY.getPlaceholder()
+ + "}")) {
+ strValue = replaceTableFieldNames(strValue, table.getTableSchema());
+ listValue = Arrays.asList(strValue.split(FIELD_DELIMITER));
+ }
+ copyOnWriteData.put(key, listValue);
+ }
+ }
+ }
+ }
+ return ReadonlyConfig.fromMap(copyOnWriteData);
+ }
+}
diff --git a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableAggregatedCommitInfo.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableAggregatedCommitInfo.java
similarity index 93%
rename from seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableAggregatedCommitInfo.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableAggregatedCommitInfo.java
index 5d378140e94..585a8f4e068 100644
--- a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableAggregatedCommitInfo.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableAggregatedCommitInfo.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.seatunnel.connectors.seatunnel.common.multitablesink;
+package org.apache.seatunnel.api.sink.multitablesink;
import lombok.AllArgsConstructor;
import lombok.Getter;
diff --git a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableCommitInfo.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableCommitInfo.java
similarity index 86%
rename from seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableCommitInfo.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableCommitInfo.java
index 21faf0c7edc..d541c891fde 100644
--- a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableCommitInfo.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableCommitInfo.java
@@ -15,16 +15,16 @@
* limitations under the License.
*/
-package org.apache.seatunnel.connectors.seatunnel.common.multitablesink;
+package org.apache.seatunnel.api.sink.multitablesink;
import lombok.AllArgsConstructor;
import lombok.Getter;
import java.io.Serializable;
-import java.util.Map;
+import java.util.concurrent.ConcurrentMap;
@Getter
@AllArgsConstructor
public class MultiTableCommitInfo implements Serializable {
- private Map commitInfo;
+ private ConcurrentMap commitInfo;
}
diff --git a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSink.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSink.java
similarity index 71%
rename from seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSink.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSink.java
index 7abb176117d..23f4fc455bb 100644
--- a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSink.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSink.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.seatunnel.connectors.seatunnel.common.multitablesink;
+package org.apache.seatunnel.api.sink.multitablesink;
import org.apache.seatunnel.api.common.JobContext;
import org.apache.seatunnel.api.serialization.DefaultSerializer;
@@ -25,11 +25,19 @@
import org.apache.seatunnel.api.sink.SinkCommitter;
import org.apache.seatunnel.api.sink.SinkCommonOptions;
import org.apache.seatunnel.api.sink.SinkWriter;
+import org.apache.seatunnel.api.sink.SupportSchemaEvolutionSink;
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import org.apache.seatunnel.api.table.catalog.TablePath;
import org.apache.seatunnel.api.table.factory.MultiTableFactoryContext;
+import org.apache.seatunnel.api.table.schema.SchemaChangeType;
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import lombok.Getter;
+
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -39,12 +47,13 @@
public class MultiTableSink
implements SeaTunnelSink<
- SeaTunnelRow,
- MultiTableState,
- MultiTableCommitInfo,
- MultiTableAggregatedCommitInfo> {
+ SeaTunnelRow,
+ MultiTableState,
+ MultiTableCommitInfo,
+ MultiTableAggregatedCommitInfo>,
+ SupportSchemaEvolutionSink {
- private final Map sinks;
+ @Getter private final Map sinks;
private final int replicaNum;
public MultiTableSink(MultiTableFactoryContext context) {
@@ -61,22 +70,26 @@ public String getPluginName() {
public SinkWriter createWriter(
SinkWriter.Context context) throws IOException {
Map> writers = new HashMap<>();
+ Map sinkWritersContext = new HashMap<>();
for (int i = 0; i < replicaNum; i++) {
for (String tableIdentifier : sinks.keySet()) {
SeaTunnelSink sink = sinks.get(tableIdentifier);
int index = context.getIndexOfSubtask() * replicaNum + i;
writers.put(
SinkIdentifier.of(tableIdentifier, index),
- sink.createWriter(new SinkContextProxy(index, context)));
+ sink.createWriter(new SinkContextProxy(index, replicaNum, context)));
+ sinkWritersContext.put(SinkIdentifier.of(tableIdentifier, index), context);
}
}
- return new MultiTableSinkWriter(writers, replicaNum);
+ return new MultiTableSinkWriter(writers, replicaNum, sinkWritersContext);
}
@Override
public SinkWriter restoreWriter(
SinkWriter.Context context, List states) throws IOException {
Map> writers = new HashMap<>();
+ Map sinkWritersContext = new HashMap<>();
+
for (int i = 0; i < replicaNum; i++) {
for (String tableIdentifier : sinks.keySet()) {
SeaTunnelSink sink = sinks.get(tableIdentifier);
@@ -93,15 +106,17 @@ public SinkWriter restoreWr
if (state.isEmpty()) {
writers.put(
sinkIdentifier,
- sink.createWriter(new SinkContextProxy(index, context)));
+ sink.createWriter(new SinkContextProxy(index, replicaNum, context)));
} else {
writers.put(
sinkIdentifier,
- sink.restoreWriter(new SinkContextProxy(index, context), state));
+ sink.restoreWriter(
+ new SinkContextProxy(index, replicaNum, context), state));
}
+ sinkWritersContext.put(SinkIdentifier.of(tableIdentifier, index), context);
}
}
- return new MultiTableSinkWriter(writers, replicaNum);
+ return new MultiTableSinkWriter(writers, replicaNum, sinkWritersContext);
}
@Override
@@ -147,6 +162,21 @@ public Optional> getCommitInfoSerializer() {
return Optional.of(new MultiTableSinkAggregatedCommitter(aggCommitters));
}
+ public List getSinkTables() {
+
+ List tablePaths = new ArrayList<>();
+ List values = new ArrayList<>(sinks.values());
+ for (int i = 0; i < values.size(); i++) {
+ if (values.get(i).getWriteCatalogTable().isPresent()) {
+ tablePaths.add(
+ ((CatalogTable) values.get(i).getWriteCatalogTable().get()).getTablePath());
+ } else {
+ tablePaths.add(TablePath.of(sinks.keySet().toArray(new String[0])[i]));
+ }
+ }
+ return tablePaths;
+ }
+
@Override
public Optional>
getAggregatedCommitInfoSerializer() {
@@ -157,4 +187,18 @@ public Optional> getCommitInfoSerializer() {
public void setJobContext(JobContext jobContext) {
sinks.values().forEach(sink -> sink.setJobContext(jobContext));
}
+
+ @Override
+ public Optional getWriteCatalogTable() {
+ return SeaTunnelSink.super.getWriteCatalogTable();
+ }
+
+ @Override
+ public List supports() {
+ SeaTunnelSink firstSink = sinks.entrySet().iterator().next().getValue();
+ if (firstSink instanceof SupportSchemaEvolutionSink) {
+ return ((SupportSchemaEvolutionSink) firstSink).supports();
+ }
+ return Collections.emptyList();
+ }
}
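For readers skimming the diff above: the new `supports()` override delegates schema-evolution capability to the first sub-sink, and `getSinkTables()` falls back to the map key when a sub-sink does not expose a catalog table. A minimal, self-contained sketch of that delegation pattern follows; the types here are simplified stand-ins, not the real SeaTunnel interfaces.

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class DelegationSketch {
    // Simplified stand-in for SupportSchemaEvolutionSink.
    interface SchemaEvolutionAware {
        List<String> supports();
    }

    // Simplified stand-in for a sub-sink keyed by table identifier.
    interface SubSink {
        // Returns the table path the sink writes to, or null if unknown.
        String writeTable();
    }

    // Mirrors MultiTableSink#supports(): ask the first sub-sink, otherwise report nothing.
    static List<String> supports(Map<String, SubSink> sinks) {
        SubSink first = sinks.values().iterator().next();
        if (first instanceof SchemaEvolutionAware) {
            return ((SchemaEvolutionAware) first).supports();
        }
        return Collections.emptyList();
    }

    // Mirrors getSinkTables(): prefer the sink's own table, fall back to the map key.
    static List<String> sinkTables(Map<String, SubSink> sinks) {
        List<String> tables = new ArrayList<>();
        sinks.forEach((id, sink) -> tables.add(sink.writeTable() != null ? sink.writeTable() : id));
        return tables;
    }

    public static void main(String[] args) {
        class EvolvingSink implements SubSink, SchemaEvolutionAware {
            public String writeTable() { return null; }
            public List<String> supports() { return Arrays.asList("ADD_COLUMN", "DROP_COLUMN"); }
        }
        Map<String, SubSink> sinks = new LinkedHashMap<>();
        sinks.put("db.users", new EvolvingSink());
        System.out.println(supports(sinks));   // [ADD_COLUMN, DROP_COLUMN]
        System.out.println(sinkTables(sinks)); // [db.users]
    }
}
```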
diff --git a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkAggregatedCommitter.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkAggregatedCommitter.java
similarity index 99%
rename from seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkAggregatedCommitter.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkAggregatedCommitter.java
index 31dd91f1eec..6ed04d871bf 100644
--- a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkAggregatedCommitter.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkAggregatedCommitter.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.seatunnel.connectors.seatunnel.common.multitablesink;
+package org.apache.seatunnel.api.sink.multitablesink;
import org.apache.seatunnel.api.sink.MultiTableResourceManager;
import org.apache.seatunnel.api.sink.SinkAggregatedCommitter;
diff --git a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkCommitter.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkCommitter.java
similarity index 98%
rename from seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkCommitter.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkCommitter.java
index ed52fafb002..113e269fd07 100644
--- a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkCommitter.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkCommitter.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.seatunnel.connectors.seatunnel.common.multitablesink;
+package org.apache.seatunnel.api.sink.multitablesink;
import org.apache.seatunnel.api.sink.SinkCommitter;
diff --git a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkFactory.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkFactory.java
similarity index 96%
rename from seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkFactory.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkFactory.java
index 00e1e1ab133..08db91b7c8e 100644
--- a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkFactory.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkFactory.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.seatunnel.connectors.seatunnel.common.multitablesink;
+package org.apache.seatunnel.api.sink.multitablesink;
import org.apache.seatunnel.api.configuration.util.OptionRule;
import org.apache.seatunnel.api.table.connector.TableSink;
diff --git a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkWriter.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkWriter.java
similarity index 73%
rename from seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkWriter.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkWriter.java
index 12163676d7d..89d7e8d13c0 100644
--- a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkWriter.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkWriter.java
@@ -15,13 +15,15 @@
* limitations under the License.
*/
-package org.apache.seatunnel.connectors.seatunnel.common.multitablesink;
+package org.apache.seatunnel.api.sink.multitablesink;
import org.apache.seatunnel.api.sink.MultiTableResourceManager;
import org.apache.seatunnel.api.sink.SinkWriter;
import org.apache.seatunnel.api.sink.SupportMultiTableSinkWriter;
-import org.apache.seatunnel.api.table.event.SchemaChangeEvent;
+import org.apache.seatunnel.api.sink.SupportSchemaEvolutionSinkWriter;
+import org.apache.seatunnel.api.table.schema.event.SchemaChangeEvent;
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.api.tracing.MDCTracer;
import lombok.extern.slf4j.Slf4j;
@@ -33,6 +35,8 @@
import java.util.Optional;
import java.util.Random;
import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
@@ -42,11 +46,14 @@
@Slf4j
public class MultiTableSinkWriter
- implements SinkWriter {
+ implements SinkWriter,
+ SupportSchemaEvolutionSinkWriter {
private final Map> sinkWriters;
+ private final Map sinkWritersContext;
private final Map> sinkPrimaryKeys = new HashMap<>();
- private final List>> sinkWritersWithIndex;
+ private final List>>
+ sinkWritersWithIndex;
private final List runnable = new ArrayList<>();
private final Random random = new Random();
private final List> blockingQueues = new ArrayList<>();
@@ -55,26 +62,34 @@ public class MultiTableSinkWriter
private volatile boolean submitted = false;
public MultiTableSinkWriter(
- Map> sinkWriters, int queueSize) {
+ Map> sinkWriters,
+ int queueSize,
+ Map sinkWritersContext) {
this.sinkWriters = sinkWriters;
+ this.sinkWritersContext = sinkWritersContext;
AtomicInteger cnt = new AtomicInteger(0);
executorService =
- Executors.newFixedThreadPool(
- // we use it in `MultiTableWriterRunnable` and `prepare commit task`, so it
- // should be double.
- queueSize * 2,
- runnable -> {
- Thread thread = new Thread(runnable);
- thread.setDaemon(true);
- thread.setName(
- "st-multi-table-sink-writer" + "-" + cnt.incrementAndGet());
- return thread;
- });
+ MDCTracer.tracing(
+ Executors.newFixedThreadPool(
+ // we use it in `MultiTableWriterRunnable` and `prepare commit
+ // task`, so it
+ // should be double.
+ queueSize * 2,
+ runnable -> {
+ Thread thread = new Thread(runnable);
+ thread.setDaemon(true);
+ thread.setName(
+ "st-multi-table-sink-writer"
+ + "-"
+ + cnt.incrementAndGet());
+ return thread;
+ }));
sinkWritersWithIndex = new ArrayList<>();
for (int i = 0; i < queueSize; i++) {
BlockingQueue queue = new LinkedBlockingQueue<>(1024);
Map> tableIdWriterMap = new HashMap<>();
- Map> sinkIdentifierMap = new HashMap<>();
+ ConcurrentMap> sinkIdentifierMap =
+ new ConcurrentHashMap<>();
int queueIndex = i;
sinkWriters.entrySet().stream()
.filter(entry -> entry.getKey().getIndex() % queueSize == queueIndex)
@@ -84,6 +99,7 @@ public MultiTableSinkWriter(
entry.getKey().getTableIdentifier(), entry.getValue());
sinkIdentifierMap.put(entry.getKey(), entry.getValue());
});
+
sinkWritersWithIndex.add(sinkIdentifierMap);
blockingQueues.add(queue);
MultiTableWriterRunnable r = new MultiTableWriterRunnable(tableIdWriterMap, queue);
@@ -133,9 +149,24 @@ public void applySchemaChange(SchemaChangeEvent event) throws IOException {
.getKey()
.getTableIdentifier()
.equals(event.tablePath().getFullName())) {
+ log.info(
+ "Start apply schema change for table {} sub-writer {}",
+ sinkWriterEntry.getKey().getTableIdentifier(),
+ sinkWriterEntry.getKey().getIndex());
synchronized (runnable.get(i)) {
- sinkWriterEntry.getValue().applySchemaChange(event);
+ if (sinkWriterEntry.getValue()
+ instanceof SupportSchemaEvolutionSinkWriter) {
+ ((SupportSchemaEvolutionSinkWriter) sinkWriterEntry.getValue())
+ .applySchemaChange(event);
+ } else {
+ // TODO remove deprecated method
+ sinkWriterEntry.getValue().applySchemaChange(event);
+ }
}
+ log.info(
+ "Finish apply schema change for table {} sub-writer {}",
+ sinkWriterEntry.getKey().getTableIdentifier(),
+ sinkWriterEntry.getKey().getIndex());
}
}
}
@@ -197,9 +228,15 @@ public List snapshotState(long checkpointId) throws IOException
@Override
public Optional prepareCommit() throws IOException {
+ return Optional.empty();
+ }
+
+ @Override
+ public Optional prepareCommit(long checkpointId) throws IOException {
checkQueueRemain();
subSinkErrorCheck();
- MultiTableCommitInfo multiTableCommitInfo = new MultiTableCommitInfo(new HashMap<>());
+ MultiTableCommitInfo multiTableCommitInfo =
+ new MultiTableCommitInfo(new ConcurrentHashMap<>());
List> futures = new ArrayList<>();
for (int i = 0; i < sinkWritersWithIndex.size(); i++) {
int subWriterIndex = i;
@@ -214,7 +251,9 @@ public Optional prepareCommit() throws IOException {
.entrySet()) {
Optional> commit;
try {
- commit = sinkWriterEntry.getValue().prepareCommit();
+ SinkWriter sinkWriter =
+ sinkWriterEntry.getValue();
+ commit = sinkWriter.prepareCommit(checkpointId);
} catch (IOException e) {
throw new RuntimeException(e);
}
@@ -234,6 +273,9 @@ public Optional prepareCommit() throws IOException {
throw new RuntimeException(e);
}
}
+ if (multiTableCommitInfo.getCommitInfo().isEmpty()) {
+ return Optional.empty();
+ }
return Optional.of(multiTableCommitInfo);
}
@@ -267,26 +309,30 @@ public void abortPrepare() {
@Override
public void close() throws IOException {
- Throwable firstE = null;
+ // Variables used in lambda expressions must be final or effectively final, so the
+ // error holder is wrapped in a single-element array
+ final Throwable[] firstE = {null};
try {
checkQueueRemain();
} catch (Exception e) {
- firstE = e;
+ firstE[0] = e;
}
executorService.shutdownNow();
for (int i = 0; i < sinkWritersWithIndex.size(); i++) {
synchronized (runnable.get(i)) {
- for (SinkWriter sinkWriter :
- sinkWritersWithIndex.get(i).values()) {
- try {
- sinkWriter.close();
- } catch (Throwable e) {
- if (firstE == null) {
- firstE = e;
- }
- log.error("close error", e);
- }
- }
+ Map> sinkIdentifierSinkWriterMap =
+ sinkWritersWithIndex.get(i);
+ sinkIdentifierSinkWriterMap.forEach(
+ (identifier, sinkWriter) -> {
+ try {
+ sinkWriter.close();
+ } catch (Throwable e) {
+ if (firstE[0] == null) {
+ firstE[0] = e;
+ }
+ log.error("close error", e);
+ }
+ });
}
}
try {
@@ -296,8 +342,8 @@ public void close() throws IOException {
} catch (Throwable e) {
log.error("close resourceManager error", e);
}
- if (firstE != null) {
- throw new RuntimeException(firstE);
+ if (firstE[0] != null) {
+ throw new RuntimeException(firstE[0]);
}
}
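The rewrite of `close()` iterates the writer map with a lambda, so the first failure has to be captured through an effectively-final holder; the diff uses a one-element `Throwable[]` for that. A minimal stand-alone demonstration of the capture trick:

```java
import java.util.Arrays;
import java.util.List;

public class LambdaCaptureSketch {
    public static void main(String[] args) {
        final Throwable[] firstError = {null}; // effectively final reference, contents still mutable

        List<Runnable> closers = Arrays.asList(
                () -> { throw new IllegalStateException("writer A failed"); },
                () -> System.out.println("writer B closed"));

        closers.forEach(closer -> {
            try {
                closer.run();
            } catch (Throwable t) {
                if (firstError[0] == null) {
                    firstError[0] = t; // remember only the first failure, keep closing the rest
                }
            }
        });

        if (firstError[0] != null) {
            System.out.println("rethrow after closing everything: " + firstError[0].getMessage());
        }
    }
}
```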
diff --git a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableState.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableState.java
similarity index 93%
rename from seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableState.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableState.java
index 43f5d8bd996..ac7db893ba0 100644
--- a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableState.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableState.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.seatunnel.connectors.seatunnel.common.multitablesink;
+package org.apache.seatunnel.api.sink.multitablesink;
import lombok.AllArgsConstructor;
import lombok.Getter;
diff --git a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableWriterRunnable.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableWriterRunnable.java
similarity index 97%
rename from seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableWriterRunnable.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableWriterRunnable.java
index ce22e0e2e20..3026dc778b8 100644
--- a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableWriterRunnable.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableWriterRunnable.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.seatunnel.connectors.seatunnel.common.multitablesink;
+package org.apache.seatunnel.api.sink.multitablesink;
import org.apache.seatunnel.api.sink.SinkWriter;
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
diff --git a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/SinkContextProxy.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/SinkContextProxy.java
similarity index 81%
rename from seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/SinkContextProxy.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/SinkContextProxy.java
index f7691ddedff..5f4bf75f6ff 100644
--- a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/SinkContextProxy.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/SinkContextProxy.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.seatunnel.connectors.seatunnel.common.multitablesink;
+package org.apache.seatunnel.api.sink.multitablesink;
import org.apache.seatunnel.api.common.metrics.MetricsContext;
import org.apache.seatunnel.api.event.EventListener;
@@ -25,10 +25,13 @@ public class SinkContextProxy implements SinkWriter.Context {
private final int index;
+ private final int replicaNum;
+
private final SinkWriter.Context context;
- public SinkContextProxy(int index, SinkWriter.Context context) {
+ public SinkContextProxy(int index, int replicaNum, SinkWriter.Context context) {
this.index = index;
+ this.replicaNum = replicaNum;
this.context = context;
}
@@ -37,6 +40,11 @@ public int getIndexOfSubtask() {
return index;
}
+ @Override
+ public int getNumberOfParallelSubtasks() {
+ return context.getNumberOfParallelSubtasks() * replicaNum;
+ }
+
@Override
public MetricsContext getMetricsContext() {
return context.getMetricsContext();
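With the proxy change above, each physical sub-writer is created with index `parentIndex * replicaNum + i` and now reports a total parallelism of `parentParallelism * replicaNum`. A tiny arithmetic sketch (plain Java, not the real `SinkWriter.Context` API) shows that every index is unique and stays within the reported range:

```java
public class ReplicaIndexSketch {
    public static void main(String[] args) {
        int parentParallelism = 2; // parallel subtasks of the multi-table sink
        int replicaNum = 3;        // replicas per subtask

        int totalParallelism = parentParallelism * replicaNum; // what SinkContextProxy now reports
        for (int parentIndex = 0; parentIndex < parentParallelism; parentIndex++) {
            for (int i = 0; i < replicaNum; i++) {
                int index = parentIndex * replicaNum + i; // index handed to SinkContextProxy
                System.out.printf("parent=%d replica=%d -> index=%d of %d%n",
                        parentIndex, i, index, totalParallelism);
            }
        }
        // Every index in [0, totalParallelism) appears exactly once.
    }
}
```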
diff --git a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/SinkIdentifier.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/SinkIdentifier.java
similarity index 94%
rename from seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/SinkIdentifier.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/SinkIdentifier.java
index 18f7484853d..50eac7c0d9d 100644
--- a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/SinkIdentifier.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/SinkIdentifier.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.seatunnel.connectors.seatunnel.common.multitablesink;
+package org.apache.seatunnel.api.sink.multitablesink;
import lombok.EqualsAndHashCode;
import lombok.Getter;
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/source/Collector.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/source/Collector.java
index 51ace474e5f..895e4aa6db1 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/source/Collector.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/source/Collector.java
@@ -17,7 +17,7 @@
package org.apache.seatunnel.api.source;
-import org.apache.seatunnel.api.table.event.SchemaChangeEvent;
+import org.apache.seatunnel.api.table.schema.event.SchemaChangeEvent;
/**
* A {@link Collector} is used to collect data from {@link SourceReader}.
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/source/SupportSchemaEvolution.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/source/SupportSchemaEvolution.java
new file mode 100644
index 00000000000..23a1edcc331
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/source/SupportSchemaEvolution.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.source;
+
+import org.apache.seatunnel.api.table.schema.SchemaChangeType;
+
+import java.util.List;
+
+public interface SupportSchemaEvolution {
+
+ /**
+ * The schema change types this source connector supports.
+ *
+ * @return the supported schema change types
+ */
+ List supports();
+}
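As a usage illustration only (the connector class below is hypothetical, and a real connector would also implement its source interface), a source declares the schema changes it can emit by implementing the new interface, assuming `seatunnel-api` is on the classpath:

```java
import org.apache.seatunnel.api.source.SupportSchemaEvolution;
import org.apache.seatunnel.api.table.schema.SchemaChangeType;

import java.util.Arrays;
import java.util.List;

// Hypothetical connector class, shown only to illustrate the contract.
public class MyCdcSource implements SupportSchemaEvolution {

    @Override
    public List<SchemaChangeType> supports() {
        // Advertise only the change types this source can actually capture.
        return Arrays.asList(SchemaChangeType.ADD_COLUMN, SchemaChangeType.DROP_COLUMN);
    }
}
```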
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/Catalog.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/Catalog.java
index 05b7ab114c4..f75c012f8fe 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/Catalog.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/Catalog.java
@@ -239,6 +239,25 @@ default void buildColumnsWithErrorCheck(
void createTable(TablePath tablePath, CatalogTable table, boolean ignoreIfExists)
throws TableAlreadyExistException, DatabaseNotExistException, CatalogException;
+ /**
+ * Create a new table in this catalog.
+ *
+ * @param tablePath Path of the table
+ * @param table The table definition
+ * @param ignoreIfExists Flag to specify behavior when a table with the given name already exists
+ * @param createIndex Whether to create the table's indexes
+ * @throws TableAlreadyExistException thrown if the table already exists in the catalog and
+ * ignoreIfExists is false
+ * @throws DatabaseNotExistException thrown if the database in tablePath doesn't exist in the
+ * catalog
+ * @throws CatalogException in case of any runtime exception
+ */
+ default void createTable(
+ TablePath tablePath, CatalogTable table, boolean ignoreIfExists, boolean createIndex)
+ throws TableAlreadyExistException, DatabaseNotExistException, CatalogException {
+ createTable(tablePath, table, ignoreIfExists);
+ }
+
/**
* Drop an existing table in this catalog.
*
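The new default overload keeps existing catalogs source-compatible: an implementation that ignores `createIndex` inherits the delegation to the three-argument method, while one that cares can override the four-argument form. A stand-alone sketch of that default-method pattern (simplified stand-in types, not the real `Catalog` interface):

```java
public class DefaultOverloadSketch {

    interface MiniCatalog {
        void createTable(String tablePath, boolean ignoreIfExists);

        // New overload: existing implementations compile unchanged and simply drop the flag.
        default void createTable(String tablePath, boolean ignoreIfExists, boolean createIndex) {
            createTable(tablePath, ignoreIfExists);
        }
    }

    public static void main(String[] args) {
        MiniCatalog legacy = (tablePath, ignoreIfExists) ->
                System.out.println("create " + tablePath + " (indexes always created)");
        // Callers may pass createIndex=false; the legacy catalog silently ignores it.
        legacy.createTable("db.users", true, false);
    }
}
```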
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/CatalogOptions.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/CatalogOptions.java
index 2d1a3bc41b8..046ac1dbed2 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/CatalogOptions.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/CatalogOptions.java
@@ -17,6 +17,8 @@
package org.apache.seatunnel.api.table.catalog;
+import org.apache.seatunnel.shade.com.fasterxml.jackson.core.type.TypeReference;
+
import org.apache.seatunnel.api.configuration.Option;
import org.apache.seatunnel.api.configuration.Options;
@@ -56,4 +58,12 @@ public interface CatalogOptions {
.withDescription(
"The table names RegEx of the database to capture."
+ "The table name needs to include the database name, for example: database_.*\\.table_.*");
+
+ Option>> TABLE_LIST =
+ Options.key("table_list")
+ .type(new TypeReference>>() {})
+ .noDefaultValue()
+ .withDescription(
+ "SeaTunnel Multi Table Schema, acts on structed data sources. "
+ + "such as jdbc, paimon, doris, etc");
}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/CatalogTableUtil.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/CatalogTableUtil.java
index eafaedf05d2..95eaa5563b2 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/CatalogTableUtil.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/CatalogTableUtil.java
@@ -157,7 +157,8 @@ public static SeaTunnelDataType convertToDataType(
}
}
- public static MultipleRowType convertToMultipleRowType(List catalogTables) {
+ @Deprecated
+ private static MultipleRowType convertToMultipleRowType(List catalogTables) {
Map rowTypeMap = new HashMap<>();
for (CatalogTable catalogTable : catalogTables) {
String tableId = catalogTable.getTableId().toTablePath().toString();
@@ -215,9 +216,9 @@ public static CatalogTable buildWithConfig(String catalogName, ReadonlyConfig re
schemaConfig.get(
TableSchemaOptions.TableIdentifierOptions.SCHEMA_FIRST));
} else {
- Optional resultTableNameOptional =
- readonlyConfig.getOptional(CommonOptions.RESULT_TABLE_NAME);
- tablePath = resultTableNameOptional.map(TablePath::of).orElse(TablePath.DEFAULT);
+ Optional pluginOutputIdentifierOptional =
+ readonlyConfig.getOptional(CommonOptions.PLUGIN_OUTPUT);
+ tablePath = pluginOutputIdentifierOptional.map(TablePath::of).orElse(TablePath.DEFAULT);
}
return CatalogTable.of(
@@ -254,7 +255,7 @@ public static CatalogTable newCatalogTable(
finalColumns.add(column);
} else {
finalColumns.add(
- PhysicalColumn.of(fieldNames[i], fieldTypes[i], 0, false, null, null));
+ PhysicalColumn.of(fieldNames[i], fieldTypes[i], 0, true, null, null));
}
}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/PhysicalColumn.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/PhysicalColumn.java
index db9da1b2b75..2a425000222 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/PhysicalColumn.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/PhysicalColumn.java
@@ -215,11 +215,25 @@ public static PhysicalColumn of(
String comment,
String sourceType,
Map options) {
+ return new PhysicalColumn(
+ name, dataType, columnLength, nullable, defaultValue, comment, sourceType, options);
+ }
+
+ public static PhysicalColumn of(
+ String name,
+ SeaTunnelDataType> dataType,
+ Long columnLength,
+ Integer scale,
+ boolean nullable,
+ Object defaultValue,
+ String comment,
+ String sourceType,
+ Map options) {
return new PhysicalColumn(
name,
dataType,
columnLength,
- null,
+ scale,
nullable,
defaultValue,
comment,
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/SeaTunnelDataTypeConvertorUtil.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/SeaTunnelDataTypeConvertorUtil.java
index cc7ec83fb12..8230ca32eed 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/SeaTunnelDataTypeConvertorUtil.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/SeaTunnelDataTypeConvertorUtil.java
@@ -31,6 +31,7 @@
import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
import org.apache.seatunnel.api.table.type.SqlType;
+import org.apache.seatunnel.api.table.type.VectorType;
import org.apache.seatunnel.common.exception.CommonError;
public class SeaTunnelDataTypeConvertorUtil {
@@ -80,6 +81,16 @@ public static SeaTunnelDataType> deserializeSeaTunnelDataType(
return LocalTimeType.LOCAL_DATE_TIME_TYPE;
case MAP:
return parseMapType(field, columnType);
+ case BINARY_VECTOR:
+ return VectorType.VECTOR_BINARY_TYPE;
+ case FLOAT_VECTOR:
+ return VectorType.VECTOR_FLOAT_TYPE;
+ case FLOAT16_VECTOR:
+ return VectorType.VECTOR_FLOAT16_TYPE;
+ case BFLOAT16_VECTOR:
+ return VectorType.VECTOR_BFLOAT16_TYPE;
+ case SPARSE_FLOAT_VECTOR:
+ return VectorType.VECTOR_SPARSE_FLOAT_TYPE;
default:
throw CommonError.unsupportedDataType("SeaTunnel", columnType, field);
}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TableIdentifier.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TableIdentifier.java
index 2d39f9b9842..101081255cc 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TableIdentifier.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TableIdentifier.java
@@ -17,15 +17,16 @@
package org.apache.seatunnel.api.table.catalog;
+import org.apache.commons.lang3.StringUtils;
+
import lombok.EqualsAndHashCode;
import lombok.Getter;
-import lombok.RequiredArgsConstructor;
+import lombok.NonNull;
import java.io.Serializable;
@Getter
@EqualsAndHashCode
-@RequiredArgsConstructor
public final class TableIdentifier implements Serializable {
private static final long serialVersionUID = 1L;
@@ -35,7 +36,18 @@ public final class TableIdentifier implements Serializable {
private final String schemaName;
- private final String tableName;
+ @NonNull private final String tableName;
+
+ public TableIdentifier(
+ String catalogName, String databaseName, String schemaName, @NonNull String tableName) {
+ this.catalogName = catalogName;
+ this.databaseName = databaseName;
+ this.schemaName = schemaName;
+ this.tableName = tableName;
+ if (StringUtils.isEmpty(tableName)) {
+ throw new IllegalArgumentException("tableName cannot be empty");
+ }
+ }
public static TableIdentifier of(String catalogName, String databaseName, String tableName) {
return new TableIdentifier(catalogName, databaseName, null, tableName);
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TablePath.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TablePath.java
index 12572621874..30edc7ac80e 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TablePath.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TablePath.java
@@ -17,9 +17,11 @@
package org.apache.seatunnel.api.table.catalog;
+import org.apache.commons.lang3.StringUtils;
+
import lombok.EqualsAndHashCode;
import lombok.Getter;
-import lombok.RequiredArgsConstructor;
+import lombok.NonNull;
import java.io.Serializable;
import java.util.ArrayList;
@@ -27,12 +29,20 @@
@Getter
@EqualsAndHashCode
-@RequiredArgsConstructor
public final class TablePath implements Serializable {
private static final long serialVersionUID = 1L;
private final String databaseName;
private final String schemaName;
- private final String tableName;
+ @NonNull private final String tableName;
+
+ public TablePath(String databaseName, String schemaName, @NonNull String tableName) {
+ this.databaseName = databaseName;
+ this.schemaName = schemaName;
+ this.tableName = tableName;
+ if (StringUtils.isEmpty(tableName)) {
+ throw new IllegalArgumentException("tableName cannot be empty");
+ }
+ }
public static final TablePath DEFAULT = TablePath.of("default", "default", "default");
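Both `TableIdentifier` and `TablePath` now fail fast on a missing table name: Lombok's `@NonNull` rejects `null`, and the explicit `StringUtils.isEmpty` check rejects the empty string. A quick illustration of the expected behaviour, assuming `seatunnel-api` is on the classpath and using only the constructor shown above:

```java
import org.apache.seatunnel.api.table.catalog.TablePath;

public class TablePathValidationDemo {
    public static void main(String[] args) {
        // A valid path; getFullName() prints the dotted form.
        System.out.println(new TablePath("mydb", null, "users").getFullName());

        try {
            new TablePath("mydb", null, "");   // empty name, rejected by the explicit check
        } catch (IllegalArgumentException e) {
            System.out.println("rejected empty table name: " + e.getMessage());
        }
        try {
            new TablePath("mydb", null, null); // null name, rejected by @NonNull
        } catch (NullPointerException e) {
            System.out.println("rejected null table name");
        }
    }
}
```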
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TableSchema.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TableSchema.java
index d327a0668be..2238da26171 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TableSchema.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TableSchema.java
@@ -20,25 +20,37 @@
import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
-import lombok.AllArgsConstructor;
+import lombok.AccessLevel;
import lombok.Data;
+import lombok.Getter;
import java.io.Serializable;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
/** Represent a physical table schema. */
@Data
-@AllArgsConstructor
public final class TableSchema implements Serializable {
private static final long serialVersionUID = 1L;
private final List columns;
+ @Getter(AccessLevel.PRIVATE)
+ private final List columnNames;
+
private final PrimaryKey primaryKey;
private final List constraintKeys;
+ public TableSchema(
+ List columns, PrimaryKey primaryKey, List constraintKeys) {
+ this.columns = columns;
+ this.columnNames = columns.stream().map(Column::getName).collect(Collectors.toList());
+ this.primaryKey = primaryKey;
+ this.constraintKeys = constraintKeys;
+ }
+
public static Builder builder() {
return new Builder();
}
@@ -58,7 +70,23 @@ public SeaTunnelRowType toPhysicalRowDataType() {
}
public String[] getFieldNames() {
- return columns.stream().map(Column::getName).toArray(String[]::new);
+ return columnNames.toArray(new String[0]);
+ }
+
+ public int indexOf(String columnName) {
+ return columnNames.indexOf(columnName);
+ }
+
+ public Column getColumn(String columnName) {
+ return columns.get(indexOf(columnName));
+ }
+
+ public boolean contains(String columnName) {
+ return columnNames.contains(columnName);
+ }
+
+ public List getColumns() {
+ return Collections.unmodifiableList(columns);
}
public static final class Builder {
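The constructor now materialises `columnNames` once, so `getFieldNames`, `indexOf`, `contains`, and `getColumn` avoid re-streaming the column list, and `getColumns` hands out an unmodifiable view. A simplified stand-alone sketch of that caching idea (plain stand-in types, not the real `TableSchema`/`Column` classes):

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;

public class ColumnNameCacheSketch {

    static final class Column {
        final String name;
        Column(String name) { this.name = name; }
    }

    static final class Schema {
        private final List<Column> columns;
        private final List<String> columnNames; // computed once, reused by every lookup

        Schema(List<Column> columns) {
            this.columns = new ArrayList<>(columns);
            this.columnNames =
                    this.columns.stream().map(c -> c.name).collect(Collectors.toList());
        }

        int indexOf(String columnName) { return columnNames.indexOf(columnName); }

        boolean contains(String columnName) { return columnNames.contains(columnName); }

        Column getColumn(String columnName) { return columns.get(indexOf(columnName)); }

        List<Column> getColumns() { return Collections.unmodifiableList(columns); }
    }

    public static void main(String[] args) {
        Schema schema = new Schema(Arrays.asList(new Column("id"), new Column("name")));
        System.out.println(schema.indexOf("name"));      // 1
        System.out.println(schema.contains("missing"));  // false
    }
}
```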
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/schema/ReadonlyConfigParser.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/schema/ReadonlyConfigParser.java
index e043c0ecd72..8cbea1de838 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/schema/ReadonlyConfigParser.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/schema/ReadonlyConfigParser.java
@@ -95,7 +95,8 @@ public List parse(ReadonlyConfig schemaConfig) {
String value = entry.getValue();
SeaTunnelDataType> dataType =
SeaTunnelDataTypeConvertorUtil.deserializeSeaTunnelDataType(key, value);
- PhysicalColumn column = PhysicalColumn.of(key, dataType, 0, true, null, null);
+ PhysicalColumn column =
+ PhysicalColumn.of(key, dataType, null, null, true, null, null);
columns.add(column);
}
return columns;
@@ -130,14 +131,12 @@ public List parse(ReadonlyConfig schemaConfig) {
new IllegalArgumentException(
"schema.columns.* config need option [type], please correct your config first"));
- Integer columnLength =
+ Long columnLength =
columnConfig.get(
TableSchemaOptions.ColumnOptions.COLUMN_LENGTH);
-
Integer columnScale =
columnConfig.get(
TableSchemaOptions.ColumnOptions.COLUMN_SCALE);
-
Boolean nullable =
columnConfig.get(TableSchemaOptions.ColumnOptions.NULLABLE);
Object defaultValue =
@@ -148,7 +147,7 @@ public List parse(ReadonlyConfig schemaConfig) {
return PhysicalColumn.of(
name,
seaTunnelDataType,
- Long.valueOf(columnLength),
+ columnLength,
columnScale,
nullable,
defaultValue,
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/schema/TableSchemaOptions.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/schema/TableSchemaOptions.java
index 9ede187ea96..34ca23ced42 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/schema/TableSchemaOptions.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/schema/TableSchemaOptions.java
@@ -55,6 +55,14 @@ public static class TableIdentifierOptions {
.noDefaultValue()
.withDescription("SeaTunnel Schema");
+ public static final Option>> TABLE_CONFIGS =
+ Options.key("tables_configs")
+ .type(new TypeReference>>() {})
+ .noDefaultValue()
+ .withDescription(
+ "SeaTunnel Multi Table Schema, acts on unstructed data sources. "
+ + "such as file, assert, mongodb, etc");
+
// We should use ColumnOptions instead of FieldOptions
@Deprecated
public static class FieldOptions {
@@ -92,10 +100,10 @@ public static class ColumnOptions {
.noDefaultValue()
.withDescription("SeaTunnel Schema Column scale");
- public static final Option COLUMN_LENGTH =
+ public static final Option COLUMN_LENGTH =
Options.key("columnLength")
- .intType()
- .defaultValue(0)
+ .longType()
+ .defaultValue(0L)
.withDescription("SeaTunnel Schema Column Length");
public static final Option NULLABLE =
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/converter/BasicTypeDefine.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/converter/BasicTypeDefine.java
index d15529e0a4e..e7c3c04110f 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/converter/BasicTypeDefine.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/converter/BasicTypeDefine.java
@@ -31,6 +31,8 @@ public class BasicTypeDefine implements Serializable {
protected String columnType;
// e.g. `varchar` for MySQL
protected String dataType;
+ // It's jdbc sql type(java.sql.Types) not SeaTunnel SqlType
+ protected int sqlType;
protected T nativeType;
// e.g. `varchar` length is 10
protected Long length;
diff --git a/seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/dto/source/SourceIndexInfo.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/ChangeStreamTableSourceCheckpoint.java
similarity index 75%
rename from seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/dto/source/SourceIndexInfo.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/ChangeStreamTableSourceCheckpoint.java
index 6c0a5667da7..dfd08fd9ee7 100644
--- a/seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/dto/source/SourceIndexInfo.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/ChangeStreamTableSourceCheckpoint.java
@@ -15,21 +15,20 @@
* limitations under the License.
*/
-package org.apache.seatunnel.connectors.seatunnel.elasticsearch.dto.source;
+package org.apache.seatunnel.api.table.factory;
import lombok.AllArgsConstructor;
import lombok.Data;
import java.io.Serializable;
import java.util.List;
-import java.util.Map;
@Data
@AllArgsConstructor
-public class SourceIndexInfo implements Serializable {
- private String index;
- private List source;
- private Map query;
- private String scrollTime;
- private int scrollSize;
+public class ChangeStreamTableSourceCheckpoint implements Serializable {
+ // The state of the enumerator, from checkpoint data
+ private byte[] enumeratorState;
+
+ // The splits of the enumerator, from checkpoint data
+ public List> splits;
}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/ChangeStreamTableSourceFactory.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/ChangeStreamTableSourceFactory.java
new file mode 100644
index 00000000000..3fe40bf7d0e
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/ChangeStreamTableSourceFactory.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.table.factory;
+
+import org.apache.seatunnel.api.serialization.DefaultSerializer;
+import org.apache.seatunnel.api.serialization.Serializer;
+import org.apache.seatunnel.api.source.SeaTunnelSource;
+import org.apache.seatunnel.api.source.SourceSplit;
+import org.apache.seatunnel.api.table.connector.TableSource;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * A factory to create a {@link TableSource} for a {@link SeaTunnelSource} that supports change
+ * stream. e.g. CDC/MQ Source The factory can be used to restore the source from the checkpoint
+ * state. The factory can also be used to serialize and deserialize the checkpoint state.
+ */
+public interface ChangeStreamTableSourceFactory extends TableSourceFactory {
+
+ /**
+ * see {@link SeaTunnelSource#getSplitSerializer()}.
+ *
+ * @return
+ * @param
+ */
+ default Serializer getSplitSerializer() {
+ return new DefaultSerializer<>();
+ }
+
+ /**
+ * see {@link SeaTunnelSource#getEnumeratorStateSerializer()}.
+ *
+ * @return
+ * @param
+ */
+ default Serializer getEnumeratorStateSerializer() {
+ return new DefaultSerializer<>();
+ }
+
+ /**
+ * Create a {@link ChangeStreamTableSourceState} from the given {@link
+ * ChangeStreamTableSourceCheckpoint}. The default implementation uses the {@link
+ * #getSplitSerializer()} and {@link #getEnumeratorStateSerializer()} to deserialize the splits
+ * and enumerator state.
+ *
+ * If the splits or enumerator state is null, the corresponding field in the returned state
+ * will be null.
+ *
+ * @param checkpoint
+ * @return
+ * @param
+ * @param
+ * @throws IOException
+ */
+ default
+ ChangeStreamTableSourceState deserializeTableSourceState(
+ ChangeStreamTableSourceCheckpoint checkpoint) throws IOException {
+ StateT enumeratorState = null;
+ if (checkpoint.getEnumeratorState() != null) {
+ Serializer enumeratorStateSerializer = getEnumeratorStateSerializer();
+ enumeratorState =
+ enumeratorStateSerializer.deserialize(checkpoint.getEnumeratorState());
+ }
+
+ List> deserializedSplits = new ArrayList<>();
+ if (checkpoint.getSplits() != null && !checkpoint.getSplits().isEmpty()) {
+ Serializer splitSerializer = getSplitSerializer();
+ List> splits = checkpoint.getSplits();
+ for (int i = 0; i < splits.size(); i++) {
+ List subTaskSplits = splits.get(i);
+ if (subTaskSplits == null || subTaskSplits.isEmpty()) {
+ deserializedSplits.add(Collections.emptyList());
+ } else {
+ List deserializedSubTaskSplits = new ArrayList<>(subTaskSplits.size());
+ for (byte[] split : subTaskSplits) {
+ if (split != null) {
+ deserializedSubTaskSplits.add(splitSerializer.deserialize(split));
+ }
+ }
+ deserializedSplits.add(deserializedSubTaskSplits);
+ }
+ }
+ }
+ return new ChangeStreamTableSourceState<>(enumeratorState, deserializedSplits);
+ }
+
+ /**
+ * Restore the source from the checkpoint state.
+ *
+ * @param context
+ * @param state checkpoint state
+ * @return
+ * @param
+ * @param
+ * @param
+ */
+
+ TableSource restoreSource(
+ TableSourceFactoryContext context,
+ ChangeStreamTableSourceState state);
+}
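The default `deserializeTableSourceState` walks a per-subtask list of serialized splits, skips `null` entries, and keeps empty lists so the subtask layout is preserved. A stand-alone sketch of that loop, using byte-array "splits" and a trivial stand-in serializer rather than the real SeaTunnel serializers:

```java
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class CheckpointDeserializeSketch {

    // Trivial stand-in for Serializer<SplitT>: splits are just UTF-8 strings.
    static String deserializeSplit(byte[] bytes) {
        return new String(bytes, StandardCharsets.UTF_8);
    }

    // Mirrors the shape of the default deserializeTableSourceState loop:
    // one inner list per subtask, null-safe, empty subtask lists preserved.
    static List<List<String>> deserializeSplits(List<List<byte[]>> checkpointSplits) {
        List<List<String>> result = new ArrayList<>();
        if (checkpointSplits == null) {
            return result;
        }
        for (List<byte[]> subTask : checkpointSplits) {
            if (subTask == null || subTask.isEmpty()) {
                result.add(Collections.emptyList());
                continue;
            }
            List<String> restored = new ArrayList<>(subTask.size());
            for (byte[] split : subTask) {
                if (split != null) {
                    restored.add(deserializeSplit(split));
                }
            }
            result.add(restored);
        }
        return result;
    }

    public static void main(String[] args) {
        List<List<byte[]>> checkpoint = Arrays.asList(
                Arrays.asList("split-0".getBytes(StandardCharsets.UTF_8)),
                Collections.emptyList());
        System.out.println(deserializeSplits(checkpoint)); // [[split-0], []]
    }
}
```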
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/ChangeStreamTableSourceState.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/ChangeStreamTableSourceState.java
new file mode 100644
index 00000000000..a7146c56949
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/ChangeStreamTableSourceState.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.table.factory;
+
+import org.apache.seatunnel.api.source.SourceSplit;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+
+import java.io.Serializable;
+import java.util.List;
+
+/**
+ * The state of the enumerator and splits of the enumerator, which is used to resume the enumerator
+ * and reader.
+ *
+ * @param
+ * @param
+ */
+@Data
+@AllArgsConstructor
+public class ChangeStreamTableSourceState {
+ // The state of the enumerator, which is used to resume the enumerator.
+ private StateT enumeratorState;
+
+ // The splits of the enumerator, which is used to resume the reader.
+ public List> splits;
+}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/FactoryUtil.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/FactoryUtil.java
index 668ff2a43c8..c94b88be7cc 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/FactoryUtil.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/FactoryUtil.java
@@ -23,6 +23,7 @@
import org.apache.seatunnel.api.configuration.util.OptionRule;
import org.apache.seatunnel.api.env.ParsingMode;
import org.apache.seatunnel.api.sink.SeaTunnelSink;
+import org.apache.seatunnel.api.sink.multitablesink.MultiTableSinkFactory;
import org.apache.seatunnel.api.source.SeaTunnelSource;
import org.apache.seatunnel.api.source.SourceOptions;
import org.apache.seatunnel.api.source.SourceSplit;
@@ -43,7 +44,6 @@
import java.io.Serializable;
import java.net.URL;
-import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
@@ -65,12 +65,31 @@ public final class FactoryUtil {
public static
Tuple2, List> createAndPrepareSource(
ReadonlyConfig options, ClassLoader classLoader, String factoryIdentifier) {
+ return restoreAndPrepareSource(options, classLoader, factoryIdentifier, null);
+ }
+
+ public static
+ Tuple2, List> restoreAndPrepareSource(
+ ReadonlyConfig options,
+ ClassLoader classLoader,
+ String factoryIdentifier,
+ ChangeStreamTableSourceCheckpoint checkpoint) {
try {
final TableSourceFactory factory =
discoverFactory(classLoader, TableSourceFactory.class, factoryIdentifier);
- SeaTunnelSource source =
- createAndPrepareSource(factory, options, classLoader);
+ SeaTunnelSource source;
+ if (factory instanceof ChangeStreamTableSourceFactory && checkpoint != null) {
+ ChangeStreamTableSourceFactory changeStreamTableSourceFactory =
+ (ChangeStreamTableSourceFactory) factory;
+ ChangeStreamTableSourceState state =
+ changeStreamTableSourceFactory.deserializeTableSourceState(checkpoint);
+ source =
+ restoreAndPrepareSource(
+ changeStreamTableSourceFactory, options, classLoader, state);
+ } else {
+ source = createAndPrepareSource(factory, options, classLoader);
+ }
List catalogTables;
try {
catalogTables = source.getProducedCatalogTables();
@@ -78,7 +97,7 @@ Tuple2, List> createAndPrepareS
// TODO remove it when all connector use `getProducedCatalogTables`
SeaTunnelDataType seaTunnelDataType = source.getProducedType();
final String tableId =
- options.getOptional(CommonOptions.RESULT_TABLE_NAME).orElse(DEFAULT_ID);
+ options.getOptional(CommonOptions.PLUGIN_OUTPUT).orElse(DEFAULT_ID);
catalogTables =
CatalogTableUtil.convertDataTypeToCatalogTables(seaTunnelDataType, tableId);
}
@@ -112,6 +131,19 @@ SeaTunnelSource createAndPrepareSource(
return tableSource.createSource();
}
+ private static
+ SeaTunnelSource restoreAndPrepareSource(
+ ChangeStreamTableSourceFactory factory,
+ ReadonlyConfig options,
+ ClassLoader classLoader,
+ ChangeStreamTableSourceState state) {
+ TableSourceFactoryContext context = new TableSourceFactoryContext(options, classLoader);
+ ConfigValidator.of(context.getOptions()).validate(factory.optionRule());
+ LOG.info("Restore create source from checkpoint state: {}", state);
+ TableSource tableSource = factory.restoreSource(context, state);
+ return tableSource.createSource();
+ }
+
public static
SeaTunnelSink createAndPrepareSink(
CatalogTable catalogTable,
@@ -151,7 +183,7 @@ SeaTunnelSink createMultiTableSi
ClassLoader classLoader) {
try {
TableSinkFactory factory =
- discoverFactory(classLoader, TableSinkFactory.class, "MultiTableSink");
+ new MultiTableSinkFactory();
MultiTableFactoryContext context =
new MultiTableFactoryContext(options, classLoader, sinks);
ConfigValidator.of(context.getOptions()).validate(factory.optionRule());
@@ -306,16 +338,15 @@ public static OptionRule sinkFullOptionRule(@NonNull TableSinkFactory factory) {
return sinkOptionRule;
}
- public static SeaTunnelTransform> createAndPrepareTransform(
- CatalogTable catalogTable,
+ public static SeaTunnelTransform> createAndPrepareMultiTableTransform(
+ List catalogTables,
ReadonlyConfig options,
ClassLoader classLoader,
String factoryIdentifier) {
final TableTransformFactory factory =
discoverFactory(classLoader, TableTransformFactory.class, factoryIdentifier);
TableTransformFactoryContext context =
- new TableTransformFactoryContext(
- Collections.singletonList(catalogTable), options, classLoader);
+ new TableTransformFactoryContext(catalogTables, options, classLoader);
ConfigValidator.of(context.getOptions()).validate(factory.optionRule());
return factory.createTransform(context).createTransform();
}
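The new dispatch in `restoreAndPrepareSource` only takes the restore path when the discovered factory actually implements `ChangeStreamTableSourceFactory` and a checkpoint is present; otherwise it falls back to a fresh `createAndPrepareSource`. A compact stand-alone sketch of that branching, with stand-in types rather than the real factories:

```java
public class RestoreDispatchSketch {

    interface Factory { String create(); }

    interface RestorableFactory extends Factory {
        String restore(byte[] checkpoint);
    }

    // Mirrors FactoryUtil#restoreAndPrepareSource: restore only if both conditions hold.
    static String createOrRestore(Factory factory, byte[] checkpoint) {
        if (factory instanceof RestorableFactory && checkpoint != null) {
            return ((RestorableFactory) factory).restore(checkpoint);
        }
        return factory.create();
    }

    public static void main(String[] args) {
        RestorableFactory cdc = new RestorableFactory() {
            public String create() { return "fresh source"; }
            public String restore(byte[] cp) { return "restored from " + cp.length + " bytes"; }
        };
        System.out.println(createOrRestore(cdc, null));            // fresh source
        System.out.println(createOrRestore(cdc, new byte[] {1}));  // restored from 1 bytes
    }
}
```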
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableFactoryContext.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableFactoryContext.java
index 10436da09b8..5664e48b4e6 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableFactoryContext.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableFactoryContext.java
@@ -18,9 +18,16 @@
package org.apache.seatunnel.api.table.factory;
import org.apache.seatunnel.api.configuration.ReadonlyConfig;
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import org.apache.seatunnel.common.utils.SeaTunnelException;
+
+import org.apache.commons.lang3.StringUtils;
import lombok.Getter;
+import java.util.ArrayList;
+import java.util.List;
+
@Getter
public abstract class TableFactoryContext {
@@ -31,4 +38,25 @@ public TableFactoryContext(ReadonlyConfig options, ClassLoader classLoader) {
this.options = options;
this.classLoader = classLoader;
}
+
+ protected static void checkCatalogTableIllegal(List catalogTables) {
+ for (CatalogTable catalogTable : catalogTables) {
+ List alreadyChecked = new ArrayList<>();
+ for (String fieldName : catalogTable.getTableSchema().getFieldNames()) {
+ if (StringUtils.isBlank(fieldName)) {
+ throw new SeaTunnelException(
+ String.format(
+ "Table %s field name cannot be empty",
+ catalogTable.getTablePath().getFullName()));
+ }
+ if (alreadyChecked.contains(fieldName)) {
+ throw new SeaTunnelException(
+ String.format(
+ "Table %s field %s duplicate",
+ catalogTable.getTablePath().getFullName(), fieldName));
+ }
+ alreadyChecked.add(fieldName);
+ }
+ }
+ }
}
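`checkCatalogTableIllegal` rejects blank and duplicated field names before a sink or transform factory context is built. The validation itself is simple; a stand-alone sketch (plain strings instead of `CatalogTable`, and a generic runtime exception instead of `SeaTunnelException`) shows the two failure modes:

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class FieldNameCheckSketch {

    // Mirrors checkCatalogTableIllegal for a single table: blank or duplicate names fail.
    static void checkFieldNames(String table, List<String> fieldNames) {
        List<String> alreadyChecked = new ArrayList<>();
        for (String fieldName : fieldNames) {
            if (fieldName == null || fieldName.trim().isEmpty()) {
                throw new IllegalStateException(
                        String.format("Table %s field name cannot be empty", table));
            }
            if (alreadyChecked.contains(fieldName)) {
                throw new IllegalStateException(
                        String.format("Table %s field %s duplicate", table, fieldName));
            }
            alreadyChecked.add(fieldName);
        }
    }

    public static void main(String[] args) {
        checkFieldNames("db.ok", Arrays.asList("id", "name"));      // passes
        try {
            checkFieldNames("db.bad", Arrays.asList("id", "id"));   // duplicate field
        } catch (IllegalStateException e) {
            System.out.println(e.getMessage());
        }
    }
}
```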
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSinkFactoryContext.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSinkFactoryContext.java
index 9565bad6a03..b83c1087e20 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSinkFactoryContext.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSinkFactoryContext.java
@@ -18,21 +18,27 @@
package org.apache.seatunnel.api.table.factory;
import org.apache.seatunnel.api.configuration.ReadonlyConfig;
-import org.apache.seatunnel.api.sink.TablePlaceholder;
+import org.apache.seatunnel.api.sink.TablePlaceholderProcessor;
import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import com.google.common.annotations.VisibleForTesting;
import lombok.Getter;
import java.util.Collection;
+import java.util.Collections;
@Getter
public class TableSinkFactoryContext extends TableFactoryContext {
private final CatalogTable catalogTable;
- protected TableSinkFactoryContext(
+ @VisibleForTesting
+ public TableSinkFactoryContext(
CatalogTable catalogTable, ReadonlyConfig options, ClassLoader classLoader) {
super(options, classLoader);
+ if (catalogTable != null) {
+ checkCatalogTableIllegal(Collections.singletonList(catalogTable));
+ }
this.catalogTable = catalogTable;
}
@@ -42,7 +48,7 @@ public static TableSinkFactoryContext replacePlaceholderAndCreate(
ClassLoader classLoader,
Collection excludeTablePlaceholderReplaceKeys) {
ReadonlyConfig rewriteConfig =
- TablePlaceholder.replaceTablePlaceholder(
+ TablePlaceholderProcessor.replaceTablePlaceholder(
options, catalogTable, excludeTablePlaceholderReplaceKeys);
return new TableSinkFactoryContext(catalogTable, rewriteConfig, classLoader);
}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableTransformFactoryContext.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableTransformFactoryContext.java
index bf8176c7a8d..8e274a8e5e5 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableTransformFactoryContext.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableTransformFactoryContext.java
@@ -32,6 +32,7 @@ public class TableTransformFactoryContext extends TableFactoryContext {
public TableTransformFactoryContext(
List catalogTables, ReadonlyConfig options, ClassLoader classLoader) {
super(options, classLoader);
+ checkCatalogTableIllegal(catalogTables);
this.catalogTables = catalogTables;
}
}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/SchemaChangeType.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/SchemaChangeType.java
new file mode 100644
index 00000000000..e2a08c4e3a4
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/SchemaChangeType.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.table.schema;
+
+public enum SchemaChangeType {
+ /** Add column to table. */
+ ADD_COLUMN,
+ /** Drop column from table. */
+ DROP_COLUMN,
+ /** Update column in table. */
+ UPDATE_COLUMN,
+ /** Rename column in table. */
+ RENAME_COLUMN;
+}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableAddColumnEvent.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/AlterTableAddColumnEvent.java
similarity index 97%
rename from seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableAddColumnEvent.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/AlterTableAddColumnEvent.java
index 7bb2218d885..6b9332b5f7f 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableAddColumnEvent.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/AlterTableAddColumnEvent.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.seatunnel.api.table.event;
+package org.apache.seatunnel.api.table.schema.event;
import org.apache.seatunnel.api.event.EventType;
import org.apache.seatunnel.api.table.catalog.Column;
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableChangeColumnEvent.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/AlterTableChangeColumnEvent.java
similarity index 86%
rename from seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableChangeColumnEvent.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/AlterTableChangeColumnEvent.java
index 672f0998667..e6aa9f5f9e0 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableChangeColumnEvent.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/AlterTableChangeColumnEvent.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.seatunnel.api.table.event;
+package org.apache.seatunnel.api.table.schema.event;
import org.apache.seatunnel.api.event.EventType;
import org.apache.seatunnel.api.table.catalog.Column;
@@ -26,7 +26,10 @@
@Getter
@ToString(callSuper = true)
-public class AlterTableChangeColumnEvent extends AlterTableAddColumnEvent {
+public class AlterTableChangeColumnEvent extends AlterTableColumnEvent {
+ private final Column column;
+ private final boolean first;
+ private final String afterColumn;
private final String oldColumn;
public AlterTableChangeColumnEvent(
@@ -35,8 +38,11 @@ public AlterTableChangeColumnEvent(
Column column,
boolean first,
String afterColumn) {
- super(tableIdentifier, column, first, afterColumn);
+ super(tableIdentifier);
this.oldColumn = oldColumn;
+ this.column = column;
+ this.first = first;
+ this.afterColumn = afterColumn;
}
public static AlterTableChangeColumnEvent changeFirst(
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableColumnEvent.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/AlterTableColumnEvent.java
similarity index 95%
rename from seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableColumnEvent.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/AlterTableColumnEvent.java
index 97076560f02..5f7aa001483 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableColumnEvent.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/AlterTableColumnEvent.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.seatunnel.api.table.event;
+package org.apache.seatunnel.api.table.schema.event;
import org.apache.seatunnel.api.table.catalog.TableIdentifier;
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableColumnsEvent.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/AlterTableColumnsEvent.java
similarity index 97%
rename from seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableColumnsEvent.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/AlterTableColumnsEvent.java
index ce487681767..4ebae018748 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableColumnsEvent.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/AlterTableColumnsEvent.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.seatunnel.api.table.event;
+package org.apache.seatunnel.api.table.schema.event;
import org.apache.seatunnel.api.event.EventType;
import org.apache.seatunnel.api.table.catalog.TableIdentifier;
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableDropColumnEvent.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/AlterTableDropColumnEvent.java
similarity index 96%
rename from seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableDropColumnEvent.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/AlterTableDropColumnEvent.java
index ea4b204142a..f67c310527d 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableDropColumnEvent.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/AlterTableDropColumnEvent.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.seatunnel.api.table.event;
+package org.apache.seatunnel.api.table.schema.event;
import org.apache.seatunnel.api.event.EventType;
import org.apache.seatunnel.api.table.catalog.TableIdentifier;
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableEvent.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/AlterTableEvent.java
similarity index 95%
rename from seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableEvent.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/AlterTableEvent.java
index 475dd1ce77a..b624c6f03a7 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableEvent.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/AlterTableEvent.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.seatunnel.api.table.event;
+package org.apache.seatunnel.api.table.schema.event;
import org.apache.seatunnel.api.table.catalog.TableIdentifier;
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableModifyColumnEvent.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/AlterTableModifyColumnEvent.java
similarity index 84%
rename from seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableModifyColumnEvent.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/AlterTableModifyColumnEvent.java
index 342d24ce73f..0cc93804dae 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableModifyColumnEvent.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/AlterTableModifyColumnEvent.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.seatunnel.api.table.event;
+package org.apache.seatunnel.api.table.schema.event;
import org.apache.seatunnel.api.event.EventType;
import org.apache.seatunnel.api.table.catalog.Column;
@@ -26,10 +26,17 @@
@Getter
@ToString(callSuper = true)
-public class AlterTableModifyColumnEvent extends AlterTableAddColumnEvent {
+public class AlterTableModifyColumnEvent extends AlterTableColumnEvent {
+ private final Column column;
+ private final boolean first;
+ private final String afterColumn;
+
public AlterTableModifyColumnEvent(
TableIdentifier tableIdentifier, Column column, boolean first, String afterColumn) {
- super(tableIdentifier, column, first, afterColumn);
+ super(tableIdentifier);
+ this.column = column;
+ this.first = first;
+ this.afterColumn = afterColumn;
}
public static AlterTableModifyColumnEvent modifyFirst(
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableNameEvent.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/AlterTableNameEvent.java
similarity index 93%
rename from seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableNameEvent.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/AlterTableNameEvent.java
index 9454f6a5469..4d642630353 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/AlterTableNameEvent.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/AlterTableNameEvent.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.seatunnel.api.table.event;
+package org.apache.seatunnel.api.table.schema.event;
import org.apache.seatunnel.api.event.EventType;
import org.apache.seatunnel.api.table.catalog.TableIdentifier;
@@ -26,7 +26,7 @@
@Getter
@ToString(callSuper = true)
-public class AlterTableNameEvent extends AlterTableColumnEvent {
+public class AlterTableNameEvent extends AlterTableEvent {
private final TableIdentifier newTableIdentifier;
public AlterTableNameEvent(
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/SchemaChangeEvent.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/SchemaChangeEvent.java
similarity index 78%
rename from seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/SchemaChangeEvent.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/SchemaChangeEvent.java
index b3d73db9f15..2fbc96e4034 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/SchemaChangeEvent.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/SchemaChangeEvent.java
@@ -15,9 +15,10 @@
* limitations under the License.
*/
-package org.apache.seatunnel.api.table.event;
+package org.apache.seatunnel.api.table.schema.event;
import org.apache.seatunnel.api.event.Event;
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
import org.apache.seatunnel.api.table.catalog.TableIdentifier;
import org.apache.seatunnel.api.table.catalog.TablePath;
@@ -39,4 +40,18 @@ default TablePath tablePath() {
* @return
*/
TableIdentifier tableIdentifier();
+
+ /**
+ * Get the table struct after the change
+ *
+ * @return the table struct after the change
+ */
+ CatalogTable getChangeAfter();
+
+ /**
+ * Set the table struct after the change
+ *
+ * @param table the table struct after the change
+ */
+ void setChangeAfter(CatalogTable table);
}
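
A minimal sketch (not part of the patch) of how a downstream component might consume the new change-after struct on `SchemaChangeEvent`; the writer class and its method names are hypothetical, and `CatalogTable#getTableSchema()` is assumed to be available on the catalog API.

```java
import org.apache.seatunnel.api.table.catalog.CatalogTable;
import org.apache.seatunnel.api.table.catalog.TableSchema;
import org.apache.seatunnel.api.table.schema.event.SchemaChangeEvent;

// Hypothetical sink-side helper that keeps the latest schema reported by schema-change events.
public final class SchemaChangeAwareWriterSketch {
    private volatile TableSchema currentSchema;

    public void onSchemaChange(SchemaChangeEvent event) {
        CatalogTable after = event.getChangeAfter(); // table struct after the change
        if (after != null) {
            this.currentSchema = after.getTableSchema(); // assumes CatalogTable#getTableSchema()
        }
    }

    public TableSchema currentSchema() {
        return currentSchema;
    }
}
```
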
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/TableEvent.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/TableEvent.java
similarity index 90%
rename from seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/TableEvent.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/TableEvent.java
index af08377a9cb..d10a55c7ba0 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/TableEvent.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/event/TableEvent.java
@@ -15,8 +15,9 @@
* limitations under the License.
*/
-package org.apache.seatunnel.api.table.event;
+package org.apache.seatunnel.api.table.schema.event;
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
import org.apache.seatunnel.api.table.catalog.TableIdentifier;
import org.apache.seatunnel.api.table.catalog.TablePath;
@@ -34,6 +35,7 @@ public abstract class TableEvent implements SchemaChangeEvent {
@Getter @Setter private String jobId;
@Getter @Setter private String statement;
@Getter @Setter protected String sourceDialectName;
+ @Getter @Setter private CatalogTable changeAfter;
@Override
public TableIdentifier tableIdentifier() {
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/handler/AlterTableEventHandler.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/handler/AlterTableEventHandler.java
similarity index 86%
rename from seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/handler/AlterTableEventHandler.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/handler/AlterTableEventHandler.java
index b0972ec68a0..a55d33f16aa 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/handler/AlterTableEventHandler.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/handler/AlterTableEventHandler.java
@@ -15,18 +15,18 @@
* limitations under the License.
*/
-package org.apache.seatunnel.api.table.event.handler;
+package org.apache.seatunnel.api.table.schema.handler;
import org.apache.seatunnel.api.table.catalog.Column;
-import org.apache.seatunnel.api.table.event.AlterTableAddColumnEvent;
-import org.apache.seatunnel.api.table.event.AlterTableChangeColumnEvent;
-import org.apache.seatunnel.api.table.event.AlterTableColumnEvent;
-import org.apache.seatunnel.api.table.event.AlterTableColumnsEvent;
-import org.apache.seatunnel.api.table.event.AlterTableDropColumnEvent;
-import org.apache.seatunnel.api.table.event.AlterTableEvent;
-import org.apache.seatunnel.api.table.event.AlterTableModifyColumnEvent;
-import org.apache.seatunnel.api.table.event.AlterTableNameEvent;
-import org.apache.seatunnel.api.table.event.SchemaChangeEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableAddColumnEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableChangeColumnEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableColumnEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableColumnsEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableDropColumnEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableModifyColumnEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableNameEvent;
+import org.apache.seatunnel.api.table.schema.event.SchemaChangeEvent;
import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
@@ -35,6 +35,8 @@
import java.util.LinkedList;
import java.util.List;
+/** @deprecated use {@link AlterTableSchemaEventHandler} instead */
+@Deprecated
public class AlterTableEventHandler implements DataTypeChangeEventHandler {
private SeaTunnelRowType dataType;
@@ -154,10 +156,19 @@ private SeaTunnelRowType applyChangeColumn(
String oldColumn = changeColumnEvent.getOldColumn();
int oldColumnIndex = dataType.indexOf(oldColumn);
+ // A rename-column operation only carries the old and new column names,
+ // so fill in the data type from the old column.
+ SeaTunnelDataType<?> fieldType = dataType.getFieldType(oldColumnIndex);
+ Column column = changeColumnEvent.getColumn();
+ if (column.getDataType() == null) {
+ column = column.copy(fieldType);
+ }
+
return applyModifyColumn(
dataType,
oldColumnIndex,
- changeColumnEvent.getColumn(),
+ column,
changeColumnEvent.isFirst(),
changeColumnEvent.getAfterColumn());
}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/handler/AlterTableSchemaEventHandler.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/handler/AlterTableSchemaEventHandler.java
new file mode 100644
index 00000000000..43f92a0a3eb
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/handler/AlterTableSchemaEventHandler.java
@@ -0,0 +1,197 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.table.schema.handler;
+
+import org.apache.seatunnel.api.table.catalog.Column;
+import org.apache.seatunnel.api.table.catalog.TableSchema;
+import org.apache.seatunnel.api.table.schema.event.AlterTableAddColumnEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableChangeColumnEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableColumnEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableColumnsEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableDropColumnEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableModifyColumnEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableNameEvent;
+import org.apache.seatunnel.api.table.schema.event.SchemaChangeEvent;
+import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
+
+import java.util.Arrays;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.stream.Collectors;
+
+public class AlterTableSchemaEventHandler implements TableSchemaChangeEventHandler {
+ private TableSchema schema;
+
+ @Override
+ public TableSchema get() {
+ return schema;
+ }
+
+ @Override
+ public TableSchemaChangeEventHandler reset(TableSchema schema) {
+ this.schema = schema;
+ return this;
+ }
+
+ @Override
+ public TableSchema apply(SchemaChangeEvent event) {
+ AlterTableEvent alterTableEvent = (AlterTableEvent) event;
+ return apply(schema, alterTableEvent);
+ }
+
+ private TableSchema apply(TableSchema schema, AlterTableEvent alterTableEvent) {
+ if (alterTableEvent instanceof AlterTableNameEvent) {
+ return schema;
+ }
+ if (alterTableEvent instanceof AlterTableDropColumnEvent) {
+ return applyDropColumn(schema, (AlterTableDropColumnEvent) alterTableEvent);
+ }
+ if (alterTableEvent instanceof AlterTableModifyColumnEvent) {
+ return applyModifyColumn(schema, (AlterTableModifyColumnEvent) alterTableEvent);
+ }
+ if (alterTableEvent instanceof AlterTableChangeColumnEvent) {
+ return applyChangeColumn(schema, (AlterTableChangeColumnEvent) alterTableEvent);
+ }
+ if (alterTableEvent instanceof AlterTableAddColumnEvent) {
+ return applyAddColumn(schema, (AlterTableAddColumnEvent) alterTableEvent);
+ }
+ if (alterTableEvent instanceof AlterTableColumnsEvent) {
+ TableSchema newSchema = schema;
+ for (AlterTableColumnEvent columnEvent :
+ ((AlterTableColumnsEvent) alterTableEvent).getEvents()) {
+ newSchema = apply(newSchema, columnEvent);
+ }
+ return newSchema;
+ }
+
+ throw new UnsupportedOperationException(
+ "Unsupported alter table event: " + alterTableEvent);
+ }
+
+ private TableSchema applyAddColumn(
+ TableSchema schema, AlterTableAddColumnEvent addColumnEvent) {
+ LinkedList<String> originFields = new LinkedList<>(Arrays.asList(schema.getFieldNames()));
+ Column column = addColumnEvent.getColumn();
+ if (originFields.contains(column.getName())) {
+ return applyModifyColumn(
+ schema,
+ new AlterTableModifyColumnEvent(
+ addColumnEvent.tableIdentifier(),
+ addColumnEvent.getColumn(),
+ addColumnEvent.isFirst(),
+ addColumnEvent.getAfterColumn()));
+ }
+
+ LinkedList<Column> newColumns = new LinkedList<>(schema.getColumns());
+ if (addColumnEvent.isFirst()) {
+ newColumns.addFirst(column);
+ } else if (addColumnEvent.getAfterColumn() != null) {
+ int index = originFields.indexOf(addColumnEvent.getAfterColumn());
+ newColumns.add(index + 1, column);
+ } else {
+ newColumns.addLast(column);
+ }
+
+ return TableSchema.builder()
+ .columns(newColumns)
+ .primaryKey(schema.getPrimaryKey())
+ .constraintKey(schema.getConstraintKeys())
+ .build();
+ }
+
+ private TableSchema applyDropColumn(
+ TableSchema schema, AlterTableDropColumnEvent dropColumnEvent) {
+ List<Column> newColumns =
+ schema.getColumns().stream()
+ .filter(c -> !c.getName().equals(dropColumnEvent.getColumn()))
+ .collect(Collectors.toList());
+
+ return TableSchema.builder()
+ .columns(newColumns)
+ .primaryKey(schema.getPrimaryKey())
+ .constraintKey(schema.getConstraintKeys())
+ .build();
+ }
+
+ private TableSchema applyModifyColumn(
+ TableSchema schema, AlterTableModifyColumnEvent modifyColumnEvent) {
+ List<String> fieldNames = Arrays.asList(schema.getFieldNames());
+ if (!fieldNames.contains(modifyColumnEvent.getColumn().getName())) {
+ return schema;
+ }
+
+ String modifyColumnName = modifyColumnEvent.getColumn().getName();
+ int modifyColumnIndex = fieldNames.indexOf(modifyColumnName);
+ return applyModifyColumn(
+ schema,
+ modifyColumnIndex,
+ modifyColumnEvent.getColumn(),
+ modifyColumnEvent.isFirst(),
+ modifyColumnEvent.getAfterColumn());
+ }
+
+ private TableSchema applyChangeColumn(
+ TableSchema schema, AlterTableChangeColumnEvent changeColumnEvent) {
+ String oldColumn = changeColumnEvent.getOldColumn();
+ int oldColumnIndex = schema.indexOf(oldColumn);
+
+ // A rename-column operation only carries the old and new column names,
+ // so fill in the data type from the old column.
+ Column column = changeColumnEvent.getColumn();
+ if (column.getDataType() == null) {
+ SeaTunnelDataType<?> fieldType = schema.getColumn(oldColumn).getDataType();
+ column = column.copy(fieldType);
+ }
+
+ return applyModifyColumn(
+ schema,
+ oldColumnIndex,
+ column,
+ changeColumnEvent.isFirst(),
+ changeColumnEvent.getAfterColumn());
+ }
+
+ private TableSchema applyModifyColumn(
+ TableSchema schema, int columnIndex, Column column, boolean first, String afterColumn) {
+ LinkedList<Column> originColumns = new LinkedList<>(schema.getColumns());
+
+ if (first) {
+ originColumns.remove(columnIndex);
+ originColumns.addFirst(column);
+ } else if (afterColumn != null) {
+ originColumns.remove(columnIndex);
+
+ int index =
+ originColumns.stream()
+ .filter(c -> c.getName().equals(afterColumn))
+ .findFirst()
+ .map(originColumns::indexOf)
+ .get();
+ originColumns.add(index + 1, column);
+ } else {
+ originColumns.set(columnIndex, column);
+ }
+ return TableSchema.builder()
+ .columns(originColumns)
+ .primaryKey(schema.getPrimaryKey())
+ .constraintKey(schema.getConstraintKeys())
+ .build();
+ }
+}
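
A minimal usage sketch (not part of the patch) of the new handler. It relies only on the `AlterTableModifyColumnEvent(TableIdentifier, Column, boolean, String)` constructor and the `reset(...)`/`apply(...)` calls shown above; the wrapper class and its method name are hypothetical.

```java
import org.apache.seatunnel.api.table.catalog.Column;
import org.apache.seatunnel.api.table.catalog.TableIdentifier;
import org.apache.seatunnel.api.table.catalog.TableSchema;
import org.apache.seatunnel.api.table.schema.event.AlterTableModifyColumnEvent;
import org.apache.seatunnel.api.table.schema.handler.AlterTableSchemaEventHandler;

// Hypothetical helper: replace a column definition in place, keeping its current position.
public final class SchemaEvolutionSketch {
    public static TableSchema modifyColumn(
            TableSchema current, TableIdentifier table, Column newDefinition) {
        // first = false and afterColumn = null leave the column where it already is
        AlterTableModifyColumnEvent event =
                new AlterTableModifyColumnEvent(table, newDefinition, false, null);
        // reset(...) seeds the handler with the current schema, apply(...) returns the evolved one
        return new AlterTableSchemaEventHandler().reset(current).apply(event);
    }
}
```
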
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/handler/DataTypeChangeEventDispatcher.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/handler/DataTypeChangeEventDispatcher.java
similarity index 78%
rename from seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/handler/DataTypeChangeEventDispatcher.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/handler/DataTypeChangeEventDispatcher.java
index ec4f69334f7..0fd1e7f6ab7 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/handler/DataTypeChangeEventDispatcher.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/handler/DataTypeChangeEventDispatcher.java
@@ -15,16 +15,16 @@
* limitations under the License.
*/
-package org.apache.seatunnel.api.table.event.handler;
+package org.apache.seatunnel.api.table.schema.handler;
-import org.apache.seatunnel.api.table.event.AlterTableAddColumnEvent;
-import org.apache.seatunnel.api.table.event.AlterTableChangeColumnEvent;
-import org.apache.seatunnel.api.table.event.AlterTableColumnsEvent;
-import org.apache.seatunnel.api.table.event.AlterTableDropColumnEvent;
-import org.apache.seatunnel.api.table.event.AlterTableEvent;
-import org.apache.seatunnel.api.table.event.AlterTableModifyColumnEvent;
-import org.apache.seatunnel.api.table.event.AlterTableNameEvent;
-import org.apache.seatunnel.api.table.event.SchemaChangeEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableAddColumnEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableChangeColumnEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableColumnsEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableDropColumnEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableModifyColumnEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableNameEvent;
+import org.apache.seatunnel.api.table.schema.event.SchemaChangeEvent;
import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
import lombok.extern.slf4j.Slf4j;
@@ -32,6 +32,8 @@
import java.util.HashMap;
import java.util.Map;
+/** @deprecated use {@link TableSchemaChangeEventDispatcher} instead */
+@Deprecated
@Slf4j
public class DataTypeChangeEventDispatcher implements DataTypeChangeEventHandler {
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/handler/DataTypeChangeEventHandler.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/handler/DataTypeChangeEventHandler.java
similarity index 88%
rename from seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/handler/DataTypeChangeEventHandler.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/handler/DataTypeChangeEventHandler.java
index 01d8924d531..3202d0eb53e 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/handler/DataTypeChangeEventHandler.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/handler/DataTypeChangeEventHandler.java
@@ -15,11 +15,13 @@
* limitations under the License.
*/
-package org.apache.seatunnel.api.table.event.handler;
+package org.apache.seatunnel.api.table.schema.handler;
-import org.apache.seatunnel.api.table.event.SchemaChangeEvent;
+import org.apache.seatunnel.api.table.schema.event.SchemaChangeEvent;
import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+/** @deprecated use {@link TableSchemaChangeEventHandler} instead */
+@Deprecated
public interface DataTypeChangeEventHandler extends SchemaChangeEventHandler<SeaTunnelRowType> {
SeaTunnelRowType get();
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/handler/SchemaChangeEventHandler.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/handler/SchemaChangeEventHandler.java
similarity index 88%
rename from seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/handler/SchemaChangeEventHandler.java
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/handler/SchemaChangeEventHandler.java
index 167dc6cc315..fc28134c33a 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/event/handler/SchemaChangeEventHandler.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/handler/SchemaChangeEventHandler.java
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package org.apache.seatunnel.api.table.event.handler;
+package org.apache.seatunnel.api.table.schema.handler;
-import org.apache.seatunnel.api.table.event.SchemaChangeEvent;
+import org.apache.seatunnel.api.table.schema.event.SchemaChangeEvent;
import java.io.Serializable;
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/handler/TableSchemaChangeEventDispatcher.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/handler/TableSchemaChangeEventDispatcher.java
new file mode 100644
index 00000000000..37cef6c5c3b
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/handler/TableSchemaChangeEventDispatcher.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.table.schema.handler;
+
+import org.apache.seatunnel.api.table.catalog.TableSchema;
+import org.apache.seatunnel.api.table.schema.event.AlterTableAddColumnEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableChangeColumnEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableColumnsEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableDropColumnEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableModifyColumnEvent;
+import org.apache.seatunnel.api.table.schema.event.AlterTableNameEvent;
+import org.apache.seatunnel.api.table.schema.event.SchemaChangeEvent;
+
+import lombok.extern.slf4j.Slf4j;
+
+import java.util.HashMap;
+import java.util.Map;
+
+@Slf4j
+public class TableSchemaChangeEventDispatcher implements TableSchemaChangeEventHandler {
+
+ private final Map<Class<? extends SchemaChangeEvent>, TableSchemaChangeEventHandler> handlers;
+ private TableSchema schema;
+
+ public TableSchemaChangeEventDispatcher() {
+ this.handlers = createHandlers();
+ }
+
+ @Override
+ public TableSchema get() {
+ return schema;
+ }
+
+ @Override
+ public TableSchemaChangeEventHandler reset(TableSchema schema) {
+ this.schema = schema;
+ return this;
+ }
+
+ @Override
+ public TableSchema apply(SchemaChangeEvent event) {
+ TableSchemaChangeEventHandler handler = handlers.get(event.getClass());
+ if (handler == null) {
+ log.warn("Not found handler for event: {}", event.getClass());
+ return schema;
+ }
+ return handler.reset(schema).apply(event);
+ }
+
+ private static Map<Class<? extends SchemaChangeEvent>, TableSchemaChangeEventHandler> createHandlers() {
+ Map<Class<? extends SchemaChangeEvent>, TableSchemaChangeEventHandler> handlers = new HashMap<>();
+
+ AlterTableSchemaEventHandler alterTableEventHandler = new AlterTableSchemaEventHandler();
+ handlers.put(AlterTableEvent.class, alterTableEventHandler);
+ handlers.put(AlterTableNameEvent.class, alterTableEventHandler);
+ handlers.put(AlterTableColumnsEvent.class, alterTableEventHandler);
+ handlers.put(AlterTableAddColumnEvent.class, alterTableEventHandler);
+ handlers.put(AlterTableModifyColumnEvent.class, alterTableEventHandler);
+ handlers.put(AlterTableDropColumnEvent.class, alterTableEventHandler);
+ handlers.put(AlterTableChangeColumnEvent.class, alterTableEventHandler);
+ return handlers;
+ }
+}
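
A minimal sketch (not part of the patch) of the dispatcher's `reset`/`handle` contract; the cache class is hypothetical, while the dispatcher and the default `handle(...)` method come from this patch.

```java
import org.apache.seatunnel.api.table.catalog.TableSchema;
import org.apache.seatunnel.api.table.schema.event.SchemaChangeEvent;
import org.apache.seatunnel.api.table.schema.handler.TableSchemaChangeEventDispatcher;

// Hypothetical per-table schema cache that is kept current as schema-change events arrive.
public final class SchemaCacheSketch {
    private final TableSchemaChangeEventDispatcher dispatcher =
            new TableSchemaChangeEventDispatcher();

    public TableSchema onEvent(TableSchema current, SchemaChangeEvent event) {
        // reset(...) seeds the dispatcher; handle(...) applies the event and clears the state again
        return dispatcher.reset(current).handle(event);
    }
}
```
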
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/handler/TableSchemaChangeEventHandler.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/handler/TableSchemaChangeEventHandler.java
new file mode 100644
index 00000000000..b411217a576
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/schema/handler/TableSchemaChangeEventHandler.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.table.schema.handler;
+
+import org.apache.seatunnel.api.table.catalog.TableSchema;
+import org.apache.seatunnel.api.table.schema.event.SchemaChangeEvent;
+
+public interface TableSchemaChangeEventHandler extends SchemaChangeEventHandler<TableSchema> {
+
+ TableSchema get();
+
+ TableSchemaChangeEventHandler reset(TableSchema schema);
+
+ default TableSchema handle(SchemaChangeEvent event) {
+ if (get() == null) {
+ throw new IllegalStateException("Handler not reset");
+ }
+
+ try {
+ return apply(event);
+ } finally {
+ reset(null);
+ if (get() != null) {
+ throw new IllegalStateException("Handler not reset");
+ }
+ }
+ }
+
+ TableSchema apply(SchemaChangeEvent event);
+}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/CommonOptions.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/CommonOptions.java
new file mode 100644
index 00000000000..8b5b36682a8
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/CommonOptions.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.table.type;
+
+import org.apache.seatunnel.api.table.catalog.Column;
+
+import lombok.Getter;
+
+/**
+ * Common option keys of SeaTunnel {@link Column#getOptions()} / {@link SeaTunnelRow#getOptions()}.
+ * Used to store extra information about a column or row value.
+ */
+@Getter
+public enum CommonOptions {
+ /**
+ * The key of {@link Column#getOptions()} to specify that the column value is a JSON-formatted string.
+ */
+ JSON("Json", false),
+ /** The key of {@link Column#getOptions()} to specify that the column value is a metadata field. */
+ METADATA("Metadata", false),
+ /**
+ * The key of {@link SeaTunnelRow#getOptions()} to store the partition value of the row value.
+ */
+ PARTITION("Partition", true),
+ /**
+ * The key of {@link SeaTunnelRow#getOptions()} to store the DATABASE value of the row value.
+ */
+ DATABASE("Database", true),
+ /** The key of {@link SeaTunnelRow#getOptions()} to store the TABLE value of the row value. */
+ TABLE("Table", true),
+ /**
+ * The key of {@link SeaTunnelRow#getOptions()} to store the ROW_KIND value of the row value.
+ */
+ ROW_KIND("RowKind", true),
+ /**
+ * The key of {@link SeaTunnelRow#getOptions()} to store the EVENT_TIME value of the row value.
+ */
+ EVENT_TIME("EventTime", true),
+ /** The key of {@link SeaTunnelRow#getOptions()} to store the DELAY value of the row value. */
+ DELAY("Delay", true);
+
+ private final String name;
+ private final boolean supportMetadataTrans;
+
+ CommonOptions(String name, boolean supportMetadataTrans) {
+ this.name = name;
+ this.supportMetadataTrans = supportMetadataTrans;
+ }
+
+ public static CommonOptions fromName(String name) {
+ for (CommonOptions option : CommonOptions.values()) {
+ if (option.getName().equals(name)) {
+ return option;
+ }
+ }
+ throw new IllegalArgumentException("Unknown option name: " + name);
+ }
+}
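
A minimal sketch (not part of the patch) of resolving one of these option keys by its string name; the helper class is hypothetical, while `fromName(...)` and the Lombok-generated `isSupportMetadataTrans()` come from the enum above.

```java
import org.apache.seatunnel.api.table.type.CommonOptions;

// Hypothetical helper: look up an option key by name and check whether its value
// may be surfaced as a metadata field.
public final class CommonOptionsSketch {
    public static boolean canTransferAsMetadata(String optionName) {
        CommonOptions option = CommonOptions.fromName(optionName); // throws on unknown names
        return option.isSupportMetadataTrans(); // generated by Lombok's @Getter
    }

    public static void main(String[] args) {
        System.out.println(canTransferAsMetadata("Partition")); // true
        System.out.println(canTransferAsMetadata("Json"));      // false
    }
}
```
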
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/MetadataUtil.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/MetadataUtil.java
new file mode 100644
index 00000000000..42ab2035768
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/MetadataUtil.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.table.type;
+
+import org.apache.seatunnel.api.table.catalog.TablePath;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+import java.util.stream.Stream;
+
+import static org.apache.seatunnel.api.table.type.CommonOptions.DELAY;
+import static org.apache.seatunnel.api.table.type.CommonOptions.EVENT_TIME;
+import static org.apache.seatunnel.api.table.type.CommonOptions.PARTITION;
+
+public class MetadataUtil {
+
+ public static final List<String> METADATA_FIELDS;
+
+ static {
+ METADATA_FIELDS = new ArrayList<>();
+ Stream.of(CommonOptions.values())
+ .filter(CommonOptions::isSupportMetadataTrans)
+ .map(CommonOptions::getName)
+ .forEach(METADATA_FIELDS::add);
+ }
+
+ public static void setDelay(SeaTunnelRow row, Long delay) {
+ row.getOptions().put(DELAY.getName(), delay);
+ }
+
+ public static void setPartition(SeaTunnelRow row, String[] partition) {
+ row.getOptions().put(PARTITION.getName(), partition);
+ }
+
+ public static void setEventTime(SeaTunnelRow row, Long delay) {
+ row.getOptions().put(EVENT_TIME.getName(), delay);
+ }
+
+ public static Long getDelay(SeaTunnelRowAccessor row) {
+ return (Long) row.getOptions().get(DELAY.getName());
+ }
+
+ public static String getDatabase(SeaTunnelRowAccessor row) {
+ if (row.getTableId() == null) {
+ return null;
+ }
+ return TablePath.of(row.getTableId()).getDatabaseName();
+ }
+
+ public static String getTable(SeaTunnelRowAccessor row) {
+ if (row.getTableId() == null) {
+ return null;
+ }
+ return TablePath.of(row.getTableId()).getTableName();
+ }
+
+ public static String getRowKind(SeaTunnelRowAccessor row) {
+ return row.getRowKind().shortString();
+ }
+
+ public static String getPartitionStr(SeaTunnelRowAccessor row) {
+ Object partition = row.getOptions().get(PARTITION.getName());
+ return Objects.nonNull(partition) ? String.join(",", (String[]) partition) : null;
+ }
+
+ public static String[] getPartition(SeaTunnelRowAccessor row) {
+ return (String[]) row.getOptions().get(PARTITION.getName());
+ }
+
+ public static Long getEventTime(SeaTunnelRowAccessor row) {
+ return (Long) row.getOptions().get(EVENT_TIME.getName());
+ }
+
+ public static boolean isMetadataField(String fieldName) {
+ return METADATA_FIELDS.contains(fieldName);
+ }
+}
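
A minimal sketch (not part of the patch) of writing and reading row metadata through `MetadataUtil`; it uses only the `SeaTunnelRow(int arity)` constructor and the `SeaTunnelRowAccessor` introduced later in this patch.

```java
import org.apache.seatunnel.api.table.type.MetadataUtil;
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
import org.apache.seatunnel.api.table.type.SeaTunnelRowAccessor;

// Stamp event-time and partition metadata on a row, then read it back via the read-only accessor.
public final class MetadataUtilSketch {
    public static void main(String[] args) {
        SeaTunnelRow row = new SeaTunnelRow(2);
        MetadataUtil.setEventTime(row, System.currentTimeMillis());
        MetadataUtil.setPartition(row, new String[] {"region=eu", "day=2024-01-01"});

        SeaTunnelRowAccessor accessor = new SeaTunnelRowAccessor(row);
        System.out.println(MetadataUtil.getEventTime(accessor));
        System.out.println(MetadataUtil.getPartitionStr(accessor)); // "region=eu,day=2024-01-01"
    }
}
```
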
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRow.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRow.java
index 95a36b796c4..84e172f2dfd 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRow.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRow.java
@@ -18,7 +18,9 @@
package org.apache.seatunnel.api.table.type;
import java.io.Serializable;
+import java.nio.ByteBuffer;
import java.util.Arrays;
+import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
@@ -28,10 +30,12 @@ public final class SeaTunnelRow implements Serializable {
/** Table identifier. */
private String tableId = "";
/** The kind of change that a row describes in a changelog. */
- private RowKind kind = RowKind.INSERT;
+ private RowKind rowKind = RowKind.INSERT;
/** The array to store the actual internal format values. */
private final Object[] fields;
+ private Map<String, Object> options;
+
private volatile int size;
public SeaTunnelRow(int arity) {
@@ -50,8 +54,12 @@ public void setTableId(String tableId) {
this.tableId = tableId;
}
- public void setRowKind(RowKind kind) {
- this.kind = kind;
+ public void setRowKind(RowKind rowKind) {
+ this.rowKind = rowKind;
+ }
+
+ public void setOptions(Map<String, Object> options) {
+ this.options = options;
}
public int getArity() {
@@ -63,7 +71,14 @@ public String getTableId() {
}
public RowKind getRowKind() {
- return this.kind;
+ return this.rowKind;
+ }
+
+ public Map<String, Object> getOptions() {
+ if (options == null) {
+ options = new HashMap<>();
+ }
+ return options;
}
public Object[] getFields() {
@@ -142,7 +157,12 @@ private int getBytesForValue(Object v, SeaTunnelDataType> dataType) {
case TIMESTAMP:
return 48;
case FLOAT_VECTOR:
- return getArrayNotNullSize((Object[]) v) * 4;
+ case FLOAT16_VECTOR:
+ case BFLOAT16_VECTOR:
+ case BINARY_VECTOR:
+ return ((ByteBuffer) v).capacity();
+ case SPARSE_FLOAT_VECTOR:
+ return ((Map<?, ?>) v).entrySet().size() * 8;
case ARRAY:
SeaTunnelDataType elementType = ((ArrayType) dataType).getElementType();
if (elementType instanceof DecimalType) {
@@ -289,6 +309,9 @@ private int getBytesForValue(Object v) {
size += getBytesForValue(entry.getKey()) + getBytesForValue(entry.getValue());
}
return size;
+ case "HeapByteBuffer":
+ case "ByteBuffer":
+ return ((ByteBuffer) v).capacity();
case "SeaTunnelRow":
int rowSize = 0;
SeaTunnelRow row = (SeaTunnelRow) v;
@@ -320,13 +343,13 @@ public boolean equals(Object o) {
}
SeaTunnelRow that = (SeaTunnelRow) o;
return Objects.equals(tableId, that.tableId)
- && kind == that.kind
+ && rowKind == that.rowKind
&& Arrays.deepEquals(fields, that.fields);
}
@Override
public int hashCode() {
- int result = Objects.hash(tableId, kind);
+ int result = Objects.hash(tableId, rowKind);
result = 31 * result + Arrays.deepHashCode(fields);
return result;
}
@@ -337,7 +360,7 @@ public String toString() {
+ "tableId="
+ tableId
+ ", kind="
- + kind.shortString()
+ + rowKind.shortString()
+ ", fields="
+ Arrays.toString(fields)
+ '}';
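
A minimal sketch (not part of the patch) of the renamed `rowKind` accessor and the lazily created per-row options map; the sketch class is hypothetical.

```java
import org.apache.seatunnel.api.table.type.RowKind;
import org.apache.seatunnel.api.table.type.SeaTunnelRow;

// Mark a row as the "after" image of an update and attach an ad-hoc option to it.
public final class SeaTunnelRowSketch {
    public static void main(String[] args) {
        SeaTunnelRow row = new SeaTunnelRow(2);
        row.setRowKind(RowKind.UPDATE_AFTER);
        // getOptions() creates the map on first access, so no explicit setOptions(...) is needed
        row.getOptions().put("Partition", new String[] {"region=eu"});
        System.out.println(row.getRowKind().shortString() + " " + row.getOptions().keySet());
    }
}
```
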
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRowAccessor.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRowAccessor.java
new file mode 100644
index 00000000000..6bbca49cd52
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRowAccessor.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.table.type;
+
+import lombok.AllArgsConstructor;
+
+import java.util.Map;
+
+@AllArgsConstructor
+public class SeaTunnelRowAccessor {
+ private final SeaTunnelRow row;
+
+ public int getArity() {
+ return row.getArity();
+ }
+
+ public String getTableId() {
+ return row.getTableId();
+ }
+
+ public RowKind getRowKind() {
+ return row.getRowKind();
+ }
+
+ public Object getField(int pos) {
+ return row.getField(pos);
+ }
+
+ public Object[] getFields() {
+ return row.getFields();
+ }
+
+ public Map<String, Object> getOptions() {
+ return row.getOptions();
+ }
+}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/VectorType.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/VectorType.java
index 39d2849f1a5..75c5dff8a10 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/VectorType.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/VectorType.java
@@ -17,21 +17,29 @@
package org.apache.seatunnel.api.table.type;
+import org.apache.seatunnel.api.annotation.Experimental;
+
import java.nio.ByteBuffer;
import java.util.Map;
import java.util.Objects;
+/**
+ * VectorType represents a vector type in SeaTunnel.
+ *
+ * <p>Experimental feature, use with caution.
+ */
+@Experimental
public class VectorType<T> implements SeaTunnelDataType<T> {
private static final long serialVersionUID = 2L;
- public static final VectorType<Float> VECTOR_FLOAT_TYPE =
- new VectorType<>(Float.class, SqlType.FLOAT_VECTOR);
+ public static final VectorType<ByteBuffer> VECTOR_FLOAT_TYPE =
+ new VectorType<>(ByteBuffer.class, SqlType.FLOAT_VECTOR);
public static final VectorType<Map> VECTOR_SPARSE_FLOAT_TYPE =
new VectorType<>(Map.class, SqlType.SPARSE_FLOAT_VECTOR);
- public static final VectorType<Byte> VECTOR_BINARY_TYPE =
- new VectorType<>(Byte.class, SqlType.BINARY_VECTOR);
+ public static final VectorType<ByteBuffer> VECTOR_BINARY_TYPE =
+ new VectorType<>(ByteBuffer.class, SqlType.BINARY_VECTOR);
public static final VectorType<ByteBuffer> VECTOR_FLOAT16_TYPE =
new VectorType<>(ByteBuffer.class, SqlType.FLOAT16_VECTOR);
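 
A minimal sketch (not part of the patch) of how a float vector is represented after this change: a `ByteBuffer` with four bytes per dimension, which is what `SeaTunnelRow#getBytesForValue` now measures via `capacity()`. The packing helper is hypothetical and uses only JDK APIs.

```java
import java.nio.ByteBuffer;

// Pack a float[] into the ByteBuffer layout now used for FLOAT_VECTOR values.
public final class FloatVectorSketch {
    public static ByteBuffer toFloatVector(float[] values) {
        ByteBuffer buffer = ByteBuffer.allocate(values.length * Float.BYTES);
        for (float value : values) {
            buffer.putFloat(value);
        }
        buffer.flip(); // ready for readers; capacity() still reports 4 * dimension
        return buffer;
    }

    public static void main(String[] args) {
        System.out.println(toFloatVector(new float[] {0.1f, 0.2f, 0.3f}).capacity()); // 12
    }
}
```
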
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/tracing/MDCCallable.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/tracing/MDCCallable.java
new file mode 100644
index 00000000000..f3cae160dab
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/tracing/MDCCallable.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.tracing;
+
+import java.util.concurrent.Callable;
+
+/**
+ * Callable that sets MDC context before calling the delegate and clears it afterwards.
+ *
+ * @param