diff --git a/.gitignore b/.gitignore index 83b6aea2..22c7425d 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ ckmanpasswd migrate znodefix znode_count +cmd/ckmanctl/ckmanctl pkged.go static/dist/ coverage.xml diff --git a/Dockerfile b/Dockerfile index b96e88d1..fb8ec9b7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,10 +3,7 @@ FROM debian:stable-slim RUN mkdir -p /etc/ckman && cd /etc/ckman && \ mkdir bin run logs conf package ADD ./ckman /etc/ckman/bin/ckman -ADD ./migrate /etc/ckman/bin/migrate -ADD ./ckmanpasswd /etc/ckman/bin/ckmanpasswd -ADD ./znodefix /etc/ckman/bin/znodefix -ADD ./znode_count /etc/ckman/bin/znode_count +ADD ./cmd/ckmanctl/ckmanctl /etc/ckman/bin/ckmanctl ADD ./README.md /etc/ckman/package/README.md ADD ./resources/ckman.hjson /etc/ckman/conf ADD ./resources/migrate.hjson /etc/ckman/conf diff --git a/Dockerfile.test b/Dockerfile.test index 12db2d28..73cac1ab 100644 --- a/Dockerfile.test +++ b/Dockerfile.test @@ -2,8 +2,8 @@ # You can run command like: "docker build -f Dockerfile.test -t ckman-clickhouse:centos-7 ." # the offical image is eoitek/ckman-clickhouse:centos-7, You can pull it from dockerhub. -#FROM centos:7 -FROM ccr.ccs.tencentyun.com/library/centos:7 +FROM centos:7 +#FROM ccr.ccs.tencentyun.com/library/centos:7 WORKDIR /var/ RUN yum -y update && yum install -y openssh* \ diff --git a/Makefile b/Makefile index d8d3d5b9..bc0f601f 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,7 @@ GOARCH?=$(shell go env GOARCH) TARNAME=${PKGDIR}-${VERSION}-${DATE}.${OS}.$(GOARCH).tar.gz TAG?=$(shell date +%y%m%d) LDFLAGS=-ldflags "-X main.BuildTimeStamp=${TIME} -X main.GitCommitHash=${REVISION} -X main.Version=${VERSION}" +GCFLAGS=-gcflags "all=-N -l" PUB_KEY=$(shell cat resources/eoi_public_key.pub 2>/dev/null) export GOPROXY=https://goproxy.cn,direct @@ -28,10 +29,13 @@ frontend: backend: @rm -rf ${PKGFULLDIR} go build ${LDFLAGS} - go build ${LDFLAGS} -o ckmanpasswd cmd/password/password.go - go build ${LDFLAGS} -o migrate cmd/migrate/migrate.go - go build ${LDFLAGS} -o znodefix cmd/znodefix/znodefix.go - go build ${LDFLAGS} -o znode_count cmd/znodecnt/znodecount.go + go build ${LDFLAGS} -o cmd/ckmanctl/ckmanctl cmd/ckmanctl/ckmanctl.go + +.PHONY: debug +debug: + @rm -rf ${PKGFULLDIR} + go build ${GCFLAGS} ${LDFLAGS} + go build ${LDFLAGS} -o cmd/ckmanctl/ckmanctl cmd/ckmanctl/ckmanctl.go .PHONY: pre pre: @@ -66,10 +70,7 @@ package:build @rm -rf ${PKGFULLDIR_TMP} @mkdir -p ${PKGFULLDIR_TMP}/bin ${PKGFULLDIR_TMP}/conf ${PKGFULLDIR_TMP}/run ${PKGFULLDIR_TMP}/logs ${PKGFULLDIR_TMP}/package ${PKGFULLDIR_TMP}/dbscript @mv ${SHDIR}/ckman ${PKGFULLDIR_TMP}/bin - @mv ${SHDIR}/ckmanpasswd ${PKGFULLDIR_TMP}/bin - @mv ${SHDIR}/migrate ${PKGFULLDIR_TMP}/bin - @mv ${SHDIR}/znodefix ${PKGFULLDIR_TMP}/bin - @mv ${SHDIR}/znode_count ${PKGFULLDIR_TMP}/bin + @mv ${SHDIR}/cmd/ckmanctl/ckmanctl ${PKGFULLDIR_TMP}/bin @cp ${SHDIR}/resources/start ${PKGFULLDIR_TMP}/bin @cp ${SHDIR}/resources/stop ${PKGFULLDIR_TMP}/bin @cp ${SHDIR}/resources/yaml2json.${GOARCH} ${PKGFULLDIR_TMP}/bin/yaml2json diff --git a/ckconfig/custom.go b/ckconfig/custom.go index 81d64510..688e415f 100644 --- a/ckconfig/custom.go +++ b/ckconfig/custom.go @@ -19,6 +19,7 @@ func root(conf *model.CKManClickHouseConfig, ipv6Enable bool) map[string]interfa output["default_replica_path"] = "/clickhouse/tables/{cluster}/{database}/{table}/{shard}" output["default_replica_name"] = "{replica}" output["tcp_port"] = conf.Port + output["http_port"] = conf.HttpPort if ipv6Enable { output["listen_host"] = "::" } else { @@
-173,6 +174,25 @@ func expert(exp map[string]string) map[string]interface{} { return common.ConvertMapping(output) } +func query_cache() map[string]interface{} { + output := make(map[string]interface{}) + output["query_cache"] = map[string]interface{}{ + "max_size_in_bytes": 1073741824, + "max_entries": 1024, + "max_entry_size_in_bytes": 1048576, + "max_entry_size_in_rows": 30000000, + } + return output +} +func merge_tree_metadata_cache() map[string]interface{} { + output := make(map[string]interface{}) + output["merge_tree_metadata_cache"] = map[string]interface{}{ + "lru_cache_size": 1073741824, + "continue_if_corrupted": true, + } + return output +} + func GenerateCustomXML(filename string, conf *model.CKManClickHouseConfig, ipv6Enable bool) (string, error) { rootTag := "yandex" if common.CompareClickHouseVersion(conf.Version, "22.x") >= 0 { @@ -185,6 +205,12 @@ func GenerateCustomXML(filename string, conf *model.CKManClickHouseConfig, ipv6E mergo.Merge(&custom, system_log()) mergo.Merge(&custom, distributed_ddl(conf.Cluster)) mergo.Merge(&custom, prometheus()) + if common.CompareClickHouseVersion(conf.Version, "22.4.x") >= 0 { + mergo.Merge(&custom, merge_tree_metadata_cache()) + } + if common.CompareClickHouseVersion(conf.Version, "23.4.x") >= 0 { + mergo.Merge(&custom, query_cache()) + } storage_configuration, backups := storage(conf.Storage) mergo.Merge(&custom, storage_configuration) mergo.Merge(&custom, backups) diff --git a/ckconfig/custom_fake.xml b/ckconfig/custom_fake.xml index 5b567b04..b7f5b6c9 100644 --- a/ckconfig/custom_fake.xml +++ b/ckconfig/custom_fake.xml @@ -50,6 +50,10 @@ 0 0 + + true + 1073741824 + 30000 toYYYYMMDD(event_date) diff --git a/ckconfig/custom_test.go b/ckconfig/custom_test.go index 935aa94f..e2f928c0 100644 --- a/ckconfig/custom_test.go +++ b/ckconfig/custom_test.go @@ -111,7 +111,7 @@ func TestGenerateCustomXML(t *testing.T) { Cwd: "/home/eoi/clickhouse", NeedSudo: false, Path: "/data01/", - Version: "22.3.3.44", + Version: "23.3.3.44", } _, err := GenerateCustomXML("custom_fake.xml", conf, true) assert.Nil(t, err) diff --git a/ckconfig/keeper.go b/ckconfig/keeper.go new file mode 100644 index 00000000..112dcab3 --- /dev/null +++ b/ckconfig/keeper.go @@ -0,0 +1,93 @@ +package ckconfig + +import ( + "path" + + "github.com/housepower/ckman/common" + "github.com/housepower/ckman/model" + "github.com/imdario/mergo" +) + +func keeper_root(ipv6Enable bool) map[string]interface{} { + output := make(map[string]interface{}) + if ipv6Enable { + output["listen_host"] = "::" + } else { + output["listen_host"] = "0.0.0.0" + } + output["max_connections"] = 4096 + return output +} + +func keeper_server(conf *model.CKManClickHouseConfig, ipv6Enable bool, idx int) map[string]interface{} { + output := keeper_root(ipv6Enable) + output["logger"] = keeper_logger(conf) + keeper_server := make(map[string]interface{}) + mergo.Merge(&keeper_server, conf.KeeperConf.Expert) + keeper_server["tcp_port"] = conf.KeeperConf.TcpPort + keeper_server["server_id"] = idx + keeper_server["log_storage_path"] = conf.KeeperConf.LogPath + "clickhouse/coordination/logs" + keeper_server["snapshot_storage_path"] = conf.KeeperConf.SnapshotPath + "clickhouse/coordination/snapshots" + keeper_server["coordination_settings"] = coordination_settings(conf.KeeperConf.Coordination) + keeper_server["raft_configuration"] = raft_configuration(conf.KeeperConf) + output["keeper_server"] = keeper_server + return output +} + +// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/Coordination/CoordinationSettings.h +func coordination_settings(coordination model.Coordination) map[string]interface{} { + output := make(map[string]interface{}) + mergo.Merge(&output, coordination.Expert) + output["operation_timeout_ms"] = coordination.OperationTimeoutMs + output["session_timeout_ms"] = coordination.SessionTimeoutMs + if coordination.ForceSync { + output["force_sync"] = "true" + } else { + output["force_sync"] = "false" + } + if coordination.AutoForwarding { + output["auto_forwarding"] = "true" + } else { + output["auto_forwarding"] = "false" + } + return output +} + +func keeper_logger(conf *model.CKManClickHouseConfig) map[string]interface{} { + output := make(map[string]interface{}) + output["level"] = "debug" + output["size"] = "1000M" + output["count"] = 10 + if !conf.NeedSudo { + output["log"] = path.Join(conf.Cwd, "log", "clickhouse-keeper", "clickhouse-keeper.log") + output["errorlog"] = path.Join(conf.Cwd, "log", "clickhouse-keeper", "clickhouse-keeper.err.log") + } + return output + +} + +func raft_configuration(conf *model.KeeperConf) []map[string]interface{} { + var outputs []map[string]interface{} + for idx, node := range conf.KeeperNodes { + output := make(map[string]interface{}) + output["server"] = map[string]interface{}{ + "id": idx + 1, + "hostname": node, + "port": conf.RaftPort, + } + outputs = append(outputs, output) + } + return outputs +} + +func GenerateKeeperXML(filename string, conf *model.CKManClickHouseConfig, ipv6Enable bool, idx int) (string, error) { + xml := common.NewXmlFile(filename) + rootTag := "clickhouse" + xml.Begin(rootTag) + xml.Merge(keeper_server(conf, ipv6Enable, idx)) + xml.End(rootTag) + if err := xml.Dump(); err != nil { + return filename, err + } + return filename, nil +} diff --git a/ckconfig/keeper_fake.xml b/ckconfig/keeper_fake.xml new file mode 100644 index 00000000..63d88b4d --- /dev/null +++ b/ckconfig/keeper_fake.xml @@ -0,0 +1,37 @@ + + + + false + true + 10000 + Information + 30000 + + /var/lib/clickhouse/coordination/log + + + 192.168.101.102 + 1 + 9181 + + + 192.168.101.105 + 2 + 9181 + + + 192.168.101.107 + 3 + 9181 + + + 2 + /var/lib/clickhouse/coordination/snapshots + 9181 + + :: + + debug + + 4096 + diff --git a/ckconfig/keeper_test.go b/ckconfig/keeper_test.go new file mode 100644 index 00000000..05fb0ced --- /dev/null +++ b/ckconfig/keeper_test.go @@ -0,0 +1,27 @@ +package ckconfig + +import ( + "testing" + + "github.com/housepower/ckman/model" + "github.com/stretchr/testify/assert" +) + +func TestGenerateKeeperXML(t *testing.T) { + conf := model.CKManClickHouseConfig{ + KeeperConf: &model.KeeperConf{ + KeeperNodes: []string{"192.168.101.102", "192.168.101.105", "192.168.101.107"}, + TcpPort: 9181, + RaftPort: 9234, + LogPath: "/var/lib/clickhouse/coordination/log", + SnapshotPath: "/var/lib/clickhouse/coordination/snapshots", + Coordination: model.Coordination{ + OperationTimeoutMs: 10000, + SessionTimeoutMs: 30000, + ForceSync: true, + }, + }, + } + _, err := GenerateKeeperXML("keeper_fake.xml", &conf, true, 2) + assert.Nil(t, err) +} diff --git a/ckconfig/metrika.go b/ckconfig/metrika.go index 4e9973f9..3c3591a2 100644 --- a/ckconfig/metrika.go +++ b/ckconfig/metrika.go @@ -3,6 +3,7 @@ package ckconfig import ( "github.com/housepower/ckman/common" "github.com/housepower/ckman/model" + "github.com/housepower/ckman/service/zookeeper" ) func GenerateMetrikaXML(filename string, conf *model.CKManClickHouseConfig) (string, error) { @@ -40,10 +41,11 @@ 
func GenZookeeperMetrika(indent int, conf *model.CKManClickHouseConfig) string { xml := common.NewXmlFile("") xml.SetIndent(indent) xml.Begin("zookeeper") - for index, zk := range conf.ZkNodes { + nodes, port := zookeeper.GetZkInfo(conf) + for index, zk := range nodes { xml.BeginwithAttr("node", []common.XMLAttr{{Key: "index", Value: index + 1}}) xml.Write("host", zk) - xml.Write("port", conf.ZkPort) + xml.Write("port", port) xml.End("node") } xml.End("zookeeper") @@ -59,25 +61,6 @@ func GenLocalMetrika(indent int, conf *model.CKManClickHouseConfig) string { secret = false } if secret { - xml.Comment(`Inter-server per-cluster secret for Distributed queries - default: no secret (no authentication will be performed) - - If set, then Distributed queries will be validated on shards, so at least: - - such cluster should exist on the shard, - - such cluster should have the same secret. - - And also (and which is more important), the initial_user will - be used as current user for the query. - - Right now the protocol is pretty simple and it only takes into account: - - cluster name - - query - - Also it will be nice if the following will be implemented: - - source hostname (see interserver_http_host), but then it will depends from DNS, - it can use IP address instead, but then the you need to get correct on the initiator node. - - target hostname / ip address (same notes as for source hostname) - - time-based security tokens`) xml.Write("secret", "foo") } for _, shard := range conf.Shards { diff --git a/cmd/ckmanctl/ckmanctl.go b/cmd/ckmanctl/ckmanctl.go new file mode 100644 index 00000000..293e2c50 --- /dev/null +++ b/cmd/ckmanctl/ckmanctl.go @@ -0,0 +1,102 @@ +package main + +/* +ckmanctl migrate -f /etc/ckman/conf/migrate.hjson +ckmanctl password +ckmanctl get znodes --path +ckmanctl delete znodes cluster suball +ckmanctl delete znodes cluster replica_queue +*/ + +import ( + "strings" + + "github.com/alecthomas/kingpin/v2" + + "github.com/housepower/ckman/cmd/metacache" + "github.com/housepower/ckman/cmd/migrate" + "github.com/housepower/ckman/cmd/password" + "github.com/housepower/ckman/cmd/znodes" + "github.com/housepower/ckman/log" +) + +var ( + migrateCmd = kingpin.Command("migrate", "migrate cluster config from old repersistence to new persistence") + m_conf = migrateCmd.Flag("conf", "migrate config file path").Default("/etc/ckman/conf/migrate.hjson").Short('c').String() + + //passwordCmd = kingpin.Command("password", "encrypt password") + + // ckmanctl get znodes /clickhouse/tables/chenyc1 -r -s 20 + getCmd = kingpin.Command("get", "get options") + g_znodes = getCmd.Command("znodes", "get znodes") + gz_host = g_znodes.Flag("host", "host").Short('h').Default("127.0.0.1:2181").String() + gz_recursive = g_znodes.Flag("recursive", "recursive").Short('r').Bool() + gz_sort = g_znodes.Flag("sort", "sort number").Short('s').Int() + gz_path = g_znodes.Arg("path", "path").Default("/clickhouse").String() + + deleteCmd = kingpin.Command("delete", "delete options") + d_znodes = deleteCmd.Command("znodes", "delete znodes") + dz_suball = d_znodes.Command("suball", "delete all subnodes") + dzs_cluster = dz_suball.Arg("cluster", "cluster").String() + dzs_dryrun = dz_suball.Flag("dryrun", "dryrun").Short('d').Bool() + dzs_path = dz_suball.Flag("path", "znode path").Short('p').String() + dzs_conf = dz_suball.Flag("conf", "config file path").Short('c').Default("/etc/ckman/conf/ckman.hjson").String() + + // ckmanctl delete znodes queue abc + dz_queue = d_znodes.Command("queue", "delete replica queue") + 
dzq_cluster = dz_queue.Arg("cluster", "cluster").String() + dzq_dryrun = dz_queue.Flag("dryrun", "dryrun").Short('d').Bool() + dzq_numtries = dz_queue.Flag("trynum", "num_tries").Short('n').Default("100").Int() + dzq_conf = dz_queue.Flag("conf", "config file path").Short('c').Default("/etc/ckman/conf/ckman.hjson").String() + + setCmd = kingpin.Command("set", "set options") + s_metacacheCmd = setCmd.Command("metacache", "set metacache options") + sm_cluster = s_metacacheCmd.Arg("cluster", "cluster").String() + sm_conf = s_metacacheCmd.Flag("conf", "config file path").Short('c').Default("/etc/ckman/conf/ckman.hjson").String() + sm_dryrun = s_metacacheCmd.Flag("dryrun", "dryrun").Short('d').Bool() +) + +func main() { + log.InitLoggerDefault("debug", []string{"/var/log/ckmanctl.log"}) + command := kingpin.Parse() + firstCmd := strings.Split(command, " ")[0] + switch firstCmd { + case "migrate": + migrate.MigrateHandle(*m_conf) + case "password": + password.PasswordHandle() + case "get": + znodes.ZCntHandle(znodes.ZCntOpts{ + Path: *gz_path, + Recursive: *gz_recursive, + SortNumber: *gz_sort, + Zkhosts: *gz_host, + }) + case "delete": + thirdCmd := strings.Split(command, " ")[2] + if thirdCmd == "suball" { + znodes.SuballHandle(znodes.ZSuballOpts{ + ClusterName: *dzs_cluster, + ConfigFile: *dzs_conf, + Dryrun: *dzs_dryrun, + Node: *dzs_path, + }) + } else if thirdCmd == "queue" { + znodes.ReplicaQueueHandle(znodes.ZReplicaQueueOpts{ + ClusterName: *dzq_cluster, + Dryrun: *dzq_dryrun, + NumTries: *dzq_numtries, + ConfigFile: *dzq_conf, + }) + } + case "set": + secondCmd := strings.Split(command, " ")[1] + if secondCmd == "metacache" { + metacache.MetacacheHandle(metacache.MetacacheOpts{ + ClusterName: *sm_cluster, + ConfigFile: *sm_conf, + Dryrun: *sm_dryrun, + }) + } + } +} diff --git a/cmd/metacache/metacache.go b/cmd/metacache/metacache.go new file mode 100644 index 00000000..d5f68298 --- /dev/null +++ b/cmd/metacache/metacache.go @@ -0,0 +1,85 @@ +package metacache + +import ( + "fmt" + "os" + + "github.com/housepower/ckman/common" + "github.com/housepower/ckman/config" + "github.com/housepower/ckman/log" + "github.com/housepower/ckman/model" + "github.com/housepower/ckman/repository" +) + +type MetacacheOpts struct { + ClusterName string + ConfigFile string + Dryrun bool +} + +func MetacacheHandle(opts MetacacheOpts) { + if err := config.ParseConfigFile(opts.ConfigFile, ""); err != nil { + fmt.Printf("Parse config file %s fail: %v\n", opts.ConfigFile, err) + os.Exit(-1) + } + + var conf config.CKManConfig + _ = common.DeepCopyByGob(&conf, &config.GlobalConfig) + err := common.Gsypt.Unmarshal(&config.GlobalConfig) + if err != nil { + fmt.Printf("gsypt config file %s fail: %v\n", opts.ConfigFile, err) + os.Exit(-2) + } + err = repository.InitPersistent() + if err != nil { + fmt.Printf("init persistent failed:%v\n", err) + os.Exit(-3) + } + + cluster, err := repository.Ps.GetClusterbyName(opts.ClusterName) + if err != nil { + fmt.Printf("get cluster %s failed:%v\n", opts.ClusterName, err) + os.Exit(-4) + } + + if common.CompareClickHouseVersion(cluster.Version, "22.4.x") < 0 { + fmt.Printf("cluster %s version %s not support metacache\n", opts.ClusterName, cluster.Version) + os.Exit(-5) + } + + ckConns := make(map[string]*common.Conn, len(cluster.Hosts)) + for _, host := range cluster.Hosts { + conn, err := common.ConnectClickHouse(host, model.ClickHouseDefaultDB, cluster.GetConnOption()) + if err == nil { + ckConns[host] = conn + } + } + + if len(ckConns) == 0 { + fmt.Printf("connect to 
cluster %s failed\n", opts.ClusterName) + os.Exit(-6) + } + + i := 0 + for host, conn := range ckConns { + _, dbTbls, err := common.GetMergeTreeTables("MergeTree", "", conn) + if err != nil { + fmt.Printf("[%s]get tables of cluster %s failed:%v\n", host, opts.ClusterName, err) + continue + } + for db, tbls := range dbTbls { + for _, table := range tbls { + query := fmt.Sprintf("ALTER TABLE `%s`.`%s` MODIFY SETTING use_metadata_cache = true", db, table) + log.Logger.Debugf("[%s][%s]%s", opts.ClusterName, host, query) + if opts.Dryrun { + fmt.Printf("[%4d][%s][%s]%s\n", i, opts.ClusterName, host, query) + i++ + } else { + if err = conn.Exec(query); err != nil { + fmt.Printf("[%s]%s failed:%v\n", host, query, err) + } + } + } + } + } +} diff --git a/cmd/migrate/migrate.go b/cmd/migrate/migrate.go index 5da85f36..46076a5b 100644 --- a/cmd/migrate/migrate.go +++ b/cmd/migrate/migrate.go @@ -1,13 +1,10 @@ -package main +package migrate import ( - "flag" - "fmt" "io" "os" "github.com/hjson/hjson-go/v4" - "github.com/housepower/ckman/common" "github.com/housepower/ckman/log" "github.com/housepower/ckman/repository" _ "github.com/housepower/ckman/repository/local" @@ -15,18 +12,6 @@ import ( "github.com/pkg/errors" ) -/* -* -auto migrate cluster config between diffrent persistent policy -eg. - - migrate --config=/etc/ckman/conf/migrate.hjson -*/ -type CmdOptions struct { - ShowVer bool - ConfigFile string -} - type PersistentConfig struct { Policy string Config map[string]interface{} @@ -39,29 +24,13 @@ type MigrateConfig struct { } var ( - cmdOps CmdOptions - GitCommitHash string - BuildTimeStamp string - psrc repository.PersistentMgr - pdst repository.PersistentMgr + psrc repository.PersistentMgr + pdst repository.PersistentMgr ) -func initCmdOptions() { - cmdOps = CmdOptions{ - ShowVer: false, - ConfigFile: "/etc/ckman/conf/migrate.hjson", - } - common.EnvBoolVar(&cmdOps.ShowVer, "v") - common.EnvStringVar(&cmdOps.ConfigFile, "config") - - flag.BoolVar(&cmdOps.ShowVer, "v", cmdOps.ShowVer, "show build version and quit") - flag.StringVar(&cmdOps.ConfigFile, "config", cmdOps.ConfigFile, "migrate config file") - flag.Parse() -} - -func ParseConfig() (MigrateConfig, error) { +func ParseConfig(conf string) (MigrateConfig, error) { var config MigrateConfig - f, err := os.Open(cmdOps.ConfigFile) + f, err := os.Open(conf) if err != nil { return MigrateConfig{}, errors.Wrap(err, "") } @@ -163,18 +132,10 @@ func Migrate() error { return nil } -func main() { - log.InitLoggerConsole() - initCmdOptions() - if cmdOps.ShowVer { - fmt.Println("Build Timestamp:", BuildTimeStamp) - fmt.Println("Git Commit Hash:", GitCommitHash) - os.Exit(0) - } - - config, err := ParseConfig() +func MigrateHandle(conf string) { + config, err := ParseConfig(conf) if err != nil { - log.Logger.Fatalf("parse config file %s failed: %v", cmdOps.ConfigFile, err) + log.Logger.Fatalf("parse config file %s failed: %v", conf, err) } psrc, err = PersistentCheck(config, config.Source) if err != nil { diff --git a/cmd/password/password.go b/cmd/password/password.go index ecb0d941..d61c990c 100644 --- a/cmd/password/password.go +++ b/cmd/password/password.go @@ -1,4 +1,4 @@ -package main +package password import ( "crypto/md5" @@ -11,7 +11,7 @@ import ( "golang.org/x/term" ) -func main() { +func PasswordHandle() { common.LoadUsers(path.Join(common.GetWorkDirectory(), "conf")) fmt.Println(`Password must be at least 8 characters long. 
Password must contain at least three character categories among the following: diff --git a/cmd/znodefix/znodefix.go b/cmd/znodefix/znodefix.go deleted file mode 100644 index e610217a..00000000 --- a/cmd/znodefix/znodefix.go +++ /dev/null @@ -1,94 +0,0 @@ -package main - -/* -znodefix --cluster=abc --config=/etc/ckman/conf/ckman.hjson --node=192.168.110.8 --dryrun -*/ - -import ( - "flag" - "fmt" - "os" - - "github.com/housepower/ckman/common" - "github.com/housepower/ckman/config" - "github.com/housepower/ckman/log" - "github.com/housepower/ckman/repository" - _ "github.com/housepower/ckman/repository/dm8" - _ "github.com/housepower/ckman/repository/local" - _ "github.com/housepower/ckman/repository/mysql" - _ "github.com/housepower/ckman/repository/postgres" - "github.com/housepower/ckman/service/zookeeper" -) - -type CmdOptions struct { - ShowVer bool - ClusterName string - Node string - ConfigFile string - Dryrun bool -} - -var ( - cmdOps CmdOptions - GitCommitHash string - BuildTimeStamp string -) - -func init() { - cmdOps = CmdOptions{ - ShowVer: false, - ConfigFile: "/etc/ckman/conf/migrate.hjson", - Dryrun: false, - } - common.EnvBoolVar(&cmdOps.ShowVer, "v") - common.EnvStringVar(&cmdOps.ConfigFile, "config") - common.EnvBoolVar(&cmdOps.Dryrun, "dryrun") - common.EnvStringVar(&cmdOps.Node, "node") - common.EnvStringVar(&cmdOps.ClusterName, "cluster") - - flag.BoolVar(&cmdOps.ShowVer, "v", cmdOps.ShowVer, "show build version and quit") - flag.StringVar(&cmdOps.ClusterName, "cluster", cmdOps.ClusterName, "fix znode on which cluster") - flag.StringVar(&cmdOps.Node, "node", cmdOps.Node, "clean which znode on zookeeper") - flag.StringVar(&cmdOps.ConfigFile, "config", cmdOps.ConfigFile, "ckman config file") - flag.BoolVar(&cmdOps.Dryrun, "dryrun", cmdOps.Dryrun, "only list which znode to clean") - flag.Parse() -} - -func main() { - if cmdOps.ShowVer { - fmt.Println("Build Timestamp:", BuildTimeStamp) - fmt.Println("Git Commit Hash:", GitCommitHash) - os.Exit(0) - } - if err := config.ParseConfigFile(cmdOps.ConfigFile, ""); err != nil { - fmt.Printf("Parse config file %s fail: %v\n", cmdOps.ConfigFile, err) - os.Exit(1) - } - - var conf config.CKManConfig - _ = common.DeepCopyByGob(&conf, &config.GlobalConfig) - err := common.Gsypt.Unmarshal(&config.GlobalConfig) - if err != nil { - fmt.Printf("gsypt config file %s fail: %v\n", cmdOps.ConfigFile, err) - os.Exit(1) - } - log.InitLoggerConsole() - err = repository.InitPersistent() - if err != nil { - log.Logger.Fatalf("init persistent failed:%v", err) - } - - cluster, err := repository.Ps.GetClusterbyName(cmdOps.ClusterName) - if err != nil { - log.Logger.Fatalf("get cluster %s failed:%v", cmdOps.ClusterName, err) - } - service, err := zookeeper.NewZkService(cluster.ZkNodes, cluster.ZkPort) - if err != nil { - log.Logger.Fatalf("can't create zookeeper instance:%v", err) - } - err = service.CleanZoopath(cluster, cmdOps.ClusterName, cmdOps.Node, cmdOps.Dryrun) - if err != nil { - log.Logger.Fatalf("clean zoopath error:%v", err) - } - log.Logger.Info("znode fixed successfully") -} diff --git a/cmd/znodecnt/znodecount.go b/cmd/znodes/count.go similarity index 54% rename from cmd/znodecnt/znodecount.go rename to cmd/znodes/count.go index d2a6744f..b28726a2 100644 --- a/cmd/znodecnt/znodecount.go +++ b/cmd/znodes/count.go @@ -1,11 +1,6 @@ -package main - -/* -znodefix --cluster=abc --config=/etc/ckman/conf/ckman.hjson --node=192.168.110.8 --dryrun -*/ +package znodes import ( - "flag" "fmt" "net" "sort" @@ -13,10 +8,6 @@ import ( "strings" 
"github.com/housepower/ckman/log" - _ "github.com/housepower/ckman/repository/dm8" - _ "github.com/housepower/ckman/repository/local" - _ "github.com/housepower/ckman/repository/mysql" - _ "github.com/housepower/ckman/repository/postgres" "github.com/housepower/ckman/service/zookeeper" ) @@ -32,24 +23,22 @@ func (v ZCntList) Swap(i, j int) { v[i], v[j] = v[j], v[i] } func (v ZCntList) Less(i, j int) bool { return v[i].Count < v[j].Count } var ( - zkhosts = flag.String("h", "127.0.0.1:2181", `zookeeper server hosts`) - node = flag.String("n", "/clickhouse", "znode") - recursive = flag.Bool("r", false, "show all sub znodes") - sort_number = flag.Int("s", 0, "return number znode count order by count desc") - zcnts ZCntList + zcnts ZCntList ) -func init() { - flag.Parse() +type ZCntOpts struct { + Zkhosts string + Path string + Recursive bool + SortNumber int } -func main() { - log.InitLoggerConsole() +func ZCntHandle(opts ZCntOpts) { log.Logger.Infof("znode_count start...") - log.Logger.Infof("zookeeper service: %s, root znode: %s, recursive: %v, sort_number: %d", *zkhosts, *node, *recursive, *sort_number) + log.Logger.Infof("zookeeper service: %s, root znode: %s, recursive: %v, sort_number: %d", opts.Zkhosts, opts.Path, opts.Recursive, opts.SortNumber) var hosts []string var zkPort int - for _, zkhost := range strings.Split(*zkhosts, ",") { + for _, zkhost := range strings.Split(opts.Zkhosts, ",") { host, port, _ := net.SplitHostPort(zkhost) hosts = append(hosts, host) zkPort, _ = strconv.Atoi(port) @@ -60,19 +49,19 @@ func main() { log.Logger.Fatalf("can't create zookeeper instance:%v", err) } - if err = ZkCount(service); err != nil { + if err = ZkCount(service, opts); err != nil { log.Logger.Fatalf("%v\n", err) } } -func ZkCount(service *zookeeper.ZkService) error { - cnt, err := count(service, *node) +func ZkCount(service *zookeeper.ZkService, opts ZCntOpts) error { + cnt, err := count(service, opts.Path, opts) if err != nil { return err } - if *sort_number == 0 { - fmt.Printf("%s\t%d\n", *node, cnt) + if opts.SortNumber == 0 { + fmt.Printf("%s\t%d\n", opts.Path, cnt) } else { for _, z := range zcnts { fmt.Printf("%s\t%d\n", z.Node, z.Count) @@ -82,7 +71,7 @@ func ZkCount(service *zookeeper.ZkService) error { return nil } -func count(service *zookeeper.ZkService, znode string) (int32, error) { +func count(service *zookeeper.ZkService, znode string, opts ZCntOpts) (int32, error) { var cnt int32 _, n, err := service.Conn.Get(znode) if err != nil { @@ -93,14 +82,14 @@ func count(service *zookeeper.ZkService, znode string) (int32, error) { } else { children, _, _ := service.Conn.Children(znode) for _, child := range children { - if c, err := count(service, znode+"/"+child); err != nil { + if c, err := count(service, znode+"/"+child, opts); err != nil { return cnt, err } else { cnt += c } } cnt++ - if *sort_number > 0 { + if opts.SortNumber > 0 { zcnts = append(zcnts, ZCnt{ Node: znode, Count: cnt, @@ -108,11 +97,11 @@ func count(service *zookeeper.ZkService, znode string) (int32, error) { sort.Sort(sort.Reverse(zcnts)) - if len(zcnts) > *sort_number { - zcnts = zcnts[:*sort_number] + if len(zcnts) > opts.SortNumber { + zcnts = zcnts[:opts.SortNumber] } } else { - if *recursive { + if opts.Recursive { fmt.Printf("%s\t%d\n", znode, cnt) } } diff --git a/cmd/znodes/replica_queue.go b/cmd/znodes/replica_queue.go new file mode 100644 index 00000000..e371f856 --- /dev/null +++ b/cmd/znodes/replica_queue.go @@ -0,0 +1,72 @@ +package znodes + +import ( + "fmt" + + "github.com/housepower/ckman/common" 
+ "github.com/housepower/ckman/config" + "github.com/housepower/ckman/log" + "github.com/housepower/ckman/repository" + "github.com/housepower/ckman/service/clickhouse" + "github.com/housepower/ckman/service/cron" + "github.com/housepower/ckman/service/zookeeper" +) + +type ZReplicaQueueOpts struct { + ClusterName string + ConfigFile string + Dryrun bool + NumTries int +} + +func ReplicaQueueHandle(opts ZReplicaQueueOpts) { + if err := config.ParseConfigFile(opts.ConfigFile, ""); err != nil { + log.Logger.Fatalf("Parse config file %s fail: %v\n", opts.ConfigFile, err) + } + + var conf config.CKManConfig + _ = common.DeepCopyByGob(&conf, &config.GlobalConfig) + err := common.Gsypt.Unmarshal(&config.GlobalConfig) + if err != nil { + log.Logger.Fatalf("gsypt config file %s fail: %v\n", opts.ConfigFile, err) + } + err = repository.InitPersistent() + if err != nil { + log.Logger.Fatalf("init persistent failed:%v", err) + } + + cluster, err := repository.Ps.GetClusterbyName(opts.ClusterName) + if err != nil { + log.Logger.Fatalf("get cluster %s failed:%v", opts.ClusterName, err) + } + + ckService := clickhouse.NewCkService(&cluster) + if err = ckService.InitCkService(); err != nil { + log.Logger.Fatalf("[%s]init clickhouse service failed: %v", cluster.Cluster, err) + } + nodes, port := zookeeper.GetZkInfo(&cluster) + zkService, err := zookeeper.NewZkService(nodes, port) + if err != nil { + log.Logger.Fatalf("can't create zookeeper instance:%v", err) + } + // remove replica_queue in zookeeper + znodes, err := cron.GetReplicaQueueZnodes(ckService, opts.NumTries) + if err != nil { + log.Logger.Infof("[%s]remove replica_queue from zookeeper failed: %v", cluster.Cluster, err) + } + fmt.Println() + if opts.Dryrun { + for i, znode := range znodes { + if i == 1000 { + break + } + fmt.Printf("[%4d]%s\n", i, znode) + } + if len(znodes) > 1000 { + fmt.Printf("\n %d znodes, only show first 1000", len(znodes)) + } + } else { + deleted, notexist := cron.RemoveZnodes(zkService, znodes) + fmt.Printf("[%s]remove [%d] replica_queue from zookeeper success, [%d] already deleted\n", cluster.Cluster, deleted, notexist) + } +} diff --git a/cmd/znodes/suball.go b/cmd/znodes/suball.go new file mode 100644 index 00000000..dfb1051c --- /dev/null +++ b/cmd/znodes/suball.go @@ -0,0 +1,48 @@ +package znodes + +import ( + "github.com/housepower/ckman/common" + "github.com/housepower/ckman/config" + "github.com/housepower/ckman/log" + "github.com/housepower/ckman/repository" + "github.com/housepower/ckman/service/zookeeper" +) + +type ZSuballOpts struct { + ConfigFile string + ClusterName string + Node string + Dryrun bool +} + +func SuballHandle(opts ZSuballOpts) { + if err := config.ParseConfigFile(opts.ConfigFile, ""); err != nil { + log.Logger.Fatalf("Parse config file %s fail: %v\n", opts.ConfigFile, err) + } + + var conf config.CKManConfig + _ = common.DeepCopyByGob(&conf, &config.GlobalConfig) + err := common.Gsypt.Unmarshal(&config.GlobalConfig) + if err != nil { + log.Logger.Fatalf("gsypt config file %s fail: %v\n", opts.ConfigFile, err) + } + err = repository.InitPersistent() + if err != nil { + log.Logger.Fatalf("init persistent failed:%v", err) + } + + cluster, err := repository.Ps.GetClusterbyName(opts.ClusterName) + if err != nil { + log.Logger.Fatalf("get cluster %s failed:%v", opts.ClusterName, err) + } + nodes, port := zookeeper.GetZkInfo(&cluster) + service, err := zookeeper.NewZkService(nodes, port) + if err != nil { + log.Logger.Fatalf("can't create zookeeper instance:%v", err) + } + err = 
service.CleanZoopath(cluster, opts.ClusterName, opts.Node, opts.Dryrun) + if err != nil { + log.Logger.Fatalf("clean zoopath error:%v", err) + } + log.Logger.Info("znode fixed successfully") +} diff --git a/common/ck.go b/common/ck.go index b7364ef4..44eda751 100644 --- a/common/ck.go +++ b/common/ck.go @@ -9,7 +9,6 @@ import ( "encoding/json" "fmt" "net" - "regexp" "strconv" "strings" "sync" @@ -156,7 +155,7 @@ func GetMergeTreeTables(engine string, database string, conn *Conn) ([]string, m var databases []string var err error dbtables := make(map[string][]string) - query := fmt.Sprintf("SELECT DISTINCT database, name FROM system.tables WHERE (match(engine, '%s')) AND (database NOT IN ('system', 'information_schema', 'INFORMATION_SCHEMA'))", engine) + query := fmt.Sprintf("SELECT DISTINCT database, name FROM system.tables WHERE (match(engine, '%s')) AND (database NOT IN ('system', 'information_schema', 'INFORMATION_SCHEMA') AND name NOT LIKE '.inner%%')", engine) if database != "" { query += fmt.Sprintf(" AND database = '%s'", database) } @@ -209,7 +208,7 @@ func GetShardAvaliableHosts(conf *model.CKManClickHouseConfig) ([]string, error) } if len(hosts) < len(conf.Shards) { log.Logger.Errorf("not all shard avaliable: %v", lastErr) - return []string{}, nil + return []string{}, lastErr } log.Logger.Debugf("hosts: %v", hosts) return hosts, nil @@ -340,35 +339,6 @@ WHERE match(engine, 'Distributed') AND (database = '%s') AND ((dist = '%s') OR ( return local, dist, nil } - -func ClikHouseExceptionDecode(err error) error { - var e *clickhouse.Exception - // TCP protocol - if errors.As(err, &e) { - return e - } - // HTTP protocol - if strings.HasPrefix(err.Error(), "clickhouse [execute]::") { - r := regexp.MustCompile(`.*Code:\s+(\d+)\.\s+(.*)`) - matchs := r.FindStringSubmatch(err.Error()) - if len(matchs) != 3 { - return err - } - code, err2 := strconv.Atoi(matchs[1]) - if err2 != nil { - return err - } - message := matchs[2] - e = &clickhouse.Exception{ - Code: int32(code), - Message: message, - } - return e - } - - return err -} - func Execute(conf *model.CKManClickHouseConfig, sql string) error { var wg sync.WaitGroup var lastErr error @@ -400,14 +370,10 @@ func WithAlterSync(version string) string { } func CheckTable(conn *Conn, database, table string) error { - query := fmt.Sprintf("CHECK TABLE `%s`.`%s`", database, table) - var res uint8 - err := conn.QueryRow(query).Scan(&res) - if err != nil { + query := fmt.Sprintf("DESC `%s`.`%s`", database, table) + _, err := conn.Query(query) + if err != nil && ExceptionAS(err, UNKNOWN_TABLE) { return err } - if res != 1 { - return errors.Errorf("check table %s.%s failed", database, table) - } return nil } diff --git a/common/ck_exception.go b/common/ck_exception.go new file mode 100644 index 00000000..4a496a8e --- /dev/null +++ b/common/ck_exception.go @@ -0,0 +1,55 @@ +package common + +import ( + "errors" + "regexp" + "strconv" + "strings" + + "github.com/ClickHouse/clickhouse-go/v2" +) + +const ( + OK int32 = 0 + BAD_ARGUMENTS int32 = 36 + UNKNOWN_TABLE int32 = 60 + REPLICA_ALREADY_EXISTS int32 = 253 + UNFINISHED int32 = 341 +) + +func ClikHouseExceptionDecode(err error) error { + var e *clickhouse.Exception + // TCP protocol + if errors.As(err, &e) { + return e + } + // HTTP protocol + if strings.HasPrefix(err.Error(), "clickhouse [execute]::") { + r := regexp.MustCompile(`.*Code:\s+(\d+)\.\s+(.*)`) + matchs := r.FindStringSubmatch(err.Error()) + if len(matchs) != 3 { + return err + } + code, err2 := strconv.Atoi(matchs[1]) + if err2 != nil { 
+ return err + } + message := matchs[2] + e = &clickhouse.Exception{ + Code: int32(code), + Message: message, + } + return e + } + + return err +} + +func ExceptionAS(err error, code int32) bool { + var e *clickhouse.Exception + err = ClikHouseExceptionDecode(err) + if errors.As(err, &e) { + return e.Code == code + } + return false +} diff --git a/common/math.go b/common/math.go index 828f2e6a..bd445efb 100644 --- a/common/math.go +++ b/common/math.go @@ -19,15 +19,6 @@ func Decimal(value float64) float64 { return value } -func ArraySearch(target string, str_array []string) bool { - for _, str := range str_array { - if target == str { - return true - } - } - return false -} - func Md5CheckSum(s string) string { sum := md5.Sum([]byte(s)) return hex.EncodeToString(sum[:16]) diff --git a/common/math_test.go b/common/math_test.go index 921672ea..ccd042e2 100644 --- a/common/math_test.go +++ b/common/math_test.go @@ -131,3 +131,18 @@ func TestArraySearch(t *testing.T) { "aaa", "bbb", "ccc", "kkk", })) } + +func TestArrayRemove(t *testing.T) { + assert.Equal(t, []string{"aaa", "ccc", "kkk"}, ArrayRemove([]string{ + "aaa", "bbb", "ccc", "kkk", + }, "bbb")) + assert.Equal(t, []int{1, 2, 3, 4}, ArrayRemove([]int{ + 1, 2, 3, 4, 5, + }, 5)) + + arr := []string{ + "aaa", "bbb", "ccc", "kkk", + } + ArrayRemove(arr, "bbb") + assert.Equal(t, []string{"aaa", "bbb", "ccc", "kkk"}, arr) +} diff --git a/common/package.go b/common/package.go index 54156640..18cb7527 100644 --- a/common/package.go +++ b/common/package.go @@ -2,14 +2,15 @@ package common import ( "fmt" - "github.com/housepower/ckman/config" - "github.com/pkg/errors" "os" "path" "reflect" "sort" "strings" "sync" + + "github.com/housepower/ckman/config" + "github.com/pkg/errors" ) const ( @@ -18,6 +19,7 @@ const ( PkgModuleCommon string = "common" PkgModuleClient string = "client" PkgModuleServer string = "server" + PkgModuleKeeper string = "keeper" PkgSuffixRpm string = "rpm" PkgSuffixTgz string = "tgz" @@ -132,7 +134,7 @@ func parsePkgName(fname string) CkPackageFile { return file } -func GetPackages() error { +func LoadPackages() error { var files CkPackageFiles CkPackages.Range(func(k, v interface{}) bool { CkPackages.Delete(k) @@ -202,7 +204,7 @@ func GetPackages() error { func GetAllPackages() map[string]CkPackageFiles { pkgs := make(map[string]CkPackageFiles, 0) - _ = GetPackages() + _ = LoadPackages() CkPackages.Range(func(k, v interface{}) bool { key := k.(string) files := v.(CkPackageFiles) diff --git a/common/util.go b/common/util.go index b4da2407..8ed35795 100644 --- a/common/util.go +++ b/common/util.go @@ -259,8 +259,8 @@ func Shuffle(value []string) []string { return arr } -func ArrayDistinct(arr []string) []string { - set := make(map[string]struct{}, len(arr)) +func ArrayDistinct[T string | int | int64 | int32 | uint | uint64 | uint32 | float32 | float64](arr []T) []T { + set := make(map[T]struct{}, len(arr)) j := 0 for _, v := range arr { _, ok := set[v] @@ -274,6 +274,26 @@ func ArrayDistinct(arr []string) []string { return arr[:j] } +func ArrayRemove[T string | int | int64 | int32 | uint | uint64 | uint32 | float32 | float64](arr []T, elem T) []T { + var res []T + for _, v := range arr { + if v == elem { + continue + } + res = append(res, v) + } + return res +} + +func ArraySearch[T string | int | int64 | int32 | uint | uint64 | uint32 | float32 | float64](target T, str_array []T) bool { + for _, str := range str_array { + if target == str { + return true + } + } + return false +} + func TernaryExpression(condition bool, 
texpr, fexpr interface{}) interface{} { if condition { return texpr diff --git a/config/config.go b/config/config.go index f9ed4691..926da9ae 100644 --- a/config/config.go +++ b/config/config.go @@ -36,6 +36,7 @@ type CronJob struct { SyncLogicSchema string `yaml:"sync_logic_schema" json:"sync_logic_schema"` WatchClusterStatus string `yaml:"watch_cluster_status" json:"watch_cluster_status"` SyncDistSchema string `yaml:"sync_dist_schema" json:"sync_dist_schema"` + ClearZnodes string `yaml:"clear_znodes" json:"clear_znodes"` } type CKManConfig struct { diff --git a/controller/clickhouse.go b/controller/clickhouse.go index e5e49a99..0aba7714 100644 --- a/controller/clickhouse.go +++ b/controller/clickhouse.go @@ -1,6 +1,8 @@ package controller import ( + "bytes" + "encoding/csv" "fmt" "io" "net/http" @@ -14,7 +16,6 @@ import ( "github.com/housepower/ckman/repository" "github.com/pkg/errors" - client "github.com/ClickHouse/clickhouse-go/v2" "github.com/gin-gonic/gin" "github.com/housepower/ckman/common" "github.com/housepower/ckman/deploy" @@ -82,12 +83,28 @@ func (controller *ClickHouseController) ImportCluster(c *gin.Context) { conf.Password = req.Password conf.ZkNodes = req.ZkNodes conf.ZkPort = req.ZkPort - conf.ZkStatusPort = req.ZkStatusPort conf.PromHost = req.PromHost conf.PromPort = req.PromPort conf.AuthenticateType = model.SshPasswordNotSave conf.Mode = model.CkClusterImport conf.Normalize() + + if req.LogicCluster != "" { + logics, err := repository.Ps.GetLogicClusterbyName(req.LogicCluster) + if err == nil { + for _, cn := range logics { + clus, err := repository.Ps.GetClusterbyName(cn) + if err != nil { + controller.wrapfunc(c, model.E_DATA_SELECT_FAILED, err) + return + } + if clus.Mode == model.CkClusterDeploy { + controller.wrapfunc(c, model.E_DATA_DUPLICATED, fmt.Sprintf("logic %s has cluster %s which is depolyed", req.LogicCluster, cn)) + return + } + } + } + } code, err := clickhouse.GetCkClusterConfig(&conf) if err != nil { controller.wrapfunc(c, code, err) @@ -186,7 +203,7 @@ func (controller *ClickHouseController) DeleteCluster(c *gin.Context) { // @Accept json // @Param clusterName path string true "cluster name" default(test) // @Failure 200 {string} json "{"code":"5804","msg":"查询数据失败","data":null}" -// @Success 200 {string} json "{"code":"0000","msg":"ok", "data":{"mode":"import","hosts":["192.168.0.1","192.168.0.2","192.168.0.3","192.168.0.4"],"names":["node1","node2","node3","node4"],"port":9000,"httpPort":8123,"user":"ck","password":"123456","database":"default","cluster":"test","zkNodes":["192.168.0.1","192.168.0.2","192.168.0.3"],"zkPort":2181,"zkStatusPort":8080,"isReplica":true,"version":"20.8.5.45","sshUser":"","sshPassword":"","shards":[{"replicas":[{"ip":"192.168.0.1","hostname":"node1"},{"ip":"192.168.0.2","hostname":"node2"}]},{"replicas":[{"ip":"192.168.0.3","hostname":"node3"},{"ip":"192.168.0.4","hostname":"node4"}]}],"path":""}}" +// @Success 200 {string} json "{"code":"0000","msg":"ok", 
"data":{"mode":"import","hosts":["192.168.0.1","192.168.0.2","192.168.0.3","192.168.0.4"],"names":["node1","node2","node3","node4"],"port":9000,"httpPort":8123,"user":"ck","password":"123456","database":"default","cluster":"test","zkNodes":["192.168.0.1","192.168.0.2","192.168.0.3"],"zkPort":2181,"isReplica":true,"version":"20.8.5.45","sshUser":"","sshPassword":"","shards":[{"replicas":[{"ip":"192.168.0.1","hostname":"node1"},{"ip":"192.168.0.2","hostname":"node2"}]},{"replicas":[{"ip":"192.168.0.3","hostname":"node3"},{"ip":"192.168.0.4","hostname":"node4"}]}],"path":""}}" // @Router /api/v2/ck/cluster/{clusterName} [get] func (controller *ClickHouseController) GetCluster(c *gin.Context) { var err error @@ -212,7 +229,7 @@ func (controller *ClickHouseController) GetCluster(c *gin.Context) { // @Security ApiKeyAuth // @Tags clickhouse // @Accept json -// @Success 200 {string} json "{"code":"0000","msg":"ok", "data":{"test":{"mode":"import","hosts":["192.168.0.1","192.168.0.2","192.168.0.3","192.168.0.4"],"names":["node1","node2","node3","node4"],"port":9000,"httpPort":8123,"user":"ck","password":"123456","database":"default","cluster":"test","zkNodes":["192.168.0.1","192.168.0.2","192.168.0.3"],"zkPort":2181,"zkStatusPort":8080,"isReplica":true,"version":"20.8.5.45","sshUser":"","sshPassword":"","shards":[{"replicas":[{"ip":"192.168.0.1","hostname":"node1"},{"ip":"192.168.0.2","hostname":"node2"}]},{"replicas":[{"ip":"192.168.0.3","hostname":"node3"},{"ip":"192.168.0.4","hostname":"node4"}]}],"path":""}}}" +// @Success 200 {string} json "{"code":"0000","msg":"ok", "data":{"test":{"mode":"import","hosts":["192.168.0.1","192.168.0.2","192.168.0.3","192.168.0.4"],"names":["node1","node2","node3","node4"],"port":9000,"httpPort":8123,"user":"ck","password":"123456","database":"default","cluster":"test","zkNodes":["192.168.0.1","192.168.0.2","192.168.0.3"],"zkPort":2181,"isReplica":true,"version":"20.8.5.45","sshUser":"","sshPassword":"","shards":[{"replicas":[{"ip":"192.168.0.1","hostname":"node1"},{"ip":"192.168.0.2","hostname":"node2"}]},{"replicas":[{"ip":"192.168.0.3","hostname":"node3"},{"ip":"192.168.0.4","hostname":"node4"}]}],"path":""}}}" // @Failure 200 {string} json "{"code":"5804","msg":"数据查询失败","data":null}" // @Router /api/v2/ck/cluster [get] func (controller *ClickHouseController) GetClusters(c *gin.Context) { @@ -363,25 +380,6 @@ func (controller *ClickHouseController) CreateTable(c *gin.Context) { return } - // sync zookeeper path - if conf.IsReplica { - path, err := clickhouse.GetZkPath(ckService.Conn, params.DB, params.Name) - if err != nil { - controller.wrapfunc(c, model.E_ZOOKEEPER_ERROR, err) - return - } - tableName := fmt.Sprintf("%s.%s", params.DB, params.Name) - if conf.ZooPath == nil { - conf.ZooPath = make(map[string]string) - } - conf.ZooPath[tableName] = path - - if err = repository.Ps.UpdateCluster(conf); err != nil { - controller.wrapfunc(c, model.E_DATA_UPDATE_FAILED, err) - return - } - } - if req.DryRun { controller.wrapfunc(c, model.E_SUCCESS, statements) } else { @@ -441,18 +439,14 @@ func (controller *ClickHouseController) CreateDistTableOnLogic(c *gin.Context) { LogicCluster: *conf.LogicCluster, } if err = ckService.CreateDistTblOnLogic(¶ms); err != nil { - err = common.ClikHouseExceptionDecode(err) - var exception *client.Exception - if errors.As(err, &exception) { - if exception.Code == 60 && cluster != conf.Cluster { - log.Logger.Warnf("table %s.%s not exist on cluster %s, should create it", params.Database, params.TableName, cluster) - //means local 
table is not exist, will auto sync schema - con, err := repository.Ps.GetClusterbyName(cluster) - if err == nil { - // conf is current cluster, we believe that local table must be exist - clickhouse.SyncLogicTable(conf, con, req.Database, req.LocalTable) - continue - } + if common.ExceptionAS(err, common.UNKNOWN_TABLE) && cluster != conf.Cluster { + log.Logger.Warnf("table %s.%s not exist on cluster %s, should create it", params.Database, params.TableName, cluster) + //means local table is not exist, will auto sync schema + con, err := repository.Ps.GetClusterbyName(cluster) + if err == nil { + // conf is current cluster, we believe that local table must be exist + clickhouse.SyncLogicTable(conf, con, req.Database, req.LocalTable) + continue } } controller.wrapfunc(c, model.E_TBL_CREATE_FAILED, err) @@ -639,6 +633,57 @@ func (controller *ClickHouseController) AlterTable(c *gin.Context) { controller.wrapfunc(c, model.E_SUCCESS, nil) } +// @Summary 更新/删除表中的数据 +// @Description 更新/删除表中的数据 +// @version 1.0 +// @Security ApiKeyAuth +// @Tags clickhouse +// @Accept json +// @Param clusterName path string true "cluster name" default(test) +// @Param req body model.DMLOnLogicReq true "request body" +// @Success 200 {string} json "{"code":"0000","msg":"success","data":nil}" +// @Failure 200 {string} json "{"code":"5000","msg":"invalid params","data":""}" +// @Failure 200 {string} json "{"code":"5804","msg":"数据查询失败","data":""}" +// @Failure 200 {string} json "{"code":"5110","msg":"clickhouse连接失败","data":null}" +// @Failure 200 {string} json "{"code":"5809","msg":"修改表失败","data":null}" +// @Router /api/v2/ck/table/dml/{clusterName} [post] +func (controller *ClickHouseController) DMLOnLogic(c *gin.Context) { + var req model.DMLOnLogicReq + if err := model.DecodeRequestBody(c.Request, &req); err != nil { + controller.wrapfunc(c, model.E_INVALID_PARAMS, err) + return + } + clusterName := c.Param(ClickHouseClusterPath) + cluster, err := repository.Ps.GetClusterbyName(clusterName) + if err != nil { + controller.wrapfunc(c, model.E_RECORD_NOT_FOUND, err) + return + } + + logics, err := repository.Ps.GetLogicClusterbyName(*cluster.LogicCluster) + + if err != nil { + controller.wrapfunc(c, model.E_RECORD_NOT_FOUND, err) + return + } + + if req.Manipulation != model.DML_DELETE && req.Manipulation != model.DML_UPDATE { + controller.wrapfunc(c, model.E_INVALID_PARAMS, errors.New("manipulation is invalid")) + return + } + + if req.Manipulation == model.DML_UPDATE && len(req.KV) == 0 { + controller.wrapfunc(c, model.E_INVALID_PARAMS, errors.New("kv is empty")) + return + } + + if err := clickhouse.DMLOnLogic(logics, req); err != nil { + controller.wrapfunc(c, model.E_TBL_ALTER_FAILED, err) + return + } + controller.wrapfunc(c, model.E_SUCCESS, nil) +} + // @Summary 修改表TTL // @Description 修改表TTL // @version 1.0 @@ -646,7 +691,7 @@ func (controller *ClickHouseController) AlterTable(c *gin.Context) { // @Tags clickhouse // @Accept json // @Param clusterName path string true "cluster name" default(test) -// @Param req body model.AlterCkTableReq true "request body" +// @Param req body model.AlterTblsTTLReq true "request body" // @Success 200 {string} json "{"code":"0000","msg":"success","data":nil}" // @Failure 200 {string} json "{"code":"5000","msg":"invalid params","data":""}" // @Failure 200 {string} json "{"code":"5804","msg":"数据查询失败","data":""}" @@ -1165,6 +1210,62 @@ func (controller *ClickHouseController) QueryInfo(c *gin.Context) { controller.wrapfunc(c, model.E_SUCCESS, data) } +// @Summary 查询SQL +// 
@Description 查询SQL +// @version 1.0 +// @Security ApiKeyAuth +// @Tags clickhouse +// @Accept json +// @Param clusterName path string true "cluster name" default(test) +// @Param query query string true "sql" default(show databases) +// @Failure 200 {string} json "{"code":"5110","msg":"连接clickhouse失败","data":""}" +// @Failure 200 {string} json "{"code":"5804","msg":"数据查询失败","data":""}" +// @Success 200 {string} json "{"code":"0000","msg":"ok","data":[["name"],["default"],["system"]]}" +// @Router /api/v2/ck/query_export/{clusterName} [get] +func (controller *ClickHouseController) QueryExport(c *gin.Context) { + clusterName := c.Param(ClickHouseClusterPath) + host := c.Query("host") + query := c.Query("query") + query = strings.TrimRight(strings.TrimSpace(query), ";") + + var ckService *clickhouse.CkService + var err error + if host == "" { + ckService, err = clickhouse.GetCkService(clusterName) + if err != nil { + controller.wrapfunc(c, model.E_CH_CONNECT_FAILED, err) + return + } + } else { + ckService, err = clickhouse.GetCkNodeService(clusterName, host) + if err != nil { + controller.wrapfunc(c, model.E_CH_CONNECT_FAILED, err) + return + } + } + + data, err := ckService.QueryInfo(query) + if err != nil { + controller.wrapfunc(c, model.E_DATA_SELECT_FAILED, err) + return + } + fileName := fmt.Sprintf("ckman_query_%s_%s.csv", clusterName, time.Now().Format("2006-01-02T15:04:05")) + + buf := &bytes.Buffer{} + buf.WriteString("\xEF\xBB\xBE") + writer := csv.NewWriter(buf) + for _, row := range data { + var cells []string + for _, cell := range row { + cells = append(cells, fmt.Sprintf("%v", cell)) + } + writer.Write(cells) + } + writer.Flush() + c.Writer.Header().Set("Content-Disposition", "attachment;filename="+fileName) + c.Data(http.StatusOK, "application/octet-stream", buf.Bytes()) +} + // @Summary 升级集群 // @Description 升级集群,支持全量升级和滚动升级 // @version 1.0 @@ -1226,6 +1327,7 @@ func (controller *ClickHouseController) UpgradeCluster(c *gin.Context) { d := deploy.NewCkDeploy(conf) d.Packages = deploy.BuildPackages(req.PackageVersion, conf.PkgType, conf.Cwd) d.Ext.Policy = req.Policy + d.Ext.CurClusterOnly = true d.Conf.Hosts = chHosts taskId, err := deploy.CreateNewTask(clusterName, model.TaskTypeCKUpgrade, d) @@ -1308,20 +1410,6 @@ func (controller *ClickHouseController) StopCluster(c *gin.Context) { return } - // before stop, we need sync zoopath - /* - Since when destory cluster, the cluster must be stopped, - we cant't get zookeeper path by querying ck, - so need to save the ZooKeeper path before stopping the cluster. 
- */ - if conf.IsReplica { - err = clickhouse.GetReplicaZkPath(&conf) - if err != nil { - controller.wrapfunc(c, model.E_ZOOKEEPER_ERROR, err) - return - } - } - common.CloseConns(conf.Hosts) err = deploy.StopCkCluster(&conf) if err != nil { @@ -1425,7 +1513,10 @@ func (controller *ClickHouseController) RebalanceCluster(c *gin.Context) { return } } - allTable := common.TernaryExpression(c.Query("all") == "false" || req.ExceptMaxShard, false, true).(bool) + allTable := common.TernaryExpression(c.Query("all") == "false", false, true).(bool) + if req.ExceptMaxShard { + allTable = true + } err = clickhouse.RebalanceCluster(&conf, req.Keys, allTable, req.ExceptMaxShard) if err != nil { controller.wrapfunc(c, model.E_TBL_ALTER_FAILED, err) @@ -1447,7 +1538,7 @@ func (controller *ClickHouseController) RebalanceCluster(c *gin.Context) { // @Failure 200 {string} json "{"code":"5110","msg":"clickhouse连接失败","data":""}" // @Failure 200 {string} json "{"code":"5804","msg":"数据查询失败","data":""}" // @Failure 200 {string} json "{"code":"5800","msg":"集群不存在","data":""}" -// @Success 200 {string} json "{"code":"0000","msg":"success","data":{"test":{"mode":"import","hosts":["192.168.0.1","192.168.0.2","192.168.0.3","192.168.0.4"],"names":["node1","node2","node3","node4"],"port":9000,"httpPort":8123,"user":"ck","password":"123456","database":"default","cluster":"test","zkNodes":["192.168.0.1","192.168.0.2","192.168.0.3"],"zkPort":2181,"zkStatusPort":8080,"isReplica":true,"version":"20.8.5.45","sshUser":"","sshPassword":"","shards":[{"replicas":[{"ip":"192.168.0.1","hostname":"node1"},{"ip":"192.168.0.2","hostname":"node2"}]},{"replicas":[{"ip":"192.168.0.3","hostname":"node3"},{"ip":"192.168.0.4","hostname":"node4"}]}],"path":""}}}}" +// @Success 200 {string} json "{"code":"0000","msg":"success","data":{"test":{"mode":"import","hosts":["192.168.0.1","192.168.0.2","192.168.0.3","192.168.0.4"],"names":["node1","node2","node3","node4"],"port":9000,"httpPort":8123,"user":"ck","password":"123456","database":"default","cluster":"test","zkNodes":["192.168.0.1","192.168.0.2","192.168.0.3"],"zkPort":2181,"isReplica":true,"version":"20.8.5.45","sshUser":"","sshPassword":"","shards":[{"replicas":[{"ip":"192.168.0.1","hostname":"node1"},{"ip":"192.168.0.2","hostname":"node2"}]},{"replicas":[{"ip":"192.168.0.3","hostname":"node3"},{"ip":"192.168.0.4","hostname":"node4"}]}],"path":""}}}}" // @Router /api/v2/ck/get/{clusterName} [get] func (controller *ClickHouseController) GetClusterStatus(c *gin.Context) { clusterName := c.Param(ClickHouseClusterPath) @@ -1591,6 +1682,7 @@ func (controller *ClickHouseController) AddNode(c *gin.Context) { // install clickhouse and start service on the new node d := deploy.NewCkDeploy(conf) d.Conf.Hosts = req.Ips + d.Packages = deploy.BuildPackages(conf.Version, conf.PkgType, conf.Cwd) if reflect.DeepEqual(d.Packages, deploy.Packages{}) { err := errors.Errorf("package %s %s not found in localpath", conf.Version, conf.PkgType) @@ -1763,23 +1855,19 @@ func (controller *ClickHouseController) StartNode(c *gin.Context) { ckService.InitCkService() err := ckService.FetchSchemerFromOtherNode(host) if err != nil { - err = common.ClikHouseExceptionDecode(err) - var exception *client.Exception - if errors.As(err, &exception) { - if exception.Code == 253 { - //Code: 253: Replica /clickhouse/tables/XXX/XXX/replicas/{replica} already exists, clean the znode and retry - service, err := zookeeper.GetZkService(conf.Cluster) + if common.ExceptionAS(err, common.REPLICA_ALREADY_EXISTS) { + //Code: 253: Replica 
/clickhouse/tables/XXX/XXX/replicas/{replica} already exists, clean the znode and retry + service, err := zookeeper.GetZkService(conf.Cluster) + if err == nil { + err = service.CleanZoopath(conf, conf.Cluster, ip, false) if err == nil { - err = service.CleanZoopath(conf, conf.Cluster, ip, false) - if err == nil { - if err = ckService.FetchSchemerFromOtherNode(host); err != nil { - log.Logger.Errorf("fetch schema from other node failed again") - } + if err = ckService.FetchSchemerFromOtherNode(host); err != nil { + log.Logger.Errorf("fetch schema from other node failed again") + } } else { log.Logger.Errorf("can't create zookeeper instance:%v", err) } - } } } @@ -1958,15 +2046,8 @@ func (controller *ClickHouseController) GetTableMetric(c *gin.Context) { var gotError bool metrics, err := clickhouse.GetCkTableMetrics(&conf, database, cols) if err != nil { - gotError = true - err = common.ClikHouseExceptionDecode(err) - var exception *client.Exception - if errors.As(err, &exception) { - if exception.Code == 60 { - // we do not return error when system.query_log is not exist - gotError = false - } - } + // we do not return error when system.query_log is not exist + gotError = !common.ExceptionAS(err, common.UNKNOWN_TABLE) } if gotError { @@ -2034,15 +2115,8 @@ func (controller *ClickHouseController) GetOpenSessions(c *gin.Context) { var gotError bool sessions, err := clickhouse.GetCkOpenSessions(&conf, limit) if err != nil { - gotError = true - err = common.ClikHouseExceptionDecode(err) - var exception *client.Exception - if errors.As(err, &exception) { - if exception.Code == 60 { - // we do not return error when system.query_log is not exist - gotError = false - } - } + // we do not return error when system.query_log is not exist + gotError = !common.ExceptionAS(err, common.UNKNOWN_TABLE) } if gotError { controller.wrapfunc(c, model.E_DATA_SELECT_FAILED, err) @@ -2129,15 +2203,8 @@ func (controller *ClickHouseController) GetSlowSessions(c *gin.Context) { var gotError bool sessions, err := clickhouse.GetCkSlowSessions(&conf, cond) if err != nil { - gotError = true - err = common.ClikHouseExceptionDecode(err) - var exception *client.Exception - if errors.As(err, &exception) { - if exception.Code == 60 { - // we do not return error when system.query_log is not exist - gotError = false - } - } + // we do not return error when system.query_log is not exist + gotError = !common.ExceptionAS(err, common.UNKNOWN_TABLE) } if gotError { controller.wrapfunc(c, model.E_DATA_SELECT_FAILED, err) @@ -2169,15 +2236,8 @@ func (controller *ClickHouseController) GetDistDDLQueue(c *gin.Context) { var gotError bool ddlQueue, err := clickhouse.GetDistibutedDDLQueue(&conf) if err != nil { - gotError = true - err = common.ClikHouseExceptionDecode(err) - var exception *client.Exception - if errors.As(err, &exception) { - if exception.Code == 60 { - // we do not return error when system.query_log is not exist - gotError = false - } - } + // we do not return error when system.query_log is not exist + gotError = !common.ExceptionAS(err, common.UNKNOWN_TABLE) } if gotError { controller.wrapfunc(c, model.E_DATA_SELECT_FAILED, err) @@ -2519,7 +2579,7 @@ func (controller *ClickHouseController) ClusterSetting(c *gin.Context) { return } - restart, err := mergeClickhouseConfig(&conf, force) + restart, cc, err := mergeClickhouseConfig(&conf, force) if err != nil { controller.wrapfunc(c, model.E_INVALID_PARAMS, err) return @@ -2528,6 +2588,8 @@ func (controller *ClickHouseController) ClusterSetting(c *gin.Context) { d := 
deploy.NewCkDeploy(conf) d.Ext.Restart = restart d.Ext.Policy = policy + d.Ext.ChangeCk = cc + d.Ext.CurClusterOnly = true taskId, err := deploy.CreateNewTask(clusterName, model.TaskTypeCKSetting, d) if err != nil { controller.wrapfunc(c, model.E_DATA_INSERT_FAILED, err) @@ -2810,11 +2872,12 @@ func checkConfigParams(conf *model.CKManClickHouseConfig) error { return nil } -func mergeClickhouseConfig(conf *model.CKManClickHouseConfig, force bool) (bool, error) { +func mergeClickhouseConfig(conf *model.CKManClickHouseConfig, force bool) (bool, bool, error) { restart := false + config := false cluster, err := repository.Ps.GetClusterbyName(conf.Cluster) if err != nil { - return false, errors.Errorf("cluster %s is not exist", conf.Cluster) + return false, false, errors.Errorf("cluster %s is not exist", conf.Cluster) } conf.UnPack(cluster) storageChanged := !reflect.DeepEqual(cluster.Storage, conf.Storage) @@ -2822,38 +2885,27 @@ func mergeClickhouseConfig(conf *model.CKManClickHouseConfig, force bool) (bool, userconfChanged := !reflect.DeepEqual(cluster.UsersConf, conf.UsersConf) logicChaned := !reflect.DeepEqual(cluster.LogicCluster, conf.LogicCluster) zkChanged := !reflect.DeepEqual(cluster.ZkNodes, conf.ZkNodes) + keeperChanged := !reflect.DeepEqual(cluster.KeeperConf, conf.KeeperConf) noChangedFn := func() bool { return cluster.Port == conf.Port && + cluster.Comment == conf.Comment && cluster.AuthenticateType == conf.AuthenticateType && cluster.SshUser == conf.SshUser && cluster.SshPassword == conf.SshPassword && cluster.SshPort == conf.SshPort && cluster.Password == conf.Password && !storageChanged && !expertChanged && cluster.PromHost == conf.PromHost && cluster.PromPort == conf.PromPort && - cluster.ZkPort == conf.ZkPort && cluster.ZkStatusPort == conf.ZkStatusPort && - !userconfChanged && !logicChaned && !zkChanged + cluster.ZkPort == conf.ZkPort && + !userconfChanged && !logicChaned && !zkChanged && !keeperChanged } if !force { if noChangedFn() { - return false, errors.Errorf("all config are the same, it's no need to update") + return false, false, errors.Errorf("all config are the same, it's no need to update") } } - if zkChanged { - zknodes := append(cluster.ZkNodes, conf.ZkNodes...) 
- zknodes = common.ArrayDistinct(zknodes) - zkclusters, err := zookeeper.GetZkClusterNodes(cluster.ZkNodes[0], cluster.ZkStatusPort) - if err != nil { - return false, err - } - for _, node := range zknodes { - if !common.ArraySearch(node, zkclusters) { - return false, fmt.Errorf("node %s not in zookeeper cluster", node) - } - } - } if storageChanged { srcDisks := make(common.Map) dstDisks := make(common.Map) @@ -2882,21 +2934,21 @@ func mergeClickhouseConfig(conf *model.CKManClickHouseConfig, force bool) (bool, log.Logger.Debug(query) rows, err := svr.Conn.Query(query) if err != nil { - return false, err + return false, false, err } for rows.Next() { var policy string if err = rows.Scan(&policy); err == nil { rows.Close() - return false, fmt.Errorf("disk %v was refrenced by storage_policy %s, can't delete", disks, policy) + return false, false, fmt.Errorf("disk %v was refrenced by storage_policy %s, can't delete", disks, policy) } else { rows.Close() - return false, err + return false, false, err } } rows.Close() } else if !noChangedFn() { - return false, err + return false, false, err } } @@ -2927,21 +2979,21 @@ func mergeClickhouseConfig(conf *model.CKManClickHouseConfig, force bool) (bool, log.Logger.Debug(query) rows, err := svr.Conn.Query(query) if err != nil { - return false, err + return false, false, err } for rows.Next() { var database, table, policy_name string if err = rows.Scan(&database, &table, &policy_name); err == nil { rows.Close() - return false, fmt.Errorf("storage policy %s was refrenced by table %s.%s, can't delete", policy_name, database, table) + return false, false, fmt.Errorf("storage policy %s was refrenced by table %s.%s, can't delete", policy_name, database, table) } else { rows.Close() - return false, err + return false, false, err } } rows.Close() } else if !noChangedFn() { - return false, err + return false, false, err } } @@ -2954,12 +3006,20 @@ func mergeClickhouseConfig(conf *model.CKManClickHouseConfig, force bool) (bool, } } + if force || cluster.Port != conf.Port || + cluster.Password != conf.Password || storageChanged || expertChanged || + cluster.ZkPort != conf.ZkPort || + userconfChanged || logicChaned || zkChanged || keeperChanged { + config = true + } + // need restart - if cluster.Port != conf.Port || storageChanged || expertChanged { + if force || cluster.Port != conf.Port || storageChanged || expertChanged || keeperChanged { restart = true } // merge conf + cluster.Comment = conf.Comment cluster.Port = conf.Port cluster.AuthenticateType = conf.AuthenticateType cluster.SshUser = conf.SshUser @@ -2973,18 +3033,18 @@ func mergeClickhouseConfig(conf *model.CKManClickHouseConfig, force bool) (bool, cluster.UsersConf = conf.UsersConf cluster.LogicCluster = conf.LogicCluster cluster.ZkPort = conf.ZkPort - cluster.ZkStatusPort = conf.ZkStatusPort cluster.ZkNodes = conf.ZkNodes + cluster.KeeperConf = conf.KeeperConf if err = common.DeepCopyByGob(conf, cluster); err != nil { - return false, err + return false, false, err } - return restart, nil + return restart, config, nil } func genTTLExpress(ttls []model.CkTableTTL, storage *model.Storage) ([]string, error) { var express []string for _, ttl := range ttls { - expr := fmt.Sprintf("toDateTime(`%s`) + INTERVAL %d %s ", ttl.TimeCloumn, ttl.Interval, ttl.Unit) + expr := fmt.Sprintf("toDateTime(`%s`) + toInterval%s(%d) ", ttl.TimeCloumn, strings.Title(strings.ToLower(ttl.Unit)), ttl.Interval) if ttl.Action == model.TTLActionDelete { expr += ttl.Action } else if ttl.Action == model.TTLActionToVolume { diff 
--git a/controller/deploy.go b/controller/deploy.go index 266044a4..02d6e108 100644 --- a/controller/deploy.go +++ b/controller/deploy.go @@ -55,7 +55,12 @@ func (controller *DeployController) DeployCk(c *gin.Context) { tmp := deploy.NewCkDeploy(conf) tmp.Packages = deploy.BuildPackages(conf.Version, conf.PkgType, conf.Cwd) - + if conf.KeeperWithStanalone() { + if tmp.Packages.Keeper == "" { + controller.wrapfunc(c, model.E_DATA_CHECK_FAILED, errors.Errorf("keeper package not found")) + return + } + } taskId, err := deploy.CreateNewTask(conf.Cluster, model.TaskTypeCKDeploy, tmp) if err != nil { controller.wrapfunc(c, model.E_DATA_INSERT_FAILED, err) @@ -92,6 +97,9 @@ func checkDeployParams(conf *model.CKManClickHouseConfig, force bool) error { if conf.Cwd == "" { return errors.Errorf("cwd can't be empty for tgz deployment") } + if !strings.HasSuffix(conf.Cwd, "/") { + return errors.Errorf(fmt.Sprintf("path %s must end with '/'", conf.Cwd)) + } conf.NeedSudo = false if err = checkAccess(conf.Cwd, conf); err != nil { return errors.Wrapf(err, "check access error") @@ -112,10 +120,6 @@ func checkDeployParams(conf *model.CKManClickHouseConfig, force bool) error { } } - // if conf.Hosts, err = common.ParseHosts(conf.Hosts); err != nil { - // return err - // } - if !force { if err := common.CheckCkInstance(conf); err != nil { return err @@ -125,16 +129,48 @@ func checkDeployParams(conf *model.CKManClickHouseConfig, force bool) error { if err = MatchingPlatfrom(conf); err != nil { return err } - //if conf.IsReplica && len(conf.Hosts)%2 == 1 { - // return errors.Errorf("When supporting replica, the number of nodes must be even") - //} - //conf.Shards = GetShardsbyHosts(conf.Hosts, conf.IsReplica) + conf.IsReplica = true - if len(conf.ZkNodes) == 0 { - return errors.Errorf("zookeeper nodes must not be empty") - } - if conf.ZkNodes, err = common.ParseHosts(conf.ZkNodes); err != nil { - return err + if conf.Keeper == model.ClickhouseKeeper { + if conf.KeeperConf == nil { + return errors.Errorf("keeper conf must not be empty") + } + if conf.KeeperConf.Runtime == model.KeeperRuntimeStandalone { + if conf.KeeperConf.KeeperNodes, err = common.ParseHosts(conf.KeeperConf.KeeperNodes); err != nil { + return err + } + if len(conf.KeeperConf.KeeperNodes) == 0 { + return errors.Errorf("keeper nodes must not be empty") + } + } else if conf.KeeperConf.Runtime == model.KeeperRuntimeInternal { + if strings.HasSuffix(conf.PkgType, common.PkgSuffixTgz) { + return errors.Errorf("keeper internal runtime doesn't support tgz deployment") + } + conf.KeeperConf.KeeperNodes = make([]string, len(conf.Hosts)) + copy(conf.KeeperConf.KeeperNodes, conf.Hosts) + + } else { + return errors.Errorf("keeper runtime %s is not supported", conf.KeeperConf.Runtime) + } + if !strings.HasSuffix(conf.KeeperConf.LogPath, "/") { + return errors.Errorf(fmt.Sprintf("path %s must end with '/'", conf.KeeperConf.LogPath)) + } + if !strings.HasSuffix(conf.KeeperConf.SnapshotPath, "/") { + return errors.Errorf(fmt.Sprintf("path %s must end with '/'", conf.KeeperConf.SnapshotPath)) + } + if err := checkAccess(conf.KeeperConf.LogPath, conf); err != nil { + return errors.Wrapf(err, "check access error") + } + if err := checkAccess(conf.KeeperConf.SnapshotPath, conf); err != nil { + return errors.Wrapf(err, "check access error") + } + } else { + if len(conf.ZkNodes) == 0 { + return errors.Errorf("zookeeper nodes must not be empty") + } + if conf.ZkNodes, err = common.ParseHosts(conf.ZkNodes); err != nil { + return err + } } if conf.LogicCluster != nil { 
if conf.Cluster == *conf.LogicCluster { diff --git a/controller/metric.go b/controller/metric.go index 0ab47300..1ddffe10 100644 --- a/controller/metric.go +++ b/controller/metric.go @@ -12,6 +12,7 @@ import ( "github.com/housepower/ckman/log" "github.com/housepower/ckman/model" "github.com/housepower/ckman/repository" + "github.com/housepower/ckman/service/clickhouse" "github.com/housepower/ckman/service/prometheus" "github.com/pkg/errors" ) @@ -154,3 +155,63 @@ func (controller *MetricController) QueryRange(c *gin.Context) { controller.wrapfunc(c, model.E_SUCCESS, value) } + +func (controller *MetricController) QueryMetric(c *gin.Context) { + clusterName := c.Param(ClickHouseClusterPath) + conf, err := repository.Ps.GetClusterbyName(clusterName) + if err != nil { + controller.wrapfunc(c, model.E_RECORD_NOT_FOUND, fmt.Sprintf("cluster %s does not exist", clusterName)) + return + } + title := c.Query("title") + start, err := strconv.ParseInt(c.Query("start"), 10, 64) + if err != nil { + controller.wrapfunc(c, model.E_INVALID_PARAMS, err) + return + } + end, err := strconv.ParseInt(c.Query("end"), 10, 64) + if err != nil { + controller.wrapfunc(c, model.E_INVALID_PARAMS, err) + return + } + step, err := strconv.ParseInt(c.Query("step"), 10, 64) + if err != nil { + controller.wrapfunc(c, model.E_INVALID_PARAMS, err) + return + } + m, ok := model.MetricMap[title] + if !ok { + controller.wrapfunc(c, model.E_INVALID_PARAMS, fmt.Errorf("title %s not found", title)) + } + query := fmt.Sprintf(`SELECT toStartOfInterval(event_time, INTERVAL %d SECOND) AS t, %s + FROM %s + WHERE event_date >= %d AND event_time >= %d + AND event_date <= %d AND event_time <= %d + GROUP BY t + ORDER BY t WITH FILL STEP %d`, step, m.Field, m.Table, start, start, end, end, step) + log.Logger.Debugf("query: %v", query) + var rsps []model.MetricRsp + for _, host := range conf.Hosts { + rsp := model.MetricRsp{ + Metric: model.Metric{ + Job: "ckman", + Name: m.Field, + Instance: host, + }, + } + tmp := conf + tmp.Hosts = []string{host} + s := clickhouse.NewCkService(&tmp) + err = s.InitCkService() + if err != nil { + return + } + data, err := s.QueryInfo(query) + if err != nil { + return + } + rsp.Value = data[1:] + rsps = append(rsps, rsp) + } + controller.wrapfunc(c, model.E_SUCCESS, rsps) +} diff --git a/controller/package.go b/controller/package.go index 6b4a854f..d6e927bc 100644 --- a/controller/package.go +++ b/controller/package.go @@ -80,7 +80,7 @@ func (controller *PackageController) Upload(c *gin.Context) { } } - err = common.GetPackages() + err = common.LoadPackages() if err != nil { controller.wrapfunc(c, model.E_UPLOAD_FAILED, err) return @@ -181,6 +181,7 @@ func (controller *PackageController) List(c *gin.Context) { if pkgType == "" { pkgType = model.PkgTypeDefault } + pkgs := common.GetAllPackages() var resp []model.PkgInfo if pkgType == "all" { @@ -242,14 +243,14 @@ func (controller *PackageController) Delete(c *gin.Context) { for _, peer := range config.GetClusterPeers() { peerUrl := "" if controller.config.Server.Https { - peerUrl = fmt.Sprintf("https://%s:%d/api/v1/package?packageVersion=%s", peer.Ip, peer.Port, packageVersion) + peerUrl = fmt.Sprintf("https://%s:%d/api/v1/package?packageVersion=%s&packageType=%s", peer.Ip, peer.Port, packageVersion, packageType) err := DeleteFileByURL(peerUrl) if err != nil { controller.wrapfunc(c, model.E_DATA_DELETE_FAILED, err) return } } else { - peerUrl = fmt.Sprintf("http://%s:%d/api/v1/package?packageVersion=%s", peer.Ip, peer.Port, packageVersion) + peerUrl = 
fmt.Sprintf("http://%s:%d/api/v1/package?packageVersion=%s&packageType=%s", peer.Ip, peer.Port, packageVersion, packageType) err := DeleteFileByURL(peerUrl) if err != nil { controller.wrapfunc(c, model.E_DATA_DELETE_FAILED, err) @@ -258,7 +259,8 @@ func (controller *PackageController) Delete(c *gin.Context) { } } } - err := common.GetPackages() + + err := common.LoadPackages() if err != nil { controller.wrapfunc(c, model.E_DATA_SELECT_FAILED, err) return diff --git a/controller/schema_ui.go b/controller/schema_ui.go index 97e16271..a5c72249 100644 --- a/controller/schema_ui.go +++ b/controller/schema_ui.go @@ -73,6 +73,11 @@ func RegistCreateClusterSchema() common.ConfigParams { DescriptionZH: "不得与本ckman管理的其他集群名重复", DescriptionEN: "not allow to duplicate with exist name", }) + params.MustRegister(conf, "Comment", &common.Parameter{ + LabelZH: "备注", + LabelEN: "Comment", + Required: "false", + }) params.MustRegister(conf, "LogicCluster", &common.Parameter{ LabelZH: "逻辑集群名", LabelEN: "Logic Name", @@ -85,6 +90,7 @@ func RegistCreateClusterSchema() common.ConfigParams { DescriptionZH: "工作路径,仅tgz部署时需要", DescriptionEN: "Working directory, only required for tgz deployment", Visiable: "PkgType.indexOf('tgz') !== -1", + Regexp: "^/.+/$", }) params.MustRegister(conf, "SshUser", &common.Parameter{ LabelZH: "系统账户名", @@ -102,7 +108,7 @@ func RegistCreateClusterSchema() common.ConfigParams { {Value: "1", LabelEN: "Password(not save)", LabelZH: "密码认证(不保存密码)"}, {Value: "2", LabelEN: "Public Key", LabelZH: "公钥认证"}, }, - Default: "2", + Default: "0", }) params.MustRegister(conf, "SshPassword", &common.Parameter{ LabelZH: "系统账户密码", @@ -124,18 +130,6 @@ func RegistCreateClusterSchema() common.ConfigParams { LabelEN: "Default Password", InputType: common.InputPassword, }) - // params.MustRegister(conf, "IsReplica", &common.Parameter{ - // LabelZH: "是否为多副本", - // LabelEN: "Replica", - // DescriptionZH: "物理集群的每个shard是否为多副本, 生产环境建议每个shard为两副本", - // DescriptionEN: "Whether each Shard of the cluster is multiple replication, we suggest each shard have two copies.", - // }) - // params.MustRegister(conf, "Hosts", &common.Parameter{ - // LabelZH: "集群结点IP地址列表", - // LabelEN: "ClickHouse Node List", - // DescriptionZH: "由ckman完成各结点分配到shard。每输入框为单个IP,或者IP范围,或者网段掩码", - // DescriptionEN: "ClickHouse Node ip, support CIDR or Range.designation by ckman automatically", - // }) params.MustRegister(conf, "Shards", &common.Parameter{ LabelZH: "集群节点配置", LabelEN: "ClickHouse Cluster Node", @@ -181,23 +175,114 @@ func RegistCreateClusterSchema() common.ConfigParams { Required: "false", Editable: "false", }) + + params.MustRegister(conf, "Keeper", &common.Parameter{ + Default: "zookeeper", + DescriptionZH: "如果使用clickhouse-keeper, 则默认由ckman托管;如果使用已有zookeeper或已经创建好的keeper集群,都视同zookeeper", + Candidates: []common.Candidate{ + {Value: model.Zookeeper, LabelEN: "Zookeeper", LabelZH: "Zookeeper"}, + {Value: model.ClickhouseKeeper, LabelEN: "ClickHouse-Keeper", LabelZH: "ClickHouse-Keeper"}, + }, + }) + + params.MustRegister(conf, "KeeperConf", &common.Parameter{ + LabelZH: "Keeper配置", + LabelEN: "KeeperConf", + DescriptionZH: "clickhouse-keeper的配置项", + Visiable: "Keeper == 'clickhouse-keeper'", + }) + + var keeper model.KeeperConf + params.MustRegister(keeper, "Runtime", &common.Parameter{ + LabelZH: "运行方式", + LabelEN: "Runtime", + Default: model.KeeperRuntimeStandalone, + DescriptionZH: "如果单独部署,则和clickhouse-server 分开进程;如果内置,则和clickhouse-server放在一块", + Candidates: []common.Candidate{ + {Value: model.KeeperRuntimeStandalone, LabelEN: 
"Standalone", LabelZH: "单独部署"}, + {Value: model.KeeperRuntimeInternal, LabelEN: "Internal", LabelZH: "内置"}, + }, + }) + params.MustRegister(keeper, "KeeperNodes", &common.Parameter{ + LabelZH: "Keeper节点", + LabelEN: "KeeperNodes", + Visiable: "Runtime == 'standalone'", + }) + + params.MustRegister(keeper, "TcpPort", &common.Parameter{ + LabelZH: "Keeper端口", + LabelEN: "TcpPort", + Default: "9181", + }) + params.MustRegister(keeper, "RaftPort", &common.Parameter{ + LabelZH: "Raft通信端口", + LabelEN: "RaftPort", + Default: "9234", + }) + params.MustRegister(keeper, "LogPath", &common.Parameter{ + LabelZH: "Log路径", + LabelEN: "LogPath", + Default: "/var/lib/", + Regexp: "^/.+/$", + }) + params.MustRegister(keeper, "SnapshotPath", &common.Parameter{ + LabelZH: "Snapshot路径", + LabelEN: "SnapshotPath", + Default: "/var/lib/", + Regexp: "^/.+/$", + }) + params.MustRegister(keeper, "Expert", &common.Parameter{ + LabelZH: "专家配置", + LabelEN: "Expert", + Required: "false", + }) + params.MustRegister(keeper, "Coordination", &common.Parameter{ + LabelZH: "协作配置", + LabelEN: "Coordination", + Required: "false", + }) + + var coordination model.Coordination + params.MustRegister(coordination, "OperationTimeoutMs", &common.Parameter{ + LabelZH: "OperationTimeoutMs", + LabelEN: "OperationTimeoutMs", + Default: "10000", + Required: "false", + }) + params.MustRegister(coordination, "SessionTimeoutMs", &common.Parameter{ + LabelZH: "SessionTimeoutMs", + LabelEN: "SessionTimeoutMs", + Default: "30000", + Required: "false", + }) + params.MustRegister(coordination, "ForceSync", &common.Parameter{ + LabelZH: "ForceSync", + LabelEN: "ForceSync", + Required: "false", + }) + params.MustRegister(coordination, "AutoForwarding", &common.Parameter{ + LabelZH: "AutoForwarding", + LabelEN: "AutoForwarding", + Required: "false", + }) + params.MustRegister(coordination, "Expert", &common.Parameter{ + LabelZH: "专家配置", + LabelEN: "Expert", + Required: "false", + }) + params.MustRegister(conf, "ZkNodes", &common.Parameter{ LabelZH: "ZooKeeper集群结点列表", LabelEN: "Zookeeper Node List", DescriptionZH: "每段为单个IP,或者IP范围,或者网段掩码", DescriptionEN: "Zookeeper Node ip, support CIDR or Range.", + Visiable: "Keeper == 'zookeeper'", }) params.MustRegister(conf, "ZkPort", &common.Parameter{ - LabelZH: "ZooKeeper集群监听端口", - LabelEN: "Zookeeper Port", - Default: "2181", - }) - params.MustRegister(conf, "ZkStatusPort", &common.Parameter{ - LabelZH: "Zookeeper监控端口", - LabelEN: "Zookeeper Status Port", - DescriptionZH: "暴露给mntr等四字命令的端口,zookeeper 3.5.0 以上支持", - DescriptionEN: "expose to commands/mntr, zookeeper support it after 3.5.0", - Default: "8080", + LabelZH: "ZooKeeper集群监听端口", + LabelEN: "Zookeeper Port", + Default: "2181", + Visiable: "Keeper == 'zookeeper'", }) params.MustRegister(conf, "PromHost", &common.Parameter{ LabelZH: "Promethues 地址", @@ -247,6 +332,7 @@ func RegistCreateClusterSchema() common.ConfigParams { LabelZH: "数据存储路径", LabelEN: "Data Path", DescriptionZH: "ClickHouse存储数据的路径,路径需要存在且必须以'/'结尾", + Default: "/var/lib/", DescriptionEN: "path need exist, must end with '/'", Regexp: "^/.+/$", }) @@ -690,6 +776,11 @@ Non-professionals please do not fill in this`, func RegistUpdateConfigSchema() common.ConfigParams { var params common.ConfigParams = make(map[string]*common.Parameter) var conf model.CKManClickHouseConfig + params.MustRegister(conf, "Comment", &common.Parameter{ + LabelZH: "备注", + LabelEN: "Comment", + Required: "false", + }) params.MustRegister(conf, "Version", &common.Parameter{ LabelZH: "ClickHouse版本", LabelEN: "Version", 
@@ -766,18 +857,6 @@ func RegistUpdateConfigSchema() common.ConfigParams { LabelEN: "SSH Port", DescriptionZH: "不得为空", }) - // params.MustRegister(conf, "IsReplica", &common.Parameter{ - // LabelZH: "是否为多副本", - // LabelEN: "Replica", - // DescriptionZH: "物理集群的每个shard是否为多副本, 生产环境建议每个shard为两副本", - // DescriptionEN: "Whether each Shard of the cluster is multiple replication, we suggest each shard have two copies.", - // }) - // params.MustRegister(conf, "Hosts", &common.Parameter{ - // LabelZH: "集群结点IP地址列表", - // LabelEN: "ClickHouse Node List", - // DescriptionZH: "由ckman完成各结点分配到shard。每输入框为单个IP,或者IP范围,或者网段掩码", - // DescriptionEN: "ClickHouse Node ip, support CIDR or Range.designation by ckman automatically", - // }) params.MustRegister(conf, "Shards", &common.Parameter{ LabelZH: "集群节点配置", LabelEN: "ClickHouse Cluster Node", @@ -800,21 +879,108 @@ func RegistUpdateConfigSchema() common.ConfigParams { LabelEN: "Replica Node IP", Editable: "false", }) + params.MustRegister(conf, "Keeper", &common.Parameter{ + DescriptionZH: "如果使用clickhouse-keeper, 则默认由ckman托管;如果使用已有zookeeper或已经创建好的keeper集群,都视同zookeeper", + Candidates: []common.Candidate{ + {Value: model.Zookeeper, LabelEN: "Zookeeper", LabelZH: "Zookeeper"}, + {Value: model.ClickhouseKeeper, LabelEN: "ClickHouse-Keeper", LabelZH: "ClickHouse-Keeper"}, + }, + Editable: "false", + }) + + params.MustRegister(conf, "KeeperConf", &common.Parameter{ + LabelZH: "Keeper配置", + LabelEN: "KeeperConf", + DescriptionZH: "clickhouse-keeper的配置项", + Visiable: "Keeper == 'clickhouse-keeper'", + }) + + var keeper model.KeeperConf + params.MustRegister(keeper, "Runtime", &common.Parameter{ + LabelZH: "运行方式", + LabelEN: "Runtime", + DescriptionZH: "如果单独部署,则和clickhouse-server 分开进程;如果内置,则和clickhouse-server放在一块", + Candidates: []common.Candidate{ + {Value: model.KeeperRuntimeStandalone, LabelEN: "Standalone", LabelZH: "单独部署"}, + {Value: model.KeeperRuntimeInternal, LabelEN: "Internal", LabelZH: "内置"}, + }, + Editable: "false", + }) + params.MustRegister(keeper, "KeeperNodes", &common.Parameter{ + LabelZH: "Keeper节点", + LabelEN: "KeeperNodes", + Editable: "false", + }) + + params.MustRegister(keeper, "TcpPort", &common.Parameter{ + LabelZH: "Keeper端口", + LabelEN: "TcpPort", + }) + params.MustRegister(keeper, "RaftPort", &common.Parameter{ + LabelZH: "Raft通信端口", + LabelEN: "RaftPort", + }) + params.MustRegister(keeper, "LogPath", &common.Parameter{ + LabelZH: "Log路径", + LabelEN: "LogPath", + Editable: "false", + }) + params.MustRegister(keeper, "SnapshotPath", &common.Parameter{ + LabelZH: "Snapshot路径", + LabelEN: "SnapshotPath", + Editable: "false", + }) + params.MustRegister(keeper, "Expert", &common.Parameter{ + LabelZH: "专家配置", + LabelEN: "Expert", + Required: "false", + }) + params.MustRegister(keeper, "Coordination", &common.Parameter{ + LabelZH: "协作配置", + LabelEN: "Coordination", + Required: "false", + }) + + var coordination model.Coordination + params.MustRegister(coordination, "OperationTimeoutMs", &common.Parameter{ + LabelZH: "OperationTimeoutMs", + LabelEN: "OperationTimeoutMs", + Default: "10000", + Required: "false", + }) + params.MustRegister(coordination, "SessionTimeoutMs", &common.Parameter{ + LabelZH: "SessionTimeoutMs", + LabelEN: "SessionTimeoutMs", + Default: "30000", + Required: "false", + }) + params.MustRegister(coordination, "ForceSync", &common.Parameter{ + LabelZH: "ForceSync", + LabelEN: "ForceSync", + Required: "false", + }) + params.MustRegister(coordination, "AutoForwarding", &common.Parameter{ + LabelZH: "AutoForwarding", + LabelEN: 
"AutoForwarding", + Required: "false", + }) + params.MustRegister(coordination, "Expert", &common.Parameter{ + LabelZH: "专家配置", + LabelEN: "Expert", + Required: "false", + }) + params.MustRegister(conf, "ZkNodes", &common.Parameter{ LabelZH: "ZooKeeper集群结点列表", LabelEN: "Zookeeper Node List", DescriptionZH: "每段为单个IP,或者IP范围,或者网段掩码", DescriptionEN: "Zookeeper Node ip, support CIDR or Range.", + Visiable: "Keeper == 'zookeeper'", }) params.MustRegister(conf, "ZkPort", &common.Parameter{ - LabelZH: "ZooKeeper集群监听端口", - LabelEN: "Zookeeper Port", - }) - params.MustRegister(conf, "ZkStatusPort", &common.Parameter{ - LabelZH: "Zookeeper监控端口", - LabelEN: "Zookeeper Status Port", - DescriptionZH: "暴露给mntr等四字命令的端口,zookeeper 3.5.0 以上支持", - DescriptionEN: "expose to commands/mntr, zookeeper support it after 3.5.0", + LabelZH: "ZooKeeper集群监听端口", + LabelEN: "Zookeeper Port", + Visiable: "Keeper == 'zookeeper'", }) params.MustRegister(conf, "PromHost", &common.Parameter{ LabelZH: "Promethues 地址", @@ -1370,6 +1536,27 @@ func RegistRebalanceClusterSchema() common.ConfigParams { Required: "false", }) + params.MustRegister(key, "AllowLossRate", &common.Parameter{ + LabelZH: "允许错误率", + LabelEN: "AllowLossRate", + DescriptionZH: "均衡数据过程中允许数据的丢失率", + DescriptionEN: "Allow the loss rate during the data balancing process", + Range: &common.Range{ + Min: 0, + Max: 1, + Step: 0.01, + }, + Default: "0", + Required: "false", + }) + params.MustRegister(key, "SaveTemps", &common.Parameter{ + LabelZH: "保留临时数据", + LabelEN: "SaveTemps", + DescriptionZH: "均衡数据过程中保存原始数据到临时表", + DescriptionEN: "Save the original data to a temporary table during data balancing", + Required: "false", + }) + return params } diff --git a/controller/zookeeper.go b/controller/zookeeper.go index d2bbde83..f5e270b5 100644 --- a/controller/zookeeper.go +++ b/controller/zookeeper.go @@ -4,10 +4,12 @@ import ( "fmt" "strings" + "github.com/housepower/ckman/log" "github.com/housepower/ckman/repository" "github.com/gin-gonic/gin" "github.com/housepower/ckman/model" + "github.com/housepower/ckman/service/clickhouse" "github.com/housepower/ckman/service/zookeeper" jsoniter "github.com/json-iterator/go" ) @@ -46,15 +48,23 @@ func (controller *ZookeeperController) GetStatus(c *gin.Context) { return } - zkList := make([]model.ZkStatusRsp, len(conf.ZkNodes)) - for index, node := range conf.ZkNodes { + nodes, port := zookeeper.GetZkInfo(&conf) + + zkList := make([]model.ZkStatusRsp, len(nodes)) + for index, node := range nodes { tmp := model.ZkStatusRsp{ Host: node, } - body, err := zookeeper.ZkMetric(node, conf.ZkStatusPort, "mntr") + body, err := zookeeper.ZkMetric(node, port, "mntr") if err != nil { - controller.wrapfunc(c, model.E_ZOOKEEPER_ERROR, fmt.Sprintf("get zookeeper node %s satus fail: %v", node, err)) - return + // controller.wrapfunc(c, model.E_ZOOKEEPER_ERROR, fmt.Sprintf("get zookeeper node %s satus fail: %v", node, err)) + // return + log.Logger.Warnf("get zookeeper node %s satus fail: %v", node, err) + tmp.Version = "unknown" + tmp.ServerState = "unknown" + tmp.PeerState = "offline" + zkList[index] = tmp + continue } _ = json.Unmarshal(body, &tmp) tmp.Version = strings.Split(strings.Split(tmp.Version, ",")[0], "-")[0] @@ -83,13 +93,7 @@ func (controller *ZookeeperController) GetReplicatedTableStatus(c *gin.Context) return } - zkService, err := zookeeper.GetZkService(clusterName) - if err != nil { - controller.wrapfunc(c, model.E_ZOOKEEPER_ERROR, fmt.Sprintf("get zookeeper service fail: %v", err)) - return - } - - tables, err := 
zkService.GetReplicatedTableStatus(&conf) + tables, err := clickhouse.GetReplicatedTableStatus(&conf) if err != nil { controller.wrapfunc(c, model.E_ZOOKEEPER_ERROR, err) return diff --git a/deploy/base.go b/deploy/base.go index 043e92a8..61577881 100644 --- a/deploy/base.go +++ b/deploy/base.go @@ -3,6 +3,7 @@ package deploy type Packages struct { PkgLists []string Cwd string + Keeper string } type DeployBase struct { diff --git a/deploy/ck.go b/deploy/ck.go index 98070832..e468e5d5 100644 --- a/deploy/ck.go +++ b/deploy/ck.go @@ -178,7 +178,7 @@ func (d *CKDeploy) Install() error { d.Conf.Normalize() cmdIns := GetSuitableCmdAdpt(d.Conf.PkgType) cmds := make([]string, 0) - cmds = append(cmds, cmdIns.InstallCmd(d.Packages)) + cmds = append(cmds, cmdIns.InstallCmd(CkSvrName, d.Packages)) cmds = append(cmds, fmt.Sprintf("rm -rf %s", path.Join(d.Conf.Path, "clickhouse"))) cmds = append(cmds, fmt.Sprintf("mkdir -p %s", path.Join(d.Conf.Path, "clickhouse"))) if d.Conf.NeedSudo { @@ -243,7 +243,7 @@ func (d *CKDeploy) Uninstall() error { d.Conf.Normalize() cmdIns := GetSuitableCmdAdpt(d.Conf.PkgType) cmds := make([]string, 0) - cmds = append(cmds, cmdIns.Uninstall(d.Packages, d.Conf.Version)) + cmds = append(cmds, cmdIns.Uninstall(CkSvrName, d.Packages, d.Conf.Version)) cmds = append(cmds, fmt.Sprintf("rm -rf %s", path.Join(d.Conf.Path, "clickhouse"))) if d.Conf.NeedSudo { cmds = append(cmds, "rm -rf /etc/clickhouse-server") @@ -284,7 +284,7 @@ func (d *CKDeploy) Uninstall() error { func (d *CKDeploy) Upgrade() error { d.Conf.Normalize() cmdIns := GetSuitableCmdAdpt(d.Conf.PkgType) - cmd := cmdIns.UpgradeCmd(d.Packages) + cmd := cmdIns.UpgradeCmd(CkSvrName, d.Packages) var lastError error var wg sync.WaitGroup for _, host := range d.Conf.Hosts { @@ -391,6 +391,30 @@ func (d *CKDeploy) Config() error { } confFiles = append(confFiles, hostXml) + var keeperFile common.TempFile + if d.Conf.Keeper == model.ClickhouseKeeper && !d.Conf.KeeperWithStanalone() { + var err error + keeperFile, err = common.NewTempFile(path.Join(config.GetWorkDirectory(), "package"), "keeper_config") + if err != nil { + lastError = err + return + } + defer os.Remove(keeperFile.FullName) + idx := 0 + for i, kn := range d.Conf.KeeperConf.KeeperNodes { + if kn == innerHost { + idx = i + break + } + } + keeperXml, err := ckconfig.GenerateKeeperXML(keeperFile.FullName, d.Conf, d.Ext.Ipv6Enable, idx+1) + if err != nil { + lastError = err + return + } + confFiles = append(confFiles, keeperXml) + } + if err := common.ScpUploadFiles(confFiles, path.Join(remotePath, "config.d"), sshOpts); err != nil { lastError = err return @@ -403,6 +427,10 @@ func (d *CKDeploy) Config() error { cmds := make([]string, 0) cmds = append(cmds, fmt.Sprintf("mv %s %s", path.Join(remotePath, "config.d", hostFile.BaseName), path.Join(remotePath, "config.d", "host.xml"))) cmds = append(cmds, fmt.Sprintf("mv %s %s", path.Join(remotePath, "users.d", usersFile.BaseName), path.Join(remotePath, "users.d", "users.xml"))) + if d.Conf.Keeper == model.ClickhouseKeeper && !d.Conf.KeeperWithStanalone() { + cmds = append(cmds, fmt.Sprintf("mv %s %s", path.Join(remotePath, "config.d", keeperFile.BaseName), path.Join(remotePath, "config.d", "keeper_config.xml"))) + } + cmds = append(cmds, "rm -rf /tmp/host* /tmp/users*") if d.Conf.NeedSudo { cmds = append(cmds, "chown -R clickhouse:clickhouse /etc/clickhouse-server") } @@ -421,6 +449,9 @@ func (d *CKDeploy) Config() error { if d.Conf.LogicCluster != nil { logicMetrika, deploys := GenLogicMetrika(d) for _, deploy := range 
deploys { + if d.Ext.CurClusterOnly && d.Conf.Cluster != deploy.Conf.Cluster { + continue + } deploy.Conf.Normalize() metrikaFile, err := common.NewTempFile(path.Join(config.GetWorkDirectory(), "package"), "metrika") if err != nil { @@ -615,10 +646,8 @@ func (d *CKDeploy) Check(timeout int) error { log.Logger.Errorf("ping error: %v", err) continue } - if err == nil { - log.Logger.Debugf("host %s check done", innerHost) - return - } + log.Logger.Debugf("host %s check done", innerHost) + return case <-ticker2.C: lastError = errors.Wrapf(model.CheckTimeOutErr, "clickhouse-server may start failed, please check the clickhouse-server log") return @@ -806,6 +835,7 @@ func BuildPackages(version, pkgType, cwd string) Packages { if !ok { return Packages{} } + var keeper string for _, pkg := range pkgs.(common.CkPackageFiles) { if pkg.Version == version { if pkg.Module == common.PkgModuleCommon { @@ -814,6 +844,8 @@ func BuildPackages(version, pkgType, cwd string) Packages { pkgLists[1] = pkg.PkgName } else if pkg.Module == common.PkgModuleClient { pkgLists[2] = pkg.PkgName + } else if pkg.Module == common.PkgModuleKeeper { + keeper = pkg.PkgName } } } @@ -821,5 +853,6 @@ func BuildPackages(version, pkgType, cwd string) Packages { return Packages{ PkgLists: pkgLists, Cwd: cwd, + Keeper: keeper, } } diff --git a/deploy/cmd.go b/deploy/cmd.go index 0795f44c..85df0140 100644 --- a/deploy/cmd.go +++ b/deploy/cmd.go @@ -10,9 +10,9 @@ type CmdAdpt interface { StartCmd(svr, cwd string) string StopCmd(svr, cwd string) string RestartCmd(svr, cwd string) string - InstallCmd(pkgs Packages) string - UpgradeCmd(pkgs Packages) string - Uninstall(pkgs Packages, version string) string + InstallCmd(svr string, pkgs Packages) string + UpgradeCmd(svr string, pkgs Packages) string + Uninstall(svr string, pkgs Packages, version string) string } type CmdFactory interface { diff --git a/deploy/cmd_deb.go b/deploy/cmd_deb.go index 4e42a1da..7ccd41f8 100644 --- a/deploy/cmd_deb.go +++ b/deploy/cmd_deb.go @@ -26,20 +26,25 @@ func (p *DebPkg) RestartCmd(svr, cwd string) string { return "service " + svr + " restart" } -func (p *DebPkg) InstallCmd(pkgs Packages) string { - for idx, pkg := range pkgs.PkgLists { - pkgs.PkgLists[idx] = path.Join(common.TmpWorkDirectory, pkg) +func (p *DebPkg) InstallCmd(svr string, pkgs Packages) string { + if svr == CkSvrName { + for idx, pkg := range pkgs.PkgLists { + pkgs.PkgLists[idx] = path.Join(common.TmpWorkDirectory, pkg) + } + return "DEBIAN_FRONTEND=noninteractive dpkg -i " + strings.Join(pkgs.PkgLists, " ") + } else { + return "dpkg -i " + pkgs.Keeper } - return "DEBIAN_FRONTEND=noninteractive dpkg -i " + strings.Join(pkgs.PkgLists, " ") } -func (p *DebPkg) UpgradeCmd(pkgs Packages) string { - for idx, pkg := range pkgs.PkgLists { - pkgs.PkgLists[idx] = path.Join(common.TmpWorkDirectory, pkg) - } - return "DEBIAN_FRONTEND=noninteractive dpkg -i " + strings.Join(pkgs.PkgLists, " ") +func (p *DebPkg) UpgradeCmd(svr string, pkgs Packages) string { + return p.InstallCmd(svr, pkgs) } -func (p *DebPkg) Uninstall(pkgs Packages, version string) string { - return "dpkg -P clickhouse-client clickhouse-common-static clickhouse-server" +func (p *DebPkg) Uninstall(svr string, pkgs Packages, version string) string { + if svr == CkSvrName { + return "dpkg -P clickhouse-client clickhouse-common-static clickhouse-server" + } else { + return "dpkg -P clickhouse-keeper" + } } diff --git a/deploy/cmd_pkg_test.go b/deploy/cmd_pkg_test.go index 34a248a6..341997d9 100644 --- a/deploy/cmd_pkg_test.go +++ 
b/deploy/cmd_pkg_test.go @@ -37,7 +37,7 @@ func TestTgzPkg_InstallCmd(t *testing.T) { Cwd: "/home/eoi/clickhouse", } p := TgzFacotry{}.Create() - out := p.InstallCmd(pkgs) + out := p.InstallCmd(CkSvrName, pkgs) expect := `mkdir -p /home/eoi/clickhouse/bin /home/eoi/clickhouse/etc/clickhouse-server/config.d /home/eoi/clickhouse/etc/clickhouse-server/users.d /home/eoi/clickhouse/log/clickhouse-server /home/eoi/clickhouse/run /home/eoi/clickhouse/data/clickhouse;tar -xvf /tmp/clickhouse-common-static-22.3.6.5-amd64.tgz -C /tmp;cp -rf /tmp/clickhouse-common-static-22.3.6.5/usr/bin/* /home/eoi/clickhouse/bin;tar -xvf /tmp/clickhouse-server-22.3.6.5-amd64.tgz -C /tmp;cp -rf /tmp/clickhouse-server-22.3.6.5/usr/bin/* /home/eoi/clickhouse/bin;cp -rf /tmp/clickhouse-server-22.3.6.5/etc/clickhouse-* /home/eoi/clickhouse/etc/;tar -xvf /tmp/clickhouse-client-22.3.6.5-amd64.tgz -C /tmp;cp -rf /tmp/clickhouse-client-22.3.6.5/usr/bin/* /home/eoi/clickhouse/bin;cp -rf /tmp/clickhouse-client-22.3.6.5/etc/clickhouse-* /home/eoi/clickhouse/etc/` assert.Equal(t, expect, out) } @@ -53,7 +53,7 @@ func TestTgzPkg_UninstallCmd(t *testing.T) { } p := TgzFacotry{}.Create() expect := "rm -rf /home/eoi/clickhouse/*" - out := p.Uninstall(pkgs, "22.3.6.5") + out := p.Uninstall(CkSvrName, pkgs, "22.3.6.5") assert.Equal(t, expect, out) } @@ -67,7 +67,7 @@ func TestTgzPkg_UpgradeCmd(t *testing.T) { Cwd: "/home/eoi/clickhouse", } p := TgzFacotry{}.Create() - out := p.UpgradeCmd(pkgs) + out := p.UpgradeCmd(CkSvrName, pkgs) expect := `mkdir -p /home/eoi/clickhouse/bin /home/eoi/clickhouse/etc/clickhouse-server/config.d /home/eoi/clickhouse/etc/clickhouse-server/users.d /home/eoi/clickhouse/log/clickhouse-server /home/eoi/clickhouse/run /home/eoi/clickhouse/data/clickhouse;tar -xvf /tmp/clickhouse-common-static-22.3.6.5-amd64.tgz -C /tmp;cp -rf /tmp/clickhouse-common-static-22.3.6.5/usr/bin/* /home/eoi/clickhouse/bin;tar -xvf /tmp/clickhouse-server-22.3.6.5-amd64.tgz -C /tmp;cp -rf /tmp/clickhouse-server-22.3.6.5/usr/bin/* /home/eoi/clickhouse/bin;cp -rf /tmp/clickhouse-server-22.3.6.5/etc/clickhouse-* /home/eoi/clickhouse/etc/;tar -xvf /tmp/clickhouse-client-22.3.6.5-amd64.tgz -C /tmp;cp -rf /tmp/clickhouse-client-22.3.6.5/usr/bin/* /home/eoi/clickhouse/bin;cp -rf /tmp/clickhouse-client-22.3.6.5/etc/clickhouse-* /home/eoi/clickhouse/etc/` assert.Equal(t, expect, out) } @@ -102,7 +102,7 @@ func TestRpmPkg_InstallCmd(t *testing.T) { }, } p := RpmFacotry{}.Create() - out := p.InstallCmd(pkgs) + out := p.InstallCmd(CkSvrName, pkgs) expect := `DEBIAN_FRONTEND=noninteractive rpm --force --nosignature --nodeps -ivh /tmp/clickhouse-common-static-22.3.6.5-amd64.tgz;DEBIAN_FRONTEND=noninteractive rpm --force --nosignature --nodeps -ivh /tmp/clickhouse-server-22.3.6.5-amd64.tgz;DEBIAN_FRONTEND=noninteractive rpm --force --nosignature --nodeps -ivh /tmp/clickhouse-client-22.3.6.5-amd64.tgz` assert.Equal(t, expect, out) } @@ -116,7 +116,7 @@ func TestRpmPkg_UninstallCmd(t *testing.T) { }, } p := RpmFacotry{}.Create() - out := p.Uninstall(pkgs, "22.3.6.5") + out := p.Uninstall(CkSvrName, pkgs, "22.3.6.5") expect := `rpm -e $(rpm -qa |grep clickhouse |grep 22.3.6.5)` assert.Equal(t, expect, out) } @@ -130,7 +130,7 @@ func TestRpmPkg_UpgradeCmd(t *testing.T) { }, } p := RpmFacotry{}.Create() - out := p.UpgradeCmd(pkgs) + out := p.UpgradeCmd(CkSvrName, pkgs) expect := `DEBIAN_FRONTEND=noninteractive rpm --force --nosignature --nodeps -Uvh /tmp/clickhouse-common-static-22.3.6.5-amd64.tgz;DEBIAN_FRONTEND=noninteractive rpm --force --nosignature 
--nodeps -Uvh /tmp/clickhouse-server-22.3.6.5-amd64.tgz;DEBIAN_FRONTEND=noninteractive rpm --force --nosignature --nodeps -Uvh /tmp/clickhouse-client-22.3.6.5-amd64.tgz` assert.Equal(t, expect, out) } @@ -165,7 +165,7 @@ func TestDebPkg_InstallCmd(t *testing.T) { }, } p := DebFacotry{}.Create() - out := p.InstallCmd(pkgs) + out := p.InstallCmd(CkSvrName, pkgs) expect := `DEBIAN_FRONTEND=noninteractive dpkg -i /tmp/clickhouse-common-static-22.3.6.5-amd64.tgz /tmp/clickhouse-server-22.3.6.5-amd64.tgz /tmp/clickhouse-client-22.3.6.5-amd64.tgz` assert.Equal(t, expect, out) } @@ -179,7 +179,7 @@ func TestDebPkg_UninstallCmd(t *testing.T) { }, } p := DebFacotry{}.Create() - out := p.Uninstall(pkgs, "22.3.6.5") + out := p.Uninstall(CkSvrName, pkgs, "22.3.6.5") expect := `dpkg -P clickhouse-client clickhouse-common-static clickhouse-server` assert.Equal(t, expect, out) } @@ -194,7 +194,7 @@ func TestDebPkg_UpgradeCmd(t *testing.T) { Cwd: "/home/eoi/clickhouse", } p := DebFacotry{}.Create() - out := p.UpgradeCmd(pkgs) + out := p.UpgradeCmd(CkSvrName, pkgs) expect := `DEBIAN_FRONTEND=noninteractive dpkg -i /tmp/clickhouse-common-static-22.3.6.5-amd64.tgz /tmp/clickhouse-server-22.3.6.5-amd64.tgz /tmp/clickhouse-client-22.3.6.5-amd64.tgz` assert.Equal(t, expect, out) } diff --git a/deploy/cmd_rpm.go b/deploy/cmd_rpm.go index 89a72d53..70b2e0b3 100644 --- a/deploy/cmd_rpm.go +++ b/deploy/cmd_rpm.go @@ -31,22 +31,34 @@ func (p *RpmPkg) RestartCmd(svr, cwd string) string { return "systemctl restart " + svr } -func (p *RpmPkg) InstallCmd(pkgs Packages) string { +func (p *RpmPkg) InstallCmd(svr string, pkgs Packages) string { var cmd string - for _, pkg := range pkgs.PkgLists { - cmd += fmt.Sprintf("%s -ivh %s;", rpmPrefix, path.Join(common.TmpWorkDirectory, pkg)) + if svr == CkSvrName { + for _, pkg := range pkgs.PkgLists { + cmd += fmt.Sprintf("%s -ivh %s;", rpmPrefix, path.Join(common.TmpWorkDirectory, pkg)) + } + } else if svr == KeeperSvrName { + cmd += fmt.Sprintf("%s -ivh %s;", rpmPrefix, path.Join(common.TmpWorkDirectory, pkgs.Keeper)) } return strings.TrimSuffix(cmd, ";") } -func (p *RpmPkg) UpgradeCmd(pkgs Packages) string { +func (p *RpmPkg) UpgradeCmd(svr string, pkgs Packages) string { var cmd string - for _, pkg := range pkgs.PkgLists { - cmd += fmt.Sprintf("%s -Uvh %s;", rpmPrefix, path.Join(common.TmpWorkDirectory, pkg)) + if svr == CkSvrName { + for _, pkg := range pkgs.PkgLists { + cmd += fmt.Sprintf("%s -Uvh %s;", rpmPrefix, path.Join(common.TmpWorkDirectory, pkg)) + } + } else if svr == KeeperSvrName { + cmd += fmt.Sprintf("%s -Uvh %s;", rpmPrefix, path.Join(common.TmpWorkDirectory, pkgs.Keeper)) } return strings.TrimSuffix(cmd, ";") } -func (p *RpmPkg) Uninstall(pkgs Packages, version string) string { - return fmt.Sprintf("rpm -e $(rpm -qa |grep clickhouse |grep %s)", version) +func (p *RpmPkg) Uninstall(svr string, pkgs Packages, version string) string { + if svr == KeeperSvrName { + return fmt.Sprintf("rpm -e $(rpm -qa |grep clickhouse-keeper |grep %s)", version) + } else { + return fmt.Sprintf("rpm -e $(rpm -qa |grep clickhouse |grep %s)", version) + } } diff --git a/deploy/cmd_tgz.go b/deploy/cmd_tgz.go index bd365ee7..059416a0 100644 --- a/deploy/cmd_tgz.go +++ b/deploy/cmd_tgz.go @@ -16,40 +16,53 @@ func (TgzFacotry) Create() CmdAdpt { type TgzPkg struct{} func (p *TgzPkg) StartCmd(svr, cwd string) string { - return fmt.Sprintf("%s/bin/%s --config-file=%s/etc/%s/config.xml --pid-file=%s/run/%s.pid --daemon", cwd, svr, cwd, svr, cwd, svr) + if svr == KeeperSvrName { + return 
fmt.Sprintf("%sbin/%s --config-file=%setc/%s/keeper_config.xml --pid-file=%srun/%s.pid --daemon", cwd, svr, cwd, svr, cwd, svr) + } else { + return fmt.Sprintf("%sbin/%s --config-file=%setc/%s/config.xml --pid-file=%srun/%s.pid --daemon", cwd, svr, cwd, svr, cwd, svr) + } } func (p *TgzPkg) StopCmd(svr, cwd string) string { - return fmt.Sprintf("ps -ef |grep %s/bin/%s |grep -v grep |awk '{print $2}' |xargs kill", cwd, svr) + return fmt.Sprintf("ps -ef |grep %sbin/%s |grep -v grep |awk '{print $2}' |xargs kill", cwd, svr) } func (p *TgzPkg) RestartCmd(svr, cwd string) string { - return p.StopCmd(svr, cwd) + ";" + p.StartCmd(svr, cwd) + return p.StopCmd(svr, cwd) + "; sleep 5;" + p.StartCmd(svr, cwd) } -func (p *TgzPkg) InstallCmd(pkgs Packages) string { - content := fmt.Sprintf("mkdir -p %s/bin %s/etc/clickhouse-server/config.d %s/etc/clickhouse-server/users.d %s/log/clickhouse-server %s/run %s/data/clickhouse;", pkgs.Cwd, pkgs.Cwd, pkgs.Cwd, pkgs.Cwd, pkgs.Cwd, pkgs.Cwd) - for _, pkg := range pkgs.PkgLists { +func (p *TgzPkg) InstallCmd(svr string, pkgs Packages) string { + content := "" + if svr == CkSvrName { + content = fmt.Sprintf("mkdir -p %sbin %setc/clickhouse-server/config.d %setc/clickhouse-server/users.d %slog/clickhouse-server %srun;", + pkgs.Cwd, pkgs.Cwd, pkgs.Cwd, pkgs.Cwd, pkgs.Cwd) + for _, pkg := range pkgs.PkgLists { + lastIndex := strings.LastIndex(pkg, "-") + extractDir := pkg[:lastIndex] + content += fmt.Sprintf("tar -xf /tmp/%s -C /tmp;", pkg) + content += fmt.Sprintf("cp -rf /tmp/%s/usr/bin/* %sbin;", extractDir, pkgs.Cwd) + if strings.Contains(extractDir, common.PkgModuleClient) { + content += fmt.Sprintf("cp -rf /tmp/%s/etc/clickhouse-client %setc/;", extractDir, pkgs.Cwd) + } else if strings.Contains(extractDir, common.PkgModuleServer) { + content += fmt.Sprintf("cp -rf /tmp/%s/etc/clickhouse-server %setc/;", extractDir, pkgs.Cwd) + } + } + } else if svr == KeeperSvrName { + content = fmt.Sprintf("mkdir -p %sbin %s/etc/clickhouse-keeper %slog/clickhouse-keeper %srun;", + pkgs.Cwd, pkgs.Cwd, pkgs.Cwd, pkgs.Cwd) + pkg := pkgs.Keeper lastIndex := strings.LastIndex(pkg, "-") extractDir := pkg[:lastIndex] - content += fmt.Sprintf("tar -xvf /tmp/%s -C /tmp;", pkg) - content += fmt.Sprintf("cp -rf /tmp/%s/usr/bin/* %s/bin;", extractDir, pkgs.Cwd) - if !strings.Contains(extractDir, common.PkgModuleCommon) { - content += fmt.Sprintf("cp -rf /tmp/%s/etc/clickhouse-* %s/etc/;", extractDir, pkgs.Cwd) - } + content += fmt.Sprintf("tar -xf /tmp/%s -C /tmp;", pkg) + content += fmt.Sprintf("cp -rf /tmp/%s/usr/bin/* %sbin;", extractDir, pkgs.Cwd) + content += fmt.Sprintf("cp -rf /tmp/%s/etc/clickhouse-keeper/* %setc/clickhouse-keeper/;", extractDir, pkgs.Cwd) } - //content += fmt.Sprintf(`echo "PATH=$PATH:%s/bin" > %s/.profile;`, pkgs.Cwd, pkgs.Cwd) - //content += fmt.Sprintf(`echo source %s/.profile >> ${HOME}/.bash_profile;`, pkgs.Cwd) - //content += fmt.Sprintf("source ${HOME}/.bash_profile;") - //content += "useradd clickhouse;" - //content += "groupadd clickhouse;" - //content += fmt.Sprintf("chown -R clickhouse:clickhouse %s", pkgs.Cwd) return strings.TrimSuffix(content, ";") } -func (p *TgzPkg) UpgradeCmd(pkgs Packages) string { - return p.InstallCmd(pkgs) +func (p *TgzPkg) UpgradeCmd(svr string, pkgs Packages) string { + return p.InstallCmd(svr, pkgs) } -func (p *TgzPkg) Uninstall(pkgs Packages, version string) string { - return fmt.Sprintf("rm -rf %s/*", pkgs.Cwd) +func (p *TgzPkg) Uninstall(svr string, pkgs Packages, version string) string { + return fmt.Sprintf("rm -rf 
%s*", pkgs.Cwd) } diff --git a/deploy/keeper.go b/deploy/keeper.go new file mode 100644 index 00000000..6ca87d5d --- /dev/null +++ b/deploy/keeper.go @@ -0,0 +1,503 @@ +package deploy + +import ( + "encoding/gob" + "fmt" + "os" + "path" + "strings" + "sync" + "time" + + "github.com/housepower/ckman/ckconfig" + "github.com/housepower/ckman/service/zookeeper" + + "github.com/housepower/ckman/config" + "github.com/pkg/errors" + + "github.com/housepower/ckman/common" + "github.com/housepower/ckman/log" + "github.com/housepower/ckman/model" +) + +func init() { + gob.Register(KeeperDeploy{}) +} + +const ( + KeeperSvrName string = "clickhouse-keeper" +) + +type KeeperDeploy struct { + DeployBase + Conf *model.CKManClickHouseConfig + HostInfos []ckconfig.HostInfo + Ext model.CkDeployExt +} + +func NewKeeperDeploy(conf model.CKManClickHouseConfig, packages Packages) *KeeperDeploy { + return &KeeperDeploy{ + Conf: &conf, + DeployBase: DeployBase{ + Packages: packages, + }, + } +} + +func (d *KeeperDeploy) Init() error { + d.Conf.Normalize() + d.HostInfos = make([]ckconfig.HostInfo, len(d.Conf.KeeperConf.KeeperNodes)) + var lastError error + var wg sync.WaitGroup + d.Ext.Ipv6Enable = true + for _, host := range d.Conf.KeeperConf.KeeperNodes { + innerHost := host + wg.Add(1) + _ = common.Pool.Submit(func() { + defer wg.Done() + sshOpts := common.SshOptions{ + User: d.Conf.SshUser, + Password: d.Conf.SshPassword, + Port: d.Conf.SshPort, + Host: innerHost, + NeedSudo: d.Conf.NeedSudo, + AuthenticateType: d.Conf.AuthenticateType, + } + kpath := "" + if !d.Conf.NeedSudo { + kpath = path.Join(d.Conf.Cwd, d.Conf.Path, "clickhouse-keeper") + } else { + kpath = path.Join(d.Conf.Path, "clickhouse-keeper") + } + cmd1 := fmt.Sprintf("mkdir -p %s ; chown -R clickhouse:clickhouse %s", kpath, kpath) + _, err := common.RemoteExecute(sshOpts, cmd1) + if err != nil { + lastError = err + return + } + if d.Ext.Ipv6Enable { + cmd2 := "grep lo /proc/net/if_inet6 >/dev/null 2>&1; echo $?" 
+ output, err := common.RemoteExecute(sshOpts, cmd2) + if err != nil { + lastError = err + return + } + + ipv6Enable := strings.Trim(output, "\n") + if ipv6Enable != "0" { + //file not exists, return 2, file exists but empty, return 1 + d.Ext.Ipv6Enable = false + } + } + }) + } + wg.Wait() + if lastError != nil { + return lastError + } + log.Logger.Infof("init done") + return nil +} + +func (d *KeeperDeploy) Prepare() error { + d.Conf.Normalize() + file := path.Join(config.GetWorkDirectory(), common.DefaultPackageDirectory, d.Packages.Keeper) + + var lastError error + var wg sync.WaitGroup + for _, host := range d.Conf.KeeperConf.KeeperNodes { + innerHost := host + wg.Add(1) + _ = common.Pool.Submit(func() { + defer wg.Done() + sshOpts := common.SshOptions{ + User: d.Conf.SshUser, + Password: d.Conf.SshPassword, + Port: d.Conf.SshPort, + Host: innerHost, + NeedSudo: d.Conf.NeedSudo, + AuthenticateType: d.Conf.AuthenticateType, + } + if err := common.ScpUploadFiles([]string{file}, common.TmpWorkDirectory, sshOpts); err != nil { + lastError = err + return + } + log.Logger.Debugf("host %s prepare done", innerHost) + }) + } + wg.Wait() + if lastError != nil { + return lastError + } + log.Logger.Infof("prepare done") + return nil +} + +func (d *KeeperDeploy) Install() error { + d.Conf.Normalize() + cmdIns := GetSuitableCmdAdpt(d.Conf.PkgType) + cmds := make([]string, 0) + cmds = append(cmds, cmdIns.InstallCmd(KeeperSvrName, d.Packages)) + cmds = append(cmds, fmt.Sprintf("rm -rf %s/* %s/*", d.Conf.KeeperConf.LogPath, d.Conf.KeeperConf.SnapshotPath)) + if d.Conf.NeedSudo { + cmds = append(cmds, fmt.Sprintf("chown clickhouse.clickhouse %s %s -R", d.Conf.KeeperConf.LogPath, d.Conf.KeeperConf.SnapshotPath)) + } + var lastError error + var wg sync.WaitGroup + for _, host := range d.Conf.KeeperConf.KeeperNodes { + innerHost := host + wg.Add(1) + _ = common.Pool.Submit(func() { + defer wg.Done() + sshOpts := common.SshOptions{ + User: d.Conf.SshUser, + Password: d.Conf.SshPassword, + Port: d.Conf.SshPort, + Host: innerHost, + NeedSudo: d.Conf.NeedSudo, + AuthenticateType: d.Conf.AuthenticateType, + } + cmd1 := cmdIns.StopCmd(KeeperSvrName, d.Conf.Cwd) + _, _ = common.RemoteExecute(sshOpts, cmd1) + + cmd2 := strings.Join(cmds, ";") + _, err := common.RemoteExecute(sshOpts, cmd2) + if err != nil { + lastError = err + return + } + if d.Conf.Cwd != "" { + //tgz deployment, try to add auto start + pkg := d.Packages.Keeper + lastIndex := strings.LastIndex(pkg, "-") + extractDir := pkg[:lastIndex] + + cmd3 := fmt.Sprintf("cp /tmp/%s/lib/systemd/system/clickhouse-keeper.service /etc/systemd/system/", extractDir) + sshOpts.NeedSudo = true + _, err = common.RemoteExecute(sshOpts, cmd3) + if err != nil { + log.Logger.Warnf("try to config autorestart failed:%v", err) + } + } + + log.Logger.Debugf("host %s install done", innerHost) + }) + } + wg.Wait() + if lastError != nil { + return lastError + } + log.Logger.Infof("install done") + return nil +} + +func (d *KeeperDeploy) Uninstall() error { + d.Conf.Normalize() + cmdIns := GetSuitableCmdAdpt(d.Conf.PkgType) + cmds := make([]string, 0) + cmds = append(cmds, cmdIns.Uninstall(KeeperSvrName, d.Packages, d.Conf.Version)) + cmds = append(cmds, fmt.Sprintf("rm -rf %s/* %s/*", d.Conf.KeeperConf.LogPath, d.Conf.KeeperConf.SnapshotPath)) + if d.Conf.NeedSudo { + cmds = append(cmds, "rm -rf /etc/clickhouse-keeper") + } + var lastError error + var wg sync.WaitGroup + for _, host := range d.Conf.KeeperConf.KeeperNodes { + innerHost := host + wg.Add(1) + _ = 
common.Pool.Submit(func() { + defer wg.Done() + sshOpts := common.SshOptions{ + User: d.Conf.SshUser, + Password: d.Conf.SshPassword, + Port: d.Conf.SshPort, + Host: innerHost, + NeedSudo: d.Conf.NeedSudo, + AuthenticateType: d.Conf.AuthenticateType, + } + cmd := strings.Join(cmds, ";") + _, err := common.RemoteExecute(sshOpts, cmd) + if err != nil { + lastError = err + return + } + log.Logger.Debugf("host %s uninstall done", innerHost) + }) + } + wg.Wait() + if lastError != nil { + return lastError + } + log.Logger.Infof("uninstall done") + return nil +} + +func (d *KeeperDeploy) Upgrade() error { + d.Conf.Normalize() + cmdIns := GetSuitableCmdAdpt(d.Conf.PkgType) + cmd := cmdIns.UpgradeCmd(KeeperSvrName, d.Packages) + var lastError error + var wg sync.WaitGroup + for _, host := range d.Conf.KeeperConf.KeeperNodes { + innerHost := host + wg.Add(1) + _ = common.Pool.Submit(func() { + defer wg.Done() + sshOpts := common.SshOptions{ + User: d.Conf.SshUser, + Password: d.Conf.SshPassword, + Port: d.Conf.SshPort, + Host: innerHost, + NeedSudo: d.Conf.NeedSudo, + AuthenticateType: d.Conf.AuthenticateType, + } + _, err := common.RemoteExecute(sshOpts, cmd) + if err != nil { + lastError = err + return + } + log.Logger.Debugf("host %s upgrade done", innerHost) + }) + } + wg.Wait() + if lastError != nil { + return lastError + } + log.Logger.Infof("upgrade done") + return nil +} + +func (d *KeeperDeploy) Config() error { + d.Conf.Normalize() + confFiles := make([]string, 0) + + var remotePath string + if d.Conf.NeedSudo { + remotePath = "/etc/clickhouse-keeper" + } else { + remotePath = path.Join(d.Conf.Cwd, "etc", "clickhouse-keeper") + } + var lastError error + var wg sync.WaitGroup + for index, host := range d.Conf.KeeperConf.KeeperNodes { + innerIndex := index + innerHost := host + confFiles := confFiles + wg.Add(1) + _ = common.Pool.Submit(func() { + defer wg.Done() + sshOpts := common.SshOptions{ + User: d.Conf.SshUser, + Password: d.Conf.SshPassword, + Port: d.Conf.SshPort, + Host: innerHost, + NeedSudo: d.Conf.NeedSudo, + AuthenticateType: d.Conf.AuthenticateType, + } + if d.Conf.NeedSudo { + //clear config first + cmd := "cp /etc/clickhouse-keeper/keeper_config.xml /etc/clickhouse-keeper/keeper_config.xml.last" + if _, err := common.RemoteExecute(sshOpts, cmd); err != nil { + lastError = err + return + } + } + + keeperFile, err := common.NewTempFile(path.Join(config.GetWorkDirectory(), "package"), "keeper_config") + if err != nil { + lastError = err + return + } + defer os.Remove(keeperFile.FullName) + keeperXml, err := ckconfig.GenerateKeeperXML(keeperFile.FullName, d.Conf, d.Ext.Ipv6Enable, innerIndex+1) + if err != nil { + lastError = err + return + } + confFiles = append(confFiles, keeperXml) + + if err := common.ScpUploadFiles(confFiles, remotePath, sshOpts); err != nil { + lastError = err + return + } + + cmds := make([]string, 0) + cmds = append(cmds, fmt.Sprintf("mv %s %s", path.Join(remotePath, keeperFile.BaseName), path.Join(remotePath, "keeper_config.xml"))) + if d.Conf.NeedSudo { + cmds = append(cmds, "chown -R clickhouse:clickhouse /etc/clickhouse-keeper") + } + cmds = append(cmds, "rm -rf /tmp/keeper_config*") + cmd := strings.Join(cmds, ";") + if _, err = common.RemoteExecute(sshOpts, cmd); err != nil { + lastError = err + return + } + log.Logger.Debugf("host %s config done", innerHost) + }) + } + wg.Wait() + if lastError != nil { + return lastError + } + log.Logger.Infof("config done") + return nil +} + +func (d *KeeperDeploy) Start() error { + d.Conf.Normalize() + cmdIns := 
GetSuitableCmdAdpt(d.Conf.PkgType) + var lastError error + var wg sync.WaitGroup + for _, host := range d.Conf.KeeperConf.KeeperNodes { + innerHost := host + wg.Add(1) + _ = common.Pool.Submit(func() { + defer wg.Done() + sshOpts := common.SshOptions{ + User: d.Conf.SshUser, + Password: d.Conf.SshPassword, + Port: d.Conf.SshPort, + Host: innerHost, + NeedSudo: d.Conf.NeedSudo, + AuthenticateType: d.Conf.AuthenticateType, + } + // if strings.HasSuffix(d.Conf.PkgType, common.PkgSuffixTgz) { + // // try to modify ulimit nofiles + // sshOpts.NeedSudo = true + // cmds := []string{ + // fmt.Sprintf("sed -i '/%s soft nofile/d' /etc/security/limits.conf", d.Conf.SshUser), + // fmt.Sprintf("sed -i '/%s hard nofile/d' /etc/security/limits.conf", d.Conf.SshUser), + // fmt.Sprintf("echo \"%s soft nofile 500000\" >> /etc/security/limits.conf", d.Conf.SshUser), + // fmt.Sprintf("echo \"%s hard nofile 500000\" >> /etc/security/limits.conf", d.Conf.SshUser), + // } + // _, err := common.RemoteExecute(sshOpts, strings.Join(cmds, ";")) + // if err != nil { + // log.Logger.Warnf("[%s] set ulimit -n failed: %v", host, err) + // } + // sshOpts.NeedSudo = d.Conf.NeedSudo + // } + + cmd := cmdIns.StartCmd(KeeperSvrName, d.Conf.Cwd) + _, err := common.RemoteExecute(sshOpts, cmd) + if err != nil { + lastError = err + return + } + log.Logger.Debugf("host %s start done", innerHost) + }) + } + wg.Wait() + if lastError != nil { + return lastError + } + log.Logger.Infof("start done") + return nil +} + +func (d *KeeperDeploy) Stop() error { + d.Conf.Normalize() + cmdIns := GetSuitableCmdAdpt(d.Conf.PkgType) + var lastError error + var wg sync.WaitGroup + for _, host := range d.Conf.KeeperConf.KeeperNodes { + innerHost := host + wg.Add(1) + _ = common.Pool.Submit(func() { + defer wg.Done() + sshOpts := common.SshOptions{ + User: d.Conf.SshUser, + Password: d.Conf.SshPassword, + Port: d.Conf.SshPort, + Host: innerHost, + NeedSudo: d.Conf.NeedSudo, + AuthenticateType: d.Conf.AuthenticateType, + } + cmd := cmdIns.StopCmd(KeeperSvrName, d.Conf.Cwd) + _, err := common.RemoteExecute(sshOpts, cmd) + if err != nil { + lastError = err + return + } + log.Logger.Debugf("host %s stop done", innerHost) + }) + } + wg.Wait() + if lastError != nil { + return lastError + } + log.Logger.Infof("stop done") + return nil +} + +func (d *KeeperDeploy) Restart() error { + d.Conf.Normalize() + cmdIns := GetSuitableCmdAdpt(d.Conf.PkgType) + var lastError error + var wg sync.WaitGroup + for _, host := range d.Conf.KeeperConf.KeeperNodes { + innerHost := host + wg.Add(1) + _ = common.Pool.Submit(func() { + defer wg.Done() + sshOpts := common.SshOptions{ + User: d.Conf.SshUser, + Password: d.Conf.SshPassword, + Port: d.Conf.SshPort, + Host: innerHost, + NeedSudo: d.Conf.NeedSudo, + AuthenticateType: d.Conf.AuthenticateType, + } + cmd := cmdIns.RestartCmd(KeeperSvrName, d.Conf.Cwd) + _, err := common.RemoteExecute(sshOpts, cmd) + if err != nil { + lastError = err + return + } + log.Logger.Debugf("host %s restart done", innerHost) + }) + } + wg.Wait() + if lastError != nil { + return lastError + } + log.Logger.Infof("restart done") + return nil +} + +func (d *KeeperDeploy) Check(timeout int) error { + d.Conf.Normalize() + var lastError error + var wg sync.WaitGroup + for _, host := range d.Conf.KeeperConf.KeeperNodes { + innerHost := host + wg.Add(1) + _ = common.Pool.Submit(func() { + defer wg.Done() + // Golang <-time.After() is not garbage collected before expiry. 
+ ticker := time.NewTicker(5 * time.Second) + ticker2 := time.NewTicker(time.Duration(timeout) * time.Second) + defer ticker.Stop() + defer ticker2.Stop() + for { + select { + case <-ticker.C: + res, err := zookeeper.ZkMetric(innerHost, d.Conf.KeeperConf.TcpPort, "ruok") + if err == nil && string(res) == "imok" { + log.Logger.Debugf("host %s check done", innerHost) + return + } + case <-ticker2.C: + lastError = errors.Wrapf(model.CheckTimeOutErr, "clickhouse-keeper may start failed, please check the clickhouse-keeper log") + return + } + } + }) + } + + wg.Wait() + if lastError != nil { + return lastError + } + log.Logger.Infof("check done") + return nil +} diff --git a/docker-compose.yml b/docker-compose.yml index 72a9b85f..04d73def 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,7 +1,7 @@ version: "3" services: node1: - image: eoitek/ckman-clickhouse:centos-7 + image: ckman-clickhouse:centos-7 hostname: node1 privileged: true extra_hosts: @@ -16,7 +16,7 @@ services: ipv4_address: 192.168.122.101 node2: - image: eoitek/ckman-clickhouse:centos-7 + image: ckman-clickhouse:centos-7 hostname: node2 privileged: true extra_hosts: @@ -31,7 +31,7 @@ services: ipv4_address: 192.168.122.102 node3: - image: eoitek/ckman-clickhouse:centos-7 + image: ckman-clickhouse:centos-7 hostname: node3 privileged: true extra_hosts: @@ -46,7 +46,7 @@ services: ipv4_address: 192.168.122.103 node4: - image: eoitek/ckman-clickhouse:centos-7 + image: ckman-clickhouse:centos-7 hostname: node4 privileged: true extra_hosts: @@ -61,7 +61,7 @@ services: ipv4_address: 192.168.122.104 node5: - image: eoitek/ckman-clickhouse:centos-7 + image: ckman-clickhouse:centos-7 hostname: node5 privileged: true extra_hosts: @@ -76,7 +76,7 @@ services: ipv4_address: 192.168.122.105 node6: - image: eoitek/ckman-clickhouse:centos-7 + image: ckman-clickhouse:centos-7 hostname: node6 privileged: true extra_hosts: diff --git a/docker_env.sh b/docker_env.sh deleted file mode 100755 index 64d023b2..00000000 --- a/docker_env.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env bash - -# This script is running for test-ci, after docker-compose up, run it to get container host. - -OUTFILES=/tmp/ckman/conf/docker_env.conf - -if [[ ! 
-d /tmp/ckman ]];then - mkdir -p /tmp/ckman/conf -fi - -rm -rf ${OUTFILES} - -# be used for test DeployCK -DOCKER_CLICKHOUSE_NODES="" - -# be used for test AddNode -DOCKER_CKNODE="" - -#zookeeper host -DOCKER_ZOOKEEPER_HOSTS="" - -node1=$(docker ps -a |grep ckman_cknode_1 |awk '{print $1}') -DOCKER_NODE1=$(docker exec $node1 cat /etc/hosts |grep $node1| awk '{print $1}') - -node2=$(docker ps -a |grep ckman_cknode_2 |awk '{print $1}') -DOCKER_NODE2=$(docker exec $node2 cat /etc/hosts |grep $node2| awk '{print $1}') - -node3=$(docker ps -a |grep ckman_cknode_3 |awk '{print $1}') -DOCKER_NODE3=$(docker exec $node3 cat /etc/hosts |grep $node3| awk '{print $1}') - - -node4=$(docker ps -a |grep ckman_cknode_4 |awk '{print $1}') -DOCKER_NODE4=$(docker exec $node4 cat /etc/hosts |grep $node4| awk '{print $1}') - -DOCKER_CLICKHOUSE_NODES="[\"${DOCKER_NODE1}\",\"${DOCKER_NODE2}\",\"${DOCKER_NODE3}\",\"${DOCKER_NODE4}\"]" - -zk=$(docker ps -a |grep zookeeper |awk '{print $1}') -DOCKER_ZOOKEEPER_HOSTS=$(docker exec $zk cat /etc/hosts |grep $zk| awk '{print $1}') -DOCKER_ZOOKEEPER_HOSTS="[\"${DOCKER_ZOOKEEPER_HOSTS}\"]" - - -echo "DOCKER_NODE1="${DOCKER_NODE1} >> ${OUTFILES} -echo "DOCKER_NODE2="${DOCKER_NODE2} >> ${OUTFILES} -echo "DOCKER_NODE3="${DOCKER_NODE3} >> ${OUTFILES} -echo "DOCKER_NODE4="${DOCKER_NODE4} >> ${OUTFILES} -echo "DOCKER_CLICKHOUSE_NODES="${DOCKER_CLICKHOUSE_NODES} >> ${OUTFILES} -echo "DOCKER_ZOOKEEPER_HOSTS="${DOCKER_ZOOKEEPER_HOSTS} >> ${OUTFILES} diff --git a/docs/docs.go b/docs/docs.go index 4f5f6324..8dc56d85 100644 --- a/docs/docs.go +++ b/docs/docs.go @@ -214,7 +214,7 @@ var doc = `{ ], "responses": { "200": { - "description": "{\"code\":\"0000\",\"msg\":\"ok\", \"data\":{\"mode\":\"import\",\"hosts\":[\"192.168.0.1\",\"192.168.0.2\",\"192.168.0.3\",\"192.168.0.4\"],\"names\":[\"node1\",\"node2\",\"node3\",\"node4\"],\"port\":9000,\"httpPort\":8123,\"user\":\"ck\",\"password\":\"123456\",\"database\":\"default\",\"cluster\":\"test\",\"zkNodes\":[\"192.168.0.1\",\"192.168.0.2\",\"192.168.0.3\"],\"zkPort\":2181,\"zkStatusPort\":8080,\"isReplica\":true,\"version\":\"20.8.5.45\",\"sshUser\":\"\",\"sshPassword\":\"\",\"shards\":[{\"replicas\":[{\"ip\":\"192.168.0.1\",\"hostname\":\"node1\"},{\"ip\":\"192.168.0.2\",\"hostname\":\"node2\"}]},{\"replicas\":[{\"ip\":\"192.168.0.3\",\"hostname\":\"node3\"},{\"ip\":\"192.168.0.4\",\"hostname\":\"node4\"}]}],\"path\":\"\"}}", + "description": "{\"code\":\"0000\",\"msg\":\"ok\", \"data\":{\"mode\":\"import\",\"hosts\":[\"192.168.0.1\",\"192.168.0.2\",\"192.168.0.3\",\"192.168.0.4\"],\"names\":[\"node1\",\"node2\",\"node3\",\"node4\"],\"port\":9000,\"httpPort\":8123,\"user\":\"ck\",\"password\":\"123456\",\"database\":\"default\",\"cluster\":\"test\",\"zkNodes\":[\"192.168.0.1\",\"192.168.0.2\",\"192.168.0.3\"],\"zkPort\":2181,\"isReplica\":true,\"version\":\"20.8.5.45\",\"sshUser\":\"\",\"sshPassword\":\"\",\"shards\":[{\"replicas\":[{\"ip\":\"192.168.0.1\",\"hostname\":\"node1\"},{\"ip\":\"192.168.0.2\",\"hostname\":\"node2\"}]},{\"replicas\":[{\"ip\":\"192.168.0.3\",\"hostname\":\"node3\"},{\"ip\":\"192.168.0.4\",\"hostname\":\"node4\"}]}],\"path\":\"\"}}", "schema": { "type": "string" } @@ -530,7 +530,7 @@ var doc = `{ ], "responses": { "200": { - "description": 
"{\"code\":\"0000\",\"msg\":\"success\",\"data\":{\"test\":{\"mode\":\"import\",\"hosts\":[\"192.168.0.1\",\"192.168.0.2\",\"192.168.0.3\",\"192.168.0.4\"],\"names\":[\"node1\",\"node2\",\"node3\",\"node4\"],\"port\":9000,\"httpPort\":8123,\"user\":\"ck\",\"password\":\"123456\",\"database\":\"default\",\"cluster\":\"test\",\"zkNodes\":[\"192.168.0.1\",\"192.168.0.2\",\"192.168.0.3\"],\"zkPort\":2181,\"zkStatusPort\":8080,\"isReplica\":true,\"version\":\"20.8.5.45\",\"sshUser\":\"\",\"sshPassword\":\"\",\"shards\":[{\"replicas\":[{\"ip\":\"192.168.0.1\",\"hostname\":\"node1\"},{\"ip\":\"192.168.0.2\",\"hostname\":\"node2\"}]},{\"replicas\":[{\"ip\":\"192.168.0.3\",\"hostname\":\"node3\"},{\"ip\":\"192.168.0.4\",\"hostname\":\"node4\"}]}],\"path\":\"\"}}}}", + "description": "{\"code\":\"0000\",\"msg\":\"success\",\"data\":{\"test\":{\"mode\":\"import\",\"hosts\":[\"192.168.0.1\",\"192.168.0.2\",\"192.168.0.3\",\"192.168.0.4\"],\"names\":[\"node1\",\"node2\",\"node3\",\"node4\"],\"port\":9000,\"httpPort\":8123,\"user\":\"ck\",\"password\":\"123456\",\"database\":\"default\",\"cluster\":\"test\",\"zkNodes\":[\"192.168.0.1\",\"192.168.0.2\",\"192.168.0.3\"],\"zkPort\":2181,\"isReplica\":true,\"version\":\"20.8.5.45\",\"sshUser\":\"\",\"sshPassword\":\"\",\"shards\":[{\"replicas\":[{\"ip\":\"192.168.0.1\",\"hostname\":\"node1\"},{\"ip\":\"192.168.0.2\",\"hostname\":\"node2\"}]},{\"replicas\":[{\"ip\":\"192.168.0.3\",\"hostname\":\"node3\"},{\"ip\":\"192.168.0.4\",\"hostname\":\"node4\"}]}],\"path\":\"\"}}}}", "schema": { "type": "string" } @@ -1123,6 +1123,49 @@ var doc = `{ } } }, + "/api/v2/ck/query_export/{clusterName}": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "查询SQL", + "consumes": [ + "application/json" + ], + "tags": [ + "clickhouse" + ], + "summary": "查询SQL", + "parameters": [ + { + "type": "string", + "default": "test", + "description": "cluster name", + "name": "clusterName", + "in": "path", + "required": true + }, + { + "type": "string", + "default": "show databases", + "description": "sql", + "name": "query", + "in": "query", + "required": true + } + ], + "responses": { + "200": { + "description": "{\"code\":\"0000\",\"msg\":\"ok\",\"data\":[[\"name\"],[\"default\"],[\"system\"]]}", + "schema": { + "type": "string" + } + } + } + } + }, "/api/v2/ck/rebalance/{clusterName}": { "put": { "security": [ @@ -1515,6 +1558,50 @@ var doc = `{ } } }, + "/api/v2/ck/table/dml/{clusterName}": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "更新/删除表中的数据", + "consumes": [ + "application/json" + ], + "tags": [ + "clickhouse" + ], + "summary": "更新/删除表中的数据", + "parameters": [ + { + "type": "string", + "default": "test", + "description": "cluster name", + "name": "clusterName", + "in": "path", + "required": true + }, + { + "description": "request body", + "name": "req", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/model.DMLOnLogicReq" + } + } + ], + "responses": { + "200": { + "description": "{\"code\":\"5809\",\"msg\":\"修改表失败\",\"data\":null}", + "schema": { + "type": "string" + } + } + } + } + }, "/api/v2/ck/table/group-uniq-array/{clusterName}": { "get": { "security": [ @@ -1734,7 +1821,7 @@ var doc = `{ "in": "body", "required": true, "schema": { - "$ref": "#/definitions/model.AlterCkTableReq" + "$ref": "#/definitions/model.AlterTblsTTLReq" } } ], @@ -2628,6 +2715,44 @@ var doc = `{ } } }, + "model.AlterTblTTL": { + "type": "object", + "properties": { + "database": { + "type": "string", + "example": 
"default" + }, + "distName": { + "type": "string", + "example": "distt1" + }, + "tableName": { + "type": "string", + "example": "t1" + } + } + }, + "model.AlterTblsTTLReq": { + "type": "object", + "properties": { + "tables": { + "type": "array", + "items": { + "$ref": "#/definitions/model.AlterTblTTL" + } + }, + "ttl": { + "type": "array", + "items": { + "$ref": "#/definitions/model.CkTableTTL" + } + }, + "ttl_type": { + "type": "string", + "example": "MODIFY" + } + } + }, "model.ArchiveHdfs": { "type": "object", "properties": { @@ -2747,6 +2872,10 @@ var doc = `{ "type": "string", "example": "test" }, + "comment": { + "type": "string", + "example": "test" + }, "cwd": { "type": "string", "example": "/home/eoi/clickhouse" @@ -2777,6 +2906,13 @@ var doc = `{ "type": "boolean", "example": true }, + "keeper": { + "type": "string", + "example": "zookeeper" + }, + "keeperConf": { + "$ref": "#/definitions/model.KeeperConf" + }, "logic_cluster": { "type": "string", "example": "logic_test" @@ -2805,6 +2941,9 @@ var doc = `{ "type": "string", "example": "127.0.0.1" }, + "promMetricPort": { + "$ref": "#/definitions/model.PromMetricPort" + }, "promPort": { "type": "integer", "example": 9090 @@ -2853,10 +2992,6 @@ var doc = `{ "zkPort": { "type": "integer", "example": 2181 - }, - "zkStatusPort": { - "type": "integer", - "example": 8080 } } }, @@ -2927,10 +3062,6 @@ var doc = `{ "zkPort": { "type": "integer", "example": 2181 - }, - "zkStatusPort": { - "type": "integer", - "example": 8080 } } }, @@ -3052,6 +3183,29 @@ var doc = `{ } } }, + "model.Coordination": { + "type": "object", + "properties": { + "autoForwarding": { + "type": "boolean" + }, + "expert": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "forceSync": { + "type": "boolean" + }, + "operationTimeoutMs": { + "type": "integer" + }, + "sessionTimeoutMs": { + "type": "integer" + } + } + }, "model.CreateCkTableReq": { "type": "object", "properties": { @@ -3119,6 +3273,29 @@ var doc = `{ } } }, + "model.DMLOnLogicReq": { + "type": "object", + "properties": { + "cond": { + "type": "string" + }, + "database": { + "type": "string" + }, + "kv": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "manipulation": { + "type": "string" + }, + "table": { + "type": "string" + } + } + }, "model.Disk": { "type": "object", "properties": { @@ -3245,6 +3422,49 @@ var doc = `{ } } }, + "model.KeeperConf": { + "type": "object", + "properties": { + "coordination": { + "$ref": "#/definitions/model.Coordination" + }, + "expert": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "keeperNodes": { + "type": "array", + "items": { + "type": "string" + }, + "example": [ + "192.168.101.102", + "192.168.101.105", + "192.168.101.107" + ] + }, + "logPath": { + "type": "string" + }, + "raftPort": { + "type": "integer", + "example": 9234 + }, + "runtime": { + "type": "string", + "example": "standalone" + }, + "snapshotPath": { + "type": "string" + }, + "tcpPort": { + "type": "integer", + "example": 9181 + } + } + }, "model.LoginReq": { "type": "object", "properties": { @@ -3362,6 +3582,20 @@ var doc = `{ } } }, + "model.PromMetricPort": { + "type": "object", + "properties": { + "clickHouse": { + "type": "integer" + }, + "nodeExport": { + "type": "integer" + }, + "zooKeeper": { + "type": "integer" + } + } + }, "model.PurgerTableReq": { "type": "object", "properties": { @@ -3393,10 +3627,18 @@ var doc = `{ "model.RebalanceShardingkey": { "type": "object", "properties": { + "allowLossRate": 
{ + "type": "number", + "example": 0.1 + }, "database": { "type": "string", "example": "default" }, + "saveTemps": { + "type": "boolean", + "example": true + }, "shardingKey": { "type": "string", "example": "_timestamp" diff --git a/docs/swagger.json b/docs/swagger.json index 8e6227b4..42bc3e6a 100644 --- a/docs/swagger.json +++ b/docs/swagger.json @@ -198,7 +198,7 @@ ], "responses": { "200": { - "description": "{\"code\":\"0000\",\"msg\":\"ok\", \"data\":{\"mode\":\"import\",\"hosts\":[\"192.168.0.1\",\"192.168.0.2\",\"192.168.0.3\",\"192.168.0.4\"],\"names\":[\"node1\",\"node2\",\"node3\",\"node4\"],\"port\":9000,\"httpPort\":8123,\"user\":\"ck\",\"password\":\"123456\",\"database\":\"default\",\"cluster\":\"test\",\"zkNodes\":[\"192.168.0.1\",\"192.168.0.2\",\"192.168.0.3\"],\"zkPort\":2181,\"zkStatusPort\":8080,\"isReplica\":true,\"version\":\"20.8.5.45\",\"sshUser\":\"\",\"sshPassword\":\"\",\"shards\":[{\"replicas\":[{\"ip\":\"192.168.0.1\",\"hostname\":\"node1\"},{\"ip\":\"192.168.0.2\",\"hostname\":\"node2\"}]},{\"replicas\":[{\"ip\":\"192.168.0.3\",\"hostname\":\"node3\"},{\"ip\":\"192.168.0.4\",\"hostname\":\"node4\"}]}],\"path\":\"\"}}", + "description": "{\"code\":\"0000\",\"msg\":\"ok\", \"data\":{\"mode\":\"import\",\"hosts\":[\"192.168.0.1\",\"192.168.0.2\",\"192.168.0.3\",\"192.168.0.4\"],\"names\":[\"node1\",\"node2\",\"node3\",\"node4\"],\"port\":9000,\"httpPort\":8123,\"user\":\"ck\",\"password\":\"123456\",\"database\":\"default\",\"cluster\":\"test\",\"zkNodes\":[\"192.168.0.1\",\"192.168.0.2\",\"192.168.0.3\"],\"zkPort\":2181,\"isReplica\":true,\"version\":\"20.8.5.45\",\"sshUser\":\"\",\"sshPassword\":\"\",\"shards\":[{\"replicas\":[{\"ip\":\"192.168.0.1\",\"hostname\":\"node1\"},{\"ip\":\"192.168.0.2\",\"hostname\":\"node2\"}]},{\"replicas\":[{\"ip\":\"192.168.0.3\",\"hostname\":\"node3\"},{\"ip\":\"192.168.0.4\",\"hostname\":\"node4\"}]}],\"path\":\"\"}}", "schema": { "type": "string" } @@ -514,7 +514,7 @@ ], "responses": { "200": { - "description": "{\"code\":\"0000\",\"msg\":\"success\",\"data\":{\"test\":{\"mode\":\"import\",\"hosts\":[\"192.168.0.1\",\"192.168.0.2\",\"192.168.0.3\",\"192.168.0.4\"],\"names\":[\"node1\",\"node2\",\"node3\",\"node4\"],\"port\":9000,\"httpPort\":8123,\"user\":\"ck\",\"password\":\"123456\",\"database\":\"default\",\"cluster\":\"test\",\"zkNodes\":[\"192.168.0.1\",\"192.168.0.2\",\"192.168.0.3\"],\"zkPort\":2181,\"zkStatusPort\":8080,\"isReplica\":true,\"version\":\"20.8.5.45\",\"sshUser\":\"\",\"sshPassword\":\"\",\"shards\":[{\"replicas\":[{\"ip\":\"192.168.0.1\",\"hostname\":\"node1\"},{\"ip\":\"192.168.0.2\",\"hostname\":\"node2\"}]},{\"replicas\":[{\"ip\":\"192.168.0.3\",\"hostname\":\"node3\"},{\"ip\":\"192.168.0.4\",\"hostname\":\"node4\"}]}],\"path\":\"\"}}}}", + "description": "{\"code\":\"0000\",\"msg\":\"success\",\"data\":{\"test\":{\"mode\":\"import\",\"hosts\":[\"192.168.0.1\",\"192.168.0.2\",\"192.168.0.3\",\"192.168.0.4\"],\"names\":[\"node1\",\"node2\",\"node3\",\"node4\"],\"port\":9000,\"httpPort\":8123,\"user\":\"ck\",\"password\":\"123456\",\"database\":\"default\",\"cluster\":\"test\",\"zkNodes\":[\"192.168.0.1\",\"192.168.0.2\",\"192.168.0.3\"],\"zkPort\":2181,\"isReplica\":true,\"version\":\"20.8.5.45\",\"sshUser\":\"\",\"sshPassword\":\"\",\"shards\":[{\"replicas\":[{\"ip\":\"192.168.0.1\",\"hostname\":\"node1\"},{\"ip\":\"192.168.0.2\",\"hostname\":\"node2\"}]},{\"replicas\":[{\"ip\":\"192.168.0.3\",\"hostname\":\"node3\"},{\"ip\":\"192.168.0.4\",\"hostname\":\"node4\"}]}],\"path\":\"\"}}}}", "schema": { 
"type": "string" } @@ -1107,6 +1107,49 @@ } } }, + "/api/v2/ck/query_export/{clusterName}": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "查询SQL", + "consumes": [ + "application/json" + ], + "tags": [ + "clickhouse" + ], + "summary": "查询SQL", + "parameters": [ + { + "type": "string", + "default": "test", + "description": "cluster name", + "name": "clusterName", + "in": "path", + "required": true + }, + { + "type": "string", + "default": "show databases", + "description": "sql", + "name": "query", + "in": "query", + "required": true + } + ], + "responses": { + "200": { + "description": "{\"code\":\"0000\",\"msg\":\"ok\",\"data\":[[\"name\"],[\"default\"],[\"system\"]]}", + "schema": { + "type": "string" + } + } + } + } + }, "/api/v2/ck/rebalance/{clusterName}": { "put": { "security": [ @@ -1499,6 +1542,50 @@ } } }, + "/api/v2/ck/table/dml/{clusterName}": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "更新/删除表中的数据", + "consumes": [ + "application/json" + ], + "tags": [ + "clickhouse" + ], + "summary": "更新/删除表中的数据", + "parameters": [ + { + "type": "string", + "default": "test", + "description": "cluster name", + "name": "clusterName", + "in": "path", + "required": true + }, + { + "description": "request body", + "name": "req", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/model.DMLOnLogicReq" + } + } + ], + "responses": { + "200": { + "description": "{\"code\":\"5809\",\"msg\":\"修改表失败\",\"data\":null}", + "schema": { + "type": "string" + } + } + } + } + }, "/api/v2/ck/table/group-uniq-array/{clusterName}": { "get": { "security": [ @@ -1718,7 +1805,7 @@ "in": "body", "required": true, "schema": { - "$ref": "#/definitions/model.AlterCkTableReq" + "$ref": "#/definitions/model.AlterTblsTTLReq" } } ], @@ -2612,6 +2699,44 @@ } } }, + "model.AlterTblTTL": { + "type": "object", + "properties": { + "database": { + "type": "string", + "example": "default" + }, + "distName": { + "type": "string", + "example": "distt1" + }, + "tableName": { + "type": "string", + "example": "t1" + } + } + }, + "model.AlterTblsTTLReq": { + "type": "object", + "properties": { + "tables": { + "type": "array", + "items": { + "$ref": "#/definitions/model.AlterTblTTL" + } + }, + "ttl": { + "type": "array", + "items": { + "$ref": "#/definitions/model.CkTableTTL" + } + }, + "ttl_type": { + "type": "string", + "example": "MODIFY" + } + } + }, "model.ArchiveHdfs": { "type": "object", "properties": { @@ -2731,6 +2856,10 @@ "type": "string", "example": "test" }, + "comment": { + "type": "string", + "example": "test" + }, "cwd": { "type": "string", "example": "/home/eoi/clickhouse" @@ -2761,6 +2890,13 @@ "type": "boolean", "example": true }, + "keeper": { + "type": "string", + "example": "zookeeper" + }, + "keeperConf": { + "$ref": "#/definitions/model.KeeperConf" + }, "logic_cluster": { "type": "string", "example": "logic_test" @@ -2789,6 +2925,9 @@ "type": "string", "example": "127.0.0.1" }, + "promMetricPort": { + "$ref": "#/definitions/model.PromMetricPort" + }, "promPort": { "type": "integer", "example": 9090 @@ -2837,10 +2976,6 @@ "zkPort": { "type": "integer", "example": 2181 - }, - "zkStatusPort": { - "type": "integer", - "example": 8080 } } }, @@ -2911,10 +3046,6 @@ "zkPort": { "type": "integer", "example": 2181 - }, - "zkStatusPort": { - "type": "integer", - "example": 8080 } } }, @@ -3036,6 +3167,29 @@ } } }, + "model.Coordination": { + "type": "object", + "properties": { + "autoForwarding": { + "type": "boolean" + }, + "expert": { + 
"type": "object", + "additionalProperties": { + "type": "string" + } + }, + "forceSync": { + "type": "boolean" + }, + "operationTimeoutMs": { + "type": "integer" + }, + "sessionTimeoutMs": { + "type": "integer" + } + } + }, "model.CreateCkTableReq": { "type": "object", "properties": { @@ -3103,6 +3257,29 @@ } } }, + "model.DMLOnLogicReq": { + "type": "object", + "properties": { + "cond": { + "type": "string" + }, + "database": { + "type": "string" + }, + "kv": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "manipulation": { + "type": "string" + }, + "table": { + "type": "string" + } + } + }, "model.Disk": { "type": "object", "properties": { @@ -3229,6 +3406,49 @@ } } }, + "model.KeeperConf": { + "type": "object", + "properties": { + "coordination": { + "$ref": "#/definitions/model.Coordination" + }, + "expert": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "keeperNodes": { + "type": "array", + "items": { + "type": "string" + }, + "example": [ + "192.168.101.102", + "192.168.101.105", + "192.168.101.107" + ] + }, + "logPath": { + "type": "string" + }, + "raftPort": { + "type": "integer", + "example": 9234 + }, + "runtime": { + "type": "string", + "example": "standalone" + }, + "snapshotPath": { + "type": "string" + }, + "tcpPort": { + "type": "integer", + "example": 9181 + } + } + }, "model.LoginReq": { "type": "object", "properties": { @@ -3346,6 +3566,20 @@ } } }, + "model.PromMetricPort": { + "type": "object", + "properties": { + "clickHouse": { + "type": "integer" + }, + "nodeExport": { + "type": "integer" + }, + "zooKeeper": { + "type": "integer" + } + } + }, "model.PurgerTableReq": { "type": "object", "properties": { @@ -3377,10 +3611,18 @@ "model.RebalanceShardingkey": { "type": "object", "properties": { + "allowLossRate": { + "type": "number", + "example": 0.1 + }, "database": { "type": "string", "example": "default" }, + "saveTemps": { + "type": "boolean", + "example": true + }, "shardingKey": { "type": "string", "example": "_timestamp" diff --git a/docs/swagger.yaml b/docs/swagger.yaml index 991f48c9..773e5e59 100644 --- a/docs/swagger.yaml +++ b/docs/swagger.yaml @@ -54,6 +54,32 @@ definitions: $ref: '#/definitions/model.CkTableRename' type: array type: object + model.AlterTblTTL: + properties: + database: + example: default + type: string + distName: + example: distt1 + type: string + tableName: + example: t1 + type: string + type: object + model.AlterTblsTTLReq: + properties: + tables: + items: + $ref: '#/definitions/model.AlterTblTTL' + type: array + ttl: + items: + $ref: '#/definitions/model.CkTableTTL' + type: array + ttl_type: + example: MODIFY + type: string + type: object model.ArchiveHdfs: properties: addr: @@ -140,6 +166,9 @@ definitions: cluster: example: test type: string + comment: + example: test + type: string cwd: example: /home/eoi/clickhouse type: string @@ -162,6 +191,11 @@ definitions: isReplica: example: true type: boolean + keeper: + example: zookeeper + type: string + keeperConf: + $ref: '#/definitions/model.KeeperConf' logic_cluster: example: logic_test type: string @@ -183,6 +217,8 @@ definitions: promHost: example: 127.0.0.1 type: string + promMetricPort: + $ref: '#/definitions/model.PromMetricPort' promPort: example: 9090 type: integer @@ -219,9 +255,6 @@ definitions: zkPort: example: 2181 type: integer - zkStatusPort: - example: 8080 - type: integer type: object model.CkImportConfig: properties: @@ -273,9 +306,6 @@ definitions: zkPort: example: 2181 type: integer - zkStatusPort: - 
example: 8080 - type: integer type: object model.CkTableNameType: properties: @@ -364,6 +394,21 @@ definitions: example: true type: boolean type: object + model.Coordination: + properties: + autoForwarding: + type: boolean + expert: + additionalProperties: + type: string + type: object + forceSync: + type: boolean + operationTimeoutMs: + type: integer + sessionTimeoutMs: + type: integer + type: object model.CreateCkTableReq: properties: database: @@ -410,6 +455,21 @@ definitions: $ref: '#/definitions/model.CkTableTTL' type: array type: object + model.DMLOnLogicReq: + properties: + cond: + type: string + database: + type: string + kv: + additionalProperties: + type: string + type: object + manipulation: + type: string + table: + type: string + type: object model.Disk: properties: allowedBackup: @@ -498,6 +558,36 @@ definitions: description: minmax, set, bloom_filter, ngrambf_v1, tokenbf_v1 type: string type: object + model.KeeperConf: + properties: + coordination: + $ref: '#/definitions/model.Coordination' + expert: + additionalProperties: + type: string + type: object + keeperNodes: + example: + - 192.168.101.102 + - 192.168.101.105 + - 192.168.101.107 + items: + type: string + type: array + logPath: + type: string + raftPort: + example: 9234 + type: integer + runtime: + example: standalone + type: string + snapshotPath: + type: string + tcpPort: + example: 9181 + type: integer + type: object model.LoginReq: properties: password: @@ -576,6 +666,15 @@ definitions: sql: type: string type: object + model.PromMetricPort: + properties: + clickHouse: + type: integer + nodeExport: + type: integer + zooKeeper: + type: integer + type: object model.PurgerTableReq: properties: begin: @@ -598,9 +697,15 @@ definitions: type: object model.RebalanceShardingkey: properties: + allowLossRate: + example: 0.1 + type: number database: example: default type: string + saveTemps: + example: true + type: boolean shardingKey: example: _timestamp type: string @@ -790,7 +895,7 @@ paths: type: string responses: "200": - description: '{"code":"0000","msg":"ok", "data":{"mode":"import","hosts":["192.168.0.1","192.168.0.2","192.168.0.3","192.168.0.4"],"names":["node1","node2","node3","node4"],"port":9000,"httpPort":8123,"user":"ck","password":"123456","database":"default","cluster":"test","zkNodes":["192.168.0.1","192.168.0.2","192.168.0.3"],"zkPort":2181,"zkStatusPort":8080,"isReplica":true,"version":"20.8.5.45","sshUser":"","sshPassword":"","shards":[{"replicas":[{"ip":"192.168.0.1","hostname":"node1"},{"ip":"192.168.0.2","hostname":"node2"}]},{"replicas":[{"ip":"192.168.0.3","hostname":"node3"},{"ip":"192.168.0.4","hostname":"node4"}]}],"path":""}}' + description: '{"code":"0000","msg":"ok", "data":{"mode":"import","hosts":["192.168.0.1","192.168.0.2","192.168.0.3","192.168.0.4"],"names":["node1","node2","node3","node4"],"port":9000,"httpPort":8123,"user":"ck","password":"123456","database":"default","cluster":"test","zkNodes":["192.168.0.1","192.168.0.2","192.168.0.3"],"zkPort":2181,"isReplica":true,"version":"20.8.5.45","sshUser":"","sshPassword":"","shards":[{"replicas":[{"ip":"192.168.0.1","hostname":"node1"},{"ip":"192.168.0.2","hostname":"node2"}]},{"replicas":[{"ip":"192.168.0.3","hostname":"node3"},{"ip":"192.168.0.4","hostname":"node4"}]}],"path":""}}' schema: type: string security: @@ -971,7 +1076,7 @@ paths: type: string responses: "200": - description: 
'{"code":"0000","msg":"success","data":{"test":{"mode":"import","hosts":["192.168.0.1","192.168.0.2","192.168.0.3","192.168.0.4"],"names":["node1","node2","node3","node4"],"port":9000,"httpPort":8123,"user":"ck","password":"123456","database":"default","cluster":"test","zkNodes":["192.168.0.1","192.168.0.2","192.168.0.3"],"zkPort":2181,"zkStatusPort":8080,"isReplica":true,"version":"20.8.5.45","sshUser":"","sshPassword":"","shards":[{"replicas":[{"ip":"192.168.0.1","hostname":"node1"},{"ip":"192.168.0.2","hostname":"node2"}]},{"replicas":[{"ip":"192.168.0.3","hostname":"node3"},{"ip":"192.168.0.4","hostname":"node4"}]}],"path":""}}}}' + description: '{"code":"0000","msg":"success","data":{"test":{"mode":"import","hosts":["192.168.0.1","192.168.0.2","192.168.0.3","192.168.0.4"],"names":["node1","node2","node3","node4"],"port":9000,"httpPort":8123,"user":"ck","password":"123456","database":"default","cluster":"test","zkNodes":["192.168.0.1","192.168.0.2","192.168.0.3"],"zkPort":2181,"isReplica":true,"version":"20.8.5.45","sshUser":"","sshPassword":"","shards":[{"replicas":[{"ip":"192.168.0.1","hostname":"node1"},{"ip":"192.168.0.2","hostname":"node2"}]},{"replicas":[{"ip":"192.168.0.3","hostname":"node3"},{"ip":"192.168.0.4","hostname":"node4"}]}],"path":""}}}}' schema: type: string security: @@ -1355,6 +1460,34 @@ paths: summary: 查询SQL tags: - clickhouse + /api/v2/ck/query_export/{clusterName}: + get: + consumes: + - application/json + description: 查询SQL + parameters: + - default: test + description: cluster name + in: path + name: clusterName + required: true + type: string + - default: show databases + description: sql + in: query + name: query + required: true + type: string + responses: + "200": + description: '{"code":"0000","msg":"ok","data":[["name"],["default"],["system"]]}' + schema: + type: string + security: + - ApiKeyAuth: [] + summary: 查询SQL + tags: + - clickhouse /api/v2/ck/rebalance/{clusterName}: put: consumes: @@ -1751,6 +1884,34 @@ paths: summary: 修改表 tags: - clickhouse + /api/v2/ck/table/dml/{clusterName}: + post: + consumes: + - application/json + description: 更新/删除表中的数据 + parameters: + - default: test + description: cluster name + in: path + name: clusterName + required: true + type: string + - description: request body + in: body + name: req + required: true + schema: + $ref: '#/definitions/model.DMLOnLogicReq' + responses: + "200": + description: '{"code":"5809","msg":"修改表失败","data":null}' + schema: + type: string + security: + - ApiKeyAuth: [] + summary: 更新/删除表中的数据 + tags: + - clickhouse /api/v2/ck/table/group-uniq-array/{clusterName}: delete: consumes: @@ -1888,7 +2049,7 @@ paths: name: req required: true schema: - $ref: '#/definitions/model.AlterCkTableReq' + $ref: '#/definitions/model.AlterTblsTTLReq' responses: "200": description: '{"code":"5809","msg":"修改表失败","data":null}' diff --git a/frontend b/frontend index 56aa7429..979a3bb7 160000 --- a/frontend +++ b/frontend @@ -1 +1 @@ -Subproject commit 56aa7429de66fccb7e9c83296a4770575a27ff74 +Subproject commit 979a3bb7f9f01d3841bb7cfa3e2db2da362b4924 diff --git a/go.mod b/go.mod index b224d2d0..ff3cad5f 100644 --- a/go.mod +++ b/go.mod @@ -34,6 +34,7 @@ require ( require ( github.com/ClickHouse/clickhouse-go/v2 v2.15.0 + github.com/alecthomas/kingpin/v2 v2.4.0 github.com/arl/statsviz v0.5.1 github.com/aws/aws-sdk-go v1.27.0 github.com/bramvdbogaerde/go-scp v1.2.0 @@ -55,6 +56,7 @@ require ( github.com/KyleBanks/depth v1.2.1 // indirect github.com/PuerkitoBio/purell v1.1.1 // indirect github.com/PuerkitoBio/urlesc 
v0.0.0-20170810143723-de5bf2ad4578 // indirect + github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 // indirect github.com/aliyun/alibaba-cloud-sdk-go v1.61.18 // indirect github.com/andybalholm/brotli v1.0.6 // indirect github.com/buger/jsonparser v0.0.0-20181115193947-bf1c66bbce23 // indirect @@ -114,6 +116,7 @@ require ( github.com/thoas/go-funk v0.9.2 // indirect github.com/toolkits/concurrent v0.0.0-20150624120057-a4371d70e3e3 // indirect github.com/ugorji/go/codec v1.1.13 // indirect + github.com/xhit/go-str2duration/v2 v2.1.0 // indirect go.opentelemetry.io/otel v1.19.0 // indirect go.opentelemetry.io/otel/trace v1.19.0 // indirect go.uber.org/multierr v1.11.0 // indirect diff --git a/go.sum b/go.sum index 0753f095..8e5321fd 100644 --- a/go.sum +++ b/go.sum @@ -50,12 +50,16 @@ github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tN github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M= github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= +github.com/alecthomas/kingpin/v2 v2.4.0 h1:f48lwail6p8zpO1bC4TxtqACaGqHYA22qkHjHpqDjYY= +github.com/alecthomas/kingpin/v2 v2.4.0/go.mod h1:0gyi0zQnjuFk8xrkNKamJoyUo382HRL7ATRpFZCw6tE= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafoB+tBA3gMyHYHrpOtNuDiK/uB5uXxq5wM= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= +github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 h1:s6gZFSlWYmbqAuRjVTiNNhvNRfY2Wxp9nhfyel4rklc= +github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE= github.com/aliyun/alibaba-cloud-sdk-go v1.61.18 h1:zOVTBdCKFd9JbCKz9/nt+FovbjPFmb7mUnp8nH9fQBA= github.com/aliyun/alibaba-cloud-sdk-go v1.61.18/go.mod h1:v8ESoHo4SyHmuB4b1tJqDHxfTGEciD+yhvOU/5s1Rfk= github.com/andybalholm/brotli v1.0.6 h1:Yf9fFpf49Zrxb9NlQaluyE92/+X7UVHlhMNJN2sxfOI= @@ -512,6 +516,8 @@ github.com/wanlay/gorm-dm8 v1.0.5/go.mod h1:wu9Q9IEBuOKOR/XlDrEeytM1asrBft3cestP github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= github.com/xdg-go/scram v1.1.1/go.mod h1:RaEWvsqvNKKvBPvcKeFjrG2cJqOkHTiyTpzz23ni57g= github.com/xdg-go/stringprep v1.0.3/go.mod h1:W3f5j4i+9rC0kuIEJL0ky1VpHXQU3ocBgklLGvcBnW8= +github.com/xhit/go-str2duration/v2 v2.1.0 h1:lxklc02Drh6ynqX+DdPyp5pCKLUQpRT8bp8Ydu2Bstc= +github.com/xhit/go-str2duration/v2 v2.1.0/go.mod h1:ohY8p+0f07DiV6Em5LKB0s2YpLtXVyJfNt1+BlmyAsU= github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= diff --git a/go.test.sh b/go.test.sh deleted file mode 100755 index bb23289a..00000000 
--- a/go.test.sh +++ /dev/null @@ -1,173 +0,0 @@ -#!/usr/bin/env bash - -token="" -hosts="" -resp="" - -POST="" -GET="" -PUT="" -DELETE="" -DOCKER_NODE4="" - -PRINT_TITTLE() -{ - echo "+----------------------------------------------------------+" - echo " $1 " - echo "+----------------------------------------------------------+" - -} - -GetToken() -{ - PRINT_TITTLE "Login" - resp=$(curl -s -H "Content-Type: application/json" "localhost:18808/api/login" -d '{"username":"ckman", "password":"63cb91a2ceb9d4f7c8b1ba5e50046f52"}') - token=$(echo ${resp}|jq '.entity.token'|awk -F \" '{print $2}') - echo ${resp}|jq - POST="curl -s -H Content-Type:application/json -H token:${token} -X POST -d " - GET="curl -s -H token:${token} " - PUT="curl -s -H token:${token} -X PUT " - DELETE=" curl -s -H token:${token} -X DELETE " -} - -#check request result -CheckResult() -{ - echo $2|jq - retcode=$(echo $2|jq '.retCode') - if [[ ${retcode} = "0" ]];then - echo -e "\033[32m"$1"...[SUCCESS]\033[0m" - else - echo -e "\033[31m"$1"...[FAILURE]\033[0m" - Destroy - exit 1 - fi -} - -ReplaceTemplate() -{ - DOCKER_NODE1=$(grep 'DOCKER_NODE1' /tmp/ckman/conf/docker_env.conf |awk -F = '{print $2}') - DOCKER_NODE2=$(grep 'DOCKER_NODE2' /tmp/ckman/conf/docker_env.conf |awk -F = '{print $2}') - DOCKER_NODE3=$(grep 'DOCKER_NODE3' /tmp/ckman/conf/docker_env.conf |awk -F = '{print $2}') - DOCKER_NODE4=$(grep 'DOCKER_NODE4' /tmp/ckman/conf/docker_env.conf |awk -F = '{print $2}') - DOCKER_CLICKHOUSE_NODES=$(grep 'DOCKER_CLICKHOUSE_NODES' /tmp/ckman/conf/docker_env.conf |awk -F = '{print $2}') - DOCKER_ZOOKEEPER_HOSTS=$(grep 'DOCKER_ZOOKEEPER_HOSTS' /tmp/ckman/conf/docker_env.conf |awk -F = '{print $2}') - sed -i "s/{DOCKER_NODE1}/${DOCKER_NODE1}/g" `grep -rl '{DOCKER_NODE1}' /tmp/ckman/tests` - sed -i "s/{DOCKER_NODE2}/${DOCKER_NODE2}/g" `grep -rl '{DOCKER_NODE2}' /tmp/ckman/tests` - sed -i "s/{DOCKER_NODE3}/${DOCKER_NODE3}/g" `grep -rl '{DOCKER_NODE3}' /tmp/ckman/tests` - sed -i "s/{DOCKER_NODE4}/${DOCKER_NODE4}/g" `grep -rl '{DOCKER_NODE4}' /tmp/ckman/tests` - sed -i "s/{DOCKER_CLICKHOUSE_NODES}/${DOCKER_CLICKHOUSE_NODES}/g" `grep -rl '{DOCKER_CLICKHOUSE_NODES}' /tmp/ckman/tests` - sed -i "s/{DOCKER_ZOOKEEPER_HOSTS}/${DOCKER_ZOOKEEPER_HOSTS}/g" `grep -rl '{DOCKER_ZOOKEEPER_HOSTS}' /tmp/ckman/tests` - sed -i 's/port: 8808/port: 18808/g' /tmp/ckman/conf/ckman.yaml - sed -i 's#{{.CkListenHost}}#0.0.0.0#g' /tmp/ckman/template/config.xml -} - -PrepareCKPkg() -{ - cd /tmp/ckman/package - version=$1 - common=clickhouse-common-static-${version}-2.x86_64.rpm - client=clickhouse-client-${version}-2.noarch.rpm - server=clickhouse-server-${version}-2.noarch.rpm - echo "download clickhouse package, version:${version}..." - - # download package from https://repo.yandex.ru/clickhouse/rpm/stable/x86_64/ - curl -s -o ${common} https://repo.yandex.ru/clickhouse/rpm/stable/x86_64/${common} - echo ${common}" done" - curl -s -o ${server} https://repo.yandex.ru/clickhouse/rpm/stable/x86_64/${server} - echo ${server}" done" - curl -s -o ${client} https://repo.yandex.ru/clickhouse/rpm/stable/x86_64/${client} - echo ${client}" done" - cd /tmp/ckman -} - -Installjq() -{ - if test $(which jq) = "" ; then - yum install -y epel-release > /dev/null - yum install -y jq > /dev/null - fi -} - -# initalize ckman running environment -Init() -{ - if test $(uname) != "Linux"; then - echo "This platform not support this script." 
- exit 1 - fi - cd /tmp/ckman - ReplaceTemplate - bin/stop 2>/dev/null - PrepareCKPkg 20.9.3.45 - PrepareCKPkg 21.3.9.83 - Installjq - bin/start - sleep 1 -} - - -Destroy() -{ - cd /tmp/ckman - bin/stop -} - -DeployCKTest() -{ - PRINT_TITTLE "DeployCK" - resp=$(${POST} @/tmp/ckman/tests/DeployCK.json "localhost:18808/api/v1/deploy/ck") - CheckResult "DeployCK" "${resp}" -} - -DestroyCKTest() -{ - PRINT_TITTLE "DestroyCK" - resp=$(${PUT} "localhost:18808/api/v1/ck/destroy/test") - CheckResult "DestoryCK" "${resp}" -} - -AddNodeTest() -{ - PRINT_TITTLE "AddNode" - resp=$(${POST} @/tmp/ckman/tests/AddNode.json "localhost:18808/api/v1/ck/node/test") - CheckResult "AddNode" "${resp}" -} - -DeleteNodeTest() -{ - PRINT_TITTLE "DeleteNode" - DOCKER_CKNODE=$(grep 'DOCKER_CKNODE' /tmp/ckman/conf/docker_env.conf |awk -F \" '{print $2}') - resp=$(${DELETE} "localhost:18808/api/v1/ck/node/test?ip=${DOCKER_NODE4}") - CheckResult "DeleteNode" "${resp}" -} - -UpgradeTest() -{ - PRINT_TITTLE "Upgrade" - resp=$(${PUT} -d @/tmp/ckman/tests/Upgrade.json "localhost:18808/api/v1/ck/upgrade/test") - CheckResult "Upgrade" "${resp}" - -} - -SysTest() -{ - GetToken - DeployCKTest - DeleteNodeTest - AddNodeTest - UpgradeTest - #DestroyCKTest -} - -main() -{ - Init - - SysTest - - Destroy -} - -# __main__ start -main diff --git a/log/log.go b/log/log.go index ade8f5b2..a3d5595c 100644 --- a/log/log.go +++ b/log/log.go @@ -26,6 +26,26 @@ func InitLogger(path string, config *config.CKManLogConfig) { Logger = ZapLog.Sugar() } +func InitLoggerDefault(level string, paths []string) { + var err error + cfg := zap.NewProductionConfig() + cfg.Encoding = "console" + zaplevel := zapcore.InfoLevel + zaplevel.UnmarshalText([]byte(level)) + cfg.Level.SetLevel(zaplevel) + cfg.EncoderConfig.EncodeTime = zapcore.ISO8601TimeEncoder + cfg.EncoderConfig.EncodeLevel = zapcore.CapitalLevelEncoder + if len(paths) == 0 { + paths = []string{"stdout"} + } + cfg.OutputPaths = paths + ZapLog, err = cfg.Build() + if err != nil { + panic(err) + } + Logger = ZapLog.Sugar() +} + func InitLoggerConsole() { cfg := zap.NewProductionConfig() cfg.Encoding = "console" diff --git a/model/ck_table.go b/model/ck_table.go index 50756df1..fc54c117 100644 --- a/model/ck_table.go +++ b/model/ck_table.go @@ -161,15 +161,17 @@ type AlterCkTableParams struct { DropIndex []Index } +type AlterTblTTL struct { + Database string `json:"database" example:"default"` + TableName string `json:"tableName" example:"t1"` + DistName string `json:"distName" example:"distt1"` +} + type AlterTblsTTLReq struct { - Tables []struct { - Database string `json:"database" example:"default"` - TableName string `json:"tableName" example:"t1"` - DistName string `json:"distName" example:"distt1"` - } `json:"tables"` - TTLType string `json:"ttl_type" example:"MODIFY"` - TTL []CkTableTTL `json:"ttl"` - TTLExpr string `json:"-"` + Tables []AlterTblTTL `json:"tables"` + TTLType string `json:"ttl_type" example:"MODIFY"` + TTL []CkTableTTL `json:"ttl"` + TTLExpr string `json:"-"` } type DescCkTableParams struct { @@ -187,6 +189,19 @@ type CkColumnAttribute struct { TTLExpression string `json:"ttlExpression"` } +const ( + DML_UPDATE string = "UPDATE" + DML_DELETE string = "DELETE" +) + +type DMLOnLogicReq struct { + Database string + Table string + Manipulation string + KV map[string]string + Cond string +} + type CkTableMetrics struct { Columns uint64 `json:"columns"` Rows uint64 `json:"rows"` diff --git a/model/deploy_ck.go b/model/deploy_ck.go index 17f1ecbb..879b4a1b 100644 --- 
a/model/deploy_ck.go +++ b/model/deploy_ck.go @@ -33,12 +33,21 @@ const ( SshPasswordUsePubkey int = 2 MaxTimeOut int = 3600 + + ClickhouseKeeper string = "clickhouse-keeper" + Zookeeper string = "zookeeper" + ClickHouse string = "clickhouse" + + KeeperRuntimeStandalone = "standalone" + KeeperRuntimeInternal = "internal" ) type CkDeployExt struct { - Policy string - Ipv6Enable bool - Restart bool + Policy string + Ipv6Enable bool + Restart bool + ChangeCk bool + CurClusterOnly bool //仅修改当前集群的配置 } type CkShard struct { @@ -63,7 +72,6 @@ type CkImportConfig struct { LogicCluster string `json:"logic_cluster" example:"logic_test"` ZkNodes []string `json:"zkNodes" example:"192.168.101.102,192.168.101.105,192.168.101.107"` ZkPort int `json:"zkPort" example:"2181"` - ZkStatusPort int `json:"zkStatusPort" example:"8080"` PromHost string `json:"prom_host" example:"127.0.01"` PromPort int `json:"prom_port" example:"9090"` } @@ -76,6 +84,7 @@ type PromMetricPort struct { type CKManClickHouseConfig struct { Cluster string `json:"cluster" example:"test"` + Comment string `json:"comment" example:"test"` PkgType string `json:"pkgType" example:"x86_64.rpm"` PkgName string `json:"pkgName" example:"clickhouse-common-static-22.3.3.44.noarch.rpm"` Version string `json:"version" example:"21.9.1.7647"` @@ -88,11 +97,12 @@ type CKManClickHouseConfig struct { Secure bool `json:"secure"` IsReplica bool `json:"isReplica" example:"true"` Hosts []string `json:"hosts" example:"192.168.0.1,192.168.0.2,192.168.0.3,192.168.0.4"` - ZkNodes []string `json:"zkNodes" example:"192.168.0.1,192.168.0.2,192.168.0.3"` - ZkPort int `json:"zkPort" example:"2181"` - ZkStatusPort int `json:"zkStatusPort" example:"8080"` - PromHost string `json:"promHost" example:"127.0.0.1"` - PromPort int `json:"promPort" example:"9090"` + Keeper string `json:"keeper" example:"zookeeper"` + KeeperConf *KeeperConf + ZkNodes []string `json:"zkNodes" example:"192.168.0.1,192.168.0.2,192.168.0.3"` + ZkPort int `json:"zkPort" example:"2181"` + PromHost string `json:"promHost" example:"127.0.0.1"` + PromPort int `json:"promPort" example:"9090"` PromMetricPort PromMetricPort User string `json:"user" example:"ck"` Password string `json:"password" example:"123456"` @@ -106,9 +116,27 @@ type CKManClickHouseConfig struct { Expert map[string]string // don't need to regist to schema - Mode string `json:"mode" swaggerignore:"true"` - ZooPath map[string]string `json:"zooPath" swaggerignore:"true"` - NeedSudo bool `json:"needSudo" swaggerignore:"true"` + Mode string `json:"mode" swaggerignore:"true"` + NeedSudo bool `json:"needSudo" swaggerignore:"true"` +} + +type Coordination struct { + OperationTimeoutMs int + SessionTimeoutMs int + ForceSync bool + AutoForwarding bool + Expert map[string]string +} + +type KeeperConf struct { + Runtime string `json:"runtime" example:"standalone"` + KeeperNodes []string `json:"keeperNodes" example:"192.168.101.102,192.168.101.105,192.168.101.107"` + TcpPort int `json:"tcpPort" example:"9181"` + RaftPort int `json:"raftPort" example:"9234"` + LogPath string + SnapshotPath string + Coordination Coordination + Expert map[string]string } // Refers to https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/mergetree/#table_engine-mergetree-multiple-volumes @@ -233,9 +261,6 @@ func (config *CKManClickHouseConfig) Normalize() { if config.ZkPort == 0 { config.ZkPort = ClickHouseDefaultZkPort } - if config.ZkStatusPort == 0 { - config.ZkStatusPort = ZkStatusDefaultPort - } if config.SshPort == 0 { config.SshPort = 
SshDefaultPort } @@ -260,6 +285,10 @@ func (config *CKManClickHouseConfig) Normalize() { config.PkgType = PkgTypeDefault } + if config.Keeper == "" { + config.Keeper = Zookeeper + } + if !strings.HasSuffix(config.PkgType, "tgz") { config.Cwd = "" } @@ -300,7 +329,6 @@ func (config *CKManClickHouseConfig) UnWatch(host string) { } func (config *CKManClickHouseConfig) Pack() { - config.ZooPath = make(map[string]string) config.Password = strings.Repeat("*", len(config.Password)) if config.SshPassword != "" { config.SshPassword = strings.Repeat("*", len(config.SshPassword)) @@ -355,3 +383,10 @@ func (config *CKManClickHouseConfig) GetConnOption() ConnetOption { opt.Password = config.Password return opt } + +func (config *CKManClickHouseConfig) KeeperWithStanalone() bool { + if config.Keeper == ClickhouseKeeper { + return config.KeeperConf != nil && config.KeeperConf.Runtime == KeeperRuntimeStandalone + } + return false +} diff --git a/model/manage_ck.go b/model/manage_ck.go index 65d79b7f..d56622f0 100644 --- a/model/manage_ck.go +++ b/model/manage_ck.go @@ -92,11 +92,13 @@ type ArchiveTableReq struct { } type RebalanceShardingkey struct { - Database string `json:"database" example:"default"` - Table string `json:"table" example:"t123"` - ShardingKey string `json:"shardingKey" example:"_timestamp"` - ShardingType TypeInfo `json:"-"` - DistTable string `json:"-"` + Database string `json:"database" example:"default"` + Table string `json:"table" example:"t123"` + ShardingKey string `json:"shardingKey" example:"_timestamp"` + AllowLossRate float64 `json:"allowLossRate" example:"0.1"` + SaveTemps bool `json:"saveTemps" example:"true"` + ShardingType TypeInfo `json:"-"` + DistTable string `json:"-"` } type RebalanceTableReq struct { diff --git a/model/metric.go b/model/metric.go index 3304fc0d..a3de6c1a 100644 --- a/model/metric.go +++ b/model/metric.go @@ -12,3 +12,23 @@ type MetricQueryRangeReq struct { End int64 Step int64 } + +type MetricRsp struct { + Metric Metric + Value [][]interface{} +} + +type Metric struct { + Name string `json:"__name__"` + Instance string + Job string +} + +var MetricMap = map[string]M{ + "CPU Usage (cores)": {"system.metric_log", "avg(ProfileEvent_OSCPUVirtualTimeMicroseconds) / 1000000"}, +} + +type M struct { + Table string + Field string +} diff --git a/model/task.go b/model/task.go index b2aad096..4ee3eac5 100644 --- a/model/task.go +++ b/model/task.go @@ -31,6 +31,8 @@ const ( TaskTypeKeeperDeploy string = "keeper.deploy" TaskTypeKeeperUpgrade string = "keeper.upgrade" + TaskTypeKeeperDestory string = "keeper.destory" + TaskTypeKeeperSetting string = "keeper.setting" ALL_NODES_DEFAULT string = "all_hosts" ) diff --git a/nfpm.yaml b/nfpm.yaml index ae615a12..04a8b289 100644 --- a/nfpm.yaml +++ b/nfpm.yaml @@ -10,10 +10,7 @@ section: "default" priority: "extra" provides: - ckman -- ckmanpasswd -- migrate -- znodefix -- znode_count +- ckmanctl maintainer: "Zhichang Yu " description: | ckman is a tool which used to manage and monitor ClickHouse database @@ -22,14 +19,8 @@ license: "MIT" contents: - src: ./ckman dst: /usr/local/bin/ckman - - src: ./ckmanpasswd - dst: /usr/local/bin/ckmanpasswd - - src: ./migrate - dst: /usr/local/bin/migrate - - src: ./znodefix - dst: /usr/local/bin/znodefix - - src: ./znode_count - dst: /usr/local/bin/znode_count + - src: ./cmd/ckmanctl/ckmanctl + dst: /usr/local/bin/ckmanctl - src: ./resources/yaml2json dst: /usr/local/bin/yaml2json - src: ./README.md @@ -58,6 +49,9 @@ contents: - src: /usr/local/bin/ckman dst: /sbin/ckman type: 
symlink + - src: /usr/local/bin/ckmanctl + dst: /sbin/ckmanctl + type: symlink - src: /etc/systemd/system/ckman.service dst: /etc/systemd/system/multi-user.target.wants/ckman.service type: symlink diff --git a/router/v1.go b/router/v1.go index 5f7b1cc2..c97015ca 100644 --- a/router/v1.go +++ b/router/v1.go @@ -84,6 +84,7 @@ func InitRouterV1(groupV1 *gin.RouterGroup, config *config.CKManConfig, signal c groupV1.POST(fmt.Sprintf("/ck/dist_logic_table/:%s", controller.ClickHouseClusterPath), ckController.CreateDistTableOnLogic) groupV1.DELETE(fmt.Sprintf("/ck/dist_logic_table/:%s", controller.ClickHouseClusterPath), ckController.DeleteDistTableOnLogic) groupV1.PUT(fmt.Sprintf("/ck/table/:%s", controller.ClickHouseClusterPath), ckController.AlterTable) + groupV1.POST(fmt.Sprintf("/ck/table/dml/:%s", controller.ClickHouseClusterPath), ckController.DMLOnLogic) groupV1.DELETE(fmt.Sprintf("/ck/truncate_table/:%s", controller.ClickHouseClusterPath), ckController.TruncateTable) groupV1.PUT(fmt.Sprintf("/ck/table/ttl/:%s", controller.ClickHouseClusterPath), ckController.AlterTableTTL) groupV1.PUT(fmt.Sprintf("/ck/table/readonly/:%s", controller.ClickHouseClusterPath), ckController.RestoreReplica) @@ -97,6 +98,7 @@ func InitRouterV1(groupV1 *gin.RouterGroup, config *config.CKManConfig, signal c groupV1.GET(fmt.Sprintf("/ck/table/:%s", controller.ClickHouseClusterPath), ckController.DescTable) groupV1.GET(fmt.Sprintf("/ck/query/:%s", controller.ClickHouseClusterPath), ckController.QueryInfo) groupV1.GET(fmt.Sprintf("/ck/query_explain/:%s", controller.ClickHouseClusterPath), ckController.QueryExplain) + groupV1.GET(fmt.Sprintf("/ck/query_export/:%s", controller.ClickHouseClusterPath), ckController.QueryExport) groupV1.GET(fmt.Sprintf("/ck/query_history/:%s", controller.ClickHouseClusterPath), ckController.QueryHistory) groupV1.DELETE(fmt.Sprintf("/ck/query_history/:%s", controller.ClickHouseClusterPath), ckController.DeleteQuery) groupV1.GET(fmt.Sprintf("/ck/table_lists/:%s", controller.ClickHouseClusterPath), ckController.GetTableLists) diff --git a/router/v2.go b/router/v2.go index af031a97..4981e3e2 100644 --- a/router/v2.go +++ b/router/v2.go @@ -84,6 +84,7 @@ func InitRouterV2(groupV2 *gin.RouterGroup, config *config.CKManConfig, signal c groupV2.POST(fmt.Sprintf("/ck/dist-logic-table/:%s", controller.ClickHouseClusterPath), ckController.CreateDistTableOnLogic) groupV2.DELETE(fmt.Sprintf("/ck/dist-logic-table/:%s", controller.ClickHouseClusterPath), ckController.DeleteDistTableOnLogic) groupV2.PUT(fmt.Sprintf("/ck/table/:%s", controller.ClickHouseClusterPath), ckController.AlterTable) + groupV2.POST(fmt.Sprintf("/ck/table/dml/:%s", controller.ClickHouseClusterPath), ckController.DMLOnLogic) groupV2.DELETE(fmt.Sprintf("/ck/truncate-table/:%s", controller.ClickHouseClusterPath), ckController.TruncateTable) groupV2.PUT(fmt.Sprintf("/ck/table/ttl/:%s", controller.ClickHouseClusterPath), ckController.AlterTableTTL) groupV2.PUT(fmt.Sprintf("/ck/table/readonly/:%s", controller.ClickHouseClusterPath), ckController.RestoreReplica) @@ -97,6 +98,7 @@ func InitRouterV2(groupV2 *gin.RouterGroup, config *config.CKManConfig, signal c groupV2.GET(fmt.Sprintf("/ck/table/:%s", controller.ClickHouseClusterPath), ckController.DescTable) groupV2.GET(fmt.Sprintf("/ck/query/:%s", controller.ClickHouseClusterPath), ckController.QueryInfo) groupV2.GET(fmt.Sprintf("/ck/query-explain/:%s", controller.ClickHouseClusterPath), ckController.QueryExplain) + groupV2.GET(fmt.Sprintf("/ck/query-export/:%s", 
controller.ClickHouseClusterPath), ckController.QueryExport) groupV2.GET(fmt.Sprintf("/ck/query-history/:%s", controller.ClickHouseClusterPath), ckController.QueryHistory) groupV2.DELETE(fmt.Sprintf("/ck/query-history/:%s", controller.ClickHouseClusterPath), ckController.DeleteQuery) groupV2.GET(fmt.Sprintf("/ck/table-lists/:%s", controller.ClickHouseClusterPath), ckController.GetTableLists) diff --git a/server/enforce/enforce.go b/server/enforce/enforce.go index ce557a6d..2fde0e3e 100644 --- a/server/enforce/enforce.go +++ b/server/enforce/enforce.go @@ -65,18 +65,21 @@ func Enforce(username, url, method string) bool { return true } - userinfo, err := common.GetUserInfo(username) - if err != nil { - return false - } var policies []Policy - switch userinfo.Policy { - case common.GUEST: - policies = e.guest - case common.ORDINARY: + if username == common.InternalOrdinaryName { policies = e.orinary + } else { + userinfo, err := common.GetUserInfo(username) + if err != nil { + return false + } + switch userinfo.Policy { + case common.GUEST: + policies = e.guest + case common.ORDINARY: + policies = e.orinary + } } - for _, policy := range policies { if e.Match(policy.URL, url) && policy.Method == method { return true diff --git a/server/enforce/guest.go b/server/enforce/guest.go index e20d1a86..9fdf9c22 100644 --- a/server/enforce/guest.go +++ b/server/enforce/guest.go @@ -8,6 +8,7 @@ func GuestPolicies() []Policy { {"/ck/table/group_uniq_array/*", GET}, {"/ck/query/*", GET}, {"/ck/query_explain/*", GET}, + {"/ck/query_export/*", GET}, {"/ck/query_history/*", GET}, {"/ck/table_lists/*", GET}, {"/ck/table_schema/*", GET}, diff --git a/server/enforce/ordinary.go b/server/enforce/ordinary.go index 2955369a..ebb25971 100644 --- a/server/enforce/ordinary.go +++ b/server/enforce/ordinary.go @@ -19,6 +19,7 @@ func OrdinaryPolicies() []Policy { {"/ck/open_sessions/*", PUT}, {"/ck/purge_tables/*", POST}, {"/ck/archive/*", POST}, + {"/ck/table/dml/*", POST}, } return append(OrdinaryPolicies, GuestPolicies()...) 
} diff --git a/server/server.go b/server/server.go index ed0a3c11..ee19ecf6 100644 --- a/server/server.go +++ b/server/server.go @@ -6,6 +6,7 @@ import ( "fmt" "net/http" "os" + "path/filepath" "runtime/debug" "strings" "time" @@ -98,6 +99,7 @@ func (server *ApiServer) Start() error { groupApi := r.Group("/api") groupApi.POST("/login", userController.Login) // add authenticate middleware for /api + common.LoadUsers(filepath.Dir(server.config.ConfigFile)) groupApi.Use(ginJWTAuth()) groupApi.Use(ginRefreshTokenExpires()) groupApi.Use(ginEnforce()) diff --git a/service/clickhouse/clickhouse.go b/service/clickhouse/clickhouse.go new file mode 100644 index 00000000..d346bf86 --- /dev/null +++ b/service/clickhouse/clickhouse.go @@ -0,0 +1,1654 @@ +package clickhouse + +import ( + "fmt" + "net" + "regexp" + "runtime" + "sort" + "strings" + "sync" + "time" + + "github.com/housepower/ckman/common" + "github.com/housepower/ckman/log" + "github.com/housepower/ckman/model" + "github.com/housepower/ckman/repository" + "github.com/pkg/errors" +) + +func GetCkClusterConfig(conf *model.CKManClickHouseConfig) (string, error) { + var replicas []model.CkReplica + + service := NewCkService(conf) + if err := service.InitCkService(); err != nil { + return model.E_CH_CONNECT_FAILED, err + } + hosts := conf.Hosts + conf.Hosts = make([]string, 0) + conf.Shards = make([]model.CkShard, 0) + + value, err := service.QueryInfo(fmt.Sprintf("SELECT cluster, shard_num, replica_num, host_name, host_address FROM system.clusters WHERE cluster='%s' ORDER BY cluster, shard_num, replica_num", conf.Cluster)) + if err != nil { + return model.E_DATA_SELECT_FAILED, err + } + if len(value) == 1 { + return model.E_RECORD_NOT_FOUND, errors.Errorf("cluster %s is not exist, or hosts %v is not in cluster %s", conf.Cluster, hosts, conf.Cluster) + } + shardNum := uint32(0) + var ( + loopback bool + lbhostName string + ) + for i := 1; i < len(value); i++ { + if shardNum != value[i][1].(uint32) { + if len(replicas) != 0 { + shard := model.CkShard{ + Replicas: replicas, + } + conf.Shards = append(conf.Shards, shard) + } + replicas = make([]model.CkReplica, 0) + } + if value[i][2].(uint32) > 1 { + conf.IsReplica = true + } + replica := model.CkReplica{ + Ip: value[i][4].(string), + HostName: value[i][3].(string), + } + replicas = append(replicas, replica) + conf.Hosts = append(conf.Hosts, value[i][4].(string)) + shardNum = value[i][1].(uint32) + // when deployed on k8s, IP is not stable, and always return 127.0.0.1 + if replica.Ip == common.NetLoopBack { + log.Logger.Infof("found loopback") + loopback = true + lbhostName = replica.HostName + } + } + + if len(replicas) != 0 { + shard := model.CkShard{ + Replicas: replicas, + } + conf.Shards = append(conf.Shards, shard) + } + + if loopback { + var realHost string + query := fmt.Sprintf("SELECT host_address FROM system.clusters WHERE cluster='%s' AND host_name = '%s'", conf.Cluster, lbhostName) + + hosts, err := common.GetShardAvaliableHosts(conf) + if err != nil { + return model.E_CH_CONNECT_FAILED, err + } + conn := common.GetConnection(hosts[0]) + rows, err := conn.Query(query) + if err != nil { + return model.E_DATA_SELECT_FAILED, err + } + for rows.Next() { + var ip string + err = rows.Scan(&ip) + if err != nil { + return model.E_DATA_SELECT_FAILED, err + } + if ip != "" && ip != common.NetLoopBack { + realHost = ip + break + } + } + log.Logger.Infof("realHost: %s", realHost) + + for i := range conf.Hosts { + if conf.Hosts[i] == common.NetLoopBack { + conf.Hosts[i] = realHost + break + } + } 
+ + for i := range conf.Shards { + for j := range conf.Shards[i].Replicas { + if conf.Shards[i].Replicas[j].Ip == common.NetLoopBack { + conf.Shards[i].Replicas[j].Ip = realHost + } + } + } + } + + if conf.LogicCluster != nil { + query := fmt.Sprintf("SELECT count() FROM system.clusters WHERE cluster = '%s'", *conf.LogicCluster) + value, err = service.QueryInfo(query) + if err != nil { + return model.E_DATA_SELECT_FAILED, err + } + c := value[1][0].(uint64) + if c == 0 { + return model.E_RECORD_NOT_FOUND, fmt.Errorf("logic cluster %s not exist", *conf.LogicCluster) + } + } + + value, err = service.QueryInfo("SELECT version()") + if err != nil { + return model.E_DATA_SELECT_FAILED, err + } + conf.Version = value[1][0].(string) + + return model.E_SUCCESS, nil +} + +func getNodeInfo(service *CkService) (string, string) { + query := `SELECT + formatReadableSize(sum(total_space) - sum(free_space)) AS used, + formatReadableSize(sum(total_space)) AS total, uptime() as uptime + FROM system.disks WHERE lower(type) = 'local'` + value, err := service.QueryInfo(query) + if err != nil { + return "NA/NA", "" + } + usedSpace := value[1][0].(string) + totalSpace := value[1][1].(string) + uptime := value[1][2].(uint32) + return fmt.Sprintf("%s/%s", usedSpace, totalSpace), common.FormatReadableTime(uptime) +} + +func GetCkClusterStatus(conf *model.CKManClickHouseConfig) []model.CkClusterNode { + index := 0 + statusList := make([]model.CkClusterNode, len(conf.Hosts)) + statusMap := make(map[string]string, len(conf.Hosts)) + diskMap := make(map[string]string, len(conf.Hosts)) + uptimeMap := make(map[string]string, len(conf.Hosts)) + var lock sync.RWMutex + var wg sync.WaitGroup + for _, host := range conf.Hosts { + innerHost := host + wg.Add(1) + _ = common.Pool.Submit(func() { + defer wg.Done() + tmp := &model.CKManClickHouseConfig{ + Hosts: []string{innerHost}, + Port: conf.Port, + HttpPort: conf.HttpPort, + Cluster: conf.Cluster, + User: conf.User, + Password: conf.Password, + } + service := NewCkService(tmp) + if err := service.InitCkService(); err != nil { + lock.Lock() + statusMap[innerHost] = model.CkStatusRed + diskMap[innerHost] = "NA/NA" + lock.Unlock() + } else { + lock.Lock() + statusMap[innerHost] = model.CkStatusGreen + diskMap[innerHost], uptimeMap[innerHost] = getNodeInfo(service) + lock.Unlock() + } + }) + } + wg.Wait() + for i, shard := range conf.Shards { + for j, replica := range shard.Replicas { + status := model.CkClusterNode{ + Ip: replica.Ip, + HostName: replica.HostName, + ShardNumber: i + 1, + ReplicaNumber: j + 1, + Status: statusMap[replica.Ip], + Disk: diskMap[replica.Ip], + Uptime: uptimeMap[replica.Ip], + } + statusList[index] = status + index++ + } + } + return statusList +} +func GetCkTableMetrics(conf *model.CKManClickHouseConfig, database string, cols []string) (map[string]*model.CkTableMetrics, error) { + metrics := make(map[string]*model.CkTableMetrics) + + service := NewCkService(conf) + if err := service.InitCkService(); err != nil { + return nil, err + } + // get table names + databases, dbtables, err := common.GetMergeTreeTables("MergeTree", database, service.Conn) + if err != nil { + return nil, err + } + for db, tables := range dbtables { + for _, table := range tables { + // init + tableName := fmt.Sprintf("%s.%s", db, table) + metric := &model.CkTableMetrics{ + RWStatus: true, + } + metrics[tableName] = metric + } + } + + dbs := strings.Join(databases, "','") + var query string + var value [][]interface{} + + // get columns + if common.ArraySearch("columns", cols) 
|| len(cols) == 0 { + query = fmt.Sprintf("SELECT table, count() as columns, database FROM system.columns WHERE database in ('%s') GROUP BY table, database", + dbs) + value, err = service.QueryInfo(query) + if err != nil { + return nil, err + } + for i := 1; i < len(value); i++ { + table := value[i][0].(string) + database := value[i][2].(string) + tableName := fmt.Sprintf("%s.%s", database, table) + if metric, ok := metrics[tableName]; ok { + metric.Columns = value[i][1].(uint64) + } + } + } + + // get bytes, parts, rows + found := false + if common.ArraySearch("partitions", cols) || common.ArraySearch("parts", cols) || + common.ArraySearch("compressed", cols) || common.ArraySearch("uncompressed", cols) || + common.ArraySearch("rows", cols) || len(cols) == 0 { + found = true + } + if found { + query = fmt.Sprintf("SELECT table, uniqExact(partition) AS partitions, count(*) AS parts, sum(data_compressed_bytes) AS compressed, sum(data_uncompressed_bytes) AS uncompressed, sum(rows) AS rows, database FROM cluster('%s', system.parts) WHERE (database in ('%s')) AND (active = '1') GROUP BY table, database;", conf.Cluster, dbs) + value, err = service.QueryInfo(query) + if err != nil { + return nil, err + } + for i := 1; i < len(value); i++ { + table := value[i][0].(string) + database := value[i][6].(string) + tableName := fmt.Sprintf("%s.%s", database, table) + if metric, ok := metrics[tableName]; ok { + if common.ArraySearch("partitions", cols) || len(cols) == 0 { + metric.Partitions = value[i][1].(uint64) + } + if common.ArraySearch("parts", cols) || len(cols) == 0 { + metric.Parts = value[i][2].(uint64) + } + if common.ArraySearch("compressed", cols) || len(cols) == 0 { + metric.Compressed = value[i][3].(uint64) + } + if common.ArraySearch("uncompressed", cols) || len(cols) == 0 { + metric.UnCompressed = value[i][4].(uint64) + } + if common.ArraySearch("rows", cols) || len(cols) == 0 { + metric.Rows = value[i][5].(uint64) + } + } + } + } + + // get readwrite_status + if common.ArraySearch("is_readonly", cols) || len(cols) == 0 { + query = fmt.Sprintf("select table, is_readonly, database from cluster('%s', system.replicas) where database in ('%s')", conf.Cluster, dbs) + value, err = service.QueryInfo(query) + if err != nil { + return nil, err + } + for i := 1; i < len(value); i++ { + table := value[i][0].(string) + database := value[i][2].(string) + tableName := fmt.Sprintf("%s.%s", database, table) + if metric, ok := metrics[tableName]; ok { + isReadonly := value[i][1].(uint8) + if isReadonly != 0 { + metric.RWStatus = false + } + } + } + } + + return metrics, nil +} + +func GetCKMerges(conf *model.CKManClickHouseConfig) ([]model.CKTableMerges, error) { + var merges []model.CKTableMerges + query := "SELECT database, table, elapsed, progress, num_parts, result_part_name, source_part_names, total_size_bytes_compressed, bytes_read_uncompressed, bytes_written_uncompressed, rows_read, memory_usage, merge_algorithm FROM system.merges" + log.Logger.Debug("query: %s", query) + for _, host := range conf.Hosts { + db, err := common.ConnectClickHouse(host, model.ClickHouseDefaultDB, conf.GetConnOption()) + if err != nil { + return merges, err + } + rows, err := db.Query(query) + if err != nil { + return merges, err + } + for rows.Next() { + var ( + databse, table, result_part_name, merge_algorithm string + elapsed, progress float64 + memory_usage, num_parts, total_size_bytes_compressed, bytes_written_uncompressed, bytes_read_uncompressed, rows_read uint64 + source_part_names []string + ) + err = 
rows.Scan(&databse, &table, &elapsed, &progress, &num_parts, &result_part_name, &source_part_names, &total_size_bytes_compressed, &bytes_read_uncompressed, &bytes_written_uncompressed, &rows_read, &memory_usage, &merge_algorithm) + if err != nil { + return merges, err + } + merge := model.CKTableMerges{ + Table: databse + "." + table, + Host: host, + Elapsed: elapsed, + MergeStart: time.Now().Add(time.Duration(elapsed*float64(time.Second)) * (-1)), + Progress: progress, + NumParts: num_parts, + ResultPartName: result_part_name, + SourcePartNames: strings.Join(source_part_names, ","), + Compressed: total_size_bytes_compressed, + Uncomressed: bytes_read_uncompressed + bytes_written_uncompressed, + Rows: rows_read, + MemUsage: memory_usage, + Algorithm: merge_algorithm, + } + merges = append(merges, merge) + } + rows.Close() + } + + return merges, nil +} + +func SetTableOrderBy(conf *model.CKManClickHouseConfig, req model.OrderbyReq) error { + hosts, err := common.GetShardAvaliableHosts(conf) + if err != nil { + return err + } + + ck := NewCkService(conf) + if err = ck.InitCkService(); err != nil { + return err + } + local, dist, err := common.GetTableNames(ck.Conn, req.Database, req.Table, req.DistName, conf.Cluster, true) + if err != nil { + return err + } + var wg sync.WaitGroup + var lastError error + query := fmt.Sprintf(`SELECT create_table_query, engine, partition_key, sorting_key FROM system.tables WHERE (database = '%s') AND (name = '%s')`, req.Database, local) + log.Logger.Debugf(query) + rows, err := ck.Conn.Query(query) + if err != nil { + return err + } + var createSql, engine, partition, order string + for rows.Next() { + err = rows.Scan(&createSql, &engine, &partition, &order) + if err != nil { + return err + } + } + log.Logger.Debugf("createsql: %s, engine:%s, partition: %s, order: %s", createSql, engine, partition, order) + new_partition := "" + if req.Partitionby.Name != "" { + switch req.Partitionby.Policy { + case model.CkTablePartitionPolicyDay: + new_partition = fmt.Sprintf("toYYYYMMDD(`%s`)", req.Partitionby.Name) + case model.CkTablePartitionPolicyMonth: + new_partition = fmt.Sprintf("toYYYYMM(`%s`)", req.Partitionby.Name) + case model.CkTablePartitionPolicyWeek: + new_partition = fmt.Sprintf("toYearWeek(`%s`)", req.Partitionby.Name) + default: + new_partition = fmt.Sprintf("toYYYYMMDD(`%s`)", req.Partitionby.Name) + } + } + + new_order := "" + if len(req.Orderby) > 0 { + new_order = strings.Join(req.Orderby, ",") + } + if new_partition == partition && new_order == order { + return fmt.Errorf("partition and orderby is the same as the old") + } + tmpSql := fmt.Sprintf("CREATE TABLE `%s`.`tmp_%s` AS `%s`.`%s` ENGINE=%s() PARTITION BY %s ORDER BY (%s)", req.Database, local, req.Database, local, engine, new_partition, new_order) + createSql = strings.ReplaceAll(strings.ReplaceAll(createSql, "PARTITION BY "+partition, "PARTITION BY "+new_partition), "ORDER BY ("+order, "ORDER BY ("+new_order) + createSql = strings.ReplaceAll(createSql, fmt.Sprintf("CREATE TABLE %s.%s", req.Database, local), fmt.Sprintf("CREATE TABLE IF NOT EXISTS %s.%s ON CLUSTER `%s`", req.Database, local, conf.Cluster)) + + max_insert_threads := runtime.NumCPU()*3/4 + 1 + for _, host := range hosts { + host := host + wg.Add(1) + common.Pool.Submit(func() { + defer wg.Done() + conn, err := common.ConnectClickHouse(host, req.Database, conf.GetConnOption()) + if err != nil { + lastError = err + return + } + + queries := []string{ + tmpSql, + fmt.Sprintf("INSERT INTO `%s`.`tmp_%s` SELECT * FROM `%s`.`%s` 
SETTINGS max_insert_threads=%d, max_execution_time=0", req.Database, local, req.Database, local, max_insert_threads), + } + + for _, query := range queries { + log.Logger.Debugf("[%s]%s", host, query) + err = conn.Exec(query) + if err != nil { + lastError = err + return + } + } + }) + } + wg.Wait() + + // if lastError not nil, need to drop tmp table + if lastError == nil { + // we must ensure data move to tmptable succeed, then drop and recreate origin table + queries := []string{ + fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s` ON CLUSTER `%s` SYNC", req.Database, local, conf.Cluster), + createSql, + } + + for _, query := range queries { + log.Logger.Debugf("%s", query) + err = ck.Conn.Exec(query) + if err != nil { + lastError = err + break + } + } + } + + for _, host := range hosts { + host := host + wg.Add(1) + common.Pool.Submit(func() { + defer wg.Done() + db := common.GetConnection(host) + if db == nil { + return + } + if lastError == nil { + query := fmt.Sprintf("INSERT INTO `%s`.`%s` SELECT * FROM `%s`.`tmp_%s` SETTINGS max_insert_threads=%d,max_execution_time=0", req.Database, local, req.Database, local, max_insert_threads) + log.Logger.Debugf("%s: %s", host, query) + err = ck.Conn.Exec(query) + if err != nil { + lastError = err + } + } + + cleanSql := fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`tmp_%s` SYNC", req.Database, local) + log.Logger.Debugf("%s: %s", host, cleanSql) + _ = db.Exec(cleanSql) + }) + } + wg.Wait() + if lastError != nil { + return lastError + } + + if dist != "" { + //alter distributed table + ck = NewCkService(conf) + if err = ck.InitCkService(); err != nil { + return err + } + deleteSql := fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s` ON CLUSTER `%s` SYNC", + req.Database, dist, conf.Cluster) + log.Logger.Debugf(deleteSql) + if err = ck.Conn.Exec(deleteSql); err != nil { + return errors.Wrap(err, "") + } + + create := fmt.Sprintf("CREATE TABLE IF NOT EXISTS `%s`.`%s` ON CLUSTER `%s` AS `%s`.`%s` ENGINE = Distributed(`%s`, `%s`, `%s`, rand())", + req.Database, dist, conf.Cluster, req.Database, local, + conf.Cluster, req.Database, local) + log.Logger.Debugf(create) + if err = ck.Conn.Exec(create); err != nil { + return errors.Wrap(err, "") + } + + if conf.LogicCluster != nil { + distParams := model.DistLogicTblParams{ + Database: req.Database, + TableName: local, + DistName: dist, + ClusterName: conf.Cluster, + LogicCluster: *conf.LogicCluster, + } + if err := ck.DeleteDistTblOnLogic(&distParams); err != nil { + return err + } + if err := ck.CreateDistTblOnLogic(&distParams); err != nil { + return err + } + } + } + + return nil +} + +func MaterializedView(conf *model.CKManClickHouseConfig, req model.MaterializedViewReq) (string, error) { + var statement string + ckService := NewCkService(conf) + err := ckService.InitCkService() + if err != nil { + return "", err + } + var query string + if req.Operate == model.OperateCreate { + partition := "" + switch req.Partition.Policy { + case model.CkTablePartitionPolicyDay: + partition = fmt.Sprintf("toYYYYMMDD(`%s`)", req.Partition.Name) + case model.CkTablePartitionPolicyMonth: + partition = fmt.Sprintf("toYYYYMM(`%s`)", req.Partition.Name) + case model.CkTablePartitionPolicyWeek: + partition = fmt.Sprintf("toYearWeek(`%s`)", req.Partition.Name) + default: + partition = fmt.Sprintf("toYYYYMMDD(`%s`)", req.Partition.Name) + } + + var populate string + if req.Populate { + populate = "POPULATE" + } + + query = fmt.Sprintf("CREATE MATERIALIZED VIEW IF NOT EXISTS `%s`.`%s` ON CLUSTER `%s` ENGINE=%s PARTITION BY %s ORDER BY (`%s`) 
%s AS %s", + req.Database, req.Name, conf.Cluster, req.Engine, partition, strings.Join(req.Order, "`,`"), populate, req.Statement) + } else if req.Operate == model.OperateDelete { + query = fmt.Sprintf("DROP VIEW IF EXISTS `%s`.`%s` ON CLUSTER `%s`) SYNC", + req.Database, req.Name, conf.Cluster) + } + if req.Dryrun { + return query, nil + } else { + log.Logger.Debug(query) + err = ckService.Conn.Exec(query) + if err != nil { + return "", err + } + } + + return statement, nil +} + +func GetPartitions(conf *model.CKManClickHouseConfig, table string) (map[string]model.PartitionInfo, error) { + partInfo := make(map[string]model.PartitionInfo) + + dbTbl := strings.SplitN(table, ".", 2) + dabatase := dbTbl[0] + tableName := dbTbl[1] + + service := NewCkService(conf) + err := service.InitCkService() + if err != nil { + return nil, err + } + query := fmt.Sprintf(`SELECT + partition, + count(name), + sum(rows), + sum(data_compressed_bytes), + sum(data_uncompressed_bytes), + min(min_time), + max(max_time), + disk_name +FROM cluster('%s', system.parts) +WHERE (database = '%s') AND (table = '%s') AND (active = 1) +GROUP BY + partition, + disk_name +ORDER BY partition ASC`, conf.Cluster, dabatase, tableName) + log.Logger.Infof("query: %s", query) + value, err := service.QueryInfo(query) + if err != nil { + return nil, err + } + for i := 1; i < len(value); i++ { + partitionId := value[i][0].(string) + part := model.PartitionInfo{ + Database: dabatase, + Table: tableName, + Parts: value[i][1].(uint64), + Rows: value[i][2].(uint64), + Compressed: value[i][3].(uint64), + UnCompressed: value[i][4].(uint64), + MinTime: value[i][5].(time.Time), + MaxTime: value[i][6].(time.Time), + DiskName: value[i][7].(string), + } + partInfo[partitionId] = part + } + + return partInfo, nil +} + +func getHostSessions(service *CkService, query, host string) ([]*model.CkSessionInfo, error) { + list := make([]*model.CkSessionInfo, 0) + + value, err := service.QueryInfo(query) + if err != nil { + return nil, err + } + for i := 1; i < len(value); i++ { + session := new(model.CkSessionInfo) + session.StartTime = value[i][0].(time.Time).Unix() + session.QueryDuration = value[i][1].(uint64) + session.Query = value[i][2].(string) + session.User = value[i][3].(string) + session.QueryId = value[i][4].(string) + session.Address = value[i][5].(net.IP).String() + session.Threads = len(value[i][6].([]uint64)) + session.Host = host + list = append(list, session) + } + + return list, nil +} + +func getCkSessions(conf *model.CKManClickHouseConfig, limit int, query string) ([]*model.CkSessionInfo, error) { + list := make([]*model.CkSessionInfo, 0) + + var lastError error + var wg sync.WaitGroup + for _, host := range conf.Hosts { + innerHost := host + wg.Add(1) + _ = common.Pool.Submit(func() { + defer wg.Done() + service, err := GetCkNodeService(conf.Cluster, innerHost) + if err != nil { + log.Logger.Warnf("get ck node %s service error: %v", innerHost, err) + return + } + + sessions, err := getHostSessions(service, query, innerHost) + if err != nil { + lastError = err + } + list = append(list, sessions...) 
+ }) + } + wg.Wait() + if lastError != nil { + return nil, lastError + } + + sort.Sort(model.SessionList(list)) + if len(list) <= limit { + return list, nil + } else { + return list[:limit], nil + } +} + +func GetCkOpenSessions(conf *model.CKManClickHouseConfig, limit int) ([]*model.CkSessionInfo, error) { + query := fmt.Sprintf("select subtractSeconds(now(), elapsed) AS query_start_time, toUInt64(elapsed*1000) AS query_duration_ms, query, initial_user, initial_query_id, initial_address, thread_ids, (extractAllGroups(query, '(from|FROM)\\s+(\\w+\\.)\\?(\\w+)')[1])[3] AS tbl_name from system.processes WHERE tbl_name != '' AND tbl_name != 'processes' AND tbl_name != 'query_log' AND is_initial_query=1 ORDER BY query_duration_ms DESC limit %d", limit) + log.Logger.Debugf("query: %s", query) + return getCkSessions(conf, limit, query) +} + +func GetDistibutedDDLQueue(conf *model.CKManClickHouseConfig) ([]*model.CkSessionInfo, error) { + query := fmt.Sprintf("select DISTINCT query_create_time, query, host, initiator_host, entry from cluster('%s', system.distributed_ddl_queue) where cluster = '%s' and status != 'Finished' ORDER BY query_create_time", conf.Cluster, conf.Cluster) + log.Logger.Debugf("query:%s", query) + service := NewCkService(conf) + err := service.InitCkService() + if err != nil { + return nil, err + } + + value, err := service.QueryInfo(query) + if err != nil { + return nil, err + } + var sessions []*model.CkSessionInfo + if len(value) > 1 { + sessions = make([]*model.CkSessionInfo, len(value)-1) + for i := 1; i < len(value); i++ { + var session model.CkSessionInfo + startTime := value[i][0].(time.Time) + session.StartTime = startTime.Unix() + session.QueryDuration = uint64(time.Since(startTime).Milliseconds()) + session.Query = value[i][1].(string) + session.Host = value[i][2].(string) + session.Address = value[i][3].(string) + session.QueryId = value[i][4].(string) + + sessions[i-1] = &session + } + } else { + sessions = make([]*model.CkSessionInfo, 0) + } + return sessions, nil +} +func KillCkOpenSessions(conf *model.CKManClickHouseConfig, host, queryId, typ string) error { + conn, err := common.ConnectClickHouse(host, model.ClickHouseDefaultDB, conf.GetConnOption()) + if err != nil { + return err + } + if typ == "queue" { + query := fmt.Sprintf(`SELECT + splitByChar('.', table)[1] AS database, + splitByChar('.', table)[2] AS tbl, + initial_query_id + FROM + ( + SELECT + (extractAllGroups(value, 'TABLE (\\w+\\.\\w+) ')[1])[1] AS table, + (extractAllGroups(value, 'initial_query_id: (.*)\n')[1])[1] AS initial_query_id + FROM system.zookeeper + WHERE (path = '/clickhouse/task_queue/ddl/%s') AND (name = '%s') + )`, conf.Cluster, queryId) + var database, table, initial_query_id string + log.Logger.Debugf(query) + err := conn.QueryRow(query).Scan(&database, &table, &initial_query_id) + if err != nil { + return errors.Wrap(err, "") + } + log.Logger.Debugf("database: %s, table: %s, initial_query_id: %s", database, table, initial_query_id) + query = fmt.Sprintf("select query_id from system.processes where initial_query_id = '%s'", initial_query_id) + var query_id string + log.Logger.Debugf(query) + err = conn.QueryRow(query).Scan(&query_id) + if err == nil { + query = fmt.Sprintf("KILL QUERY WHERE query_id = '%s'", query_id) + log.Logger.Debugf(query) + err = conn.Exec(query) + if err != nil { + return errors.Wrap(err, "") + } + } else { + // kill mutation + query = fmt.Sprintf("select count() from system.mutations where is_done = 0 and database = '%s' and table = '%s'", database, 
table) + log.Logger.Debugf(query) + var count uint64 + err = conn.QueryRow(query).Scan(&count) + if err != nil { + return errors.Wrap(err, "") + } + if count > 0 { + query = fmt.Sprintf("KILL MUTATION WHERE database = '%s' AND table = '%s'", database, table) + log.Logger.Debugf(query) + err = conn.Exec(query) + if err != nil { + return errors.Wrap(err, "") + } + } + } + } else { + query := fmt.Sprintf("KILL QUERY WHERE query_id = '%s'", queryId) + err = conn.Exec(query) + if err != nil { + return errors.Wrap(err, "") + } + } + return nil +} + +func GetCkSlowSessions(conf *model.CKManClickHouseConfig, cond model.SessionCond) ([]*model.CkSessionInfo, error) { + query := fmt.Sprintf("SELECT query_start_time, query_duration_ms, query, initial_user, initial_query_id, initial_address, thread_ids, splitByChar('.', tables[1])[-1] AS tbl_name from system.query_log WHERE tbl_name != '' AND tbl_name != 'query_log' AND tbl_name != 'processes' AND type=2 AND is_initial_query=1 AND event_date >= parseDateTimeBestEffort('%d') AND query_start_time >= parseDateTimeBestEffort('%d') AND query_start_time <= parseDateTimeBestEffort('%d') ORDER BY query_duration_ms DESC limit %d", cond.StartTime, cond.StartTime, cond.EndTime, cond.Limit) + log.Logger.Debugf("query: %s", query) + return getCkSessions(conf, cond.Limit, query) +} +func GetZkPath(conn *common.Conn, database, table string) (string, error) { + var err error + var path string + var rows *common.Rows + query := fmt.Sprintf(`SELECT + (extractAllGroups(create_table_query, '(MergeTree\\(\')(.*)\', \'{replica}\'\\)')[1])[2] AS zoopath +FROM system.tables +WHERE database = '%s' AND name = '%s'`, database, table) + log.Logger.Debugf("database:%s, table:%s: query: %s", database, table, query) + if rows, err = conn.Query(query); err != nil { + err = errors.Wrapf(err, "") + return "", err + } + defer rows.Close() + for rows.Next() { + var result string + if err = rows.Scan(&result); err != nil { + err = errors.Wrapf(err, "") + return "", err + } + path = result + } + + return path, nil +} + +func checkTableIfExists(database, name, cluster string) bool { + conf, err := repository.Ps.GetClusterbyName(cluster) + if err != nil { + return false + } + hosts, err := common.GetShardAvaliableHosts(&conf) + if err != nil { + return false + } + for _, host := range hosts { + tmp := conf + tmp.Hosts = []string{host} + service := NewCkService(&tmp) + if err := service.InitCkService(); err != nil { + log.Logger.Warnf("shard: %v init service failed: %v", tmp.Hosts, err) + return false + } + if err := common.CheckTable(service.Conn, database, name); err != nil { + log.Logger.Warnf("shard: %v, table %s does not exist", tmp.Hosts, name) + return false + } + } + return true +} + +func DropTableIfExists(params model.CreateCkTableParams, ck *CkService) error { + dropSql := fmt.Sprintf("DROP TABLE IF EXISTS %s.%s ON CLUSTER %s SYNC", params.DB, params.Name, params.Cluster) + log.Logger.Debugf(dropSql) + err := ck.Conn.Exec(dropSql) + if err != nil { + return err + } + + dropSql = fmt.Sprintf("DROP TABLE IF EXISTS %s.%s ON CLUSTER %s SYNC", params.DB, params.DistName, params.Cluster) + log.Logger.Debugf(dropSql) + err = ck.Conn.Exec(dropSql) + return err +} +func GetCKVersion(conf *model.CKManClickHouseConfig, host string) (string, error) { + tmp := *conf + tmp.Hosts = []string{host} + service, err := GetCkService(conf.Cluster) + if err != nil { + return "", err + } + value, err := service.QueryInfo("SELECT version()") + if err != nil { + return "", err + } + version := 
value[1][0].(string) + return version, nil +} + +func SyncLogicTable(src, dst model.CKManClickHouseConfig, name ...string) bool { + hosts, err := common.GetShardAvaliableHosts(&src) + if err != nil || len(hosts) == 0 { + log.Logger.Warnf("cluster %s all node is unvaliable", src.Cluster) + return false + } + srcConn, err := common.ConnectClickHouse(hosts[0], model.ClickHouseDefaultDB, src.GetConnOption()) + if err != nil { + log.Logger.Warnf("connect %s failed", hosts[0]) + return false + } + tableName := "" + database := "" + if len(name) > 0 { + database = name[0] + tableName = name[1] + } + statementsqls, err := GetLogicSchema(srcConn, *dst.LogicCluster, dst.Cluster, dst.IsReplica, database, tableName) + if err != nil { + log.Logger.Warnf("get logic schema failed: %v", err) + return false + } + + dstConn, err := common.ConnectClickHouse(dst.Hosts[0], model.ClickHouseDefaultDB, dst.GetConnOption()) + if err != nil { + log.Logger.Warnf("can't connect %s", dst.Hosts[0]) + return false + } + for _, schema := range statementsqls { + for _, statement := range schema.Statements { + log.Logger.Debugf("%s", statement) + if err := dstConn.Exec(statement); err != nil { + log.Logger.Warnf("excute sql failed: %v", err) + return false + } + } + } + return true +} + +func RestoreReplicaTable(conf *model.CKManClickHouseConfig, host, database, table string) error { + conn, err := common.ConnectClickHouse(host, database, conf.GetConnOption()) + if err != nil { + return errors.Wrapf(err, "cann't connect to %s", host) + } + query := "SELECT is_readonly FROM system.replicas" + var is_readonly uint8 + if err = conn.QueryRow(query).Scan(&is_readonly); err != nil { + return errors.Wrap(err, host) + } + if is_readonly == 0 { + return nil + } + + query = "SYSTEM RESTART REPLICA " + table + if err := conn.Exec(query); err != nil { + return errors.Wrap(err, host) + } + query = "SYSTEM RESTORE REPLICA " + table + if err := conn.Exec(query); err != nil { + // Code: 36. DB::Exception: Replica must be readonly. 
(BAD_ARGUMENTS) + if common.ExceptionAS(err, common.BAD_ARGUMENTS) { + return nil + } + return errors.Wrap(err, host) + } + return nil +} + +func RebalanceCluster(conf *model.CKManClickHouseConfig, keys []model.RebalanceShardingkey, allTable, exceptMaxShard bool) error { + var err error + var exceptHost, target string + service := NewCkService(conf) + if err = service.InitCkService(); err != nil { + return err + } + + //check the full scale, if table not in the request, rebalance by partition + keys, err = paddingKeys(keys, service, allTable) + if err != nil { + return err + } + + hosts, err := common.GetShardAvaliableHosts(conf) + if err != nil { + return err + } + if err = checkBasicTools(conf, hosts, keys); err != nil { + return err + } + + if exceptMaxShard { + exceptHost = hosts[len(hosts)-1] + hosts = hosts[:len(hosts)-1] + target, err = checkDiskSpace(hosts, exceptHost) + if err != nil { + return err + } + } + + log.Logger.Debugf("keys: %d, %#v", len(keys), keys) + for _, key := range keys { + if exceptMaxShard { + if err = MoveExceptToOthers(conf, exceptHost, target, key.Database, key.Table); err != nil { + return err + } + } + rebalancer := &CKRebalance{ + Cluster: conf.Cluster, + Hosts: hosts, + Database: key.Database, + Table: key.Table, + TmpTable: "tmp_" + key.Table, + DistTable: key.DistTable, + DataDir: conf.Path, + OsUser: conf.SshUser, + OsPassword: conf.SshPassword, + OsPort: conf.SshPort, + RepTables: make(map[string]string), + ConnOpt: conf.GetConnOption(), + AllowLossRate: key.AllowLossRate, + SaveTemps: key.SaveTemps, + IsReplica: conf.IsReplica, + } + defer rebalancer.Close() + + if key.ShardingKey != "" { + //rebalance by shardingkey + log.Logger.Infof("[rebalance]table %s.%s rebalance by shardingkey", key.Database, key.Table) + if err = getShardingType(&key, service.Conn); err != nil { + return err + } + rebalancer.Shardingkey = key + if err = RebalanceByShardingkey(conf, rebalancer); err != nil { + return err + } + } else { + //rebalance by partition + log.Logger.Infof("[rebalance]table %s.%s rebalance by partition", key.Database, key.Table) + err = RebalanceByPartition(conf, rebalancer) + if err != nil { + return err + } + } + + } + + return nil +} + +func checkBasicTools(conf *model.CKManClickHouseConfig, hosts []string, keys []model.RebalanceShardingkey) error { + var chkrsync, chkawk bool + for _, key := range keys { + if chkawk && chkrsync { + break + } + if key.ShardingKey != "" { + chkawk = true + } else { + // by partition + if !conf.IsReplica { + chkrsync = true + } + } + } + for _, host := range hosts { + opts := common.SshOptions{ + User: conf.SshUser, + Password: conf.SshPassword, + Port: conf.SshPort, + Host: host, + NeedSudo: conf.NeedSudo, + AuthenticateType: conf.AuthenticateType, + } + + var cmds []string + if chkawk { + cmds = append(cmds, "which awk >/dev/null 2>&1 ;echo $?") + } + if chkrsync { + cmds = append(cmds, "which rsync >/dev/null 2>&1 ;echo $?") + } + for _, cmd := range cmds { + if output, err := common.RemoteExecute(opts, cmd); err != nil { + return err + } else { + if strings.TrimSuffix(output, "\n") != "0" { + return errors.Errorf("excute cmd:[%s] on %s failed", cmd, host) + } + } + } + } + return nil +} + +func checkDiskSpace(hosts []string, exceptHost string) (string, error) { + var needSpace, maxLeftSpace uint64 + var target string + query := `SELECT sum(total_bytes) +FROM system.tables +WHERE match(engine, 'MergeTree') AND (database NOT IN ('system', 'information_schema', 'INFORMATION_SCHEMA')) +SETTINGS 
skip_unavailable_shards = 1` + log.Logger.Debugf("[%s]%s", exceptHost, query) + conn := common.GetConnection(exceptHost) + rows, err := conn.Query(query) + if err != nil { + return "", errors.Wrap(err, exceptHost) + } + for rows.Next() { + rows.Scan(&needSpace) + } + query = "SELECT free_space FROM system.disks" + for _, host := range hosts { + conn = common.GetConnection(host) + log.Logger.Debugf("[%s]%s", host, query) + rows, err := conn.Query(query) + if err != nil { + return "", errors.Wrap(err, exceptHost) + } + var freeSpace uint64 + for rows.Next() { + rows.Scan(&freeSpace) + } + if maxLeftSpace*2 < freeSpace { + target = host + maxLeftSpace = freeSpace + } + } + if maxLeftSpace <= needSpace { + return "", fmt.Errorf("need %s space on the disk, but not enough", common.ConvertDisk(uint64(needSpace))) + } + return target, nil +} + +func paddingKeys(keys []model.RebalanceShardingkey, service *CkService, allTable bool) ([]model.RebalanceShardingkey, error) { + var results []model.RebalanceShardingkey + resps, err := service.GetRebalanceTables() + if err != nil { + return keys, err + } + //k: database, v:tables + for _, rt := range resps { + key := model.RebalanceShardingkey{ + Database: rt.Database, + Table: rt.Table, + DistTable: rt.DistTable, + } + found := false + for _, elem := range keys { + elem.Table = common.TernaryExpression(strings.HasPrefix(elem.Table, "^"), elem.Table, "^"+elem.Table).(string) + elem.Table = common.TernaryExpression(strings.HasSuffix(elem.Table, "$"), elem.Table, elem.Table+"$").(string) + reg, err := regexp.Compile(elem.Table) + if err != nil { + return keys, err + } + if key.Database == elem.Database && (reg.MatchString(key.Table)) { + if found { + return keys, fmt.Errorf("table %s matches more than one regexp expression", key.Table) + } + if common.ArraySearch(elem.ShardingKey, rt.Columns) { + found = true + key.ShardingKey = elem.ShardingKey + results = append(results, key) + //break + } else { + return keys, fmt.Errorf("shardingkey %s not found in %s.%s", elem.ShardingKey, elem.Database, key.Table) + } + } + } + if allTable && !found { + results = append(results, key) + } + } + + return results, nil +} + +func getShardingType(key *model.RebalanceShardingkey, conn *common.Conn) error { + query := fmt.Sprintf("SELECT type FROM system.columns WHERE (database = '%s') AND (table = '%s') AND (name = '%s') ", + key.Database, key.Table, key.ShardingKey) + rows, err := conn.Query(query) + if err != nil { + return err + } + defer rows.Close() + for rows.Next() { + var typ string + rows.Scan(&typ) + key.ShardingType = WhichType(typ) + } + if key.ShardingType.Nullable || key.ShardingType.Array { + return errors.Errorf("invalid shardingKey %s, expect its type be numerical or string", key.ShardingKey) + } + return nil +} + +func RebalanceByPartition(conf *model.CKManClickHouseConfig, rebalancer *CKRebalance) error { + var err error + if err = rebalancer.InitCKConns(false); err != nil { + log.Logger.Errorf("[rebalance]got error %+v", err) + return err + } + + if err = rebalancer.GetRepTables(); err != nil { + log.Logger.Errorf("[rebalance]got error %+v", err) + return err + } + + if err = rebalancer.DoRebalanceByPart(); err != nil { + log.Logger.Errorf("got error %+v", err) + return err + } + log.Logger.Infof("rebalance done") + return nil +} + +// 200w data costs 4s +func RebalanceByShardingkey(conf *model.CKManClickHouseConfig, rebalancer *CKRebalance) error { + var err error + start := time.Now() + log.Logger.Info("[rebalance] STEP InitCKConns") + if err = 
rebalancer.InitCKConns(true); err != nil { + log.Logger.Errorf("got error %+v", err) + return err + } + log.Logger.Info("[rebalance] STEP CreateTemporaryTable") + if err = rebalancer.CreateTemporaryTable(); err != nil { + return err + } + log.Logger.Info("[rebalance] STEP MoveBackup") + if err = rebalancer.MoveBackup(); err != nil { + return err + } + if err = rebalancer.CheckCounts(rebalancer.TmpTable); err != nil { + time.Sleep(5 * time.Second) + if err = rebalancer.CheckCounts(rebalancer.TmpTable); err != nil { + return err + } + } + log.Logger.Info("[rebalance] STEP InsertPlan") + if err = rebalancer.InsertPlan(); err != nil { + return errors.Wrapf(err, "table %s.%s rebalance failed, data can be corrupted, please move back from temp table[%s] manually", rebalancer.Database, rebalancer.Table, rebalancer.TmpTable) + } + if err = rebalancer.CheckCounts(rebalancer.Table); err != nil { + time.Sleep(5 * time.Second) + if err = rebalancer.CheckCounts(rebalancer.Table); err != nil { + return err + } + } + if !rebalancer.SaveTemps { + log.Logger.Info("[rebalance] STEP Cleanup") + rebalancer.Cleanup() + } + + log.Logger.Infof("[rebalance] DONE, Total counts: %d, Elapsed: %v sec", rebalancer.OriCount, time.Since(start).Seconds()) + return nil +} + +func MoveExceptToOthers(conf *model.CKManClickHouseConfig, except, target, database, table string) error { + max_insert_threads := runtime.NumCPU()*3/4 + 1 + query := fmt.Sprintf("INSERT INTO `%s`.`%s` SELECT * FROM remote('%s', '%s', '%s', '%s', '%s') SETTINGS max_insert_threads=%d,max_execution_time=0", + database, table, except, database, table, conf.User, conf.Password, max_insert_threads) + log.Logger.Debugf("[%s] %s", target, query) + conn := common.GetConnection(target) + err := conn.Exec(query) + if err != nil { + return err + } + query = fmt.Sprintf("TRUNCATE TABLE `%s`.`%s` %s", database, table, common.WithAlterSync(conf.Version)) + log.Logger.Debugf("[%s] %s", except, query) + conn = common.GetConnection(except) + err = conn.Exec(query) + if err != nil { + return err + } + return nil +} + +func GroupUniqArray(conf *model.CKManClickHouseConfig, req model.GroupUniqArrayReq) error { + //创建本地聚合表,本地物化视图,分布式聚合表,分布式视图 + if err := CreateViewOnCluster(conf, req); err != nil { + return err + } + // 创建逻辑聚合表和逻辑视图 + if conf.LogicCluster != nil { + //当前集群的逻辑表和逻辑视图创建 + if err := CreateLogicViewOnCluster(conf, req); err != nil { + return err + } + clusters, err := repository.Ps.GetLogicClusterbyName(*conf.LogicCluster) + if err != nil { + return err + } + for _, cluster := range clusters { + con, err := repository.Ps.GetClusterbyName(cluster) + if err != nil { + return err + } + //当前集群已经创建过了,跳过 + if con.Cluster == conf.Cluster { + continue + } else { + //其他物理集群需要同步创建本地表、本地视图,分布式表、分布式视图,以及逻辑表,逻辑视图 + if err := CreateViewOnCluster(&con, req); err != nil { + return err + } + if err := CreateLogicViewOnCluster(&con, req); err != nil { + return err + } + } + } + } + return nil +} + +func CreateViewOnCluster(conf *model.CKManClickHouseConfig, req model.GroupUniqArrayReq) error { + //前置工作 + service := NewCkService(conf) + err := service.InitCkService() + if err != nil { + return err + } + query := fmt.Sprintf("SELECT name, type FROM system.columns WHERE (database = '%s') AND (table = '%s')", req.Database, req.Table) + rows, err := service.Conn.Query(query) + if err != nil { + return err + } + fields := make(map[string]string, len(req.Fields)+1) + for rows.Next() { + var name, typ string + err = rows.Scan(&name, &typ) + if err != nil { + rows.Close() + return err + 
} + if name == req.TimeField { + fields[name] = typ + } else { + for _, f := range req.Fields { + if name == f.Name { + fields[name] = typ + } + } + } + } + rows.Close() + + // check 一把是否所有字段在表里都能找到 + for _, f := range req.Fields { + if _, ok := fields[f.Name]; !ok { + return fmt.Errorf("can't find field %s in %s.%s", f.Name, req.Database, req.Table) + } + } + + aggTable := fmt.Sprintf("%s%s", common.ClickHouseAggregateTablePrefix, req.Table) + distAggTable := fmt.Sprintf("%s%s", common.ClickHouseAggDistTablePrefix, req.Table) + + mvLocal := fmt.Sprintf("%s%s", common.ClickHouseLocalViewPrefix, req.Table) + mvDist := fmt.Sprintf("%s%s", common.ClickHouseDistributedViewPrefix, req.Table) + + var engine string + if conf.IsReplica { + engine = "ReplicatedReplacingMergeTree()" + } else { + engine = "ReplacingMergeTree" + } + + fieldAndType := fmt.Sprintf("`%s` %s,", req.TimeField, fields[req.TimeField]) + fieldSql := fmt.Sprintf("`%s`, ", req.TimeField) + where := " WHERE 1=1 " + for i, f := range req.Fields { + if i > 0 { + fieldAndType += "," + } + if f.MaxSize == 0 { + f.MaxSize = 10000 + } + typ := fields[f.Name] + nullable := false + defaultValue := f.DefaultValue + if strings.HasPrefix(typ, "Nullable(") { + nullable = true + typ = strings.TrimSuffix(strings.TrimPrefix(typ, "Nullable("), ")") + if strings.Contains(typ, "Int") || strings.Contains(typ, "Float") { + defaultValue = fmt.Sprintf("%v", f.DefaultValue) + } else { + defaultValue = fmt.Sprintf("'%v'", f.DefaultValue) + } + where += fmt.Sprintf(" AND isNotNull(`%s`) ", f.Name) + } + fieldAndType += fmt.Sprintf("`%s%s` AggregateFunction(groupUniqArray(%d), `%s`)", model.GroupUniqArrayPrefix, f.Name, f.MaxSize, typ) + if nullable { + fieldSql += fmt.Sprintf("groupUniqArrayState(%d)(ifNull(`%s`, %s)) AS `%s%s`", f.MaxSize, f.Name, defaultValue, model.GroupUniqArrayPrefix, f.Name) + } else { + fieldSql += fmt.Sprintf("groupUniqArrayState(%d)(`%s`) AS `%s%s`", f.MaxSize, f.Name, model.GroupUniqArrayPrefix, f.Name) + } + } + + view_sql := fmt.Sprintf("SELECT %s FROM `%s`.`%s` %s GROUP BY (`%s`)", fieldSql, req.Database, req.Table, where, req.TimeField) + + // 创建本地聚合表及本地视图 + agg_query := fmt.Sprintf("CREATE TABLE IF NOT EXISTS `%s`.`%s` ON CLUSTER `%s` (%s) ENGINE = %s ORDER BY (`%s`)", + req.Database, aggTable, conf.Cluster, fieldAndType, engine, req.TimeField) + + //需不需要partition by? 
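CreateViewOnCluster assembles the standard ClickHouse aggregate-state pattern: a target table holding AggregateFunction(groupUniqArray(N), ...) columns, a materialized view feeding it via groupUniqArrayState, and readers (GetGroupUniqArray below) merging the state with groupUniqArrayMerge. A minimal generic illustration with made-up table and column names, independent of the prefixed names ckman derives from its common package constants:

```go
package main

import "fmt"

// Hypothetical, minimal ClickHouse DDL showing the aggregate-state pattern that
// CreateViewOnCluster / GetGroupUniqArray build programmatically. All object
// names here are invented for illustration.
const (
	// Target table storing pre-aggregated groupUniqArray state per time bucket.
	createAggTarget = `
CREATE TABLE default.agg_t_log (
    ts      DateTime,
    guaUser AggregateFunction(groupUniqArray(10000), String)
) ENGINE = ReplacingMergeTree
ORDER BY ts`

	// Materialized view that folds new rows of the source table into that state.
	createMV = `
CREATE MATERIALIZED VIEW default.mv_t_log TO default.agg_t_log AS
SELECT ts, groupUniqArrayState(10000)(user) AS guaUser
FROM default.t_log
GROUP BY ts`

	// Readers merge the state back into a plain array of distinct values.
	readBack = `
SELECT groupUniqArrayMerge(10000)(guaUser) AS users
FROM default.agg_t_log`
)

func main() {
	fmt.Println(createAggTarget, createMV, readBack)
}
```

On a replicated cluster the code above swaps in ReplicatedReplacingMergeTree and issues everything ON CLUSTER, then adds Distributed wrappers for the aggregate table and view, with a further Distributed layer for logical clusters in CreateLogicViewOnCluster.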
+ log.Logger.Debugf("agg_query: %s", agg_query) + err = service.Conn.Exec(agg_query) + if err != nil { + return err + } + view_query := fmt.Sprintf("CREATE MATERIALIZED VIEW IF NOT EXISTS `%s`.`%s` ON CLUSTER `%s` TO `%s`.`%s` AS %s", + req.Database, mvLocal, conf.Cluster, req.Database, aggTable, view_sql) + + log.Logger.Debugf("view_query: %s", view_query) + err = service.Conn.Exec(view_query) + if err != nil { + return err + } + + // 创建分布式聚合表及分布式视图 + agg_query = fmt.Sprintf("CREATE TABLE IF NOT EXISTS `%s`.`%s` ON CLUSTER `%s` AS `%s`.`%s` ENGINE = Distributed(`%s`, `%s`, `%s`, rand())", + req.Database, distAggTable, conf.Cluster, req.Database, aggTable, conf.Cluster, req.Database, aggTable) + log.Logger.Debugf("agg_query: %s", agg_query) + err = service.Conn.Exec(agg_query) + if err != nil { + return err + } + + view_query = fmt.Sprintf("CREATE TABLE IF NOT EXISTS `%s`.`%s` ON CLUSTER `%s` AS `%s`.`%s` ENGINE = Distributed(`%s`, `%s`, `%s`, rand())", + req.Database, mvDist, conf.Cluster, req.Database, mvLocal, conf.Cluster, req.Database, mvLocal) + log.Logger.Debugf("view_query: %s", view_query) + err = service.Conn.Exec(view_query) + if err != nil { + return err + } + + if req.Populate { + insert_query := fmt.Sprintf("INSERT INTO `%s`.`%s` %s ", req.Database, aggTable, view_sql) + log.Logger.Debugf("[%s]insert_query: %s", conf.Cluster, insert_query) + hosts, err := common.GetShardAvaliableHosts(conf) + if err != nil { + return err + } + for _, host := range hosts { + conn := common.GetConnection(host) + if conn != nil { + err = service.Conn.AsyncInsert(insert_query, false) + if err != nil { + return err + } + } + } + } + return nil +} + +func CreateLogicViewOnCluster(conf *model.CKManClickHouseConfig, req model.GroupUniqArrayReq) error { + service := NewCkService(conf) + err := service.InitCkService() + if err != nil { + return err + } + aggTable := fmt.Sprintf("%s%s", common.ClickHouseAggregateTablePrefix, req.Table) + logicAggTable := fmt.Sprintf("%s%s", common.ClickHouseAggLogicTablePrefix, req.Table) + + mvLocal := fmt.Sprintf("%s%s", common.ClickHouseAggregateTablePrefix, req.Table) + mvLogic := fmt.Sprintf("%s%s", common.ClickHouseLogicViewPrefix, req.Table) + + agg_query := fmt.Sprintf("CREATE TABLE IF NOT EXISTS `%s`.`%s` ON CLUSTER `%s` AS `%s`.`%s` ENGINE = Distributed(`%s`, `%s`, `%s`, rand())", + req.Database, logicAggTable, conf.Cluster, req.Database, aggTable, *conf.LogicCluster, req.Database, aggTable) + log.Logger.Debugf("agg_query: %s", agg_query) + err = service.Conn.Exec(agg_query) + if err != nil { + return err + } + + view_query := fmt.Sprintf("CREATE TABLE IF NOT EXISTS `%s`.`%s` ON CLUSTER `%s` AS `%s`.`%s` ENGINE = Distributed(`%s`, `%s`, `%s`, rand())", + req.Database, mvLogic, conf.Cluster, req.Database, mvLocal, *conf.LogicCluster, req.Database, mvLocal) + log.Logger.Debugf("view_query: %s", view_query) + err = service.Conn.Exec(view_query) + if err != nil { + return err + } + return nil +} + +func GetGroupUniqArray(conf *model.CKManClickHouseConfig, database, table string) (map[string]interface{}, error) { + //确定是查分布式表还是逻辑表 + viewName := common.TernaryExpression(conf.LogicCluster != nil, fmt.Sprintf("%s%s", common.ClickHouseLogicViewPrefix, table), fmt.Sprintf("%s%s", common.ClickHouseDistributedViewPrefix, table)).(string) + //根据表名查询出物化视图名,聚合函数类型 + service := NewCkService(conf) + err := service.InitCkService() + if err != nil { + return nil, err + } + query := fmt.Sprintf(`SELECT + name, + (extractAllGroups(type, 'groupUniqArray\\((\\d+)\\)')[1])[1] AS 
maxsize +FROM system.columns +WHERE (database = '%s') AND (table = '%s') AND (type LIKE 'AggregateFunction%%')`, + database, viewName) + log.Logger.Debugf(query) + rows, err := service.Conn.Query(query) + if err != nil { + return nil, err + } + var aggFields string + idx := 0 + for rows.Next() { + var name, maxSize string + err = rows.Scan(&name, &maxSize) + if err != nil { + rows.Close() + return nil, err + } + if name != "" { + if idx > 0 { + aggFields += ", " + } + aggFields += fmt.Sprintf("groupUniqArrayMerge(%s)(%s) AS %s", maxSize, name, strings.TrimPrefix(name, model.GroupUniqArrayPrefix)) + idx++ + } + } + rows.Close() + + //查询 + query = fmt.Sprintf("SELECT %s FROM `%s`.`%s`", aggFields, database, viewName) + data, err := service.QueryInfo(query) + if err != nil { + return nil, err + } + result := make(map[string]interface{}) + keys := data[0] + for i, key := range keys { + value := data[1][i] + result[key.(string)] = value + } + return result, nil +} + +func DelGroupUniqArray(conf *model.CKManClickHouseConfig, database, table string) error { + err := delGuaViewOnCluster(conf, database, table) + if err != nil { + return err + } + + //如果有逻辑集群,还要去各个逻辑集群删除本地物化视图、分布式物化视图,逻辑物化视图 + if conf.LogicCluster != nil { + clusters, err := repository.Ps.GetLogicClusterbyName(*conf.LogicCluster) + if err != nil { + return err + } + for _, cluster := range clusters { + if cluster == conf.Cluster { + err = delGuaViewOnLogic(conf, database, table) + if err != nil { + return err + } + } else { + clus, err := repository.Ps.GetClusterbyName(cluster) + if err != nil { + return err + } + if err = delGuaViewOnCluster(&clus, database, table); err != nil { + return err + } + err = delGuaViewOnLogic(&clus, database, table) + if err != nil { + return err + } + } + } + } + return nil +} + +func delGuaViewOnCluster(conf *model.CKManClickHouseConfig, database, table string) error { + service := NewCkService(conf) + err := service.InitCkService() + if err != nil { + return err + } + + queries := []string{ + fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s%s` ON CLUSTER `%s` SYNC", database, common.ClickHouseLocalViewPrefix, table, conf.Cluster), + fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s%s` ON CLUSTER `%s` SYNC", database, common.ClickHouseDistributedViewPrefix, table, conf.Cluster), + fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s%s` ON CLUSTER `%s` SYNC", database, common.ClickHouseAggregateTablePrefix, table, conf.Cluster), + fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s%s` ON CLUSTER `%s` SYNC", database, common.ClickHouseAggDistTablePrefix, table, conf.Cluster), + } + + for _, query := range queries { + log.Logger.Debugf("[%s]%s", conf.Cluster, query) + err = service.Conn.Exec(query) + if err != nil { + return err + } + } + + return nil +} + +func delGuaViewOnLogic(conf *model.CKManClickHouseConfig, database, table string) error { + service := NewCkService(conf) + err := service.InitCkService() + if err != nil { + return err + } + + queries := []string{ + fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s%s` ON CLUSTER `%s` SYNC", database, common.ClickHouseLogicViewPrefix, table, conf.Cluster), + fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s%s` ON CLUSTER `%s` SYNC", database, common.ClickHouseAggLogicTablePrefix, table, conf.Cluster), + } + + for _, query := range queries { + log.Logger.Debugf("[%s]%s", conf.Cluster, query) + err = service.Conn.Exec(query) + if err != nil { + return err + } + } + return nil +} + +func GetReplicatedTableStatus(conf *model.CKManClickHouseConfig) ([]model.ZkReplicatedTableStatus, error) { + if 
!conf.IsReplica { + return nil, nil + } + service := NewCkService(conf) + if err := service.InitCkService(); err != nil { + return nil, err + } + query := fmt.Sprintf("select database, table, is_leader, replica_name, log_pointer from clusterAllReplicas('%s', system.replicas) order by database, table", conf.Cluster) + data, err := service.QueryInfo(query) + if err != nil { + return nil, err + } + tblReplicaStatus := make(map[string]model.ZkReplicatedTableStatus) + for i := 1; i < len(data); i++ { + database := data[i][0].(string) + table := data[i][1].(string) + is_leader := data[i][2].(uint8) + replica_name := data[i][3].(string) + log_pointer := data[i][4].(uint64) + key := database + "." + table + if _, ok := tblReplicaStatus[key]; !ok { + value := make([][]string, len(conf.Shards)) + for shardIndex, shard := range conf.Shards { + value[shardIndex] = make([]string, len(shard.Replicas)) + } + tblReplicaStatus[key] = model.ZkReplicatedTableStatus{ + Name: key, + Values: value, + } + } + + logPointer := "" + if common.CompareClickHouseVersion(conf.Version, "20.5.x") >= 0 { + logPointer = fmt.Sprintf("[ML]%d", log_pointer) + } else { + if is_leader == 1 { + logPointer = fmt.Sprintf("[L]%d", log_pointer) + } else { + logPointer = fmt.Sprintf("[F]%d", log_pointer) + } + } + OUTER_LOOP: + for shardIndex, shard := range conf.Shards { + for replicaIndex, replica := range shard.Replicas { + if replica_name == replica.Ip { + tblReplicaStatus[key].Values[shardIndex][replicaIndex] = logPointer + break OUTER_LOOP + } + } + } + } + var replicaStatus []model.ZkReplicatedTableStatus + for _, value := range tblReplicaStatus { + replicaStatus = append(replicaStatus, value) + } + return replicaStatus, nil +} diff --git a/service/clickhouse/clickhouse_service.go b/service/clickhouse/clickhouse_service.go index ad8a88d6..fa67effe 100644 --- a/service/clickhouse/clickhouse_service.go +++ b/service/clickhouse/clickhouse_service.go @@ -3,16 +3,10 @@ package clickhouse import ( "fmt" "io" - "net" "reflect" - "regexp" - "runtime" - "sort" "strings" "sync" - "time" - client "github.com/ClickHouse/clickhouse-go/v2" "github.com/housepower/ckman/common" "github.com/housepower/ckman/log" "github.com/housepower/ckman/model" @@ -101,196 +95,6 @@ func GetCkNodeService(clusterName string, node string) (*CkService, error) { } } -func GetCkClusterConfig(conf *model.CKManClickHouseConfig) (string, error) { - var replicas []model.CkReplica - - service := NewCkService(conf) - if err := service.InitCkService(); err != nil { - return model.E_CH_CONNECT_FAILED, err - } - hosts := conf.Hosts - conf.Hosts = make([]string, 0) - conf.Shards = make([]model.CkShard, 0) - - value, err := service.QueryInfo(fmt.Sprintf("SELECT cluster, shard_num, replica_num, host_name, host_address FROM system.clusters WHERE cluster='%s' ORDER BY cluster, shard_num, replica_num", conf.Cluster)) - if err != nil { - return model.E_DATA_SELECT_FAILED, err - } - if len(value) == 1 { - return model.E_RECORD_NOT_FOUND, errors.Errorf("cluster %s is not exist, or hosts %v is not in cluster %s", conf.Cluster, hosts, conf.Cluster) - } - shardNum := uint32(0) - var ( - loopback bool - lbhostName string - ) - for i := 1; i < len(value); i++ { - if shardNum != value[i][1].(uint32) { - if len(replicas) != 0 { - shard := model.CkShard{ - Replicas: replicas, - } - conf.Shards = append(conf.Shards, shard) - } - replicas = make([]model.CkReplica, 0) - } - if value[i][2].(uint32) > 1 { - conf.IsReplica = true - } - replica := model.CkReplica{ - Ip: value[i][4].(string), - 
HostName: value[i][3].(string), - } - replicas = append(replicas, replica) - conf.Hosts = append(conf.Hosts, value[i][4].(string)) - shardNum = value[i][1].(uint32) - // when deployed on k8s, IP is not stable, and always return 127.0.0.1 - if replica.Ip == common.NetLoopBack { - log.Logger.Infof("found loopback") - loopback = true - lbhostName = replica.HostName - } - } - - if len(replicas) != 0 { - shard := model.CkShard{ - Replicas: replicas, - } - conf.Shards = append(conf.Shards, shard) - } - - if loopback { - var realHost string - query := fmt.Sprintf("SELECT host_address FROM system.clusters WHERE cluster='%s' AND host_name = '%s'", conf.Cluster, lbhostName) - - hosts, err := common.GetShardAvaliableHosts(conf) - if err != nil { - return model.E_CH_CONNECT_FAILED, err - } - conn := common.GetConnection(hosts[0]) - rows, err := conn.Query(query) - if err != nil { - return model.E_DATA_SELECT_FAILED, err - } - for rows.Next() { - var ip string - err = rows.Scan(&ip) - if err != nil { - return model.E_DATA_SELECT_FAILED, err - } - if ip != "" && ip != common.NetLoopBack { - realHost = ip - break - } - } - log.Logger.Infof("realHost: %s", realHost) - - for i := range conf.Hosts { - if conf.Hosts[i] == common.NetLoopBack { - conf.Hosts[i] = realHost - break - } - } - - for i := range conf.Shards { - for j := range conf.Shards[i].Replicas { - if conf.Shards[i].Replicas[j].Ip == common.NetLoopBack { - conf.Shards[i].Replicas[j].Ip = realHost - } - } - } - } - - if conf.LogicCluster != nil { - query := fmt.Sprintf("SELECT count() FROM system.clusters WHERE cluster = '%s'", *conf.LogicCluster) - value, err = service.QueryInfo(query) - if err != nil { - return model.E_DATA_SELECT_FAILED, err - } - c := value[1][0].(uint64) - if c == 0 { - return model.E_RECORD_NOT_FOUND, fmt.Errorf("logic cluster %s not exist", *conf.LogicCluster) - } - } - - value, err = service.QueryInfo("SELECT version()") - if err != nil { - return model.E_DATA_SELECT_FAILED, err - } - conf.Version = value[1][0].(string) - - return model.E_SUCCESS, nil -} - -func getNodeInfo(service *CkService) (string, string) { - query := `SELECT - formatReadableSize(sum(total_space) - sum(free_space)) AS used, - formatReadableSize(sum(total_space)) AS total, uptime() as uptime - FROM system.disks WHERE lower(type) = 'local'` - value, err := service.QueryInfo(query) - if err != nil { - return "NA/NA", "" - } - usedSpace := value[1][0].(string) - totalSpace := value[1][1].(string) - uptime := value[1][2].(uint32) - return fmt.Sprintf("%s/%s", usedSpace, totalSpace), common.FormatReadableTime(uptime) -} - -func GetCkClusterStatus(conf *model.CKManClickHouseConfig) []model.CkClusterNode { - index := 0 - statusList := make([]model.CkClusterNode, len(conf.Hosts)) - statusMap := make(map[string]string, len(conf.Hosts)) - diskMap := make(map[string]string, len(conf.Hosts)) - uptimeMap := make(map[string]string, len(conf.Hosts)) - var lock sync.RWMutex - var wg sync.WaitGroup - for _, host := range conf.Hosts { - innerHost := host - wg.Add(1) - _ = common.Pool.Submit(func() { - defer wg.Done() - tmp := &model.CKManClickHouseConfig{ - Hosts: []string{innerHost}, - Port: conf.Port, - HttpPort: conf.HttpPort, - Cluster: conf.Cluster, - User: conf.User, - Password: conf.Password, - } - service := NewCkService(tmp) - if err := service.InitCkService(); err != nil { - lock.Lock() - statusMap[innerHost] = model.CkStatusRed - diskMap[innerHost] = "NA/NA" - lock.Unlock() - } else { - lock.Lock() - statusMap[innerHost] = model.CkStatusGreen - 
diskMap[innerHost], uptimeMap[innerHost] = getNodeInfo(service) - lock.Unlock() - } - }) - } - wg.Wait() - for i, shard := range conf.Shards { - for j, replica := range shard.Replicas { - status := model.CkClusterNode{ - Ip: replica.Ip, - HostName: replica.HostName, - ShardNumber: i + 1, - ReplicaNumber: j + 1, - Status: statusMap[replica.Ip], - Disk: diskMap[replica.Ip], - Uptime: uptimeMap[replica.Ip], - } - statusList[index] = status - index++ - } - } - return statusList -} - func (ck *CkService) CreateTable(params *model.CreateCkTableParams, dryrun bool) ([]string, error) { var statements []string if ck.Conn == nil { @@ -328,6 +132,11 @@ func (ck *CkService) CreateTable(params *model.CreateCkTableParams, dryrun bool) projections += fmt.Sprintf(", PROJECTION %s (%s)", p.Name, p.Sql) } + settings := make(map[string]interface{}) + if common.CompareClickHouseVersion(ck.Config.Version, "22.4.x") > 0 { + settings["use_metadata_cache"] = true + } + create := fmt.Sprintf("CREATE TABLE IF NOT EXISTS `%s`.`%s` ON CLUSTER `%s` (%s%s%s) ENGINE = %s() PARTITION BY %s ORDER BY (%s)", params.DB, params.Name, params.Cluster, strings.Join(columns, ", "), params.IndexExpr, projections, params.Engine, partition, strings.Join(params.Order, ", ")) @@ -335,8 +144,21 @@ func (ck *CkService) CreateTable(params *model.CreateCkTableParams, dryrun bool) create += fmt.Sprintf(" TTL %s", params.TTLExpr) } if params.StoragePolicy != "" { - create += fmt.Sprintf(" SETTINGS storage_policy = '%s'", params.StoragePolicy) + settings["storage_policy"] = params.StoragePolicy + } + if len(settings) > 0 { + create += " SETTINGS " + idx := 0 + for k, v := range settings { + if idx == len(settings) { + create += fmt.Sprintf("%s = '%v'", k, v) + } else { + create += fmt.Sprintf("%s = '%v',", k, v) + } + idx++ + } } + log.Logger.Debugf(create) statements = append(statements, create) if !dryrun { @@ -419,10 +241,6 @@ func (ck *CkService) DeleteTable(conf *model.CKManClickHouseConfig, params *mode } } - // delete zoopath - tableName := fmt.Sprintf("%s.%s", params.DB, local) - delete(conf.ZooPath, tableName) - return nil } @@ -582,31 +400,56 @@ func (ck *CkService) AlterTableTTL(req *model.AlterTblsTTLReq) error { return errors.Errorf("clickhouse service unavailable") } + var wg sync.WaitGroup + wg.Add(len(req.Tables)) + var lastErr error for _, table := range req.Tables { - local, _, err := common.GetTableNames(ck.Conn, table.Database, table.TableName, table.DistName, ck.Config.Cluster, true) - if err != nil { - return err - } - if req.TTLType != "" { - if req.TTLType == model.TTLTypeModify { - if req.TTLExpr != "" { - ttl := fmt.Sprintf("ALTER TABLE `%s`.`%s` ON CLUSTER `%s` MODIFY TTL %s %s", table.Database, local, ck.Config.Cluster, req.TTLExpr, common.WithAlterSync(ck.Config.Version)) + go func(table model.AlterTblTTL) { + defer wg.Done() + local, _, err := common.GetTableNames(ck.Conn, table.Database, table.TableName, table.DistName, ck.Config.Cluster, true) + if err != nil { + lastErr = err + return + } + if req.TTLType != "" { + if req.TTLType == model.TTLTypeModify { + if req.TTLExpr != "" { + ttl := fmt.Sprintf("ALTER TABLE `%s`.`%s` ON CLUSTER `%s` MODIFY TTL %s %s", table.Database, local, ck.Config.Cluster, req.TTLExpr, common.WithAlterSync(ck.Config.Version)) + log.Logger.Debugf(ttl) + if err := ck.Conn.Exec(ttl); err != nil { + if common.ExceptionAS(err, common.UNFINISHED) { + var create_table_query string + query := fmt.Sprintf("select create_table_query from system.tables where database = '%s' and name = '%s'", 
table.Database, local) + err = ck.Conn.QueryRow(query).Scan(&create_table_query) + if err != nil { + lastErr = err + return + } + if strings.Contains(create_table_query, req.TTLExpr) || strings.Contains(create_table_query, strings.ReplaceAll(req.TTLExpr, "`", "")) { + return + } + } + lastErr = err + return + } + } + } else if req.TTLType == model.TTLTypeRemove { + ttl := fmt.Sprintf("ALTER TABLE `%s`.`%s` ON CLUSTER `%s` REMOVE TTL %s", table.Database, local, ck.Config.Cluster, common.WithAlterSync(ck.Config.Version)) log.Logger.Debugf(ttl) if err := ck.Conn.Exec(ttl); err != nil { - return errors.Wrap(err, "") + if common.ExceptionAS(err, common.UNFINISHED) { + return + } + lastErr = err + return } } - } else if req.TTLType == model.TTLTypeRemove { - ttl := fmt.Sprintf("ALTER TABLE `%s`.`%s` ON CLUSTER `%s` REMOVE TTL %s", table.Database, local, ck.Config.Cluster, common.WithAlterSync(ck.Config.Version)) - log.Logger.Debugf(ttl) - if err := ck.Conn.Exec(ttl); err != nil { - return errors.Wrap(err, "") - } } - } + }(table) } - return nil + wg.Wait() + return lastErr } func (ck *CkService) DescTable(params *model.DescCkTableParams) ([]model.CkColumnAttribute, error) { @@ -732,1612 +575,169 @@ func (ck *CkService) FetchSchemerFromOtherNode(host string) error { return nil } -func GetCkTableMetrics(conf *model.CKManClickHouseConfig, database string, cols []string) (map[string]*model.CkTableMetrics, error) { - metrics := make(map[string]*model.CkTableMetrics) - - service := NewCkService(conf) - if err := service.InitCkService(); err != nil { - return nil, err - } - // get table names - databases, dbtables, err := common.GetMergeTreeTables("MergeTree", database, service.Conn) +func (ck *CkService) ShowCreateTable(tbname, database string) (string, error) { + query := fmt.Sprintf("SELECT create_table_query FROM system.tables WHERE database = '%s' AND name = '%s'", database, tbname) + value, err := ck.QueryInfo(query) if err != nil { - return nil, err - } - for db, tables := range dbtables { - for _, table := range tables { - // init - tableName := fmt.Sprintf("%s.%s", db, table) - metric := &model.CkTableMetrics{ - RWStatus: true, - } - metrics[tableName] = metric - } + return "", err } + schema := value[1][0].(string) + return schema, nil +} - dbs := strings.Join(databases, "','") - var query string - var value [][]interface{} +type RebalanceTables struct { + Database string + DistTable string + Table string + Columns []string +} - // get columns - if common.ArraySearch("columns", cols) || len(cols) == 0 { - query = fmt.Sprintf("SELECT table, count() as columns, database FROM system.columns WHERE database in ('%s') GROUP BY table, database", - dbs) - value, err = service.QueryInfo(query) - if err != nil { - return nil, err - } - for i := 1; i < len(value); i++ { - table := value[i][0].(string) - database := value[i][2].(string) - tableName := fmt.Sprintf("%s.%s", database, table) - if metric, ok := metrics[tableName]; ok { - metric.Columns = value[i][1].(uint64) - } - } - } +func (ck *CkService) GetRebalanceTables() ([]RebalanceTables, error) { + query := fmt.Sprintf(`SELECT + t2.database AS database, + t2.name AS dist, + t2.local AS table, + groupArray(t1.name) AS rows +FROM system.columns AS t1 +INNER JOIN +( + SELECT + database, + name, + (extractAllGroups(engine_full, '(Distributed\\(\')(.*)\',\\s+\'(.*)\',\\s+\'(.*)\'(.*)')[1])[2] AS cluster, + (extractAllGroups(engine_full, '(Distributed\\(\')(.*)\',\\s+\'(.*)\',\\s+\'(.*)\'(.*)')[1])[4] AS local +FROM system.tables +WHERE match(engine, 
'Distributed') AND (database NOT IN ('system', 'information_schema', 'INFORMATION_SCHEMA') +AND cluster = '%s') +) AS t2 ON t1.table = t2.name and t1.database=t2.database +WHERE (multiSearchAny(t1.type, ['Int', 'Float', 'Date', 'String', 'Decimal']) = '1') +GROUP BY + database, + dist, + table +ORDER BY + database +`, ck.Config.Cluster) - // get bytes, parts, rows - found := false - if common.ArraySearch("partitions", cols) || common.ArraySearch("parts", cols) || - common.ArraySearch("compressed", cols) || common.ArraySearch("uncompressed", cols) || - common.ArraySearch("rows", cols) || len(cols) == 0 { - found = true + log.Logger.Debug(query) + rows, err := ck.Conn.Query(query) + if err != nil { + return nil, err } - if found { - query = fmt.Sprintf("SELECT table, uniqExact(partition) AS partitions, count(*) AS parts, sum(data_compressed_bytes) AS compressed, sum(data_uncompressed_bytes) AS uncompressed, sum(rows) AS rows, database FROM cluster('%s', system.parts) WHERE (database in ('%s')) AND (active = '1') GROUP BY table, database;", conf.Cluster, dbs) - value, err = service.QueryInfo(query) + defer rows.Close() + tblLists := make([]RebalanceTables, 0) + for rows.Next() { + var database, dist, table string + var cols []string + err = rows.Scan(&database, &dist, &table, &cols) if err != nil { return nil, err } - for i := 1; i < len(value); i++ { - table := value[i][0].(string) - database := value[i][6].(string) - tableName := fmt.Sprintf("%s.%s", database, table) - if metric, ok := metrics[tableName]; ok { - if common.ArraySearch("partitions", cols) || len(cols) == 0 { - metric.Partitions = value[i][1].(uint64) - } - if common.ArraySearch("parts", cols) || len(cols) == 0 { - metric.Parts = value[i][2].(uint64) - } - if common.ArraySearch("compressed", cols) || len(cols) == 0 { - metric.Compressed = value[i][3].(uint64) - } - if common.ArraySearch("uncompressed", cols) || len(cols) == 0 { - metric.UnCompressed = value[i][4].(uint64) - } - if common.ArraySearch("rows", cols) || len(cols) == 0 { - metric.Rows = value[i][5].(uint64) - } - } - } + tblLists = append(tblLists, RebalanceTables{ + Database: database, + DistTable: dist, + Table: table, + Columns: cols, + }) } + return tblLists, nil +} - // get readwrite_status - if common.ArraySearch("is_readonly", cols) || len(cols) == 0 { - query = fmt.Sprintf("select table, is_readonly, database from cluster('%s', system.replicas) where database in ('%s')", conf.Cluster, dbs) - value, err = service.QueryInfo(query) - if err != nil { - return nil, err - } - for i := 1; i < len(value); i++ { - table := value[i][0].(string) - database := value[i][2].(string) - tableName := fmt.Sprintf("%s.%s", database, table) - if metric, ok := metrics[tableName]; ok { - isReadonly := value[i][1].(uint8) - if isReadonly != 0 { - metric.RWStatus = false - } - } +func (ck *CkService) GetTblLists() (map[string]map[string][]string, error) { + query := `SELECT + t2.database AS database, + t2.name AS table, + groupArray(t1.name) AS rows +FROM system.columns AS t1 +INNER JOIN +( + SELECT + database, + name + FROM system.tables + WHERE match(engine, 'Distributed') AND (database NOT IN ('system', 'information_schema', 'INFORMATION_SCHEMA') ) +) AS t2 ON t1.table = t2.name and t1.database=t2.database +GROUP BY + database, + table +ORDER BY + database` + + tblLists := make(map[string]map[string][]string) + value, err := ck.QueryInfo(query) + if err != nil { + return nil, err + } + for i := 1; i < len(value); i++ { + tblMapping := make(map[string][]string) + database := 
value[i][0].(string) + table := value[i][1].(string) + cols := value[i][2].([]string) + tableMap, isExist := tblLists[database] + if isExist { + tblMapping = tableMap } + tblMapping[table] = cols + tblLists[database] = tblMapping } - // get success, failed counts - // tables := "[" - // for k := range dbtables { - // tables += fmt.Sprintf("'%s',", k) - // } - // tables = strings.TrimRight(tables, ",") - // tables += "]" - // if common.ArraySearch("queries", cols) || len(cols) == 0 { - // query = fmt.Sprintf("SELECT tables[1], type, count() AS counts from cluster('{cluster}', system.query_log) where hasAny(databases, %s) = 1 AND is_initial_query=1 AND event_date >= subtractDays(now(), 1) group by tables, type", tables) - // value, err = service.QueryInfo(query) - // if err != nil { - // return nil, err - // } - // for i := 1; i < len(value); i++ { - // tableName := value[i][0].(string) - // if metric, ok := metrics[tableName]; ok { - // types := value[i][1].(string) - // if types == ClickHouseQueryFinish { - // metric.CompletedQueries = value[i][2].(uint64) - // } else if types == ClickHouseQueryExStart || types == ClickHouseQueryExProcessing { - // metric.FailedQueries = value[i][2].(uint64) - // } - // } - // } - // } - - // // get query duration - // if common.ArraySearch("cost", cols) || len(cols) == 0 { - // query = fmt.Sprintf("SELECT tables[1] AS tbl_name, quantiles(0.5, 0.99, 1.0)(query_duration_ms) AS duration from cluster('{cluster}', system.query_log) where hasAny(databases, %s) = 1 AND type = 2 AND is_initial_query=1 AND event_date >= subtractDays(now(), 7) group by tables", tables) - // value, err = service.QueryInfo(query) - // if err != nil { - // return nil, err - // } - // for i := 1; i < len(value); i++ { - // tableName := value[i][0].(string) - // if _, ok := metrics[tableName]; ok { - // durations := value[i][1].([]float64) - // metrics[tableName].QueryCost.Middle = common.Decimal(durations[0]) - // metrics[tableName].QueryCost.SecondaryMax = common.Decimal(durations[1]) - // metrics[tableName].QueryCost.Max = common.Decimal(durations[2]) - // } - // } - // } - - return metrics, nil -} - -func GetCKMerges(conf *model.CKManClickHouseConfig) ([]model.CKTableMerges, error) { - var merges []model.CKTableMerges - query := "SELECT database, table, elapsed, progress, num_parts, result_part_name, source_part_names, total_size_bytes_compressed, bytes_read_uncompressed, bytes_written_uncompressed, rows_read, memory_usage, merge_algorithm FROM system.merges" - log.Logger.Debug("query: %s", query) - for _, host := range conf.Hosts { - db, err := common.ConnectClickHouse(host, model.ClickHouseDefaultDB, conf.GetConnOption()) - if err != nil { - return merges, err - } - rows, err := db.Query(query) - if err != nil { - return merges, err - } - for rows.Next() { - var ( - databse, table, result_part_name, merge_algorithm string - elapsed, progress float64 - memory_usage, num_parts, total_size_bytes_compressed, bytes_written_uncompressed, bytes_read_uncompressed, rows_read uint64 - source_part_names []string - ) - err = rows.Scan(&databse, &table, &elapsed, &progress, &num_parts, &result_part_name, &source_part_names, &total_size_bytes_compressed, &bytes_read_uncompressed, &bytes_written_uncompressed, &rows_read, &memory_usage, &merge_algorithm) - if err != nil { - return merges, err - } - merge := model.CKTableMerges{ - Table: databse + "." 
+ table, - Host: host, - Elapsed: elapsed, - MergeStart: time.Now().Add(time.Duration(elapsed*float64(time.Second)) * (-1)), - Progress: progress, - NumParts: num_parts, - ResultPartName: result_part_name, - SourcePartNames: strings.Join(source_part_names, ","), - Compressed: total_size_bytes_compressed, - Uncomressed: bytes_read_uncompressed + bytes_written_uncompressed, - Rows: rows_read, - MemUsage: memory_usage, - Algorithm: merge_algorithm, - } - merges = append(merges, merge) + query = `SELECT name FROM system.databases WHERE name NOT IN ('system', 'information_schema', 'INFORMATION_SCHEMA')` + value, err = ck.QueryInfo(query) + for i := 1; i < len(value); i++ { + database := value[i][0].(string) + if _, ok := tblLists[database]; !ok { + tblLists[database] = make(map[string][]string) } - rows.Close() } - return merges, nil + return tblLists, err } -func SetTableOrderBy(conf *model.CKManClickHouseConfig, req model.OrderbyReq) error { - hosts, err := common.GetShardAvaliableHosts(conf) - if err != nil { - return err +func DMLOnLogic(logics []string, req model.DMLOnLogicReq) error { + var query string + if req.Manipulation == model.DML_DELETE { + query = fmt.Sprintf("ALTER TABLE `%s`.`%s` %s WHERE (1=1)", req.Database, req.Table, req.Manipulation) + } else if req.Manipulation == model.DML_UPDATE { + var kv string + for k, v := range req.KV { + kv += fmt.Sprintf(" `%s` = '%s',", k, v) + } + kv = kv[:len(kv)-1] + query = fmt.Sprintf("ALTER TABLE `%s`.`%s` %s %s WHERE (1=1)", req.Database, req.Table, req.Manipulation, kv) } - ck := NewCkService(conf) - if err = ck.InitCkService(); err != nil { - return err - } - local, dist, err := common.GetTableNames(ck.Conn, req.Database, req.Table, req.DistName, conf.Cluster, true) - if err != nil { - return err + if req.Cond != "" { + query += fmt.Sprintf(" AND (%s)", req.Cond) } var wg sync.WaitGroup - var lastError error - query := fmt.Sprintf(`SELECT create_table_query, engine, partition_key, sorting_key FROM system.tables WHERE (database = '%s') AND (name = '%s')`, req.Database, local) - log.Logger.Debugf(query) - rows, err := ck.Conn.Query(query) - if err != nil { - return err - } - var createSql, engine, partition, order string - for rows.Next() { - err = rows.Scan(&createSql, &engine, &partition, &order) + var lastErr error + for _, cluster := range logics { + conf, err := repository.Ps.GetClusterbyName(cluster) if err != nil { return err } - } - log.Logger.Debugf("createsql: %s, engine:%s, partition: %s, order: %s", createSql, engine, partition, order) - new_partition := "" - if req.Partitionby.Name != "" { - switch req.Partitionby.Policy { - case model.CkTablePartitionPolicyDay: - new_partition = fmt.Sprintf("toYYYYMMDD(`%s`)", req.Partitionby.Name) - case model.CkTablePartitionPolicyMonth: - new_partition = fmt.Sprintf("toYYYYMM(`%s`)", req.Partitionby.Name) - case model.CkTablePartitionPolicyWeek: - new_partition = fmt.Sprintf("toYearWeek(`%s`)", req.Partitionby.Name) - default: - new_partition = fmt.Sprintf("toYYYYMMDD(`%s`)", req.Partitionby.Name) + hosts, err := common.GetShardAvaliableHosts(&conf) + if err != nil { + return err } - } - - new_order := "" - if len(req.Orderby) > 0 { - new_order = strings.Join(req.Orderby, ",") - } - if new_partition == partition && new_order == order { - return fmt.Errorf("partition and orderby is the same as the old") - } - tmpSql := fmt.Sprintf("CREATE TABLE `%s`.`tmp_%s` AS `%s`.`%s` ENGINE=%s() PARTITION BY %s ORDER BY (%s)", req.Database, local, req.Database, local, engine, new_partition, new_order) 
- createSql = strings.ReplaceAll(strings.ReplaceAll(createSql, "PARTITION BY "+partition, "PARTITION BY "+new_partition), "ORDER BY ("+order, "ORDER BY ("+new_order) - createSql = strings.ReplaceAll(createSql, fmt.Sprintf("CREATE TABLE %s.%s", req.Database, local), fmt.Sprintf("CREATE TABLE IF NOT EXISTS %s.%s ON CLUSTER `%s`", req.Database, local, conf.Cluster)) - - max_insert_threads := runtime.NumCPU()*3/4 + 1 - for _, host := range hosts { - host := host - wg.Add(1) - common.Pool.Submit(func() { - defer wg.Done() - conn, err := common.ConnectClickHouse(host, req.Database, conf.GetConnOption()) - if err != nil { - lastError = err - return - } - - queries := []string{ - tmpSql, - fmt.Sprintf("INSERT INTO `%s`.`tmp_%s` SELECT * FROM `%s`.`%s` SETTINGS max_insert_threads=%d, max_execution_time=0", req.Database, local, req.Database, local, max_insert_threads), - } - - for _, query := range queries { + for _, host := range hosts { + wg.Add(1) + go func(host string) { + defer wg.Done() + conn := common.GetConnection(host) + if conn == nil { + lastErr = fmt.Errorf("%s can't connect clickhouse", host) + return + } log.Logger.Debugf("[%s]%s", host, query) err = conn.Exec(query) if err != nil { - lastError = err + lastErr = err return } - } - }) + }(host) + } } wg.Wait() - - // if lastError not nil, need to drop tmp table - if lastError == nil { - // we must ensure data move to tmptable succeed, then drop and recreate origin table - queries := []string{ - fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s` ON CLUSTER `%s` SYNC", req.Database, local, conf.Cluster), - createSql, - } - - for _, query := range queries { - log.Logger.Debugf("%s", query) - err = ck.Conn.Exec(query) - if err != nil { - lastError = err - break - } - } - } - - for _, host := range hosts { - host := host - wg.Add(1) - common.Pool.Submit(func() { - defer wg.Done() - db := common.GetConnection(host) - if db == nil { - return - } - if lastError == nil { - query := fmt.Sprintf("INSERT INTO `%s`.`%s` SELECT * FROM `%s`.`tmp_%s` SETTINGS max_insert_threads=%d,max_execution_time=0", req.Database, local, req.Database, local, max_insert_threads) - log.Logger.Debugf("%s: %s", host, query) - err = ck.Conn.Exec(query) - if err != nil { - lastError = err - } - } - - cleanSql := fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`tmp_%s` SYNC", req.Database, local) - log.Logger.Debugf("%s: %s", host, cleanSql) - _ = db.Exec(cleanSql) - }) - } - wg.Wait() - if lastError != nil { - return lastError - } - - if dist != "" { - //alter distributed table - ck = NewCkService(conf) - if err = ck.InitCkService(); err != nil { - return err - } - deleteSql := fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s` ON CLUSTER `%s` SYNC", - req.Database, dist, conf.Cluster) - log.Logger.Debugf(deleteSql) - if err = ck.Conn.Exec(deleteSql); err != nil { - return errors.Wrap(err, "") - } - - create := fmt.Sprintf("CREATE TABLE IF NOT EXISTS `%s`.`%s` ON CLUSTER `%s` AS `%s`.`%s` ENGINE = Distributed(`%s`, `%s`, `%s`, rand())", - req.Database, dist, conf.Cluster, req.Database, local, - conf.Cluster, req.Database, local) - log.Logger.Debugf(create) - if err = ck.Conn.Exec(create); err != nil { - return errors.Wrap(err, "") - } - - if conf.LogicCluster != nil { - distParams := model.DistLogicTblParams{ - Database: req.Database, - TableName: local, - DistName: dist, - ClusterName: conf.Cluster, - LogicCluster: *conf.LogicCluster, - } - if err := ck.DeleteDistTblOnLogic(&distParams); err != nil { - return err - } - if err := ck.CreateDistTblOnLogic(&distParams); err != nil { - return err 
- } - } - } - - return nil -} - -func MaterializedView(conf *model.CKManClickHouseConfig, req model.MaterializedViewReq) (string, error) { - var statement string - ckService := NewCkService(conf) - err := ckService.InitCkService() - if err != nil { - return "", err - } - var query string - if req.Operate == model.OperateCreate { - partition := "" - switch req.Partition.Policy { - case model.CkTablePartitionPolicyDay: - partition = fmt.Sprintf("toYYYYMMDD(`%s`)", req.Partition.Name) - case model.CkTablePartitionPolicyMonth: - partition = fmt.Sprintf("toYYYYMM(`%s`)", req.Partition.Name) - case model.CkTablePartitionPolicyWeek: - partition = fmt.Sprintf("toYearWeek(`%s`)", req.Partition.Name) - default: - partition = fmt.Sprintf("toYYYYMMDD(`%s`)", req.Partition.Name) - } - - var populate string - if req.Populate { - populate = "POPULATE" - } - - query = fmt.Sprintf("CREATE MATERIALIZED VIEW IF NOT EXISTS `%s`.`%s` ON CLUSTER `%s` ENGINE=%s PARTITION BY %s ORDER BY (`%s`) %s AS %s", - req.Database, req.Name, conf.Cluster, req.Engine, partition, strings.Join(req.Order, "`,`"), populate, req.Statement) - } else if req.Operate == model.OperateDelete { - query = fmt.Sprintf("DROP VIEW IF EXISTS `%s`.`%s` ON CLUSTER `%s`) SYNC", - req.Database, req.Name, conf.Cluster) - } - if req.Dryrun { - return query, nil - } else { - log.Logger.Debug(query) - err = ckService.Conn.Exec(query) - if err != nil { - return "", err - } - } - - return statement, nil -} - -func GetPartitions(conf *model.CKManClickHouseConfig, table string) (map[string]model.PartitionInfo, error) { - partInfo := make(map[string]model.PartitionInfo) - - chHosts, err := common.GetShardAvaliableHosts(conf) - if err != nil { - return nil, err - } - - dbTbl := strings.SplitN(table, ".", 2) - dabatase := dbTbl[0] - tableName := dbTbl[1] - - for _, host := range chHosts { - service, err := GetCkNodeService(conf.Cluster, host) - if err != nil { - return nil, err - } - - query := fmt.Sprintf(`SELECT - partition, - count(name), - sum(rows), - sum(data_compressed_bytes), - sum(data_uncompressed_bytes), - min(min_time), - max(max_time), - disk_name -FROM system.parts -WHERE (database = '%s') AND (table = '%s') AND (active = 1) -GROUP BY - partition, - disk_name -ORDER BY partition ASC`, dabatase, tableName) - log.Logger.Infof("host: %s, query: %s", host, query) - value, err := service.QueryInfo(query) - if err != nil { - return nil, err - } - for i := 1; i < len(value); i++ { - partitionId := value[i][0].(string) - if part, ok := partInfo[partitionId]; ok { - part.Parts += value[i][1].(uint64) - part.Rows += value[i][2].(uint64) - part.Compressed += value[i][3].(uint64) - part.UnCompressed += value[i][4].(uint64) - minTime := value[i][5].(time.Time) - part.MinTime = common.TernaryExpression(part.MinTime.After(minTime), minTime, part.MinTime).(time.Time) - maxTime := value[i][6].(time.Time) - part.MaxTime = common.TernaryExpression(part.MaxTime.Before(maxTime), maxTime, part.MinTime).(time.Time) - part.DiskName = value[i][7].(string) - partInfo[partitionId] = part - } else { - part := model.PartitionInfo{ - Database: dabatase, - Table: tableName, - Parts: value[i][1].(uint64), - Rows: value[i][2].(uint64), - Compressed: value[i][3].(uint64), - UnCompressed: value[i][4].(uint64), - MinTime: value[i][5].(time.Time), - MaxTime: value[i][6].(time.Time), - DiskName: value[i][7].(string), - } - partInfo[partitionId] = part - } - } - } - - return partInfo, nil -} - -func getHostSessions(service *CkService, query, host string) ([]*model.CkSessionInfo, 
error) { - list := make([]*model.CkSessionInfo, 0) - - value, err := service.QueryInfo(query) - if err != nil { - return nil, err - } - for i := 1; i < len(value); i++ { - session := new(model.CkSessionInfo) - session.StartTime = value[i][0].(time.Time).Unix() - session.QueryDuration = value[i][1].(uint64) - session.Query = value[i][2].(string) - session.User = value[i][3].(string) - session.QueryId = value[i][4].(string) - session.Address = value[i][5].(net.IP).String() - session.Threads = len(value[i][6].([]uint64)) - session.Host = host - list = append(list, session) - } - - return list, nil -} - -func getCkSessions(conf *model.CKManClickHouseConfig, limit int, query string) ([]*model.CkSessionInfo, error) { - list := make([]*model.CkSessionInfo, 0) - - var lastError error - var wg sync.WaitGroup - for _, host := range conf.Hosts { - innerHost := host - wg.Add(1) - _ = common.Pool.Submit(func() { - defer wg.Done() - service, err := GetCkNodeService(conf.Cluster, innerHost) - if err != nil { - log.Logger.Warnf("get ck node %s service error: %v", innerHost, err) - return - } - - sessions, err := getHostSessions(service, query, innerHost) - if err != nil { - lastError = err - } - list = append(list, sessions...) - }) - } - wg.Wait() - if lastError != nil { - return nil, lastError - } - - sort.Sort(model.SessionList(list)) - if len(list) <= limit { - return list, nil - } else { - return list[:limit], nil - } -} - -func GetCkOpenSessions(conf *model.CKManClickHouseConfig, limit int) ([]*model.CkSessionInfo, error) { - query := fmt.Sprintf("select subtractSeconds(now(), elapsed) AS query_start_time, toUInt64(elapsed*1000) AS query_duration_ms, query, initial_user, initial_query_id, initial_address, thread_ids, (extractAllGroups(query, '(from|FROM)\\s+(\\w+\\.)\\?(\\w+)')[1])[3] AS tbl_name from system.processes WHERE tbl_name != '' AND tbl_name != 'processes' AND tbl_name != 'query_log' AND is_initial_query=1 ORDER BY query_duration_ms DESC limit %d", limit) - log.Logger.Debugf("query: %s", query) - return getCkSessions(conf, limit, query) -} - -func GetDistibutedDDLQueue(conf *model.CKManClickHouseConfig) ([]*model.CkSessionInfo, error) { - query := fmt.Sprintf("select DISTINCT query_create_time, query, host, initiator_host, entry from cluster('%s', system.distributed_ddl_queue) where cluster = '%s' and status != 'Finished' ORDER BY query_create_time", conf.Cluster, conf.Cluster) - log.Logger.Debugf("query:%s", query) - service := NewCkService(conf) - err := service.InitCkService() - if err != nil { - return nil, err - } - - value, err := service.QueryInfo(query) - if err != nil { - return nil, err - } - var sessions []*model.CkSessionInfo - if len(value) > 1 { - sessions = make([]*model.CkSessionInfo, len(value)-1) - for i := 1; i < len(value); i++ { - var session model.CkSessionInfo - startTime := value[i][0].(time.Time) - session.StartTime = startTime.Unix() - session.QueryDuration = uint64(time.Since(startTime).Milliseconds()) - session.Query = value[i][1].(string) - session.Host = value[i][2].(string) - session.Address = value[i][3].(string) - session.QueryId = value[i][4].(string) - - sessions[i-1] = &session - } - } else { - sessions = make([]*model.CkSessionInfo, 0) - } - return sessions, nil -} -func KillCkOpenSessions(conf *model.CKManClickHouseConfig, host, queryId, typ string) error { - conn, err := common.ConnectClickHouse(host, model.ClickHouseDefaultDB, conf.GetConnOption()) - if err != nil { - return err - } - if typ == "queue" { - query := fmt.Sprintf(`SELECT - 
splitByChar('.', table)[1] AS database, - splitByChar('.', table)[2] AS tbl, - initial_query_id - FROM - ( - SELECT - (extractAllGroups(value, 'TABLE (\\w+\\.\\w+) ')[1])[1] AS table, - (extractAllGroups(value, 'initial_query_id: (.*)\n')[1])[1] AS initial_query_id - FROM system.zookeeper - WHERE (path = '/clickhouse/task_queue/ddl/%s') AND (name = '%s') - )`, conf.Cluster, queryId) - var database, table, initial_query_id string - log.Logger.Debugf(query) - err := conn.QueryRow(query).Scan(&database, &table, &initial_query_id) - if err != nil { - return errors.Wrap(err, "") - } - log.Logger.Debugf("database: %s, table: %s, initial_query_id: %s", database, table, initial_query_id) - query = fmt.Sprintf("select query_id from system.processes where initial_query_id = '%s'", initial_query_id) - var query_id string - log.Logger.Debugf(query) - err = conn.QueryRow(query).Scan(&query_id) - if err == nil { - query = fmt.Sprintf("KILL QUERY WHERE query_id = '%s'", query_id) - log.Logger.Debugf(query) - err = conn.Exec(query) - if err != nil { - return errors.Wrap(err, "") - } - } else { - // kill mutation - query = fmt.Sprintf("select count() from system.mutations where is_done = 0 and database = '%s' and table = '%s'", database, table) - log.Logger.Debugf(query) - var count uint64 - err = conn.QueryRow(query).Scan(&count) - if err != nil { - return errors.Wrap(err, "") - } - if count > 0 { - query = fmt.Sprintf("KILL MUTATION WHERE database = '%s' AND table = '%s'", database, table) - log.Logger.Debugf(query) - err = conn.Exec(query) - if err != nil { - return errors.Wrap(err, "") - } - } - } - } else { - query := fmt.Sprintf("KILL QUERY WHERE query_id = '%s'", queryId) - err = conn.Exec(query) - if err != nil { - return errors.Wrap(err, "") - } - } - return nil -} - -func GetCkSlowSessions(conf *model.CKManClickHouseConfig, cond model.SessionCond) ([]*model.CkSessionInfo, error) { - query := fmt.Sprintf("SELECT query_start_time, query_duration_ms, query, initial_user, initial_query_id, initial_address, thread_ids, splitByChar('.', tables[1])[-1] AS tbl_name from system.query_log WHERE tbl_name != '' AND tbl_name != 'query_log' AND tbl_name != 'processes' AND type=2 AND is_initial_query=1 AND event_date >= parseDateTimeBestEffort('%d') AND query_start_time >= parseDateTimeBestEffort('%d') AND query_start_time <= parseDateTimeBestEffort('%d') ORDER BY query_duration_ms DESC limit %d", cond.StartTime, cond.StartTime, cond.EndTime, cond.Limit) - log.Logger.Debugf("query: %s", query) - return getCkSessions(conf, cond.Limit, query) -} - -func GetReplicaZkPath(conf *model.CKManClickHouseConfig) error { - var err error - service := NewCkService(conf) - if err = service.InitCkService(); err != nil { - log.Logger.Errorf("all hosts not available, can't get zoopath") - return err - } - - query := `SELECT database, name, (extractAllGroups(create_table_query, '(MergeTree\\(\')(.*)\', \'{replica}\'\\)')[1])[2] AS zoopath FROM system.tables where match(engine, 'Replicated\w*MergeTree') AND (database NOT IN ('system', 'information_schema', 'INFORMATION_SCHEMA'))` - log.Logger.Debug(query) - rows, err := service.Conn.Query(query) - if err != nil { - return err - } - defer rows.Close() - conf.ZooPath = make(map[string]string) - for rows.Next() { - var database, table, zoopath string - err = rows.Scan(&database, &table, &zoopath) - if err != nil { - return err - } - tableName := fmt.Sprintf("%s.%s", database, table) - conf.ZooPath[tableName] = zoopath - } - return nil -} - -func GetZkPath(conn *common.Conn, 
database, table string) (string, error) { - var err error - var path string - var rows *common.Rows - query := fmt.Sprintf(`SELECT - (extractAllGroups(create_table_query, '(MergeTree\\(\')(.*)\', \'{replica}\'\\)')[1])[2] AS zoopath -FROM system.tables -WHERE database = '%s' AND name = '%s'`, database, table) - log.Logger.Debugf("database:%s, table:%s: query: %s", database, table, query) - if rows, err = conn.Query(query); err != nil { - err = errors.Wrapf(err, "") - return "", err - } - defer rows.Close() - for rows.Next() { - var result string - if err = rows.Scan(&result); err != nil { - err = errors.Wrapf(err, "") - return "", err - } - path = result - } - - return path, nil -} - -func ConvertZooPath(conf *model.CKManClickHouseConfig) []string { - var zooPaths []string - - for _, path := range conf.ZooPath { - if path != "" { - for index := range conf.Shards { - // TODO[L] macros maybe not named {cluster} or {shard} - shardNum := fmt.Sprintf("%d", index+1) - zooPath := strings.Replace(path, "{cluster}", conf.Cluster, -1) - zooPath = strings.Replace(zooPath, "{shard}", shardNum, -1) - zooPaths = append(zooPaths, zooPath) - } - } - } - return zooPaths -} - -func checkTableIfExists(database, name, cluster string) bool { - conf, err := repository.Ps.GetClusterbyName(cluster) - if err != nil { - return false - } - hosts, err := common.GetShardAvaliableHosts(&conf) - if err != nil { - return false - } - for _, host := range hosts { - tmp := conf - tmp.Hosts = []string{host} - service := NewCkService(&tmp) - if err := service.InitCkService(); err != nil { - log.Logger.Warnf("shard: %v init service failed: %v", tmp.Hosts, err) - return false - } - query := fmt.Sprintf("SELECT count() FROM system.tables WHERE database = '%s' AND name = '%s'", database, name) - data, err := service.QueryInfo(query) - if err != nil { - log.Logger.Warnf("shard: %v , query: %v ,err: %v", tmp.Hosts, query, err) - return false - } - log.Logger.Debugf("count: %d", data[1][0].(uint64)) - if data[1][0].(uint64) != 1 { - log.Logger.Warnf("shard: %v, table %s does not exist", tmp.Hosts, name) - return false - } - } - return true -} - -func DropTableIfExists(params model.CreateCkTableParams, ck *CkService) error { - dropSql := fmt.Sprintf("DROP TABLE IF EXISTS %s.%s ON CLUSTER %s SYNC", params.DB, params.Name, params.Cluster) - log.Logger.Debugf(dropSql) - err := ck.Conn.Exec(dropSql) - if err != nil { - return err - } - - dropSql = fmt.Sprintf("DROP TABLE IF EXISTS %s.%s ON CLUSTER %s SYNC", params.DB, params.DistName, params.Cluster) - log.Logger.Debugf(dropSql) - err = ck.Conn.Exec(dropSql) - return err -} - -func (ck *CkService) ShowCreateTable(tbname, database string) (string, error) { - query := fmt.Sprintf("SELECT create_table_query FROM system.tables WHERE database = '%s' AND name = '%s'", database, tbname) - value, err := ck.QueryInfo(query) - if err != nil { - return "", err - } - schema := value[1][0].(string) - return schema, nil -} - -type RebalanceTables struct { - Database string - DistTable string - Table string - Columns []string -} - -func (ck *CkService) GetRebalanceTables() ([]RebalanceTables, error) { - query := fmt.Sprintf(`SELECT - t2.database AS database, - t2.name AS dist, - t2.local AS table, - groupArray(t1.name) AS rows -FROM system.columns AS t1 -INNER JOIN -( - SELECT - database, - name, - (extractAllGroups(engine_full, '(Distributed\\(\')(.*)\',\\s+\'(.*)\',\\s+\'(.*)\'(.*)')[1])[2] AS cluster, - (extractAllGroups(engine_full, '(Distributed\\(\')(.*)\',\\s+\'(.*)\',\\s+\'(.*)\'(.*)')[1])[4] 
AS local -FROM system.tables -WHERE match(engine, 'Distributed') AND (database NOT IN ('system', 'information_schema', 'INFORMATION_SCHEMA') -AND cluster = '%s') -) AS t2 ON t1.table = t2.name and t1.database=t2.database -WHERE (multiSearchAny(t1.type, ['Int', 'Float', 'Date', 'String', 'Decimal']) = '1') -GROUP BY - database, - dist, - table -ORDER BY - database -`, ck.Config.Cluster) - - log.Logger.Debug(query) - rows, err := ck.Conn.Query(query) - if err != nil { - return nil, err - } - defer rows.Close() - tblLists := make([]RebalanceTables, 0) - for rows.Next() { - var database, dist, table string - var cols []string - err = rows.Scan(&database, &dist, &table, &cols) - if err != nil { - return nil, err - } - tblLists = append(tblLists, RebalanceTables{ - Database: database, - DistTable: dist, - Table: table, - Columns: cols, - }) - } - return tblLists, nil -} - -func (ck *CkService) GetTblLists() (map[string]map[string][]string, error) { - query := `SELECT - t2.database AS database, - t2.name AS table, - groupArray(t1.name) AS rows -FROM system.columns AS t1 -INNER JOIN -( - SELECT - database, - name - FROM system.tables - WHERE match(engine, 'Distributed') AND (database NOT IN ('system', 'information_schema', 'INFORMATION_SCHEMA') ) -) AS t2 ON t1.table = t2.name and t1.database=t2.database -GROUP BY - database, - table -ORDER BY - database` - - tblLists := make(map[string]map[string][]string) - value, err := ck.QueryInfo(query) - for i := 1; i < len(value); i++ { - tblMapping := make(map[string][]string) - database := value[i][0].(string) - table := value[i][1].(string) - cols := value[i][2].([]string) - tableMap, isExist := tblLists[database] - if isExist { - tblMapping = tableMap - } - tblMapping[table] = cols - tblLists[database] = tblMapping - } - return tblLists, err -} - -func GetCKVersion(conf *model.CKManClickHouseConfig, host string) (string, error) { - tmp := *conf - tmp.Hosts = []string{host} - service, err := GetCkService(conf.Cluster) - if err != nil { - return "", err - } - value, err := service.QueryInfo("SELECT version()") - if err != nil { - return "", err - } - version := value[1][0].(string) - return version, nil -} - -func SyncLogicTable(src, dst model.CKManClickHouseConfig, name ...string) bool { - hosts, err := common.GetShardAvaliableHosts(&src) - if err != nil || len(hosts) == 0 { - log.Logger.Warnf("cluster %s all node is unvaliable", src.Cluster) - return false - } - srcConn, err := common.ConnectClickHouse(hosts[0], model.ClickHouseDefaultDB, src.GetConnOption()) - if err != nil { - log.Logger.Warnf("connect %s failed", hosts[0]) - return false - } - tableName := "" - database := "" - if len(name) > 0 { - database = name[0] - tableName = name[1] - } - statementsqls, err := GetLogicSchema(srcConn, *dst.LogicCluster, dst.Cluster, dst.IsReplica, database, tableName) - if err != nil { - log.Logger.Warnf("get logic schema failed: %v", err) - return false - } - - dstConn, err := common.ConnectClickHouse(dst.Hosts[0], model.ClickHouseDefaultDB, dst.GetConnOption()) - if err != nil { - log.Logger.Warnf("can't connect %s", dst.Hosts[0]) - return false - } - for _, schema := range statementsqls { - for _, statement := range schema.Statements { - log.Logger.Debugf("%s", statement) - if err := dstConn.Exec(statement); err != nil { - log.Logger.Warnf("excute sql failed: %v", err) - return false - } - } - } - return true -} - -func RestoreReplicaTable(conf *model.CKManClickHouseConfig, host, database, table string) error { - conn, err := common.ConnectClickHouse(host, 
database, conf.GetConnOption()) - if err != nil { - return errors.Wrapf(err, "cann't connect to %s", host) - } - query := "SELECT is_readonly FROM system.replicas" - var is_readonly uint8 - if err = conn.QueryRow(query).Scan(&is_readonly); err != nil { - return errors.Wrap(err, host) - } - if is_readonly == 0 { - return nil - } - - query = "SYSTEM RESTART REPLICA " + table - if err := conn.Exec(query); err != nil { - return errors.Wrap(err, host) - } - query = "SYSTEM RESTORE REPLICA " + table - if err := conn.Exec(query); err != nil { - err = common.ClikHouseExceptionDecode(err) - var exception *client.Exception - if errors.As(err, &exception) { - if exception.Code == 36 { - // Code: 36. DB::Exception: Replica must be readonly. (BAD_ARGUMENTS) - return nil - } - } - return errors.Wrap(err, host) - } - return nil -} - -func RebalanceCluster(conf *model.CKManClickHouseConfig, keys []model.RebalanceShardingkey, allTable, exceptMaxShard bool) error { - var err error - var exceptHost, target string - service := NewCkService(conf) - if err = service.InitCkService(); err != nil { - return err - } - - //check the full scale, if table not in the request, rebalance by partition - keys, err = paddingKeys(keys, service, allTable) - if err != nil { - return err - } - - hosts, err := common.GetShardAvaliableHosts(conf) - if err != nil { - return err - } - if err = checkBasicTools(conf, hosts, keys); err != nil { - return err - } - - if exceptMaxShard { - exceptHost = hosts[len(hosts)-1] - hosts = hosts[:len(hosts)-1] - target, err = checkDiskSpace(hosts, exceptHost) - if err != nil { - return err - } - } - - log.Logger.Debugf("keys: %d, %#v", len(keys), keys) - for _, key := range keys { - if exceptMaxShard { - if err = MoveExceptToOthers(conf, exceptHost, target, key.Database, key.Table); err != nil { - return err - } - } - rebalancer := &CKRebalance{ - Cluster: conf.Cluster, - Hosts: hosts, - Database: key.Database, - Table: key.Table, - TmpTable: "tmp_" + key.Table, - DistTable: key.DistTable, - DataDir: conf.Path, - OsUser: conf.SshUser, - OsPassword: conf.SshPassword, - OsPort: conf.SshPort, - RepTables: make(map[string]string), - ConnOpt: conf.GetConnOption(), - } - defer rebalancer.Close() - - if key.ShardingKey != "" { - //rebalance by shardingkey - log.Logger.Infof("table %s.%s rebalance by shardingkey", key.Database, key.Table) - if err = getShardingType(&key, service.Conn); err != nil { - return err - } - rebalancer.Shardingkey = key - if err = RebalanceByShardingkey(conf, rebalancer); err != nil { - return err - } - } else { - //rebalance by partition - log.Logger.Infof("table %s.%s rebalance by partition", key.Database, key.Table) - err = RebalanceByPartition(conf, rebalancer) - if err != nil { - return err - } - } - - } - - return nil -} - -func checkBasicTools(conf *model.CKManClickHouseConfig, hosts []string, keys []model.RebalanceShardingkey) error { - var chkrsync, chkawk bool - for _, key := range keys { - if chkawk && chkrsync { - break - } - if key.ShardingKey != "" { - chkawk = true - } else { - // by partition - if !conf.IsReplica { - chkrsync = true - } - } - } - for _, host := range hosts { - opts := common.SshOptions{ - User: conf.SshUser, - Password: conf.SshPassword, - Port: conf.SshPort, - Host: host, - NeedSudo: conf.NeedSudo, - AuthenticateType: conf.AuthenticateType, - } - - var cmds []string - if chkawk { - cmds = append(cmds, "which awk >/dev/null 2>&1 ;echo $?") - } - if chkrsync { - cmds = append(cmds, "which rsync >/dev/null 2>&1 ;echo $?") - } - for _, cmd := range 
cmds { - if output, err := common.RemoteExecute(opts, cmd); err != nil { - return err - } else { - if strings.TrimSuffix(output, "\n") != "0" { - return errors.Errorf("excute cmd:[%s] on %s failed", cmd, host) - } - } - } - } - return nil -} - -func checkDiskSpace(hosts []string, exceptHost string) (string, error) { - var needSpace, maxLeftSpace uint64 - var target string - query := `SELECT sum(total_bytes) -FROM system.tables -WHERE match(engine, 'MergeTree') AND (database NOT IN ('system', 'information_schema', 'INFORMATION_SCHEMA')) -SETTINGS skip_unavailable_shards = 1` - log.Logger.Debugf("[%s]%s", exceptHost, query) - conn := common.GetConnection(exceptHost) - rows, err := conn.Query(query) - if err != nil { - return "", errors.Wrap(err, exceptHost) - } - for rows.Next() { - rows.Scan(&needSpace) - } - query = "SELECT free_space FROM system.disks" - for _, host := range hosts { - conn = common.GetConnection(host) - log.Logger.Debugf("[%s]%s", host, query) - rows, err := conn.Query(query) - if err != nil { - return "", errors.Wrap(err, exceptHost) - } - var freeSpace uint64 - for rows.Next() { - rows.Scan(&freeSpace) - } - if maxLeftSpace*2 < freeSpace { - target = host - maxLeftSpace = freeSpace - } - } - if maxLeftSpace <= needSpace { - return "", fmt.Errorf("need %s space on the disk, but not enough", common.ConvertDisk(uint64(needSpace))) - } - return target, nil -} - -func paddingKeys(keys []model.RebalanceShardingkey, service *CkService, allTable bool) ([]model.RebalanceShardingkey, error) { - var results []model.RebalanceShardingkey - resps, err := service.GetRebalanceTables() - if err != nil { - return keys, err - } - //k: database, v:tables - for _, rt := range resps { - key := model.RebalanceShardingkey{ - Database: rt.Database, - Table: rt.Table, - DistTable: rt.DistTable, - } - found := false - for _, elem := range keys { - elem.Table = common.TernaryExpression(strings.HasPrefix(elem.Table, "^"), elem.Table, "^"+elem.Table).(string) - elem.Table = common.TernaryExpression(strings.HasSuffix(elem.Table, "$"), elem.Table, elem.Table+"$").(string) - reg, err := regexp.Compile(elem.Table) - if err != nil { - return keys, err - } - if key.Database == elem.Database && (reg.MatchString(key.Table)) { - if found { - return keys, fmt.Errorf("table %s matches more than one regexp expression", key.Table) - } - if common.ArraySearch(elem.ShardingKey, rt.Columns) { - found = true - key.ShardingKey = elem.ShardingKey - results = append(results, key) - //break - } else { - return keys, fmt.Errorf("shardingkey %s not found in %s.%s", elem.ShardingKey, elem.Database, key.Table) - } - } - if allTable && !found { - results = append(results, key) - } - } - } - - return results, nil -} - -func getShardingType(key *model.RebalanceShardingkey, conn *common.Conn) error { - query := fmt.Sprintf("SELECT type FROM system.columns WHERE (database = '%s') AND (table = '%s') AND (name = '%s') ", - key.Database, key.Table, key.ShardingKey) - rows, err := conn.Query(query) - if err != nil { - return err - } - defer rows.Close() - for rows.Next() { - var typ string - rows.Scan(&typ) - key.ShardingType = WhichType(typ) - } - if key.ShardingType.Nullable || key.ShardingType.Array { - return errors.Errorf("invalid shardingKey %s, expect its type be numerical or string", key.ShardingKey) - } - return nil -} - -func RebalanceByPartition(conf *model.CKManClickHouseConfig, rebalancer *CKRebalance) error { - var err error - if err = rebalancer.InitCKConns(false); err != nil { - log.Logger.Errorf("got error %+v", 
err) - return err - } - - if err = rebalancer.GetRepTables(); err != nil { - log.Logger.Errorf("got error %+v", err) - return err - } - - if err = rebalancer.DoRebalanceByPart(); err != nil { - log.Logger.Errorf("got error %+v", err) - return err - } - log.Logger.Infof("rebalance done") - return nil -} - -// 200w data costs 4s -func RebalanceByShardingkey(conf *model.CKManClickHouseConfig, rebalancer *CKRebalance) error { - var err error - start := time.Now() - log.Logger.Info("[rebalance] STEP InitCKConns") - if err = rebalancer.InitCKConns(true); err != nil { - log.Logger.Errorf("got error %+v", err) - return err - } - log.Logger.Info("[rebalance] STEP CreateTemporaryTable") - if err = rebalancer.CreateTemporaryTable(); err != nil { - return err - } - log.Logger.Info("[rebalance] STEP MoveBackup") - if err = rebalancer.MoveBackup(); err != nil { - return err - } - if err = rebalancer.CheckCounts(rebalancer.TmpTable); err != nil { - time.Sleep(5 * time.Second) - if err = rebalancer.CheckCounts(rebalancer.TmpTable); err != nil { - return err - } - } - log.Logger.Info("[rebalance] STEP InsertPlan") - if err = rebalancer.InsertPlan(); err != nil { - return errors.Wrapf(err, "table %s.%s rebalance failed, data can be corrupted, please move back from temp table[%s] manually", rebalancer.Database, rebalancer.Table, rebalancer.TmpTable) - } - if err = rebalancer.CheckCounts(rebalancer.Table); err != nil { - time.Sleep(5 * time.Second) - if err = rebalancer.CheckCounts(rebalancer.Table); err != nil { - return err - } - } - log.Logger.Info("[rebalance] STEP Cleanup") - rebalancer.Cleanup() - - log.Logger.Infof("[rebalance] DONE, Total counts: %d, Elapsed: %v sec", rebalancer.OriCount, time.Since(start).Seconds()) - return nil -} - -func MoveExceptToOthers(conf *model.CKManClickHouseConfig, except, target, database, table string) error { - max_insert_threads := runtime.NumCPU()*3/4 + 1 - query := fmt.Sprintf("INSERT INTO `%s`.`%s` SELECT * FROM remote('%s', '%s', '%s', '%s', '%s') SETTINGS max_insert_threads=%d", - database, table, except, database, table, conf.User, conf.Password, max_insert_threads) - log.Logger.Debugf("[%s] %s", target, query) - conn := common.GetConnection(target) - err := conn.Exec(query) - if err != nil { - return err - } - query = fmt.Sprintf("TRUNCATE TABLE `%s`.`%s` %s", database, table, common.WithAlterSync(conf.Version)) - log.Logger.Debugf("[%s] %s", except, query) - conn = common.GetConnection(except) - err = conn.Exec(query) - if err != nil { - return err - } - return nil -} - -func GroupUniqArray(conf *model.CKManClickHouseConfig, req model.GroupUniqArrayReq) error { - //创建本地聚合表,本地物化视图,分布式聚合表,分布式视图 - if err := CreateViewOnCluster(conf, req); err != nil { - return err - } - // 创建逻辑聚合表和逻辑视图 - if conf.LogicCluster != nil { - //当前集群的逻辑表和逻辑视图创建 - if err := CreateLogicViewOnCluster(conf, req); err != nil { - return err - } - clusters, err := repository.Ps.GetLogicClusterbyName(*conf.LogicCluster) - if err != nil { - return err - } - for _, cluster := range clusters { - con, err := repository.Ps.GetClusterbyName(cluster) - if err != nil { - return err - } - //当前集群已经创建过了,跳过 - if con.Cluster == conf.Cluster { - continue - } else { - //其他物理集群需要同步创建本地表、本地视图,分布式表、分布式视图,以及逻辑表,逻辑视图 - if err := CreateViewOnCluster(&con, req); err != nil { - return err - } - if err := CreateLogicViewOnCluster(&con, req); err != nil { - return err - } - } - } - } - return nil -} - -func CreateViewOnCluster(conf *model.CKManClickHouseConfig, req model.GroupUniqArrayReq) error { - //前置工作 - service := 
NewCkService(conf) - err := service.InitCkService() - if err != nil { - return err - } - query := fmt.Sprintf("SELECT name, type FROM system.columns WHERE (database = '%s') AND (table = '%s')", req.Database, req.Table) - rows, err := service.Conn.Query(query) - if err != nil { - return err - } - fields := make(map[string]string, len(req.Fields)+1) - for rows.Next() { - var name, typ string - err = rows.Scan(&name, &typ) - if err != nil { - rows.Close() - return err - } - if name == req.TimeField { - fields[name] = typ - } else { - for _, f := range req.Fields { - if name == f.Name { - fields[name] = typ - } - } - } - } - rows.Close() - - // check 一把是否所有字段在表里都能找到 - for _, f := range req.Fields { - if _, ok := fields[f.Name]; !ok { - return fmt.Errorf("can't find field %s in %s.%s", f.Name, req.Database, req.Table) - } - } - - aggTable := fmt.Sprintf("%s%s", common.ClickHouseAggregateTablePrefix, req.Table) - distAggTable := fmt.Sprintf("%s%s", common.ClickHouseAggDistTablePrefix, req.Table) - - mvLocal := fmt.Sprintf("%s%s", common.ClickHouseLocalViewPrefix, req.Table) - mvDist := fmt.Sprintf("%s%s", common.ClickHouseDistributedViewPrefix, req.Table) - - var engine string - if conf.IsReplica { - engine = "ReplicatedReplacingMergeTree()" - } else { - engine = "ReplacingMergeTree" - } - - fieldAndType := fmt.Sprintf("`%s` %s,", req.TimeField, fields[req.TimeField]) - fieldSql := fmt.Sprintf("`%s`, ", req.TimeField) - where := " WHERE 1=1 " - for i, f := range req.Fields { - if i > 0 { - fieldAndType += "," - } - if f.MaxSize == 0 { - f.MaxSize = 10000 - } - typ := fields[f.Name] - nullable := false - defaultValue := f.DefaultValue - if strings.HasPrefix(typ, "Nullable(") { - nullable = true - typ = strings.TrimSuffix(strings.TrimPrefix(typ, "Nullable("), ")") - if strings.Contains(typ, "Int") || strings.Contains(typ, "Float") { - defaultValue = fmt.Sprintf("%v", f.DefaultValue) - } else { - defaultValue = fmt.Sprintf("'%v'", f.DefaultValue) - } - where += fmt.Sprintf(" AND isNotNull(`%s`) ", f.Name) - } - fieldAndType += fmt.Sprintf("`%s%s` AggregateFunction(groupUniqArray(%d), `%s`)", model.GroupUniqArrayPrefix, f.Name, f.MaxSize, typ) - if nullable { - fieldSql += fmt.Sprintf("groupUniqArrayState(%d)(ifNull(`%s`, %s)) AS `%s%s`", f.MaxSize, f.Name, defaultValue, model.GroupUniqArrayPrefix, f.Name) - } else { - fieldSql += fmt.Sprintf("groupUniqArrayState(%d)(`%s`) AS `%s%s`", f.MaxSize, f.Name, model.GroupUniqArrayPrefix, f.Name) - } - } - - view_sql := fmt.Sprintf("SELECT %s FROM `%s`.`%s` %s GROUP BY (`%s`)", fieldSql, req.Database, req.Table, where, req.TimeField) - - // 创建本地聚合表及本地视图 - agg_query := fmt.Sprintf("CREATE TABLE IF NOT EXISTS `%s`.`%s` ON CLUSTER `%s` (%s) ENGINE = %s ORDER BY (`%s`)", - req.Database, aggTable, conf.Cluster, fieldAndType, engine, req.TimeField) - - //需不需要partition by? 
- log.Logger.Debugf("agg_query: %s", agg_query) - err = service.Conn.Exec(agg_query) - if err != nil { - return err - } - view_query := fmt.Sprintf("CREATE MATERIALIZED VIEW IF NOT EXISTS `%s`.`%s` ON CLUSTER `%s` TO `%s`.`%s` AS %s", - req.Database, mvLocal, conf.Cluster, req.Database, aggTable, view_sql) - - log.Logger.Debugf("view_query: %s", view_query) - err = service.Conn.Exec(view_query) - if err != nil { - return err - } - - // 创建分布式聚合表及分布式视图 - agg_query = fmt.Sprintf("CREATE TABLE IF NOT EXISTS `%s`.`%s` ON CLUSTER `%s` AS `%s`.`%s` ENGINE = Distributed(`%s`, `%s`, `%s`, rand())", - req.Database, distAggTable, conf.Cluster, req.Database, aggTable, conf.Cluster, req.Database, aggTable) - log.Logger.Debugf("agg_query: %s", agg_query) - err = service.Conn.Exec(agg_query) - if err != nil { - return err - } - - view_query = fmt.Sprintf("CREATE TABLE IF NOT EXISTS `%s`.`%s` ON CLUSTER `%s` AS `%s`.`%s` ENGINE = Distributed(`%s`, `%s`, `%s`, rand())", - req.Database, mvDist, conf.Cluster, req.Database, mvLocal, conf.Cluster, req.Database, mvLocal) - log.Logger.Debugf("view_query: %s", view_query) - err = service.Conn.Exec(view_query) - if err != nil { - return err - } - - if req.Populate { - insert_query := fmt.Sprintf("INSERT INTO `%s`.`%s` %s ", req.Database, aggTable, view_sql) - log.Logger.Debugf("[%s]insert_query: %s", conf.Cluster, insert_query) - hosts, err := common.GetShardAvaliableHosts(conf) - if err != nil { - return err - } - for _, host := range hosts { - conn := common.GetConnection(host) - if conn != nil { - err = service.Conn.AsyncInsert(insert_query, false) - if err != nil { - return err - } - } - } - } - return nil -} - -func CreateLogicViewOnCluster(conf *model.CKManClickHouseConfig, req model.GroupUniqArrayReq) error { - service := NewCkService(conf) - err := service.InitCkService() - if err != nil { - return err - } - aggTable := fmt.Sprintf("%s%s", common.ClickHouseAggregateTablePrefix, req.Table) - logicAggTable := fmt.Sprintf("%s%s", common.ClickHouseAggLogicTablePrefix, req.Table) - - mvLocal := fmt.Sprintf("%s%s", common.ClickHouseAggregateTablePrefix, req.Table) - mvLogic := fmt.Sprintf("%s%s", common.ClickHouseLogicViewPrefix, req.Table) - - agg_query := fmt.Sprintf("CREATE TABLE IF NOT EXISTS `%s`.`%s` ON CLUSTER `%s` AS `%s`.`%s` ENGINE = Distributed(`%s`, `%s`, `%s`, rand())", - req.Database, logicAggTable, conf.Cluster, req.Database, aggTable, *conf.LogicCluster, req.Database, aggTable) - log.Logger.Debugf("agg_query: %s", agg_query) - err = service.Conn.Exec(agg_query) - if err != nil { - return err - } - - view_query := fmt.Sprintf("CREATE TABLE IF NOT EXISTS `%s`.`%s` ON CLUSTER `%s` AS `%s`.`%s` ENGINE = Distributed(`%s`, `%s`, `%s`, rand())", - req.Database, mvLogic, conf.Cluster, req.Database, mvLocal, *conf.LogicCluster, req.Database, mvLocal) - log.Logger.Debugf("view_query: %s", view_query) - err = service.Conn.Exec(view_query) - if err != nil { - return err - } - return nil -} - -func GetGroupUniqArray(conf *model.CKManClickHouseConfig, database, table string) (map[string]interface{}, error) { - //确定是查分布式表还是逻辑表 - viewName := common.TernaryExpression(conf.LogicCluster != nil, fmt.Sprintf("%s%s", common.ClickHouseLogicViewPrefix, table), fmt.Sprintf("%s%s", common.ClickHouseDistributedViewPrefix, table)).(string) - //根据表名查询出物化视图名,聚合函数类型 - service := NewCkService(conf) - err := service.InitCkService() - if err != nil { - return nil, err - } - query := fmt.Sprintf(`SELECT - name, - (extractAllGroups(type, 'groupUniqArray\\((\\d+)\\)')[1])[1] AS 
maxsize -FROM system.columns -WHERE (database = '%s') AND (table = '%s') AND (type LIKE 'AggregateFunction%%')`, - database, viewName) - log.Logger.Debugf(query) - rows, err := service.Conn.Query(query) - if err != nil { - return nil, err - } - var aggFields string - idx := 0 - for rows.Next() { - var name, maxSize string - err = rows.Scan(&name, &maxSize) - if err != nil { - rows.Close() - return nil, err - } - if name != "" { - if idx > 0 { - aggFields += ", " - } - aggFields += fmt.Sprintf("groupUniqArrayMerge(%s)(%s) AS %s", maxSize, name, strings.TrimPrefix(name, model.GroupUniqArrayPrefix)) - idx++ - } - } - rows.Close() - - //查询 - query = fmt.Sprintf("SELECT %s FROM `%s`.`%s`", aggFields, database, viewName) - data, err := service.QueryInfo(query) - if err != nil { - return nil, err - } - result := make(map[string]interface{}) - keys := data[0] - for i, key := range keys { - value := data[1][i] - result[key.(string)] = value - } - return result, nil -} - -func DelGroupUniqArray(conf *model.CKManClickHouseConfig, database, table string) error { - err := delGuaViewOnCluster(conf, database, table) - if err != nil { - return err - } - - //如果有逻辑集群,还要去各个逻辑集群删除本地物化视图、分布式物化视图,逻辑物化视图 - if conf.LogicCluster != nil { - clusters, err := repository.Ps.GetLogicClusterbyName(*conf.LogicCluster) - if err != nil { - return err - } - for _, cluster := range clusters { - if cluster == conf.Cluster { - err = delGuaViewOnLogic(conf, database, table) - if err != nil { - return err - } - } else { - clus, err := repository.Ps.GetClusterbyName(cluster) - if err != nil { - return err - } - if err = delGuaViewOnCluster(&clus, database, table); err != nil { - return err - } - err = delGuaViewOnLogic(&clus, database, table) - if err != nil { - return err - } - } - } - } - return nil -} - -func delGuaViewOnCluster(conf *model.CKManClickHouseConfig, database, table string) error { - service := NewCkService(conf) - err := service.InitCkService() - if err != nil { - return err - } - - queries := []string{ - fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s%s` ON CLUSTER `%s` SYNC", database, common.ClickHouseLocalViewPrefix, table, conf.Cluster), - fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s%s` ON CLUSTER `%s` SYNC", database, common.ClickHouseDistributedViewPrefix, table, conf.Cluster), - fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s%s` ON CLUSTER `%s` SYNC", database, common.ClickHouseAggregateTablePrefix, table, conf.Cluster), - fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s%s` ON CLUSTER `%s` SYNC", database, common.ClickHouseAggDistTablePrefix, table, conf.Cluster), - } - - for _, query := range queries { - log.Logger.Debugf("[%s]%s", conf.Cluster, query) - err = service.Conn.Exec(query) - if err != nil { - return err - } - } - - return nil -} - -func delGuaViewOnLogic(conf *model.CKManClickHouseConfig, database, table string) error { - service := NewCkService(conf) - err := service.InitCkService() - if err != nil { - return err - } - - queries := []string{ - fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s%s` ON CLUSTER `%s` SYNC", database, common.ClickHouseLogicViewPrefix, table, conf.Cluster), - fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s%s` ON CLUSTER `%s` SYNC", database, common.ClickHouseAggLogicTablePrefix, table, conf.Cluster), - } - - for _, query := range queries { - log.Logger.Debugf("[%s]%s", conf.Cluster, query) - err = service.Conn.Exec(query) - if err != nil { - return err - } - } - return nil + return lastErr } diff --git a/service/clickhouse/rebalance.go b/service/clickhouse/rebalance.go index e24ddb0a..b9fcb9f4 
100644 --- a/service/clickhouse/rebalance.go +++ b/service/clickhouse/rebalance.go @@ -20,25 +20,27 @@ var ( ) type CKRebalance struct { - Cluster string - Hosts []string - DataDir string - Database string - Table string - TmpTable string - DistTable string - IsReplica bool - RepTables map[string]string - OsUser string - OsPassword string - OsPort int - Shardingkey model.RebalanceShardingkey - ExceptHost string - ConnOpt model.ConnetOption - Engine string - EngineFull string - OriCount uint64 - SortingKey []string + Cluster string + Hosts []string + DataDir string + Database string + Table string + TmpTable string + DistTable string + IsReplica bool + RepTables map[string]string + OsUser string + OsPassword string + OsPort int + Shardingkey model.RebalanceShardingkey + ExceptHost string + ConnOpt model.ConnetOption + Engine string + EngineFull string + OriCount uint64 + SortingKey []string + AllowLossRate float64 + SaveTemps bool } // TblPartitions is partitions status of a host. A host never move out and move in at the same iteration. @@ -59,7 +61,7 @@ func (r *CKRebalance) InitCKConns(withShardingkey bool) (err error) { if err != nil { return } - log.Logger.Infof("initialized clickhouse connection to %s", host) + log.Logger.Infof("[rebalance]initialized clickhouse connection to %s", host) locks[host] = &sync.Mutex{} } @@ -67,7 +69,7 @@ func (r *CKRebalance) InitCKConns(withShardingkey bool) (err error) { conn := common.GetConnection(r.Hosts[0]) // get engine query := fmt.Sprintf("SELECT engine, engine_full FROM system.tables WHERE database = '%s' AND table = '%s'", r.Database, r.Table) - log.Logger.Debugf("query:%s", query) + log.Logger.Debugf("[rebalance]query:%s", query) rows, _ := conn.Query(query) for rows.Next() { err = rows.Scan(&r.Engine, &r.EngineFull) @@ -76,12 +78,12 @@ func (r *CKRebalance) InitCKConns(withShardingkey bool) (err error) { } } rows.Close() - log.Logger.Infof("table: %s.%s, engine: %s, engine_full:%s", r.Database, r.Table, r.Engine, r.EngineFull) + log.Logger.Infof("[rebalance]table: %s.%s, engine: %s, engine_full:%s", r.Database, r.Table, r.Engine, r.EngineFull) //get sortingkey if strings.Contains(r.Engine, "Replacing") { query = fmt.Sprintf("SELECT name FROM system.columns WHERE (database = '%s') AND (table = '%s') AND (is_in_sorting_key = 1)", r.Database, r.Table) - log.Logger.Debugf("query:%s", query) + log.Logger.Debugf("[rebalance]query:%s", query) rows, _ := conn.Query(query) for rows.Next() { var sortingkey string @@ -92,7 +94,7 @@ func (r *CKRebalance) InitCKConns(withShardingkey bool) (err error) { r.SortingKey = append(r.SortingKey, sortingkey) } rows.Close() - log.Logger.Infof("table: %s.%s, sortingkey:%s", r.Database, r.Table, r.SortingKey) + log.Logger.Infof("[rebalance]table: %s.%s, sortingkey:%s", r.Database, r.Table, r.SortingKey) } @@ -104,13 +106,15 @@ func (r *CKRebalance) InitCKConns(withShardingkey bool) (err error) { } log.Logger.Debugf("query: %s", query) rows, _ = conn.Query(query) + var oriCount uint64 for rows.Next() { - err = rows.Scan(&r.OriCount) + err = rows.Scan(&oriCount) if err != nil { return } } - log.Logger.Infof("table: %s.%s, count: %d", r.Database, r.Table, r.OriCount) + r.OriCount = uint64((1 - r.AllowLossRate) * float64(oriCount)) + log.Logger.Infof("table: %s.%s, oriCount: %d, count: %d", r.Database, r.Table, oriCount, r.OriCount) rows.Close() } return @@ -123,10 +127,10 @@ func (r *CKRebalance) GetRepTables() (err error) { for _, host := range r.Hosts { conn := common.GetConnection(host) if conn == nil { - return 
fmt.Errorf("can't get connection: %s", host) + return fmt.Errorf("[rebalance]can't get connection: %s", host) } query := fmt.Sprintf("SELECT zookeeper_path FROM system.replicas WHERE database='%s' AND table = '%s'", r.Database, r.Table) - log.Logger.Infof("host %s: query: %s", host, query) + log.Logger.Infof("[rebalance]host %s: query: %s", host, query) var rows *common.Rows if rows, err = conn.Query(query); err != nil { err = errors.Wrapf(err, "") @@ -152,7 +156,7 @@ func (r *CKRebalance) InitSshConns() (err error) { continue } cmd := fmt.Sprintf("ssh -o StrictHostKeyChecking=false %s sudo ls %sclickhouse/data/%s", dstHost, r.DataDir, r.Database) - log.Logger.Infof("host: %s, command: %s", srcHost, cmd) + log.Logger.Infof("[rebalance]host: %s, command: %s", srcHost, cmd) sshOpts := common.SshOptions{ User: r.OsUser, Password: r.OsPassword, @@ -165,7 +169,7 @@ func (r *CKRebalance) InitSshConns() (err error) { if out, err = common.RemoteExecute(sshOpts, cmd); err != nil { return } - log.Logger.Debugf("host: %s, output: %s", srcHost, out) + log.Logger.Debugf("[rebalance]host: %s, output: %s", srcHost, out) } } return @@ -176,13 +180,13 @@ func (r *CKRebalance) GetPartState() (tbls []*TblPartitions, err error) { for _, host := range r.Hosts { conn := common.GetConnection(host) if conn == nil { - err = fmt.Errorf("can't get connection: %s", host) + err = fmt.Errorf("[rebalance]can't get connection: %s", host) return } var rows *common.Rows // Skip the newest partition on each host since into which there could by ongoing insertions. query := fmt.Sprintf(`WITH (SELECT argMax(partition, modification_time) FROM system.parts WHERE database='%s' AND table='%s') AS latest_partition SELECT partition, sum(data_compressed_bytes) AS compressed FROM system.parts WHERE database='%s' AND table='%s' AND active=1 AND partition!=latest_partition GROUP BY partition ORDER BY partition;`, r.Database, r.Table, r.Database, r.Table) - log.Logger.Infof("host %s: query: %s", host, query) + log.Logger.Infof("[rebalance]host %s: query: %s", host, query) if rows, err = conn.Query(query); err != nil { err = errors.Wrapf(err, "") return @@ -208,7 +212,7 @@ func (r *CKRebalance) GetPartState() (tbls []*TblPartitions, err error) { } tbls = append(tbls, &tbl) } - log.Logger.Infof("table %s state %s", r.Table, pp.Sprint(tbls)) + log.Logger.Infof("[rebalance]table %s state %s", r.Table, pp.Sprint(tbls)) return } @@ -258,6 +262,7 @@ func (r *CKRebalance) ExecutePartPlan(tbl *TblPartitions) (err error) { return } if tbl.ZooPath != "" { + // 带副本集群 for patt, dstHost := range tbl.ToMoveOut { lock := locks[dstHost] @@ -265,7 +270,7 @@ func (r *CKRebalance) ExecutePartPlan(tbl *TblPartitions) (err error) { lock.Lock() dstChConn := common.GetConnection(dstHost) if dstChConn == nil { - return fmt.Errorf("can't get connection: %s", dstHost) + return fmt.Errorf("[rebalance]can't get connection: %s", dstHost) } dstQuires := []string{ fmt.Sprintf("ALTER TABLE %s DROP DETACHED PARTITION '%s' ", tbl.Table, patt), @@ -273,7 +278,7 @@ func (r *CKRebalance) ExecutePartPlan(tbl *TblPartitions) (err error) { fmt.Sprintf("ALTER TABLE %s ATTACH PARTITION '%s'", tbl.Table, patt), } for _, query := range dstQuires { - log.Logger.Infof("host %s: query: %s", dstHost, query) + log.Logger.Infof("[rebalance]host %s: query: %s", dstHost, query) if err = dstChConn.Exec(query); err != nil { err = errors.Wrapf(err, "") return @@ -283,75 +288,77 @@ func (r *CKRebalance) ExecutePartPlan(tbl *TblPartitions) (err error) { srcChConn := 
common.GetConnection(tbl.Host) if srcChConn == nil { - return fmt.Errorf("can't get connection: %s", tbl.Host) + return fmt.Errorf("[rebalance]can't get connection: %s", tbl.Host) } query := fmt.Sprintf("ALTER TABLE %s DROP PARTITION '%s'", tbl.Table, patt) if err = srcChConn.Exec(query); err != nil { - log.Logger.Infof("host %s: query: %s", tbl.Host, query) + log.Logger.Infof("[rebalance]host %s: query: %s", tbl.Host, query) err = errors.Wrapf(err, "") return } } return - } - if sshErr != nil { - log.Logger.Warnf("skip execution for %s due to previous SSH error", tbl.Table) - return - } - for patt, dstHost := range tbl.ToMoveOut { - srcCkConn := common.GetConnection(tbl.Host) - dstCkConn := common.GetConnection(dstHost) - if srcCkConn == nil || dstCkConn == nil { - log.Logger.Errorf("can't get connection: %s & %s", tbl.Host, dstHost) - return - } - lock := locks[dstHost] - tableName := strings.Split(tbl.Table, ".")[1] - dstDir := filepath.Join(r.DataDir, fmt.Sprintf("clickhouse/data/%s/%s/detached", r.Database, tableName)) - srcDir := dstDir + "/" - - query := fmt.Sprintf("ALTER TABLE %s DETACH PARTITION '%s'", tbl.Table, patt) - log.Logger.Infof("host: %s, query: %s", tbl.Host, query) - if err = srcCkConn.Exec(query); err != nil { - err = errors.Wrapf(err, "") + } else { + //不带副本集群, 公钥认证集群做不了 + if sshErr != nil { + log.Logger.Warnf("[rebalance]skip execution for %s due to previous SSH error", tbl.Table) return } + for patt, dstHost := range tbl.ToMoveOut { + srcCkConn := common.GetConnection(tbl.Host) + dstCkConn := common.GetConnection(dstHost) + if srcCkConn == nil || dstCkConn == nil { + log.Logger.Errorf("[rebalance]can't get connection: %s & %s", tbl.Host, dstHost) + return + } + lock := locks[dstHost] + tableName := strings.Split(tbl.Table, ".")[1] + dstDir := filepath.Join(r.DataDir, fmt.Sprintf("clickhouse/data/%s/%s/detached", r.Database, tableName)) + srcDir := dstDir + "/" - // There could be multiple executions on the same dest node and partition. - lock.Lock() - cmds := []string{ - fmt.Sprintf(`rsync -e "ssh -o StrictHostKeyChecking=false" -avp %s %s:%s`, srcDir, dstHost, dstDir), - fmt.Sprintf("rm -fr %s", srcDir), - } - sshOpts := common.SshOptions{ - User: r.OsUser, - Password: r.OsPassword, - Port: r.OsPort, - Host: tbl.Host, - NeedSudo: true, - AuthenticateType: model.SshPasswordSave, - } - var out string - if out, err = common.RemoteExecute(sshOpts, strings.Join(cmds, ";")); err != nil { - lock.Unlock() - return - } - log.Logger.Debugf("host: %s, output: %s", tbl.Host, out) + query := fmt.Sprintf("ALTER TABLE %s DETACH PARTITION '%s'", tbl.Table, patt) + log.Logger.Infof("[rebalance]host: %s, query: %s", tbl.Host, query) + if err = srcCkConn.Exec(query); err != nil { + err = errors.Wrapf(err, "") + return + } - query = fmt.Sprintf("ALTER TABLE %s ATTACH PARTITION '%s'", tbl.Table, patt) - log.Logger.Infof("host: %s, query: %s", dstHost, query) - if err = dstCkConn.Exec(query); err != nil { - err = errors.Wrapf(err, "") + // There could be multiple executions on the same dest node and partition. 
+ lock.Lock() + cmds := []string{ + fmt.Sprintf(`rsync -e "ssh -o StrictHostKeyChecking=false" -avp %s %s:%s`, srcDir, dstHost, dstDir), + fmt.Sprintf("rm -fr %s", srcDir), + } + sshOpts := common.SshOptions{ + User: r.OsUser, + Password: r.OsPassword, + Port: r.OsPort, + Host: tbl.Host, + NeedSudo: true, + AuthenticateType: model.SshPasswordSave, + } + var out string + if out, err = common.RemoteExecute(sshOpts, strings.Join(cmds, ";")); err != nil { + lock.Unlock() + return + } + log.Logger.Debugf("[rebalance]host: %s, output: %s", tbl.Host, out) + + query = fmt.Sprintf("ALTER TABLE %s ATTACH PARTITION '%s'", tbl.Table, patt) + log.Logger.Infof("[rebalance]host: %s, query: %s", dstHost, query) + if err = dstCkConn.Exec(query); err != nil { + err = errors.Wrapf(err, "") + lock.Unlock() + return + } lock.Unlock() - return - } - lock.Unlock() - query = fmt.Sprintf("ALTER TABLE %s DROP DETACHED PARTITION '%s'", tbl.Table, patt) - log.Logger.Infof("host: %s, query: %s", tbl.Host, query) - if err = srcCkConn.Exec(query); err != nil { - err = errors.Wrapf(err, "") - return + query = fmt.Sprintf("ALTER TABLE %s DROP DETACHED PARTITION '%s'", tbl.Table, patt) + log.Logger.Infof("[rebalance]host: %s, query: %s", tbl.Host, query) + if err = srcCkConn.Exec(query); err != nil { + err = errors.Wrapf(err, "") + return + } } } return @@ -360,12 +367,14 @@ func (r *CKRebalance) ExecutePartPlan(tbl *TblPartitions) (err error) { func (r *CKRebalance) DoRebalanceByPart() (err error) { // initialize SSH connections only if there are some non-replicated tables - if sshErr = r.InitSshConns(); sshErr != nil { - log.Logger.Warnf("failed to init ssh connections, error: %+v", sshErr) + if !r.IsReplica { + if sshErr = r.InitSshConns(); sshErr != nil { + log.Logger.Warnf("[rebalance]failed to init ssh connections, error: %+v", sshErr) + } } var tbls []*TblPartitions if tbls, err = r.GetPartState(); err != nil { - log.Logger.Errorf("got error %+v", err) + log.Logger.Errorf("[rebalance]got error %+v", err) return err } r.GeneratePartPlan(tbls) @@ -378,10 +387,10 @@ func (r *CKRebalance) DoRebalanceByPart() (err error) { _ = common.Pool.Submit(func() { defer wg.Done() if err := r.ExecutePartPlan(tbl); err != nil { - log.Logger.Errorf("host: %s, got error %+v", tbl.Host, err) + log.Logger.Errorf("[rebalance]host: %s, got error %+v", tbl.Host, err) gotError = true } else { - log.Logger.Infof("table %s host %s rebalance done", tbl.Table, tbl.Host) + log.Logger.Infof("[rebalance]table %s host %s rebalance done", tbl.Table, tbl.Host) } }) } @@ -389,7 +398,7 @@ func (r *CKRebalance) DoRebalanceByPart() (err error) { if gotError { return err } - log.Logger.Infof("table %s.%s rebalance done", r.Database, r.Table) + log.Logger.Infof("[rebalance]table %s.%s rebalance done", r.Database, r.Table) return } diff --git a/service/cron/clickhouse.go b/service/cron/clickhouse.go index 0fec5588..543ecf6a 100644 --- a/service/cron/clickhouse.go +++ b/service/cron/clickhouse.go @@ -4,7 +4,6 @@ import ( "fmt" "strings" - client "github.com/ClickHouse/clickhouse-go/v2" "github.com/housepower/ckman/common" "github.com/housepower/ckman/deploy" "github.com/housepower/ckman/log" @@ -63,14 +62,10 @@ AND (cluster != '%s')`, cluster) _ = rows.Scan(&database, &table, &logic, &local) err = syncLogicbyTable(clusters, database, local) if err != nil { - err = common.ClikHouseExceptionDecode(err) - var exception *client.Exception - if errors.As(err, &exception) { - if exception.Code == 60 { - //means local table is not exist, will auto sync schema - 
needCreateTable[cluster] = clusters - log.Logger.Infof("[%s]table %s.%s may not exists on one of cluster %v, need to auto create", cluster, database, local, clusters) - } + if common.ExceptionAS(err, common.UNKNOWN_TABLE) { + //means local table is not exist, will auto sync schema + needCreateTable[cluster] = clusters + log.Logger.Infof("[%s]table %s.%s may not exists on one of cluster %v, need to auto create", cluster, database, local, clusters) } else { log.Logger.Errorf("logic %s table %s.%s sync logic table failed: %v", cluster, database, local, err) continue @@ -319,25 +314,15 @@ func syncDistTable(distTable, localTable, database string, conf model.CKManClick logicTable := common.ClickHouseDistTableOnLogicPrefix + localTable logicCols, err := getColumns(conn, database, logicTable) if err != nil { - err = common.ClikHouseExceptionDecode(err) - var exception *client.Exception - if errors.As(err, &exception) { - // 逻辑表不存在没关系,不报错 - if exception.Code == 60 { - continue - } + if common.ExceptionAS(err, common.UNKNOWN_TABLE) { + continue } return errors.Wrap(err, host) } if err = syncSchema(conn, allCols, logicCols, database, logicTable, onCluster, conf.Version); err != nil { - err = common.ClikHouseExceptionDecode(err) - var exception *client.Exception - if errors.As(err, &exception) { - // 逻辑表不存在没关系,不报错 - if exception.Code == 60 { - continue - } + if common.ExceptionAS(err, common.UNKNOWN_TABLE) { + continue } return errors.Wrap(err, "logic table") } diff --git a/service/cron/constant.go b/service/cron/constant.go index 72bf9899..5de21eb5 100644 --- a/service/cron/constant.go +++ b/service/cron/constant.go @@ -5,6 +5,7 @@ const ( JOB_SYNC_LOGIC_SCHEMA JOB_WATCH_CLUSTER_STATUS JOB_SYNC_DIST_SCHEMA + JOB_CLEAR_ZNODES ) const ( @@ -15,6 +16,7 @@ const ( SCHEDULE_WATCH_DEFAULT = "0 */3 * * * ?" SCHEDULE_SYNC_DIST = "30 */10 * * * ?" + SCHEDULE_CLEAR_ZNODES = "0 0 */6 * * ?" 
SCHEDULE_DISABLED = "disabled" ) diff --git a/service/cron/cron_service.go b/service/cron/cron_service.go index 6a14646b..794a4f27 100644 --- a/service/cron/cron_service.go +++ b/service/cron/cron_service.go @@ -17,6 +17,7 @@ var JobList = map[int16]func() error{ JOB_SYNC_LOGIC_SCHEMA: SyncLogicSchema, JOB_WATCH_CLUSTER_STATUS: WatchClusterStatus, JOB_SYNC_DIST_SCHEMA: SyncDistSchema, + JOB_CLEAR_ZNODES: ClearZnodes, } func NewCronService(config config.CronJob) *CronService { @@ -31,6 +32,7 @@ func (job *CronService) schedulePadding() { job.jobSchedules[JOB_SYNC_LOGIC_SCHEMA] = common.GetStringwithDefault(job.config.SyncLogicSchema, SCHEDULE_EVERY_MIN) job.jobSchedules[JOB_WATCH_CLUSTER_STATUS] = common.GetStringwithDefault(job.config.WatchClusterStatus, SCHEDULE_WATCH_DEFAULT) job.jobSchedules[JOB_SYNC_DIST_SCHEMA] = common.GetStringwithDefault(job.config.SyncDistSchema, SCHEDULE_SYNC_DIST) + job.jobSchedules[JOB_CLEAR_ZNODES] = common.GetStringwithDefault(job.config.ClearZnodes, SCHEDULE_CLEAR_ZNODES) } func (job *CronService) Start() error { diff --git a/service/cron/zookeeper.go b/service/cron/zookeeper.go new file mode 100644 index 00000000..e3afcd67 --- /dev/null +++ b/service/cron/zookeeper.go @@ -0,0 +1,150 @@ +package cron + +import ( + "errors" + "fmt" + + "github.com/go-zookeeper/zk" + "github.com/housepower/ckman/log" + "github.com/housepower/ckman/repository" + "github.com/housepower/ckman/service/clickhouse" + "github.com/housepower/ckman/service/zookeeper" +) + +func ClearZnodes() error { + log.Logger.Debugf("clear znodes task triggered") + clusters, err := repository.Ps.GetAllClusters() + if err != nil { + return err + } + for _, cluster := range clusters { + ckService := clickhouse.NewCkService(&cluster) + if err = ckService.InitCkService(); err != nil { + log.Logger.Warnf("[%s]init clickhouse service failed: %v", cluster.Cluster, err) + continue + } + nodes, port := zookeeper.GetZkInfo(&cluster) + zkService, err := zookeeper.NewZkService(nodes, port) + if err != nil { + log.Logger.Warnf("can't create zookeeper instance:%v", err) + continue + } + // remove block_numbers in zookeeper + // znodes, err := GetBlockNumberZnodes(ckService) + // if err != nil { + // log.Logger.Warnf("[%s]remove block_number from zookeeper failed: %v", cluster.Cluster, err) + // } + + // deleted, notexist := RemoveZnodes(zkService, znodes) + // log.Logger.Warnf("[%s]remove [%d] block_number from zookeeper success, %d already deleted", cluster.Cluster, deleted, notexist) + + // remove replica_queue in zookeeper + znodes, err := GetReplicaQueueZnodes(ckService, 100) + if err != nil { + log.Logger.Infof("[%s]remove replica_queue from zookeeper failed: %v", cluster.Cluster, err) + } + deleted, notexist := RemoveZnodes(zkService, znodes) + log.Logger.Infof("[%s]remove [%d] replica_queue from zookeeper success, [%d] already deleted", cluster.Cluster, deleted, notexist) + + } + return nil +} + +func RemoveZnodes(zkService *zookeeper.ZkService, znodes []string) (int, int) { + var deleted, notexist int + for _, znode := range znodes { + err := zkService.Delete(znode) + if err != nil { + if errors.Is(err, zk.ErrNoNode) { + notexist++ + } else { + log.Logger.Debugf("[%s]remove replica_queue from zookeeper failed: %v", znode, err) + } + } else { + deleted++ + } + } + return deleted, notexist +} + +func GetBlockNumberZnodes(ckService *clickhouse.CkService) ([]string, error) { + query := `SELECT distinct concat(zk.block_numbers_path, zk.partition_id) FROM + ( + SELECT r.database, r.table, zk.block_numbers_path, 
zk.partition_id, p.partition_id + FROM + ( + SELECT path as block_numbers_path, name as partition_id + FROM system.zookeeper + WHERE path IN ( + SELECT concat(zookeeper_path, '/block_numbers/') as block_numbers_path + FROM clusterAllReplicas('{cluster}',system.replicas) + ) + ) as zk + LEFT JOIN + ( + SELECT database, table, concat(zookeeper_path, '/block_numbers/') as block_numbers_path + FROM clusterAllReplicas('{cluster}',system.replicas) + ) + as r ON (r.block_numbers_path = zk.block_numbers_path) + LEFT JOIN + ( + SELECT DISTINCT partition_id, database, table + FROM clusterAllReplicas('{cluster}',system.parts) + ) + as p ON (p.partition_id = zk.partition_id AND p.database = r.database AND p.table = r.table) + WHERE p.partition_id = '' AND zk.partition_id <> 'all' + ORDER BY r.database, r.table, zk.block_numbers_path, zk.partition_id, p.partition_id + ) t + ` + rows, err := ckService.Conn.Query(query) + if err != nil { + return nil, err + } + defer rows.Close() + var znodes []string + for rows.Next() { + var path string + if err = rows.Scan(&path); err != nil { + return nil, err + } + znodes = append(znodes, path) + + } + log.Logger.Debugf("[%s]remove block_number from zookeeper: %v", ckService.Config.Cluster, len(znodes)) + return znodes, nil +} + +func GetReplicaQueueZnodes(ckService *clickhouse.CkService, numtries int) ([]string, error) { + query := fmt.Sprintf(`SELECT DISTINCT concat(t0.replica_path, '/queue/', t1.node_name) + FROM clusterAllReplicas('%s', system.replicas) AS t0, + ( + SELECT + database, + table, + replica_name, + node_name, + create_time, + last_exception_time, + num_tries + FROM clusterAllReplicas('%s', system.replication_queue) + WHERE (num_postponed > 0) AND (num_tries > %d OR create_time > addSeconds(now(), -86400)) + ) AS t1 + WHERE (t0.database = t1.database) AND (t0.table = t1.table) AND (t0.replica_name = t1.replica_name)`, + ckService.Config.Cluster, ckService.Config.Cluster, numtries) + rows, err := ckService.Conn.Query(query) + if err != nil { + return nil, err + } + defer rows.Close() + var znodes []string + for rows.Next() { + var path string + if err = rows.Scan(&path); err != nil { + return nil, err + } + znodes = append(znodes, path) + + } + log.Logger.Debugf("[%s]remove replica_queue from zookeeper: %v", ckService.Config.Cluster, len(znodes)) + return znodes, nil +} diff --git a/service/runner/ck.go b/service/runner/ck.go index 741bd69f..e6ca26bd 100644 --- a/service/runner/ck.go +++ b/service/runner/ck.go @@ -2,7 +2,6 @@ package runner import ( "fmt" - "strings" "github.com/housepower/ckman/common" "github.com/housepower/ckman/deploy" @@ -56,6 +55,9 @@ func DestroyCkCluster(task *model.Task, d deploy.CKDeploy, conf *model.CKManClic return errors.Wrapf(err, "[%s]", model.NodeStatusInstall.EN) } + if d.Conf.Keeper == model.ClickhouseKeeper { + return nil + } //clear zkNode deploy.SetNodeStatus(task, model.NodeStatusClearData, model.ALL_NODES_DEFAULT) service, err := zookeeper.GetZkService(conf.Cluster) @@ -68,14 +70,6 @@ func DestroyCkCluster(task *model.Task, d deploy.CKDeploy, conf *model.CKManClic if err = service.DeleteAll(stdZooPath); err != nil { return errors.Wrapf(err, "[%s]", model.NodeStatusClearData.EN) } - zooPaths := clickhouse.ConvertZooPath(conf) - if len(zooPaths) > 0 { - for _, zooPath := range zooPaths { - if err = service.DeleteAll(zooPath); err != nil { - return errors.Wrapf(err, "[%s]", model.NodeStatusClearData.EN) - } - } - } taskQueuePath := fmt.Sprintf("/clickhouse/task_queue/ddl/%s", conf.Cluster) if err = 
service.DeleteAll(taskQueuePath); err != nil { return errors.Wrapf(err, "[%s]", model.NodeStatusClearData.EN) @@ -88,13 +82,9 @@ func DeleteCkClusterNode(task *model.Task, conf *model.CKManClickHouseConfig, ip //delete zookeeper path if need deploy.SetNodeStatus(task, model.NodeStatusClearData, model.ALL_NODES_DEFAULT) ifDeleteShard := false - shardNum := 0 - replicaNum := 0 for i, shard := range conf.Shards { - for j, replica := range shard.Replicas { + for _, replica := range shard.Replicas { if replica.Ip == ip { - shardNum = i - replicaNum = j if i+1 == len(conf.Shards) { if len(shard.Replicas) == 1 { ifDeleteShard = true @@ -118,33 +108,33 @@ func DeleteCkClusterNode(task *model.Task, conf *model.CKManClickHouseConfig, ip if err != nil { return errors.Wrapf(err, "[%s]", model.NodeStatusClearData.EN) } - _ = clickhouse.GetReplicaZkPath(conf) + // err = service.CleanZoopath(*conf, conf.Cluster, ip, false) + // if err != nil { + // log.Logger.Fatalf("clean zoopath error:%v", err) + // } var zooPaths []string - for _, path := range conf.ZooPath { - zooPath := strings.Replace(path, "{cluster}", conf.Cluster, -1) - zooPath = strings.Replace(zooPath, "{shard}", fmt.Sprintf("%d", shardNum+1), -1) - zooPaths = append(zooPaths, zooPath) - } - - for _, path := range zooPaths { - if ifDeleteShard { - //delete the shard - shardNode := fmt.Sprintf("%d", shardNum+1) - err = service.DeletePathUntilNode(path, shardNode) - if err != nil { - return errors.Wrapf(err, "[%s]", model.NodeStatusClearData.EN) - } - } else { - // delete replica path - replicaName := conf.Shards[shardNum].Replicas[replicaNum].Ip - replicaPath := fmt.Sprintf("%s/replicas/%s", path, replicaName) - log.Logger.Debugf("replicaPath: %s", replicaPath) - err = service.DeleteAll(replicaPath) - if err != nil { - return errors.Wrapf(err, "[%s]", model.NodeStatusClearData.EN) + query := fmt.Sprintf("SELECT zookeeper_path,replica_path FROM clusterAllReplicas('%s', system.replicas) WHERE replica_name = '%s'", conf.Cluster, ip) + ckService := clickhouse.NewCkService(conf) + err = ckService.InitCkService() + if err == nil { + data, err := ckService.QueryInfo(query) + if err == nil { + for i := 1; i < len(data); i++ { + if ifDeleteShard { + zooPaths = append(zooPaths, data[i][0].(string)) + } else { + zooPaths = append(zooPaths, data[i][1].(string)) + } } } } + for _, zoopath := range zooPaths { + log.Logger.Debugf("delete zoopath:%s", zoopath) + err = service.DeleteAll(zoopath) + if err != nil { + return errors.Wrapf(err, "[%s]", model.NodeStatusClearData.EN) + } + } } // stop the node @@ -188,15 +178,32 @@ func DeleteCkClusterNode(task *model.Task, conf *model.CKManClickHouseConfig, ip d = deploy.NewCkDeploy(*conf) d.Conf.Hosts = hosts d.Conf.Shards = shards + if d.Conf.Keeper == model.ClickhouseKeeper && d.Conf.KeeperConf.Runtime == model.KeeperRuntimeInternal { + d.Ext.Restart = true + d.Conf.KeeperConf.KeeperNodes = common.ArrayRemove(conf.Hosts, ip) + } if err := d.Init(); err != nil { return errors.Wrapf(err, "[%s]", model.NodeStatusConfigExt.EN) } if err := d.Config(); err != nil { return errors.Wrapf(err, "[%s]", model.NodeStatusConfigExt.EN) } + if d.Ext.Restart { + if err := d.Restart(); err != nil { + return errors.Wrapf(err, "[%s]", model.NodeStatusRestart.EN) + } + if err := d.Check(300); err != nil { + log.Logger.Warnf("[%s]delnode check failed: %v", d.Conf.Cluster, err) + //return errors.Wrapf(err, "[%s]", model.NodeStatusCheck.EN) + } + } conf.Hosts = hosts conf.Shards = shards + if d.Conf.Keeper == model.ClickhouseKeeper && 
d.Conf.KeeperConf.Runtime == model.KeeperRuntimeInternal { + conf.KeeperConf.KeeperNodes = make([]string, len(d.Conf.Hosts)) + copy(conf.KeeperConf.KeeperNodes, d.Conf.Hosts) + } return nil } @@ -228,13 +235,18 @@ func AddCkClusterNode(task *model.Task, conf *model.CKManClickHouseConfig, d *de deploy.SetNodeStatus(task, model.NodeStatusCheck, model.ALL_NODES_DEFAULT) if err := d.Check(30); err != nil { - return errors.Wrapf(err, "[%s]", model.NodeStatusCheck.EN) + log.Logger.Warnf("[%s]addnode check failed: %v", d.Conf.Cluster, err) + //return errors.Wrapf(err, "[%s]", model.NodeStatusCheck.EN) } // update other nodes config deploy.SetNodeStatus(task, model.NodeStatusConfigExt, model.ALL_NODES_DEFAULT) d2 := deploy.NewCkDeploy(*conf) d2.Conf.Shards = d.Conf.Shards + if conf.Keeper == model.ClickhouseKeeper && conf.KeeperConf.Runtime == model.KeeperRuntimeInternal { + d2.Conf.KeeperConf.KeeperNodes = make([]string, len(conf.Hosts)+len(d.Conf.Hosts)) + copy(d2.Conf.KeeperConf.KeeperNodes, append(conf.Hosts, d.Conf.Hosts...)) + } if err := d2.Init(); err != nil { return errors.Wrapf(err, "[%s]", model.NodeStatusConfigExt.EN) } @@ -242,8 +254,23 @@ func AddCkClusterNode(task *model.Task, conf *model.CKManClickHouseConfig, d *de return errors.Wrapf(err, "[%s]", model.NodeStatusConfig.EN) } + if d.Ext.Restart { + if err := d2.Restart(); err != nil { + return errors.Wrapf(err, "[%s]", model.NodeStatusConfigExt.EN) + } + + if err := d2.Check(300); err != nil { + log.Logger.Warnf("[%s]addnode check failed: %v", d.Conf.Cluster, err) + //return errors.Wrapf(err, "[%s]", model.NodeStatusCheck.EN) + } + } + conf.Shards = d.Conf.Shards conf.Hosts = append(conf.Hosts, d.Conf.Hosts...) + if conf.Keeper == model.ClickhouseKeeper && conf.KeeperConf.Runtime == model.KeeperRuntimeInternal { + conf.KeeperConf.KeeperNodes = make([]string, len(conf.Hosts)) + copy(conf.KeeperConf.KeeperNodes, conf.Hosts) + } return nil } @@ -260,7 +287,7 @@ func UpgradeCkCluster(task *model.Task, d deploy.CKDeploy) error { } case model.PolicyFull: err := upgradePackage(task, d, 10) - if err != model.CheckTimeOutErr { + if err != nil && err != model.CheckTimeOutErr { return err } default: @@ -345,7 +372,7 @@ func ConfigCkCluster(task *model.Task, d deploy.CKDeploy) error { case model.PolicyFull: deploy.SetNodeStatus(task, model.NodeStatusRestart, model.ALL_NODES_DEFAULT) err := d.Restart() - if err != model.CheckTimeOutErr { + if err != nil && err != model.CheckTimeOutErr { return err } _ = d.Check(30) diff --git a/service/runner/handle.go b/service/runner/handle.go index fe485a8f..ae3a1961 100644 --- a/service/runner/handle.go +++ b/service/runner/handle.go @@ -3,7 +3,6 @@ package runner import ( "encoding/json" - client "github.com/ClickHouse/clickhouse-go/v2" "github.com/housepower/ckman/common" "github.com/housepower/ckman/deploy" "github.com/housepower/ckman/log" @@ -47,6 +46,14 @@ func CKDeployHandle(task *model.Task) error { return err } + if d.Conf.KeeperWithStanalone() { + task.TaskType = model.TaskTypeKeeperDeploy + if err := DeployKeeperCluster(task, d); err != nil { + return err + } + task.TaskType = model.TaskTypeCKDeploy + } + if err := DeployCkCluster(task, d); err != nil { return err } @@ -116,6 +123,14 @@ func CKDestoryHandle(task *model.Task) error { return err } + if d.Conf.KeeperWithStanalone() { + task.TaskType = model.TaskTypeKeeperDestory + if err = DestroyKeeperCluster(task, d, &conf); err != nil { + return err + } + task.TaskType = model.TaskTypeCKDestory + } + deploy.SetNodeStatus(task, 
model.NodeStatusStore, model.ALL_NODES_DEFAULT) if err = repository.Ps.Begin(); err != nil { return err @@ -190,6 +205,11 @@ func CKAddNodeHandle(task *model.Task) error { return err } + if d.Conf.Keeper == model.ClickhouseKeeper && d.Conf.KeeperConf.Runtime == model.KeeperRuntimeInternal { + d.Ext.Restart = true + d.Conf.KeeperConf.KeeperNodes = append(d.Conf.KeeperConf.KeeperNodes, d.Conf.Hosts...) + } + conf, err := repository.Ps.GetClusterbyName(d.Conf.Cluster) if err != nil { return nil @@ -215,25 +235,20 @@ func CKAddNodeHandle(task *model.Task) error { return errors.Wrapf(err, "[%s]", model.NodeStatusConfigExt.EN) } if err := service.FetchSchemerFromOtherNode(conf.Hosts[0]); err != nil { - err = common.ClikHouseExceptionDecode(err) - var exception *client.Exception - if errors.As(err, &exception) { - if exception.Code == 253 { - //Code: 253: Replica /clickhouse/tables/XXX/XXX/replicas/{replica} already exists, clean the znode and retry - zkService, err := zookeeper.GetZkService(conf.Cluster) + if common.ExceptionAS(err, common.REPLICA_ALREADY_EXISTS) { + //Code: 253: Replica /clickhouse/tables/XXX/XXX/replicas/{replica} already exists, clean the znode and retry + zkService, err := zookeeper.GetZkService(conf.Cluster) + if err == nil { + err = zkService.CleanZoopath(conf, conf.Cluster, conf.Hosts[0], false) if err == nil { - err = zkService.CleanZoopath(conf, conf.Cluster, conf.Hosts[0], false) - if err == nil { - if err = service.FetchSchemerFromOtherNode(conf.Hosts[0]); err != nil { - log.Logger.Errorf("fetch schema from other node failed again") - } + if err = service.FetchSchemerFromOtherNode(conf.Hosts[0]); err != nil { + log.Logger.Errorf("fetch schema from other node failed again") } - } else { - log.Logger.Errorf("can't create zookeeper instance:%v", err) } + } else { + log.Logger.Errorf("can't create zookeeper instance:%v", err) } - } - if err != nil { + } else { return errors.Wrapf(err, "[%s]", model.NodeStatusConfigExt.EN) } } @@ -258,6 +273,14 @@ func CKUpgradeHandle(task *model.Task) error { return nil } + if d.Conf.KeeperWithStanalone() { + task.TaskType = model.TaskTypeKeeperUpgrade + if err = UpgradeKeeperCluster(task, d); err != nil { + return err + } + task.TaskType = model.TaskTypeCKUpgrade + } + err = UpgradeCkCluster(task, d) if err != nil { return err @@ -278,23 +301,25 @@ func CKSettingHandle(task *model.Task) error { return err } - if err := ConfigCkCluster(task, d); err != nil { - return err + if !d.Ext.ChangeCk { + deploy.SetNodeStatus(task, model.NodeStatusStore, model.ALL_NODES_DEFAULT) + if err := repository.Ps.UpdateCluster(*d.Conf); err != nil { + return errors.Wrapf(err, "[%s]", model.NodeStatusStore.EN) + } + deploy.SetNodeStatus(task, model.NodeStatusDone, model.ALL_NODES_DEFAULT) + return nil } - // sync table schema when logic cluster exists - deploy.SetNodeStatus(task, model.NodeStatusStore, model.ALL_NODES_DEFAULT) - if d.Conf.LogicCluster != nil { - logics, err := repository.Ps.GetLogicClusterbyName(*d.Conf.LogicCluster) - if err == nil && len(logics) > 0 { - for _, logic := range logics { - if cluster, err := repository.Ps.GetClusterbyName(logic); err == nil { - if clickhouse.SyncLogicTable(cluster, *d.Conf) { - break - } - } - } + if d.Conf.KeeperWithStanalone() { + task.TaskType = model.TaskTypeKeeperSetting + if err := ConfigKeeperCluster(task, d); err != nil { + return err } + task.TaskType = model.TaskTypeCKSetting + } + + if err := ConfigCkCluster(task, d); err != nil { + return err } if err := repository.Ps.Begin(); err != nil { diff 
--git a/service/runner/keeper.go b/service/runner/keeper.go new file mode 100644 index 00000000..dc945bf6 --- /dev/null +++ b/service/runner/keeper.go @@ -0,0 +1,171 @@ +package runner + +import ( + "fmt" + + "github.com/housepower/ckman/common" + "github.com/housepower/ckman/deploy" + "github.com/housepower/ckman/log" + "github.com/housepower/ckman/model" + "github.com/pkg/errors" +) + +func DeployKeeperCluster(task *model.Task, d deploy.CKDeploy) error { + kd := deploy.NewKeeperDeploy(*d.Conf, d.Packages) + deploy.SetNodeStatus(task, model.NodeStatusInit, model.ALL_NODES_DEFAULT) + if err := kd.Init(); err != nil { + return errors.Wrapf(err, "[%s]", model.NodeStatusInit.EN) + } + + deploy.SetNodeStatus(task, model.NodeStatusPrepare, model.ALL_NODES_DEFAULT) + if err := kd.Prepare(); err != nil { + return errors.Wrapf(err, "[%s]", model.NodeStatusPrepare.EN) + } + + deploy.SetNodeStatus(task, model.NodeStatusInstall, model.ALL_NODES_DEFAULT) + if err := kd.Install(); err != nil { + return errors.Wrapf(err, "[%s]", model.NodeStatusInstall.EN) + } + + deploy.SetNodeStatus(task, model.NodeStatusConfig, model.ALL_NODES_DEFAULT) + if err := kd.Config(); err != nil { + return errors.Wrapf(err, "[%s]", model.NodeStatusConfig.EN) + } + + deploy.SetNodeStatus(task, model.NodeStatusStart, model.ALL_NODES_DEFAULT) + if err := kd.Start(); err != nil { + return errors.Wrapf(err, "[%s]", model.NodeStatusStart.EN) + } + + deploy.SetNodeStatus(task, model.NodeStatusCheck, model.ALL_NODES_DEFAULT) + if err := kd.Check(30); err != nil { + //return errors.Wrapf(err, "[%s]", model.NodeStatusCheck.EN) + deploy.SetTaskStatus(task, model.TaskStatusFailed, err.Error()) + } + return nil +} + +func DestroyKeeperCluster(task *model.Task, d deploy.CKDeploy, conf *model.CKManClickHouseConfig) error { + kd := deploy.NewKeeperDeploy(*d.Conf, d.Packages) + deploy.SetNodeStatus(task, model.NodeStatusStop, model.ALL_NODES_DEFAULT) + _ = kd.Stop() + + deploy.SetNodeStatus(task, model.NodeStatusUninstall, model.ALL_NODES_DEFAULT) + if err := kd.Uninstall(); err != nil { + return errors.Wrapf(err, "[%s]", model.NodeStatusInstall.EN) + } + + return nil +} + +func UpgradeKeeperCluster(task *model.Task, d deploy.CKDeploy) error { + kd := deploy.NewKeeperDeploy(*d.Conf, d.Packages) + switch d.Ext.Policy { + case model.PolicyRolling: + var rd deploy.KeeperDeploy + common.DeepCopyByGob(&rd, kd) + for _, host := range d.Conf.KeeperConf.KeeperNodes { + rd.Conf.KeeperConf.KeeperNodes = []string{host} + if err := upgradeKeeperPackage(task, rd, model.MaxTimeOut); err != nil { + return err + } + } + case model.PolicyFull: + err := upgradeKeeperPackage(task, *kd, 30) + if err != nil { + return err + } + default: + return fmt.Errorf("not support policy %s yet", d.Ext.Policy) + } + + return nil +} + +func upgradeKeeperPackage(task *model.Task, d deploy.KeeperDeploy, timeout int) error { + var node string + if d.Ext.Policy == model.PolicyRolling { + node = d.Conf.Hosts[0] + } else { + node = model.ALL_NODES_DEFAULT + } + + deploy.SetNodeStatus(task, model.NodeStatusInit, node) + if err := d.Init(); err != nil { + return errors.Wrapf(err, "[%s]", model.NodeStatusInit.EN) + } + deploy.SetNodeStatus(task, model.NodeStatusStop, node) + if err := d.Stop(); err != nil { + log.Logger.Warnf("stop cluster %s failed: %v", d.Conf.Cluster, err) + } + + deploy.SetNodeStatus(task, model.NodeStatusPrepare, node) + if err := d.Prepare(); err != nil { + return errors.Wrapf(err, "[%s]", model.NodeStatusPrepare.EN) + } + + deploy.SetNodeStatus(task, 
model.NodeStatusUpgrade, node) + if err := d.Upgrade(); err != nil { + return errors.Wrapf(err, "[%s]", model.NodeStatusUpgrade.EN) + } + + deploy.SetNodeStatus(task, model.NodeStatusConfig, node) + if err := d.Config(); err != nil { + return errors.Wrapf(err, "[%s]", model.NodeStatusConfig.EN) + } + + deploy.SetNodeStatus(task, model.NodeStatusStart, node) + if err := d.Start(); err != nil { + return errors.Wrapf(err, "[%s]", model.NodeStatusStart.EN) + } + + deploy.SetNodeStatus(task, model.NodeStatusCheck, node) + if err := d.Check(timeout); err != nil { + return errors.Wrapf(err, "[%s]", model.NodeStatusCheck.EN) + } + deploy.SetNodeStatus(task, model.NodeStatusDone, node) + + return nil +} + +func ConfigKeeperCluster(task *model.Task, d deploy.CKDeploy) error { + kd := deploy.NewKeeperDeploy(*d.Conf, d.Packages) + deploy.SetNodeStatus(task, model.NodeStatusInit, model.ALL_NODES_DEFAULT) + if err := kd.Init(); err != nil { + return errors.Wrapf(err, "[%s]", model.NodeStatusInit.EN) + } + deploy.SetNodeStatus(task, model.NodeStatusConfig, model.ALL_NODES_DEFAULT) + if err := kd.Config(); err != nil { + return errors.Wrapf(err, "[%s]", model.NodeStatusConfig.EN) + } + + if kd.Ext.Restart { + switch d.Ext.Policy { + case model.PolicyRolling: + var rd deploy.CKDeploy + common.DeepCopyByGob(&rd, kd) + for _, host := range rd.Conf.KeeperConf.KeeperNodes { + deploy.SetNodeStatus(task, model.NodeStatusRestart, host) + rd.Conf.KeeperConf.KeeperNodes = []string{host} + if err := rd.Restart(); err != nil { + return err + } + if err := rd.Check(model.MaxTimeOut); err != nil { + return err + } + deploy.SetNodeStatus(task, model.NodeStatusDone, host) + } + case model.PolicyFull: + deploy.SetNodeStatus(task, model.NodeStatusRestart, model.ALL_NODES_DEFAULT) + err := kd.Restart() + if err != nil && err != model.CheckTimeOutErr { + return err + } + _ = d.Check(30) + default: + return fmt.Errorf("not support policy %s yet", d.Ext.Policy) + } + + } + return nil +} diff --git a/service/zookeeper/zk_test.go b/service/zookeeper/zk_test.go new file mode 100644 index 00000000..de3aad73 --- /dev/null +++ b/service/zookeeper/zk_test.go @@ -0,0 +1,14 @@ +package zookeeper + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestMetric(t *testing.T) { + resp, err := ZkMetric("192.168.122.101", 9181, "mntr") + assert.Nil(t, err) + fmt.Println(string(resp)) +} diff --git a/service/zookeeper/zookeeper_service.go b/service/zookeeper/zookeeper_service.go index 6acb8a2b..0a7cafc4 100644 --- a/service/zookeeper/zookeeper_service.go +++ b/service/zookeeper/zookeeper_service.go @@ -4,19 +4,17 @@ import ( "encoding/json" "fmt" "io" - "net/http" + "net" "path" "path/filepath" - "sort" + "regexp" "strings" "time" - "github.com/housepower/ckman/common" "github.com/housepower/ckman/repository" "github.com/go-zookeeper/zk" "github.com/housepower/ckman/model" - "github.com/housepower/ckman/service/clickhouse" "github.com/patrickmn/go-cache" "github.com/pkg/errors" ) @@ -59,7 +57,8 @@ func GetZkService(clusterName string) (*ZkService, error) { } else { conf, err := repository.Ps.GetClusterbyName(clusterName) if err == nil { - service, err := NewZkService(conf.ZkNodes, conf.ZkPort) + nodes, port := GetZkInfo(&conf) + service, err := NewZkService(nodes, port) if err != nil { return nil, err } @@ -71,73 +70,6 @@ func GetZkService(clusterName string) (*ZkService, error) { } } -func (z *ZkService) GetReplicatedTableStatus(conf *model.CKManClickHouseConfig) ([]model.ZkReplicatedTableStatus, error) { - if 
!conf.IsReplica { - return nil, nil - } - err := clickhouse.GetReplicaZkPath(conf) - if err != nil { - return nil, err - } - - tableStatus := make([]model.ZkReplicatedTableStatus, len(conf.ZooPath)) - tableIndex := 0 - for key, value := range conf.ZooPath { - status := model.ZkReplicatedTableStatus{ - Name: key, - } - shards := make([][]string, len(conf.Shards)) - status.Values = shards - tableStatus[tableIndex] = status - - for shardIndex, shard := range conf.Shards { - replicas := make([]string, len(shard.Replicas)) - shards[shardIndex] = replicas - - zooPath := strings.Replace(value, "{shard}", fmt.Sprintf("%d", shardIndex+1), -1) - zooPath = strings.Replace(zooPath, "{cluster}", conf.Cluster, -1) - - path := fmt.Sprintf("%s/leader_election", zooPath) - leaderElection, _, err := z.Conn.Children(path) - if err != nil { - continue - } - sort.Strings(leaderElection) - // fix out of range cause panic issue - if len(leaderElection) == 0 { - continue - } - leaderBytes, _, _ := z.Conn.Get(fmt.Sprintf("%s/%s", path, leaderElection[0])) - if len(leaderBytes) == 0 { - continue - } - leader := strings.Split(string(leaderBytes), " ")[0] - - for replicaIndex, replica := range shard.Replicas { - // the clickhouse version 20.5.x already Remove leader election, refer to : allow multiple leaders https://github.com/ClickHouse/ClickHouse/pull/11639 - const featureVersion = "20.5.x" - logPointer := "" - if common.CompareClickHouseVersion(conf.Version, featureVersion) >= 0 { - logPointer = "ML" - } else { - if leader == replica.Ip { - logPointer = "L" - } else { - logPointer = "F" - } - } - path = fmt.Sprintf("%s/replicas/%s/log_pointer", zooPath, replica.Ip) - pointer, _, _ := z.Conn.Get(path) - logPointer = logPointer + fmt.Sprintf("[%s]", pointer) - replicas[replicaIndex] = logPointer - } - } - tableIndex++ - } - - return tableStatus, nil -} - func (z *ZkService) DeleteAll(node string) (err error) { children, stat, err := z.Conn.Children(node) if errors.Is(err, zk.ErrNoNode) { @@ -157,6 +89,14 @@ func (z *ZkService) DeleteAll(node string) (err error) { return z.Conn.Delete(node, stat.Version) } +func (z *ZkService) Delete(node string) (err error) { + _, stat, err := z.Conn.Get(node) + if err != nil { + return + } + return z.Conn.Delete(node, stat.Version) +} + func (z *ZkService) DeletePathUntilNode(path, endNode string) error { ok, _, _ := z.Conn.Exists(path) if !ok { @@ -218,46 +158,54 @@ func clean(svr *ZkService, znode, target string, dryrun bool) error { } func ZkMetric(host string, port int, metric string) ([]byte, error) { - url := fmt.Sprintf("http://%s:%d/commands/%s", host, port, metric) - request, err := http.NewRequest("GET", url, nil) + conn, err := net.Dial("tcp", fmt.Sprintf("%s:%d", host, port)) if err != nil { - return nil, errors.Wrap(err, "") + return nil, err } - - client := &http.Client{} - response, err := client.Do(request) + defer conn.Close() + _, err = conn.Write([]byte(metric)) if err != nil { - return nil, errors.Wrap(err, "") + return nil, err } - defer response.Body.Close() - - if response.StatusCode != 200 { - return nil, errors.Errorf("%s", response.Status) + var b []byte + for { + buf := [8192]byte{} + n, err := conn.Read(buf[:]) + if err != nil { + if err == io.EOF { + break + } + return nil, err + } + if n == 0 { + break + } + b = append(b, buf[:n]...) 
+ } + resp := make(map[string]interface{}) + lines := strings.Split(string(b), "\n") + re := regexp.MustCompile(`zk_(\w+)\s+(.*)`) + for _, line := range lines { + matches := re.FindStringSubmatch(line) + if len(matches) >= 3 { + resp[matches[1]] = matches[2] + } } - - body, err := io.ReadAll(response.Body) - if err != nil { - return nil, errors.Wrap(err, "") + if len(resp) == 0 { + return b, nil } - - return body, nil + return json.Marshal(resp) } -func GetZkClusterNodes(host string, port int) ([]string, error) { - b, err := ZkMetric(host, port, "voting_view") - if err != nil { - return nil, err - } - zkCluster := make(map[string]interface{}) - err = json.Unmarshal(b, &zkCluster) - if err != nil { - return nil, err - } +func GetZkInfo(conf *model.CKManClickHouseConfig) ([]string, int) { var nodes []string - for _, v := range zkCluster["current_config"].(map[string]interface{}) { - for _, value := range v.(map[string]interface{})["server_addresses"].([]interface{}) { - nodes = append(nodes, strings.Split(value.(string), ":")[0]) - } + var port int + if conf.Keeper == model.ClickhouseKeeper { + nodes = conf.KeeperConf.KeeperNodes + port = conf.KeeperConf.TcpPort + } else { + nodes = conf.ZkNodes + port = conf.ZkPort } - return nodes, nil + return nodes, port } diff --git a/tests/AddNode.json b/tests/AddNode.json deleted file mode 100644 index 99c5e746..00000000 --- a/tests/AddNode.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "ips": ["{DOCKER_NODE4}"], - "shard": 2 -} diff --git a/tests/DeployCK.json b/tests/DeployCK.json deleted file mode 100644 index 6ce680db..00000000 --- a/tests/DeployCK.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "hosts": {DOCKER_CLICKHOUSE_NODES}, - "user": "root", - "password": "123456", - "sshPort":22, - "clickhouse": { - "path": "/var/lib", - "user": "ck", - "password": "123456", - "zkNodes": {DOCKER_ZOOKEEPER_HOSTS}, - "zkPort": 12181, - "zkStatusPort": 8080, - "clustername": "test", - "shards": [{ - "replicas":[ - { - "ip": "{DOCKER_NODE1}" - }, - { - "ip": "{DOCKER_NODE2}" - } - ] - }, { - "replicas" :[ - { - "ip": "{DOCKER_NODE3}" - }, - { - "ip": "{DOCKER_NODE4}" - } - ] - }], - "packageVersion": "20.9.3.45", - "ckTcpPort": 19000 - } -} diff --git a/tests/Upgrade.json b/tests/Upgrade.json deleted file mode 100644 index 4df0937d..00000000 --- a/tests/Upgrade.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "packageVersion": "21.3.9.83" -}
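
The rebalance change in service/clickhouse/rebalance.go above stops treating the measured row count as an exact invariant: with the new AllowLossRate field, OriCount becomes (1 - AllowLossRate) * original count, i.e. the smallest row count that still counts as a successful rebalance. The sketch below is a minimal illustration of that arithmetic and of how a post-rebalance verification could compare against it; the helper name minAcceptableCount and the standalone check are assumptions for illustration, not ckman's actual validation path.

package main

import "fmt"

// minAcceptableCount mirrors the threshold computed in the patched
// CKRebalance.InitCKConns: the measured row count scaled down by the
// tolerated loss rate. ckman stores the result directly in r.OriCount.
func minAcceptableCount(oriCount uint64, allowLossRate float64) uint64 {
	// The float64 -> uint64 conversion truncates, so for rates that are not
	// exactly representable the threshold can land one row below the exact product.
	return uint64((1 - allowLossRate) * float64(oriCount))
}

func main() {
	// 1,000,000 rows with a 12.5% tolerated loss gives a floor of 875,000 rows.
	threshold := minAcceptableCount(1_000_000, 0.125)
	fmt.Println(threshold) // 875000

	// A post-rebalance check could then compare the final count against the floor
	// (this check is illustrative, not part of the patch).
	finalCount := uint64(980_000)
	if finalCount < threshold {
		fmt.Println("rebalance lost more rows than AllowLossRate permits")
	} else {
		fmt.Println("row count is within the allowed loss margin")
	}
}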
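
The new ClearZnodes cron job in service/cron/zookeeper.go gathers stale replication_queue paths from system.replication_queue and removes them one by one, counting znodes that are already gone separately from real deletions. The following is a minimal sketch of that delete-and-count loop against a raw go-zookeeper connection, assuming a reachable ZooKeeper or Keeper endpoint; the address and the queue path are placeholders, and the loop works on *zk.Conn rather than ckman's ZkService wrapper.

package main

import (
	"errors"
	"fmt"
	"time"

	"github.com/go-zookeeper/zk"
)

// removeZnodes deletes each znode at its current version and reports how many
// were removed versus already missing, mirroring the deleted/notexist counters
// returned by the new RemoveZnodes helper.
func removeZnodes(conn *zk.Conn, znodes []string) (deleted, notexist int) {
	for _, znode := range znodes {
		_, stat, err := conn.Get(znode)
		if err != nil {
			if errors.Is(err, zk.ErrNoNode) {
				notexist++
			}
			continue
		}
		if err := conn.Delete(znode, stat.Version); err == nil {
			deleted++
		}
	}
	return deleted, notexist
}

func main() {
	// The endpoint and znode path are placeholders for illustration only.
	conn, _, err := zk.Connect([]string{"192.168.122.101:9181"}, 5*time.Second)
	if err != nil {
		panic(err)
	}
	defer conn.Close()

	queue := []string{"/clickhouse/tables/cluster1/default/t1/replicas/node1/queue/queue-0000000042"}
	deleted, notexist := removeZnodes(conn, queue)
	fmt.Printf("deleted=%d, already gone=%d\n", deleted, notexist)
}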
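
ZkMetric in service/zookeeper/zookeeper_service.go no longer depends on the ZooKeeper HTTP admin server: it opens a plain TCP connection to the client port, sends a four-letter command such as mntr, reads until the server closes the connection, and converts the zk_<key> lines into JSON. Below is a standalone sketch of that exchange using only the standard library; the host and port are placeholders matching the zk_test.go example, and io.ReadAll stands in for the buffered read loop in the patch.

package main

import (
	"encoding/json"
	"fmt"
	"io"
	"net"
	"regexp"
	"time"
)

// fourLetterWord sends a ZooKeeper/Keeper admin command ("mntr", "ruok", ...)
// over TCP and returns the raw response. The server closes the connection
// after answering, so reading until EOF is enough.
func fourLetterWord(addr, cmd string) ([]byte, error) {
	conn, err := net.DialTimeout("tcp", addr, 5*time.Second)
	if err != nil {
		return nil, err
	}
	defer conn.Close()
	if _, err := conn.Write([]byte(cmd)); err != nil {
		return nil, err
	}
	return io.ReadAll(conn)
}

func main() {
	raw, err := fourLetterWord("192.168.122.101:9181", "mntr")
	if err != nil {
		panic(err)
	}

	// Same parsing idea as the reworked ZkMetric: strip the "zk_" prefix and
	// collect key/value pairs into a map, then emit JSON.
	re := regexp.MustCompile(`zk_(\w+)\s+(.*)`)
	metrics := make(map[string]string)
	for _, m := range re.FindAllStringSubmatch(string(raw), -1) {
		metrics[m[1]] = m[2]
	}
	out, _ := json.Marshal(metrics)
	fmt.Println(string(out))
}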