Skip to content

Commit

Permalink
feat(backend): 支持 flow 批量失败重试 #3041
Browse files Browse the repository at this point in the history
  • Loading branch information
zhangzhw8 committed Jan 16, 2024
1 parent 1b76ec2 commit f33a8ba
Show file tree
Hide file tree
Showing 19 changed files with 89 additions and 49 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/check_hard_code_ip.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
- uses: actions/checkout@v2
- name: Check hard code ip
run: |
RESULT=$(grep -nrE '\b([0-9]{1,3}\.){3}[0-9]{1,3}\b' * | grep -vE '\b[012345678]\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}' | grep -vE '127\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}' grep -vE '192\.168\.[0-9]{1,3}\.[0-9]{1,3}') || true
RESULT=$(grep -nrE '\b([0-9]{1,3}\.){3}[0-9]{1,3}\b' * | grep -v poetry.lock | grep -vE '\b[012345]\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}' | grep -vE '127\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}' | grep -vE '192\.168\.[0-9]{1,3}\.[0-9]{1,3}') || true
if [[ ${RESULT} == '' ]]; then
echo "good job!"
else
Expand Down
2 changes: 1 addition & 1 deletion dbm-services/common/dbha/ha-module/test/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func TestGetInstanceByIp(t *testing.T) {
cmdbC := client.CmDBClient{
Client: *c,
}
inf, err := cmdbC.GetDBInstanceInfoByIp("6.6.6.6")
inf, err := cmdbC.GetDBInstanceInfoByIp("127.0.0.6")
if err != nil {
fmt.Printf("get instance failed. err:%s", err.Error())
t.FailNow()
Expand Down
4 changes: 2 additions & 2 deletions dbm-services/mysql/db-priv/service/db_meta_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,12 @@ GetAllClustersInfo 获取业务下所有集群信息
],
"storages": [
{
"ip": "3.3.3.3",
"ip": "127.0.0.3",
"instance_role": "backend_slave",
"port": 30000
},
{
"ip": "4.4.4.4",
"ip": "127.0.0.4",
"instance_role": "backend_master",
"port": 40000
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func (g *GrantReplComp) Example() interface{} {
Params: &GrantReplParam{
Host: "1.1.1.1",
Port: 3306,
ReplHosts: []string{"2.2.2.2", "3.3.3.3"},
ReplHosts: []string{"2.2.2.2", "127.0.0.3"},
},
GeneralParam: &components.GeneralParam{
RuntimeAccountParam: components.RuntimeAccountParam{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,23 +38,23 @@ func (a *AddSlaveClusterRoutingComp) Example() interface{} {
Port: 26000,
SlaveInstances: []Instance{
{
Host: "3.3.3.3",
Host: "127.0.0.3",
Port: 20000,
ShardID: 0,
},
{
Host: "3.3.3.3",
Host: "127.0.0.3",
Port: 20001,
ShardID: 1,
},
},
SpiderSlaveInstances: []Instance{
{
Host: "3.3.3.3",
Host: "127.0.0.3",
Port: 25000,
},
{
Host: "3.3.3.3",
Host: "127.0.0.3",
Port: 25001,
},
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,13 +141,13 @@ func (s *SpiderClusterBackendMigrateCutoverComp) Example() interface{} {
Port: 3006,
},
DestMaster: CutoverUnit{
Host: "3.3.3.3",
Host: "127.0.0.3",
Port: 3306,
User: "xx",
Password: "xx",
},
DestSlave: CutoverUnit{
Host: "4.4.4.4",
Host: "127.0.0.4",
Port: 3306,
User: "xx",
Password: "xx",
Expand All @@ -159,13 +159,13 @@ func (s *SpiderClusterBackendMigrateCutoverComp) Example() interface{} {
Port: 3007,
},
DestMaster: CutoverUnit{
Host: "3.3.3.3",
Host: "127.0.0.3",
Port: 3307,
User: "xx",
Password: "xx",
},
DestSlave: CutoverUnit{
Host: "4.4.4.4",
Host: "127.0.0.4",
Port: 3307,
User: "xx",
Password: "xx",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func (r *SpiderClusterBackendSwitchComp) Example() interface{} {
Port: 3306,
},
Slave: Instance{
Host: "3.3.3.3",
Host: "127.0.0.3",
Port: 3306,
},
},
Expand All @@ -117,7 +117,7 @@ func (r *SpiderClusterBackendSwitchComp) Example() interface{} {
Port: 3307,
},
Slave: Instance{
Host: "3.3.3.3",
Host: "127.0.0.3",
Port: 3307,
},
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ package scenesnapshot
+-------+------+-------------------+----+---------+------+-----------+--------------------------------+
| ID | USER | HOST | DB | COMMAND | TIME | STATE | INFO |
+-------+------+-------------------+----+---------+------+-----------+--------------------------------+
| 74590 | root | 10.45.39.34:54219 | | Query | 0 | executing | SELECT ID, USER, |
| 74590 | root | 127.0.0.1:54219 | | Query | 0 | executing | SELECT ID, USER, |
| | | | | | | | HOST, DB, COMMAND, |
| | | | | | | | TIME, STATE, INFO FROM |
| | | | | | | | INFORMATION_SCHEMA.PROCESSLIST |
+-------+------+-------------------+----+---------+------+-----------+--------------------------------+
| 74572 | root | 10.45.39.34:62014 | | Sleep | 2865 | | |
| 74572 | root | 127.0.0.2:62014 | | Sleep | 2865 | | |
+-------+------+-------------------+----+---------+------+-----------+--------------------------------+
*/

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ func TestDecodeClusterNodes(t *testing.T) {
mylog.UnitTestInitLog()
convey.Convey("cluster nodes decode", t, func() {
clusterNodesStr := `
17922e98b0b8f7a9d233422cf8ae55f2d22fdab7 4.4.4.4:30003@40003 master - 0 1655005291000 20 connected 7509-8191
e81c4276dce41ae3ed4a5fe18e460ed5b9f77e8b 3.3.3.3:30003@40003 slave 17922e98b0b8f7a9d233422cf8ae55f2d22fdab7 0 1655005291000 20 connected
17922e98b0b8f7a9d233422cf8ae55f2d22fdab7 127.0.0.4:30003@40003 master - 0 1655005291000 20 connected 7509-8191
e81c4276dce41ae3ed4a5fe18e460ed5b9f77e8b 127.0.0.3:30003@40003 slave 17922e98b0b8f7a9d233422cf8ae55f2d22fdab7 0 1655005291000 20 connected
56e53ca70ef13f3ca1817b0746d64319a4b66fed synctest-redis-rdsplus1-0.synctest-svc.vip:30000@40000 myself,slave 72ffcd1f8d39d1b6011ab38f5e1a42dd6f66f765 0 1655006313000 3 connected
72ffcd1f8d39d1b6011ab38f5e1a42dd6f66f765 synctest-redis-rdsplus1-1.synctest-svc.vip:30000@40000 master - 0 1655006315419 7 connected 5461-10921
`
Expand All @@ -25,7 +25,7 @@ func TestDecodeClusterNodes(t *testing.T) {
}
convey.So(len(nodes), convey.ShouldEqual, 4)
convey.So(nodes[0].NodeID, convey.ShouldEqual, "17922e98b0b8f7a9d233422cf8ae55f2d22fdab7")
convey.So(nodes[0].IP, convey.ShouldEqual, "4.4.4.4")
convey.So(nodes[0].IP, convey.ShouldEqual, "127.0.0.4")
convey.So(nodes[0].Port, convey.ShouldEqual, 30003)
convey.So(nodes[0].SlotsMap, convey.ShouldContainKey, 7560)
convey.So(nodes[1].MasterID, convey.ShouldEqual, "17922e98b0b8f7a9d233422cf8ae55f2d22fdab7")
Expand All @@ -36,12 +36,12 @@ func TestDecodeClusterNodes(t *testing.T) {
})

convey.Convey("cluster nodes decode2", t, func() {
clusterNodesStr := `36b96240e16051711d2391472cfd5900d33dc8bd 5.5.5.5:46000@56000 master - 0 1660014754278 5 connected
a32f9cb266d85ea96a1a87ce56872f339e2a257f 5.5.5.5:45001@55001 master - 0 1660014755280 4 connected 5462-10923
5d555b4ab569de196f71afd275c1edf8c046959a 5.5.5.5:45000@55000 myself,master - 0 1660014753000 1 connected 0-5461
90ed7be9db5e4b78e959ad3b40253c2ffb3d5845 5.5.5.5:46002@56002 master - 0 1660014752269 3 connected
dcff36cc5e915024d12173b1c5a3235e9186f193 5.5.5.5:46001@56001 master - 0 1660014753273 2 connected
ff29e2e2782916a0451d5f4064cb55483f4b2a97 5.5.5.5:45002@55002 master - 0 1660014753000 0 connected 10924-16383
clusterNodesStr := `36b96240e16051711d2391472cfd5900d33dc8bd 127.0.0.5:46000@56000 master - 0 1660014754278 5 connected
a32f9cb266d85ea96a1a87ce56872f339e2a257f 127.0.0.5:45001@55001 master - 0 1660014755280 4 connected 5462-10923
5d555b4ab569de196f71afd275c1edf8c046959a 127.0.0.5:45000@55000 myself,master - 0 1660014753000 1 connected 0-5461
90ed7be9db5e4b78e959ad3b40253c2ffb3d5845 127.0.0.5:46002@56002 master - 0 1660014752269 3 connected
dcff36cc5e915024d12173b1c5a3235e9186f193 127.0.0.5:46001@56001 master - 0 1660014753273 2 connected
ff29e2e2782916a0451d5f4064cb55483f4b2a97 127.0.0.5:45002@55002 master - 0 1660014753000 0 connected 10924-16383
`
nodes, err := DecodeClusterNodes(clusterNodesStr)
if err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ import (
func TestDecodeClusterNodes(t *testing.T) {
convey.Convey("cluster nodes decode", t, func() {
clusterNodesStr := `
17922e98b0b8f7a9d233422cf8ae55f2d22fdab7 4.4.4.4:30003@40003 master - 0 1655005291000 20 connected 7509-8191
e81c4276dce41ae3ed4a5fe18e460ed5b9f77e8b 3.3.3.3:30003@40003 slave 17922e98b0b8f7a9d233422cf8ae55f2d22fdab7 0 1655005291000 20 connected
17922e98b0b8f7a9d233422cf8ae55f2d22fdab7 127.0.0.4:30003@40003 master - 0 1655005291000 20 connected 7509-8191
e81c4276dce41ae3ed4a5fe18e460ed5b9f77e8b 127.0.0.3:30003@40003 slave 17922e98b0b8f7a9d233422cf8ae55f2d22fdab7 0 1655005291000 20 connected
56e53ca70ef13f3ca1817b0746d64319a4b66fed synctest-redis-rdsplus1-0.synctest-svc.vip:30000@40000 myself,slave 72ffcd1f8d39d1b6011ab38f5e1a42dd6f66f765 0 1655006313000 3 connected
72ffcd1f8d39d1b6011ab38f5e1a42dd6f66f765 synctest-redis-rdsplus1-1.synctest-svc.vip:30000@40000 master - 0 1655006315419 7 connected 5461-10921
`
Expand All @@ -22,7 +22,7 @@ func TestDecodeClusterNodes(t *testing.T) {
}
convey.So(len(nodes), convey.ShouldEqual, 4)
convey.So(nodes[0].NodeID, convey.ShouldEqual, "17922e98b0b8f7a9d233422cf8ae55f2d22fdab7")
convey.So(nodes[0].IP, convey.ShouldEqual, "4.4.4.4")
convey.So(nodes[0].IP, convey.ShouldEqual, "127.0.0.4")
convey.So(nodes[0].Port, convey.ShouldEqual, 30003)
convey.So(nodes[0].SlotsMap, convey.ShouldContainKey, 7560)
convey.So(nodes[1].MasterID, convey.ShouldEqual, "17922e98b0b8f7a9d233422cf8ae55f2d22fdab7")
Expand All @@ -33,12 +33,12 @@ func TestDecodeClusterNodes(t *testing.T) {
})

convey.Convey("cluster nodes decode2", t, func() {
clusterNodesStr := `36b96240e16051711d2391472cfd5900d33dc8bd 5.5.5.5:46000@56000 master - 0 1660014754278 5 connected
a32f9cb266d85ea96a1a87ce56872f339e2a257f 5.5.5.5:45001@55001 master - 0 1660014755280 4 connected 5462-10923
5d555b4ab569de196f71afd275c1edf8c046959a 5.5.5.5:45000@55000 myself,master - 0 1660014753000 1 connected 0-5461
90ed7be9db5e4b78e959ad3b40253c2ffb3d5845 5.5.5.5:46002@56002 master - 0 1660014752269 3 connected
dcff36cc5e915024d12173b1c5a3235e9186f193 5.5.5.5:46001@56001 master - 0 1660014753273 2 connected
ff29e2e2782916a0451d5f4064cb55483f4b2a97 5.5.5.5:45002@55002 master - 0 1660014753000 0 connected 10924-16383
clusterNodesStr := `36b96240e16051711d2391472cfd5900d33dc8bd 127.0.0.5:46000@56000 master - 0 1660014754278 5 connected
a32f9cb266d85ea96a1a87ce56872f339e2a257f 127.0.0.5:45001@55001 master - 0 1660014755280 4 connected 5462-10923
5d555b4ab569de196f71afd275c1edf8c046959a 127.0.0.5:45000@55000 myself,master - 0 1660014753000 1 connected 0-5461
90ed7be9db5e4b78e959ad3b40253c2ffb3d5845 127.0.0.5:46002@56002 master - 0 1660014752269 3 connected
dcff36cc5e915024d12173b1c5a3235e9186f193 127.0.0.5:46001@56001 master - 0 1660014753273 2 connected
ff29e2e2782916a0451d5f4064cb55483f4b2a97 127.0.0.5:45002@55002 master - 0 1660014753000 0 connected 10924-16383
`
nodes, err := DecodeClusterNodes(clusterNodesStr)
if err != nil {
Expand Down
4 changes: 2 additions & 2 deletions dbm-ui/backend/bk_dataview/dashboards/json/kafka.json
Original file line number Diff line number Diff line change
Expand Up @@ -4231,7 +4231,7 @@
"options": {
"mode": "exclude",
"names": [
"172.27.129.217-9092"
"127.0.0.1-9092"
],
"prefix": "All except:",
"readOnly": true
Expand Down Expand Up @@ -4557,7 +4557,7 @@
"options": {
"mode": "exclude",
"names": [
"172.27.128.218"
"127.0.0.1"
],
"prefix": "All except:",
"readOnly": true
Expand Down
4 changes: 2 additions & 2 deletions dbm-ui/backend/bk_dataview/dashboards/json/tendbha.json
Original file line number Diff line number Diff line change
Expand Up @@ -8358,8 +8358,8 @@
{
"current": {
"selected": false,
"text": "9.143.80.224",
"value": "9.143.80.224"
"text": "127.0.0.1",
"value": "127.0.0.1"
},
"datasource": {
"type": "bkmonitor-timeseries-datasource",
Expand Down
30 changes: 30 additions & 0 deletions dbm-ui/backend/db_services/taskflow/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from typing import Any, Dict, List, Optional

from bamboo_engine.api import EngineAPIResult
from bamboo_engine.eri import NodeType
from django.utils import timezone
from django.utils.translation import gettext as _

Expand Down Expand Up @@ -78,6 +79,15 @@ def retry_node(self, node_id: str):
"""重试节点"""
return task.retry_node(root_id=self.root_id, node_id=node_id, retry_times=1)

def batch_retry_nodes(self):
    """
    Retry every failed node of this flow, one by one.

    The node ids come from ``get_failed_node_ids``. This is a best-effort
    operation: a node whose retry fails is logged and skipped so that the
    remaining nodes are still retried. Nothing is returned.
    """
    for node_id in self.get_failed_node_ids():
        try:
            outcome = self.retry_node(node_id)
        except Exception as err:
            logger.error(f"{node_id} retry failed, {err}")
            continue

        # NOTE(review): retry_node appears to hand back an engine API result whose
        # `.result` flag is falsy on failure instead of raising -- confirm against
        # `task.retry_node`. Without this check such failures would go unnoticed.
        if not getattr(outcome, "result", True):
            logger.error(f"{node_id} retry failed, {getattr(outcome, 'message', outcome)}")

def skip_node(self, node_id: str):
"""跳过节点"""
result = BambooEngine(root_id=self.root_id).skip_node(node_id=node_id)
Expand Down Expand Up @@ -107,6 +117,26 @@ def callback_node(self, node_id: str, desc: Optional[Any]):

return result

def get_failed_node_ids(self) -> List[str]:
    """
    Collect the ids of the failed service-activity nodes of this flow.

    Reads the pipeline tree states for ``self.root_id`` and walks its
    ``activities`` mapping depth-first. An activity that carries a
    ``pipeline`` entry has that pipeline's activities visited before the
    activity itself is checked.
    """
    tree_states = BambooEngine(root_id=self.root_id).get_pipeline_tree_states()

    def collect(current_activities):
        # Returns the matching ids found in this level and everything nested below it.
        matched = []
        for act_id, activity in current_activities.items():
            # Descend into the nested pipeline first, when there is one.
            if "pipeline" in activity:
                matched.extend(collect(activity["pipeline"].get("activities", {})))
            if activity.get("status") == StateType.FAILED and activity.get("type") == NodeType.ServiceActivity:
                matched.append(act_id)
        return matched

    return collect(tree_states.get("activities", {}))

def get_node_histories(self, node_id: str) -> List[Dict[str, Any]]:
"""获取节点历史版本信息"""
histories = [
Expand Down
9 changes: 9 additions & 0 deletions dbm-ui/backend/db_services/taskflow/views/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,15 @@ def retry_node(self, requests, *args, **kwargs):
validated_data = self.params_validate(self.get_serializer_class())
return Response(TaskFlowHandler(root_id=root_id).retry_node(node_id=validated_data["node_id"]).result)

@common_swagger_auto_schema(
    operation_summary=_("批量重试"),
    tags=[SWAGGER_TAG],
)
@action(methods=["POST"], detail=True)
def batch_retry_nodes(self, requests, *args, **kwargs):
    # Batch-retry endpoint: delegates to the task-flow handler of the flow named by
    # the `root_id` URL kwarg and wraps whatever the handler returns in the response.
    handler = TaskFlowHandler(root_id=kwargs["root_id"])
    return Response(handler.batch_retry_nodes())

@common_swagger_auto_schema(
operation_summary=_("跳过节点"),
tags=[SWAGGER_TAG],
Expand Down
12 changes: 6 additions & 6 deletions dbm-ui/backend/flow/views/migrate_views/pulsar_fake_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,41 +53,41 @@ class FakeInstallPulsarSceneApiView(MigrateFlowView):
"nodes": {
"zookeeper": [
{
"ip": "1.1.1.1",
"ip": "127.0.0.1",
"bk_cloud_id": 0,
"bk_host_id": 1,
"bk_biz_id": 111
},
{
"ip": "2.2.2.2",
"ip": "127.0.0.2",
"bk_cloud_id": 0,
"bk_host_id": 2,
"bk_biz_id": 111
},
{
"ip": "3.3.3.3",
"ip": "127.0.0.3",
"bk_cloud_id": 0,
"bk_host_id": 3,
"bk_biz_id": 111
}
],
"broker": [
{
"ip": "4.4.4.4",
"ip": "127.0.0.4",
"bk_cloud_id": 0,
"bk_host_id": 4,
"bk_biz_id": 111
}
],
"bookkeeper": [
{
"ip": "5.5.5.5",
"ip": "127.0.0.5",
"bk_cloud_id": 0,
"bk_host_id": 5,
"bk_biz_id": 111
},
{
"ip": "6.6.6.6",
"ip": "127.0.0.6",
"bk_cloud_id": 0,
"bk_host_id": 6,
"bk_biz_id": 111
Expand Down
1 change: 1 addition & 0 deletions dbm-ui/bin/environ.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@ export APP_TOKEN="xxxxxx"
export DJANGO_SETTINGS_MODULE=config.prod
export BK_LOG_DIR=/tmp/bk-dbm
export BK_IAM_SKIP=true
export DBA_APP_BK_BIZ_ID=0
export DB_NAME="bk_dbm"
export REPORT_DB_NAME="bk_dbm_report"
4 changes: 2 additions & 2 deletions helm-charts/bk-dbm/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,5 +79,5 @@ dependencies:
description: A Helm chart for bk-dbm
name: bk-dbm
type: application
version: 1.3.0-alpha.35
appVersion: 1.3.0-alpha.35
version: 1.3.0-alpha.36
appVersion: 1.3.0-alpha.36
2 changes: 1 addition & 1 deletion helm-charts/bk-dbm/charts/dbconfig/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 0.0.1-alpha.72
appVersion: 0.0.1-alpha.73
description: A Helm chart for dbconfig
name: dbconfig
type: application
Expand Down
2 changes: 1 addition & 1 deletion helm-charts/bk-dbm/charts/dbm/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 1.3.0-alpha.303
appVersion: 1.3.0-alpha.315
description: A Helm chart for dbm
name: dbm
type: application
Expand Down

0 comments on commit f33a8ba

Please sign in to comment.