Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ACM-14639: Dashboard loader: retry adding folder on errors #1646

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 82 additions & 37 deletions loaders/dashboards/pkg/controller/dashboard_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"net/http"
"os"
Expand Down Expand Up @@ -40,24 +41,25 @@ const (
var (
grafanaURI = "http://127.0.0.1:3001"
// Retry on errors.
retry = 10
maxHttpRetry = 10
maxDashboardRetry = 40
)

// RunGrafanaDashboardController ...
func RunGrafanaDashboardController(stop <-chan struct{}) {
config, err := clientcmd.BuildConfigFromFlags("", "")
if err != nil {
klog.Error("Failed to get cluster config", "error", err)
klog.Error("failed to get cluster config", "error", err)
}
// Build kubeclient client and informer for managed cluster
kubeClient, err := kubernetes.NewForConfig(config)
if err != nil {
klog.Fatal("Failed to build kubeclient", "error", err)
klog.Fatal("failed to build kubeclient", "error", err)
}

informer, err := newKubeInformer(kubeClient.CoreV1())
if err != nil {
klog.Fatal("Failed to get informer", "error", err)
klog.Fatal("failed to get informer", "error", err)
}

go informer.Run(stop)
Expand Down Expand Up @@ -109,7 +111,23 @@ func newKubeInformer(coreClient corev1client.CoreV1Interface) (cache.SharedIndex
return
}
klog.Infof("detect there is a new dashboard %v created", obj.(*corev1.ConfigMap).Name)
updateDashboard(nil, obj, false)
err := updateDashboard(nil, obj, false)

times := 0
for {
if err == nil {
break
} else if times == maxDashboardRetry {
klog.Errorf("dashboard: %s could not be created after retrying %v times", obj.(*corev1.ConfigMap).Name, maxDashboardRetry)
break
}

klog.Warningf("creation of dashboard: %v failed. Retrying in 10s. Error: %v", obj.(*corev1.ConfigMap).Name, err)
time.Sleep(time.Second * 10)
err = updateDashboard(nil, obj, false)

times++
}
},
UpdateFunc: func(old, new interface{}) {
if old.(*corev1.ConfigMap).ObjectMeta.ResourceVersion == new.(*corev1.ConfigMap).ObjectMeta.ResourceVersion {
Expand All @@ -119,7 +137,24 @@ func newKubeInformer(coreClient corev1client.CoreV1Interface) (cache.SharedIndex
return
}
klog.Infof("detect there is a dashboard %v updated", new.(*corev1.ConfigMap).Name)
updateDashboard(old, new, false)
err := updateDashboard(old, new, false)

times := 0
for {
if err == nil {
break
} else if times == maxDashboardRetry {
klog.Errorf("dashboard: %s could not be created after retrying %v times", new.(*corev1.ConfigMap).Name, maxDashboardRetry)
break
}

klog.Warningf("updating of dashboard: %v failed. Retrying in 10s. Error: %v", new.(*corev1.ConfigMap).Name, err)
time.Sleep(time.Second * 10)

err = updateDashboard(old, new, false)

times++
}
},
DeleteFunc: func(obj interface{}) {
if !isDesiredDashboardConfigmap(obj) {
Expand All @@ -138,7 +173,7 @@ func newKubeInformer(coreClient corev1client.CoreV1Interface) (cache.SharedIndex

func hasCustomFolder(folderTitle string) float64 {
grafanaURL := grafanaURI + "/api/folders"
body, _ := util.SetRequest("GET", grafanaURL, nil, retry)
body, _ := util.SetRequest("GET", grafanaURL, nil, maxHttpRetry)

folders := []map[string]interface{}{}
err := json.Unmarshal(body, &folders)
Expand All @@ -158,22 +193,36 @@ func hasCustomFolder(folderTitle string) float64 {
func createCustomFolder(folderTitle string) float64 {
folderID := hasCustomFolder(folderTitle)
if folderID == 0 {
// Create the folder
grafanaURL := grafanaURI + "/api/folders"
body, _ := util.SetRequest("POST", grafanaURL, strings.NewReader("{\"title\":\""+folderTitle+"\"}"), retry)
body, _ := util.SetRequest("POST", grafanaURL, strings.NewReader("{\"title\":\""+folderTitle+"\"}"), maxHttpRetry)
folder := map[string]interface{}{}
err := json.Unmarshal(body, &folder)
if err != nil {
klog.Error(unmarshallErrMsg, "error", err)
return 0
}

time.Sleep(time.Second * 1)
// check if permissions were set correctly as sometimes this silently fails in Grafana
grafanaURL = grafanaURI + "/api/folders/" + folder["uid"].(string) + "/permissions"
body, _ = util.SetRequest("GET", grafanaURL, nil, maxHttpRetry)
if string(body) == "[]" {
// if this fails no permissions are set. In which case we want to delete the folder and try again...
klog.Warningf("failed to set permissions for folder: %v. Deleting folder and retrying later.", folderTitle)

time.Sleep(time.Second * 5)
deleteCustomFolder(folder["id"].(float64))
return 0
}
return folder["id"].(float64)
}
return folderID
}

func getCustomFolderUID(folderID float64) string {
grafanaURL := grafanaURI + "/api/folders/id/" + fmt.Sprint(folderID)
body, _ := util.SetRequest("GET", grafanaURL, nil, retry)
body, _ := util.SetRequest("GET", grafanaURL, nil, maxHttpRetry)
folder := map[string]interface{}{}
err := json.Unmarshal(body, &folder)
if err != nil {
Expand All @@ -194,7 +243,7 @@ func isEmptyFolder(folderID float64) bool {
}

grafanaURL := grafanaURI + "/api/search?folderIds=" + fmt.Sprint(folderID)
body, _ := util.SetRequest("GET", grafanaURL, nil, retry)
body, _ := util.SetRequest("GET", grafanaURL, nil, maxHttpRetry)
dashboards := []map[string]interface{}{}
err := json.Unmarshal(body, &dashboards)
if err != nil {
Expand All @@ -217,12 +266,12 @@ func deleteCustomFolder(folderID float64) bool {

uid := getCustomFolderUID(folderID)
if uid == "" {
klog.Error("Failed to get custom folder UID")
klog.Error("failed to get custom folder UID")
return false
}

grafanaURL := grafanaURI + "/api/folders/" + uid
_, respStatusCode := util.SetRequest("DELETE", grafanaURL, nil, retry)
_, respStatusCode := util.SetRequest("DELETE", grafanaURL, nil, maxHttpRetry)
if respStatusCode != http.StatusOK {
klog.Errorf("failed to delete custom folder %v with %v", folderID, respStatusCode)
return false
Expand Down Expand Up @@ -251,14 +300,13 @@ func getDashboardCustomFolderTitle(obj interface{}) string {
}

// updateDashboard is used to update the customized dashboards via calling grafana api.
func updateDashboard(old, new interface{}, overwrite bool) {
func updateDashboard(old, new interface{}, overwrite bool) error {
folderID := 0.0
folderTitle := getDashboardCustomFolderTitle(new)
if folderTitle != "" {
folderID = createCustomFolder(folderTitle)
if folderID == 0 {
klog.Error("Failed to get custom folder id")
return
return errors.New("failed to get folder id")
}
}

Expand All @@ -267,8 +315,7 @@ func updateDashboard(old, new interface{}, overwrite bool) {
dashboard := map[string]interface{}{}
err := json.Unmarshal([]byte(value), &dashboard)
if err != nil {
klog.Error("Failed to unmarshall data", "error", err)
return
return fmt.Errorf("failed to unmarshall data: %v", err)
}
if dashboard["uid"] == nil {
dashboard["uid"], _ = util.GenerateUID(new.(*corev1.ConfigMap).GetName(),
Expand All @@ -283,26 +330,24 @@ func updateDashboard(old, new interface{}, overwrite bool) {

b, err := json.Marshal(data)
if err != nil {
klog.Error("failed to marshal body", "error", err)
return
return fmt.Errorf("failed to marshal body: %v", err)
}

grafanaURL := grafanaURI + "/api/dashboards/db"
body, respStatusCode := util.SetRequest("POST", grafanaURL, bytes.NewBuffer(b), retry)
body, respStatusCode := util.SetRequest("POST", grafanaURL, bytes.NewBuffer(b), maxHttpRetry)

if respStatusCode != http.StatusOK {
if respStatusCode == http.StatusPreconditionFailed {
if strings.Contains(string(body), "version-mismatch") {
updateDashboard(nil, new, true)
return updateDashboard(nil, new, true)
} else if strings.Contains(string(body), "name-exists") {
klog.Info("the dashboard name already existed")
return fmt.Errorf("the dashboard name already existed")
} else {
klog.Infof("failed to create/update: %v", respStatusCode)
return fmt.Errorf("failed to create/update dashboard: %v", respStatusCode)
}
} else {
klog.Infof("failed to create/update: %v", respStatusCode)
return fmt.Errorf("failed to create/update dashboard: %v", respStatusCode)
}
return
}

if dashboard["title"] == homeDashboardTitle {
Expand All @@ -314,23 +359,23 @@ func updateDashboard(old, new interface{}, overwrite bool) {
} else {
id, err := strconv.Atoi(strings.Trim(string(result[1]), " "))
if err != nil {
klog.Error(err, "failed to parse dashboard id")
return fmt.Errorf("failed to parse dashboard id: %v", err)
} else {
setHomeDashboard(id)
}
}
}
klog.Info("Dashboard created/updated")
klog.Infof("dashboard: %v created/updated successfully", new.(*corev1.ConfigMap).Name)
}

folderTitle = getDashboardCustomFolderTitle(old)
folderID = hasCustomFolder(folderTitle)
if isEmptyFolder(folderID) {
if deleteCustomFolder(folderID) {
klog.Errorf("Failed to delete custom folder")
return
if !deleteCustomFolder(folderID) {
return errors.New("failed to delete custom folder")
}
}
return nil
}

// DeleteDashboard ...
Expand All @@ -340,7 +385,7 @@ func deleteDashboard(obj interface{}) {
dashboard := map[string]interface{}{}
err := json.Unmarshal([]byte(value), &dashboard)
if err != nil {
klog.Error("Failed to unmarshall data", "error", err)
klog.Error("failed to unmarshall data", "error", err)
return
}

Expand All @@ -351,18 +396,18 @@ func deleteDashboard(obj interface{}) {

grafanaURL := grafanaURI + "/api/dashboards/uid/" + uid

_, respStatusCode := util.SetRequest("DELETE", grafanaURL, nil, retry)
_, respStatusCode := util.SetRequest("DELETE", grafanaURL, nil, maxHttpRetry)
if respStatusCode != http.StatusOK {
klog.Errorf("failed to delete dashboard %v with %v", obj.(*corev1.ConfigMap).Name, respStatusCode)
} else {
klog.Info("Dashboard deleted")
klog.Info("dashboard deleted")
}

folderTitle := getDashboardCustomFolderTitle(obj)
folderID := hasCustomFolder(folderTitle)
if isEmptyFolder(folderID) {
if deleteCustomFolder(folderID) {
klog.Errorf("Failed to delete custom folder")
if !deleteCustomFolder(folderID) {
klog.Errorf("failed to delete custom folder")
return
}
}
Expand All @@ -380,11 +425,11 @@ func setHomeDashboard(id int) {
return
}
grafanaURL := grafanaURI + "/api/org/preferences"
_, respStatusCode := util.SetRequest("PUT", grafanaURL, bytes.NewBuffer(b), retry)
_, respStatusCode := util.SetRequest("PUT", grafanaURL, bytes.NewBuffer(b), maxHttpRetry)

if respStatusCode != http.StatusOK {
klog.Infof("failed to set home dashboard: %v", respStatusCode)
} else {
klog.Info("Home dashboard is set")
klog.Info("home dashboard is set")
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ func TestGrafanaDashboardController(t *testing.T) {
stop := make(chan struct{})

go createFakeServer(t)
retry = 1
maxHttpRetry = 1

os.Setenv("POD_NAMESPACE", "ns2")

Expand Down Expand Up @@ -237,7 +237,7 @@ func TestIsDesiredDashboardConfigmap(t *testing.T) {
func TestGetCustomFolderUID(t *testing.T) {
if !hasFakeServer {
go createFakeServer(t)
retry = 1
maxHttpRetry = 1
}

testCaseList := []struct {
Expand Down Expand Up @@ -273,7 +273,7 @@ func TestGetCustomFolderUID(t *testing.T) {
func TestIsEmptyFolder(t *testing.T) {
if !hasFakeServer {
go createFakeServer(t)
retry = 1
maxHttpRetry = 1
}

testCaseList := []struct {
Expand Down Expand Up @@ -351,7 +351,7 @@ func TestGetDashboardCustomFolderTitle(t *testing.T) {
func TestDeleteCustomFolder(t *testing.T) {
if !hasFakeServer {
go createFakeServer(t)
retry = 1
maxHttpRetry = 1
}

testCaseList := []struct {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,5 @@ stringData:
enabled = false
[unified_alerting]
enabled = false
[database]
query_retries = 5
8 changes: 1 addition & 7 deletions tests/grafana-dev-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,6 @@ base_dir="$(
cd "$base_dir"
obs_namespace=open-cluster-management-observability

# create a dashboard for test export grafana dashboard
kubectl apply -n "$obs_namespace" -f "$base_dir"/examples/dashboards/sample_custom_dashboard/custom-sample-dashboard.yaml

# test deploy grafana-dev
cd $base_dir/tools
./setup-grafana-dev.sh --deploy
Expand Down Expand Up @@ -63,7 +60,7 @@ fi
n=0
until [ "$n" -ge 10 ]; do
# test export grafana dashboard
./generate-dashboard-configmap-yaml.sh "Sample Dashboard for E2E"
./generate-dashboard-configmap-yaml.sh "ACM - Clusters Overview"
if [ $? -eq 0 ]; then
break
fi
Expand All @@ -81,6 +78,3 @@ if [ $? -ne 0 ]; then
echo "Failed run setup-grafana-dev.sh --clean"
exit 1
fi

# clean test env
kubectl delete -n "$obs_namespace" -f "$base_dir"/examples/dashboards/sample_custom_dashboard/custom-sample-dashboard.yaml
6 changes: 3 additions & 3 deletions tests/pkg/tests/observability_dashboard_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ var _ = Describe("Observability:", func() {
Eventually(func() bool {
_, result := utils.ContainDashboard(testOptions, dashboardTitle)
return result
}, EventuallyTimeoutMinute*3, EventuallyIntervalSecond*5).Should(BeTrue())
}, EventuallyTimeoutMinute*5, EventuallyIntervalSecond*5).Should(BeTrue())
})

It("[P2][Sev2][observability][Stable] Should have update custom dashboard after configmap updated (dashboard/g0)", func() {
Expand All @@ -64,11 +64,11 @@ var _ = Describe("Observability:", func() {
Eventually(func() bool {
_, result := utils.ContainDashboard(testOptions, dashboardTitle)
return result
}, EventuallyTimeoutMinute*3, EventuallyIntervalSecond*5).Should(BeFalse())
}, EventuallyTimeoutMinute*5, EventuallyIntervalSecond*5).Should(BeFalse())
Eventually(func() bool {
_, result := utils.ContainDashboard(testOptions, updateDashboardTitle)
return result
}, EventuallyTimeoutMinute*3, EventuallyIntervalSecond*5).Should(BeTrue())
}, EventuallyTimeoutMinute*5, EventuallyIntervalSecond*5).Should(BeTrue())
})

It("[P2][Sev2][observability][Stable] Should have no custom dashboard in grafana after related configmap removed (dashboard/g0)", func() {
Expand Down
Loading