From b40e38ea1441bf76609e8503f336410d591251a5 Mon Sep 17 00:00:00 2001 From: mphanias Date: Thu, 23 Nov 2023 15:00:21 +0530 Subject: [PATCH 1/8] OM86 - initial checkin support for micro benchmarks issue micro-benchmark commands during latency-watcher passtwo keys namespace-watcher and node-watcher add respective namespace and service benchmark flags which are later consumed by latency-watcher --- watcher_latency.go | 69 ++++++++++++++++++++++++++++++++++++++++++- watcher_namespaces.go | 10 +++++++ watcher_node_stats.go | 5 ++++ 3 files changed, 83 insertions(+), 1 deletion(-) diff --git a/watcher_latency.go b/watcher_latency.go index 10125525..59cd9bee 100644 --- a/watcher_latency.go +++ b/watcher_latency.go @@ -1,6 +1,8 @@ package main import ( + "strings" + "github.com/prometheus/client_golang/prometheus" log "github.com/sirupsen/logrus" @@ -9,6 +11,8 @@ import ( type LatencyWatcher struct { } +var LatencyBenchmarks = make(map[string]float64) + func (lw *LatencyWatcher) describe(ch chan<- *prometheus.Desc) {} func (lw *LatencyWatcher) passOneKeys() []string { @@ -33,7 +37,7 @@ func (lw *LatencyWatcher) passTwoKeys(rawMetrics map[string]string) (latencyComm } if ok { - return []string{"latencies:"} + return lw.getLatenciesCommands(rawMetrics) } return []string{"latency:"} @@ -99,3 +103,66 @@ func (lw *LatencyWatcher) refresh(o *Observer, infoKeys []string, rawMetrics map return nil } + +// Utility methods +// checks if a stat can be considered for latency stat retrieval +func canConsiderLatencyCommand(stat string) bool { + return (strings.Contains(stat, "enable-benchmarks-") || + strings.Contains(stat, "enable-hist-")) // hist-proxy & hist-info - both at service level +} + +func (lw *LatencyWatcher) getLatenciesCommands(rawMetrics map[string]string) []string { + var commands = []string{"latencies:"} + + // below latency-command are added to the auto-enabled list, i.e. latencies: command + // re-repl is auto-enabled, but not coming as part of latencies: list, hence we are adding it explicitly + // + // Hashmap content format := namespace- = <0/1> + for ns_latency_enabled_benchmark := range LatencyBenchmarks { + l_value := LatencyBenchmarks[ns_latency_enabled_benchmark] + // only if enabled, fetch the metrics + if l_value == 1 { + // if enable-hist-proxy + // command = latencies:hist={test}-proxy + // else if enable-benchmarks-fabric + // command = latencies:hist=benchmarks-fabric + // else if re-repl + // command = latencies:hist={test}-re-repl + + if strings.Contains(ns_latency_enabled_benchmark, "re-repl") { + // Exception case + ns := strings.Split(ns_latency_enabled_benchmark, "-")[0] + l_command := "latencies:hist={" + ns + "}-re-repl" + commands = append(commands, l_command) + } else if strings.Contains(ns_latency_enabled_benchmark, "enable-hist-proxy") { + // Exception case + ns := strings.Split(ns_latency_enabled_benchmark, "-")[0] + l_command := "latencies:hist={" + ns + "}-proxy" + commands = append(commands, l_command) + } else if strings.Contains(ns_latency_enabled_benchmark, "enable-benchmarks-fabric") { + // Exception case + l_command := "latencies:hist=benchmarks-fabric" + commands = append(commands, l_command) + } else if strings.Contains(ns_latency_enabled_benchmark, "enable-hist-info") { + // Exception case + l_command := "latencies:hist=info" + commands = append(commands, l_command) + } else if strings.Contains(ns_latency_enabled_benchmark, "-benchmarks-") { + // remaining enabled benchmark latencies like + // enable-benchmarks-fabric, enable-benchmarks-ops-sub, enable-benchmarks-read + // enable-benchmarks-write, enable-benchmarks-udf, enable-benchmarks-udf-sub, enable-benchmarks-batch-sub + + // format:= test-enable-benchmarks-read (or) test-enable-hist-proxy + ns := strings.Split(ns_latency_enabled_benchmark, "-")[0] + benchmarks_start_index := strings.LastIndex(ns_latency_enabled_benchmark, "-benchmarks-") + l_command := ns_latency_enabled_benchmark[benchmarks_start_index:] + l_command = "latencies:hist={" + ns + "}" + l_command + commands = append(commands, l_command) + } + } + } + + log.Tracef("latency-passtwokeys:%s", commands) + + return commands +} diff --git a/watcher_namespaces.go b/watcher_namespaces.go index 17133e84..fc5c3b56 100644 --- a/watcher_namespaces.go +++ b/watcher_namespaces.go @@ -210,6 +210,16 @@ func (nw *NamespaceWatcher) refreshNamespaceStats(singleInfoKey string, infoKeys // push to prom-channel pushToPrometheus(asMetric, pv, labels, labelValues, ch) } + + // below code section is to ensure ns+latencies combination is handled during LatencyWatcher + // + // check and if latency benchmarks stat - is it enabled (bool true==1 and false==0 after conversion) + if canConsiderLatencyCommand(stat) { + LatencyBenchmarks[nsName+"-"+stat] = pv + } + // append default re-repl, as this auto-enabled, but not coming as part of latencies, we need this as namespace is available only here + LatencyBenchmarks[nsName+"-re-repl"] = 1 + } } diff --git a/watcher_node_stats.go b/watcher_node_stats.go index 6ab99b62..09e75d06 100644 --- a/watcher_node_stats.go +++ b/watcher_node_stats.go @@ -75,5 +75,10 @@ func (sw *StatsWatcher) handleRefresh(o *Observer, nodeRawMetrics string, cluste pushToPrometheus(asMetric, pv, labels, labelsValues, ch) + // check and if latency benchmarks stat, is it enabled (bool true==1 and false==0 after conversion) + if canConsiderLatencyCommand(stat) { + LatencyBenchmarks["service-"+stat] = pv + } + } } From 70180fa39e3d21feea0ef9df6f0edd0917cafa59 Mon Sep 17 00:00:00 2001 From: mphanias Date: Thu, 23 Nov 2023 16:51:43 +0530 Subject: [PATCH 2/8] OM86 --- watcher_latency.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/watcher_latency.go b/watcher_latency.go index 59cd9bee..76b5aa18 100644 --- a/watcher_latency.go +++ b/watcher_latency.go @@ -60,6 +60,18 @@ func (lw *LatencyWatcher) refresh(o *Observer, infoKeys []string, rawMetrics map } } + // loop all the latency infokeys + for ik := range infoKeys { + parseSingleLatenciesKey(infoKeys[ik], rawMetrics, allowedLatenciesList, blockedLatenciessList, ch) + } + + return nil +} + +func parseSingleLatenciesKey(singleLatencyKey string, rawMetrics map[string]string, + allowedLatenciesList map[string]struct{}, + blockedLatenciessList map[string]struct{}, ch chan<- prometheus.Metric) error { + var latencyStats map[string]StatsMap if rawMetrics["latencies:"] != "" { From c663723162b43bb7df6625200713f07ae68bec7d Mon Sep 17 00:00:00 2001 From: mphanias Date: Thu, 23 Nov 2023 17:00:10 +0530 Subject: [PATCH 3/8] OM86 --- watcher_latency.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/watcher_latency.go b/watcher_latency.go index 76b5aa18..55aa0043 100644 --- a/watcher_latency.go +++ b/watcher_latency.go @@ -75,7 +75,7 @@ func parseSingleLatenciesKey(singleLatencyKey string, rawMetrics map[string]stri var latencyStats map[string]StatsMap if rawMetrics["latencies:"] != "" { - latencyStats = parseLatencyInfo(rawMetrics["latencies:"], int(config.Aerospike.LatencyBucketsCount)) + latencyStats = parseLatencyInfo(rawMetrics[singleLatencyKey], int(config.Aerospike.LatencyBucketsCount)) } else { latencyStats = parseLatencyInfoLegacy(rawMetrics["latency:"], int(config.Aerospike.LatencyBucketsCount)) } From 25cb2756529531761e42cc6bb247f324cd0bc6a9 Mon Sep 17 00:00:00 2001 From: mphanias Date: Thu, 23 Nov 2023 19:51:20 +0530 Subject: [PATCH 4/8] OM96 fixed lint errors --- watcher_latency.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/watcher_latency.go b/watcher_latency.go index 55aa0043..472c82ed 100644 --- a/watcher_latency.go +++ b/watcher_latency.go @@ -62,7 +62,10 @@ func (lw *LatencyWatcher) refresh(o *Observer, infoKeys []string, rawMetrics map // loop all the latency infokeys for ik := range infoKeys { - parseSingleLatenciesKey(infoKeys[ik], rawMetrics, allowedLatenciesList, blockedLatenciessList, ch) + err := parseSingleLatenciesKey(infoKeys[ik], rawMetrics, allowedLatenciesList, blockedLatenciessList, ch) + if err != nil { + return err + } } return nil From aa4f155e9d844286525b65e86157387f7b5b353a Mon Sep 17 00:00:00 2001 From: mphanias Date: Wed, 29 Nov 2023 17:19:29 +0530 Subject: [PATCH 5/8] OM86 - review feedback --- watcher_latency.go | 66 +++++++++++++++---------------------------- watcher_namespaces.go | 6 ++-- watcher_node_stats.go | 4 +-- 3 files changed, 27 insertions(+), 49 deletions(-) diff --git a/watcher_latency.go b/watcher_latency.go index 472c82ed..31e26245 100644 --- a/watcher_latency.go +++ b/watcher_latency.go @@ -1,6 +1,7 @@ package main import ( + "fmt" "strings" "github.com/prometheus/client_golang/prometheus" @@ -11,7 +12,7 @@ import ( type LatencyWatcher struct { } -var LatencyBenchmarks = make(map[string]float64) +var LatencyBenchmarks = make(map[string]string) func (lw *LatencyWatcher) describe(ch chan<- *prometheus.Desc) {} @@ -121,8 +122,9 @@ func parseSingleLatenciesKey(singleLatencyKey string, rawMetrics map[string]stri // Utility methods // checks if a stat can be considered for latency stat retrieval -func canConsiderLatencyCommand(stat string) bool { - return (strings.Contains(stat, "enable-benchmarks-") || +func isStatLatencyHistRelated(stat string) bool { + // is not enable-benchmarks-storage and (enable-benchmarks-* or enable-hist-*) + return (!strings.Contains(stat, "enable-benchmarks-storage")) && (strings.Contains(stat, "enable-benchmarks-") || strings.Contains(stat, "enable-hist-")) // hist-proxy & hist-info - both at service level } @@ -133,48 +135,24 @@ func (lw *LatencyWatcher) getLatenciesCommands(rawMetrics map[string]string) []s // re-repl is auto-enabled, but not coming as part of latencies: list, hence we are adding it explicitly // // Hashmap content format := namespace- = <0/1> - for ns_latency_enabled_benchmark := range LatencyBenchmarks { - l_value := LatencyBenchmarks[ns_latency_enabled_benchmark] - // only if enabled, fetch the metrics - if l_value == 1 { - // if enable-hist-proxy - // command = latencies:hist={test}-proxy - // else if enable-benchmarks-fabric - // command = latencies:hist=benchmarks-fabric - // else if re-repl - // command = latencies:hist={test}-re-repl - - if strings.Contains(ns_latency_enabled_benchmark, "re-repl") { - // Exception case - ns := strings.Split(ns_latency_enabled_benchmark, "-")[0] - l_command := "latencies:hist={" + ns + "}-re-repl" - commands = append(commands, l_command) - } else if strings.Contains(ns_latency_enabled_benchmark, "enable-hist-proxy") { - // Exception case - ns := strings.Split(ns_latency_enabled_benchmark, "-")[0] - l_command := "latencies:hist={" + ns + "}-proxy" - commands = append(commands, l_command) - } else if strings.Contains(ns_latency_enabled_benchmark, "enable-benchmarks-fabric") { - // Exception case - l_command := "latencies:hist=benchmarks-fabric" - commands = append(commands, l_command) - } else if strings.Contains(ns_latency_enabled_benchmark, "enable-hist-info") { - // Exception case - l_command := "latencies:hist=info" - commands = append(commands, l_command) - } else if strings.Contains(ns_latency_enabled_benchmark, "-benchmarks-") { - // remaining enabled benchmark latencies like - // enable-benchmarks-fabric, enable-benchmarks-ops-sub, enable-benchmarks-read - // enable-benchmarks-write, enable-benchmarks-udf, enable-benchmarks-udf-sub, enable-benchmarks-batch-sub - - // format:= test-enable-benchmarks-read (or) test-enable-hist-proxy - ns := strings.Split(ns_latency_enabled_benchmark, "-")[0] - benchmarks_start_index := strings.LastIndex(ns_latency_enabled_benchmark, "-benchmarks-") - l_command := ns_latency_enabled_benchmark[benchmarks_start_index:] - l_command = "latencies:hist={" + ns + "}" + l_command - commands = append(commands, l_command) - } + for latencyHistName := range LatencyBenchmarks { + histTokens := strings.Split(latencyHistName, "-") + + histCommand := "latencies:hist=" + + // service-enable-benchmarks-fabric or ns-enable-benchmarks-ops-sub or service-enable-hist-info + if histTokens[0] != "service" { + histCommand = histCommand + "{" + histTokens[0] + "}-" } + + if strings.Contains(latencyHistName, "enable-benchmarks-") { + histCommand = histCommand + strings.Join(histTokens[2:], "-") + } else { + histCommand = histCommand + strings.Join(histTokens[3:], "-") + } + + commands = append(commands, histCommand) + fmt.Println(" asinfo -h $1 -v ", histCommand) } log.Tracef("latency-passtwokeys:%s", commands) diff --git a/watcher_namespaces.go b/watcher_namespaces.go index fc5c3b56..460a16ff 100644 --- a/watcher_namespaces.go +++ b/watcher_namespaces.go @@ -214,11 +214,11 @@ func (nw *NamespaceWatcher) refreshNamespaceStats(singleInfoKey string, infoKeys // below code section is to ensure ns+latencies combination is handled during LatencyWatcher // // check and if latency benchmarks stat - is it enabled (bool true==1 and false==0 after conversion) - if canConsiderLatencyCommand(stat) { - LatencyBenchmarks[nsName+"-"+stat] = pv + if isStatLatencyHistRelated(stat) && pv == 1 { + LatencyBenchmarks[nsName+"-"+stat] = stat } // append default re-repl, as this auto-enabled, but not coming as part of latencies, we need this as namespace is available only here - LatencyBenchmarks[nsName+"-re-repl"] = 1 + LatencyBenchmarks[nsName+"-latency-hist-re-repl"] = "{" + nsName + "}-re-repl" } diff --git a/watcher_node_stats.go b/watcher_node_stats.go index 09e75d06..dce35af2 100644 --- a/watcher_node_stats.go +++ b/watcher_node_stats.go @@ -76,8 +76,8 @@ func (sw *StatsWatcher) handleRefresh(o *Observer, nodeRawMetrics string, cluste pushToPrometheus(asMetric, pv, labels, labelsValues, ch) // check and if latency benchmarks stat, is it enabled (bool true==1 and false==0 after conversion) - if canConsiderLatencyCommand(stat) { - LatencyBenchmarks["service-"+stat] = pv + if isStatLatencyHistRelated(stat) && pv == 1 { + LatencyBenchmarks["service-"+stat] = stat } } From cbe9369b33edde7f8c7e1009eb6889f80cd241c9 Mon Sep 17 00:00:00 2001 From: mphanias Date: Wed, 29 Nov 2023 17:25:01 +0530 Subject: [PATCH 6/8] OM86 removed fmt.println --- watcher_latency.go | 2 -- watcher_namespaces.go | 2 -- 2 files changed, 4 deletions(-) diff --git a/watcher_latency.go b/watcher_latency.go index 31e26245..1d720949 100644 --- a/watcher_latency.go +++ b/watcher_latency.go @@ -1,7 +1,6 @@ package main import ( - "fmt" "strings" "github.com/prometheus/client_golang/prometheus" @@ -152,7 +151,6 @@ func (lw *LatencyWatcher) getLatenciesCommands(rawMetrics map[string]string) []s } commands = append(commands, histCommand) - fmt.Println(" asinfo -h $1 -v ", histCommand) } log.Tracef("latency-passtwokeys:%s", commands) diff --git a/watcher_namespaces.go b/watcher_namespaces.go index 460a16ff..5e7e2140 100644 --- a/watcher_namespaces.go +++ b/watcher_namespaces.go @@ -177,8 +177,6 @@ func (nw *NamespaceWatcher) refreshNamespaceStats(singleInfoKey string, infoKeys sindexType := stats[SINDEX_TYPE] storageEngine := stats[STORAGE_ENGINE] - // fmt.Println(" storageEngine: ", storageEngine) - // if stat is index-type or sindex-type , append addl label if strings.HasPrefix(deviceType, INDEX_TYPE) && len(indexType) > 0 { labels = append(labels, METRIC_LABEL_INDEX) From abf7d907108505aae4e52c359f6705f4f0829b87 Mon Sep 17 00:00:00 2001 From: mphanias Date: Wed, 29 Nov 2023 18:42:36 +0530 Subject: [PATCH 7/8] OM86 - bug fix --- watcher_namespaces.go | 8 ++++++-- watcher_node_stats.go | 9 +++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/watcher_namespaces.go b/watcher_namespaces.go index 5e7e2140..8d061cf2 100644 --- a/watcher_namespaces.go +++ b/watcher_namespaces.go @@ -212,8 +212,12 @@ func (nw *NamespaceWatcher) refreshNamespaceStats(singleInfoKey string, infoKeys // below code section is to ensure ns+latencies combination is handled during LatencyWatcher // // check and if latency benchmarks stat - is it enabled (bool true==1 and false==0 after conversion) - if isStatLatencyHistRelated(stat) && pv == 1 { - LatencyBenchmarks[nsName+"-"+stat] = stat + if isStatLatencyHistRelated(stat) { + delete(LatencyBenchmarks, nsName+"-"+stat) + + if pv == 1 { + LatencyBenchmarks[nsName+"-"+stat] = stat + } } // append default re-repl, as this auto-enabled, but not coming as part of latencies, we need this as namespace is available only here LatencyBenchmarks[nsName+"-latency-hist-re-repl"] = "{" + nsName + "}-re-repl" diff --git a/watcher_node_stats.go b/watcher_node_stats.go index dce35af2..ff8fda04 100644 --- a/watcher_node_stats.go +++ b/watcher_node_stats.go @@ -76,8 +76,13 @@ func (sw *StatsWatcher) handleRefresh(o *Observer, nodeRawMetrics string, cluste pushToPrometheus(asMetric, pv, labels, labelsValues, ch) // check and if latency benchmarks stat, is it enabled (bool true==1 and false==0 after conversion) - if isStatLatencyHistRelated(stat) && pv == 1 { - LatencyBenchmarks["service-"+stat] = stat + if isStatLatencyHistRelated(stat) { + // remove old value as microbenchmark may get enabled / disable on-the-fly at server so we cannot rely on value + delete(LatencyBenchmarks, "service-"+stat) + + if pv == 1 { + LatencyBenchmarks["service-"+stat] = stat + } } } From a8ea04c100f1e845697059fef8f35ecd06550442 Mon Sep 17 00:00:00 2001 From: mphanias Date: Thu, 30 Nov 2023 02:13:37 +0530 Subject: [PATCH 8/8] OM86 - review feedback removed - re-repl hist command modified the for loop removed unnecessary comments --- watcher_latency.go | 9 +++------ watcher_namespaces.go | 4 ++-- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/watcher_latency.go b/watcher_latency.go index 1d720949..7d0bc7ac 100644 --- a/watcher_latency.go +++ b/watcher_latency.go @@ -61,8 +61,8 @@ func (lw *LatencyWatcher) refresh(o *Observer, infoKeys []string, rawMetrics map } // loop all the latency infokeys - for ik := range infoKeys { - err := parseSingleLatenciesKey(infoKeys[ik], rawMetrics, allowedLatenciesList, blockedLatenciessList, ch) + for _, infoKey := range infoKeys { + err := parseSingleLatenciesKey(infoKey, rawMetrics, allowedLatenciesList, blockedLatenciessList, ch) if err != nil { return err } @@ -130,10 +130,7 @@ func isStatLatencyHistRelated(stat string) bool { func (lw *LatencyWatcher) getLatenciesCommands(rawMetrics map[string]string) []string { var commands = []string{"latencies:"} - // below latency-command are added to the auto-enabled list, i.e. latencies: command - // re-repl is auto-enabled, but not coming as part of latencies: list, hence we are adding it explicitly - // - // Hashmap content format := namespace- = <0/1> + // Hashmap content format := namespace- = for latencyHistName := range LatencyBenchmarks { histTokens := strings.Split(latencyHistName, "-") diff --git a/watcher_namespaces.go b/watcher_namespaces.go index 8d061cf2..13231c54 100644 --- a/watcher_namespaces.go +++ b/watcher_namespaces.go @@ -219,10 +219,10 @@ func (nw *NamespaceWatcher) refreshNamespaceStats(singleInfoKey string, infoKeys LatencyBenchmarks[nsName+"-"+stat] = stat } } - // append default re-repl, as this auto-enabled, but not coming as part of latencies, we need this as namespace is available only here - LatencyBenchmarks[nsName+"-latency-hist-re-repl"] = "{" + nsName + "}-re-repl" } + // // append default re-repl, as this auto-enabled, but not coming as part of latencies, we need this as namespace is available only here + // LatencyBenchmarks[nsName+"-latency-hist-re-repl"] = "{" + nsName + "}-re-repl" }