Adding podMem #155

Merged Nov 18, 2024 · 5 commits
Changes from all commits
7 changes: 7 additions & 0 deletions cmd/k8s-netperf/k8s-netperf.go
@@ -257,8 +257,14 @@ var rootCmd = &cobra.Command{
if len(npr.ClientNodeInfo.NodeName) > 0 && len(npr.ServerNodeInfo.NodeName) > 0 {
sr.Results[i].ClientMetrics, _ = metrics.QueryNodeCPU(npr.ClientNodeInfo, pcon, npr.StartTime, npr.EndTime)
sr.Results[i].ServerMetrics, _ = metrics.QueryNodeCPU(npr.ServerNodeInfo, pcon, npr.StartTime, npr.EndTime)
metrics.VSwitchCPU(npr.ClientNodeInfo, pcon, npr.StartTime, npr.EndTime, &sr.Results[i].ClientMetrics)
metrics.VSwitchMem(npr.ClientNodeInfo, pcon, npr.StartTime, npr.EndTime, &sr.Results[i].ClientMetrics)
metrics.VSwitchCPU(npr.ServerNodeInfo, pcon, npr.StartTime, npr.EndTime, &sr.Results[i].ServerMetrics)
metrics.VSwitchMem(npr.ServerNodeInfo, pcon, npr.StartTime, npr.EndTime, &sr.Results[i].ServerMetrics)
sr.Results[i].ClientPodCPU, _ = metrics.TopPodCPU(npr.ClientNodeInfo, pcon, npr.StartTime, npr.EndTime)
sr.Results[i].ServerPodCPU, _ = metrics.TopPodCPU(npr.ServerNodeInfo, pcon, npr.StartTime, npr.EndTime)
sr.Results[i].ClientPodMem, _ = metrics.TopPodMem(npr.ClientNodeInfo, pcon, npr.StartTime, npr.EndTime)
sr.Results[i].ServerPodMem, _ = metrics.TopPodMem(npr.ServerNodeInfo, pcon, npr.StartTime, npr.EndTime)
}
}
}
@@ -319,6 +325,7 @@ var rootCmd = &cobra.Command{
if showMetrics {
result.ShowNodeCPU(sr)
result.ShowPodCPU(sr)
result.ShowPodMem(sr)
}
} else {
err = archive.WriteJSONResult(sr)
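result.ShowPodMem is introduced elsewhere in this PR and its body is not part of this excerpt. Purely as a rough sketch of what the new per-pod memory output could look like, the snippet below prints the top memory consumers for one side of the test; the PodMem shape mirrors pkg/metrics/system.go, while the function body, pod names, and values are hypothetical and not the repository's implementation.

package main

import (
	"fmt"
	"os"
	"text/tabwriter"
)

// PodMem mirrors the struct added in pkg/metrics/system.go.
type PodMem struct {
	Name  string
	Value float64 // bytes, averaged over the sample window
}

// showPodMem is a hypothetical stand-in for result.ShowPodMem: it prints the
// top memory-consuming pods for one role of the test in a simple table.
func showPodMem(role string, pods []PodMem) {
	w := tabwriter.NewWriter(os.Stdout, 0, 8, 2, ' ', 0)
	fmt.Fprintf(w, "%s pod\tRSS (MB)\n", role)
	for _, p := range pods {
		fmt.Fprintf(w, "%s\t%.1f\n", p.Name, p.Value/(1024*1024))
	}
	w.Flush()
}

func main() {
	// Pod names and values below are made up for the example.
	showPodMem("Server", []PodMem{
		{Name: "netperf-server-0", Value: 512 * 1024 * 1024},
		{Name: "ovnkube-node-sample", Value: 256 * 1024 * 1024},
	})
}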
171 changes: 113 additions & 58 deletions pkg/archive/archive.go
@@ -19,36 +19,42 @@ const ltcyMetric = "usec"

// Doc struct of the JSON document to be indexed
type Doc struct {
UUID string `json:"uuid"`
Timestamp time.Time `json:"timestamp"`
HostNetwork bool `json:"hostNetwork"`
Driver string `json:"driver"`
Parallelism int `json:"parallelism"`
Profile string `json:"profile"`
Duration int `json:"duration"`
Service bool `json:"service"`
Local bool `json:"local"`
Virt bool `json:"virt"`
AcrossAZ bool `json:"acrossAZ"`
Samples int `json:"samples"`
Messagesize int `json:"messageSize"`
Burst int `json:"burst"`
Throughput float64 `json:"throughput"`
Latency float64 `json:"latency"`
TputMetric string `json:"tputMetric"`
LtcyMetric string `json:"ltcyMetric"`
TCPRetransmit float64 `json:"tcpRetransmits"`
UDPLossPercent float64 `json:"udpLossPercent"`
ToolVersion string `json:"toolVersion"`
ToolGitCommit string `json:"toolGitCommit"`
Metadata result.Metadata `json:"metadata"`
ServerNodeCPU metrics.NodeCPU `json:"serverCPU"`
ServerPodCPU []metrics.PodCPU `json:"serverPods"`
ClientNodeCPU metrics.NodeCPU `json:"clientCPU"`
ClientPodCPU []metrics.PodCPU `json:"clientPods"`
Confidence []float64 `json:"confidence"`
ServerNodeInfo metrics.NodeInfo `json:"serverNodeInfo"`
ClientNodeInfo metrics.NodeInfo `json:"clientNodeInfo"`
UUID string `json:"uuid"`
Timestamp time.Time `json:"timestamp"`
HostNetwork bool `json:"hostNetwork"`
Driver string `json:"driver"`
Parallelism int `json:"parallelism"`
Profile string `json:"profile"`
Duration int `json:"duration"`
Service bool `json:"service"`
Local bool `json:"local"`
Virt bool `json:"virt"`
AcrossAZ bool `json:"acrossAZ"`
Samples int `json:"samples"`
Messagesize int `json:"messageSize"`
Burst int `json:"burst"`
Throughput float64 `json:"throughput"`
Latency float64 `json:"latency"`
TputMetric string `json:"tputMetric"`
LtcyMetric string `json:"ltcyMetric"`
TCPRetransmit float64 `json:"tcpRetransmits"`
UDPLossPercent float64 `json:"udpLossPercent"`
ToolVersion string `json:"toolVersion"`
ToolGitCommit string `json:"toolGitCommit"`
Metadata result.Metadata `json:"metadata"`
ServerNodeCPU metrics.NodeCPU `json:"serverCPU"`
ServerPodCPU []metrics.PodCPU `json:"serverPods"`
ServerPodMem []metrics.PodMem `json:"serverPodsMem"`
ClientNodeCPU metrics.NodeCPU `json:"clientCPU"`
ClientPodCPU []metrics.PodCPU `json:"clientPods"`
ClientPodMem []metrics.PodMem `json:"clientPodsMem"`
Confidence []float64 `json:"confidence"`
ServerNodeInfo metrics.NodeInfo `json:"serverNodeInfo"`
ClientNodeInfo metrics.NodeInfo `json:"clientNodeInfo"`
ServerVSwitchCpu float64 `json:"serverVswtichCpu"`
ServerVSwitchMem float64 `json:"serverVswitchMem"`
ClientVSwitchCpu float64 `json:"clientVswtichCpu"`
ClientVSwiitchMem float64 `json:"clientVswitchMem"`
}

// Connect returns a client connected to the desired cluster.
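For reference, the new memory and vSwitch fields serialize under the JSON tags defined above. The reduced, self-contained sketch below keeps only the fields added in this PR, with tag names copied verbatim from the struct (including the "Vswtich"/"VSwiitch" spellings as merged); the values are invented purely to show the output shape.

package main

import (
	"encoding/json"
	"fmt"
)

// podMem mirrors metrics.PodMem and its JSON tags.
type podMem struct {
	Name  string  `json:"podName"`
	Value float64 `json:"memUsage"`
}

// docMemFields is a cut-down stand-in for archive.Doc holding only the fields
// added in this PR, with tag names copied from the struct above.
type docMemFields struct {
	ServerPodMem      []podMem `json:"serverPodsMem"`
	ClientPodMem      []podMem `json:"clientPodsMem"`
	ServerVSwitchCpu  float64  `json:"serverVswtichCpu"`
	ServerVSwitchMem  float64  `json:"serverVswitchMem"`
	ClientVSwitchCpu  float64  `json:"clientVswtichCpu"`
	ClientVSwiitchMem float64  `json:"clientVswitchMem"`
}

func main() {
	d := docMemFields{
		ServerPodMem:     []podMem{{Name: "netperf-server-0", Value: 1.2e8}},
		ClientPodMem:     []podMem{{Name: "netperf-client-0", Value: 9.6e7}},
		ServerVSwitchCpu: 3.5,
		ServerVSwitchMem: 2.1e8,
	}
	out, _ := json.MarshalIndent(d, "", "  ")
	fmt.Println(string(out))
}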
@@ -89,31 +95,37 @@ func BuildDocs(sr result.ScenarioResults, uuid string) ([]interface{}, error) {
}
c := []float64{lo, hi}
d := Doc{
UUID: uuid,
Timestamp: time,
ToolVersion: sr.Version,
ToolGitCommit: sr.GitCommit,
Driver: r.Driver,
HostNetwork: r.HostNetwork,
Parallelism: r.Parallelism,
Profile: r.Profile,
Duration: r.Duration,
Virt: sr.Virt,
Samples: r.Samples,
Service: r.Service,
Messagesize: r.MessageSize,
Burst: r.Burst,
TputMetric: r.Metric,
LtcyMetric: ltcyMetric,
ServerNodeCPU: r.ServerMetrics,
ClientNodeCPU: r.ClientMetrics,
ServerPodCPU: r.ServerPodCPU.Results,
ClientPodCPU: r.ClientPodCPU.Results,
Metadata: sr.Metadata,
AcrossAZ: r.AcrossAZ,
Confidence: c,
ClientNodeInfo: r.ClientNodeInfo,
ServerNodeInfo: r.ServerNodeInfo,
UUID: uuid,
Timestamp: time,
ToolVersion: sr.Version,
ToolGitCommit: sr.GitCommit,
Driver: r.Driver,
HostNetwork: r.HostNetwork,
Parallelism: r.Parallelism,
Profile: r.Profile,
Duration: r.Duration,
Virt: sr.Virt,
Samples: r.Samples,
Service: r.Service,
Messagesize: r.MessageSize,
Burst: r.Burst,
TputMetric: r.Metric,
LtcyMetric: ltcyMetric,
ServerNodeCPU: r.ServerMetrics,
ClientNodeCPU: r.ClientMetrics,
ServerPodCPU: r.ServerPodCPU.Results,
ServerPodMem: r.ServerPodMem.MemResults,
ClientPodMem: r.ClientPodMem.MemResults,
ClientPodCPU: r.ClientPodCPU.Results,
ClientVSwitchCpu: r.ClientMetrics.VSwitchCPU,
ClientVSwiitchMem: r.ClientMetrics.VSwitchMem,
ServerVSwitchCpu: r.ServerMetrics.VSwitchCPU,
ServerVSwitchMem: r.ServerMetrics.VSwitchMem,
Metadata: sr.Metadata,
AcrossAZ: r.AcrossAZ,
Confidence: c,
ClientNodeInfo: r.ClientNodeInfo,
ServerNodeInfo: r.ServerNodeInfo,
}
UDPLossPercent, e := result.Average(r.LossSummary)
if e != nil {
@@ -189,7 +201,7 @@ func commonCsvDataFields(row result.Data) []string {
}

// Writes all the metrics to the archive.
func writeArchive(cpuarchive, podarchive *csv.Writer, role string, row result.Data, podResults []metrics.PodCPU) error {
func writeArchive(vswitch, cpuarchive, podarchive, podmemarchive *csv.Writer, role string, row result.Data, podResults []metrics.PodCPU, podMem []metrics.PodMem) error {
roleFieldData := []string{role}
for _, pod := range podResults {
if err := podarchive.Write(append(append(roleFieldData,
@@ -200,11 +212,26 @@ func writeArchive(cpuarchive, podarchive *csv.Writer, role string, row result.Da
return fmt.Errorf("failed to write archive to file")
}
}
for _, pod := range podMem {
if err := podmemarchive.Write(append(append(roleFieldData,
commonCsvDataFields(row)...),
pod.Name,
fmt.Sprintf("%f", pod.Value),
)); err != nil {
return fmt.Errorf("failed to write archive to file")
}
}

cpu := row.ClientMetrics
if role == "Server" {
cpu = row.ServerMetrics
}
if err := vswitch.Write(append(append(roleFieldData,
commonCsvDataFields(row)...),
fmt.Sprintf("%f", cpu.VSwitchCPU),
fmt.Sprintf("%f", cpu.VSwitchMem))); err != nil {
return fmt.Errorf("failed to write archive to file")
}
if err := cpuarchive.Write(append(append(roleFieldData,
commonCsvDataFields(row)...),
fmt.Sprintf("%f", cpu.Idle),
@@ -223,6 +250,17 @@ func writeArchive(cpuarchive, podarchive *csv.Writer, role string, row result.Da
// WritePromCSVResult writes the prom data in CSV format
func WritePromCSVResult(r result.ScenarioResults) error {
d := time.Now().Unix()

vswitchfp, err := os.Create(fmt.Sprintf("vswitch-result-%d.csv", d))
if err != nil {
return fmt.Errorf("failed to open vswitch archive file")
}
defer vswitchfp.Close()
podmemfp, err := os.Create(fmt.Sprintf("podmem-result-%d.csv", d))
if err != nil {
return fmt.Errorf("failed to open pod mem archive file")
}
defer podmemfp.Close()
podfp, err := os.Create(fmt.Sprintf("podcpu-result-%d.csv", d))
if err != nil {
return fmt.Errorf("failed to open pod cpu archive file")
@@ -233,10 +271,15 @@ func WritePromCSVResult(r result.ScenarioResults) error {
return fmt.Errorf("failed to open cpu archive file")
}
defer cpufp.Close()
vswitch := csv.NewWriter(vswitchfp)
defer vswitch.Flush()
cpuarchive := csv.NewWriter(cpufp)
defer cpuarchive.Flush()
podarchive := csv.NewWriter(podfp)
defer podarchive.Flush()
podmemarchive := csv.NewWriter(podmemfp)
defer podmemarchive.Flush()

roleField := []string{"Role"}
cpudata := append(append(roleField,
commonCsvHeaderFields()...),
@@ -253,20 +296,32 @@ func WritePromCSVResult(r result.ScenarioResults) error {
"Pod Name",
"Utilization",
)
vswtichdata := append(append(roleField,
commonCsvHeaderFields()...),
"CPU Utilization",
"Memory Utilization",
)
if err := cpuarchive.Write(cpudata); err != nil {
return fmt.Errorf("failed to write cpu archive to file")
}
if err := podarchive.Write(poddata); err != nil {
return fmt.Errorf("failed to write pod archive to file")
}
if err := podmemarchive.Write(poddata); err != nil {
return fmt.Errorf("failed to write pod archive to file")
}
if err := vswitch.Write(vswtichdata); err != nil {
return fmt.Errorf("failed to write vswitch archive to file")
}
for _, row := range r.Results {
if err := writeArchive(cpuarchive, podarchive, "Client", row, row.ClientPodCPU.Results); err != nil {
if err := writeArchive(vswitch, cpuarchive, podarchive, podmemarchive, "Client", row, row.ClientPodCPU.Results, row.ClientPodMem.MemResults); err != nil {
return err
}
if err := writeArchive(cpuarchive, podarchive, "Server", row, row.ServerPodCPU.Results); err != nil {
if err := writeArchive(vswitch, cpuarchive, podarchive, podmemarchive, "Server", row, row.ServerPodCPU.Results, row.ServerPodMem.MemResults); err != nil {
return err
}
}

return nil
}

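With this change WritePromCSVResult emits two extra files per run, vswitch-result-<ts>.csv and podmem-result-<ts>.csv. As a hedged sketch of consuming the vswitch file: the common columns come from commonCsvHeaderFields(), which is not shown in this diff, so only the leading Role column and the two trailing utilization columns are assumed here, and the file name is a placeholder.

package main

import (
	"encoding/csv"
	"fmt"
	"os"
)

func main() {
	// Placeholder file name; the numeric suffix comes from time.Now().Unix().
	f, err := os.Open("vswitch-result-1700000000.csv")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	rows, err := csv.NewReader(f).ReadAll()
	if err != nil {
		panic(err)
	}
	// Per writeArchive above, the last two columns of each data row hold the
	// averaged ovs-vswitchd CPU (percent) and RSS (bytes) for that role.
	for _, row := range rows[1:] { // skip the header row
		n := len(row)
		fmt.Printf("%-6s vswitchd CPU=%s%% RSS=%s bytes\n", row[0], row[n-2], row[n-1])
	}
}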
83 changes: 72 additions & 11 deletions pkg/metrics/system.go
@@ -24,14 +24,16 @@ type NodeInfo struct {

// NodeCPU stores CPU information for a specific Node
type NodeCPU struct {
Idle float64 `json:"idleCPU"`
User float64 `json:"userCPU"`
Steal float64 `json:"stealCPU"`
System float64 `json:"systemCPU"`
Nice float64 `json:"niceCPU"`
Irq float64 `json:"irqCPU"`
Softirq float64 `json:"softCPU"`
Iowait float64 `json:"ioCPU"`
Idle float64 `json:"idleCPU"`
User float64 `json:"userCPU"`
Steal float64 `json:"stealCPU"`
System float64 `json:"systemCPU"`
Nice float64 `json:"niceCPU"`
Irq float64 `json:"irqCPU"`
Softirq float64 `json:"softCPU"`
Iowait float64 `json:"ioCPU"`
VSwitchCPU float64 `json:"vSwitchCPU"`
VSwitchMem float64 `json:"vSwitchMem"`
}

// PodCPU stores pod CPU
@@ -40,9 +42,15 @@ type PodCPU struct {
Value float64 `json:"cpuUsage"`
}

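// PodMem stores pod memory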
type PodMem struct {
Name string `json:"podName"`
Value float64 `json:"memUsage"`
}

// PodValues is a collection of PodCPU
type PodValues struct {
Results []PodCPU
Results []PodCPU
MemResults []PodMem
}

// PromConnect stores the prom information
@@ -166,10 +174,10 @@ func QueryNodeCPU(node NodeInfo, conn PromConnect, start time.Time, end time.Tim
return cpu, true
}

// TopPodCPU will return the top 5 CPU consumers for a specific node
// TopPodCPU will return the top 10 CPU consumers for a specific node
func TopPodCPU(node NodeInfo, conn PromConnect, start time.Time, end time.Time) (PodValues, bool) {
var pods PodValues
query := fmt.Sprintf("topk(5,sum(irate(container_cpu_usage_seconds_total{name!=\"\",instance=~\"%s:.*\"}[2m]) * 100) by (pod, namespace, instance))", node.IP)
query := fmt.Sprintf("topk(10,sum(irate(container_cpu_usage_seconds_total{name!=\"\",instance=~\"%s:.*\"}[2m]) * 100) by (pod, namespace, instance))", node.IP)
logging.Debugf("Prom Query : %s", query)
val, err := conn.Client.QueryRange(query, start, end, time.Minute)
if err != nil {
@@ -187,6 +195,59 @@ func TopPodCPU(node NodeInfo, conn PromConnect, start time.Time, end time.Time)
return pods, true
}

// VSwitchCPU will return the vswitchd CPU usage for a specific node
func VSwitchCPU(node NodeInfo, conn PromConnect, start time.Time, end time.Time, ndata *NodeCPU) bool {
query := fmt.Sprintf("irate(container_cpu_usage_seconds_total{id=~\"/system.slice/ovs-vswitchd.service\", node=~\"%s\"}[2m])*100", node.NodeName)
logging.Debugf("Prom Query : %s", query)
val, err := conn.Client.QueryRange(query, start, end, time.Minute)
if err != nil {
logging.Error("Issue querying Prometheus")
return false
}
v := val.(model.Matrix)
for _, s := range v {
ndata.VSwitchCPU = avg(s.Values)
}
return true
}

// VSwitchMem will return the vswitchd memory usage for a specific node
func VSwitchMem(node NodeInfo, conn PromConnect, start time.Time, end time.Time, ndata *NodeCPU) bool {
query := fmt.Sprintf("container_memory_rss{id=~\"/system.slice/ovs-vswitchd.service\", node=~\"%s\"}", node.NodeName)
logging.Debugf("Prom Query : %s", query)
val, err := conn.Client.QueryRange(query, start, end, time.Minute)
if err != nil {
logging.Error("Issue querying Prometheus")
return false
}
v := val.(model.Matrix)
for _, s := range v {
ndata.VSwitchMem = avg(s.Values)
}
return true
}

// TopPodMem will return the top 10 Mem consumers for a specific node
func TopPodMem(node NodeInfo, conn PromConnect, start time.Time, end time.Time) (PodValues, bool) {
var pods PodValues
query := fmt.Sprintf("topk(10,sum(container_memory_rss{container!=\"POD\",name!=\"\",node=~\"%s\"}) by (pod, namespace, node))", node.NodeName)
logging.Debugf("Prom Query : %s", query)
val, err := conn.Client.QueryRange(query, start, end, time.Minute)
if err != nil {
logging.Error("Issue querying Prometheus")
return pods, false
}
v := val.(model.Matrix)
for _, s := range v {
p := PodMem{
Name: string(s.Metric["pod"]),
Value: avg(s.Values),
}
pods.MemResults = append(pods.MemResults, p)
}
return pods, true
}

// Calculates average for the given data
func avg(data []model.SamplePair) float64 {
sum := 0.0
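The PromConnect wrapper these helpers call is only partially visible in this diff. For readers who want to reproduce the new memory query outside k8s-netperf, here is a self-contained sketch against the upstream prometheus/client_golang API; the Prometheus address, node name, and time window are placeholders, and the per-series averaging mirrors the avg() helper above.

package main

import (
	"context"
	"fmt"
	"time"

	"github.com/prometheus/client_golang/api"
	v1 "github.com/prometheus/client_golang/api/prometheus/v1"
	"github.com/prometheus/common/model"
)

func main() {
	client, err := api.NewClient(api.Config{Address: "http://prometheus.example:9090"}) // placeholder URL
	if err != nil {
		panic(err)
	}
	prom := v1.NewAPI(client)

	node := "worker-0" // placeholder node name
	// Same PromQL as TopPodMem above: top 10 pods by container RSS on the node.
	query := fmt.Sprintf("topk(10,sum(container_memory_rss{container!=\"POD\",name!=\"\",node=~%q}) by (pod, namespace, node))", node)

	end := time.Now()
	start := end.Add(-10 * time.Minute) // placeholder window
	val, _, err := prom.QueryRange(context.Background(), query, v1.Range{Start: start, End: end, Step: time.Minute})
	if err != nil {
		panic(err)
	}

	matrix, ok := val.(model.Matrix)
	if !ok {
		panic("unexpected result type")
	}
	for _, s := range matrix {
		if len(s.Values) == 0 {
			continue
		}
		// Average the samples the same way the avg() helper in system.go does.
		sum := 0.0
		for _, sp := range s.Values {
			sum += float64(sp.Value)
		}
		fmt.Printf("%s: %.0f bytes RSS (avg)\n", s.Metric["pod"], sum/float64(len(s.Values)))
	}
}

The same pattern applies to the vswitchd CPU and memory queries introduced above; only the PromQL string changes.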