From 11f8d3764470fb2ac57c99cc52ad9fb55fc42bbf Mon Sep 17 00:00:00 2001 From: Blake Devcich Date: Wed, 13 Dec 2023 10:20:15 -0600 Subject: [PATCH] Added CPU Profiling Support via pprof This adds an option to enable cpu profiling using pprof at startup. The profiling data is dumped to a file when the daemon is stopped. Signed-off-by: Blake Devcich --- daemons/compute/server/main.go | 25 ++++++++++++++++++++++- daemons/compute/server/servers/server.go | 26 ++++++++++++++---------- 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/daemons/compute/server/main.go b/daemons/compute/server/main.go index f1f3920e..2c26a5fa 100644 --- a/daemons/compute/server/main.go +++ b/daemons/compute/server/main.go @@ -27,7 +27,9 @@ import ( "os" "os/signal" "runtime" + "runtime/pprof" "syscall" + "time" "github.com/takama/daemon" "google.golang.org/grpc" @@ -49,7 +51,7 @@ type Service struct { daemon.Daemon } -func (service *Service) Manage() (string, error) { +func (service *Service) Manage() (msg string, err error) { if len(os.Args) > 1 { command := os.Args[1] @@ -99,6 +101,27 @@ func (service *Service) Manage() (string, error) { return fmt.Sprintf("Failed to set permissions on socket %s", *socketAddr), err } + // Enable CPU profiling with pprof + if len(options.CpuProfile) > 0 { + filename := options.CpuProfile + "-" + time.Now().UTC().Format(time.RFC3339) + f, err := os.Create(filename) + if err != nil { + return fmt.Sprintf("could not create CPU profile"), err + } + defer func() { + if cerr := f.Close(); cerr != nil { + err = cerr + msg = "could not close CPU profile" + } + }() + + if err := pprof.StartCPUProfile(f); err != nil { + return fmt.Sprintf("could not start CPU profile"), err + } + stdlog.Printf("CPU profiling enabled: %s. Stop daemon to dump contents to file.\n", filename) + defer pprof.StopCPUProfile() + } + go service.Run(server, listener) for { diff --git a/daemons/compute/server/servers/server.go b/daemons/compute/server/servers/server.go index 2692288c..6c2afc4f 100644 --- a/daemons/compute/server/servers/server.go +++ b/daemons/compute/server/servers/server.go @@ -32,10 +32,11 @@ type ServerOptions struct { tokenFile string certFile string - name string - nodeName string - sysConfig string - simulated bool + name string + nodeName string + sysConfig string + CpuProfile string + simulated bool k8sQPS int k8sBurst int @@ -43,13 +44,14 @@ type ServerOptions struct { func GetOptions() (*ServerOptions, error) { opts := ServerOptions{ - host: os.Getenv("KUBERNETES_SERVICE_HOST"), - port: os.Getenv("KUBERNETES_SERVICE_PORT"), - name: os.Getenv("NODE_NAME"), - nodeName: os.Getenv("NNF_NODE_NAME"), - tokenFile: os.Getenv("NNF_DATA_MOVEMENT_SERVICE_TOKEN_FILE"), - certFile: os.Getenv("NNF_DATA_MOVEMENT_SERVICE_CERT_FILE"), - simulated: false, + host: os.Getenv("KUBERNETES_SERVICE_HOST"), + port: os.Getenv("KUBERNETES_SERVICE_PORT"), + name: os.Getenv("NODE_NAME"), + nodeName: os.Getenv("NNF_NODE_NAME"), + tokenFile: os.Getenv("NNF_DATA_MOVEMENT_SERVICE_TOKEN_FILE"), + certFile: os.Getenv("NNF_DATA_MOVEMENT_SERVICE_CERT_FILE"), + CpuProfile: os.Getenv("NNF_DATA_MOVEMENT_SERVICE_CPU_PROFILE"), + simulated: false, // These options adjust the client-side rate-limiting for k8s. The new defaults are 50 and // 100 (rather than 5, 10). See more info https://github.com/kubernetes/kubernetes/pull/116121 @@ -68,6 +70,8 @@ func GetOptions() (*ServerOptions, error) { flag.BoolVar(&opts.simulated, "simulated", opts.simulated, "Run in simulation mode where no requests are sent to the server") flag.IntVar(&opts.k8sQPS, "kubernetes-qps", opts.k8sQPS, "Kubernetes client queries per second (QPS)") flag.IntVar(&opts.k8sBurst, "kubernetes-burst", opts.k8sBurst, "Kubernetes client additional concurrent calls above QPS") + flag.StringVar(&opts.CpuProfile, "cpu-profile", opts.CpuProfile, + "Enable and dump CPU profiling data to this file after daemon is stopped. Timestamp is added to end of filename.") flag.Parse() return &opts, nil }