From b5f6519563b4fd1c9cf966029d0f9cd0d014204d Mon Sep 17 00:00:00 2001 From: Michele Costa Date: Tue, 7 Nov 2023 10:57:58 +0000 Subject: [PATCH 1/4] Create exec context which starts and stops a pod --- pkg/clients/exec_command.go | 216 ++++++++++++++++++++++++++++++++++++ 1 file changed, 216 insertions(+) diff --git a/pkg/clients/exec_command.go b/pkg/clients/exec_command.go index 8532cc13..788df89c 100644 --- a/pkg/clients/exec_command.go +++ b/pkg/clients/exec_command.go @@ -5,16 +5,25 @@ package clients import ( "bytes" "context" + "errors" "fmt" "strings" + "time" log "github.com/sirupsen/logrus" corev1 "k8s.io/api/core/v1" k8sErrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" "k8s.io/client-go/tools/remotecommand" "k8s.io/kubectl/pkg/scheme" ) +const ( + startTimeout = 5 * time.Second + deletionTimeout = 10 * time.Minute +) + type ExecContext interface { ExecCommand([]string) (string, string, error) ExecCommandStdIn([]string, bytes.Buffer) (string, string, error) @@ -155,3 +164,210 @@ func (c *ContainerExecContext) ExecCommand(command []string) (stdout, stderr str func (c *ContainerExecContext) ExecCommandStdIn(command []string, buffIn bytes.Buffer) (stdout, stderr string, err error) { return c.execCommand(command, &buffIn) } + +// ContainerExecContext encapsulates the context in which a command is run; the namespace, pod, and container. +type ContainerCreationExecContext struct { + *ContainerExecContext + labels map[string]string + pod *corev1.Pod + containerSecurityContext *corev1.SecurityContext + containerImage string + command []string + volumes []*Volume + hostNetwork bool +} + +type Volume struct { + VolumeSource corev1.VolumeSource + Name string + MountPath string +} + +func (c *ContainerCreationExecContext) CreatePod() error { + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: c.podName, + Namespace: c.namespace, + Labels: c.labels, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: c.containerName, + Image: c.containerImage, + ImagePullPolicy: corev1.PullIfNotPresent, + }, + }, + HostNetwork: c.hostNetwork, + }, + } + if len(c.command) > 0 { + pod.Spec.Containers[0].Command = c.command + } + if c.containerSecurityContext != nil { + pod.Spec.Containers[0].SecurityContext = c.containerSecurityContext + } + if len(c.volumes) > 0 { + volumes := make([]corev1.Volume, 0) + volumeMounts := make([]corev1.VolumeMount, 0) + + for _, v := range c.volumes { + volumes = append(volumes, corev1.Volume{Name: v.Name, VolumeSource: v.VolumeSource}) + pod.Spec.Volumes = volumes + volumeMounts = append(volumeMounts, corev1.VolumeMount{Name: v.Name, MountPath: v.MountPath}) + pod.Spec.Containers[0].VolumeMounts = volumeMounts + } + } + + pod, err := c.clientset.K8sClient.CoreV1().Pods(pod.Namespace).Create( + context.TODO(), + pod, + metav1.CreateOptions{}, + ) + c.pod = pod + if err != nil { + return fmt.Errorf("failed to create pod: %w", err) + } + return nil +} + +func (c *ContainerCreationExecContext) listPods(options *metav1.ListOptions) (*corev1.PodList, error) { + pods, err := c.clientset.K8sClient.CoreV1().Pods(c.pod.Namespace).List( + context.TODO(), + *options, + ) + if err != nil { + return pods, fmt.Errorf("failed to find pods: %s", err.Error()) + } + return pods, nil +} + +func (c *ContainerCreationExecContext) refeshPod() error { + pods, err := c.listPods(&metav1.ListOptions{ + FieldSelector: fields.OneTermEqualSelector("metadata.name", c.podName).String(), + 
ResourceVersion: c.pod.ResourceVersion, + }) + if err != nil { + return err + } + if len(pods.Items) == 0 || len(pods.Items) > 1 { + // I don't think k8s allows more than one pod with the same name + return errors.New("found multiple pods with the same name") + } + c.pod = &pods.Items[0] + + return nil +} + +func (c *ContainerCreationExecContext) IsPodRunning() (bool, error) { + err := c.refeshPod() + if err != nil { + return false, err + } + if c.pod.Status.Phase == corev1.PodRunning { + return true, nil + } + return false, nil +} + +func (c *ContainerCreationExecContext) WaitForPodToStart() error { + start := time.Now() + for time.Since(start) <= startTimeout { + running, err := c.IsPodRunning() + if err != nil { + return err + } + if running { + return nil + } + } + return errors.New("timed out waiting for pod to start") +} + +func (c *ContainerCreationExecContext) CreatePodAndWaitForStart() error { + var err error + running := false + if c.pod != nil { + running, err = c.IsPodRunning() + if err != nil { + return err + } + } + if !running { + err := c.CreatePod() + if err != nil { + return err + } + } + return c.WaitForPodToStart() +} + +func (c *ContainerCreationExecContext) DeletePod() error { + deletePolicy := metav1.DeletePropagationForeground + err := c.clientset.K8sClient.CoreV1().Pods(c.pod.Namespace).Delete( + context.TODO(), + c.pod.Name, + metav1.DeleteOptions{ + PropagationPolicy: &deletePolicy, + }) + if err != nil { + return fmt.Errorf("failed to delete pod: %w", err) + } + return nil +} + +func (c *ContainerCreationExecContext) WaitForPodToDelete() error { + start := time.Now() + for time.Since(start) <= deletionTimeout { + pods, err := c.listPods(&metav1.ListOptions{}) + if err != nil { + return err + } + found := false + for _, pod := range pods.Items { //nolint:gocritic // This isn't my object I can't use a pointer + if pod.Name == c.podName { + found = true + } + } + if !found { + return nil + } + } + return errors.New("pod has not terminated within the timeout") +} + +func (c *ContainerCreationExecContext) DeletePodAndWait() error { + err := c.DeletePod() + if err != nil { + return err + } + return c.WaitForPodToDelete() +} + +func NewContainerCreationExecContext( + clientset *Clientset, + namespace, podName, containerName, containerImage string, + labels map[string]string, + command []string, + containerSecurityContext *corev1.SecurityContext, + hostNetwork bool, + volumes []*Volume, +) *ContainerCreationExecContext { + ctx := ContainerExecContext{ + namespace: namespace, + podNamePrefix: podName, + podName: podName, + containerName: containerName, + clientset: clientset, + } + + return &ContainerCreationExecContext{ + ContainerExecContext: &ctx, + containerImage: containerImage, + labels: labels, + command: command, + containerSecurityContext: containerSecurityContext, + hostNetwork: hostNetwork, + volumes: volumes, + } +} From 86cc8f8311255cb74af0aa3aa4a4cfecbfe6eb86 Mon Sep 17 00:00:00 2001 From: Michele Costa Date: Tue, 7 Nov 2023 10:59:43 +0000 Subject: [PATCH 2/4] Add DPLL Netlink Collector --- pkg/collectors/contexts/contexts.go | 47 +++- .../devices/{dpll.go => dpll_fs.go} | 30 +-- .../devices/{dpll_test.go => dpll_fs_test.go} | 2 +- pkg/collectors/devices/dpll_netlink.go | 209 ++++++++++++++++++ pkg/collectors/dpll_collector.go | 74 +------ pkg/collectors/dpll_collector_fs.go | 89 ++++++++ pkg/collectors/dpll_collector_netlink.go | 109 +++++++++ pkg/runner/runner.go | 1 + 8 files changed, 476 insertions(+), 85 deletions(-) rename 
pkg/collectors/devices/{dpll.go => dpll_fs.go} (77%)
 rename pkg/collectors/devices/{dpll_test.go => dpll_fs_test.go} (97%)
 create mode 100644 pkg/collectors/devices/dpll_netlink.go
 create mode 100644 pkg/collectors/dpll_collector_fs.go
 create mode 100644 pkg/collectors/dpll_collector_netlink.go

diff --git a/pkg/collectors/contexts/contexts.go b/pkg/collectors/contexts/contexts.go
index 3cd4873f..2736bf40 100644
--- a/pkg/collectors/contexts/contexts.go
+++ b/pkg/collectors/contexts/contexts.go
@@ -5,14 +5,19 @@ package contexts
 import (
 	"fmt"
 
+	corev1 "k8s.io/api/core/v1"
+
 	"github.com/redhat-partner-solutions/vse-sync-collection-tools/pkg/clients"
 )
 
 const (
-	PTPNamespace     = "openshift-ptp"
-	PTPPodNamePrefix = "linuxptp-daemon-"
-	PTPContainer     = "linuxptp-daemon-container"
-	GPSContainer     = "gpsd"
+	PTPNamespace               = "openshift-ptp"
+	PTPPodNamePrefix           = "linuxptp-daemon-"
+	PTPContainer               = "linuxptp-daemon-container"
+	GPSContainer               = "gpsd"
+	NetlinkDebugPod            = "ptp-dpll-netlink-debug-pod"
+	NetlinkDebugContainer      = "ptp-dpll-netlink-debug-container"
+	NetlinkDebugContainerImage = "quay.io/jnunez/tools:dpll9.2_dev"
 )
 
 func GetPTPDaemonContext(clientset *clients.Clientset) (clients.ExecContext, error) {
@@ -22,3 +27,37 @@ func GetPTPDaemonContext(clientset *clients.Clientset) (clients.ExecContext, err
 	}
 	return ctx, nil
 }
+
+func GetNetlinkContext(clientset *clients.Clientset) (*clients.ContainerCreationExecContext, error) {
+	hpt := corev1.HostPathDirectory
+	ctx := clients.NewContainerCreationExecContext(
+		clientset,
+		PTPNamespace,
+		NetlinkDebugPod,
+		NetlinkDebugContainer,
+		NetlinkDebugContainerImage,
+		map[string]string{},
+		[]string{"sleep", "inf"},
+		&corev1.SecurityContext{
+			Capabilities: &corev1.Capabilities{
+				// Requires NET_ADMIN: having (NET_RAW + NET_BIND_SERVICE + NET_BROADCAST) does not work
+				// Without NET_ADMIN it will not connect to the netlink interface
+				// Requires SYS_ADMIN: having every other permission does not work.
+ // Without SYS_ADMIN lspci does not include the Serial number in the comments thefore can not calculate the clockID + Add: []corev1.Capability{ + "SYS_ADMIN", + "NET_ADMIN", + }, + }, + }, + true, + []*clients.Volume{ + { + Name: "modules", + MountPath: "/lib/modules", + VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: "/lib/modules", Type: &hpt}}, + }, + }, + ) + return ctx, nil +} diff --git a/pkg/collectors/devices/dpll.go b/pkg/collectors/devices/dpll_fs.go similarity index 77% rename from pkg/collectors/devices/dpll.go rename to pkg/collectors/devices/dpll_fs.go index f855efcd..4c7695dd 100644 --- a/pkg/collectors/devices/dpll.go +++ b/pkg/collectors/devices/dpll_fs.go @@ -19,7 +19,7 @@ const ( unitConversionFactor = 100 ) -type DevDPLLInfo struct { +type DevFilesystemDPLLInfo struct { Timestamp string `fetcherKey:"date" json:"timestamp"` EECState string `fetcherKey:"dpll_0_state" json:"eecstate"` PPSState string `fetcherKey:"dpll_1_state" json:"state"` @@ -27,7 +27,7 @@ type DevDPLLInfo struct { } // AnalyserJSON returns the json expected by the analysers -func (dpllInfo *DevDPLLInfo) GetAnalyserFormat() ([]*callbacks.AnalyserFormatType, error) { +func (dpllInfo *DevFilesystemDPLLInfo) GetAnalyserFormat() ([]*callbacks.AnalyserFormatType, error) { formatted := callbacks.AnalyserFormatType{ ID: "dpll/time-error", Data: map[string]any{ @@ -41,14 +41,14 @@ func (dpllInfo *DevDPLLInfo) GetAnalyserFormat() ([]*callbacks.AnalyserFormatTyp } var ( - dpllFetcher map[string]*fetcher.Fetcher + dpllFSFetcher map[string]*fetcher.Fetcher ) func init() { - dpllFetcher = make(map[string]*fetcher.Fetcher) + dpllFSFetcher = make(map[string]*fetcher.Fetcher) } -func postProcessDPLL(result map[string]string) (map[string]any, error) { +func postProcessDPLLFilesystem(result map[string]string) (map[string]any, error) { processedResult := make(map[string]any) offset, err := strconv.ParseFloat(result["dpll_1_offset"], 32) if err != nil { @@ -58,9 +58,9 @@ func postProcessDPLL(result map[string]string) (map[string]any, error) { return processedResult, nil } -// BuildDPLLInfoFetcher popluates the fetcher required for +// BuildFilesystemDPLLInfoFetcher popluates the fetcher required for // collecting the DPLLInfo -func BuildDPLLInfoFetcher(interfaceName string) error { //nolint:dupl // Further dedup risks be too abstract or fragile +func BuildFilesystemDPLLInfoFetcher(interfaceName string) error { //nolint:dupl // Further dedup risks be too abstract or fragile fetcherInst, err := fetcher.FetcherFactory( []*clients.Cmd{dateCmd}, []fetcher.AddCommandArgs{ @@ -85,21 +85,21 @@ func BuildDPLLInfoFetcher(interfaceName string) error { //nolint:dupl // Further log.Errorf("failed to create fetcher for dpll: %s", err.Error()) return fmt.Errorf("failed to create fetcher for dpll: %w", err) } - dpllFetcher[interfaceName] = fetcherInst - fetcherInst.SetPostProcessor(postProcessDPLL) + dpllFSFetcher[interfaceName] = fetcherInst + fetcherInst.SetPostProcessor(postProcessDPLLFilesystem) return nil } -// GetDevDPLLInfo returns the device DPLL info for an interface. -func GetDevDPLLInfo(ctx clients.ExecContext, interfaceName string) (DevDPLLInfo, error) { - dpllInfo := DevDPLLInfo{} - fetcherInst, fetchedInstanceOk := dpllFetcher[interfaceName] +// GetDevDPLLFilesystemInfo returns the device DPLL info for an interface. 
+func GetDevDPLLFilesystemInfo(ctx clients.ExecContext, interfaceName string) (DevFilesystemDPLLInfo, error) { + dpllInfo := DevFilesystemDPLLInfo{} + fetcherInst, fetchedInstanceOk := dpllFSFetcher[interfaceName] if !fetchedInstanceOk { - err := BuildDPLLInfoFetcher(interfaceName) + err := BuildFilesystemDPLLInfoFetcher(interfaceName) if err != nil { return dpllInfo, err } - fetcherInst, fetchedInstanceOk = dpllFetcher[interfaceName] + fetcherInst, fetchedInstanceOk = dpllFSFetcher[interfaceName] if !fetchedInstanceOk { return dpllInfo, errors.New("failed to create fetcher for DPLLInfo") } diff --git a/pkg/collectors/devices/dpll_test.go b/pkg/collectors/devices/dpll_fs_test.go similarity index 97% rename from pkg/collectors/devices/dpll_test.go rename to pkg/collectors/devices/dpll_fs_test.go index f1daa775..9215a72e 100644 --- a/pkg/collectors/devices/dpll_test.go +++ b/pkg/collectors/devices/dpll_fs_test.go @@ -56,7 +56,7 @@ var _ = Describe("NewContainerContext", func() { ctx, err := clients.NewContainerContext(clientset, "TestNamespace", "Test", "TestContainer") Expect(err).NotTo(HaveOccurred()) - info, err := devices.GetDevDPLLInfo(ctx, "aFakeInterface") + info, err := devices.GetDevDPLLFilesystemInfo(ctx, "aFakeInterface") Expect(err).NotTo(HaveOccurred()) Expect(info.Timestamp).To(Equal("2023-06-16T11:49:47.0584Z")) Expect(info.EECState).To(Equal(eecState)) diff --git a/pkg/collectors/devices/dpll_netlink.go b/pkg/collectors/devices/dpll_netlink.go new file mode 100644 index 00000000..7d4675d8 --- /dev/null +++ b/pkg/collectors/devices/dpll_netlink.go @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +package devices + +import ( + "encoding/json" + "errors" + "fmt" + "strconv" + "strings" + + log "github.com/sirupsen/logrus" + + "github.com/redhat-partner-solutions/vse-sync-collection-tools/pkg/callbacks" + "github.com/redhat-partner-solutions/vse-sync-collection-tools/pkg/clients" + "github.com/redhat-partner-solutions/vse-sync-collection-tools/pkg/fetcher" +) + +var states = map[string]string{ + "unknown": "-1", + "invalid": "0", + "freerun": "1", + "locked": "2", + "locked-ho-acq": "3", + "holdover": "4", +} + +type DevNetlinkDPLLInfo struct { + Timestamp string `fetcherKey:"date" json:"timestamp"` + EECState string `fetcherKey:"eec" json:"eecstate"` + PPSState string `fetcherKey:"pps" json:"state"` +} + +// AnalyserJSON returns the json expected by the analysers +func (dpllInfo *DevNetlinkDPLLInfo) GetAnalyserFormat() ([]*callbacks.AnalyserFormatType, error) { + formatted := callbacks.AnalyserFormatType{ + ID: "dpll/states", + Data: map[string]any{ + "timestamp": dpllInfo.Timestamp, + "eecstate": dpllInfo.EECState, + "state": dpllInfo.PPSState, + }, + } + return []*callbacks.AnalyserFormatType{&formatted}, nil +} + +type NetlinkEntry struct { + LockStatus string `json:"lock-status"` //nolint:tagliatelle // not my choice + Driver string `json:"module-name"` //nolint:tagliatelle // not my choice + ClockType string `json:"type"` //nolint:tagliatelle // not my choice + ClockID int64 `json:"clock-id"` //nolint:tagliatelle // not my choice + ID int `json:"id"` //nolint:tagliatelle // not my choice +} + +// # Example output +// [{'clock-id': 5799633565435100136, +// 'id': 0, +// 'lock-status': 'locked-ho-acq', +// 'mode': 'automatic', +// 'mode-supported': ['automatic'], +// 'module-name': 'ice', +// 'type': 'eec'}, +// {'clock-id': 5799633565435100136, +// 'id': 1, +// 'lock-status': 'locked-ho-acq', +// 'mode': 'automatic', +// 'mode-supported': ['automatic'], +// 
'module-name': 'ice', +// 'type': 'pps'}] + +var ( + dpllNetlinkFetcher map[int64]*fetcher.Fetcher + dpllClockIDFetcher map[string]*fetcher.Fetcher +) + +func init() { + dpllNetlinkFetcher = make(map[int64]*fetcher.Fetcher) + dpllClockIDFetcher = make(map[string]*fetcher.Fetcher) +} + +func buildPostProcessDPLLNetlink(clockID int64) fetcher.PostProcessFuncType { + return func(result map[string]string) (map[string]any, error) { + processedResult := make(map[string]any) + + entries := make([]NetlinkEntry, 0) + cleaned := strings.ReplaceAll(result["dpll-netlink"], "'", "\"") + err := json.Unmarshal([]byte(cleaned), &entries) + if err != nil { + log.Errorf("Failed to unmarshal netlink output: %s", err.Error()) + } + + log.Debug("entries: ", entries) + for _, entry := range entries { + if entry.ClockID == clockID { + state, ok := states[entry.LockStatus] + if !ok { + log.Errorf("Unknown state: %s", state) + state = "-1" + } + processedResult[entry.ClockType] = state + } + } + return processedResult, nil + } +} + +// BuildDPLLNetlinkInfoFetcher popluates the fetcher required for +// collecting the DPLLInfo +func BuildDPLLNetlinkInfoFetcher(clockID int64) error { //nolint:dupl // Further dedup risks be too abstract or fragile + fetcherInst, err := fetcher.FetcherFactory( + []*clients.Cmd{dateCmd}, + []fetcher.AddCommandArgs{ + { + Key: "dpll-netlink", + Command: "/linux/tools/net/ynl/cli.py --spec /linux/Documentation/netlink/specs/dpll.yaml --dump device-get", + Trim: true, + }, + }, + ) + if err != nil { + log.Errorf("failed to create fetcher for dpll netlink: %s", err.Error()) + return fmt.Errorf("failed to create fetcher for dpll netlink: %w", err) + } + dpllNetlinkFetcher[clockID] = fetcherInst + fetcherInst.SetPostProcessor(buildPostProcessDPLLNetlink(clockID)) + return nil +} + +// GetDevDPLLInfo returns the device DPLL info for an interface. 
+func GetDevDPLLNetlinkInfo(ctx clients.ExecContext, clockID int64) (DevNetlinkDPLLInfo, error) { + dpllInfo := DevNetlinkDPLLInfo{} + fetcherInst, fetchedInstanceOk := dpllNetlinkFetcher[clockID] + if !fetchedInstanceOk { + err := BuildDPLLNetlinkInfoFetcher(clockID) + if err != nil { + return dpllInfo, err + } + fetcherInst, fetchedInstanceOk = dpllNetlinkFetcher[clockID] + if !fetchedInstanceOk { + return dpllInfo, errors.New("failed to create fetcher for DPLLInfo using netlink interface") + } + } + err := fetcherInst.Fetch(ctx, &dpllInfo) + if err != nil { + log.Debugf("failed to fetch dpllInfo via netlink: %s", err.Error()) + return dpllInfo, fmt.Errorf("failed to fetch dpllInfo via netlink: %w", err) + } + return dpllInfo, nil +} + +func BuildClockIDFetcher(interfaceName string) error { + fetcherInst, err := fetcher.FetcherFactory( + []*clients.Cmd{dateCmd}, + []fetcher.AddCommandArgs{ + { + Key: "dpll-netlink-clock-id", + Command: fmt.Sprintf( + `export IFNAME=%s; export BUSID=$(readlink /sys/class/net/$IFNAME/device | xargs basename | cut -d ':' -f 2,3);`+ + ` echo $(("16#$(lspci -v | grep $BUSID -A 20 |grep 'Serial Number' | awk '{print $NF}' | tr -d '-')"))`, + interfaceName, + ), + Trim: true, + }, + }, + ) + if err != nil { + log.Errorf("failed to create fetcher for dpll clock ID: %s", err.Error()) + return fmt.Errorf("failed to create fetcher for dpll clock ID: %w", err) + } + fetcherInst.SetPostProcessor(postProcessDPLLNetlinkClockID) + dpllClockIDFetcher[interfaceName] = fetcherInst + return nil +} + +func postProcessDPLLNetlinkClockID(result map[string]string) (map[string]any, error) { + processedResult := make(map[string]any) + clockID, err := strconv.ParseInt(result["dpll-netlink-clock-id"], 10, 64) + if err != nil { + return processedResult, fmt.Errorf("failed to parse int for clock id: %w", err) + } + processedResult["clockID"] = clockID + return processedResult, nil +} + +type NetlinkClockID struct { + Timestamp string `fetcherKey:"date" json:"timestamp"` + ClockID int64 `fetcherKey:"clockID" json:"clockId"` +} + +func GetClockID(ctx clients.ExecContext, interfaceName string) (NetlinkClockID, error) { + clockID := NetlinkClockID{} + fetcherInst, fetchedInstanceOk := dpllClockIDFetcher[interfaceName] + if !fetchedInstanceOk { + err := BuildClockIDFetcher(interfaceName) + if err != nil { + return clockID, err + } + fetcherInst, fetchedInstanceOk = dpllClockIDFetcher[interfaceName] + if !fetchedInstanceOk { + return clockID, errors.New("failed to create fetcher for DPLLInfo using netlink interface") + } + } + err := fetcherInst.Fetch(ctx, &clockID) + if err != nil { + log.Debugf("failed to fetch netlink clockID %s", err.Error()) + return clockID, fmt.Errorf("failed to fetch netlink clockID %w", err) + } + return clockID, nil +} diff --git a/pkg/collectors/dpll_collector.go b/pkg/collectors/dpll_collector.go index 0dfa44a4..1503ea44 100644 --- a/pkg/collectors/dpll_collector.go +++ b/pkg/collectors/dpll_collector.go @@ -3,87 +3,31 @@ package collectors import ( - "errors" "fmt" - "github.com/redhat-partner-solutions/vse-sync-collection-tools/pkg/clients" + log "github.com/sirupsen/logrus" + "github.com/redhat-partner-solutions/vse-sync-collection-tools/pkg/collectors/contexts" "github.com/redhat-partner-solutions/vse-sync-collection-tools/pkg/collectors/devices" - "github.com/redhat-partner-solutions/vse-sync-collection-tools/pkg/utils" ) -type DPLLCollector struct { - *baseCollector - ctx clients.ExecContext - interfaceName string -} - const ( DPLLCollectorName = "DPLL" - 
DPLLInfo = "dpll-info" ) -// polls for the dpll info then passes it to the callback -func (dpll *DPLLCollector) poll() error { - dpllInfo, err := devices.GetDevDPLLInfo(dpll.ctx, dpll.interfaceName) - - if err != nil { - return fmt.Errorf("failed to fetch %s %w", DPLLInfo, err) - } - err = dpll.callback.Call(&dpllInfo, DPLLInfo) - if err != nil { - return fmt.Errorf("callback failed %w", err) - } - return nil -} - -// Poll collects information from the cluster then -// calls the callback.Call to allow that to persist it -func (dpll *DPLLCollector) Poll(resultsChan chan PollResult, wg *utils.WaitGroupCount) { - defer func() { - wg.Done() - }() - errorsToReturn := make([]error, 0) - err := dpll.poll() - if err != nil { - errorsToReturn = append(errorsToReturn, err) - } - resultsChan <- PollResult{ - CollectorName: DPLLCollectorName, - Errors: errorsToReturn, - } -} - // Returns a new DPLLCollector from the CollectionConstuctor Factory func NewDPLLCollector(constructor *CollectionConstructor) (Collector, error) { ctx, err := contexts.GetPTPDaemonContext(constructor.Clientset) if err != nil { - return &DPLLCollector{}, fmt.Errorf("failed to create DPLLCollector: %w", err) + return &DPLLNetlinkCollector{}, fmt.Errorf("failed to create DPLLCollector: %w", err) } - err = devices.BuildDPLLInfoFetcher(constructor.PTPInterface) - if err != nil { - return &DPLLCollector{}, fmt.Errorf("failed to build fetcher for DPLLInfo %w", err) - } - - collector := DPLLCollector{ - baseCollector: newBaseCollector( - constructor.PollInterval, - false, - constructor.Callback, - ), - ctx: ctx, - interfaceName: constructor.PTPInterface, + dpllFSExists, err := devices.IsDPLLFileSystemPresent(ctx, constructor.PTPInterface) + log.Debug("DPLL FS exists: ", dpllFSExists) + if dpllFSExists && err == nil { + return NewDPLLFilesystemCollector(constructor) + } else { + return NewDPLLNetlinkCollector(constructor) } - - dpllFSExists, err := devices.IsDPLLFileSystemPresent(collector.ctx, collector.interfaceName) - if err != nil { - return &collector, utils.NewRequirementsNotMetError(fmt.Errorf("checking for the DPLL filesystem failed: %w", err)) - } - if !dpllFSExists { - return &collector, utils.NewRequirementsNotMetError(errors.New("filesystem with DPLL stats not present")) - } - - return &collector, nil } func init() { diff --git a/pkg/collectors/dpll_collector_fs.go b/pkg/collectors/dpll_collector_fs.go new file mode 100644 index 00000000..c36d83ec --- /dev/null +++ b/pkg/collectors/dpll_collector_fs.go @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +package collectors + +import ( + "fmt" + + "github.com/redhat-partner-solutions/vse-sync-collection-tools/pkg/clients" + "github.com/redhat-partner-solutions/vse-sync-collection-tools/pkg/collectors/contexts" + "github.com/redhat-partner-solutions/vse-sync-collection-tools/pkg/collectors/devices" + "github.com/redhat-partner-solutions/vse-sync-collection-tools/pkg/utils" +) + +type DPLLFilesystemCollector struct { + *baseCollector + ctx clients.ExecContext + interfaceName string +} + +const ( + DPLLFilesystemCollectorName = "DPLL-Filesystem" + DPLLInfo = "dpll-info-fs" +) + +// Start sets up the collector so it is ready to be polled +func (dpll *DPLLFilesystemCollector) Start() error { + dpll.running = true + return nil +} + +// polls for the dpll info then passes it to the callback +func (dpll *DPLLFilesystemCollector) poll() error { + dpllInfo, err := devices.GetDevDPLLFilesystemInfo(dpll.ctx, dpll.interfaceName) + + if err != nil { + return fmt.Errorf("failed 
to fetch %s %w", DPLLInfo, err) + } + err = dpll.callback.Call(&dpllInfo, DPLLInfo) + if err != nil { + return fmt.Errorf("callback failed %w", err) + } + return nil +} + +// Poll collects information from the cluster then +// calls the callback.Call to allow that to persist it +func (dpll *DPLLFilesystemCollector) Poll(resultsChan chan PollResult, wg *utils.WaitGroupCount) { + defer func() { + wg.Done() + }() + errorsToReturn := make([]error, 0) + err := dpll.poll() + if err != nil { + errorsToReturn = append(errorsToReturn, err) + } + resultsChan <- PollResult{ + CollectorName: DPLLFilesystemCollectorName, + Errors: errorsToReturn, + } +} + +// CleanUp stops a running collector +func (dpll *DPLLFilesystemCollector) CleanUp() error { + dpll.running = false + return nil +} + +// Returns a new DPLLFilesystemCollector from the CollectionConstuctor Factory +func NewDPLLFilesystemCollector(constructor *CollectionConstructor) (Collector, error) { + ctx, err := contexts.GetPTPDaemonContext(constructor.Clientset) + if err != nil { + return &DPLLFilesystemCollector{}, fmt.Errorf("failed to create DPLLFilesystemCollector: %w", err) + } + err = devices.BuildFilesystemDPLLInfoFetcher(constructor.PTPInterface) + if err != nil { + return &DPLLFilesystemCollector{}, fmt.Errorf("failed to build fetcher for DPLLInfo %w", err) + } + + collector := DPLLFilesystemCollector{ + baseCollector: newBaseCollector( + constructor.PollInterval, + false, + constructor.Callback, + ), + interfaceName: constructor.PTPInterface, + ctx: ctx, + } + return &collector, nil +} diff --git a/pkg/collectors/dpll_collector_netlink.go b/pkg/collectors/dpll_collector_netlink.go new file mode 100644 index 00000000..ffe4ce96 --- /dev/null +++ b/pkg/collectors/dpll_collector_netlink.go @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +package collectors + +import ( + "fmt" + + log "github.com/sirupsen/logrus" + + "github.com/redhat-partner-solutions/vse-sync-collection-tools/pkg/clients" + "github.com/redhat-partner-solutions/vse-sync-collection-tools/pkg/collectors/contexts" + "github.com/redhat-partner-solutions/vse-sync-collection-tools/pkg/collectors/devices" + "github.com/redhat-partner-solutions/vse-sync-collection-tools/pkg/utils" +) + +type DPLLNetlinkCollector struct { + *baseCollector + ctx *clients.ContainerCreationExecContext + interfaceName string + clockID int64 +} + +const ( + DPLLNetlinkCollectorName = "DPLL-Netlink" + DPLLNetlinkInfo = "dpll-info-nl" +) + +// Start sets up the collector so it is ready to be polled +func (dpll *DPLLNetlinkCollector) Start() error { + dpll.running = true + err := dpll.ctx.CreatePodAndWaitForStart() + if err != nil { + return fmt.Errorf("dpll netlink collector failed to start pod: %w", err) + } + log.Debug("dpll.interfaceName: ", dpll.interfaceName) + log.Debug("dpll.ctx: ", dpll.ctx) + clockIDStuct, err := devices.GetClockID(dpll.ctx, dpll.interfaceName) + if err != nil { + return fmt.Errorf("dpll netlink collector failed to find clock id: %w", err) + } + log.Debug("clockIDStuct.ClockID: ", clockIDStuct.ClockID) + err = devices.BuildDPLLNetlinkInfoFetcher(clockIDStuct.ClockID) + if err != nil { + return fmt.Errorf("failed to build fetcher for DPLLNetlinkInfo %w", err) + } + dpll.clockID = clockIDStuct.ClockID + return nil +} + +// polls for the dpll info then passes it to the callback +func (dpll *DPLLNetlinkCollector) poll() error { + dpllInfo, err := devices.GetDevDPLLNetlinkInfo(dpll.ctx, dpll.clockID) + + if err != nil { + return fmt.Errorf("failed to fetch %s %w", 
DPLLNetlinkInfo, err) + } + err = dpll.callback.Call(&dpllInfo, DPLLNetlinkInfo) + if err != nil { + return fmt.Errorf("callback failed %w", err) + } + return nil +} + +// Poll collects information from the cluster then +// calls the callback.Call to allow that to persist it +func (dpll *DPLLNetlinkCollector) Poll(resultsChan chan PollResult, wg *utils.WaitGroupCount) { + defer func() { + wg.Done() + }() + errorsToReturn := make([]error, 0) + err := dpll.poll() + if err != nil { + errorsToReturn = append(errorsToReturn, err) + } + resultsChan <- PollResult{ + CollectorName: DPLLNetlinkCollectorName, + Errors: errorsToReturn, + } +} + +// CleanUp stops a running collector +func (dpll *DPLLNetlinkCollector) CleanUp() error { + dpll.running = false + err := dpll.ctx.DeletePodAndWait() + if err != nil { + return fmt.Errorf("dpll netlink collector failed to clean up: %w", err) + } + return nil +} + +// Returns a new DPLLNetlinkCollector from the CollectionConstuctor Factory +func NewDPLLNetlinkCollector(constructor *CollectionConstructor) (Collector, error) { + ctx, err := contexts.GetNetlinkContext(constructor.Clientset) + if err != nil { + return &DPLLNetlinkCollector{}, fmt.Errorf("failed to create DPLLNetlinkCollector: %w", err) + } + + collector := DPLLNetlinkCollector{ + baseCollector: newBaseCollector( + constructor.PollInterval, + false, + constructor.Callback, + ), + interfaceName: constructor.PTPInterface, + ctx: ctx, + } + + return &collector, nil +} diff --git a/pkg/runner/runner.go b/pkg/runner/runner.go index cc5d8e9a..e9b5c4b9 100644 --- a/pkg/runner/runner.go +++ b/pkg/runner/runner.go @@ -186,6 +186,7 @@ func (runner *CollectorRunner) start() { for collectorName, collector := range runner.collectorInstances { log.Debugf("start collector %v", collector) err := collector.Start() + log.Info("collector:", collectorName, " start error: ", err) utils.IfErrorExitOrPanic(err) log.Debugf("Spawning collector: %v", collector) From 978dfad049b77e22c93e968b481558cec142dfaf Mon Sep 17 00:00:00 2001 From: Michele Costa Date: Wed, 29 Nov 2023 11:11:45 +0000 Subject: [PATCH 3/4] Fix func names and refresh error --- pkg/clients/exec_command.go | 31 ++++++++++++------------ pkg/collectors/dpll_collector_netlink.go | 2 +- pkg/runner/runner.go | 1 - 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/pkg/clients/exec_command.go b/pkg/clients/exec_command.go index 788df89c..34ba9c46 100644 --- a/pkg/clients/exec_command.go +++ b/pkg/clients/exec_command.go @@ -183,7 +183,7 @@ type Volume struct { MountPath string } -func (c *ContainerCreationExecContext) CreatePod() error { +func (c *ContainerCreationExecContext) createPod() error { pod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: c.podName, @@ -250,16 +250,15 @@ func (c *ContainerCreationExecContext) refeshPod() error { if err != nil { return err } - if len(pods.Items) == 0 || len(pods.Items) > 1 { - // I don't think k8s allows more than one pod with the same name - return errors.New("found multiple pods with the same name") + if len(pods.Items) == 0 { + return fmt.Errorf("failed to find pod: %s", c.podName) } c.pod = &pods.Items[0] return nil } -func (c *ContainerCreationExecContext) IsPodRunning() (bool, error) { +func (c *ContainerCreationExecContext) isPodRunning() (bool, error) { err := c.refeshPod() if err != nil { return false, err @@ -270,39 +269,40 @@ func (c *ContainerCreationExecContext) IsPodRunning() (bool, error) { return false, nil } -func (c *ContainerCreationExecContext) WaitForPodToStart() error { +func (c 
*ContainerCreationExecContext) waitForPodToStart() error { start := time.Now() for time.Since(start) <= startTimeout { - running, err := c.IsPodRunning() + running, err := c.isPodRunning() if err != nil { return err } if running { return nil } + time.Sleep(time.Microsecond) } return errors.New("timed out waiting for pod to start") } -func (c *ContainerCreationExecContext) CreatePodAndWaitForStart() error { +func (c *ContainerCreationExecContext) CreatePodAndWait() error { var err error running := false if c.pod != nil { - running, err = c.IsPodRunning() + running, err = c.isPodRunning() if err != nil { return err } } if !running { - err := c.CreatePod() + err := c.createPod() if err != nil { return err } } - return c.WaitForPodToStart() + return c.waitForPodToStart() } -func (c *ContainerCreationExecContext) DeletePod() error { +func (c *ContainerCreationExecContext) deletePod() error { deletePolicy := metav1.DeletePropagationForeground err := c.clientset.K8sClient.CoreV1().Pods(c.pod.Namespace).Delete( context.TODO(), @@ -316,7 +316,7 @@ func (c *ContainerCreationExecContext) DeletePod() error { return nil } -func (c *ContainerCreationExecContext) WaitForPodToDelete() error { +func (c *ContainerCreationExecContext) waitForPodToDelete() error { start := time.Now() for time.Since(start) <= deletionTimeout { pods, err := c.listPods(&metav1.ListOptions{}) @@ -332,16 +332,17 @@ func (c *ContainerCreationExecContext) WaitForPodToDelete() error { if !found { return nil } + time.Sleep(time.Microsecond) } return errors.New("pod has not terminated within the timeout") } func (c *ContainerCreationExecContext) DeletePodAndWait() error { - err := c.DeletePod() + err := c.deletePod() if err != nil { return err } - return c.WaitForPodToDelete() + return c.waitForPodToDelete() } func NewContainerCreationExecContext( diff --git a/pkg/collectors/dpll_collector_netlink.go b/pkg/collectors/dpll_collector_netlink.go index ffe4ce96..d53b9c92 100644 --- a/pkg/collectors/dpll_collector_netlink.go +++ b/pkg/collectors/dpll_collector_netlink.go @@ -28,7 +28,7 @@ const ( // Start sets up the collector so it is ready to be polled func (dpll *DPLLNetlinkCollector) Start() error { dpll.running = true - err := dpll.ctx.CreatePodAndWaitForStart() + err := dpll.ctx.CreatePodAndWait() if err != nil { return fmt.Errorf("dpll netlink collector failed to start pod: %w", err) } diff --git a/pkg/runner/runner.go b/pkg/runner/runner.go index e9b5c4b9..cc5d8e9a 100644 --- a/pkg/runner/runner.go +++ b/pkg/runner/runner.go @@ -186,7 +186,6 @@ func (runner *CollectorRunner) start() { for collectorName, collector := range runner.collectorInstances { log.Debugf("start collector %v", collector) err := collector.Start() - log.Info("collector:", collectorName, " start error: ", err) utils.IfErrorExitOrPanic(err) log.Debugf("Spawning collector: %v", collector) From d46f4312fa879395f59d877dd76e003dbfae579a Mon Sep 17 00:00:00 2001 From: Michele Costa Date: Tue, 19 Dec 2023 11:53:00 +0000 Subject: [PATCH 4/4] Update debug container to redhat-partner-solutions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Jose Núñez --- pkg/collectors/contexts/contexts.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/collectors/contexts/contexts.go b/pkg/collectors/contexts/contexts.go index 2736bf40..8bda10ea 100644 --- a/pkg/collectors/contexts/contexts.go +++ b/pkg/collectors/contexts/contexts.go @@ -17,7 +17,7 @@ const ( GPSContainer = "gpsd" NetlinkDebugPod = 
"ptp-dpll-netlink-debug-pod" NetlinkDebugContainer = "ptp-dpll-netlink-debug-container" - NetlinkDebugContainerImage = "quay.io/jnunez/tools:dpll9.2_dev" + NetlinkDebugContainerImage = "quay.io/redhat-partner-solutions/dpll-debug:0.1" ) func GetPTPDaemonContext(clientset *clients.Clientset) (clients.ExecContext, error) {