diff --git a/pkg/cgroup/resolve_container.go b/pkg/cgroup/resolve_container.go index 1f38b1cc7d..0176be5e44 100644 --- a/pkg/cgroup/resolve_container.go +++ b/pkg/cgroup/resolve_container.go @@ -25,6 +25,7 @@ import ( "path/filepath" "regexp" "strings" + "sync" "github.com/sustainable-computing-io/kepler/pkg/kubelet" "github.com/sustainable-computing-io/kepler/pkg/utils" @@ -40,149 +41,130 @@ type ContainerInfo struct { const ( unknownPath string = "unknown" - - procPath string = "/proc/%d/cgroup" - cgroupPath string = "/sys/fs/cgroup" + procPath string = "/proc/%d/cgroup" + cgroupPath string = "/sys/fs/cgroup" ) var ( - byteOrder binary.ByteOrder = utils.DetermineHostByteOrder() - podLister kubelet.KubeletPodLister = kubelet.KubeletPodLister{} - - // map to cache data to speedup lookups - containerIDCache = map[uint64]string{} - containerIDToContainerInfo = map[string]*ContainerInfo{} - cGroupIDToPath = map[uint64]string{} - - validContainerIDRegex = regexp.MustCompile("^[a-zA-Z0-9]+$") - regexReplaceContainerIDPrefix = regexp.MustCompile(`.*//`) + instance *cache ) +type cache struct { + containerIDCache sync.Map // map[uint64]string + containerIDToContainerInfo sync.Map // map[string]*ContainerInfo + cGroupIDToPath sync.Map // map[uint64]string + byteOrder binary.ByteOrder +} + func Init() (*[]corev1.Pod, error) { pods := []corev1.Pod{} return &pods, nil } -func GetContainerID(cGroupID, pid uint64, withCGroupID bool) (string, error) { - info, err := GetContainerInfo(cGroupID, pid, withCGroupID) - return info.ContainerID, err +// init() Creates the singleton Config instance +func init() { + instance = newCache() } -func GetContainerInfo(cGroupID, pid uint64, withCGroupID bool) (*ContainerInfo, error) { - var err error - var containerID string +func GetCache() *cache { + return instance +} - name := utils.SystemProcessName - namespace := utils.SystemProcessNamespace - if cGroupID == 1 && withCGroupID { - // some kernel processes have cgroup id equal 1 or 0 - name = utils.KernelProcessName - namespace = utils.KernelProcessNamespace - } - info := &ContainerInfo{ - ContainerID: name, - ContainerName: name, - PodName: name, - Namespace: namespace, +// newConfig creates and returns a new Config instance. +func newCache() *cache { + return &cache{ + containerIDCache: sync.Map{}, + containerIDToContainerInfo: sync.Map{}, + cGroupIDToPath: sync.Map{}, + byteOrder: utils.DetermineHostByteOrder(), } +} - if containerID, err = getContainerIDFromPath(cGroupID, pid, withCGroupID); err != nil { - return info, err +func (c *cache) hasContainerID(id string) bool { + _, ok := instance.containerIDToContainerInfo.Load(id) + return ok +} + +func (c *cache) setContainerIDToContainerInfo(id string, info *ContainerInfo) { + instance.containerIDToContainerInfo.Store(id, info) +} + +func (c *cache) getContainerInfo(id string) (ContainerInfo, error) { + info, ok := instance.containerIDToContainerInfo.Load(id) + if !ok { + return ContainerInfo{}, fmt.Errorf("container info not found for id: %s", id) } - if i, ok := containerIDToContainerInfo[containerID]; ok { - return i, nil - } else { - info.ContainerID = containerID - containerIDToContainerInfo[containerID] = info + containerInfo, ok := info.(*ContainerInfo) + if !ok { + return ContainerInfo{}, fmt.Errorf("invalid type stored for id: %s", id) } - return containerIDToContainerInfo[containerID], nil + return *containerInfo, nil } -func ParseContainerIDFromPodStatus(containerID string) string { - return regexReplaceContainerIDPrefix.ReplaceAllString(containerID, "") +func (c *cache) setContainerIDCache(pid uint64, id string) { + instance.containerIDCache.Store(pid, id) } -func getContainerIDFromPath(cGroupID, pid uint64, withCGroupID bool) (string, error) { - if cGroupID == 1 && withCGroupID { - return utils.KernelProcessName, nil +func (c *cache) getContainerIDFromCache(pid uint64) (string, bool) { + value, ok := instance.containerIDCache.Load(pid) + if !ok { + return "", false } - var err error - var containerID string - if withCGroupID { - containerID, err = getContainerIDFromcGroupID(cGroupID) - } else { - containerID, err = GetContainerIDFromPID(pid) + containerID, ok := value.(string) + if !ok { + return "", false } - return containerID, err -} - -// AddContainerIDToCache add the container id to cache using the pid as the key -func AddContainerIDToCache(pid uint64, containerID string) { - containerIDCache[pid] = containerID + return containerID, true } -// GetContainerIDFromPID find the container ID using the process PID -func GetContainerIDFromPID(pid uint64) (string, error) { - if p, ok := containerIDCache[pid]; ok { - return p, nil +func (c *cache) getGetContainerIDFromPID(pid uint64) (string, error) { + p, ok := instance.containerIDCache.Load(pid) + if ok { + containerID, ok := p.(string) + if ok { + return containerID, nil + } } - var err error - var path string - if path, err = getPathFromPID(procPath, pid); err != nil { + path, err := getPathFromPID(procPath, pid) + if err != nil { return utils.SystemProcessName, err } containerID, err := extractPodContainerIDfromPath(path) - AddContainerIDToCache(pid, containerID) - return containerIDCache[pid], err -} - -func getPathFromPID(searchPath string, pid uint64) (string, error) { - path := fmt.Sprintf(searchPath, pid) - file, err := os.Open(path) if err != nil { - return "", fmt.Errorf("failed to open cgroup description file for pid %d: %v", pid, err) + return utils.SystemProcessName, err } - defer file.Close() + AddContainerIDToCache(pid, containerID) - scanner := bufio.NewScanner(file) - for scanner.Scan() { - line := scanner.Text() - if strings.Contains(line, "pod") || strings.Contains(line, "containerd") || strings.Contains(line, "crio") { - // check if the string has ".scope" in it and truncate everything else after ".scope" - if strings.Contains(line, ".scope") { - line = strings.Split(line, ".scope")[0] + ".scope" - } - return line, nil - } - } - // this process doesn't belong to a pod, return unknown path to avoid future lookups - return unknownPath, nil + return containerID, nil } -func getContainerIDFromcGroupID(cGroupID uint64) (string, error) { - if id, ok := containerIDCache[cGroupID]; ok { +func (c *cache) getContainerIDFromcGroupID(cGroupID uint64) (string, error) { + id, ok := instance.getContainerIDFromCache(cGroupID) + if ok { return id, nil } - var err error - var path string - if path, err = getPathFromcGroupID(cGroupID); err != nil { + path, err := instance.getPathFromcGroupID(cGroupID) + if err != nil { return utils.SystemProcessName, err } containerID, err := extractPodContainerIDfromPath(path) + if err != nil { + return utils.SystemProcessName, err + } AddContainerIDToCache(cGroupID, containerID) - return containerIDCache[cGroupID], err + + return containerID, nil } -// getPathFromcGroupID uses cgroupfs to get cgroup path from id -// it needs cgroup v2 (per https://github.com/iovisor/bpftrace/issues/950) and kernel 4.18+ (https://github.com/torvalds/linux/commit/bf6fa2c893c5237b48569a13fa3c673041430b6c) -func getPathFromcGroupID(cgroupID uint64) (string, error) { - if p, ok := cGroupIDToPath[cgroupID]; ok { - return p, nil +func (c *cache) getPathFromcGroupID(cgroupID uint64) (string, error) { + p, ok := instance.cGroupIDToPath.Load(cgroupID) + if ok { + return p.(string), nil } err := filepath.WalkDir(cgroupPath, func(path string, dentry fs.DirEntry, err error) error { @@ -193,55 +175,29 @@ func getPathFromcGroupID(cgroupID uint64) (string, error) { if !dentry.IsDir() { return nil } - getCgroupID, err := utils.GetCgroupIDFromPath(byteOrder, path) + + getCgroupID, err := utils.GetCgroupIDFromPath(instance.byteOrder, path) if err != nil { - return fmt.Errorf("error resolving handle: %v", err) + return fmt.Errorf("error resolving handle: %w", err) } - cGroupIDToPath[getCgroupID] = path + instance.cGroupIDToPath.Store(getCgroupID, path) return nil }) if err != nil { return unknownPath, fmt.Errorf("failed to find cgroup id: %v", err) } - if p, ok := cGroupIDToPath[cgroupID]; ok { - return p, nil - } - - cGroupIDToPath[cgroupID] = unknownPath - return cGroupIDToPath[cgroupID], nil -} - -func validContainerID(id string) string { - match := validContainerIDRegex.MatchString(id) - if match { - return id - } - return utils.SystemProcessName -} - -// Get containerID from path. cgroup v1 and cgroup v2 will use different regex -func extractPodContainerIDfromPath(path string) (string, error) { - return extractPodContainerIDfromPathWithCgroup(path) -} -func extractPodContainerIDfromPathWithCgroup(path string) (string, error) { - if path == unknownPath { - return utils.SystemProcessName, fmt.Errorf("failed to find pod's container id") + p, ok = instance.cGroupIDToPath.Load(cgroupID) + if ok { + return p.(string), nil } - path = strings.TrimSuffix(path, "/container") - path = strings.TrimSuffix(path, ".scope") - - // get the last 64 characters of the path - if len(path) < 64 { - return utils.SystemProcessName, fmt.Errorf("failed to find pod's container id") - } - containerID := path[len(path)-64:] - return validContainerID(containerID), nil + instance.cGroupIDToPath.Store(cgroupID, unknownPath) + return unknownPath, nil } -func getAliveContainers(pods *[]corev1.Pod) map[string]bool { +func (c *cache) getAliveContainers(pods *[]corev1.Pod) map[string]bool { aliveContainers := make(map[string]bool) for i := 0; i < len(*pods); i++ { @@ -249,45 +205,165 @@ func getAliveContainers(pods *[]corev1.Pod) map[string]bool { for j := 0; j < len(statuses); j++ { containerID := ParseContainerIDFromPodStatus(statuses[j].ContainerID) aliveContainers[containerID] = true - containerIDToContainerInfo[containerID] = &ContainerInfo{ + instance.setContainerIDToContainerInfo(containerID, &ContainerInfo{ ContainerID: containerID, ContainerName: statuses[j].Name, PodName: (*pods)[i].Name, Namespace: (*pods)[i].Namespace, - } + }) } statuses = (*pods)[i].Status.ContainerStatuses for j := 0; j < len(statuses); j++ { containerID := ParseContainerIDFromPodStatus(statuses[j].ContainerID) aliveContainers[containerID] = true - containerIDToContainerInfo[containerID] = &ContainerInfo{ + instance.setContainerIDToContainerInfo(containerID, &ContainerInfo{ ContainerID: containerID, ContainerName: statuses[j].Name, PodName: (*pods)[i].Name, Namespace: (*pods)[i].Namespace, - } + }) } statuses = (*pods)[i].Status.EphemeralContainerStatuses for j := 0; j < len(statuses); j++ { containerID := ParseContainerIDFromPodStatus(statuses[j].ContainerID) aliveContainers[containerID] = true - containerIDToContainerInfo[containerID] = &ContainerInfo{ + instance.setContainerIDToContainerInfo(containerID, &ContainerInfo{ ContainerID: containerID, ContainerName: statuses[j].Name, PodName: (*pods)[i].Name, Namespace: (*pods)[i].Namespace, - } + }) } } return aliveContainers } -// GetAliveContainers returns alive pod map +func GetContainerID(cGroupID, pid uint64, withCGroupID bool) (string, error) { + info, err := GetContainerInfo(cGroupID, pid, withCGroupID) + return info.ContainerID, err +} + +func GetContainerInfo(cGroupID, pid uint64, withCGroupID bool) (ContainerInfo, error) { + var containerID string + name := utils.SystemProcessName + namespace := utils.SystemProcessNamespace + if cGroupID == 1 && withCGroupID { + name = utils.KernelProcessName + namespace = utils.KernelProcessNamespace + } + + info := ContainerInfo{ + ContainerID: name, + ContainerName: name, + PodName: name, + Namespace: namespace, + } + + containerID, err := getContainerIDFromPath(cGroupID, pid, withCGroupID) + if err != nil { + return info, err + } + + if instance.hasContainerID(containerID) { + return instance.getContainerInfo(containerID) + } + + info.ContainerID = containerID + instance.setContainerIDToContainerInfo(containerID, &info) + + return instance.getContainerInfo(containerID) +} + +// ParseContainerIDFromPodStatus removes any prefix from the container ID to standardize it +func ParseContainerIDFromPodStatus(containerID string) string { + regexReplaceContainerIDPrefix := regexp.MustCompile(`.*//`) + return regexReplaceContainerIDPrefix.ReplaceAllString(containerID, "") +} + +// getContainerIDFromPath retrieves the container ID from the cgroup path or PID +func getContainerIDFromPath(cGroupID, pid uint64, withCGroupID bool) (string, error) { + if cGroupID == 1 && withCGroupID { + return utils.KernelProcessName, nil + } + if withCGroupID { + return instance.getContainerIDFromcGroupID(cGroupID) + } + return instance.getGetContainerIDFromPID(pid) +} + +// extractPodContainerIDfromPath extracts the container ID from the provided cgroup path +func extractPodContainerIDfromPath(path string) (string, error) { + return extractPodContainerIDfromPathWithCgroup(path) +} + +// extractPodContainerIDfromPathWithCgroup extracts the container ID from a cgroup path +func extractPodContainerIDfromPathWithCgroup(path string) (string, error) { + if path == unknownPath { + return utils.SystemProcessName, fmt.Errorf("failed to find pod's container id") + } + + path = strings.TrimSuffix(path, "/container") + path = strings.TrimSuffix(path, ".scope") + + // Ensure the path is long enough to extract the container ID + if len(path) < 64 { + return utils.SystemProcessName, fmt.Errorf("path too short to determine container ID") + } + + containerID := path[len(path)-64:] + return validContainerID(containerID), nil +} + +// validContainerID validates and returns the container ID if it matches the expected format +func validContainerID(id string) string { + validContainerIDRegex := regexp.MustCompile("^[a-zA-Z0-9]+$") + if validContainerIDRegex.MatchString(id) { + return id + } + return utils.SystemProcessName +} + +// AddContainerIDToCache adds the container ID to the cache +func AddContainerIDToCache(pid uint64, containerID string) { + instance.setContainerIDCache(pid, containerID) +} + +// GetContainerIDFromPID retrieves the container ID using the process PID +func GetContainerIDFromPID(pid uint64) (string, error) { + return instance.getGetContainerIDFromPID(pid) +} + +// getPathFromPID retrieves the cgroup path from the PID +func getPathFromPID(searchPath string, pid uint64) (string, error) { + path := fmt.Sprintf(searchPath, pid) + file, err := os.Open(path) + if err != nil { + return "", fmt.Errorf("failed to open cgroup description file for pid %d: %v", pid, err) + } + defer file.Close() + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + if strings.Contains(line, "pod") || strings.Contains(line, "containerd") || strings.Contains(line, "crio") { + // Check if the string has ".scope" in it and truncate everything else after ".scope" + if strings.Contains(line, ".scope") { + line = strings.Split(line, ".scope")[0] + ".scope" + } + return line, nil + } + } + // This process doesn't belong to a pod, return unknown path to avoid future lookups + return unknownPath, nil +} + +// GetAliveContainers returns a map of alive containers from the provided pods func GetAliveContainers() (map[string]bool, error) { + podLister := kubelet.KubeletPodLister{} pods, err := podLister.ListPods() if err != nil { return nil, err } - return getAliveContainers(pods), nil + return instance.getAliveContainers(pods), nil } diff --git a/pkg/cgroup/resolve_container_test.go b/pkg/cgroup/resolve_container_test.go index b6faed82ea..bbb6123bb1 100644 --- a/pkg/cgroup/resolve_container_test.go +++ b/pkg/cgroup/resolve_container_test.go @@ -116,7 +116,8 @@ func TestGetAliveContainers(t *testing.T) { } for _, testcase := range testcases { t.Run(testcase.name, func(t *testing.T) { - res := getAliveContainers(&testcase.pods) + c := GetCache() + res := c.getAliveContainers(&testcase.pods) g.Expect(res).To(Equal(testcase.expectResults)) }) }