Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

kernfs_memcg: Add helpers to gather memcgroup related data #96

Merged
merged 2 commits into from
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
251 changes: 251 additions & 0 deletions drgn_tools/kernfs_memcg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
# Copyright (c) 2024, Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
"""
Kernfs_memcg
--------------

The ``drgn_tools.kernfs_memcg`` module provides helpers for working with the
Linux memcg subsystem.
"""
import argparse
from typing import Iterator

from drgn import cast
from drgn import container_of
from drgn import FaultError
from drgn import Object
from drgn import Program
from drgn.helpers.common.format import decode_enum_type_flags
from drgn.helpers.linux import cgroup_path
from drgn.helpers.linux import css_for_each_descendant_pre
from drgn.helpers.linux import find_slab_cache
from drgn.helpers.linux import for_each_page
from drgn.helpers.linux import inode_path
from drgn.helpers.linux import kernfs_path
from drgn.helpers.linux import PageSlab
from drgn.helpers.linux import slab_cache_for_each_allocated_object
from drgn.helpers.linux import slab_cache_is_merged

from drgn_tools.corelens import CorelensModule
from drgn_tools.dentry import dentry_path_any_mount


# Mask applied to kernfs_node.flags to isolate the node-type bits before they
# are compared against KERNFS_DIR (see kernfs_node_of_cgroup()).
_KERNFS_TYPE_MASK = 0xF
# cgroup subsystem id for memory cgroup, from kernel/cgroup/cgroup.c
# NOTE(review): used as a hard-coded index into the cgroup_subsys[] array;
# presumably the real index depends on which controllers the kernel was built
# with -- confirm against the target kernel.
_MEMORY_CGRP_ID = 4


def decode_css_flags(css: Object) -> str:
    """
    Render ``cgroup_subsys_state.flags`` as human-readable text.

    :param css: ``struct cgroup_subsys_state *``
    :returns: ``"ZOMBIE"`` when no flag bit is set, otherwise the result of
        decoding the flags against the enum type that defines ``CSS_DYING``.
    """
    flag_enum_type = css.prog_["CSS_DYING"].type_
    raw_flags = css.flags.value_()
    if raw_flags:
        return decode_enum_type_flags(raw_flags, flag_enum_type, False)

    # There is no dedicated flag value to indicate a zombie cgroup.
    # A css.flags value of 0 indicates that cgroup destruction is
    # complete but the cgroup object has not been fully freed because
    # it is still pinned by some other object.
    return "ZOMBIE"


def for_each_kernfs_node(prog: Program) -> Iterator[Object]:
    """
    Iterate over all kernfs_node objects in the system.

    Nodes are discovered by walking the allocated objects of the
    ``kernfs_node_cache`` slab cache. When that cache cannot be looked up, or
    when it has been merged with other slab caches, a message is printed and
    nothing is yielded.

    :param prog: Program being debugged.
    :returns: Iterator of ``struct kernfs_node *`` objects.
    """
    kernfs_node_cache = find_slab_cache(prog, "kernfs_node_cache")
    if kernfs_node_cache is None:
        # find_slab_cache() returns None when no cache carries this name;
        # bail out here rather than handing None to slab_cache_is_merged().
        print(
            "Can't iterate through kernfs_node_cache, because it could not be found"
        )
        return

    if slab_cache_is_merged(kernfs_node_cache):
        print(
            "Can't iterate through kernfs_node_cache, because its merged with other slab-cache(s)"
        )
        return

    yield from slab_cache_for_each_allocated_object(
        kernfs_node_cache, "struct kernfs_node"
    )


def dump_memcgroup_hierarchy(prog: Program) -> None:
    """
    Print every cgroup of the memory controller hierarchy with its state.

    The walk starts at the css embedded in the hierarchy's root cgroup and
    visits descendants via ``css_for_each_descendant_pre``.

    :param prog: Program being debugged.
    """
    memory_subsys = prog["cgroup_subsys"][_MEMORY_CGRP_ID]
    root_css = memory_subsys.root.cgrp.self.address_of_()
    subsys_name = memory_subsys.name.string_().decode()
    print(f"dumping: {subsys_name} hierarchy")
    for descendant in css_for_each_descendant_pre(root_css):
        state = decode_css_flags(descendant)
        path = cgroup_path(descendant.cgroup).decode()
        print(f"path: {path} state: {state}")


def kernfs_node_of_cgroup(kn: Object) -> bool:
    """
    Tell whether a kernfs_node belongs to a cgroup.

    A node qualifies when it is a directory and its ``priv`` pointer, read as
    a ``struct cgroup *``, points back at the node through ``cgroup.kn``.

    :param kn: ``struct kernfs_node *``
    :returns: True if kernfs_node object represents a cgroup object,
        False otherwise (including when reading the candidate cgroup faults).
    """
    node_type = kn.flags.value_() & _KERNFS_TYPE_MASK
    if node_type != kn.prog_.constant("KERNFS_DIR").value_():
        return False

    try:
        candidate = cast("struct cgroup *", kn.priv)
        return candidate.kn == kn
    except FaultError:
        # priv did not point at readable memory, so it cannot be a cgroup.
        return False


def kernfs_node_of_memcgroup(kn: Object) -> bool:
    """
    Tell whether a kernfs_node belongs to a cgroup of the memory hierarchy.

    :param kn: ``struct kernfs_node *``
    :returns: True if kernfs_node object represents a mem cgroup object,
        False otherwise.
    """
    if not kernfs_node_of_cgroup(kn):
        return False

    candidate = cast("struct cgroup *", kn.priv)
    # A cgroup is a memory cgroup when it hangs off the same root as the
    # memory controller's hierarchy.
    memcg_root = kn.prog_["cgroup_subsys"][_MEMORY_CGRP_ID].root
    return memcg_root == candidate.root


def dump_memcg_kernfs_nodes(prog: Program) -> None:
    """
    Print the address and path of each kernfs_node that represents a mem
    cgroup, followed by the total number of such nodes.

    :param prog: Program being debugged.
    """
    memcg_nodes = 0
    for node in for_each_kernfs_node(prog):
        if not kernfs_node_of_memcgroup(node):
            continue
        memcg_nodes += 1
        node_path = kernfs_path(node).decode()
        print("kernfs_node: ", hex(node.value_()), " ", node_path)

    print("Total number of memcg kernfs_node objects: ", memcg_nodes)


def get_num_active_mem_cgroups(prog: Program) -> int:
    """
    Return the number of active mem cgroups, root memcgroup included.

    :param prog: Program being debugged.
    """
    root_cgroup = prog["cgroup_subsys"][_MEMORY_CGRP_ID].root.cgrp
    descendants = root_cgroup.nr_descendants.value_()
    # One is added to the descendant count to account for the root
    # memcgroup itself.
    return descendants + 1


def get_num_dying_mem_cgroups(prog: Program) -> int:
    """
    Return the number of inactive or dying mem cgroups, as recorded in the
    root cgroup's ``nr_dying_descendants`` counter.

    :param prog: Program being debugged.
    """
    root_cgroup = prog["cgroup_subsys"][_MEMORY_CGRP_ID].root.cgrp
    return root_cgroup.nr_dying_descendants.value_()


def get_num_mem_cgroups(prog: Program) -> None:
    """
    Print a one-line summary of how many mem cgroups are active and dying.

    Nothing is returned despite the ``get_`` prefix.

    :param prog: Program being debugged.
    """
    active = get_num_active_mem_cgroups(prog)
    dying = get_num_dying_mem_cgroups(prog)
    print(f"There are {active} active and {dying} dying memcgroups \n")


# By default (max_pages == 0) every page that holds a memcg reference is
# reported. If max_pages is non-zero, the scan stops as soon as that many
# such pages have been found, or after all pages have been scanned,
# whichever happens first.
def dump_page_cache_pages_pinning_cgroups(prog: Program, max_pages: int = 0):
    """
    Dump page-cache pages that have reference to a mem-cgroup.

    The output also contains information such as the cgroup that is pinned,
    its flags (to indicate current state of cgroup) and the file cached by
    this page. A summary line with the number of pages scanned, matches found
    and faults hit is printed at the end.

    :param prog: Program being debugged.
    :param max_pages: Maximum number of matching pages to find. The default,
        0, means no limit: all such pages are listed.
    """
    mem_cgroup_root = prog["cgroup_subsys"][_MEMORY_CGRP_ID].root
    total_count = 0
    found_count = 0
    fault_count = 0
    for page in for_each_page(prog):
        # The limit is checked at the top of the loop so that it is honoured
        # on every path. Checking it only after printing let the "no path"
        # and fault paths skip it, after which found_count could step past
        # max_pages and an equality test would never stop the scan.
        if max_pages and found_count >= max_pages:
            break
        total_count += 1
        try:
            # Ignore slab pages
            if PageSlab(page):
                continue
            # Ignore non page-cache pages
            if not page.mapping:
                continue
            # struct page exposes the memcg reference as either mem_cgroup
            # or memcg_data; presumably this depends on kernel version.
            try:
                mem_cgroup = page.mem_cgroup
            except AttributeError:
                mem_cgroup = page.memcg_data

            memcg_value = mem_cgroup.value_()
            # NOTE(review): the low two bits are presumably flag bits marking
            # values that are not plain mem_cgroup pointers -- confirm.
            if not memcg_value or memcg_value & 3:
                continue
            cgroup_subsys_state = cast(
                "struct cgroup_subsys_state *", mem_cgroup
            )
            cgrp = cgroup_subsys_state.cgroup
            if cgrp.root != mem_cgroup_root:
                continue

            found_count += 1
            inode = page.mapping.host
            # Pages whose inode has no resolvable path still count as found
            # but are not printed.
            if inode_path(inode) is None:
                continue
            dentry = container_of(
                inode.i_dentry.first, "struct dentry", "d_u.d_alias"
            )
            path = dentry_path_any_mount(dentry).decode()
            cgroup_state = decode_css_flags(cgrp.self.address_of_())
            print(
                f"page: 0x{page.value_():x} cgroup: {cgroup_path(cgrp).decode()} state: {cgroup_state} path: {path}\n"
            )
        except FaultError:
            # Unreadable memory for this page: count it and move on.
            fault_count += 1
            continue

    print(
        f"Scanned {total_count} pages, found {found_count} pages with memory cgroup refs, found {fault_count} faults."
    )


class NumMemCgroups(CorelensModule):
    """Print number of active and dying memcgroups"""

    # Module name; presumably the identifier corelens uses to select this
    # module -- confirm against CorelensModule.
    name = "num-memcgroups"

    def run(self, prog: Program, args: argparse.Namespace) -> None:
        """Print the memcgroup counts for ``prog``; ``args`` is not used."""
        get_num_mem_cgroups(prog)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we make this more generic, like the output of cat /proc/cgroups, so that it supports not only the memory cgroup but other cgroups as well? It doesn't have to be included in this pull request; you can start a new one for it if you want.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure. I can include this change in a different PR. I included memcgroup numbers here because we often run into issue due to zombie memcgroups and hence having their number readily available will help us to decide if we need to run PagesPinningMemcgroups module.


class PagesPinningMemcgroups(CorelensModule):
"""Print information related to pages, that are pinning memcgroup(s)"""

name = "pages-pinning-memcg"
run_when = "never"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It makes sense that this only runs when the user requests it explicitly. Should we make the default behavior to dump all pages pinning zombie cgroups? That looks more common to me when troubleshooting zombie cgroup issues.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. I have modified it to dump all pages by default.


def add_args(self, parser: argparse.ArgumentParser) -> None:
    """Register the ``--max`` / ``-m`` option (integer, default 0)."""
    parser.add_argument(
        "--max",
        "-m",
        type=int,
        default=0,
        help="Maximum number of pages to show. By default(0) all such pages are shown.",
    )

def run(self, prog: Program, args: argparse.Namespace) -> None:
    """Dump pages pinning memcgroups, passing ``args.max`` as the page limit."""
    dump_page_cache_pages_pinning_cgroups(prog, max_pages=args.max)
33 changes: 33 additions & 0 deletions tests/test_kernfs_memcg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright (c) 2024, Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
import drgn

from drgn_tools import kernfs_memcg as kernfs_memcg


def test_dump_page_cache_pages_pinning_cgroups(prog: drgn.Program) -> None:
    """Smoke test: the dump runs with a limit of 10 pages without raising."""
    kernfs_memcg.dump_page_cache_pages_pinning_cgroups(prog, 10)


def test_dump_memcgroup_hierarchy(prog: drgn.Program) -> None:
    """Smoke test: dumping the memcgroup hierarchy completes without raising."""
    kernfs_memcg.dump_memcgroup_hierarchy(prog)


def test_kernfs_node_of_memcgroup(prog: drgn.Program) -> None:
    """
    Smoke test: walk kernfs nodes and stop once five memcgroup nodes are seen.

    Nothing is asserted; the test only checks that the helpers run.
    """
    found = 0
    for node in kernfs_memcg.for_each_kernfs_node(prog):
        if not kernfs_memcg.kernfs_node_of_memcgroup(node):
            continue
        found += 1
        if found >= 5:
            print("Found 5 memcgroup, kernfs_node objects.")
            break


def test_get_num_active_mem_cgroups(prog: drgn.Program) -> None:
    """Smoke test: fetch and print the active memcgroup count; no assertion."""
    count = kernfs_memcg.get_num_active_mem_cgroups(prog)
    print(f"number of active memcgroups: {count}\n")


def test_get_num_dying_mem_cgroups(prog: drgn.Program) -> None:
    """Smoke test: fetch and print the dying memcgroup count; no assertion."""
    count = kernfs_memcg.get_num_dying_mem_cgroups(prog)
    print(f"number of dying memcgroups: {count}\n")
Loading