Skip to content

Commit

Permalink
raw_exec: oom_score_adj support (#23308)
Browse files Browse the repository at this point in the history
  • Loading branch information
pkazmierczak authored Jun 14, 2024
1 parent 94bb91a commit 85430be
Show file tree
Hide file tree
Showing 11 changed files with 145 additions and 82 deletions.
3 changes: 3 additions & 0 deletions .changelog/23308.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
raw_exec: Added support for oom_score_adj
```
9 changes: 9 additions & 0 deletions drivers/rawexec/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ var (
"args": hclspec.NewAttr("args", "list(string)", false),
"cgroup_v2_override": hclspec.NewAttr("cgroup_v2_override", "string", false),
"cgroup_v1_override": hclspec.NewAttr("cgroup_v1_override", "list(map(string))", false),
"oom_score_adj": hclspec.NewAttr("oom_score_adj", "number", false),
})

// capabilities is returned by the Capabilities RPC and indicates what
Expand Down Expand Up @@ -156,6 +157,9 @@ type TaskConfig struct {
//
// * All resource isolation guarantees are lost FOR ALL TASKS if set *
OverrideCgroupV1 hclutils.MapStrStr `codec:"cgroup_v1_override"`

// OOMScoreAdj sets the oom_score_adj on Linux systems
OOMScoreAdj int `codec:"oom_score_adj"`
}

// TaskState is the state which is encoded in the handle returned in
Expand Down Expand Up @@ -324,6 +328,10 @@ func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drive
return nil, nil, fmt.Errorf("failed to decode driver config: %v", err)
}

if driverConfig.OOMScoreAdj < 0 {
return nil, nil, fmt.Errorf("oom_score_adj must not be negative")
}

d.logger.Info("starting task", "driver_cfg", hclog.Fmt("%+v", driverConfig))
handle := drivers.NewTaskHandle(taskHandleVersion)
handle.Config = cfg
Expand Down Expand Up @@ -353,6 +361,7 @@ func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drive
Resources: cfg.Resources.Copy(),
OverrideCgroupV2: cgroupslib.CustomPathCG2(driverConfig.OverrideCgroupV2),
OverrideCgroupV1: driverConfig.OverrideCgroupV1,
OOMScoreAdj: int32(driverConfig.OOMScoreAdj),
}

// ensure only one of cgroups_v1_override and cgroups_v2_override have been
Expand Down
4 changes: 4 additions & 0 deletions drivers/shared/executor/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,10 @@ type ExecCommand struct {
//
// * All resource isolation guarantees are lost FOR ALL TASKS if set *
OverrideCgroupV1 map[string]string

// OOMScoreAdj allows setting oom_score_adj (likelihood of process being
// OOM killed) on Linux systems
OOMScoreAdj int32
}

func (c *ExecCommand) getCgroupOr(controller, fallback string) string {
Expand Down
10 changes: 4 additions & 6 deletions drivers/shared/executor/executor_universal_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,8 @@ func (e *UniversalExecutor) statCG(cgroup string) (int, func(), error) {
func (e *UniversalExecutor) configureResourceContainer(command *ExecCommand, pid int) (func(), error) {
cgroup := command.StatsCgroup()

// ensure tasks do not inherit Nomad agent oom_score_adj value
if err := e.setOomAdj(); err != nil {
// ensure tasks get the desired oom_score_adj value set
if err := e.setOomAdj(command.OOMScoreAdj); err != nil {
return nil, err
}

Expand Down Expand Up @@ -280,12 +280,10 @@ func (e *UniversalExecutor) configureCG2(cgroup string, command *ExecCommand) {
_ = ed.Write("cpuset.cpus", cpusetCpus)
}

func (e *UniversalExecutor) setOomAdj() error {
// children should not inherit Nomad agent oom_score_adj value
//
func (e *UniversalExecutor) setOomAdj(oomScore int32) error {
// /proc/self/oom_score_adj should work on both cgroups v1 and v2 systems
// range is -1000 to 1000; 0 is the default
return os.WriteFile("/proc/self/oom_score_adj", []byte("0"), 0644)
return os.WriteFile("/proc/self/oom_score_adj", []byte(strconv.Itoa(int(oomScore))), 0644)
}

func (*UniversalExecutor) computeCPU(command *ExecCommand) uint64 {
Expand Down
28 changes: 28 additions & 0 deletions drivers/shared/executor/executor_universal_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ package executor

import (
"fmt"
"os"
"strconv"
"strings"
"testing"

"github.com/hashicorp/nomad/ci"
Expand Down Expand Up @@ -99,3 +102,28 @@ func TestExecutor_InvalidCgroup(t *testing.T) {
must.ErrorContains(t, err, "unable to configure cgroups: no such file or directory")

}

// TestUniversalExecutor_setOomAdj launches a long-running "sleep infinity"
// command through the universal executor with OOMScoreAdj set to 1000, then
// reads /proc/<pid>/oom_score_adj for the launched process and checks that it
// matches the requested value.
func TestUniversalExecutor_setOomAdj(t *testing.T) {
	ci.Parallel(t)

	factory := universalFactory
	testExecCmd := testExecutorCommand(t)
	execCmd, allocDir := testExecCmd.command, testExecCmd.allocDir
	execCmd.Cmd = "sleep"
	execCmd.Args = []string{"infinity"}
	execCmd.OOMScoreAdj = 1000

	factory.configureExecCmd(t, execCmd)
	defer allocDir.Destroy()
	executor := factory.new(testlog.HCLogger(t), compute)
	defer executor.Shutdown("", 0)

	p, err := executor.Launch(execCmd)
	must.NoError(t, err)

	oomScore, err := os.ReadFile(fmt.Sprintf("/proc/%d/oom_score_adj", p.Pid))
	must.NoError(t, err)

	// Fail on unparsable content instead of silently comparing against 0,
	// which would surface as a misleading value mismatch.
	oomScoreInt, err := strconv.Atoi(strings.TrimSpace(string(oomScore)))
	must.NoError(t, err)
	must.Eq(t, execCmd.OOMScoreAdj, int32(oomScoreInt))
}
1 change: 1 addition & 0 deletions drivers/shared/executor/grpc_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ func (c *grpcExecutorClient) Launch(cmd *ExecCommand) (*ProcessState, error) {
Capabilities: cmd.Capabilities,
CgroupV2Override: cmd.OverrideCgroupV2,
CgroupV1Override: cmd.OverrideCgroupV1,
OomScoreAdj: cmd.OOMScoreAdj,
}
resp, err := c.client.Launch(ctx, req)
if err != nil {
Expand Down
1 change: 1 addition & 0 deletions drivers/shared/executor/grpc_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ func (s *grpcExecutorServer) Launch(ctx context.Context, req *proto.LaunchReques
Capabilities: req.Capabilities,
OverrideCgroupV2: req.CgroupV2Override,
OverrideCgroupV1: req.CgroupV1Override,
OOMScoreAdj: req.OomScoreAdj,
})

if err != nil {
Expand Down
Loading

0 comments on commit 85430be

Please sign in to comment.