Skip to content

Commit

Permalink
DHCP lease maintenance should terminate when interface no longer exists.
Browse files Browse the repository at this point in the history
Due to oberservations that threads can grow and the dhcp daemon uses an increasing amount of memory.

This situation can happen organically when using say, bridge CNI, and the bridge has been removed outside of the bridge CNI lifecycle, and an interface no longer exists on a pod.

Does so on a retry loop using the `backoffRetry()` method.

Signed-off-by: dougbtv <[email protected]>
  • Loading branch information
dougbtv committed Jan 21, 2025
1 parent e4ca66b commit 55262dc
Showing 1 changed file with 62 additions and 0 deletions.
62 changes: 62 additions & 0 deletions plugins/ipam/dhcp/lease.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@ package main

import (
"context"
"errors"
"fmt"
"log"
"math/rand"
"net"
"reflect"
"strings"
"sync"
"sync/atomic"
Expand Down Expand Up @@ -55,6 +57,13 @@ const (
leaseStateRebinding
)

// Timing for retrying link existence check
const (
linkCheckDelay0 = 1 * time.Second
linkCheckRetryMax = 10 * time.Second
linkCheckTotalTimeout = 30 * time.Second
)

// This implementation uses 1 OS thread per lease. This is because
// all the network operations have to be done in network namespace
// of the interface. This can be improved by switching to the proper
Expand All @@ -65,6 +74,7 @@ type DHCPLease struct {
clientID string
latestLease *nclient4.Lease
link netlink.Link
linkexists bool

Check failure on line 77 in plugins/ipam/dhcp/lease.go

View workflow job for this annotation

GitHub Actions / Lint

field `linkexists` is unused (unused)
renewalTime time.Time
rebindingTime time.Time
expireTime time.Time
Expand Down Expand Up @@ -292,6 +302,14 @@ func (l *DHCPLease) maintain() {
for {
var sleepDur time.Duration

linkCheckCtx, cancel := context.WithTimeoutCause(l.ctx, l.resendTimeout, errNoMoreTries)
defer cancel()
linkExists, _ := l.checkLinkExistsWithBackoff(linkCheckCtx)
if !linkExists {
log.Printf("%v: interface %s no longer exists or link check failed, terminating lease maintenance", l.clientID, l.link.Attrs().Name)
return
}

switch state {
case leaseStateBound:
sleepDur = time.Until(l.renewalTime)
Expand Down Expand Up @@ -344,6 +362,50 @@ func (l *DHCPLease) maintain() {
}
}

func (l *DHCPLease) checkLinkExistsWithBackoff(ctx context.Context) (bool, error) {
checkFunc := func() (bool, error) {
_, err := netlink.LinkByName(l.link.Attrs().Name)
if err != nil {
log.Printf("!bang: Error checking network link '%s': %v, type: %v", l.link.Attrs().Name, err, reflect.TypeOf(err))
var linkNotFoundErr *netlink.LinkNotFoundError
if errors.As(err, &linkNotFoundErr) {
log.Printf("!bang Link not found error detected! this is what I want to see.")
return false, nil
}
log.Printf("!bang Unexpected error type: %v, retrying...", reflect.TypeOf(err))
return false, err
}
return true, nil
}

ctx, cancel := context.WithTimeout(ctx, linkCheckTotalTimeout)
defer cancel()

exists, err := backoffRetryLinkExists(ctx, linkCheckRetryMax, checkFunc)
return exists, err
}

func backoffRetryLinkExists(ctx context.Context, maxDelay time.Duration, f func() (bool, error)) (bool, error) {
var baseDelay time.Duration = linkCheckDelay0

Check failure on line 389 in plugins/ipam/dhcp/lease.go

View workflow job for this annotation

GitHub Actions / Lint

var-declaration: should omit type time.Duration from declaration of var baseDelay; it will be inferred from the right-hand side (revive)
for {
exists, err := f()
if err == nil {
return exists, nil
}

log.Printf("!bang: Retrying due to error: %v. Next retry in %v seconds", err, baseDelay)

select {
case <-ctx.Done():
return false, ctx.Err() // Context's done, return with its error
case <-time.After(baseDelay):
if baseDelay < maxDelay {
baseDelay *= 2
}
}
}
}

func (l *DHCPLease) downIface() {
if err := netlink.LinkSetDown(l.link); err != nil {
log.Printf("%v: failed to bring %v interface DOWN: %v", l.clientID, l.link.Attrs().Name, err)
Expand Down

0 comments on commit 55262dc

Please sign in to comment.