Skip to content

Commit

Permalink
fix(scheduler): fix data race (project-zot#2085)
Browse files Browse the repository at this point in the history
* fix(scheduler): data race when pushing new tasks

the problem here is that scheduler can be closed in two ways:
- canceling the context given as argument to scheduler.RunScheduler()
- running scheduler.Shutdown()

because of this shutdown can trigger a data race between calling scheduler.inShutdown()
and actually pushing tasks into the pool workers

solved that by keeping a quit channel and listening on both quit channel and ctx.Done()
and closing the worker chan and scheduler afterwards.

Signed-off-by: Petu Eusebiu <[email protected]>

* refactor(scheduler): refactor into a single shutdown

before this we could stop scheduler either by closing the context
provided to RunScheduler(ctx) or by running Shutdown().

simplify things by getting rid of the external context in RunScheduler().
keep an internal context in the scheduler itself and pass it down to all tasks.

Signed-off-by: Petu Eusebiu <[email protected]>

---------

Signed-off-by: Petu Eusebiu <[email protected]>
  • Loading branch information
eusebiu-constantin-petu-dbk authored Dec 11, 2023
1 parent d71a1f4 commit 7642e5a
Show file tree
Hide file tree
Showing 31 changed files with 495 additions and 327 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ test-prereq: check-skopeo $(TESTDATA) $(ORAS)
.PHONY: test-extended
test-extended: $(if $(findstring ui,$(BUILD_LABELS)), ui)
test-extended: test-prereq
go test -failfast -tags $(BUILD_LABELS),containers_image_openpgp -trimpath -race -timeout 15m -cover -coverpkg ./... -coverprofile=coverage-extended.txt -covermode=atomic ./...
go test -failfast -tags $(BUILD_LABELS),containers_image_openpgp -trimpath -race -timeout 20m -cover -coverpkg ./... -coverprofile=coverage-extended.txt -covermode=atomic ./...
rm -rf /tmp/getter*; rm -rf /tmp/trivy*

.PHONY: test-minimal
Expand Down
88 changes: 88 additions & 0 deletions pkg/api/authn_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"net/http"
"net/http/httptest"
"os"
"path"
"testing"
"time"

Expand All @@ -23,9 +24,14 @@ import (
"zotregistry.io/zot/pkg/api/config"
"zotregistry.io/zot/pkg/api/constants"
extconf "zotregistry.io/zot/pkg/extensions/config"
"zotregistry.io/zot/pkg/extensions/monitoring"
"zotregistry.io/zot/pkg/log"
mTypes "zotregistry.io/zot/pkg/meta/types"
reqCtx "zotregistry.io/zot/pkg/requestcontext"
"zotregistry.io/zot/pkg/scheduler"
"zotregistry.io/zot/pkg/storage"
storageConstants "zotregistry.io/zot/pkg/storage/constants"
"zotregistry.io/zot/pkg/storage/local"
authutils "zotregistry.io/zot/pkg/test/auth"
test "zotregistry.io/zot/pkg/test/common"
"zotregistry.io/zot/pkg/test/mocks"
Expand Down Expand Up @@ -922,6 +928,88 @@ func TestAPIKeysGeneratorErrors(t *testing.T) {
})
}

func TestCookiestoreCleanup(t *testing.T) {
log := log.Logger{}
metrics := monitoring.NewMetricsServer(true, log)

Convey("Test cookiestore cleanup works", t, func() {
taskScheduler := scheduler.NewScheduler(config.New(), metrics, log)
taskScheduler.RateLimit = 50 * time.Millisecond
taskScheduler.RunScheduler()

rootDir := t.TempDir()

err := os.MkdirAll(path.Join(rootDir, "_sessions"), storageConstants.DefaultDirPerms)
So(err, ShouldBeNil)

sessionPath := path.Join(rootDir, "_sessions", "session_1234")

err = os.WriteFile(sessionPath, []byte("session"), storageConstants.DefaultFilePerms)
So(err, ShouldBeNil)

err = os.Chtimes(sessionPath, time.Time{}, time.Time{})
So(err, ShouldBeNil)

imgStore := local.NewImageStore(rootDir, false, false, log, metrics, nil, nil)

storeController := storage.StoreController{
DefaultStore: imgStore,
}

cookieStore, err := api.NewCookieStore(storeController)
So(err, ShouldBeNil)

cookieStore.RunSessionCleaner(taskScheduler)

time.Sleep(2 * time.Second)

taskScheduler.Shutdown()

// make sure session is removed
_, err = os.Stat(sessionPath)
So(err, ShouldNotBeNil)
})

Convey("Test cookiestore cleanup without permissions on rootDir", t, func() {
taskScheduler := scheduler.NewScheduler(config.New(), metrics, log)
taskScheduler.RateLimit = 50 * time.Millisecond
taskScheduler.RunScheduler()

rootDir := t.TempDir()

err := os.MkdirAll(path.Join(rootDir, "_sessions"), storageConstants.DefaultDirPerms)
So(err, ShouldBeNil)

sessionPath := path.Join(rootDir, "_sessions", "session_1234")

err = os.WriteFile(sessionPath, []byte("session"), storageConstants.DefaultFilePerms)
So(err, ShouldBeNil)

imgStore := local.NewImageStore(rootDir, false, false, log, metrics, nil, nil)

storeController := storage.StoreController{
DefaultStore: imgStore,
}

cookieStore, err := api.NewCookieStore(storeController)
So(err, ShouldBeNil)

err = os.Chmod(rootDir, 0o000)
So(err, ShouldBeNil)

defer func() {
err = os.Chmod(rootDir, storageConstants.DefaultDirPerms)
So(err, ShouldBeNil)
}()

cookieStore.RunSessionCleaner(taskScheduler)

time.Sleep(1 * time.Second)

taskScheduler.Shutdown()
})
}

type mockUUIDGenerator struct {
guuid.Generator
succeedAttempts int
Expand Down
25 changes: 14 additions & 11 deletions pkg/api/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,12 +94,12 @@ func (c *Controller) GetPort() int {
return c.chosenPort
}

func (c *Controller) Run(reloadCtx context.Context) error {
func (c *Controller) Run() error {
if err := c.initCookieStore(); err != nil {
return err
}

c.StartBackgroundTasks(reloadCtx)
c.StartBackgroundTasks()

// setup HTTP API router
engine := mux.NewRouter()
Expand Down Expand Up @@ -216,7 +216,7 @@ func (c *Controller) Run(reloadCtx context.Context) error {
return server.Serve(listener)
}

func (c *Controller) Init(reloadCtx context.Context) error {
func (c *Controller) Init() error {
// print the current configuration, but strip secrets
c.Log.Info().Interface("params", c.Config.Sanitize()).Msg("configuration settings")

Expand All @@ -237,7 +237,7 @@ func (c *Controller) Init(reloadCtx context.Context) error {
return err
}

if err := c.InitMetaDB(reloadCtx); err != nil {
if err := c.InitMetaDB(); err != nil {
return err
}

Expand Down Expand Up @@ -280,7 +280,7 @@ func (c *Controller) initCookieStore() error {
return nil
}

func (c *Controller) InitMetaDB(reloadCtx context.Context) error {
func (c *Controller) InitMetaDB() error {
// init metaDB if search is enabled or we need to store user profiles, api keys or signatures
if c.Config.IsSearchEnabled() || c.Config.IsBasicAuthnEnabled() || c.Config.IsImageTrustEnabled() ||
c.Config.IsRetentionEnabled() {
Expand Down Expand Up @@ -310,7 +310,7 @@ func (c *Controller) InitMetaDB(reloadCtx context.Context) error {
return nil
}

func (c *Controller) LoadNewConfig(reloadCtx context.Context, newConfig *config.Config) {
func (c *Controller) LoadNewConfig(newConfig *config.Config) {
// reload access control config
c.Config.HTTP.AccessControl = newConfig.HTTP.AccessControl

Expand Down Expand Up @@ -364,21 +364,24 @@ func (c *Controller) LoadNewConfig(reloadCtx context.Context, newConfig *config.

c.InitCVEInfo()

c.StartBackgroundTasks(reloadCtx)

c.Log.Info().Interface("reloaded params", c.Config.Sanitize()).
Msg("loaded new configuration settings")
}

func (c *Controller) Shutdown() {
c.taskScheduler.Shutdown()
c.StopBackgroundTasks()
ctx := context.Background()
_ = c.Server.Shutdown(ctx)
}

func (c *Controller) StartBackgroundTasks(reloadCtx context.Context) {
// Will stop scheduler and wait for all tasks to finish their work.
func (c *Controller) StopBackgroundTasks() {
c.taskScheduler.Shutdown()
}

func (c *Controller) StartBackgroundTasks() {
c.taskScheduler = scheduler.NewScheduler(c.Config, c.Metrics, c.Log)
c.taskScheduler.RunScheduler(reloadCtx)
c.taskScheduler.RunScheduler()

// Enable running garbage-collect periodically for DefaultStore
if c.Config.Storage.GC {
Expand Down
22 changes: 11 additions & 11 deletions pkg/api/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -300,10 +300,10 @@ func TestRunAlreadyRunningServer(t *testing.T) {
cm.StartAndWait(port)
defer cm.StopServer()

err := ctlr.Init(context.Background())
err := ctlr.Init()
So(err, ShouldNotBeNil)

err = ctlr.Run(context.Background())
err = ctlr.Run()
So(err, ShouldNotBeNil)
})
}
Expand Down Expand Up @@ -377,7 +377,7 @@ func TestObjectStorageController(t *testing.T) {
ctlr := makeController(conf, tmp)
So(ctlr, ShouldNotBeNil)

err := ctlr.Init(context.Background())
err := ctlr.Init()
So(err, ShouldNotBeNil)
})

Expand Down Expand Up @@ -1218,7 +1218,7 @@ func TestMultipleInstance(t *testing.T) {
}
ctlr := api.NewController(conf)
ctlr.Log.Info().Int64("seedUser", seedUser).Int64("seedPass", seedPass).Msg("random seed for username & password")
err := ctlr.Init(context.Background())
err := ctlr.Init()
So(err, ShouldEqual, errors.ErrImgStoreNotFound)

globalDir := t.TempDir()
Expand Down Expand Up @@ -1311,7 +1311,7 @@ func TestMultipleInstance(t *testing.T) {

ctlr.Config.Storage.SubPaths = subPathMap

err := ctlr.Init(context.Background())
err := ctlr.Init()
So(err, ShouldNotBeNil)

// subpath root directory does not exist.
Expand All @@ -1320,15 +1320,15 @@ func TestMultipleInstance(t *testing.T) {

ctlr.Config.Storage.SubPaths = subPathMap

err = ctlr.Init(context.Background())
err = ctlr.Init()
So(err, ShouldNotBeNil)

subPathMap["/a"] = config.StorageConfig{RootDirectory: subDir, Dedupe: true, GC: true}
subPathMap["/b"] = config.StorageConfig{RootDirectory: subDir, Dedupe: true, GC: true}

ctlr.Config.Storage.SubPaths = subPathMap

err = ctlr.Init(context.Background())
err = ctlr.Init()
So(err, ShouldNotBeNil)
})
}
Expand Down Expand Up @@ -1826,12 +1826,12 @@ func TestTSLFailedReadingOfCACert(t *testing.T) {
defer cancel()
ctlr := makeController(conf, t.TempDir())

err = ctlr.Init(ctx)
err = ctlr.Init()
So(err, ShouldBeNil)

errChan := make(chan error, 1)
go func() {
err = ctlr.Run(ctx)
err = ctlr.Run()
errChan <- err
}()

Expand Down Expand Up @@ -1866,12 +1866,12 @@ func TestTSLFailedReadingOfCACert(t *testing.T) {
defer cancel()
ctlr := makeController(conf, t.TempDir())

err = ctlr.Init(ctx)
err = ctlr.Init()
So(err, ShouldBeNil)

errChan := make(chan error, 1)
go func() {
err = ctlr.Run(ctx)
err = ctlr.Run()
errChan <- err
}()

Expand Down
4 changes: 3 additions & 1 deletion pkg/api/cookiestore.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,9 @@ type CleanTask struct {
func (cleanTask *CleanTask) DoWork(ctx context.Context) error {
for _, session := range cleanTask.sessions {
if err := os.Remove(session); err != nil {
return err
if !os.IsNotExist(err) {
return err
}
}
}

Expand Down
18 changes: 6 additions & 12 deletions pkg/cli/client/cve_cmd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,16 +163,14 @@ func TestNegativeServerResponse(t *testing.T) {
ctlr := api.NewController(conf)
ctlr.Log.Logger = ctlr.Log.Output(writers)

ctx := context.Background()

if err := ctlr.Init(ctx); err != nil {
if err := ctlr.Init(); err != nil {
panic(err)
}

ctlr.CveScanner = getMockCveScanner(ctlr.MetaDB)

go func() {
if err := ctlr.Run(ctx); !errors.Is(err, http.ErrServerClosed) {
if err := ctlr.Run(); !errors.Is(err, http.ErrServerClosed) {
panic(err)
}
}()
Expand Down Expand Up @@ -239,16 +237,14 @@ func TestServerCVEResponse(t *testing.T) {
ctlr := api.NewController(conf)
ctlr.Log.Logger = ctlr.Log.Output(writers)

ctx := context.Background()

if err := ctlr.Init(ctx); err != nil {
if err := ctlr.Init(); err != nil {
panic(err)
}

ctlr.CveScanner = getMockCveScanner(ctlr.MetaDB)

go func() {
if err := ctlr.Run(ctx); !errors.Is(err, http.ErrServerClosed) {
if err := ctlr.Run(); !errors.Is(err, http.ErrServerClosed) {
panic(err)
}
}()
Expand Down Expand Up @@ -578,9 +574,7 @@ func TestCVESort(t *testing.T) {
t.FailNow()
}

ctx := context.Background()

if err := ctlr.Init(ctx); err != nil {
if err := ctlr.Init(); err != nil {
panic(err)
}

Expand Down Expand Up @@ -617,7 +611,7 @@ func TestCVESort(t *testing.T) {
}

go func() {
if err := ctlr.Run(ctx); !errors.Is(err, http.ErrServerClosed) {
if err := ctlr.Run(); !errors.Is(err, http.ErrServerClosed) {
panic(err)
}
}()
Expand Down
2 changes: 1 addition & 1 deletion pkg/cli/client/image_cmd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -866,7 +866,7 @@ func TestServerResponseGQLWithoutPermissions(t *testing.T) {
}

ctlr := api.NewController(conf)
if err := ctlr.Init(context.Background()); err != nil {
if err := ctlr.Init(); err != nil {
So(err, ShouldNotBeNil)
}
})
Expand Down
Loading

0 comments on commit 7642e5a

Please sign in to comment.