From 5d4079c8bc1f25fe4ae33bbaabe5856e525ab614 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Mon, 16 Dec 2024 16:22:34 -0500 Subject: [PATCH] drop DB indexes on close Signed-off-by: Alex Goodman --- cmd/grype/cli/commands/db_import.go | 14 +- grype/db/internal/gormadapter/open.go | 141 ++++++++++++++++----- grype/db/internal/gormadapter/open_test.go | 54 ++------ grype/db/v3/store/store.go | 25 ++-- grype/db/v4/store/store.go | 29 ++--- grype/db/v5/store/store.go | 29 ++--- grype/db/v6/db.go | 39 +++++- grype/db/v6/db_metadata_store_test.go | 2 +- grype/db/v6/installation/curator.go | 97 ++++++++------ grype/db/v6/installation/curator_test.go | 14 +- grype/db/v6/internal/db.go | 26 ---- grype/db/v6/store.go | 77 ++++++++--- grype/db/v6/store_test.go | 51 ++++++++ 13 files changed, 377 insertions(+), 221 deletions(-) delete mode 100644 grype/db/v6/internal/db.go create mode 100644 grype/db/v6/store_test.go diff --git a/cmd/grype/cli/commands/db_import.go b/cmd/grype/cli/commands/db_import.go index 2d4948f0bd7..9a71e40df25 100644 --- a/cmd/grype/cli/commands/db_import.go +++ b/cmd/grype/cli/commands/db_import.go @@ -11,17 +11,17 @@ import ( "github.com/anchore/grype/grype/db/v6/distribution" "github.com/anchore/grype/grype/db/v6/installation" "github.com/anchore/grype/internal" + "github.com/anchore/grype/internal/log" ) func DBImport(app clio.Application) *cobra.Command { opts := dbOptionsDefault(app.ID()) return app.SetupCommand(&cobra.Command{ - Use: "import FILE", - Short: "import a vulnerability database archive", - Long: fmt.Sprintf("import a vulnerability database archive from a local FILE.\nDB archives can be obtained from %q.", internal.DBUpdateURL), - Args: cobra.ExactArgs(1), - PreRunE: disableUI(app), + Use: "import FILE", + Short: "import a vulnerability database archive", + Long: fmt.Sprintf("import a vulnerability database archive from a local FILE.\nDB archives can be obtained from %q.", internal.DBUpdateURL), + Args: cobra.ExactArgs(1), RunE: func(_ *cobra.Command, args []string) error { return runDBImport(*opts, args[0]) }, @@ -48,10 +48,12 @@ func newDBImport(opts options.Database, dbArchivePath string) error { return fmt.Errorf("unable to create curator: %w", err) } + log.WithFields("path", dbArchivePath).Infof("Importing vulnerability database archive") if err := c.Import(dbArchivePath); err != nil { return fmt.Errorf("unable to import vulnerability database: %w", err) } - return stderrPrintLnf("Vulnerability database imported") + log.Info("Vulnerability database imported") + return nil } func legacyDBImport(opts options.Database, dbArchivePath string) error { diff --git a/grype/db/internal/gormadapter/open.go b/grype/db/internal/gormadapter/open.go index 1d68833f645..d6070e9e30e 100644 --- a/grype/db/internal/gormadapter/open.go +++ b/grype/db/internal/gormadapter/open.go @@ -4,37 +4,75 @@ import ( "fmt" "os" "path/filepath" + "strings" "github.com/glebarez/sqlite" "gorm.io/gorm" + + "github.com/anchore/grype/internal/log" ) +var commonStatements = []string{ + `PRAGMA foreign_keys = ON`, // needed for v6+ + `PRAGMA page_size = 16384`, // 16 KB, useful for smaller DBs since ~85% of the DB is from the blobs table +} + var writerStatements = []string{ - // performance improvements (note: will result in lost data on write interruptions). - // on my box it reduces the time to write from 10 minutes to 10 seconds (with ~1GB memory utilization spikes) - `PRAGMA synchronous = OFF`, - `PRAGMA journal_mode = MEMORY`, - `PRAGMA cache_size = 100000`, - `PRAGMA mmap_size = 268435456`, // 256 MB + // performance improvements (note: will result in lost data on write interruptions) + `PRAGMA synchronous = OFF`, // minimize the amount of syncing to disk, prioritizing write performance over durability + `PRAGMA journal_mode = MEMORY`, // do not write the journal to disk (maximizing write performance); OFF is faster but less safe in terms of DB consistency } -var readOptions = []string{ - "immutable=1", - "cache=shared", - "mode=ro", +var heavyWriteStatements = []string{ + `PRAGMA cache_size = -1073741824`, // ~1 GB (negative means treat as bytes not page count); one caveat is to not pick a value that risks swapping behavior, negating performance gains + `PRAGMA mmap_size = 1073741824`, // ~1 GB; the maximum size of the memory-mapped I/O buffer (to access the database file as if it were a part of the process’s virtual memory) +} + +var readConnectionOptions = []string{ + "immutable=1", // indicates that the database file is guaranteed not to change during the connection’s lifetime (slight performance benefit for read-only cases) + "mode=ro", // opens the database in as read-only (an enforcement mechanism to allow immutable=1 to be effective) + "cache=shared", // multiple database connections within the same process share a single page cache } type config struct { - path string - write bool - memory bool + path string + writable bool + truncate bool + allowLargeMemoryFootprint bool + models []any + initialData []any + memory bool } type Option func(*config) -func WithTruncate(truncate bool) Option { +func WithTruncate(truncate bool, models []any, initialData []any) Option { return func(c *config) { - c.write = truncate + c.truncate = truncate + if truncate { + c.writable = true + c.models = models + c.initialData = initialData + c.allowLargeMemoryFootprint = true + } + } +} + +func WithMigrate(models []any) Option { + return func(c *config) { + c.models = models + } +} + +func WithWritable(write bool) Option { + return func(c *config) { + c.writable = write + } +} + +func WithLargeMemoryFootprint(largeFootprint bool) Option { + return func(c *config) { + c.allowLargeMemoryFootprint = largeFootprint } } @@ -52,10 +90,6 @@ func (c *config) apply(path string, opts []Option) { c.path = path } -func (c config) shouldTruncate() bool { - return c.write && !c.memory -} - func (c config) connectionString() string { var conn string if c.path == "" { @@ -64,9 +98,11 @@ func (c config) connectionString() string { conn = fmt.Sprintf("file:%s?cache=shared", c.path) } - if !c.write && !c.memory { - // &immutable=1&cache=shared&mode=ro - for _, o := range readOptions { + if !c.writable && !c.memory { + if !strings.Contains(conn, "?") { + conn += "?" + } + for _, o := range readConnectionOptions { conn += fmt.Sprintf("&%s", o) } } @@ -77,8 +113,12 @@ func (c config) connectionString() string { func Open(path string, options ...Option) (*gorm.DB, error) { cfg := newConfig(path, options) - if cfg.shouldTruncate() { - if err := prepareWritableDB(path); err != nil { + if cfg.truncate && !cfg.writable { + return nil, fmt.Errorf("cannot truncate a read-only DB") + } + + if cfg.truncate { + if err := deleteDB(path); err != nil { return nil, err } } @@ -88,22 +128,57 @@ func Open(path string, options ...Option) (*gorm.DB, error) { return nil, fmt.Errorf("unable to connect to DB: %w", err) } - if cfg.write { - for _, sqlStmt := range writerStatements { - dbObj.Exec(sqlStmt) - if dbObj.Error != nil { - return nil, fmt.Errorf("unable to execute (%s): %w", sqlStmt, dbObj.Error) - } + if cfg.writable { + log.Trace("applying writable DB statements") + if err := applyStatements(dbObj, writerStatements); err != nil { + return nil, fmt.Errorf("unable to apply DB writer statements: %w", err) + } + } + + if cfg.truncate && cfg.allowLargeMemoryFootprint { + log.Trace("applying large memory footprint DB statements") + if err := applyStatements(dbObj, heavyWriteStatements); err != nil { + return nil, fmt.Errorf("unable to apply DB heavy writer statements: %w", err) + } + } + + for _, stmt := range commonStatements { + dbObj.Exec(stmt) + if dbObj.Error != nil { + return nil, fmt.Errorf("unable to execute common statement (%s): %w", stmt, dbObj.Error) + } + } + + if len(cfg.models) > 0 { + log.Trace("applying DB migrations") + if err := dbObj.AutoMigrate(cfg.models...); err != nil { + return nil, fmt.Errorf("unable to migrate: %w", err) } } - // needed for v6+ - dbObj.Exec("PRAGMA foreign_keys = ON") + if len(cfg.initialData) > 0 { + log.Trace("applying initial data") + for _, d := range cfg.initialData { + if err := dbObj.Create(d).Error; err != nil { + return nil, fmt.Errorf("unable to create initial data: %w", err) + } + } + } return dbObj, nil } -func prepareWritableDB(path string) error { +func applyStatements(db *gorm.DB, statements []string) error { + for _, sqlStmt := range statements { + db.Exec(sqlStmt) + if db.Error != nil { + return fmt.Errorf("unable to execute (%s): %w", sqlStmt, db.Error) + } + } + return nil +} + +func deleteDB(path string) error { if _, err := os.Stat(path); err == nil { if err := os.Remove(path); err != nil { return fmt.Errorf("unable to remove existing DB file: %w", err) diff --git a/grype/db/internal/gormadapter/open_test.go b/grype/db/internal/gormadapter/open_test.go index 1dfc8408877..b9c78063833 100644 --- a/grype/db/internal/gormadapter/open_test.go +++ b/grype/db/internal/gormadapter/open_test.go @@ -33,7 +33,7 @@ func TestConfigApply(t *testing.T) { { name: "apply with truncate option", path: "test.db", - options: []Option{WithTruncate(true)}, + options: []Option{WithTruncate(true, nil, nil)}, // migration and initial data don't matter expectedPath: "test.db", expectedMemory: false, }, @@ -49,44 +49,6 @@ func TestConfigApply(t *testing.T) { } } -func TestConfigShouldTruncate(t *testing.T) { - tests := []struct { - name string - write bool - memory bool - expectedTruncate bool - }{ - { - name: "should truncate when write is true and not memory", - write: true, - memory: false, - expectedTruncate: true, - }, - { - name: "should not truncate when write is false", - write: false, - memory: false, - expectedTruncate: false, - }, - { - name: "should not truncate when using in-memory DB", - write: true, - memory: true, - expectedTruncate: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - c := config{ - write: tt.write, - memory: tt.memory, - } - require.Equal(t, tt.expectedTruncate, c.shouldTruncate()) - }) - } -} - func TestConfigConnectionString(t *testing.T) { tests := []struct { name string @@ -105,7 +67,7 @@ func TestConfigConnectionString(t *testing.T) { name: "read-only path", path: "test.db", write: false, - expectedConnStr: "file:test.db?cache=shared&immutable=1&cache=shared&mode=ro", + expectedConnStr: "file:test.db?cache=shared&immutable=1&mode=ro&cache=shared", }, { name: "in-memory mode", @@ -119,9 +81,9 @@ func TestConfigConnectionString(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { c := config{ - path: tt.path, - write: tt.write, - memory: tt.memory, + path: tt.path, + writable: tt.write, + memory: tt.memory, } require.Equal(t, tt.expectedConnStr, c.connectionString()) }) @@ -134,7 +96,7 @@ func TestPrepareWritableDB(t *testing.T) { tempDir := t.TempDir() dbPath := filepath.Join(tempDir, "newdir", "test.db") - err := prepareWritableDB(dbPath) + err := deleteDB(dbPath) require.NoError(t, err) _, err = os.Stat(filepath.Dir(dbPath)) @@ -151,7 +113,7 @@ func TestPrepareWritableDB(t *testing.T) { _, err = os.Stat(dbPath) require.NoError(t, err) - err = prepareWritableDB(dbPath) + err = deleteDB(dbPath) require.NoError(t, err) _, err = os.Stat(dbPath) @@ -160,7 +122,7 @@ func TestPrepareWritableDB(t *testing.T) { t.Run("returns error if unable to create parent directory", func(t *testing.T) { invalidDir := filepath.Join("/root", "invalidDir", "test.db") - err := prepareWritableDB(invalidDir) + err := deleteDB(invalidDir) require.Error(t, err) require.Contains(t, err.Error(), "unable to create parent directory") }) diff --git a/grype/db/v3/store/store.go b/grype/db/v3/store/store.go index bcf9f6895cf..efd9e0cf9d7 100644 --- a/grype/db/v3/store/store.go +++ b/grype/db/v3/store/store.go @@ -19,28 +19,21 @@ type store struct { db *gorm.DB } +func models() []any { + return []any{ + model.IDModel{}, + model.VulnerabilityModel{}, + model.VulnerabilityMetadataModel{}, + } +} + // New creates a new instance of the store. func New(dbFilePath string, overwrite bool) (v3.Store, error) { - db, err := gormadapter.Open(dbFilePath, gormadapter.WithTruncate(overwrite)) + db, err := gormadapter.Open(dbFilePath, gormadapter.WithTruncate(overwrite, models(), nil)) if err != nil { return nil, err } - if overwrite { - // TODO: automigrate could write to the database, - // we should be validating the database is the correct database based on the version in the ID table before - // automigrating - if err := db.AutoMigrate(&model.IDModel{}); err != nil { - return nil, fmt.Errorf("unable to migrate ID model: %w", err) - } - if err := db.AutoMigrate(&model.VulnerabilityModel{}); err != nil { - return nil, fmt.Errorf("unable to migrate Vulnerability model: %w", err) - } - if err := db.AutoMigrate(&model.VulnerabilityMetadataModel{}); err != nil { - return nil, fmt.Errorf("unable to migrate Vulnerability Metadata model: %w", err) - } - } - return &store{ db: db, }, nil diff --git a/grype/db/v4/store/store.go b/grype/db/v4/store/store.go index 1a8727d519a..61fec9ef11a 100644 --- a/grype/db/v4/store/store.go +++ b/grype/db/v4/store/store.go @@ -19,31 +19,22 @@ type store struct { db *gorm.DB } +func models() []any { + return []any{ + model.IDModel{}, + model.VulnerabilityModel{}, + model.VulnerabilityMetadataModel{}, + model.VulnerabilityMatchExclusionModel{}, + } +} + // New creates a new instance of the store. func New(dbFilePath string, overwrite bool) (v4.Store, error) { - db, err := gormadapter.Open(dbFilePath, gormadapter.WithTruncate(overwrite)) + db, err := gormadapter.Open(dbFilePath, gormadapter.WithTruncate(overwrite, models(), nil)) if err != nil { return nil, err } - if overwrite { - // TODO: automigrate could write to the database, - // we should be validating the database is the correct database based on the version in the ID table before - // automigrating - if err := db.AutoMigrate(&model.IDModel{}); err != nil { - return nil, fmt.Errorf("unable to migrate ID model: %w", err) - } - if err := db.AutoMigrate(&model.VulnerabilityModel{}); err != nil { - return nil, fmt.Errorf("unable to migrate Vulnerability model: %w", err) - } - if err := db.AutoMigrate(&model.VulnerabilityMetadataModel{}); err != nil { - return nil, fmt.Errorf("unable to migrate Vulnerability Metadata model: %w", err) - } - if err := db.AutoMigrate(&model.VulnerabilityMatchExclusionModel{}); err != nil { - return nil, fmt.Errorf("unable to migrate Vulnerability Match Exclusion model: %w", err) - } - } - return &store{ db: db, }, nil diff --git a/grype/db/v5/store/store.go b/grype/db/v5/store/store.go index 3b9b527cae8..8b282f76ed2 100644 --- a/grype/db/v5/store/store.go +++ b/grype/db/v5/store/store.go @@ -19,31 +19,22 @@ type store struct { db *gorm.DB } +func models() []any { + return []any{ + model.IDModel{}, + model.VulnerabilityModel{}, + model.VulnerabilityMetadataModel{}, + model.VulnerabilityMatchExclusionModel{}, + } +} + // New creates a new instance of the store. func New(dbFilePath string, overwrite bool) (v5.Store, error) { - db, err := gormadapter.Open(dbFilePath, gormadapter.WithTruncate(overwrite)) + db, err := gormadapter.Open(dbFilePath, gormadapter.WithTruncate(overwrite, models(), nil)) if err != nil { return nil, err } - if overwrite { - // TODO: automigrate could write to the database, - // we should be validating the database is the correct database based on the version in the ID table before - // automigrating - if err := db.AutoMigrate(&model.IDModel{}); err != nil { - return nil, fmt.Errorf("unable to migrate ID model: %w", err) - } - if err := db.AutoMigrate(&model.VulnerabilityModel{}); err != nil { - return nil, fmt.Errorf("unable to migrate Vulnerability model: %w", err) - } - if err := db.AutoMigrate(&model.VulnerabilityMetadataModel{}); err != nil { - return nil, fmt.Errorf("unable to migrate Vulnerability Metadata model: %w", err) - } - if err := db.AutoMigrate(&model.VulnerabilityMatchExclusionModel{}); err != nil { - return nil, fmt.Errorf("unable to migrate Vulnerability Match Exclusion model: %w", err) - } - } - return &store{ db: db, }, nil diff --git a/grype/db/v6/db.go b/grype/db/v6/db.go index 5a7cbe46dd1..84d36d43f70 100644 --- a/grype/db/v6/db.go +++ b/grype/db/v6/db.go @@ -1,8 +1,13 @@ package v6 import ( + "fmt" "io" "path/filepath" + + "gorm.io/gorm" + + "github.com/anchore/grype/grype/db/internal/gormadapter" ) const ( @@ -59,9 +64,39 @@ func (c Config) DBFilePath() string { } func NewReader(cfg Config) (Reader, error) { - return newStore(cfg, false) + return newStore(cfg, false, false) } func NewWriter(cfg Config) (ReadWriter, error) { - return newStore(cfg, true) + return newStore(cfg, true, true) +} + +func Hydrater() func(string) error { + return func(path string) error { + // this will auto-migrate any models, creating and populating indexes as needed + // we don't pass any data initialization here because the data is already in the db archive and we do not want + // to affect the entries themselves, only indexes and schema. + _, err := newStore(Config{DBDirPath: path}, false, true) + return err + } +} + +// NewLowLevelDB creates a new empty DB for writing or opens an existing one for reading from the given path. This is +// not recommended for typical interactions with the vulnerability DB, use NewReader and NewWriter instead. +func NewLowLevelDB(dbFilePath string, empty, writable bool) (*gorm.DB, error) { + var opts []gormadapter.Option + + if empty && !writable { + return nil, fmt.Errorf("cannot open an empty database for reading only") + } + + if empty { + opts = append(opts, gormadapter.WithTruncate(true, Models(), InitialData())) + } + + if writable { + opts = append(opts, gormadapter.WithWritable(true)) + } + + return gormadapter.Open(dbFilePath, opts...) } diff --git a/grype/db/v6/db_metadata_store_test.go b/grype/db/v6/db_metadata_store_test.go index 8b0713f6640..69542c6374a 100644 --- a/grype/db/v6/db_metadata_store_test.go +++ b/grype/db/v6/db_metadata_store_test.go @@ -56,7 +56,7 @@ func setupTestStore(t testing.TB, d ...string) *store { s, err := newStore(Config{ DBDirPath: dir, - }, true) + }, true, true) require.NoError(t, err) return s diff --git a/grype/db/v6/installation/curator.go b/grype/db/v6/installation/curator.go index 39bed76b60d..6d02f480975 100644 --- a/grype/db/v6/installation/curator.go +++ b/grype/db/v6/installation/curator.go @@ -60,16 +60,18 @@ func (c Config) DBDirectoryPath() string { } type curator struct { - fs afero.Fs - client distribution.Client - config Config + fs afero.Fs + client distribution.Client + config Config + hydrator func(string) error } func NewCurator(cfg Config, downloader distribution.Client) (db.Curator, error) { return curator{ - fs: afero.NewOsFs(), - client: downloader, - config: cfg, + fs: afero.NewOsFs(), + client: downloader, + config: cfg, + hydrator: db.Hydrater(), }, nil } @@ -147,41 +149,15 @@ func (c curator) Update() (bool, error) { return false, nil } - mon := newMonitor() - defer mon.SetCompleted() - - mon.Set("checking for update") - update, checkErr := c.client.IsUpdateAvailable(current) - if checkErr != nil { - // we want to continue even if we can't check for an update - log.Warnf("unable to check for vulnerability database update") - log.WithFields("error", checkErr).Debug("check for vulnerability update failed") + update, err := c.update(current) + if err != nil { + return false, err } if update == nil { - if checkErr == nil { - // there was no update (or any issue while checking for an update) - c.setLastSuccessfulUpdateCheck() - } - - mon.Set("no update available") return false, nil } - log.Infof("downloading new vulnerability DB") - mon.Set("downloading") - dest, err := c.client.Download(*update, filepath.Dir(c.config.DBRootDir), mon.downloadProgress) - if err != nil { - return false, fmt.Errorf("unable to update vulnerability database: %w", err) - } - - if err := c.activate(dest, mon); err != nil { - return false, fmt.Errorf("unable to activate new vulnerability database: %w", err) - } - - // only set the last successful update check if the update was successful - c.setLastSuccessfulUpdateCheck() - if current != nil { log.WithFields( "from", current.Built.String(), @@ -218,6 +194,47 @@ func (c curator) isUpdateCheckAllowed() bool { return *elapsed > c.config.UpdateCheckMaxFrequency } +func (c curator) update(current *db.Description) (*distribution.Archive, error) { + mon := newMonitor() + defer mon.SetCompleted() + + mon.Set("checking for update") + update, checkErr := c.client.IsUpdateAvailable(current) + if checkErr != nil { + // we want to continue even if we can't check for an update + log.Warnf("unable to check for vulnerability database update") + log.WithFields("error", checkErr).Debug("check for vulnerability update failed") + } + + if update == nil { + if checkErr == nil { + // there was no update (or any issue while checking for an update) + c.setLastSuccessfulUpdateCheck() + } + + mon.Set("no update available") + return nil, nil + } + + log.Infof("downloading new vulnerability DB") + mon.Set("downloading") + dest, err := c.client.Download(*update, filepath.Dir(c.config.DBRootDir), mon.downloadProgress) + if err != nil { + return nil, fmt.Errorf("unable to update vulnerability database: %w", err) + } + + if err := c.activate(dest, mon); err != nil { + return nil, fmt.Errorf("unable to activate new vulnerability database: %w", err) + } + + mon.Set("updated") + + // only set the last successful update check if the update was successful + c.setLastSuccessfulUpdateCheck() + + return update, nil +} + func (c curator) durationSinceUpdateCheck() (*time.Duration, error) { // open `$dbDir/last_update_check` file and read the timestamp and do now() - timestamp @@ -310,6 +327,8 @@ func (c curator) Import(dbArchivePath string) error { return err } + mon.Set("imported") + return nil } @@ -317,10 +336,16 @@ func (c curator) Import(dbArchivePath string) error { func (c curator) activate(dbDirPath string, mon monitor) error { defer mon.importProgress.SetCompleted() + dbFilePath := filepath.Join(dbDirPath, db.VulnerabilityDBFileName) + + mon.Set("hydrating") + if err := c.hydrator(dbDirPath); err != nil { + return err + } + mon.Set("hashing") // overwrite the checksums file with the new checksums - dbFilePath := filepath.Join(dbDirPath, db.VulnerabilityDBFileName) digest, err := db.CalculateDBDigest(dbFilePath) if err != nil { return err diff --git a/grype/db/v6/installation/curator_test.go b/grype/db/v6/installation/curator_test.go index 2b64d0fdb5c..3a0cc698979 100644 --- a/grype/db/v6/installation/curator_test.go +++ b/grype/db/v6/installation/curator_test.go @@ -16,7 +16,6 @@ import ( db "github.com/anchore/grype/grype/db/v6" "github.com/anchore/grype/grype/db/v6/distribution" - "github.com/anchore/grype/grype/db/v6/internal" "github.com/anchore/grype/internal/schemaver" ) @@ -118,7 +117,7 @@ func writeTestChecksumsFile(t *testing.T, fs afero.Fs, dir string, checksums str func writeTestDescriptionToDB(t *testing.T, dir string, desc db.Description) string { c := db.Config{DBDirPath: dir} - d, err := internal.NewDB(c.DBFilePath(), db.Models(), false) + d, err := db.NewLowLevelDB(c.DBFilePath(), false, false) require.NoError(t, err) if err := d.Unscoped().Where("true").Delete(&db.DBMetadata{}).Error; err != nil { @@ -176,6 +175,12 @@ func TestCurator_Update(t *testing.T) { t.Run("happy path: successful update", func(t *testing.T) { c := setupCuratorForUpdate(t, withWorkingUpdateIntegrations()) mc := c.client.(*mockClient) + // nop hydrator, assert error if NOT called + hydrateCalled := false + c.hydrator = func(string) error { + hydrateCalled = true + return nil + } updated, err := c.Update() @@ -184,6 +189,7 @@ func TestCurator_Update(t *testing.T) { require.FileExists(t, filepath.Join(c.config.DBDirectoryPath(), lastUpdateCheckFileName)) mc.AssertExpectations(t) + assert.True(t, hydrateCalled, "expected hydrator to be called") }) t.Run("error checking for updates", func(t *testing.T) { @@ -220,6 +226,10 @@ func TestCurator_Update(t *testing.T) { t.Run("error during activation: cannot move dir", func(t *testing.T) { c := setupCuratorForUpdate(t, withWorkingUpdateIntegrations()) mc := c.client.(*mockClient) + // nop hydrator + c.hydrator = func(string) error { + return nil + } // simulate not being able to move the staged dir to the db dir c.fs = afero.NewReadOnlyFs(c.fs) diff --git a/grype/db/v6/internal/db.go b/grype/db/v6/internal/db.go deleted file mode 100644 index ba0368c4757..00000000000 --- a/grype/db/v6/internal/db.go +++ /dev/null @@ -1,26 +0,0 @@ -package internal - -import ( - "fmt" - - "gorm.io/gorm" - - "github.com/anchore/grype/grype/db/internal/gormadapter" -) - -// NewDB creates a new empty DB for writing or opens an existing one for reading from the given path. -func NewDB(dbFilePath string, models []any, truncate bool) (*gorm.DB, error) { - db, err := gormadapter.Open(dbFilePath, gormadapter.WithTruncate(truncate)) - if err != nil { - return nil, err - } - - if len(models) > 0 && truncate { - // note: never migrate if this is for reading only (if we are truncating). Migrating will change the contents - // of the DB so any checksums verifications will fail even though this is logically a no-op. - if err := db.AutoMigrate(models...); err != nil { - return nil, fmt.Errorf("unable to create tables: %w", err) - } - } - return db, nil -} diff --git a/grype/db/v6/store.go b/grype/db/v6/store.go index 1624bd57930..3a0cd6ca000 100644 --- a/grype/db/v6/store.go +++ b/grype/db/v6/store.go @@ -2,10 +2,10 @@ package v6 import ( "fmt" + "strings" "gorm.io/gorm" - "github.com/anchore/grype/grype/db/v6/internal" "github.com/anchore/grype/internal/log" ) @@ -18,26 +18,29 @@ type store struct { blobStore *blobStore db *gorm.DB config Config - write bool + readOnly bool } -func newStore(cfg Config, write bool) (*store, error) { +func InitialData() []any { + d := KnownOperatingSystemAliases() + var data []any + for _, v := range d { + data = append(data, &v) + } + return data +} + +func newStore(cfg Config, empty, writable bool) (*store, error) { var path string if cfg.DBDirPath != "" { path = cfg.DBFilePath() } - db, err := internal.NewDB(path, Models(), write) + + db, err := NewLowLevelDB(path, empty, writable) if err != nil { return nil, fmt.Errorf("failed to open db: %w", err) } - if write { - // add hard-coded os aliases - if err := db.Create(KnownOperatingSystemAliases()).Error; err != nil { - return nil, fmt.Errorf("failed to add os aliases: %w", err) - } - } - bs := newBlobStore(db) return &store{ dbMetadataStore: newDBMetadataStore(db), @@ -48,25 +51,69 @@ func newStore(cfg Config, write bool) (*store, error) { blobStore: bs, db: db, config: cfg, - write: write, + readOnly: !empty && !writable, }, nil } // Close closes the store and finalizes the blobs when the DB is open for writing. If open for reading, it does nothing. func (s *store) Close() error { log.Debug("closing store") - if !s.write { + if s.readOnly { return nil } + // this will drop the digest blob table entirely if err := s.blobStore.Close(); err != nil { return fmt.Errorf("failed to finalize blobs: %w", err) } - err := s.db.Exec("VACUUM").Error - if err != nil { + // drop all indexes, which saves a lot of space distribution-wise (these get re-created on running gorm auto-migrate) + if err := dropAllIndexes(s.db); err != nil { + return err + } + + // compact the DB size + log.Debug("vacuuming database") + if err := s.db.Exec("VACUUM").Error; err != nil { return fmt.Errorf("failed to vacuum: %w", err) } + // since we are using riskier statements to optimize write speeds, do a last integrity check + log.Debug("running integrity check") + if err := s.db.Exec("PRAGMA integrity_check").Error; err != nil { + return fmt.Errorf("integrity check failed: %w", err) + } + + return nil +} + +func dropAllIndexes(db *gorm.DB) error { + tables, err := db.Migrator().GetTables() + if err != nil { + return fmt.Errorf("failed to get tables: %w", err) + } + + log.WithFields("tables", len(tables)).Debug("discovering indexes") + + for _, table := range tables { + indexes, err := db.Migrator().GetIndexes(table) + if err != nil { + return fmt.Errorf("failed to get indexes for table %s: %w", table, err) + } + + log.WithFields("table", table, "indexes", len(indexes)).Trace("dropping indexes") + for _, index := range indexes { + // skip auto-generated UNIQUE or PRIMARY KEY indexes (sqlite will not allow you to drop these without more major surgery) + if strings.HasPrefix(index.Name(), "sqlite_autoindex") { + log.WithFields("table", table, "index", index.Name()).Trace("skip dropping autoindex") + continue + } + log.WithFields("table", table, "index", index.Name()).Trace("dropping index") + if err := db.Migrator().DropIndex(table, index.Name()); err != nil { + return fmt.Errorf("failed to drop index %s on table %s: %w", index, table, err) + } + } + } + return nil } diff --git a/grype/db/v6/store_test.go b/grype/db/v6/store_test.go new file mode 100644 index 00000000000..d6abc9dba0b --- /dev/null +++ b/grype/db/v6/store_test.go @@ -0,0 +1,51 @@ +package v6 + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestStoreClose(t *testing.T) { + t.Run("readonly mode does nothing", func(t *testing.T) { + s := setupTestStore(t) + s.readOnly = true + + err := s.Close() + require.NoError(t, err) + + // the blob_digests table should still exist + var exists int + s.db.Raw("SELECT count(*) FROM sqlite_master WHERE type = 'table' AND name = 'blob_digests'").Scan(&exists) + assert.Equal(t, 1, exists) + + // ensure we have our indexes + var indexes []string + s.db.Raw(`SELECT name FROM sqlite_master WHERE type = 'index' AND name NOT LIKE 'sqlite_autoindex%'`).Scan(&indexes) + assert.NotEmpty(t, indexes) + + }) + + t.Run("successful close in writable mode", func(t *testing.T) { + s := setupTestStore(t) + + // ensure we have indexes to start with + var indexes []string + s.db.Raw(`SELECT name FROM sqlite_master WHERE type = 'index' AND name NOT LIKE 'sqlite_autoindex%'`).Scan(&indexes) + assert.NotEmpty(t, indexes) + + err := s.Close() + require.NoError(t, err) + + // ensure the digests table was dropped + var exists int + s.db.Raw("SELECT count(*) FROM sqlite_master WHERE type = 'table' AND name = 'blob_digests'").Scan(&exists) + assert.Equal(t, 0, exists) + + // ensure all of our indexes were dropped + indexes = nil + s.db.Raw(`SELECT name FROM sqlite_master WHERE type = 'index' AND name NOT LIKE 'sqlite_autoindex%'`).Scan(&indexes) + assert.Empty(t, indexes) + }) +}