Skip to content

Commit

Permalink
drop DB indexes on close
Browse files Browse the repository at this point in the history
Signed-off-by: Alex Goodman <[email protected]>
  • Loading branch information
wagoodman committed Dec 16, 2024
1 parent 1f594c9 commit 5d4079c
Show file tree
Hide file tree
Showing 13 changed files with 377 additions and 221 deletions.
14 changes: 8 additions & 6 deletions cmd/grype/cli/commands/db_import.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,17 @@ import (
"github.com/anchore/grype/grype/db/v6/distribution"
"github.com/anchore/grype/grype/db/v6/installation"
"github.com/anchore/grype/internal"
"github.com/anchore/grype/internal/log"
)

func DBImport(app clio.Application) *cobra.Command {
opts := dbOptionsDefault(app.ID())

return app.SetupCommand(&cobra.Command{
Use: "import FILE",
Short: "import a vulnerability database archive",
Long: fmt.Sprintf("import a vulnerability database archive from a local FILE.\nDB archives can be obtained from %q.", internal.DBUpdateURL),
Args: cobra.ExactArgs(1),
PreRunE: disableUI(app),
Use: "import FILE",
Short: "import a vulnerability database archive",
Long: fmt.Sprintf("import a vulnerability database archive from a local FILE.\nDB archives can be obtained from %q.", internal.DBUpdateURL),
Args: cobra.ExactArgs(1),
RunE: func(_ *cobra.Command, args []string) error {
return runDBImport(*opts, args[0])
},
Expand All @@ -48,10 +48,12 @@ func newDBImport(opts options.Database, dbArchivePath string) error {
return fmt.Errorf("unable to create curator: %w", err)
}

log.WithFields("path", dbArchivePath).Infof("Importing vulnerability database archive")
if err := c.Import(dbArchivePath); err != nil {
return fmt.Errorf("unable to import vulnerability database: %w", err)
}
return stderrPrintLnf("Vulnerability database imported")
log.Info("Vulnerability database imported")
return nil
}

func legacyDBImport(opts options.Database, dbArchivePath string) error {
Expand Down
141 changes: 108 additions & 33 deletions grype/db/internal/gormadapter/open.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,37 +4,75 @@ import (
"fmt"
"os"
"path/filepath"
"strings"

"github.com/glebarez/sqlite"
"gorm.io/gorm"

"github.com/anchore/grype/internal/log"
)

// commonStatements are the PRAGMA statements that Open executes on every
// connection, after any writer-specific statements have been applied.
var commonStatements = []string{
`PRAGMA foreign_keys = ON`, // needed for v6+
`PRAGMA page_size = 16384`, // 16 KB, useful for smaller DBs since ~85% of the DB is from the blobs table
}

var writerStatements = []string{
// performance improvements (note: will result in lost data on write interruptions).
// on my box it reduces the time to write from 10 minutes to 10 seconds (with ~1GB memory utilization spikes)
`PRAGMA synchronous = OFF`,
`PRAGMA journal_mode = MEMORY`,
`PRAGMA cache_size = 100000`,
`PRAGMA mmap_size = 268435456`, // 256 MB
// performance improvements (note: will result in lost data on write interruptions)
`PRAGMA synchronous = OFF`, // minimize the amount of syncing to disk, prioritizing write performance over durability
`PRAGMA journal_mode = MEMORY`, // do not write the journal to disk (maximizing write performance); OFF is faster but less safe in terms of DB consistency
}

var readOptions = []string{
"immutable=1",
"cache=shared",
"mode=ro",
var heavyWriteStatements = []string{
`PRAGMA cache_size = -1073741824`, // ~1 GB (negative means treat as bytes not page count); one caveat is to not pick a value that risks swapping behavior, negating performance gains
`PRAGMA mmap_size = 1073741824`, // ~1 GB; the maximum size of the memory-mapped I/O buffer (to access the database file as if it were a part of the process’s virtual memory)
}

// readConnectionOptions are the URI query parameters appended to the connection
// string when the DB is opened neither writable nor in-memory.
var readConnectionOptions = []string{
"immutable=1", // indicates that the database file is guaranteed not to change during the connection’s lifetime (slight performance benefit for read-only cases)
"mode=ro", // opens the database as read-only (an enforcement mechanism to allow immutable=1 to be effective)
"cache=shared", // multiple database connections within the same process share a single page cache
}

type config struct {
path string
write bool
memory bool
path string
writable bool
truncate bool
allowLargeMemoryFootprint bool
models []any
initialData []any
memory bool
}

// Option is a functional option that adjusts the config used by Open.
type Option func(*config)

func WithTruncate(truncate bool) Option {
func WithTruncate(truncate bool, models []any, initialData []any) Option {
return func(c *config) {
c.write = truncate
c.truncate = truncate
if truncate {
c.writable = true
c.models = models
c.initialData = initialData
c.allowLargeMemoryFootprint = true
}
}
}

// WithMigrate returns an Option that stores the given models on the config.
// Open runs AutoMigrate against the configured models when the list is
// non-empty.
func WithMigrate(models []any) Option {
	setModels := func(cfg *config) {
		cfg.models = models
	}
	return setModels
}

// WithWritable returns an Option that sets whether the DB is opened writable.
// When writable, Open applies the writer PRAGMA statements; when not writable
// (and not in-memory), the read-only connection options are used instead.
func WithWritable(write bool) Option {
	setWritable := func(cfg *config) {
		cfg.writable = write
	}
	return setWritable
}

// WithLargeMemoryFootprint returns an Option that sets whether Open may apply
// the heavy-write PRAGMA statements (large cache and mmap sizes). Open only
// applies them when the DB is also being truncated.
func WithLargeMemoryFootprint(largeFootprint bool) Option {
	setFootprint := func(cfg *config) {
		cfg.allowLargeMemoryFootprint = largeFootprint
	}
	return setFootprint
}

Expand All @@ -52,10 +90,6 @@ func (c *config) apply(path string, opts []Option) {
c.path = path
}

func (c config) shouldTruncate() bool {
return c.write && !c.memory
}

func (c config) connectionString() string {
var conn string
if c.path == "" {
Expand All @@ -64,9 +98,11 @@ func (c config) connectionString() string {
conn = fmt.Sprintf("file:%s?cache=shared", c.path)
}

if !c.write && !c.memory {
// &immutable=1&cache=shared&mode=ro
for _, o := range readOptions {
if !c.writable && !c.memory {
if !strings.Contains(conn, "?") {
conn += "?"
}
for _, o := range readConnectionOptions {
conn += fmt.Sprintf("&%s", o)
}
}
Expand All @@ -77,8 +113,12 @@ func (c config) connectionString() string {
func Open(path string, options ...Option) (*gorm.DB, error) {
cfg := newConfig(path, options)

if cfg.shouldTruncate() {
if err := prepareWritableDB(path); err != nil {
if cfg.truncate && !cfg.writable {
return nil, fmt.Errorf("cannot truncate a read-only DB")
}

if cfg.truncate {
if err := deleteDB(path); err != nil {
return nil, err
}
}
Expand All @@ -88,22 +128,57 @@ func Open(path string, options ...Option) (*gorm.DB, error) {
return nil, fmt.Errorf("unable to connect to DB: %w", err)
}

if cfg.write {
for _, sqlStmt := range writerStatements {
dbObj.Exec(sqlStmt)
if dbObj.Error != nil {
return nil, fmt.Errorf("unable to execute (%s): %w", sqlStmt, dbObj.Error)
}
if cfg.writable {
log.Trace("applying writable DB statements")
if err := applyStatements(dbObj, writerStatements); err != nil {
return nil, fmt.Errorf("unable to apply DB writer statements: %w", err)
}
}

if cfg.truncate && cfg.allowLargeMemoryFootprint {
log.Trace("applying large memory footprint DB statements")
if err := applyStatements(dbObj, heavyWriteStatements); err != nil {
return nil, fmt.Errorf("unable to apply DB heavy writer statements: %w", err)
}
}

for _, stmt := range commonStatements {
dbObj.Exec(stmt)
if dbObj.Error != nil {
return nil, fmt.Errorf("unable to execute common statement (%s): %w", stmt, dbObj.Error)
}
}

if len(cfg.models) > 0 {
log.Trace("applying DB migrations")
if err := dbObj.AutoMigrate(cfg.models...); err != nil {
return nil, fmt.Errorf("unable to migrate: %w", err)
}
}

// needed for v6+
dbObj.Exec("PRAGMA foreign_keys = ON")
if len(cfg.initialData) > 0 {
log.Trace("applying initial data")
for _, d := range cfg.initialData {
if err := dbObj.Create(d).Error; err != nil {
return nil, fmt.Errorf("unable to create initial data: %w", err)
}
}
}

return dbObj, nil
}

func prepareWritableDB(path string) error {
// applyStatements executes each SQL statement against db in order and stops at
// the first failure, returning an error that names the offending statement.
//
// The error is read from the *gorm.DB returned by Exec. gorm records the
// outcome of a statement on the value it returns, which is generally not the
// receiver, so inspecting db.Error after the call can miss a failed statement.
func applyStatements(db *gorm.DB, statements []string) error {
	for _, sqlStmt := range statements {
		if err := db.Exec(sqlStmt).Error; err != nil {
			return fmt.Errorf("unable to execute (%s): %w", sqlStmt, err)
		}
	}
	return nil
}

func deleteDB(path string) error {
if _, err := os.Stat(path); err == nil {
if err := os.Remove(path); err != nil {
return fmt.Errorf("unable to remove existing DB file: %w", err)
Expand Down
54 changes: 8 additions & 46 deletions grype/db/internal/gormadapter/open_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ func TestConfigApply(t *testing.T) {
{
name: "apply with truncate option",
path: "test.db",
options: []Option{WithTruncate(true)},
options: []Option{WithTruncate(true, nil, nil)}, // migration and initial data don't matter
expectedPath: "test.db",
expectedMemory: false,
},
Expand All @@ -49,44 +49,6 @@ func TestConfigApply(t *testing.T) {
}
}

func TestConfigShouldTruncate(t *testing.T) {
tests := []struct {
name string
write bool
memory bool
expectedTruncate bool
}{
{
name: "should truncate when write is true and not memory",
write: true,
memory: false,
expectedTruncate: true,
},
{
name: "should not truncate when write is false",
write: false,
memory: false,
expectedTruncate: false,
},
{
name: "should not truncate when using in-memory DB",
write: true,
memory: true,
expectedTruncate: false,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
c := config{
write: tt.write,
memory: tt.memory,
}
require.Equal(t, tt.expectedTruncate, c.shouldTruncate())
})
}
}

func TestConfigConnectionString(t *testing.T) {
tests := []struct {
name string
Expand All @@ -105,7 +67,7 @@ func TestConfigConnectionString(t *testing.T) {
name: "read-only path",
path: "test.db",
write: false,
expectedConnStr: "file:test.db?cache=shared&immutable=1&cache=shared&mode=ro",
expectedConnStr: "file:test.db?cache=shared&immutable=1&mode=ro&cache=shared",
},
{
name: "in-memory mode",
Expand All @@ -119,9 +81,9 @@ func TestConfigConnectionString(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
c := config{
path: tt.path,
write: tt.write,
memory: tt.memory,
path: tt.path,
writable: tt.write,
memory: tt.memory,
}
require.Equal(t, tt.expectedConnStr, c.connectionString())
})
Expand All @@ -134,7 +96,7 @@ func TestPrepareWritableDB(t *testing.T) {
tempDir := t.TempDir()
dbPath := filepath.Join(tempDir, "newdir", "test.db")

err := prepareWritableDB(dbPath)
err := deleteDB(dbPath)
require.NoError(t, err)

_, err = os.Stat(filepath.Dir(dbPath))
Expand All @@ -151,7 +113,7 @@ func TestPrepareWritableDB(t *testing.T) {
_, err = os.Stat(dbPath)
require.NoError(t, err)

err = prepareWritableDB(dbPath)
err = deleteDB(dbPath)
require.NoError(t, err)

_, err = os.Stat(dbPath)
Expand All @@ -160,7 +122,7 @@ func TestPrepareWritableDB(t *testing.T) {

t.Run("returns error if unable to create parent directory", func(t *testing.T) {
invalidDir := filepath.Join("/root", "invalidDir", "test.db")
err := prepareWritableDB(invalidDir)
err := deleteDB(invalidDir)
require.Error(t, err)
require.Contains(t, err.Error(), "unable to create parent directory")
})
Expand Down
25 changes: 9 additions & 16 deletions grype/db/v3/store/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,28 +19,21 @@ type store struct {
db *gorm.DB
}

// models returns the gorm models for this store's schema. New passes them to
// gormadapter.WithTruncate so they are available for migration when the DB is
// opened with overwrite enabled.
func models() []any {
return []any{
model.IDModel{},
model.VulnerabilityModel{},
model.VulnerabilityMetadataModel{},
}
}

// New creates a new instance of the store.
func New(dbFilePath string, overwrite bool) (v3.Store, error) {
db, err := gormadapter.Open(dbFilePath, gormadapter.WithTruncate(overwrite))
db, err := gormadapter.Open(dbFilePath, gormadapter.WithTruncate(overwrite, models(), nil))
if err != nil {
return nil, err
}

if overwrite {
// TODO: automigrate could write to the database,
// we should be validating the database is the correct database based on the version in the ID table before
// automigrating
if err := db.AutoMigrate(&model.IDModel{}); err != nil {
return nil, fmt.Errorf("unable to migrate ID model: %w", err)
}
if err := db.AutoMigrate(&model.VulnerabilityModel{}); err != nil {
return nil, fmt.Errorf("unable to migrate Vulnerability model: %w", err)
}
if err := db.AutoMigrate(&model.VulnerabilityMetadataModel{}); err != nil {
return nil, fmt.Errorf("unable to migrate Vulnerability Metadata model: %w", err)
}
}

return &store{
db: db,
}, nil
Expand Down
Loading

0 comments on commit 5d4079c

Please sign in to comment.