Skip to content

Commit

Permalink
Drop DB v6 indexes on close (#2335)
Browse files Browse the repository at this point in the history
* drop DB indexes on close

Signed-off-by: Alex Goodman <[email protected]>

* pr review comments

Signed-off-by: Alex Goodman <[email protected]>

---------

Signed-off-by: Alex Goodman <[email protected]>
  • Loading branch information
wagoodman authored Dec 16, 2024
1 parent 65cd041 commit 69330e5
Show file tree
Hide file tree
Showing 16 changed files with 411 additions and 233 deletions.
16 changes: 10 additions & 6 deletions cmd/grype/cli/commands/db_import.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,17 @@ import (
"github.com/anchore/grype/grype/db/v6/distribution"
"github.com/anchore/grype/grype/db/v6/installation"
"github.com/anchore/grype/internal"
"github.com/anchore/grype/internal/log"
)

func DBImport(app clio.Application) *cobra.Command {
opts := dbOptionsDefault(app.ID())

return app.SetupCommand(&cobra.Command{
Use: "import FILE",
Short: "import a vulnerability database archive",
Long: fmt.Sprintf("import a vulnerability database archive from a local FILE.\nDB archives can be obtained from %q.", internal.DBUpdateURL),
Args: cobra.ExactArgs(1),
PreRunE: disableUI(app),
Use: "import FILE",
Short: "import a vulnerability database archive",
Long: fmt.Sprintf("import a vulnerability database archive from a local FILE.\nDB archives can be obtained from %q.", internal.DBUpdateURL),
Args: cobra.ExactArgs(1),
RunE: func(_ *cobra.Command, args []string) error {
return runDBImport(*opts, args[0])
},
Expand All @@ -48,10 +48,14 @@ func newDBImport(opts options.Database, dbArchivePath string) error {
return fmt.Errorf("unable to create curator: %w", err)
}

log.WithFields("path", dbArchivePath).Infof("importing vulnerability database archive")
if err := c.Import(dbArchivePath); err != nil {
return fmt.Errorf("unable to import vulnerability database: %w", err)
}
return stderrPrintLnf("Vulnerability database imported")

s := c.Status()
log.WithFields("built", s.Built.String(), "status", s.Status()).Info("vulnerability database imported")
return nil
}

func legacyDBImport(opts options.Database, dbArchivePath string) error {
Expand Down
14 changes: 2 additions & 12 deletions cmd/grype/cli/commands/db_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,18 +70,13 @@ func newDBStatus(opts dbStatusOptions) error {
}

func presentDBStatus(format string, writer io.Writer, status v6.Status) error {
statusStr := "valid"
if status.Err != nil {
statusStr = "invalid"
}

switch format {
case textOutputFormat:
fmt.Fprintln(writer, "Path: ", status.Path)
fmt.Fprintln(writer, "Schema: ", status.SchemaVersion)
fmt.Fprintln(writer, "Built: ", status.Built.String())
fmt.Fprintln(writer, "Checksum: ", status.Checksum)
fmt.Fprintln(writer, "Status: ", statusStr)
fmt.Fprintln(writer, "Status: ", status.Status())
case jsonOutputFormat:
enc := json.NewEncoder(writer)
enc.SetEscapeHTML(false)
Expand All @@ -107,18 +102,13 @@ func legacyDBStatus(opts dbStatusOptions) error {

status := dbCurator.Status()

statusStr := "valid"
if status.Err != nil {
statusStr = "invalid"
}

switch opts.Output {
case textOutputFormat:
fmt.Println("Location: ", status.Location)
fmt.Println("Built: ", status.Built.String())
fmt.Println("Schema: ", status.SchemaVersion)
fmt.Println("Checksum: ", status.Checksum)
fmt.Println("Status: ", statusStr)
fmt.Println("Status: ", status.Status())
case jsonOutputFormat:
enc := json.NewEncoder(os.Stdout)
enc.SetEscapeHTML(false)
Expand Down
154 changes: 121 additions & 33 deletions grype/db/internal/gormadapter/open.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,37 +4,81 @@ import (
"fmt"
"os"
"path/filepath"
"strings"

"github.com/glebarez/sqlite"
"gorm.io/gorm"

"github.com/anchore/grype/internal/log"
)

var commonStatements = []string{
`PRAGMA foreign_keys = ON`, // needed for v6+
}

var writerStatements = []string{
// performance improvements (note: will result in lost data on write interruptions).
// on my box it reduces the time to write from 10 minutes to 10 seconds (with ~1GB memory utilization spikes)
`PRAGMA synchronous = OFF`,
`PRAGMA journal_mode = MEMORY`,
`PRAGMA cache_size = 100000`,
`PRAGMA mmap_size = 268435456`, // 256 MB
// performance improvements (note: will result in lost data on write interruptions)
`PRAGMA synchronous = OFF`, // minimize the amount of syncing to disk, prioritizing write performance over durability
`PRAGMA journal_mode = MEMORY`, // do not write the journal to disk (maximizing write performance); OFF is faster but less safe in terms of DB consistency
}

var readOptions = []string{
"immutable=1",
"cache=shared",
"mode=ro",
var heavyWriteStatements = []string{
`PRAGMA cache_size = -1073741824`, // ~1 GB (negative means treat as bytes not page count); one caveat is to not pick a value that risks swapping behavior, negating performance gains
`PRAGMA mmap_size = 1073741824`, // ~1 GB; the maximum size of the memory-mapped I/O buffer (to access the database file as if it were a part of the process’s virtual memory)
}

var readConnectionOptions = []string{
"immutable=1", // indicates that the database file is guaranteed not to change during the connection’s lifetime (slight performance benefit for read-only cases)
"mode=ro", // opens the database in as read-only (an enforcement mechanism to allow immutable=1 to be effective)
"cache=shared", // multiple database connections within the same process share a single page cache
}

type config struct {
path string
write bool
memory bool
path string
writable bool
truncate bool
allowLargeMemoryFootprint bool
models []any
initialData []any
memory bool
statements []string
}

type Option func(*config)

func WithTruncate(truncate bool) Option {
func WithTruncate(truncate bool, models []any, initialData []any) Option {
return func(c *config) {
c.truncate = truncate
if truncate {
c.writable = true
c.models = models
c.initialData = initialData
c.allowLargeMemoryFootprint = true
}
}
}

func WithStatements(statements ...string) Option {
return func(c *config) {
c.statements = append(c.statements, statements...)
}
}

func WithModels(models []any) Option {
return func(c *config) {
c.models = append(c.models, models...)
}
}

func WithWritable(write bool) Option {
return func(c *config) {
c.write = truncate
c.writable = write
}
}

func WithLargeMemoryFootprint(largeFootprint bool) Option {
return func(c *config) {
c.allowLargeMemoryFootprint = largeFootprint
}
}

Expand All @@ -52,10 +96,6 @@ func (c *config) apply(path string, opts []Option) {
c.path = path
}

func (c config) shouldTruncate() bool {
return c.write && !c.memory
}

func (c config) connectionString() string {
var conn string
if c.path == "" {
Expand All @@ -64,9 +104,11 @@ func (c config) connectionString() string {
conn = fmt.Sprintf("file:%s?cache=shared", c.path)
}

if !c.write && !c.memory {
// &immutable=1&cache=shared&mode=ro
for _, o := range readOptions {
if !c.writable && !c.memory {
if !strings.Contains(conn, "?") {
conn += "?"
}
for _, o := range readConnectionOptions {
conn += fmt.Sprintf("&%s", o)
}
}
Expand All @@ -77,8 +119,12 @@ func (c config) connectionString() string {
func Open(path string, options ...Option) (*gorm.DB, error) {
cfg := newConfig(path, options)

if cfg.shouldTruncate() {
if err := prepareWritableDB(path); err != nil {
if cfg.truncate && !cfg.writable {
return nil, fmt.Errorf("cannot truncate a read-only DB")
}

if cfg.truncate {
if err := deleteDB(path); err != nil {
return nil, err
}
}
Expand All @@ -88,22 +134,64 @@ func Open(path string, options ...Option) (*gorm.DB, error) {
return nil, fmt.Errorf("unable to connect to DB: %w", err)
}

if cfg.write {
for _, sqlStmt := range writerStatements {
dbObj.Exec(sqlStmt)
if dbObj.Error != nil {
return nil, fmt.Errorf("unable to execute (%s): %w", sqlStmt, dbObj.Error)
}
if cfg.writable {
log.Trace("applying writable DB statements")
if err := applyStatements(dbObj, writerStatements); err != nil {
return nil, fmt.Errorf("unable to apply DB writer statements: %w", err)
}
}

if cfg.truncate && cfg.allowLargeMemoryFootprint {
log.Trace("applying large memory footprint DB statements")
if err := applyStatements(dbObj, heavyWriteStatements); err != nil {
return nil, fmt.Errorf("unable to apply DB heavy writer statements: %w", err)
}
}

if len(commonStatements) > 0 {
log.Trace("applying common DB statements")
if err := applyStatements(dbObj, commonStatements); err != nil {
return nil, fmt.Errorf("unable to apply DB common statements: %w", err)
}
}

if len(cfg.statements) > 0 {
log.Trace("applying custom DB statements")
if err := applyStatements(dbObj, cfg.statements); err != nil {
return nil, fmt.Errorf("unable to apply DB custom statements: %w", err)
}
}

if len(cfg.models) > 0 {
log.Trace("applying DB migrations")
if err := dbObj.AutoMigrate(cfg.models...); err != nil {
return nil, fmt.Errorf("unable to migrate: %w", err)
}
}

// needed for v6+
dbObj.Exec("PRAGMA foreign_keys = ON")
if len(cfg.initialData) > 0 {
log.Trace("applying initial data")
for _, d := range cfg.initialData {
if err := dbObj.Create(d).Error; err != nil {
return nil, fmt.Errorf("unable to create initial data: %w", err)
}
}
}

return dbObj, nil
}

func prepareWritableDB(path string) error {
func applyStatements(db *gorm.DB, statements []string) error {
for _, sqlStmt := range statements {
db.Exec(sqlStmt)
if db.Error != nil {
return fmt.Errorf("unable to execute (%s): %w", sqlStmt, db.Error)
}
}
return nil
}

func deleteDB(path string) error {
if _, err := os.Stat(path); err == nil {
if err := os.Remove(path); err != nil {
return fmt.Errorf("unable to remove existing DB file: %w", err)
Expand Down
54 changes: 8 additions & 46 deletions grype/db/internal/gormadapter/open_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ func TestConfigApply(t *testing.T) {
{
name: "apply with truncate option",
path: "test.db",
options: []Option{WithTruncate(true)},
options: []Option{WithTruncate(true, nil, nil)}, // migration and initial data don't matter
expectedPath: "test.db",
expectedMemory: false,
},
Expand All @@ -49,44 +49,6 @@ func TestConfigApply(t *testing.T) {
}
}

func TestConfigShouldTruncate(t *testing.T) {
tests := []struct {
name string
write bool
memory bool
expectedTruncate bool
}{
{
name: "should truncate when write is true and not memory",
write: true,
memory: false,
expectedTruncate: true,
},
{
name: "should not truncate when write is false",
write: false,
memory: false,
expectedTruncate: false,
},
{
name: "should not truncate when using in-memory DB",
write: true,
memory: true,
expectedTruncate: false,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
c := config{
write: tt.write,
memory: tt.memory,
}
require.Equal(t, tt.expectedTruncate, c.shouldTruncate())
})
}
}

func TestConfigConnectionString(t *testing.T) {
tests := []struct {
name string
Expand All @@ -105,7 +67,7 @@ func TestConfigConnectionString(t *testing.T) {
name: "read-only path",
path: "test.db",
write: false,
expectedConnStr: "file:test.db?cache=shared&immutable=1&cache=shared&mode=ro",
expectedConnStr: "file:test.db?cache=shared&immutable=1&mode=ro&cache=shared",
},
{
name: "in-memory mode",
Expand All @@ -119,9 +81,9 @@ func TestConfigConnectionString(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
c := config{
path: tt.path,
write: tt.write,
memory: tt.memory,
path: tt.path,
writable: tt.write,
memory: tt.memory,
}
require.Equal(t, tt.expectedConnStr, c.connectionString())
})
Expand All @@ -134,7 +96,7 @@ func TestPrepareWritableDB(t *testing.T) {
tempDir := t.TempDir()
dbPath := filepath.Join(tempDir, "newdir", "test.db")

err := prepareWritableDB(dbPath)
err := deleteDB(dbPath)
require.NoError(t, err)

_, err = os.Stat(filepath.Dir(dbPath))
Expand All @@ -151,7 +113,7 @@ func TestPrepareWritableDB(t *testing.T) {
_, err = os.Stat(dbPath)
require.NoError(t, err)

err = prepareWritableDB(dbPath)
err = deleteDB(dbPath)
require.NoError(t, err)

_, err = os.Stat(dbPath)
Expand All @@ -160,7 +122,7 @@ func TestPrepareWritableDB(t *testing.T) {

t.Run("returns error if unable to create parent directory", func(t *testing.T) {
invalidDir := filepath.Join("/root", "invalidDir", "test.db")
err := prepareWritableDB(invalidDir)
err := deleteDB(invalidDir)
require.Error(t, err)
require.Contains(t, err.Error(), "unable to create parent directory")
})
Expand Down
Loading

0 comments on commit 69330e5

Please sign in to comment.