Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Drop DB v6 indexes on close #2335

Merged
merged 2 commits into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions cmd/grype/cli/commands/db_import.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,17 @@ import (
"github.com/anchore/grype/grype/db/v6/distribution"
"github.com/anchore/grype/grype/db/v6/installation"
"github.com/anchore/grype/internal"
"github.com/anchore/grype/internal/log"
)

func DBImport(app clio.Application) *cobra.Command {
opts := dbOptionsDefault(app.ID())

return app.SetupCommand(&cobra.Command{
Use: "import FILE",
Short: "import a vulnerability database archive",
Long: fmt.Sprintf("import a vulnerability database archive from a local FILE.\nDB archives can be obtained from %q.", internal.DBUpdateURL),
Args: cobra.ExactArgs(1),
PreRunE: disableUI(app),
Use: "import FILE",
Short: "import a vulnerability database archive",
Long: fmt.Sprintf("import a vulnerability database archive from a local FILE.\nDB archives can be obtained from %q.", internal.DBUpdateURL),
Args: cobra.ExactArgs(1),
RunE: func(_ *cobra.Command, args []string) error {
return runDBImport(*opts, args[0])
},
Expand All @@ -48,10 +48,14 @@ func newDBImport(opts options.Database, dbArchivePath string) error {
return fmt.Errorf("unable to create curator: %w", err)
}

log.WithFields("path", dbArchivePath).Infof("importing vulnerability database archive")
if err := c.Import(dbArchivePath); err != nil {
return fmt.Errorf("unable to import vulnerability database: %w", err)
}
return stderrPrintLnf("Vulnerability database imported")

s := c.Status()
log.WithFields("built", s.Built.String(), "status", s.Status()).Info("vulnerability database imported")
return nil
}

func legacyDBImport(opts options.Database, dbArchivePath string) error {
Expand Down
14 changes: 2 additions & 12 deletions cmd/grype/cli/commands/db_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,18 +70,13 @@ func newDBStatus(opts dbStatusOptions) error {
}

func presentDBStatus(format string, writer io.Writer, status v6.Status) error {
statusStr := "valid"
if status.Err != nil {
statusStr = "invalid"
}

switch format {
case textOutputFormat:
fmt.Fprintln(writer, "Path: ", status.Path)
fmt.Fprintln(writer, "Schema: ", status.SchemaVersion)
fmt.Fprintln(writer, "Built: ", status.Built.String())
fmt.Fprintln(writer, "Checksum: ", status.Checksum)
fmt.Fprintln(writer, "Status: ", statusStr)
fmt.Fprintln(writer, "Status: ", status.Status())
case jsonOutputFormat:
enc := json.NewEncoder(writer)
enc.SetEscapeHTML(false)
Expand All @@ -107,18 +102,13 @@ func legacyDBStatus(opts dbStatusOptions) error {

status := dbCurator.Status()

statusStr := "valid"
if status.Err != nil {
statusStr = "invalid"
}

switch opts.Output {
case textOutputFormat:
fmt.Println("Location: ", status.Location)
fmt.Println("Built: ", status.Built.String())
fmt.Println("Schema: ", status.SchemaVersion)
fmt.Println("Checksum: ", status.Checksum)
fmt.Println("Status: ", statusStr)
fmt.Println("Status: ", status.Status())
case jsonOutputFormat:
enc := json.NewEncoder(os.Stdout)
enc.SetEscapeHTML(false)
Expand Down
154 changes: 121 additions & 33 deletions grype/db/internal/gormadapter/open.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,37 +4,81 @@ import (
"fmt"
"os"
"path/filepath"
"strings"

"github.com/glebarez/sqlite"
"gorm.io/gorm"

"github.com/anchore/grype/internal/log"
)

var commonStatements = []string{
`PRAGMA foreign_keys = ON`, // needed for v6+
}

var writerStatements = []string{
// performance improvements (note: will result in lost data on write interruptions).
// on my box it reduces the time to write from 10 minutes to 10 seconds (with ~1GB memory utilization spikes)
`PRAGMA synchronous = OFF`,
`PRAGMA journal_mode = MEMORY`,
`PRAGMA cache_size = 100000`,
`PRAGMA mmap_size = 268435456`, // 256 MB
// performance improvements (note: will result in lost data on write interruptions)
`PRAGMA synchronous = OFF`, // minimize the amount of syncing to disk, prioritizing write performance over durability
`PRAGMA journal_mode = MEMORY`, // do not write the journal to disk (maximizing write performance); OFF is faster but less safe in terms of DB consistency
}

var readOptions = []string{
"immutable=1",
"cache=shared",
"mode=ro",
var heavyWriteStatements = []string{
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: this doesn't seem to have options directly related to write performance but rather memory, with an option named allowLargeMemoryFootprint should this be largeMemoryFootprintStatements?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#2335 (comment) so when there are heavy writes expected we want to allow for a larger memory footprint (to improve write performance).

`PRAGMA cache_size = -1073741824`, // ~1 GB (negative means treat as bytes not page count); one caveat is to not pick a value that risks swapping behavior, negating performance gains
`PRAGMA mmap_size = 1073741824`, // ~1 GB; the maximum size of the memory-mapped I/O buffer (to access the database file as if it were a part of the process’s virtual memory)
}

var readConnectionOptions = []string{
"immutable=1", // indicates that the database file is guaranteed not to change during the connection’s lifetime (slight performance benefit for read-only cases)
"mode=ro", // opens the database in as read-only (an enforcement mechanism to allow immutable=1 to be effective)
"cache=shared", // multiple database connections within the same process share a single page cache
}

type config struct {
path string
write bool
memory bool
path string
writable bool
truncate bool
allowLargeMemoryFootprint bool
models []any
initialData []any
memory bool
statements []string
}

type Option func(*config)

func WithTruncate(truncate bool) Option {
func WithTruncate(truncate bool, models []any, initialData []any) Option {
return func(c *config) {
c.truncate = truncate
if truncate {
c.writable = true
c.models = models
c.initialData = initialData
c.allowLargeMemoryFootprint = true
}
}
}

func WithStatements(statements ...string) Option {
return func(c *config) {
c.statements = append(c.statements, statements...)
}
}

func WithModels(models []any) Option {
return func(c *config) {
c.models = append(c.models, models...)
}
}

func WithWritable(write bool) Option {
return func(c *config) {
c.write = truncate
c.writable = write
}
}

func WithLargeMemoryFootprint(largeFootprint bool) Option {
return func(c *config) {
c.allowLargeMemoryFootprint = largeFootprint
}
}

Expand All @@ -52,10 +96,6 @@ func (c *config) apply(path string, opts []Option) {
c.path = path
}

func (c config) shouldTruncate() bool {
return c.write && !c.memory
}

func (c config) connectionString() string {
var conn string
if c.path == "" {
Expand All @@ -64,9 +104,11 @@ func (c config) connectionString() string {
conn = fmt.Sprintf("file:%s?cache=shared", c.path)
}

if !c.write && !c.memory {
// &immutable=1&cache=shared&mode=ro
for _, o := range readOptions {
if !c.writable && !c.memory {
if !strings.Contains(conn, "?") {
conn += "?"
}
for _, o := range readConnectionOptions {
conn += fmt.Sprintf("&%s", o)
}
}
Expand All @@ -77,8 +119,12 @@ func (c config) connectionString() string {
func Open(path string, options ...Option) (*gorm.DB, error) {
cfg := newConfig(path, options)

if cfg.shouldTruncate() {
if err := prepareWritableDB(path); err != nil {
if cfg.truncate && !cfg.writable {
return nil, fmt.Errorf("cannot truncate a read-only DB")
}

if cfg.truncate {
if err := deleteDB(path); err != nil {
return nil, err
}
}
Expand All @@ -88,22 +134,64 @@ func Open(path string, options ...Option) (*gorm.DB, error) {
return nil, fmt.Errorf("unable to connect to DB: %w", err)
}

if cfg.write {
for _, sqlStmt := range writerStatements {
dbObj.Exec(sqlStmt)
if dbObj.Error != nil {
return nil, fmt.Errorf("unable to execute (%s): %w", sqlStmt, dbObj.Error)
}
if cfg.writable {
log.Trace("applying writable DB statements")
if err := applyStatements(dbObj, writerStatements); err != nil {
return nil, fmt.Errorf("unable to apply DB writer statements: %w", err)
}
}

if cfg.truncate && cfg.allowLargeMemoryFootprint {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the memory footprint related to truncating?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it is -- when we start with an empty DB we will be doing a lot of writing (and allowing for a larger memory footprint in those cases allows for writing to the DB much faster).

log.Trace("applying large memory footprint DB statements")
if err := applyStatements(dbObj, heavyWriteStatements); err != nil {
return nil, fmt.Errorf("unable to apply DB heavy writer statements: %w", err)
}
}

if len(commonStatements) > 0 {
log.Trace("applying common DB statements")
if err := applyStatements(dbObj, commonStatements); err != nil {
return nil, fmt.Errorf("unable to apply DB common statements: %w", err)
}
}

if len(cfg.statements) > 0 {
log.Trace("applying custom DB statements")
if err := applyStatements(dbObj, cfg.statements); err != nil {
return nil, fmt.Errorf("unable to apply DB custom statements: %w", err)
}
}

if len(cfg.models) > 0 {
log.Trace("applying DB migrations")
if err := dbObj.AutoMigrate(cfg.models...); err != nil {
return nil, fmt.Errorf("unable to migrate: %w", err)
}
}

// needed for v6+
dbObj.Exec("PRAGMA foreign_keys = ON")
if len(cfg.initialData) > 0 {
log.Trace("applying initial data")
for _, d := range cfg.initialData {
if err := dbObj.Create(d).Error; err != nil {
return nil, fmt.Errorf("unable to create initial data: %w", err)
}
}
}

return dbObj, nil
}

func prepareWritableDB(path string) error {
func applyStatements(db *gorm.DB, statements []string) error {
for _, sqlStmt := range statements {
db.Exec(sqlStmt)
if db.Error != nil {
return fmt.Errorf("unable to execute (%s): %w", sqlStmt, db.Error)
}
}
return nil
}

func deleteDB(path string) error {
if _, err := os.Stat(path); err == nil {
if err := os.Remove(path); err != nil {
return fmt.Errorf("unable to remove existing DB file: %w", err)
Expand Down
54 changes: 8 additions & 46 deletions grype/db/internal/gormadapter/open_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ func TestConfigApply(t *testing.T) {
{
name: "apply with truncate option",
path: "test.db",
options: []Option{WithTruncate(true)},
options: []Option{WithTruncate(true, nil, nil)}, // migration and initial data don't matter
expectedPath: "test.db",
expectedMemory: false,
},
Expand All @@ -49,44 +49,6 @@ func TestConfigApply(t *testing.T) {
}
}

func TestConfigShouldTruncate(t *testing.T) {
tests := []struct {
name string
write bool
memory bool
expectedTruncate bool
}{
{
name: "should truncate when write is true and not memory",
write: true,
memory: false,
expectedTruncate: true,
},
{
name: "should not truncate when write is false",
write: false,
memory: false,
expectedTruncate: false,
},
{
name: "should not truncate when using in-memory DB",
write: true,
memory: true,
expectedTruncate: false,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
c := config{
write: tt.write,
memory: tt.memory,
}
require.Equal(t, tt.expectedTruncate, c.shouldTruncate())
})
}
}

func TestConfigConnectionString(t *testing.T) {
tests := []struct {
name string
Expand All @@ -105,7 +67,7 @@ func TestConfigConnectionString(t *testing.T) {
name: "read-only path",
path: "test.db",
write: false,
expectedConnStr: "file:test.db?cache=shared&immutable=1&cache=shared&mode=ro",
expectedConnStr: "file:test.db?cache=shared&immutable=1&mode=ro&cache=shared",
},
{
name: "in-memory mode",
Expand All @@ -119,9 +81,9 @@ func TestConfigConnectionString(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
c := config{
path: tt.path,
write: tt.write,
memory: tt.memory,
path: tt.path,
writable: tt.write,
memory: tt.memory,
}
require.Equal(t, tt.expectedConnStr, c.connectionString())
})
Expand All @@ -134,7 +96,7 @@ func TestPrepareWritableDB(t *testing.T) {
tempDir := t.TempDir()
dbPath := filepath.Join(tempDir, "newdir", "test.db")

err := prepareWritableDB(dbPath)
err := deleteDB(dbPath)
require.NoError(t, err)

_, err = os.Stat(filepath.Dir(dbPath))
Expand All @@ -151,7 +113,7 @@ func TestPrepareWritableDB(t *testing.T) {
_, err = os.Stat(dbPath)
require.NoError(t, err)

err = prepareWritableDB(dbPath)
err = deleteDB(dbPath)
require.NoError(t, err)

_, err = os.Stat(dbPath)
Expand All @@ -160,7 +122,7 @@ func TestPrepareWritableDB(t *testing.T) {

t.Run("returns error if unable to create parent directory", func(t *testing.T) {
invalidDir := filepath.Join("/root", "invalidDir", "test.db")
err := prepareWritableDB(invalidDir)
err := deleteDB(invalidDir)
require.Error(t, err)
require.Contains(t, err.Error(), "unable to create parent directory")
})
Expand Down
Loading
Loading