-
Notifications
You must be signed in to change notification settings - Fork 602
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Drop DB v6 indexes on close #2335
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,37 +4,81 @@ import ( | |
"fmt" | ||
"os" | ||
"path/filepath" | ||
"strings" | ||
|
||
"github.com/glebarez/sqlite" | ||
"gorm.io/gorm" | ||
|
||
"github.com/anchore/grype/internal/log" | ||
) | ||
|
||
var commonStatements = []string{ | ||
`PRAGMA foreign_keys = ON`, // needed for v6+ | ||
} | ||
|
||
var writerStatements = []string{ | ||
// performance improvements (note: will result in lost data on write interruptions). | ||
// on my box it reduces the time to write from 10 minutes to 10 seconds (with ~1GB memory utilization spikes) | ||
`PRAGMA synchronous = OFF`, | ||
`PRAGMA journal_mode = MEMORY`, | ||
`PRAGMA cache_size = 100000`, | ||
`PRAGMA mmap_size = 268435456`, // 256 MB | ||
// performance improvements (note: will result in lost data on write interruptions) | ||
`PRAGMA synchronous = OFF`, // minimize the amount of syncing to disk, prioritizing write performance over durability | ||
`PRAGMA journal_mode = MEMORY`, // do not write the journal to disk (maximizing write performance); OFF is faster but less safe in terms of DB consistency | ||
} | ||
|
||
var readOptions = []string{ | ||
"immutable=1", | ||
"cache=shared", | ||
"mode=ro", | ||
var heavyWriteStatements = []string{ | ||
`PRAGMA cache_size = -1073741824`, // ~1 GB (negative means treat as bytes not page count); one caveat is to not pick a value that risks swapping behavior, negating performance gains | ||
`PRAGMA mmap_size = 1073741824`, // ~1 GB; the maximum size of the memory-mapped I/O buffer (to access the database file as if it were a part of the process’s virtual memory) | ||
} | ||
|
||
var readConnectionOptions = []string{ | ||
"immutable=1", // indicates that the database file is guaranteed not to change during the connection’s lifetime (slight performance benefit for read-only cases) | ||
"mode=ro", // opens the database in as read-only (an enforcement mechanism to allow immutable=1 to be effective) | ||
"cache=shared", // multiple database connections within the same process share a single page cache | ||
} | ||
|
||
type config struct { | ||
path string | ||
write bool | ||
memory bool | ||
path string | ||
writable bool | ||
truncate bool | ||
allowLargeMemoryFootprint bool | ||
models []any | ||
initialData []any | ||
memory bool | ||
statements []string | ||
} | ||
|
||
type Option func(*config) | ||
|
||
func WithTruncate(truncate bool) Option { | ||
func WithTruncate(truncate bool, models []any, initialData []any) Option { | ||
return func(c *config) { | ||
c.truncate = truncate | ||
if truncate { | ||
c.writable = true | ||
c.models = models | ||
c.initialData = initialData | ||
c.allowLargeMemoryFootprint = true | ||
} | ||
} | ||
} | ||
|
||
func WithStatements(statements ...string) Option { | ||
return func(c *config) { | ||
c.statements = append(c.statements, statements...) | ||
} | ||
} | ||
|
||
func WithModels(models []any) Option { | ||
return func(c *config) { | ||
c.models = append(c.models, models...) | ||
} | ||
} | ||
|
||
func WithWritable(write bool) Option { | ||
return func(c *config) { | ||
c.write = truncate | ||
c.writable = write | ||
} | ||
} | ||
|
||
func WithLargeMemoryFootprint(largeFootprint bool) Option { | ||
return func(c *config) { | ||
c.allowLargeMemoryFootprint = largeFootprint | ||
} | ||
} | ||
|
||
|
@@ -52,10 +96,6 @@ func (c *config) apply(path string, opts []Option) { | |
c.path = path | ||
} | ||
|
||
func (c config) shouldTruncate() bool { | ||
return c.write && !c.memory | ||
} | ||
|
||
func (c config) connectionString() string { | ||
var conn string | ||
if c.path == "" { | ||
|
@@ -64,9 +104,11 @@ func (c config) connectionString() string { | |
conn = fmt.Sprintf("file:%s?cache=shared", c.path) | ||
} | ||
|
||
if !c.write && !c.memory { | ||
// &immutable=1&cache=shared&mode=ro | ||
for _, o := range readOptions { | ||
if !c.writable && !c.memory { | ||
if !strings.Contains(conn, "?") { | ||
conn += "?" | ||
} | ||
for _, o := range readConnectionOptions { | ||
conn += fmt.Sprintf("&%s", o) | ||
} | ||
} | ||
|
@@ -77,8 +119,12 @@ func (c config) connectionString() string { | |
func Open(path string, options ...Option) (*gorm.DB, error) { | ||
cfg := newConfig(path, options) | ||
|
||
if cfg.shouldTruncate() { | ||
if err := prepareWritableDB(path); err != nil { | ||
if cfg.truncate && !cfg.writable { | ||
return nil, fmt.Errorf("cannot truncate a read-only DB") | ||
} | ||
|
||
if cfg.truncate { | ||
if err := deleteDB(path); err != nil { | ||
return nil, err | ||
} | ||
} | ||
|
@@ -88,22 +134,64 @@ func Open(path string, options ...Option) (*gorm.DB, error) { | |
return nil, fmt.Errorf("unable to connect to DB: %w", err) | ||
} | ||
|
||
if cfg.write { | ||
for _, sqlStmt := range writerStatements { | ||
dbObj.Exec(sqlStmt) | ||
if dbObj.Error != nil { | ||
return nil, fmt.Errorf("unable to execute (%s): %w", sqlStmt, dbObj.Error) | ||
} | ||
if cfg.writable { | ||
log.Trace("applying writable DB statements") | ||
if err := applyStatements(dbObj, writerStatements); err != nil { | ||
return nil, fmt.Errorf("unable to apply DB writer statements: %w", err) | ||
} | ||
} | ||
|
||
if cfg.truncate && cfg.allowLargeMemoryFootprint { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the memory footprint related to truncating? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it is -- when we start with an empty DB we will be doing a lot of writing (and allowing for a larger memory footprint in those cases allows for writing to the DB much faster). |
||
log.Trace("applying large memory footprint DB statements") | ||
if err := applyStatements(dbObj, heavyWriteStatements); err != nil { | ||
return nil, fmt.Errorf("unable to apply DB heavy writer statements: %w", err) | ||
} | ||
} | ||
|
||
if len(commonStatements) > 0 { | ||
log.Trace("applying common DB statements") | ||
if err := applyStatements(dbObj, commonStatements); err != nil { | ||
return nil, fmt.Errorf("unable to apply DB common statements: %w", err) | ||
} | ||
} | ||
|
||
if len(cfg.statements) > 0 { | ||
log.Trace("applying custom DB statements") | ||
if err := applyStatements(dbObj, cfg.statements); err != nil { | ||
return nil, fmt.Errorf("unable to apply DB custom statements: %w", err) | ||
} | ||
} | ||
|
||
if len(cfg.models) > 0 { | ||
log.Trace("applying DB migrations") | ||
if err := dbObj.AutoMigrate(cfg.models...); err != nil { | ||
return nil, fmt.Errorf("unable to migrate: %w", err) | ||
} | ||
} | ||
|
||
// needed for v6+ | ||
dbObj.Exec("PRAGMA foreign_keys = ON") | ||
if len(cfg.initialData) > 0 { | ||
log.Trace("applying initial data") | ||
for _, d := range cfg.initialData { | ||
if err := dbObj.Create(d).Error; err != nil { | ||
return nil, fmt.Errorf("unable to create initial data: %w", err) | ||
} | ||
} | ||
} | ||
|
||
return dbObj, nil | ||
} | ||
|
||
func prepareWritableDB(path string) error { | ||
func applyStatements(db *gorm.DB, statements []string) error { | ||
for _, sqlStmt := range statements { | ||
db.Exec(sqlStmt) | ||
if db.Error != nil { | ||
return fmt.Errorf("unable to execute (%s): %w", sqlStmt, db.Error) | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
func deleteDB(path string) error { | ||
if _, err := os.Stat(path); err == nil { | ||
if err := os.Remove(path); err != nil { | ||
return fmt.Errorf("unable to remove existing DB file: %w", err) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: this doesn't seem to have options directly related to write performance but rather memory, with an option named
allowLargeMemoryFootprint
should this belargeMemoryFootprintStatements
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
#2335 (comment) so when there are heavy writes expected we want to allow for a larger memory footprint (to improve write performance).