Skip to content

Commit

Permalink
perf(storage): enhance GLSN Log scanning efficiency
Browse files Browse the repository at this point in the history
This PR substantially enhances the performance of GLSN-based log scanning in
Varlog's storage layer by shifting from point lookups to an iterator approach.
While introducing a minimal overhead of one additional object allocation, this
modification significantly reduces operation times and increases throughput for
larger scan sizes. The slight increase in object allocation is a small trade-off
for the significant performance gains achieved, as demonstrated by the
benchmark.

```
goos: darwin
goarch: amd64
pkg: github.com/kakao/varlog/internal/storage
cpu: Intel(R) Core(TM) i7-4870HQ CPU @ 2.50GHz
                                    │    base     │                diff                 │
                                    │   sec/op    │   sec/op     vs base                │
Storage_ScanWithGLSN/numLogs=1-8      2.298µ ± 1%   3.088µ ± 1%  +34.36% (p=0.000 n=20)
Storage_ScanWithGLSN/numLogs=10-8     9.682µ ± 1%   7.499µ ± 1%  -22.55% (p=0.000 n=20)
Storage_ScanWithGLSN/numLogs=100-8    89.18µ ± 2%   49.93µ ± 0%  -44.01% (p=0.000 n=20)
Storage_ScanWithGLSN/numLogs=1000-8   968.2µ ± 2%   472.4µ ± 0%  -51.21% (p=0.000 n=20)
geomean                               37.23µ        27.18µ       -26.98%

                                    │     base     │                diff                 │
                                    │     B/op     │     B/op      vs base               │
Storage_ScanWithGLSN/numLogs=1-8        120.0 ± 0%     128.0 ± 0%  +6.67% (p=0.000 n=20)
Storage_ScanWithGLSN/numLogs=10-8       264.0 ± 0%     272.5 ± 0%  +3.22% (p=0.000 n=20)
Storage_ScanWithGLSN/numLogs=100-8    1.669Ki ± 0%   1.676Ki ± 0%  +0.41% (p=0.000 n=20)
Storage_ScanWithGLSN/numLogs=1000-8   15.78Ki ± 0%   15.77Ki ± 0%  -0.06% (p=0.000 n=20)
geomean                                 967.1          991.5       +2.53%

                                    │    base     │                diff                 │
                                    │  allocs/op  │  allocs/op   vs base                │
Storage_ScanWithGLSN/numLogs=1-8       5.000 ± 0%    6.000 ± 0%  +20.00% (p=0.000 n=20)
Storage_ScanWithGLSN/numLogs=10-8      14.00 ± 0%    15.00 ± 0%   +7.14% (p=0.000 n=20)
Storage_ScanWithGLSN/numLogs=100-8     104.0 ± 0%    105.0 ± 0%   +0.96% (p=0.000 n=20)
Storage_ScanWithGLSN/numLogs=1000-8   1.004k ± 0%   1.005k ± 0%   +0.10% (p=0.000 n=20)
geomean                                52.00         55.51        +6.77%
```

Benchmark results summary:
- Environment: Intel(R) Core(TM) i7-4870HQ CPU @ 2.50GHz, goos: darwin, goarch:
  amd64
- Scan performance improves as the number of logs increases, while memory
  overhead stays small. Initial setup overhead makes the single-log case an
  outlier (34.36% slower). However, significant speedups are observed for
  larger log counts: 22.55% for 10 logs, 44.01% for 100 logs, and 51.21% for
  1000 logs. These improvements highlight the method's scalability, with bytes
  per operation remaining stable or, contrary to expectations, even decreasing
  slightly at 1000 logs. The geometric mean shows only a 2.53% increase in
  bytes per operation and a 6.77% increase in allocations per operation,
  underscoring the approach's effectiveness in improving performance without
  significant memory cost.

These results demonstrate the scalability and efficiency of using a pebble
iterator for GLSN-based log scanning. The changes reduce the geomean time per
operation by 26.98%, highlighting substantial performance improvements for
larger scan sizes without considerable memory overhead.

Additionally, the insights gained from this optimization present an opportunity
to rethink the Read RPC for more efficient log entry lookup by log sequence
number. However, this exploration is beyond the current scope of work.
  • Loading branch information
ijsong committed Feb 23, 2024
1 parent cdaa7ac commit 670d854
Showing 1 changed file with 54 additions and 11 deletions.
65 changes: 54 additions & 11 deletions internal/storage/scanner.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package storage

import (
"errors"
"fmt"
"io"
"slices"
"sync"

"github.com/cockroachdb/pebble"
Expand All @@ -14,6 +16,7 @@ import (
var scannerPool = sync.Pool{
New: func() interface{} {
s := &Scanner{}
s.lazy.dkUpper = make([]byte, dataKeyLength)
s.cks.lower = make([]byte, commitKeyLength)
s.cks.upper = make([]byte, commitKeyLength)
s.dks.lower = make([]byte, dataKeyLength)
Expand All @@ -23,9 +26,12 @@ var scannerPool = sync.Pool{
}

type Scanner struct {
scanConfig
stg *Storage
it *pebble.Iterator
stg *Storage
it *pebble.Iterator
lazy struct {
dataIt *pebble.Iterator
dkUpper []byte
}
cks struct {
lower []byte
upper []byte
Expand All @@ -34,6 +40,8 @@ type Scanner struct {
lower []byte
upper []byte
}

scanConfig
}

func newScanner() *Scanner {
Expand All @@ -56,37 +64,71 @@ func (s *Scanner) Value() (le varlogpb.LogEntry, err error) {
}

// Next moves the scanner forward by one entry and reports whether the primary
// iterator (s.it) now points at a valid entry.
//
// If the lazy data iterator has already been created, it is stepped forward as
// well; its result is deliberately discarded because only the primary
// iterator decides whether the scan continues.
func (s *Scanner) Next() bool {
	if dataIt := s.lazy.dataIt; dataIt != nil {
		// Presumably keeps the data iterator aligned with the primary
		// iterator during GLSN scans — confirm against valueByGLSN.
		_ = dataIt.Next()
	}
	return s.it.Next()
}

// Close closes the scanner's iterators and then hands the scanner to
// s.release().
//
// The primary iterator (s.it) and the lazy data iterator (s.lazy.dataIt) are
// each closed only when non-nil. If both Close calls fail, the two errors are
// combined with errors.Join; if only one fails, that error is returned as-is.
// s.release() is called regardless of whether closing failed.
func (s *Scanner) Close() (err error) {
if s.it != nil {
err = s.it.Close()
}
// s.lazy.dataIt stays nil unless something assigned it (initLazyIterator does).
if s.lazy.dataIt != nil {
if e := s.lazy.dataIt.Close(); e != nil {
// Keep the first error and attach the second rather than overwriting it.
if err != nil {
err = errors.Join(err, e)
} else {
err = e

Check warning on line 82 in internal/storage/scanner.go

View check run for this annotation

Codecov / codecov/patch

internal/storage/scanner.go#L79-L82

Added lines #L79 - L82 were not covered by tests
}
}
}
// Release even on error so the scanner's references are always cleared.
s.release()
return err
}

// valueByGLSN builds the log entry for the scanner's current position.
//
// The commit key comes from s.it.Key() and the data key from s.it.Value();
// the entry's GLSN is decoded from the commit key and its LLSN from the data
// key. A non-empty payload is copied into a freshly allocated slice before it
// is stored in le.Data. When the data for the key cannot be matched, the
// returned error wraps ErrInconsistentWriteCommitState and is prefixed with
// s.stg.path.
//
// NOTE(review): this listing is a rendered diff without +/- markers, so lines
// from the previous point-lookup version (s.stg.dataDB.Get) and from the new
// iterator-based version (s.lazy.dataIt) appear interleaved below.
func (s *Scanner) valueByGLSN() (le varlogpb.LogEntry, err error) {
ck := s.it.Key()
dk := s.it.Value()
data, closer, err := s.stg.dataDB.Get(dk)
if err != nil {
if err == pebble.ErrNotFound {
return le, fmt.Errorf("%s: %w", s.stg.path, ErrInconsistentWriteCommitState)
llsn := decodeDataKey(dk)
if s.lazy.dataIt == nil {
err = s.initLazyIterator(dk, llsn)
if err != nil {
return le, err
}
return le, err
}
if slices.Compare(dk, s.lazy.dataIt.Key()) != 0 {
return le, fmt.Errorf("%s: %w", s.stg.path, ErrInconsistentWriteCommitState)

Check warning on line 101 in internal/storage/scanner.go

View check run for this annotation

Codecov / codecov/patch

internal/storage/scanner.go#L101

Added line #L101 was not covered by tests
}

le.GLSN = decodeCommitKey(ck)
le.LLSN = decodeDataKey(dk)
le.LLSN = llsn
data := s.lazy.dataIt.Value()
if len(data) > 0 {
le.Data = make([]byte, len(data))
copy(le.Data, data)
}
_ = closer.Close()
return le, nil
}

// initLazyIterator creates the data iterator stored in s.lazy.dataIt and
// positions it on its first key.
//
// beginKey is used as the iterator's lower bound and beginLLSN is the LLSN
// from which the upper bound is derived. It returns the error from NewIter if
// the iterator cannot be created, or an error wrapping
// ErrInconsistentWriteCommitState if the bounded range contains no key. In the
// latter case s.lazy.dataIt remains assigned.
func (s *Scanner) initLazyIterator(beginKey []byte, beginLLSN types.LLSN) (err error) {
// Upper bound = first LLSN plus the width of the requested GLSN range.
// NOTE(review): assumes this addition cannot wrap around for a very large
// s.end.GLSN — confirm against the callers that build scanConfig.
endLLSN := beginLLSN + types.LLSN(s.end.GLSN-s.begin.GLSN)
// Passes the existing s.lazy.dkUpper buffer to the encoder and stores the
// result back; presumably this reuses the buffer — helper not visible here.
s.lazy.dkUpper = encodeDataKeyInternal(endLLSN, s.lazy.dkUpper)
itOpt := &pebble.IterOptions{
LowerBound: beginKey,
UpperBound: s.lazy.dkUpper,
}

s.lazy.dataIt, err = s.stg.dataDB.NewIter(itOpt)
if err != nil {
return err

Check warning on line 124 in internal/storage/scanner.go

View check run for this annotation

Codecov / codecov/patch

internal/storage/scanner.go#L124

Added line #L124 was not covered by tests
}
// First reports false when no key lies within the bounds; that is treated
// as an inconsistency between the commit and data records.
if !s.lazy.dataIt.First() {
return fmt.Errorf("%s: %w", s.stg.path, ErrInconsistentWriteCommitState)
}
return nil
}

func (s *Scanner) valueByLLSN() (le varlogpb.LogEntry, err error) {
le.LLSN = decodeDataKey(s.it.Key())
if len(s.it.Value()) > 0 {
Expand All @@ -97,9 +139,10 @@ func (s *Scanner) valueByLLSN() (le varlogpb.LogEntry, err error) {
}

// release resets the scanner's scanConfig to its zero value, clears its
// references to the storage and to both iterators, and puts the scanner back
// into scannerPool.
//
// It does not close the iterators, and it leaves the key buffers
// (s.lazy.dkUpper, s.cks, s.dks) untouched.
//
// NOTE(review): the scanConfig reset appears twice because this listing is a
// rendered diff showing both the old and the new position of that line.
func (s *Scanner) release() {
s.scanConfig = scanConfig{}
s.stg = nil
s.it = nil
s.lazy.dataIt = nil
s.scanConfig = scanConfig{}
scannerPool.Put(s)
}

Expand Down

0 comments on commit 670d854

Please sign in to comment.