Skip to content

Commit

Permalink
updating merge planner to use fileSize in budget calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
Thejas-bhat committed Feb 28, 2025
1 parent 2916459 commit de1239e
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 3 deletions.
8 changes: 8 additions & 0 deletions index/scorch/merge.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,14 @@ func (s *Scorch) parseMergePlannerOptions() (*mergeplan.MergePlanOptions,
return nil, err
}
}

po, err := s.parsePersisterOptions()
if err != nil {
return nil, err
}

mergePlannerOptions.FloorSegmentFileSize = int64(po.MaxSizeInMemoryMerge)

return &mergePlannerOptions, nil
}

Expand Down
31 changes: 28 additions & 3 deletions index/scorch/mergeplan/merge_plan.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,10 @@ type MergePlanOptions struct {
// of tiny segments from resulting in a long tail in the index.
FloorSegmentSize int64

// Small segments' file sizes are rounded up to this size to prevent lots
// of tiny segments from causing a long tail in the index.
FloorSegmentFileSize int64

// Controls how aggressively merges that reclaim more deletions
// are favored. Higher values will more aggressively target
// merges that reclaim deletions, but be careful not to go so high
Expand Down Expand Up @@ -126,6 +130,13 @@ func (o *MergePlanOptions) RaiseToFloorSegmentSize(s int64) int64 {
return o.FloorSegmentSize
}

// RaiseToFloorSegmentFileSize returns s unchanged when it is at or above
// o.FloorSegmentFileSize; otherwise it returns o.FloorSegmentFileSize.
func (o *MergePlanOptions) RaiseToFloorSegmentFileSize(s int64) int64 {
	floor := o.FloorSegmentFileSize
	if s < floor {
		// Below the floor: report the floor instead of the actual size.
		return floor
	}
	return s
}

// MaxSegmentSizeLimit represents the maximum size of a segment,
// this limit comes with hit-1 optimisation/max encoding limit uint31.
const MaxSegmentSizeLimit = 1<<31 - 1
Expand Down Expand Up @@ -155,6 +166,7 @@ var SingleSegmentMergePlanOptions = MergePlanOptions{
SegmentsPerMergeTask: 10,
FloorSegmentSize: 1 << 30,
ReclaimDeletesWeight: 2.0,
FloorSegmentFileSize: 1 << 40,
}

// -------------------------------------------
Expand All @@ -176,12 +188,18 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) {

var eligibles []Segment
var eligiblesLiveSize int64
var eligiblesFileSize int64
var minFileSize int64 = math.MaxInt64

for _, segment := range segments {
if minLiveSize > segment.LiveSize() {
minLiveSize = segment.LiveSize()
}

if minFileSize > segment.FileSize() {
minFileSize = segment.FileSize()
}

isEligible := segment.LiveSize() < o.MaxSegmentSize/2
// An eligible segment (based on #documents) may be too large
// and thus need a stricter check based on the file size.
Expand All @@ -195,17 +213,24 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) {
if isEligible {
eligibles = append(eligibles, segment)
eligiblesLiveSize += segment.LiveSize()
eligiblesFileSize += segment.FileSize()
}
}

minLiveSize = o.RaiseToFloorSegmentSize(minLiveSize)

calcBudget := o.CalcBudget
if calcBudget == nil {
calcBudget = CalcBudget
}

budgetNumSegments := calcBudget(eligiblesLiveSize, minLiveSize, o)
var budgetNumSegments int
if o.FloorSegmentFileSize > 0 {
minFileSize = o.RaiseToFloorSegmentFileSize(minFileSize)
budgetNumSegments = calcBudget(eligiblesFileSize, minFileSize, o)

} else {
minLiveSize = o.RaiseToFloorSegmentSize(minLiveSize)
budgetNumSegments = calcBudget(eligiblesLiveSize, minLiveSize, o)
}

scoreSegments := o.ScoreSegments
if scoreSegments == nil {
Expand Down

0 comments on commit de1239e

Please sign in to comment.