From de1239e435fd58c00c1284e76d7a56231151c95c Mon Sep 17 00:00:00 2001 From: Thejas-bhat Date: Fri, 24 Jan 2025 16:32:00 +0530 Subject: [PATCH] updating merge planner to use fileSize in budget calculation --- index/scorch/merge.go | 8 +++++++ index/scorch/mergeplan/merge_plan.go | 31 +++++++++++++++++++++++++--- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 91b9fe6df..c1cfb96dc 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -225,6 +225,14 @@ func (s *Scorch) parseMergePlannerOptions() (*mergeplan.MergePlanOptions, return nil, err } } + + po, err := s.parsePersisterOptions() + if err != nil { + return nil, err + } + + mergePlannerOptions.FloorSegmentFileSize = int64(po.MaxSizeInMemoryMerge) + return &mergePlannerOptions, nil } diff --git a/index/scorch/mergeplan/merge_plan.go b/index/scorch/mergeplan/merge_plan.go index ac6d8b22b..8ddde74a5 100644 --- a/index/scorch/mergeplan/merge_plan.go +++ b/index/scorch/mergeplan/merge_plan.go @@ -99,6 +99,10 @@ type MergePlanOptions struct { // of tiny segments from resulting in a long tail in the index. FloorSegmentSize int64 + // Small segments' file sizes are rounded up to this size to prevent a lot + // of tiny segments from causing a long tail in the index. + FloorSegmentFileSize int64 + // Controls how aggressively merges that reclaim more deletions // are favored. Higher values will more aggressively target // merges that reclaim deletions, but be careful not to go so high @@ -126,6 +130,13 @@ func (o *MergePlanOptions) RaiseToFloorSegmentSize(s int64) int64 { return o.FloorSegmentSize } +func (o *MergePlanOptions) RaiseToFloorSegmentFileSize(s int64) int64 { + if s > o.FloorSegmentFileSize { + return s + } + return o.FloorSegmentFileSize +} + // MaxSegmentSizeLimit represents the maximum size of a segment, // this limit comes with hit-1 optimisation/max encoding limit uint31. 
const MaxSegmentSizeLimit = 1<<31 - 1 @@ -155,6 +166,7 @@ var SingleSegmentMergePlanOptions = MergePlanOptions{ SegmentsPerMergeTask: 10, FloorSegmentSize: 1 << 30, ReclaimDeletesWeight: 2.0, + FloorSegmentFileSize: 1 << 40, } // ------------------------------------------- @@ -176,12 +188,18 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) { var eligibles []Segment var eligiblesLiveSize int64 + var eligiblesFileSize int64 + var minFileSize int64 = math.MaxInt64 for _, segment := range segments { if minLiveSize > segment.LiveSize() { minLiveSize = segment.LiveSize() } + if minFileSize > segment.FileSize() { + minFileSize = segment.FileSize() + } + isEligible := segment.LiveSize() < o.MaxSegmentSize/2 // An eligible segment (based on #documents) may be too large // and thus need a stricter check based on the file size. @@ -195,17 +213,24 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) { if isEligible { eligibles = append(eligibles, segment) eligiblesLiveSize += segment.LiveSize() + eligiblesFileSize += segment.FileSize() } } - minLiveSize = o.RaiseToFloorSegmentSize(minLiveSize) - calcBudget := o.CalcBudget if calcBudget == nil { calcBudget = CalcBudget } - budgetNumSegments := calcBudget(eligiblesLiveSize, minLiveSize, o) + var budgetNumSegments int + if o.FloorSegmentFileSize > 0 { + minFileSize = o.RaiseToFloorSegmentFileSize(minFileSize) + budgetNumSegments = calcBudget(eligiblesFileSize, minFileSize, o) + + } else { + minLiveSize = o.RaiseToFloorSegmentSize(minLiveSize) + budgetNumSegments = calcBudget(eligiblesLiveSize, minLiveSize, o) + } scoreSegments := o.ScoreSegments if scoreSegments == nil {