Adding upstream version 2.5.1.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
c71cb8b61d
commit
982828099e
783 changed files with 150650 additions and 0 deletions
454
index/scorch/mergeplan/merge_plan.go
Normal file
454
index/scorch/mergeplan/merge_plan.go
Normal file
|
@ -0,0 +1,454 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package mergeplan provides a segment merge planning approach that's
|
||||
// inspired by Lucene's TieredMergePolicy.java and descriptions like
|
||||
// http://blog.mikemccandless.com/2011/02/visualizing-lucenes-segment-merges.html
|
||||
package mergeplan
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Segment is the planner's view of a single index segment: only the
// identity and size figures needed to decide which segments to merge.
type Segment interface {
	// Id returns the unique id of the segment. The planner uses it as
	// the tie-breaker when sorting segments of equal live size.
	Id() uint64

	// FullSize returns the full segment size, i.e. the size before any
	// logical deletions are taken into account.
	FullSize() int64

	// LiveSize returns the size of the live data of the segment, i.e.
	// FullSize() minus any logical deletions.
	LiveSize() int64

	// HasVector reports whether the segment contains vectors. For such
	// segments the planner additionally applies the file-size limits.
	HasVector() bool

	// FileSize returns the size of the persisted segment file.
	FileSize() int64
}
|
||||
|
||||
// Plan() will functionally compute a merge plan. A segment will be
|
||||
// assigned to at most a single MergeTask in the output MergePlan. A
|
||||
// segment not assigned to any MergeTask means the segment should
|
||||
// remain unmerged.
|
||||
func Plan(segments []Segment, o *MergePlanOptions) (*MergePlan, error) {
|
||||
return plan(segments, o)
|
||||
}
|
||||
|
||||
// A MergePlan is the result of the Plan() API.
|
||||
//
|
||||
// The planner doesn’t know how or whether these tasks are executed --
|
||||
// that’s up to a separate merge execution system, which might execute
|
||||
// these tasks concurrently or not, and which might execute all the
|
||||
// tasks or not.
|
||||
type MergePlan struct {
|
||||
Tasks []*MergeTask
|
||||
}
|
||||
|
||||
// A MergeTask represents several segments that should be merged
|
||||
// together into a single segment.
|
||||
type MergeTask struct {
|
||||
Segments []Segment
|
||||
}
|
||||
|
||||
// The MergePlanOptions is designed to be reusable between planning calls.
|
||||
type MergePlanOptions struct {
|
||||
// Max # segments per logarithmic tier, or max width of any
|
||||
// logarithmic “step”. Smaller values mean more merging but fewer
|
||||
// segments. Should be >= SegmentsPerMergeTask, else you'll have
|
||||
// too much merging.
|
||||
MaxSegmentsPerTier int
|
||||
|
||||
// Max size of any segment produced after merging. Actual
|
||||
// merging, however, may produce segment sizes different than the
|
||||
// planner’s predicted sizes.
|
||||
MaxSegmentSize int64
|
||||
|
||||
// Max size (in bytes) of the persisted segment file that contains the
|
||||
// vectors. This is used to prevent merging of segments that
|
||||
// contain vectors that are too large.
|
||||
MaxSegmentFileSize int64
|
||||
|
||||
// The growth factor for each tier in a staircase of idealized
|
||||
// segments computed by CalcBudget().
|
||||
TierGrowth float64
|
||||
|
||||
// The number of segments in any resulting MergeTask. e.g.,
|
||||
// len(result.Tasks[ * ].Segments) == SegmentsPerMergeTask.
|
||||
SegmentsPerMergeTask int
|
||||
|
||||
// Small segments are rounded up to this size, i.e., treated as
|
||||
// equal (floor) size for consideration. This is to prevent lots
|
||||
// of tiny segments from resulting in a long tail in the index.
|
||||
FloorSegmentSize int64
|
||||
|
||||
// Small segments' file size are rounded up to this size to prevent lot
|
||||
// of tiny segments causing a long tail in the index.
|
||||
FloorSegmentFileSize int64
|
||||
|
||||
// Controls how aggressively merges that reclaim more deletions
|
||||
// are favored. Higher values will more aggressively target
|
||||
// merges that reclaim deletions, but be careful not to go so high
|
||||
// that way too much merging takes place; a value of 3.0 is
|
||||
// probably nearly too high. A value of 0.0 means deletions don't
|
||||
// impact merge selection.
|
||||
ReclaimDeletesWeight float64
|
||||
|
||||
// Optional, defaults to mergeplan.CalcBudget().
|
||||
CalcBudget func(totalSize int64, firstTierSize int64,
|
||||
o *MergePlanOptions) (budgetNumSegments int)
|
||||
|
||||
// Optional, defaults to mergeplan.ScoreSegments().
|
||||
ScoreSegments func(segments []Segment, o *MergePlanOptions) float64
|
||||
|
||||
// Optional.
|
||||
Logger func(string)
|
||||
}
|
||||
|
||||
// Returns the higher of the input or FloorSegmentSize.
|
||||
func (o *MergePlanOptions) RaiseToFloorSegmentSize(s int64) int64 {
|
||||
if s > o.FloorSegmentSize {
|
||||
return s
|
||||
}
|
||||
return o.FloorSegmentSize
|
||||
}
|
||||
|
||||
func (o *MergePlanOptions) RaiseToFloorSegmentFileSize(s int64) int64 {
|
||||
if s > o.FloorSegmentFileSize {
|
||||
return s
|
||||
}
|
||||
return o.FloorSegmentFileSize
|
||||
}
|
||||
|
||||
// MaxSegmentSizeLimit represents the maximum size of a segment,
|
||||
// this limit comes with hit-1 optimisation/max encoding limit uint31.
|
||||
const MaxSegmentSizeLimit = 0x7fffffff // 1<<31 - 1, the largest 31-bit value
|
||||
|
||||
var (
	// ErrMaxSegmentSizeTooLarge is the error reported by
	// ValidateMergePlannerOptions when the configured MaxSegmentSize
	// is greater than MaxSegmentSizeLimit.
	ErrMaxSegmentSizeTooLarge = errors.New("MaxSegmentSize exceeds the size limit")
)
|
||||
|
||||
// DefaultMergePlanOptions suggests the default options.
|
||||
var DefaultMergePlanOptions = MergePlanOptions{
|
||||
MaxSegmentsPerTier: 10,
|
||||
MaxSegmentSize: 5000000,
|
||||
MaxSegmentFileSize: 4000000000, // 4GB
|
||||
TierGrowth: 10.0,
|
||||
SegmentsPerMergeTask: 10,
|
||||
FloorSegmentSize: 2000,
|
||||
ReclaimDeletesWeight: 2.0,
|
||||
}
|
||||
|
||||
// SingleSegmentMergePlanOptions helps in creating a
|
||||
// single segment index.
|
||||
var SingleSegmentMergePlanOptions = MergePlanOptions{
|
||||
MaxSegmentsPerTier: 1,
|
||||
MaxSegmentSize: 1 << 30,
|
||||
MaxSegmentFileSize: 1 << 40,
|
||||
TierGrowth: 1.0,
|
||||
SegmentsPerMergeTask: 10,
|
||||
FloorSegmentSize: 1 << 30,
|
||||
ReclaimDeletesWeight: 2.0,
|
||||
FloorSegmentFileSize: 1 << 40,
|
||||
}
|
||||
|
||||
// -------------------------------------------
|
||||
|
||||
func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) {
|
||||
if len(segmentsIn) <= 1 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if o == nil {
|
||||
o = &DefaultMergePlanOptions
|
||||
}
|
||||
|
||||
segments := append([]Segment(nil), segmentsIn...) // Copy.
|
||||
|
||||
sort.Sort(byLiveSizeDescending(segments))
|
||||
|
||||
var minLiveSize int64 = math.MaxInt64
|
||||
|
||||
var eligibles []Segment
|
||||
var eligiblesLiveSize int64
|
||||
var eligiblesFileSize int64
|
||||
var minFileSize int64 = math.MaxInt64
|
||||
|
||||
for _, segment := range segments {
|
||||
if minLiveSize > segment.LiveSize() {
|
||||
minLiveSize = segment.LiveSize()
|
||||
}
|
||||
|
||||
if minFileSize > segment.FileSize() {
|
||||
minFileSize = segment.FileSize()
|
||||
}
|
||||
|
||||
isEligible := segment.LiveSize() < o.MaxSegmentSize/2
|
||||
// An eligible segment (based on #documents) may be too large
|
||||
// and thus need a stricter check based on the file size.
|
||||
// This is particularly important for segments that contain
|
||||
// vectors.
|
||||
if isEligible && segment.HasVector() && o.MaxSegmentFileSize > 0 {
|
||||
isEligible = segment.FileSize() < o.MaxSegmentFileSize/2
|
||||
}
|
||||
|
||||
// Only small-enough segments are eligible.
|
||||
if isEligible {
|
||||
eligibles = append(eligibles, segment)
|
||||
eligiblesLiveSize += segment.LiveSize()
|
||||
eligiblesFileSize += segment.FileSize()
|
||||
}
|
||||
}
|
||||
|
||||
calcBudget := o.CalcBudget
|
||||
if calcBudget == nil {
|
||||
calcBudget = CalcBudget
|
||||
}
|
||||
|
||||
var budgetNumSegments int
|
||||
if o.FloorSegmentFileSize > 0 {
|
||||
minFileSize = o.RaiseToFloorSegmentFileSize(minFileSize)
|
||||
budgetNumSegments = calcBudget(eligiblesFileSize, minFileSize, o)
|
||||
|
||||
} else {
|
||||
minLiveSize = o.RaiseToFloorSegmentSize(minLiveSize)
|
||||
budgetNumSegments = calcBudget(eligiblesLiveSize, minLiveSize, o)
|
||||
}
|
||||
|
||||
scoreSegments := o.ScoreSegments
|
||||
if scoreSegments == nil {
|
||||
scoreSegments = ScoreSegments
|
||||
}
|
||||
|
||||
rv := &MergePlan{}
|
||||
|
||||
var empties []Segment
|
||||
for _, eligible := range eligibles {
|
||||
if eligible.LiveSize() <= 0 {
|
||||
empties = append(empties, eligible)
|
||||
}
|
||||
}
|
||||
if len(empties) > 0 {
|
||||
rv.Tasks = append(rv.Tasks, &MergeTask{Segments: empties})
|
||||
eligibles = removeSegments(eligibles, empties)
|
||||
}
|
||||
|
||||
// While we’re over budget, keep looping, which might produce
|
||||
// another MergeTask.
|
||||
for len(eligibles) > 0 && (len(eligibles)+len(rv.Tasks)) > budgetNumSegments {
|
||||
// Track a current best roster as we examine and score
|
||||
// potential rosters of merges.
|
||||
var bestRoster []Segment
|
||||
var bestRosterScore float64 // Lower score is better.
|
||||
|
||||
for startIdx := 0; startIdx < len(eligibles); startIdx++ {
|
||||
var roster []Segment
|
||||
var rosterLiveSize int64
|
||||
var rosterFileSize int64 // useful for segments with vectors
|
||||
|
||||
for idx := startIdx; idx < len(eligibles) && len(roster) < o.SegmentsPerMergeTask; idx++ {
|
||||
eligible := eligibles[idx]
|
||||
|
||||
if rosterLiveSize+eligible.LiveSize() >= o.MaxSegmentSize {
|
||||
continue
|
||||
}
|
||||
|
||||
if eligible.HasVector() {
|
||||
efs := eligible.FileSize()
|
||||
if rosterFileSize+efs >= o.MaxSegmentFileSize {
|
||||
continue
|
||||
}
|
||||
rosterFileSize += efs
|
||||
}
|
||||
|
||||
roster = append(roster, eligible)
|
||||
rosterLiveSize += eligible.LiveSize()
|
||||
}
|
||||
|
||||
if len(roster) > 0 {
|
||||
rosterScore := scoreSegments(roster, o)
|
||||
|
||||
if len(bestRoster) == 0 || rosterScore < bestRosterScore {
|
||||
bestRoster = roster
|
||||
bestRosterScore = rosterScore
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(bestRoster) == 0 {
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
rv.Tasks = append(rv.Tasks, &MergeTask{Segments: bestRoster})
|
||||
|
||||
eligibles = removeSegments(eligibles, bestRoster)
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
// Compute the number of segments that would be needed to cover the
|
||||
// totalSize, by climbing up a logarithmically growing staircase of
|
||||
// segment tiers.
|
||||
func CalcBudget(totalSize int64, firstTierSize int64, o *MergePlanOptions) (
|
||||
budgetNumSegments int) {
|
||||
tierSize := firstTierSize
|
||||
if tierSize < 1 {
|
||||
tierSize = 1
|
||||
}
|
||||
|
||||
maxSegmentsPerTier := o.MaxSegmentsPerTier
|
||||
if maxSegmentsPerTier < 1 {
|
||||
maxSegmentsPerTier = 1
|
||||
}
|
||||
|
||||
tierGrowth := o.TierGrowth
|
||||
if tierGrowth < 1.0 {
|
||||
tierGrowth = 1.0
|
||||
}
|
||||
|
||||
for totalSize > 0 {
|
||||
segmentsInTier := float64(totalSize) / float64(tierSize)
|
||||
if segmentsInTier < float64(maxSegmentsPerTier) {
|
||||
budgetNumSegments += int(math.Ceil(segmentsInTier))
|
||||
break
|
||||
}
|
||||
|
||||
budgetNumSegments += maxSegmentsPerTier
|
||||
totalSize -= int64(maxSegmentsPerTier) * tierSize
|
||||
tierSize = int64(float64(tierSize) * tierGrowth)
|
||||
}
|
||||
|
||||
return budgetNumSegments
|
||||
}
|
||||
|
||||
// Of note, removeSegments() keeps the ordering of the results stable.
|
||||
func removeSegments(segments []Segment, toRemove []Segment) []Segment {
|
||||
rv := make([]Segment, 0, len(segments)-len(toRemove))
|
||||
OUTER:
|
||||
for _, segment := range segments {
|
||||
for _, r := range toRemove {
|
||||
if segment == r {
|
||||
continue OUTER
|
||||
}
|
||||
}
|
||||
rv = append(rv, segment)
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
// Smaller result score is better.
|
||||
func ScoreSegments(segments []Segment, o *MergePlanOptions) float64 {
|
||||
var totBeforeSize int64
|
||||
var totAfterSize int64
|
||||
var totAfterSizeFloored int64
|
||||
|
||||
for _, segment := range segments {
|
||||
totBeforeSize += segment.FullSize()
|
||||
totAfterSize += segment.LiveSize()
|
||||
totAfterSizeFloored += o.RaiseToFloorSegmentSize(segment.LiveSize())
|
||||
}
|
||||
|
||||
if totBeforeSize <= 0 || totAfterSize <= 0 || totAfterSizeFloored <= 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Roughly guess the "balance" of the segments -- whether the
|
||||
// segments are about the same size.
|
||||
balance :=
|
||||
float64(o.RaiseToFloorSegmentSize(segments[0].LiveSize())) /
|
||||
float64(totAfterSizeFloored)
|
||||
|
||||
// Gently favor smaller merges over bigger ones. We don't want to
|
||||
// make the exponent too large else we end up with poor merges of
|
||||
// small segments in order to avoid the large merges.
|
||||
score := balance * math.Pow(float64(totAfterSize), 0.05)
|
||||
|
||||
// Strongly favor merges that reclaim deletes.
|
||||
nonDelRatio := float64(totAfterSize) / float64(totBeforeSize)
|
||||
|
||||
score *= math.Pow(nonDelRatio, o.ReclaimDeletesWeight)
|
||||
|
||||
return score
|
||||
}
|
||||
|
||||
// ------------------------------------------
|
||||
|
||||
// ToBarChart returns an ASCII rendering of the segments and the plan.
|
||||
// The barMax is the max width of the bars in the bar chart.
|
||||
func ToBarChart(prefix string, barMax int, segments []Segment, plan *MergePlan) string {
|
||||
rv := make([]string, 0, len(segments))
|
||||
|
||||
var maxFullSize int64
|
||||
for _, segment := range segments {
|
||||
if maxFullSize < segment.FullSize() {
|
||||
maxFullSize = segment.FullSize()
|
||||
}
|
||||
}
|
||||
if maxFullSize < 0 {
|
||||
maxFullSize = 1
|
||||
}
|
||||
|
||||
for _, segment := range segments {
|
||||
barFull := int(segment.FullSize())
|
||||
barLive := int(segment.LiveSize())
|
||||
|
||||
if maxFullSize > int64(barMax) {
|
||||
barFull = int(float64(barMax) * float64(barFull) / float64(maxFullSize))
|
||||
barLive = int(float64(barMax) * float64(barLive) / float64(maxFullSize))
|
||||
}
|
||||
|
||||
barKind := " "
|
||||
barChar := "."
|
||||
|
||||
if plan != nil {
|
||||
TASK_LOOP:
|
||||
for taski, task := range plan.Tasks {
|
||||
for _, taskSegment := range task.Segments {
|
||||
if taskSegment == segment {
|
||||
barKind = "*"
|
||||
barChar = fmt.Sprintf("%d", taski)
|
||||
break TASK_LOOP
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bar :=
|
||||
strings.Repeat(barChar, barLive)[0:barLive] +
|
||||
strings.Repeat("x", barFull-barLive)[0:barFull-barLive]
|
||||
|
||||
rv = append(rv, fmt.Sprintf("%s %5d: %5d /%5d - %s %s", prefix,
|
||||
segment.Id(),
|
||||
segment.LiveSize(),
|
||||
segment.FullSize(),
|
||||
barKind, bar))
|
||||
}
|
||||
|
||||
return strings.Join(rv, "\n")
|
||||
}
|
||||
|
||||
// ValidateMergePlannerOptions validates the merge planner options
|
||||
func ValidateMergePlannerOptions(options *MergePlanOptions) error {
|
||||
if options.MaxSegmentSize > MaxSegmentSizeLimit {
|
||||
return ErrMaxSegmentSizeTooLarge
|
||||
}
|
||||
return nil
|
||||
}
|
721
index/scorch/mergeplan/merge_plan_test.go
Normal file
721
index/scorch/mergeplan/merge_plan_test.go
Normal file
|
@ -0,0 +1,721 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package mergeplan
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"os"
|
||||
"reflect"
|
||||
"sort"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// segment is a plain-data implementation of the Segment interface for
// tests; every accessor simply returns the matching field.
type segment struct {
	MyId       uint64
	MyFullSize int64
	MyLiveSize int64

	MyHasVector bool
	MyFileSize  int64
}

// Id returns MyId.
func (s *segment) Id() uint64 {
	return s.MyId
}

// FullSize returns MyFullSize.
func (s *segment) FullSize() int64 {
	return s.MyFullSize
}

// LiveSize returns MyLiveSize.
func (s *segment) LiveSize() int64 {
	return s.MyLiveSize
}

// HasVector returns MyHasVector.
func (s *segment) HasVector() bool {
	return s.MyHasVector
}

// FileSize returns MyFileSize.
func (s *segment) FileSize() int64 {
	return s.MyFileSize
}
|
||||
|
||||
func makeLinearSegments(n int) (rv []Segment) {
|
||||
for i := 0; i < n; i++ {
|
||||
rv = append(rv, &segment{
|
||||
MyId: uint64(i),
|
||||
MyFullSize: int64(i),
|
||||
MyLiveSize: int64(i),
|
||||
})
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
// ----------------------------------------
|
||||
|
||||
func TestSimplePlan(t *testing.T) {
|
||||
segs := makeLinearSegments(10)
|
||||
|
||||
tests := []struct {
|
||||
Desc string
|
||||
Segments []Segment
|
||||
Options *MergePlanOptions
|
||||
ExpectPlan *MergePlan
|
||||
ExpectErr error
|
||||
}{
|
||||
{
|
||||
"nil segments",
|
||||
nil, nil, nil, nil,
|
||||
},
|
||||
{
|
||||
"empty segments",
|
||||
[]Segment{},
|
||||
nil, nil, nil,
|
||||
},
|
||||
{
|
||||
"1 segment",
|
||||
[]Segment{segs[1]},
|
||||
nil,
|
||||
nil,
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"2 segments",
|
||||
[]Segment{
|
||||
segs[1],
|
||||
segs[2],
|
||||
},
|
||||
nil,
|
||||
&MergePlan{
|
||||
Tasks: []*MergeTask{
|
||||
{
|
||||
Segments: []Segment{
|
||||
segs[2],
|
||||
segs[1],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"3 segments",
|
||||
[]Segment{
|
||||
segs[1],
|
||||
segs[2],
|
||||
segs[9],
|
||||
},
|
||||
nil,
|
||||
&MergePlan{
|
||||
Tasks: []*MergeTask{
|
||||
{
|
||||
Segments: []Segment{
|
||||
segs[9],
|
||||
segs[2],
|
||||
segs[1],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"many segments",
|
||||
[]Segment{
|
||||
segs[1],
|
||||
segs[2],
|
||||
segs[3],
|
||||
segs[4],
|
||||
segs[5],
|
||||
segs[6],
|
||||
},
|
||||
&MergePlanOptions{
|
||||
MaxSegmentsPerTier: 1,
|
||||
MaxSegmentSize: 1000,
|
||||
TierGrowth: 2.0,
|
||||
SegmentsPerMergeTask: 2,
|
||||
FloorSegmentSize: 1,
|
||||
},
|
||||
&MergePlan{
|
||||
Tasks: []*MergeTask{
|
||||
{
|
||||
Segments: []Segment{
|
||||
segs[6],
|
||||
segs[5],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
}
|
||||
|
||||
for testi, test := range tests {
|
||||
plan, err := Plan(test.Segments, test.Options)
|
||||
|
||||
if err != test.ExpectErr {
|
||||
testj, _ := json.Marshal(&test)
|
||||
|
||||
t.Errorf("testi: %d, test: %s, got err: %v", testi, testj, err)
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(plan, test.ExpectPlan) {
|
||||
testj, _ := json.Marshal(&test)
|
||||
|
||||
planj, _ := json.Marshal(&plan)
|
||||
|
||||
t.Errorf("testi: %d, test: %s, got plan: %s",
|
||||
testi, testj, planj)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------
|
||||
|
||||
func TestSort(t *testing.T) {
|
||||
segs := makeLinearSegments(10)
|
||||
|
||||
sort.Sort(byLiveSizeDescending(segs))
|
||||
|
||||
for i := 1; i < len(segs); i++ {
|
||||
if segs[i].LiveSize() >= segs[i-1].LiveSize() {
|
||||
t.Errorf("not descending")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------
|
||||
|
||||
func TestCalcBudget(t *testing.T) {
|
||||
tests := []struct {
|
||||
totalSize int64
|
||||
firstTierSize int64
|
||||
o MergePlanOptions
|
||||
expect int
|
||||
}{
|
||||
{0, 0, MergePlanOptions{}, 0},
|
||||
{1, 0, MergePlanOptions{}, 1},
|
||||
{9, 0, MergePlanOptions{}, 9},
|
||||
{
|
||||
1, 1,
|
||||
MergePlanOptions{
|
||||
MaxSegmentsPerTier: 1,
|
||||
MaxSegmentSize: 1000,
|
||||
TierGrowth: 2.0,
|
||||
SegmentsPerMergeTask: 2,
|
||||
FloorSegmentSize: 1,
|
||||
},
|
||||
1,
|
||||
},
|
||||
{
|
||||
21, 1,
|
||||
MergePlanOptions{
|
||||
MaxSegmentsPerTier: 1,
|
||||
MaxSegmentSize: 1000,
|
||||
TierGrowth: 2.0,
|
||||
SegmentsPerMergeTask: 2,
|
||||
FloorSegmentSize: 1,
|
||||
},
|
||||
5,
|
||||
},
|
||||
{
|
||||
21, 1,
|
||||
MergePlanOptions{
|
||||
MaxSegmentsPerTier: 2,
|
||||
MaxSegmentSize: 1000,
|
||||
TierGrowth: 2.0,
|
||||
SegmentsPerMergeTask: 2,
|
||||
FloorSegmentSize: 1,
|
||||
},
|
||||
7,
|
||||
},
|
||||
{
|
||||
1000, 2000, DefaultMergePlanOptions,
|
||||
1,
|
||||
},
|
||||
{
|
||||
5000, 2000, DefaultMergePlanOptions,
|
||||
3,
|
||||
},
|
||||
{
|
||||
10000, 2000, DefaultMergePlanOptions,
|
||||
5,
|
||||
},
|
||||
{
|
||||
30000, 2000, DefaultMergePlanOptions,
|
||||
11,
|
||||
},
|
||||
{
|
||||
1000000, 2000, DefaultMergePlanOptions,
|
||||
24,
|
||||
},
|
||||
{
|
||||
1000000000, 2000, DefaultMergePlanOptions,
|
||||
54,
|
||||
},
|
||||
}
|
||||
|
||||
for testi, test := range tests {
|
||||
res := CalcBudget(test.totalSize, test.firstTierSize, &test.o)
|
||||
if res != test.expect {
|
||||
t.Errorf("testi: %d, test: %#v, res: %v",
|
||||
testi, test, res)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCalcBudgetForSingleSegmentMergePolicy(t *testing.T) {
|
||||
mpolicy := MergePlanOptions{
|
||||
MaxSegmentsPerTier: 1,
|
||||
MaxSegmentSize: 1 << 30, // ~ 1 Billion
|
||||
SegmentsPerMergeTask: 10,
|
||||
FloorSegmentSize: 1 << 30,
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
totalSize int64
|
||||
firstTierSize int64
|
||||
o MergePlanOptions
|
||||
expect int
|
||||
}{
|
||||
{0, mpolicy.RaiseToFloorSegmentSize(0), mpolicy, 0},
|
||||
{1, mpolicy.RaiseToFloorSegmentSize(1), mpolicy, 1},
|
||||
{9, mpolicy.RaiseToFloorSegmentSize(0), mpolicy, 1},
|
||||
{1, mpolicy.RaiseToFloorSegmentSize(1), mpolicy, 1},
|
||||
{21, mpolicy.RaiseToFloorSegmentSize(21), mpolicy, 1},
|
||||
{21, mpolicy.RaiseToFloorSegmentSize(21), mpolicy, 1},
|
||||
{1000, mpolicy.RaiseToFloorSegmentSize(2000), mpolicy, 1},
|
||||
{5000, mpolicy.RaiseToFloorSegmentSize(5000), mpolicy, 1},
|
||||
{10000, mpolicy.RaiseToFloorSegmentSize(10000), mpolicy, 1},
|
||||
{30000, mpolicy.RaiseToFloorSegmentSize(30000), mpolicy, 1},
|
||||
{1000000, mpolicy.RaiseToFloorSegmentSize(1000000), mpolicy, 1},
|
||||
{1000000000, 1 << 30, mpolicy, 1},
|
||||
{1013423541, 1 << 30, mpolicy, 1},
|
||||
{98765442, 1 << 30, mpolicy, 1},
|
||||
}
|
||||
|
||||
for testi, test := range tests {
|
||||
res := CalcBudget(test.totalSize, test.firstTierSize, &test.o)
|
||||
if res != test.expect {
|
||||
t.Errorf("testi: %d, test: %#v, res: %v",
|
||||
testi, test, res)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------
|
||||
|
||||
func TestInsert1SameSizedSegmentBetweenMerges(t *testing.T) {
|
||||
o := &MergePlanOptions{
|
||||
MaxSegmentSize: 1000,
|
||||
MaxSegmentsPerTier: 3,
|
||||
TierGrowth: 3.0,
|
||||
SegmentsPerMergeTask: 3,
|
||||
}
|
||||
|
||||
spec := testCyclesSpec{
|
||||
descrip: "i1sssbm",
|
||||
verbose: os.Getenv("VERBOSE") == "i1sssbm" || os.Getenv("VERBOSE") == "y",
|
||||
n: 200,
|
||||
o: o,
|
||||
beforePlan: func(spec *testCyclesSpec) {
|
||||
spec.segments = append(spec.segments, &segment{
|
||||
MyId: spec.nextSegmentId,
|
||||
MyFullSize: 1,
|
||||
MyLiveSize: 1,
|
||||
})
|
||||
spec.nextSegmentId++
|
||||
},
|
||||
}
|
||||
|
||||
spec.runCycles(t)
|
||||
}
|
||||
|
||||
func TestInsertManySameSizedSegmentsBetweenMerges(t *testing.T) {
|
||||
o := &MergePlanOptions{
|
||||
MaxSegmentSize: 1000,
|
||||
MaxSegmentsPerTier: 3,
|
||||
TierGrowth: 3.0,
|
||||
SegmentsPerMergeTask: 3,
|
||||
}
|
||||
|
||||
spec := testCyclesSpec{
|
||||
descrip: "imsssbm",
|
||||
verbose: os.Getenv("VERBOSE") == "imsssbm" || os.Getenv("VERBOSE") == "y",
|
||||
n: 20,
|
||||
o: o,
|
||||
beforePlan: func(spec *testCyclesSpec) {
|
||||
for i := 0; i < 10; i++ {
|
||||
spec.segments = append(spec.segments, &segment{
|
||||
MyId: spec.nextSegmentId,
|
||||
MyFullSize: 1,
|
||||
MyLiveSize: 1,
|
||||
})
|
||||
spec.nextSegmentId++
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
spec.runCycles(t)
|
||||
}
|
||||
|
||||
func TestInsertManySameSizedSegmentsWithDeletionsBetweenMerges(t *testing.T) {
|
||||
o := &MergePlanOptions{
|
||||
MaxSegmentSize: 1000,
|
||||
MaxSegmentsPerTier: 3,
|
||||
TierGrowth: 3.0,
|
||||
SegmentsPerMergeTask: 3,
|
||||
}
|
||||
|
||||
spec := testCyclesSpec{
|
||||
descrip: "imssswdbm",
|
||||
verbose: os.Getenv("VERBOSE") == "imssswdbm" || os.Getenv("VERBOSE") == "y",
|
||||
n: 20,
|
||||
o: o,
|
||||
beforePlan: func(spec *testCyclesSpec) {
|
||||
for i := 0; i < 10; i++ {
|
||||
// Deletions are a shrinking of the live size.
|
||||
for i, seg := range spec.segments {
|
||||
if (spec.cycle+i)%5 == 0 {
|
||||
s := seg.(*segment)
|
||||
if s.MyLiveSize > 0 {
|
||||
s.MyLiveSize -= 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
spec.segments = append(spec.segments, &segment{
|
||||
MyId: spec.nextSegmentId,
|
||||
MyFullSize: 1,
|
||||
MyLiveSize: 1,
|
||||
})
|
||||
spec.nextSegmentId++
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
spec.runCycles(t)
|
||||
}
|
||||
|
||||
func TestInsertManyDifferentSizedSegmentsBetweenMerges(t *testing.T) {
|
||||
o := &MergePlanOptions{
|
||||
MaxSegmentSize: 1000,
|
||||
MaxSegmentsPerTier: 3,
|
||||
TierGrowth: 3.0,
|
||||
SegmentsPerMergeTask: 3,
|
||||
}
|
||||
|
||||
spec := testCyclesSpec{
|
||||
descrip: "imdssbm",
|
||||
verbose: os.Getenv("VERBOSE") == "imdssbm" || os.Getenv("VERBOSE") == "y",
|
||||
n: 20,
|
||||
o: o,
|
||||
beforePlan: func(spec *testCyclesSpec) {
|
||||
for i := 0; i < 10; i++ {
|
||||
spec.segments = append(spec.segments, &segment{
|
||||
MyId: spec.nextSegmentId,
|
||||
MyFullSize: int64(1 + (i % 5)),
|
||||
MyLiveSize: int64(1 + (i % 5)),
|
||||
})
|
||||
spec.nextSegmentId++
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
spec.runCycles(t)
|
||||
}
|
||||
|
||||
func TestManySameSizedSegmentsWithDeletesBetweenMerges(t *testing.T) {
|
||||
o := &MergePlanOptions{
|
||||
MaxSegmentSize: 1000,
|
||||
MaxSegmentsPerTier: 3,
|
||||
TierGrowth: 3.0,
|
||||
SegmentsPerMergeTask: 3,
|
||||
}
|
||||
|
||||
var numPlansWithTasks int
|
||||
|
||||
spec := testCyclesSpec{
|
||||
descrip: "mssswdbm",
|
||||
verbose: os.Getenv("VERBOSE") == "mssswdbm" || os.Getenv("VERBOSE") == "y",
|
||||
n: 20,
|
||||
o: o,
|
||||
beforePlan: func(spec *testCyclesSpec) {
|
||||
// Deletions are a shrinking of the live size.
|
||||
for i, seg := range spec.segments {
|
||||
if (spec.cycle+i)%5 == 0 {
|
||||
s := seg.(*segment)
|
||||
if s.MyLiveSize > 0 {
|
||||
s.MyLiveSize -= 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < 10; i++ {
|
||||
spec.segments = append(spec.segments, &segment{
|
||||
MyId: spec.nextSegmentId,
|
||||
MyFullSize: 1,
|
||||
MyLiveSize: 1,
|
||||
})
|
||||
spec.nextSegmentId++
|
||||
}
|
||||
},
|
||||
afterPlan: func(spec *testCyclesSpec, plan *MergePlan) {
|
||||
if plan != nil && len(plan.Tasks) > 0 {
|
||||
numPlansWithTasks++
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
spec.runCycles(t)
|
||||
|
||||
if numPlansWithTasks <= 0 {
|
||||
t.Errorf("expected some plans with tasks")
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateMergePlannerOptions(t *testing.T) {
|
||||
o := &MergePlanOptions{
|
||||
MaxSegmentSize: 1 << 32,
|
||||
MaxSegmentsPerTier: 3,
|
||||
TierGrowth: 3.0,
|
||||
SegmentsPerMergeTask: 3,
|
||||
}
|
||||
err := ValidateMergePlannerOptions(o)
|
||||
if err != ErrMaxSegmentSizeTooLarge {
|
||||
t.Error("Validation expected to fail as the MaxSegmentSize exceeds limit")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPlanMaxSegmentSizeLimit(t *testing.T) {
|
||||
o := &MergePlanOptions{
|
||||
MaxSegmentSize: 20,
|
||||
MaxSegmentsPerTier: 5,
|
||||
TierGrowth: 3.0,
|
||||
SegmentsPerMergeTask: 5,
|
||||
FloorSegmentSize: 5,
|
||||
}
|
||||
segments := makeLinearSegments(20)
|
||||
|
||||
s := rand.NewSource(time.Now().UnixNano())
|
||||
r := rand.New(s)
|
||||
|
||||
max := 20
|
||||
min := 5
|
||||
randomInRange := func() int64 {
|
||||
return int64(r.Intn(max-min) + min)
|
||||
}
|
||||
for i := 1; i < 20; i++ {
|
||||
o.MaxSegmentSize = randomInRange()
|
||||
plans, err := Plan(segments, o)
|
||||
if err != nil {
|
||||
t.Errorf("Plan failed, err: %v", err)
|
||||
}
|
||||
if len(plans.Tasks) == 0 {
|
||||
t.Errorf("expected some plans with tasks")
|
||||
}
|
||||
|
||||
for _, task := range plans.Tasks {
|
||||
var totalLiveSize int64
|
||||
for _, segs := range task.Segments {
|
||||
totalLiveSize += segs.LiveSize()
|
||||
}
|
||||
if totalLiveSize >= o.MaxSegmentSize {
|
||||
t.Errorf("merged segments size: %d exceeding the MaxSegmentSize"+
|
||||
"limit: %d", totalLiveSize, o.MaxSegmentSize)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------
|
||||
|
||||
type testCyclesSpec struct {
|
||||
descrip string
|
||||
verbose bool
|
||||
|
||||
n int // Number of cycles to run.
|
||||
o *MergePlanOptions
|
||||
|
||||
beforePlan func(*testCyclesSpec)
|
||||
afterPlan func(*testCyclesSpec, *MergePlan)
|
||||
|
||||
cycle int
|
||||
segments []Segment
|
||||
nextSegmentId uint64
|
||||
}
|
||||
|
||||
func (spec *testCyclesSpec) runCycles(t *testing.T) {
|
||||
numPlansWithTasks := 0
|
||||
|
||||
for spec.cycle < spec.n {
|
||||
if spec.verbose {
|
||||
emit(spec.descrip, spec.cycle, 0, spec.segments, nil)
|
||||
}
|
||||
|
||||
if spec.beforePlan != nil {
|
||||
spec.beforePlan(spec)
|
||||
}
|
||||
|
||||
if spec.verbose {
|
||||
emit(spec.descrip, spec.cycle, 1, spec.segments, nil)
|
||||
}
|
||||
|
||||
plan, err := Plan(spec.segments, spec.o)
|
||||
if err != nil {
|
||||
t.Fatalf("expected no err, got: %v", err)
|
||||
}
|
||||
|
||||
if spec.afterPlan != nil {
|
||||
spec.afterPlan(spec, plan)
|
||||
}
|
||||
|
||||
if spec.verbose {
|
||||
emit(spec.descrip, spec.cycle, 2, spec.segments, plan)
|
||||
}
|
||||
|
||||
if plan != nil {
|
||||
if len(plan.Tasks) > 0 {
|
||||
numPlansWithTasks++
|
||||
}
|
||||
|
||||
for _, task := range plan.Tasks {
|
||||
spec.segments = removeSegments(spec.segments, task.Segments)
|
||||
|
||||
var totLiveSize int64
|
||||
for _, segment := range task.Segments {
|
||||
totLiveSize += segment.LiveSize()
|
||||
}
|
||||
|
||||
if totLiveSize > 0 {
|
||||
spec.segments = append(spec.segments, &segment{
|
||||
MyId: spec.nextSegmentId,
|
||||
MyFullSize: totLiveSize,
|
||||
MyLiveSize: totLiveSize,
|
||||
})
|
||||
spec.nextSegmentId++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
spec.cycle++
|
||||
}
|
||||
|
||||
if numPlansWithTasks <= 0 {
|
||||
t.Errorf("expected some plans with tasks")
|
||||
}
|
||||
}
|
||||
|
||||
func emit(descrip string, cycle int, step int, segments []Segment, plan *MergePlan) {
|
||||
if os.Getenv("VERBOSE") == "" {
|
||||
return
|
||||
}
|
||||
|
||||
suffix := ""
|
||||
if plan != nil && len(plan.Tasks) > 0 {
|
||||
suffix = "hasPlan"
|
||||
}
|
||||
|
||||
fmt.Printf("%s %d.%d ---------- %s\n", descrip, cycle, step, suffix)
|
||||
fmt.Printf("%s\n", ToBarChart(descrip, 100, segments, plan))
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Test Vector Segment Merging
|
||||
|
||||
func TestPlanMaxSegmentFileSize(t *testing.T) {
|
||||
tests := []struct {
|
||||
segments []Segment
|
||||
o *MergePlanOptions
|
||||
|
||||
expectedTasks [][]uint64
|
||||
}{
|
||||
{
|
||||
[]Segment{
|
||||
&segment{ // ineligible
|
||||
MyId: 1,
|
||||
MyFullSize: 4000,
|
||||
MyLiveSize: 3900,
|
||||
|
||||
MyHasVector: true,
|
||||
MyFileSize: 3900 * 1000 * 4, // > 2MB
|
||||
},
|
||||
&segment{ // ineligible
|
||||
MyId: 2,
|
||||
MyFullSize: 6000,
|
||||
MyLiveSize: 5500, // > 5000
|
||||
|
||||
MyHasVector: true,
|
||||
MyFileSize: 5500 * 1000 * 4, // > 2MB
|
||||
},
|
||||
&segment{ // eligible
|
||||
MyId: 3,
|
||||
MyFullSize: 500,
|
||||
MyLiveSize: 490,
|
||||
|
||||
MyHasVector: true,
|
||||
MyFileSize: 490 * 1000 * 4,
|
||||
},
|
||||
&segment{ // eligible
|
||||
MyId: 4,
|
||||
MyFullSize: 500,
|
||||
MyLiveSize: 480,
|
||||
|
||||
MyHasVector: true,
|
||||
MyFileSize: 480 * 1000 * 4,
|
||||
},
|
||||
&segment{ // eligible
|
||||
MyId: 5,
|
||||
MyFullSize: 500,
|
||||
MyLiveSize: 300,
|
||||
|
||||
MyHasVector: true,
|
||||
MyFileSize: 300 * 1000 * 4,
|
||||
},
|
||||
&segment{ // eligible
|
||||
MyId: 6,
|
||||
MyFullSize: 500,
|
||||
MyLiveSize: 400,
|
||||
|
||||
MyHasVector: true,
|
||||
MyFileSize: 400 * 1000 * 4,
|
||||
},
|
||||
},
|
||||
&MergePlanOptions{
|
||||
MaxSegmentSize: 5000, // number of documents
|
||||
// considering vector dimension as 1000
|
||||
// vectorBytes = 5000 * 1000 * 4 = 20MB, which is too large
|
||||
// So, let's set the fileSize limit to 4MB
|
||||
MaxSegmentFileSize: 4000000, // 4MB
|
||||
MaxSegmentsPerTier: 1,
|
||||
SegmentsPerMergeTask: 2,
|
||||
TierGrowth: 2.0,
|
||||
FloorSegmentSize: 1,
|
||||
},
|
||||
[][]uint64{
|
||||
{3, 4},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for testi, test := range tests {
|
||||
t.Run(fmt.Sprintf("Test-%d", testi), func(t *testing.T) {
|
||||
plans, err := Plan(test.segments, test.o)
|
||||
if err != nil {
|
||||
t.Fatalf("Plan failed, err: %v", err)
|
||||
}
|
||||
|
||||
for i, task := range plans.Tasks {
|
||||
var segIDs []uint64
|
||||
for _, seg := range task.Segments {
|
||||
segIDs = append(segIDs, seg.Id())
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(segIDs, test.expectedTasks[0]) {
|
||||
t.Errorf("expected task segments: %v, got: %v", test.expectedTasks[i], segIDs)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
28
index/scorch/mergeplan/sort.go
Normal file
28
index/scorch/mergeplan/sort.go
Normal file
|
@ -0,0 +1,28 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package mergeplan
|
||||
|
||||
type byLiveSizeDescending []Segment
|
||||
|
||||
func (a byLiveSizeDescending) Len() int { return len(a) }
|
||||
|
||||
func (a byLiveSizeDescending) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
|
||||
|
||||
func (a byLiveSizeDescending) Less(i, j int) bool {
|
||||
if a[i].LiveSize() != a[j].LiveSize() {
|
||||
return a[i].LiveSize() > a[j].LiveSize()
|
||||
}
|
||||
return a[i].Id() < a[j].Id()
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue