Adding upstream version 2.5.1.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
c71cb8b61d
commit
982828099e
783 changed files with 150650 additions and 0 deletions
2909
search/facet/benchmark_data.txt
Normal file
2909
search/facet/benchmark_data.txt
Normal file
File diff suppressed because it is too large
Load diff
163
search/facet/facet_builder_datetime.go
Normal file
163
search/facet/facet_builder_datetime.go
Normal file
|
@ -0,0 +1,163 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package facet
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/numeric"
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
)
|
||||
|
||||
var (
|
||||
reflectStaticSizeDateTimeFacetBuilder int
|
||||
reflectStaticSizedateTimeRange int
|
||||
)
|
||||
|
||||
func init() {
|
||||
var dtfb DateTimeFacetBuilder
|
||||
reflectStaticSizeDateTimeFacetBuilder = int(reflect.TypeOf(dtfb).Size())
|
||||
var dtr dateTimeRange
|
||||
reflectStaticSizedateTimeRange = int(reflect.TypeOf(dtr).Size())
|
||||
}
|
||||
|
||||
// dateTimeRange holds the bounds of one date range bucket. UpdateVisitor
// treats a zero time.Time on either side as "no bound on that side".
type dateTimeRange struct {
	start, end time.Time
}
|
||||
|
||||
type DateTimeFacetBuilder struct {
|
||||
size int
|
||||
field string
|
||||
termsCount map[string]int
|
||||
total int
|
||||
missing int
|
||||
ranges map[string]*dateTimeRange
|
||||
sawValue bool
|
||||
}
|
||||
|
||||
func NewDateTimeFacetBuilder(field string, size int) *DateTimeFacetBuilder {
|
||||
return &DateTimeFacetBuilder{
|
||||
size: size,
|
||||
field: field,
|
||||
termsCount: make(map[string]int),
|
||||
ranges: make(map[string]*dateTimeRange, 0),
|
||||
}
|
||||
}
|
||||
|
||||
func (fb *DateTimeFacetBuilder) Size() int {
|
||||
sizeInBytes := reflectStaticSizeDateTimeFacetBuilder + size.SizeOfPtr +
|
||||
len(fb.field)
|
||||
|
||||
for k := range fb.termsCount {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
size.SizeOfInt
|
||||
}
|
||||
|
||||
for k := range fb.ranges {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
size.SizeOfPtr + reflectStaticSizedateTimeRange
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (fb *DateTimeFacetBuilder) AddRange(name string, start, end time.Time) {
|
||||
r := dateTimeRange{
|
||||
start: start,
|
||||
end: end,
|
||||
}
|
||||
fb.ranges[name] = &r
|
||||
}
|
||||
|
||||
func (fb *DateTimeFacetBuilder) Field() string {
|
||||
return fb.field
|
||||
}
|
||||
|
||||
func (fb *DateTimeFacetBuilder) UpdateVisitor(term []byte) {
|
||||
fb.sawValue = true
|
||||
// only consider the values which are shifted 0
|
||||
prefixCoded := numeric.PrefixCoded(term)
|
||||
shift, err := prefixCoded.Shift()
|
||||
if err == nil && shift == 0 {
|
||||
i64, err := prefixCoded.Int64()
|
||||
if err == nil {
|
||||
t := time.Unix(0, i64)
|
||||
|
||||
// look at each of the ranges for a match
|
||||
for rangeName, r := range fb.ranges {
|
||||
if (r.start.IsZero() || t.After(r.start) || t.Equal(r.start)) && (r.end.IsZero() || t.Before(r.end)) {
|
||||
fb.termsCount[rangeName] = fb.termsCount[rangeName] + 1
|
||||
fb.total++
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (fb *DateTimeFacetBuilder) StartDoc() {
|
||||
fb.sawValue = false
|
||||
}
|
||||
|
||||
func (fb *DateTimeFacetBuilder) EndDoc() {
|
||||
if !fb.sawValue {
|
||||
fb.missing++
|
||||
}
|
||||
}
|
||||
|
||||
func (fb *DateTimeFacetBuilder) Result() *search.FacetResult {
|
||||
rv := search.FacetResult{
|
||||
Field: fb.field,
|
||||
Total: fb.total,
|
||||
Missing: fb.missing,
|
||||
}
|
||||
|
||||
rv.DateRanges = make([]*search.DateRangeFacet, 0, len(fb.termsCount))
|
||||
|
||||
for term, count := range fb.termsCount {
|
||||
dateRange := fb.ranges[term]
|
||||
tf := &search.DateRangeFacet{
|
||||
Name: term,
|
||||
Count: count,
|
||||
}
|
||||
if !dateRange.start.IsZero() {
|
||||
start := dateRange.start.Format(time.RFC3339Nano)
|
||||
tf.Start = &start
|
||||
}
|
||||
if !dateRange.end.IsZero() {
|
||||
end := dateRange.end.Format(time.RFC3339Nano)
|
||||
tf.End = &end
|
||||
}
|
||||
rv.DateRanges = append(rv.DateRanges, tf)
|
||||
}
|
||||
|
||||
sort.Sort(rv.DateRanges)
|
||||
|
||||
// we now have the list of the top N facets
|
||||
if fb.size < len(rv.DateRanges) {
|
||||
rv.DateRanges = rv.DateRanges[:fb.size]
|
||||
}
|
||||
|
||||
notOther := 0
|
||||
for _, nr := range rv.DateRanges {
|
||||
notOther += nr.Count
|
||||
}
|
||||
rv.Other = fb.total - notOther
|
||||
|
||||
return &rv
|
||||
}
|
157
search/facet/facet_builder_numeric.go
Normal file
157
search/facet/facet_builder_numeric.go
Normal file
|
@ -0,0 +1,157 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package facet
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/numeric"
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
)
|
||||
|
||||
var (
|
||||
reflectStaticSizeNumericFacetBuilder int
|
||||
reflectStaticSizenumericRange int
|
||||
)
|
||||
|
||||
func init() {
|
||||
var nfb NumericFacetBuilder
|
||||
reflectStaticSizeNumericFacetBuilder = int(reflect.TypeOf(nfb).Size())
|
||||
var nr numericRange
|
||||
reflectStaticSizenumericRange = int(reflect.TypeOf(nr).Size())
|
||||
}
|
||||
|
||||
// numericRange holds the bounds of one numeric range bucket. UpdateVisitor
// treats a nil pointer on either side as "no bound on that side".
type numericRange struct {
	min, max *float64
}
|
||||
|
||||
type NumericFacetBuilder struct {
|
||||
size int
|
||||
field string
|
||||
termsCount map[string]int
|
||||
total int
|
||||
missing int
|
||||
ranges map[string]*numericRange
|
||||
sawValue bool
|
||||
}
|
||||
|
||||
func NewNumericFacetBuilder(field string, size int) *NumericFacetBuilder {
|
||||
return &NumericFacetBuilder{
|
||||
size: size,
|
||||
field: field,
|
||||
termsCount: make(map[string]int),
|
||||
ranges: make(map[string]*numericRange, 0),
|
||||
}
|
||||
}
|
||||
|
||||
func (fb *NumericFacetBuilder) Size() int {
|
||||
sizeInBytes := reflectStaticSizeNumericFacetBuilder + size.SizeOfPtr +
|
||||
len(fb.field)
|
||||
|
||||
for k := range fb.termsCount {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
size.SizeOfInt
|
||||
}
|
||||
|
||||
for k := range fb.ranges {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
size.SizeOfPtr + reflectStaticSizenumericRange
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (fb *NumericFacetBuilder) AddRange(name string, min, max *float64) {
|
||||
r := numericRange{
|
||||
min: min,
|
||||
max: max,
|
||||
}
|
||||
fb.ranges[name] = &r
|
||||
}
|
||||
|
||||
func (fb *NumericFacetBuilder) Field() string {
|
||||
return fb.field
|
||||
}
|
||||
|
||||
func (fb *NumericFacetBuilder) UpdateVisitor(term []byte) {
|
||||
fb.sawValue = true
|
||||
// only consider the values which are shifted 0
|
||||
prefixCoded := numeric.PrefixCoded(term)
|
||||
shift, err := prefixCoded.Shift()
|
||||
if err == nil && shift == 0 {
|
||||
i64, err := prefixCoded.Int64()
|
||||
if err == nil {
|
||||
f64 := numeric.Int64ToFloat64(i64)
|
||||
|
||||
// look at each of the ranges for a match
|
||||
for rangeName, r := range fb.ranges {
|
||||
if (r.min == nil || f64 >= *r.min) && (r.max == nil || f64 < *r.max) {
|
||||
fb.termsCount[rangeName] = fb.termsCount[rangeName] + 1
|
||||
fb.total++
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (fb *NumericFacetBuilder) StartDoc() {
|
||||
fb.sawValue = false
|
||||
}
|
||||
|
||||
func (fb *NumericFacetBuilder) EndDoc() {
|
||||
if !fb.sawValue {
|
||||
fb.missing++
|
||||
}
|
||||
}
|
||||
|
||||
func (fb *NumericFacetBuilder) Result() *search.FacetResult {
|
||||
rv := search.FacetResult{
|
||||
Field: fb.field,
|
||||
Total: fb.total,
|
||||
Missing: fb.missing,
|
||||
}
|
||||
|
||||
rv.NumericRanges = make([]*search.NumericRangeFacet, 0, len(fb.termsCount))
|
||||
|
||||
for term, count := range fb.termsCount {
|
||||
numericRange := fb.ranges[term]
|
||||
tf := &search.NumericRangeFacet{
|
||||
Name: term,
|
||||
Count: count,
|
||||
Min: numericRange.min,
|
||||
Max: numericRange.max,
|
||||
}
|
||||
|
||||
rv.NumericRanges = append(rv.NumericRanges, tf)
|
||||
}
|
||||
|
||||
sort.Sort(rv.NumericRanges)
|
||||
|
||||
// we now have the list of the top N facets
|
||||
if fb.size < len(rv.NumericRanges) {
|
||||
rv.NumericRanges = rv.NumericRanges[:fb.size]
|
||||
}
|
||||
|
||||
notOther := 0
|
||||
for _, nr := range rv.NumericRanges {
|
||||
notOther += nr.Count
|
||||
}
|
||||
rv.Other = fb.total - notOther
|
||||
|
||||
return &rv
|
||||
}
|
64
search/facet/facet_builder_numeric_test.go
Normal file
64
search/facet/facet_builder_numeric_test.go
Normal file
|
@ -0,0 +1,64 @@
|
|||
// Copyright (c) 2016 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package facet
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/numeric"
|
||||
)
|
||||
|
||||
var pcodedvalues []numeric.PrefixCoded
|
||||
|
||||
func init() {
|
||||
pcodedvalues = []numeric.PrefixCoded{{0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, {0x20, 0x0, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f}, {0x20, 0x0, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7a, 0x1d, 0xa}, {0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x1, 0x16, 0x9, 0x4a, 0x7b}}
|
||||
}
|
||||
|
||||
func BenchmarkNumericFacet10(b *testing.B) {
|
||||
numericFacetN(b, 10)
|
||||
}
|
||||
|
||||
func BenchmarkNumericFacet100(b *testing.B) {
|
||||
numericFacetN(b, 100)
|
||||
}
|
||||
|
||||
func BenchmarkNumericFacet1000(b *testing.B) {
|
||||
numericFacetN(b, 1000)
|
||||
}
|
||||
|
||||
func numericFacetN(b *testing.B, numTerms int) {
|
||||
field := "test"
|
||||
nfb := NewNumericFacetBuilder(field, numTerms)
|
||||
min, max := 0.0, 9999999998.0
|
||||
|
||||
for i := 0; i <= numTerms; i++ {
|
||||
max++
|
||||
min--
|
||||
|
||||
nfb.AddRange("rangename"+strconv.Itoa(i), &min, &max)
|
||||
|
||||
for _, pv := range pcodedvalues {
|
||||
nfb.StartDoc()
|
||||
nfb.UpdateVisitor(pv)
|
||||
nfb.EndDoc()
|
||||
}
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
nfb.Result()
|
||||
}
|
||||
}
|
115
search/facet/facet_builder_terms.go
Normal file
115
search/facet/facet_builder_terms.go
Normal file
|
@ -0,0 +1,115 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package facet
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeTermsFacetBuilder int
|
||||
|
||||
func init() {
|
||||
var tfb TermsFacetBuilder
|
||||
reflectStaticSizeTermsFacetBuilder = int(reflect.TypeOf(tfb).Size())
|
||||
}
|
||||
|
||||
// TermsFacetBuilder counts how many times each distinct term is visited.
type TermsFacetBuilder struct {
	// size caps the number of terms returned by Result.
	size int
	// field is the value reported by Field.
	field string
	// termsCount maps a term to the number of times it was visited.
	termsCount map[string]int
	// total is incremented once per visited term; missing is incremented
	// once per document that ended without any visited term.
	total, missing int
	// sawValue is reset by StartDoc and set by UpdateVisitor.
	sawValue bool
}
|
||||
|
||||
func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder {
|
||||
return &TermsFacetBuilder{
|
||||
size: size,
|
||||
field: field,
|
||||
termsCount: make(map[string]int),
|
||||
}
|
||||
}
|
||||
|
||||
func (fb *TermsFacetBuilder) Size() int {
|
||||
sizeInBytes := reflectStaticSizeTermsFacetBuilder + size.SizeOfPtr +
|
||||
len(fb.field)
|
||||
|
||||
for k := range fb.termsCount {
|
||||
sizeInBytes += size.SizeOfString + len(k) +
|
||||
size.SizeOfInt
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (fb *TermsFacetBuilder) Field() string {
|
||||
return fb.field
|
||||
}
|
||||
|
||||
func (fb *TermsFacetBuilder) UpdateVisitor(term []byte) {
|
||||
fb.sawValue = true
|
||||
fb.termsCount[string(term)] = fb.termsCount[string(term)] + 1
|
||||
fb.total++
|
||||
}
|
||||
|
||||
func (fb *TermsFacetBuilder) StartDoc() {
|
||||
fb.sawValue = false
|
||||
}
|
||||
|
||||
func (fb *TermsFacetBuilder) EndDoc() {
|
||||
if !fb.sawValue {
|
||||
fb.missing++
|
||||
}
|
||||
}
|
||||
|
||||
func (fb *TermsFacetBuilder) Result() *search.FacetResult {
|
||||
rv := search.FacetResult{
|
||||
Field: fb.field,
|
||||
Total: fb.total,
|
||||
Missing: fb.missing,
|
||||
}
|
||||
|
||||
rv.Terms = &search.TermFacets{}
|
||||
|
||||
for term, count := range fb.termsCount {
|
||||
tf := &search.TermFacet{
|
||||
Term: term,
|
||||
Count: count,
|
||||
}
|
||||
|
||||
rv.Terms.Add(tf)
|
||||
}
|
||||
|
||||
sort.Sort(rv.Terms)
|
||||
|
||||
// we now have the list of the top N facets
|
||||
trimTopN := fb.size
|
||||
if trimTopN > rv.Terms.Len() {
|
||||
trimTopN = rv.Terms.Len()
|
||||
}
|
||||
rv.Terms.TrimToTopN(trimTopN)
|
||||
|
||||
notOther := 0
|
||||
for _, tf := range rv.Terms.Terms() {
|
||||
notOther += tf.Count
|
||||
}
|
||||
rv.Other = fb.total - notOther
|
||||
|
||||
return &rv
|
||||
}
|
72
search/facet/facet_builder_terms_test.go
Normal file
72
search/facet/facet_builder_terms_test.go
Normal file
|
@ -0,0 +1,72 @@
|
|||
// Copyright (c) 2016 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package facet
|
||||
|
||||
import (
|
||||
"os"
|
||||
"regexp"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// terms holds the tokens used as input by the terms facet benchmarks.
var terms []string

// init loads benchmark_data.txt from the working directory and splits it
// on runs of non-word characters. It panics if the file cannot be read.
func init() {
	data, err := os.ReadFile("benchmark_data.txt")
	if err != nil {
		panic(err)
	}
	terms = regexp.MustCompile(`\W+`).Split(string(data), -1)
}
|
||||
|
||||
func BenchmarkTermsFacet10(b *testing.B) {
|
||||
termsFacetN(b, 10)
|
||||
}
|
||||
|
||||
func BenchmarkTermsFacet100(b *testing.B) {
|
||||
termsFacetN(b, 100)
|
||||
}
|
||||
|
||||
func BenchmarkTermsFacet1000(b *testing.B) {
|
||||
termsFacetN(b, 1000)
|
||||
}
|
||||
|
||||
func BenchmarkTermsFacet10000(b *testing.B) {
|
||||
termsFacetN(b, 10000)
|
||||
}
|
||||
|
||||
// func BenchmarkTermsFacet100000(b *testing.B) {
|
||||
// termsFacetN(b, 100000)
|
||||
// }
|
||||
|
||||
func termsFacetN(b *testing.B, numTerms int) {
|
||||
field := "test"
|
||||
termsLen := len(terms)
|
||||
tfb := NewTermsFacetBuilder(field, 3)
|
||||
i := 0
|
||||
for len(tfb.termsCount) < numTerms && i <= termsLen {
|
||||
j := i % termsLen
|
||||
term := terms[j]
|
||||
tfb.StartDoc()
|
||||
tfb.UpdateVisitor([]byte(term))
|
||||
tfb.EndDoc()
|
||||
i++
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
tfb.Result()
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue