Adding upstream version 2.5.1.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
c71cb8b61d
commit
982828099e
783 changed files with 150650 additions and 0 deletions
50
search/collector/bench_test.go
Normal file
50
search/collector/bench_test.go
Normal file
|
@ -0,0 +1,50 @@
|
|||
// Copyright (c) 2016 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"math/rand"
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
// createCollector is a factory returning a search.Collector; benchHelper
// invokes it once per benchmark iteration to obtain a fresh collector.
type createCollector func() search.Collector
|
||||
|
||||
func benchHelper(numOfMatches int, cc createCollector, b *testing.B) {
|
||||
matches := make([]*search.DocumentMatch, 0, numOfMatches)
|
||||
for i := 0; i < numOfMatches; i++ {
|
||||
matches = append(matches, &search.DocumentMatch{
|
||||
IndexInternalID: index.IndexInternalID(strconv.Itoa(i)),
|
||||
Score: rand.Float64(),
|
||||
})
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
for run := 0; run < b.N; run++ {
|
||||
searcher := &stubSearcher{
|
||||
matches: matches,
|
||||
}
|
||||
collector := cc()
|
||||
err := collector.Collect(context.Background(), searcher, &stubReader{})
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
172
search/collector/eligible.go
Normal file
172
search/collector/eligible.go
Normal file
|
@ -0,0 +1,172 @@
|
|||
// Copyright (c) 2024 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//go:build vectors
|
||||
// +build vectors
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
type EligibleCollector struct {
|
||||
size int
|
||||
total uint64
|
||||
took time.Duration
|
||||
eligibleSelector index.EligibleDocumentSelector
|
||||
}
|
||||
|
||||
func NewEligibleCollector(size int) *EligibleCollector {
|
||||
return newEligibleCollector(size)
|
||||
}
|
||||
|
||||
func newEligibleCollector(size int) *EligibleCollector {
|
||||
// No sort order & skip always 0 since this is only to filter eligible docs.
|
||||
ec := &EligibleCollector{
|
||||
size: size,
|
||||
}
|
||||
return ec
|
||||
}
|
||||
|
||||
func makeEligibleDocumentMatchHandler(ctx *search.SearchContext, reader index.IndexReader) (search.DocumentMatchHandler, error) {
|
||||
if ec, ok := ctx.Collector.(*EligibleCollector); ok {
|
||||
if vr, ok := reader.(index.VectorIndexReader); ok {
|
||||
// create a new eligible document selector to add eligible document matches
|
||||
ec.eligibleSelector = vr.NewEligibleDocumentSelector()
|
||||
// return a document match handler that adds eligible document matches
|
||||
// to the eligible document selector
|
||||
return func(d *search.DocumentMatch) error {
|
||||
if d == nil {
|
||||
return nil
|
||||
}
|
||||
err := ec.eligibleSelector.AddEligibleDocumentMatch(d.IndexInternalID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// recycle the DocumentMatch
|
||||
ctx.DocumentMatchPool.Put(d)
|
||||
return nil
|
||||
}, nil
|
||||
}
|
||||
return nil, fmt.Errorf("reader is not a VectorIndexReader")
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("eligiblity collector not available")
|
||||
}
|
||||
|
||||
func (ec *EligibleCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error {
|
||||
startTime := time.Now()
|
||||
var err error
|
||||
var next *search.DocumentMatch
|
||||
|
||||
backingSize := ec.size
|
||||
if backingSize > PreAllocSizeSkipCap {
|
||||
backingSize = PreAllocSizeSkipCap + 1
|
||||
}
|
||||
searchContext := &search.SearchContext{
|
||||
DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), 0),
|
||||
Collector: ec,
|
||||
IndexReader: reader,
|
||||
}
|
||||
|
||||
dmHandler, err := makeEligibleDocumentMatchHandler(searchContext, reader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
search.RecordSearchCost(ctx, search.AbortM, 0)
|
||||
return ctx.Err()
|
||||
default:
|
||||
next, err = searcher.Next(searchContext)
|
||||
}
|
||||
for err == nil && next != nil {
|
||||
if ec.total%CheckDoneEvery == 0 {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
search.RecordSearchCost(ctx, search.AbortM, 0)
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
}
|
||||
ec.total++
|
||||
|
||||
err = dmHandler(next)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
|
||||
next, err = searcher.Next(searchContext)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// help finalize/flush the results in case
|
||||
// of custom document match handlers.
|
||||
err = dmHandler(nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// compute search duration
|
||||
ec.took = time.Since(startTime)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// The eligible collector does not return any document matches and hence
|
||||
// this method is a dummy method returning nil, to conform to the
|
||||
// search.Collector interface.
|
||||
func (ec *EligibleCollector) Results() search.DocumentMatchCollection {
|
||||
return nil
|
||||
}
|
||||
|
||||
// EligibleSelector returns the eligible document selector, which can be used
|
||||
// to retrieve the list of eligible documents from this collector.
|
||||
// If the collector has no results, it returns nil.
|
||||
func (ec *EligibleCollector) EligibleSelector() index.EligibleDocumentSelector {
|
||||
if ec.total == 0 {
|
||||
return nil
|
||||
}
|
||||
return ec.eligibleSelector
|
||||
}
|
||||
|
||||
func (ec *EligibleCollector) Total() uint64 {
|
||||
return ec.total
|
||||
}
|
||||
|
||||
// No concept of scoring in the eligible collector.
|
||||
func (ec *EligibleCollector) MaxScore() float64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (ec *EligibleCollector) Took() time.Duration {
|
||||
return ec.took
|
||||
}
|
||||
|
||||
func (ec *EligibleCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
|
||||
// facet unsupported for pre-filtering in KNN search
|
||||
}
|
||||
|
||||
func (ec *EligibleCollector) FacetResults() search.FacetResults {
|
||||
// facet unsupported for pre-filtering in KNN search
|
||||
return nil
|
||||
}
|
99
search/collector/heap.go
Normal file
99
search/collector/heap.go
Normal file
|
@ -0,0 +1,99 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"container/heap"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
)
|
||||
|
||||
type collectStoreHeap struct {
|
||||
heap search.DocumentMatchCollection
|
||||
compare collectorCompare
|
||||
}
|
||||
|
||||
func newStoreHeap(capacity int, compare collectorCompare) *collectStoreHeap {
|
||||
rv := &collectStoreHeap{
|
||||
heap: make(search.DocumentMatchCollection, 0, capacity),
|
||||
compare: compare,
|
||||
}
|
||||
heap.Init(rv)
|
||||
return rv
|
||||
}
|
||||
|
||||
func (c *collectStoreHeap) AddNotExceedingSize(doc *search.DocumentMatch,
|
||||
size int) *search.DocumentMatch {
|
||||
c.add(doc)
|
||||
if c.Len() > size {
|
||||
return c.removeLast()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *collectStoreHeap) add(doc *search.DocumentMatch) {
|
||||
heap.Push(c, doc)
|
||||
}
|
||||
|
||||
func (c *collectStoreHeap) removeLast() *search.DocumentMatch {
|
||||
return heap.Pop(c).(*search.DocumentMatch)
|
||||
}
|
||||
|
||||
func (c *collectStoreHeap) Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error) {
|
||||
count := c.Len()
|
||||
size := count - skip
|
||||
if size <= 0 {
|
||||
return make(search.DocumentMatchCollection, 0), nil
|
||||
}
|
||||
rv := make(search.DocumentMatchCollection, size)
|
||||
for i := size - 1; i >= 0; i-- {
|
||||
doc := heap.Pop(c).(*search.DocumentMatch)
|
||||
rv[i] = doc
|
||||
err := fixup(doc)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (c *collectStoreHeap) Internal() search.DocumentMatchCollection {
|
||||
return c.heap
|
||||
}
|
||||
|
||||
// heap interface implementation
|
||||
|
||||
func (c *collectStoreHeap) Len() int {
|
||||
return len(c.heap)
|
||||
}
|
||||
|
||||
func (c *collectStoreHeap) Less(i, j int) bool {
|
||||
so := c.compare(c.heap[i], c.heap[j])
|
||||
return -so < 0
|
||||
}
|
||||
|
||||
func (c *collectStoreHeap) Swap(i, j int) {
|
||||
c.heap[i], c.heap[j] = c.heap[j], c.heap[i]
|
||||
}
|
||||
|
||||
func (c *collectStoreHeap) Push(x interface{}) {
|
||||
c.heap = append(c.heap, x.(*search.DocumentMatch))
|
||||
}
|
||||
|
||||
func (c *collectStoreHeap) Pop() interface{} {
|
||||
var rv *search.DocumentMatch
|
||||
rv, c.heap = c.heap[len(c.heap)-1], c.heap[:len(c.heap)-1]
|
||||
return rv
|
||||
}
|
262
search/collector/knn.go
Normal file
262
search/collector/knn.go
Normal file
|
@ -0,0 +1,262 @@
|
|||
// Copyright (c) 2023 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//go:build vectors
|
||||
// +build vectors
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
type collectStoreKNN struct {
|
||||
internalHeaps []collectorStore
|
||||
kValues []int64
|
||||
allHits map[*search.DocumentMatch]struct{}
|
||||
ejectedDocs map[*search.DocumentMatch]struct{}
|
||||
}
|
||||
|
||||
func newStoreKNN(internalHeaps []collectorStore, kValues []int64) *collectStoreKNN {
|
||||
return &collectStoreKNN{
|
||||
internalHeaps: internalHeaps,
|
||||
kValues: kValues,
|
||||
ejectedDocs: make(map[*search.DocumentMatch]struct{}),
|
||||
allHits: make(map[*search.DocumentMatch]struct{}),
|
||||
}
|
||||
}
|
||||
|
||||
// Adds a document to the collector store and returns the documents that were ejected
|
||||
// from the store. The documents that were ejected from the store are the ones that
|
||||
// were not in the top K documents for any of the heaps.
|
||||
// These document are put back into the pool document match pool in the KNN Collector.
|
||||
func (c *collectStoreKNN) AddDocument(doc *search.DocumentMatch) []*search.DocumentMatch {
|
||||
for heapIdx := 0; heapIdx < len(c.internalHeaps); heapIdx++ {
|
||||
if _, ok := doc.ScoreBreakdown[heapIdx]; !ok {
|
||||
continue
|
||||
}
|
||||
ejectedDoc := c.internalHeaps[heapIdx].AddNotExceedingSize(doc, int(c.kValues[heapIdx]))
|
||||
if ejectedDoc != nil {
|
||||
delete(ejectedDoc.ScoreBreakdown, heapIdx)
|
||||
c.ejectedDocs[ejectedDoc] = struct{}{}
|
||||
}
|
||||
}
|
||||
var rv []*search.DocumentMatch
|
||||
for doc := range c.ejectedDocs {
|
||||
if len(doc.ScoreBreakdown) == 0 {
|
||||
rv = append(rv, doc)
|
||||
}
|
||||
// clear out the ejectedDocs map to reuse it in the next AddDocument call
|
||||
delete(c.ejectedDocs, doc)
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (c *collectStoreKNN) Final(fixup collectorFixup) (search.DocumentMatchCollection, error) {
|
||||
for _, heap := range c.internalHeaps {
|
||||
for _, doc := range heap.Internal() {
|
||||
// duplicates may be present across the internal heaps
|
||||
// meaning the same document match may be in the top K
|
||||
// for multiple KNN queries.
|
||||
c.allHits[doc] = struct{}{}
|
||||
}
|
||||
}
|
||||
size := len(c.allHits)
|
||||
if size <= 0 {
|
||||
return make(search.DocumentMatchCollection, 0), nil
|
||||
}
|
||||
rv := make(search.DocumentMatchCollection, size)
|
||||
i := 0
|
||||
for doc := range c.allHits {
|
||||
if fixup != nil {
|
||||
err := fixup(doc)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
rv[i] = doc
|
||||
i++
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func MakeKNNDocMatchHandler(ctx *search.SearchContext) (search.DocumentMatchHandler, error) {
|
||||
var hc *KNNCollector
|
||||
var ok bool
|
||||
if hc, ok = ctx.Collector.(*KNNCollector); ok {
|
||||
return func(d *search.DocumentMatch) error {
|
||||
if d == nil {
|
||||
return nil
|
||||
}
|
||||
toRelease := hc.knnStore.AddDocument(d)
|
||||
for _, doc := range toRelease {
|
||||
ctx.DocumentMatchPool.Put(doc)
|
||||
}
|
||||
return nil
|
||||
}, nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func GetNewKNNCollectorStore(kArray []int64) *collectStoreKNN {
|
||||
internalHeaps := make([]collectorStore, len(kArray))
|
||||
for knnIdx, k := range kArray {
|
||||
// TODO - Check if the datatype of k can be made into an int instead of int64
|
||||
idx := knnIdx
|
||||
internalHeaps[idx] = getOptimalCollectorStore(int(k), 0, func(i, j *search.DocumentMatch) int {
|
||||
if i.ScoreBreakdown[idx] < j.ScoreBreakdown[idx] {
|
||||
return 1
|
||||
}
|
||||
return -1
|
||||
})
|
||||
}
|
||||
return newStoreKNN(internalHeaps, kArray)
|
||||
}
|
||||
|
||||
// implements Collector interface
|
||||
type KNNCollector struct {
|
||||
knnStore *collectStoreKNN
|
||||
size int
|
||||
total uint64
|
||||
took time.Duration
|
||||
results search.DocumentMatchCollection
|
||||
maxScore float64
|
||||
}
|
||||
|
||||
func NewKNNCollector(kArray []int64, size int64) *KNNCollector {
|
||||
return &KNNCollector{
|
||||
knnStore: GetNewKNNCollectorStore(kArray),
|
||||
size: int(size),
|
||||
}
|
||||
}
|
||||
|
||||
func (hc *KNNCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error {
|
||||
startTime := time.Now()
|
||||
var err error
|
||||
var next *search.DocumentMatch
|
||||
|
||||
// pre-allocate enough space in the DocumentMatchPool
|
||||
// unless the sum of K is too large, then cap it
|
||||
// everything should still work, just allocates DocumentMatches on demand
|
||||
backingSize := hc.size
|
||||
if backingSize > PreAllocSizeSkipCap {
|
||||
backingSize = PreAllocSizeSkipCap + 1
|
||||
}
|
||||
searchContext := &search.SearchContext{
|
||||
DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), 0),
|
||||
Collector: hc,
|
||||
IndexReader: reader,
|
||||
}
|
||||
|
||||
dmHandlerMakerKNN := MakeKNNDocMatchHandler
|
||||
if cv := ctx.Value(search.MakeKNNDocumentMatchHandlerKey); cv != nil {
|
||||
dmHandlerMakerKNN = cv.(search.MakeKNNDocumentMatchHandler)
|
||||
}
|
||||
// use the application given builder for making the custom document match
|
||||
// handler and perform callbacks/invocations on the newly made handler.
|
||||
dmHandler, err := dmHandlerMakerKNN(searchContext)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
search.RecordSearchCost(ctx, search.AbortM, 0)
|
||||
return ctx.Err()
|
||||
default:
|
||||
next, err = searcher.Next(searchContext)
|
||||
}
|
||||
for err == nil && next != nil {
|
||||
if hc.total%CheckDoneEvery == 0 {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
search.RecordSearchCost(ctx, search.AbortM, 0)
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
}
|
||||
hc.total++
|
||||
|
||||
err = dmHandler(next)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
|
||||
next, err = searcher.Next(searchContext)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// help finalize/flush the results in case
|
||||
// of custom document match handlers.
|
||||
err = dmHandler(nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// compute search duration
|
||||
hc.took = time.Since(startTime)
|
||||
|
||||
// finalize actual results
|
||||
err = hc.finalizeResults(reader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (hc *KNNCollector) finalizeResults(r index.IndexReader) error {
|
||||
var err error
|
||||
hc.results, err = hc.knnStore.Final(func(doc *search.DocumentMatch) error {
|
||||
if doc.ID == "" {
|
||||
// look up the id since we need it for lookup
|
||||
var err error
|
||||
doc.ID, err = r.ExternalID(doc.IndexInternalID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
||||
func (hc *KNNCollector) Results() search.DocumentMatchCollection {
|
||||
return hc.results
|
||||
}
|
||||
|
||||
func (hc *KNNCollector) Total() uint64 {
|
||||
return hc.total
|
||||
}
|
||||
|
||||
func (hc *KNNCollector) MaxScore() float64 {
|
||||
return hc.maxScore
|
||||
}
|
||||
|
||||
func (hc *KNNCollector) Took() time.Duration {
|
||||
return hc.took
|
||||
}
|
||||
|
||||
func (hc *KNNCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
|
||||
// facet unsupported for vector search
|
||||
}
|
||||
|
||||
func (hc *KNNCollector) FacetResults() search.FacetResults {
|
||||
// facet unsupported for vector search
|
||||
return nil
|
||||
}
|
96
search/collector/list.go
Normal file
96
search/collector/list.go
Normal file
|
@ -0,0 +1,96 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"container/list"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
)
|
||||
|
||||
type collectStoreList struct {
|
||||
results *list.List
|
||||
compare collectorCompare
|
||||
}
|
||||
|
||||
func newStoreList(capacity int, compare collectorCompare) *collectStoreList {
|
||||
rv := &collectStoreList{
|
||||
results: list.New(),
|
||||
compare: compare,
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
func (c *collectStoreList) AddNotExceedingSize(doc *search.DocumentMatch, size int) *search.DocumentMatch {
|
||||
c.add(doc)
|
||||
if c.len() > size {
|
||||
return c.removeLast()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *collectStoreList) add(doc *search.DocumentMatch) {
|
||||
for e := c.results.Front(); e != nil; e = e.Next() {
|
||||
curr := e.Value.(*search.DocumentMatch)
|
||||
if c.compare(doc, curr) >= 0 {
|
||||
c.results.InsertBefore(doc, e)
|
||||
return
|
||||
}
|
||||
}
|
||||
// if we got to the end, we still have to add it
|
||||
c.results.PushBack(doc)
|
||||
}
|
||||
|
||||
func (c *collectStoreList) removeLast() *search.DocumentMatch {
|
||||
return c.results.Remove(c.results.Front()).(*search.DocumentMatch)
|
||||
}
|
||||
|
||||
func (c *collectStoreList) Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error) {
|
||||
if c.results.Len()-skip > 0 {
|
||||
rv := make(search.DocumentMatchCollection, c.results.Len()-skip)
|
||||
i := 0
|
||||
skipped := 0
|
||||
for e := c.results.Back(); e != nil; e = e.Prev() {
|
||||
if skipped < skip {
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
|
||||
rv[i] = e.Value.(*search.DocumentMatch)
|
||||
err := fixup(rv[i])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
i++
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
return search.DocumentMatchCollection{}, nil
|
||||
}
|
||||
|
||||
func (c *collectStoreList) Internal() search.DocumentMatchCollection {
|
||||
rv := make(search.DocumentMatchCollection, c.results.Len())
|
||||
i := 0
|
||||
for e := c.results.Front(); e != nil; e = e.Next() {
|
||||
rv[i] = e.Value.(*search.DocumentMatch)
|
||||
i++
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (c *collectStoreList) len() int {
|
||||
return c.results.Len()
|
||||
}
|
187
search/collector/search_test.go
Normal file
187
search/collector/search_test.go
Normal file
|
@ -0,0 +1,187 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
type stubSearcher struct {
|
||||
index int
|
||||
matches []*search.DocumentMatch
|
||||
}
|
||||
|
||||
func (ss *stubSearcher) SetBytesRead(val uint64) {
|
||||
|
||||
}
|
||||
|
||||
func (ss *stubSearcher) BytesRead() uint64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (ss *stubSearcher) Size() int {
|
||||
sizeInBytes := int(reflect.TypeOf(*ss).Size())
|
||||
|
||||
for _, entry := range ss.matches {
|
||||
if entry != nil {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (ss *stubSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
|
||||
if ss.index < len(ss.matches) {
|
||||
rv := ctx.DocumentMatchPool.Get()
|
||||
rv.IndexInternalID = ss.matches[ss.index].IndexInternalID
|
||||
rv.Score = ss.matches[ss.index].Score
|
||||
ss.index++
|
||||
return rv, nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (ss *stubSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
|
||||
|
||||
for ss.index < len(ss.matches) && ss.matches[ss.index].IndexInternalID.Compare(ID) < 0 {
|
||||
ss.index++
|
||||
}
|
||||
if ss.index < len(ss.matches) {
|
||||
rv := ctx.DocumentMatchPool.Get()
|
||||
rv.IndexInternalID = ss.matches[ss.index].IndexInternalID
|
||||
rv.Score = ss.matches[ss.index].Score
|
||||
ss.index++
|
||||
return rv, nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (ss *stubSearcher) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ss *stubSearcher) Weight() float64 {
|
||||
return 0.0
|
||||
}
|
||||
|
||||
func (ss *stubSearcher) SetQueryNorm(float64) {
|
||||
}
|
||||
|
||||
func (ss *stubSearcher) Count() uint64 {
|
||||
return uint64(len(ss.matches))
|
||||
}
|
||||
|
||||
func (ss *stubSearcher) Min() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (ss *stubSearcher) DocumentMatchPoolSize() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
type stubReader struct{}
|
||||
|
||||
func (sr *stubReader) Size() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (sr *stubReader) TermFieldReader(ctx context.Context, term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (sr *stubReader) DocIDReaderAll() (index.DocIDReader, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (sr *stubReader) DocIDReaderOnly(ids []string) (index.DocIDReader, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (sr *stubReader) FieldDict(field string) (index.FieldDict, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (sr *stubReader) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (sr *stubReader) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (sr *stubReader) Document(id string) (index.Document, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (sr *stubReader) DocumentVisitFieldTerms(id index.IndexInternalID, fields []string, visitor index.DocValueVisitor) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sr *stubReader) Fields() ([]string, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (sr *stubReader) GetInternal(key []byte) ([]byte, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (sr *stubReader) DocCount() (uint64, error) {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func (sr *stubReader) ExternalID(id index.IndexInternalID) (string, error) {
|
||||
return string(id), nil
|
||||
}
|
||||
|
||||
func (sr *stubReader) InternalID(id string) (index.IndexInternalID, error) {
|
||||
return []byte(id), nil
|
||||
}
|
||||
|
||||
func (sr *stubReader) DumpAll() chan interface{} {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sr *stubReader) DumpDoc(id string) chan interface{} {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sr *stubReader) DumpFields() chan interface{} {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sr *stubReader) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sr *stubReader) DocValueReader(fields []string) (index.DocValueReader, error) {
|
||||
return &DocValueReader{i: sr, fields: fields}, nil
|
||||
}
|
||||
|
||||
type DocValueReader struct {
|
||||
i *stubReader
|
||||
fields []string
|
||||
}
|
||||
|
||||
func (dvr *DocValueReader) VisitDocValues(id index.IndexInternalID, visitor index.DocValueVisitor) error {
|
||||
return dvr.i.DocumentVisitFieldTerms(id, dvr.fields, visitor)
|
||||
}
|
||||
func (dvr *DocValueReader) BytesRead() uint64 {
|
||||
return 0
|
||||
}
|
83
search/collector/slice.go
Normal file
83
search/collector/slice.go
Normal file
|
@ -0,0 +1,83 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
)
|
||||
|
||||
type collectStoreSlice struct {
|
||||
slice search.DocumentMatchCollection
|
||||
compare collectorCompare
|
||||
}
|
||||
|
||||
func newStoreSlice(capacity int, compare collectorCompare) *collectStoreSlice {
|
||||
rv := &collectStoreSlice{
|
||||
slice: make(search.DocumentMatchCollection, 0, capacity),
|
||||
compare: compare,
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (c *collectStoreSlice) AddNotExceedingSize(doc *search.DocumentMatch,
|
||||
size int) *search.DocumentMatch {
|
||||
c.add(doc)
|
||||
if c.len() > size {
|
||||
return c.removeLast()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *collectStoreSlice) add(doc *search.DocumentMatch) {
|
||||
// find where to insert, starting at end (lowest)
|
||||
i := len(c.slice)
|
||||
for ; i > 0; i-- {
|
||||
cmp := c.compare(doc, c.slice[i-1])
|
||||
if cmp >= 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
// insert at i
|
||||
c.slice = append(c.slice, nil)
|
||||
copy(c.slice[i+1:], c.slice[i:])
|
||||
c.slice[i] = doc
|
||||
}
|
||||
|
||||
func (c *collectStoreSlice) removeLast() *search.DocumentMatch {
|
||||
var rv *search.DocumentMatch
|
||||
rv, c.slice = c.slice[len(c.slice)-1], c.slice[:len(c.slice)-1]
|
||||
return rv
|
||||
}
|
||||
|
||||
func (c *collectStoreSlice) Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error) {
|
||||
for i := skip; i < len(c.slice); i++ {
|
||||
err := fixup(c.slice[i])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if skip <= len(c.slice) {
|
||||
return c.slice[skip:], nil
|
||||
}
|
||||
return search.DocumentMatchCollection{}, nil
|
||||
}
|
||||
|
||||
func (c *collectStoreSlice) Internal() search.DocumentMatchCollection {
|
||||
return c.slice
|
||||
}
|
||||
|
||||
func (c *collectStoreSlice) len() int {
|
||||
return len(c.slice)
|
||||
}
|
558
search/collector/topn.go
Normal file
558
search/collector/topn.go
Normal file
|
@ -0,0 +1,558 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/numeric"
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
var reflectStaticSizeTopNCollector int
|
||||
|
||||
func init() {
|
||||
var coll TopNCollector
|
||||
reflectStaticSizeTopNCollector = int(reflect.TypeOf(coll).Size())
|
||||
}
|
||||
|
||||
type collectorStore interface {
|
||||
// Add the document, and if the new store size exceeds the provided size
|
||||
// the last element is removed and returned. If the size has not been
|
||||
// exceeded, nil is returned.
|
||||
AddNotExceedingSize(doc *search.DocumentMatch, size int) *search.DocumentMatch
|
||||
|
||||
Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error)
|
||||
|
||||
// Provide access the internal heap implementation
|
||||
Internal() search.DocumentMatchCollection
|
||||
}
|
||||
|
||||
// PreAllocSizeSkipCap is the upper bound used for preallocation: whenever
// size+skip is larger than this value, only PreAllocSizeSkipCap+1 slots are
// reserved up front and further growth happens on demand.
var PreAllocSizeSkipCap = 1000
|
||||
|
||||
type collectorCompare func(i, j *search.DocumentMatch) int
|
||||
|
||||
type collectorFixup func(d *search.DocumentMatch) error
|
||||
|
||||
// TopNCollector collects the top N hits, optionally skipping some results
|
||||
type TopNCollector struct {
|
||||
size int
|
||||
skip int
|
||||
total uint64
|
||||
bytesRead uint64
|
||||
maxScore float64
|
||||
took time.Duration
|
||||
sort search.SortOrder
|
||||
results search.DocumentMatchCollection
|
||||
facetsBuilder *search.FacetsBuilder
|
||||
|
||||
store collectorStore
|
||||
|
||||
needDocIds bool
|
||||
neededFields []string
|
||||
cachedScoring []bool
|
||||
cachedDesc []bool
|
||||
|
||||
lowestMatchOutsideResults *search.DocumentMatch
|
||||
updateFieldVisitor index.DocValueVisitor
|
||||
dvReader index.DocValueReader
|
||||
searchAfter *search.DocumentMatch
|
||||
|
||||
knnHits map[string]*search.DocumentMatch
|
||||
computeNewScoreExpl search.ScoreExplCorrectionCallbackFunc
|
||||
}
|
||||
|
||||
// CheckDoneEvery is the number of hits processed between successive checks
// of the context for cancellation or deadline expiry.
const CheckDoneEvery uint64 = 1024
|
||||
|
||||
// NewTopNCollector builds a collector to find the top 'size' hits
|
||||
// skipping over the first 'skip' hits
|
||||
// ordering hits by the provided sort order
|
||||
func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
|
||||
return newTopNCollector(size, skip, sort)
|
||||
}
|
||||
|
||||
// NewTopNCollectorAfter builds a collector to find the top 'size' hits
|
||||
// skipping over the first 'skip' hits
|
||||
// ordering hits by the provided sort order
|
||||
func NewTopNCollectorAfter(size int, sort search.SortOrder, after []string) *TopNCollector {
|
||||
rv := newTopNCollector(size, 0, sort)
|
||||
rv.searchAfter = createSearchAfterDocument(sort, after)
|
||||
return rv
|
||||
}
|
||||
|
||||
func newTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
|
||||
hc := &TopNCollector{size: size, skip: skip, sort: sort}
|
||||
|
||||
hc.store = getOptimalCollectorStore(size, skip, func(i, j *search.DocumentMatch) int {
|
||||
return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j)
|
||||
})
|
||||
|
||||
// these lookups traverse an interface, so do once up-front
|
||||
if sort.RequiresDocID() {
|
||||
hc.needDocIds = true
|
||||
}
|
||||
hc.neededFields = sort.RequiredFields()
|
||||
hc.cachedScoring = sort.CacheIsScore()
|
||||
hc.cachedDesc = sort.CacheDescending()
|
||||
|
||||
return hc
|
||||
}
|
||||
|
||||
func createSearchAfterDocument(sort search.SortOrder, after []string) *search.DocumentMatch {
|
||||
rv := &search.DocumentMatch{
|
||||
Sort: after,
|
||||
}
|
||||
for pos, ss := range sort {
|
||||
if ss.RequiresDocID() {
|
||||
rv.ID = after[pos]
|
||||
}
|
||||
if ss.RequiresScoring() {
|
||||
if score, err := strconv.ParseFloat(after[pos], 64); err == nil {
|
||||
rv.Score = score
|
||||
}
|
||||
}
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
// Filter document matches based on the SearchAfter field in the SearchRequest.
|
||||
func FilterHitsBySearchAfter(hits []*search.DocumentMatch, sort search.SortOrder, after []string) []*search.DocumentMatch {
|
||||
if len(hits) == 0 {
|
||||
return hits
|
||||
}
|
||||
// create a search after document
|
||||
searchAfter := createSearchAfterDocument(sort, after)
|
||||
// filter the hits
|
||||
idx := 0
|
||||
cachedScoring := sort.CacheIsScore()
|
||||
cachedDesc := sort.CacheDescending()
|
||||
for _, hit := range hits {
|
||||
if sort.Compare(cachedScoring, cachedDesc, hit, searchAfter) > 0 {
|
||||
hits[idx] = hit
|
||||
idx++
|
||||
}
|
||||
}
|
||||
return hits[:idx]
|
||||
}
|
||||
|
||||
func getOptimalCollectorStore(size, skip int, comparator collectorCompare) collectorStore {
|
||||
// pre-allocate space on the store to avoid reslicing
|
||||
// unless the size + skip is too large, then cap it
|
||||
// everything should still work, just reslices as necessary
|
||||
backingSize := size + skip + 1
|
||||
if size+skip > PreAllocSizeSkipCap {
|
||||
backingSize = PreAllocSizeSkipCap + 1
|
||||
}
|
||||
|
||||
if size+skip > 10 {
|
||||
return newStoreHeap(backingSize, comparator)
|
||||
} else {
|
||||
return newStoreSlice(backingSize, comparator)
|
||||
}
|
||||
}
|
||||
|
||||
func (hc *TopNCollector) Size() int {
|
||||
sizeInBytes := reflectStaticSizeTopNCollector + size.SizeOfPtr
|
||||
|
||||
if hc.facetsBuilder != nil {
|
||||
sizeInBytes += hc.facetsBuilder.Size()
|
||||
}
|
||||
|
||||
for _, entry := range hc.neededFields {
|
||||
sizeInBytes += len(entry) + size.SizeOfString
|
||||
}
|
||||
|
||||
sizeInBytes += len(hc.cachedScoring) + len(hc.cachedDesc)
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
// Collect goes to the index to find the matching documents
|
||||
func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error {
|
||||
startTime := time.Now()
|
||||
var err error
|
||||
var next *search.DocumentMatch
|
||||
|
||||
// pre-allocate enough space in the DocumentMatchPool
|
||||
// unless the size + skip is too large, then cap it
|
||||
// everything should still work, just allocates DocumentMatches on demand
|
||||
backingSize := hc.size + hc.skip + 1
|
||||
if hc.size+hc.skip > PreAllocSizeSkipCap {
|
||||
backingSize = PreAllocSizeSkipCap + 1
|
||||
}
|
||||
searchContext := &search.SearchContext{
|
||||
DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)),
|
||||
Collector: hc,
|
||||
IndexReader: reader,
|
||||
}
|
||||
|
||||
hc.dvReader, err = reader.DocValueReader(hc.neededFields)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
hc.updateFieldVisitor = func(field string, term []byte) {
|
||||
if hc.facetsBuilder != nil {
|
||||
hc.facetsBuilder.UpdateVisitor(field, term)
|
||||
}
|
||||
hc.sort.UpdateVisitor(field, term)
|
||||
}
|
||||
|
||||
dmHandlerMaker := MakeTopNDocumentMatchHandler
|
||||
if cv := ctx.Value(search.MakeDocumentMatchHandlerKey); cv != nil {
|
||||
dmHandlerMaker = cv.(search.MakeDocumentMatchHandler)
|
||||
}
|
||||
// use the application given builder for making the custom document match
|
||||
// handler and perform callbacks/invocations on the newly made handler.
|
||||
dmHandler, loadID, err := dmHandlerMaker(searchContext)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
hc.needDocIds = hc.needDocIds || loadID
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
search.RecordSearchCost(ctx, search.AbortM, 0)
|
||||
return ctx.Err()
|
||||
default:
|
||||
next, err = searcher.Next(searchContext)
|
||||
}
|
||||
for err == nil && next != nil {
|
||||
if hc.total%CheckDoneEvery == 0 {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
search.RecordSearchCost(ctx, search.AbortM, 0)
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
err = hc.adjustDocumentMatch(searchContext, reader, next)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
|
||||
err = hc.prepareDocumentMatch(searchContext, reader, next, false)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
|
||||
err = dmHandler(next)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
|
||||
next, err = searcher.Next(searchContext)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if hc.knnHits != nil {
|
||||
// we may have some knn hits left that did not match any of the top N tf-idf hits
|
||||
// we need to add them to the collector store to consider them as well.
|
||||
for _, knnDoc := range hc.knnHits {
|
||||
err = hc.prepareDocumentMatch(searchContext, reader, knnDoc, true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = dmHandler(knnDoc)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
statsCallbackFn := ctx.Value(search.SearchIOStatsCallbackKey)
|
||||
if statsCallbackFn != nil {
|
||||
// hc.bytesRead corresponds to the
|
||||
// total bytes read as part of docValues being read every hit
|
||||
// which must be accounted by invoking the callback.
|
||||
statsCallbackFn.(search.SearchIOStatsCallbackFunc)(hc.bytesRead)
|
||||
|
||||
search.RecordSearchCost(ctx, search.AddM, hc.bytesRead)
|
||||
}
|
||||
|
||||
// help finalize/flush the results in case
|
||||
// of custom document match handlers.
|
||||
err = dmHandler(nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// compute search duration
|
||||
hc.took = time.Since(startTime)
|
||||
|
||||
// finalize actual results
|
||||
err = hc.finalizeResults(reader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// sortByScoreOpt is the shared Sort value assigned to a hit when the sort
// order consists of a single, score-based entry (see prepareDocumentMatch).
var sortByScoreOpt = []string{"_score"}
|
||||
|
||||
func (hc *TopNCollector) adjustDocumentMatch(ctx *search.SearchContext,
|
||||
reader index.IndexReader, d *search.DocumentMatch) (err error) {
|
||||
if hc.knnHits != nil {
|
||||
d.ID, err = reader.ExternalID(d.IndexInternalID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if knnHit, ok := hc.knnHits[d.ID]; ok {
|
||||
d.Score, d.Expl = hc.computeNewScoreExpl(d, knnHit)
|
||||
delete(hc.knnHits, d.ID)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (hc *TopNCollector) prepareDocumentMatch(ctx *search.SearchContext,
|
||||
reader index.IndexReader, d *search.DocumentMatch, isKnnDoc bool) (err error) {
|
||||
|
||||
// visit field terms for features that require it (sort, facets)
|
||||
if !isKnnDoc && len(hc.neededFields) > 0 {
|
||||
err = hc.visitFieldTerms(reader, d, hc.updateFieldVisitor)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else if isKnnDoc && hc.facetsBuilder != nil {
|
||||
// we need to visit the field terms for the knn document
|
||||
// only for those fields that are required for faceting
|
||||
// and not for sorting. This is because the knn document's
|
||||
// sort value is already computed in the knn collector.
|
||||
err = hc.visitFieldTerms(reader, d, func(field string, term []byte) {
|
||||
if hc.facetsBuilder != nil {
|
||||
hc.facetsBuilder.UpdateVisitor(field, term)
|
||||
}
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// increment total hits
|
||||
hc.total++
|
||||
d.HitNumber = hc.total
|
||||
|
||||
// update max score
|
||||
if d.Score > hc.maxScore {
|
||||
hc.maxScore = d.Score
|
||||
}
|
||||
// early exit as the document match had its sort value calculated in the knn
|
||||
// collector itself
|
||||
if isKnnDoc {
|
||||
return nil
|
||||
}
|
||||
|
||||
// see if we need to load ID (at this early stage, for example to sort on it)
|
||||
if hc.needDocIds && d.ID == "" {
|
||||
d.ID, err = reader.ExternalID(d.IndexInternalID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// compute this hits sort value
|
||||
if len(hc.sort) == 1 && hc.cachedScoring[0] {
|
||||
d.Sort = sortByScoreOpt
|
||||
} else {
|
||||
hc.sort.Value(d)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func MakeTopNDocumentMatchHandler(
|
||||
ctx *search.SearchContext) (search.DocumentMatchHandler, bool, error) {
|
||||
var hc *TopNCollector
|
||||
var ok bool
|
||||
if hc, ok = ctx.Collector.(*TopNCollector); ok {
|
||||
return func(d *search.DocumentMatch) error {
|
||||
if d == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// support search after based pagination,
|
||||
// if this hit is <= the search after sort key
|
||||
// we should skip it
|
||||
if hc.searchAfter != nil {
|
||||
// exact sort order matches use hit number to break tie
|
||||
// but we want to allow for exact match, so we pretend
|
||||
hc.searchAfter.HitNumber = d.HitNumber
|
||||
if hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.searchAfter) <= 0 {
|
||||
ctx.DocumentMatchPool.Put(d)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// optimization, we track lowest sorting hit already removed from heap
|
||||
// with this one comparison, we can avoid all heap operations if
|
||||
// this hit would have been added and then immediately removed
|
||||
if hc.lowestMatchOutsideResults != nil {
|
||||
cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d,
|
||||
hc.lowestMatchOutsideResults)
|
||||
if cmp >= 0 {
|
||||
// this hit can't possibly be in the result set, so avoid heap ops
|
||||
ctx.DocumentMatchPool.Put(d)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
removed := hc.store.AddNotExceedingSize(d, hc.size+hc.skip)
|
||||
if removed != nil {
|
||||
if hc.lowestMatchOutsideResults == nil {
|
||||
hc.lowestMatchOutsideResults = removed
|
||||
} else {
|
||||
cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc,
|
||||
removed, hc.lowestMatchOutsideResults)
|
||||
if cmp < 0 {
|
||||
tmp := hc.lowestMatchOutsideResults
|
||||
hc.lowestMatchOutsideResults = removed
|
||||
ctx.DocumentMatchPool.Put(tmp)
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}, false, nil
|
||||
}
|
||||
return nil, false, nil
|
||||
}
|
||||
|
||||
// visitFieldTerms is responsible for visiting the field terms of the
|
||||
// search hit, and passing visited terms to the sort and facet builder
|
||||
func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.DocumentMatch, v index.DocValueVisitor) error {
|
||||
if hc.facetsBuilder != nil {
|
||||
hc.facetsBuilder.StartDoc()
|
||||
}
|
||||
if d.ID != "" && d.IndexInternalID == nil {
|
||||
// this document may have been sent over as preSearchData and
|
||||
// we need to look up the internal id to visit the doc values for it
|
||||
var err error
|
||||
d.IndexInternalID, err = reader.InternalID(d.ID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
err := hc.dvReader.VisitDocValues(d.IndexInternalID, v)
|
||||
if hc.facetsBuilder != nil {
|
||||
hc.facetsBuilder.EndDoc()
|
||||
}
|
||||
|
||||
hc.bytesRead += hc.dvReader.BytesRead()
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// SetFacetsBuilder registers a facet builder for this collector
|
||||
func (hc *TopNCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
|
||||
hc.facetsBuilder = facetsBuilder
|
||||
fieldsRequiredForFaceting := facetsBuilder.RequiredFields()
|
||||
// for each of these fields, append only if not already there in hc.neededFields.
|
||||
for _, field := range fieldsRequiredForFaceting {
|
||||
found := false
|
||||
for _, neededField := range hc.neededFields {
|
||||
if field == neededField {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
hc.neededFields = append(hc.neededFields, field)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// finalizeResults starts with the heap containing the final top size+skip
|
||||
// it now throws away the results to be skipped
|
||||
// and does final doc id lookup (if necessary)
|
||||
func (hc *TopNCollector) finalizeResults(r index.IndexReader) error {
|
||||
var err error
|
||||
hc.results, err = hc.store.Final(hc.skip, func(doc *search.DocumentMatch) error {
|
||||
if doc.ID == "" {
|
||||
// look up the id since we need it for lookup
|
||||
var err error
|
||||
doc.ID, err = r.ExternalID(doc.IndexInternalID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
doc.Complete(nil)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Decode geo sort keys back to its distance values
|
||||
for i, so := range hc.sort {
|
||||
if _, ok := so.(*search.SortGeoDistance); ok {
|
||||
for _, dm := range hc.results {
|
||||
// The string is a int64 bit representation of a float64 distance
|
||||
distInt, err := numeric.PrefixCoded(dm.Sort[i]).Int64()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dm.Sort[i] = strconv.FormatFloat(numeric.Int64ToFloat64(distInt), 'f', -1, 64)
|
||||
}
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Results returns the collected hits
|
||||
func (hc *TopNCollector) Results() search.DocumentMatchCollection {
|
||||
return hc.results
|
||||
}
|
||||
|
||||
// Total returns the total number of hits
|
||||
func (hc *TopNCollector) Total() uint64 {
|
||||
return hc.total
|
||||
}
|
||||
|
||||
// MaxScore returns the maximum score seen across all the hits
|
||||
func (hc *TopNCollector) MaxScore() float64 {
|
||||
return hc.maxScore
|
||||
}
|
||||
|
||||
// Took returns the time spent collecting hits
|
||||
func (hc *TopNCollector) Took() time.Duration {
|
||||
return hc.took
|
||||
}
|
||||
|
||||
// FacetResults returns the computed facets results
|
||||
func (hc *TopNCollector) FacetResults() search.FacetResults {
|
||||
if hc.facetsBuilder != nil {
|
||||
return hc.facetsBuilder.Results()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (hc *TopNCollector) SetKNNHits(knnHits search.DocumentMatchCollection, newScoreExplComputer search.ScoreExplCorrectionCallbackFunc) {
|
||||
hc.knnHits = make(map[string]*search.DocumentMatch, len(knnHits))
|
||||
for _, hit := range knnHits {
|
||||
hc.knnHits[hit.ID] = hit
|
||||
}
|
||||
hc.computeNewScoreExpl = newScoreExplComputer
|
||||
}
|
868
search/collector/topn_test.go
Normal file
868
search/collector/topn_test.go
Normal file
|
@ -0,0 +1,868 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/index/scorch"
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/search/facet"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
func TestTop10Scores(t *testing.T) {
|
||||
// a stub search with more than 10 matches
|
||||
// the top-10 scores are > 10
|
||||
// everything else is less than 10
|
||||
searcher := &stubSearcher{
|
||||
matches: []*search.DocumentMatch{
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("a"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("b"),
|
||||
Score: 9,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("c"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("d"),
|
||||
Score: 9,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("e"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("f"),
|
||||
Score: 9,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("g"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("h"),
|
||||
Score: 9,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("i"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("j"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("k"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("l"),
|
||||
Score: 99,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("m"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("n"),
|
||||
Score: 11,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
collector := NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
err := collector.Collect(context.Background(), searcher, &stubReader{})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
maxScore := collector.MaxScore()
|
||||
if maxScore != 99.0 {
|
||||
t.Errorf("expected max score 99.0, got %f", maxScore)
|
||||
}
|
||||
|
||||
total := collector.Total()
|
||||
if total != 14 {
|
||||
t.Errorf("expected 14 total results, got %d", total)
|
||||
}
|
||||
|
||||
results := collector.Results()
|
||||
|
||||
if len(results) != 10 {
|
||||
t.Logf("results: %v", results)
|
||||
t.Fatalf("expected 10 results, got %d", len(results))
|
||||
}
|
||||
|
||||
if results[0].ID != "l" {
|
||||
t.Errorf("expected first result to have ID 'l', got %s", results[0].ID)
|
||||
}
|
||||
|
||||
if results[0].Score != 99.0 {
|
||||
t.Errorf("expected highest score to be 99.0, got %f", results[0].Score)
|
||||
}
|
||||
|
||||
minScore := 1000.0
|
||||
for _, result := range results {
|
||||
if result.Score < minScore {
|
||||
minScore = result.Score
|
||||
}
|
||||
}
|
||||
|
||||
if minScore < 10 {
|
||||
t.Errorf("expected minimum score to be higher than 10, got %f", minScore)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTop10ScoresSkip10(t *testing.T) {
|
||||
// a stub search with more than 10 matches
|
||||
// the top-10 scores are > 10
|
||||
// everything else is less than 10
|
||||
searcher := &stubSearcher{
|
||||
matches: []*search.DocumentMatch{
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("a"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("b"),
|
||||
Score: 9.5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("c"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("d"),
|
||||
Score: 9,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("e"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("f"),
|
||||
Score: 9,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("g"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("h"),
|
||||
Score: 9,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("i"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("j"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("k"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("l"),
|
||||
Score: 99,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("m"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("n"),
|
||||
Score: 11,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
collector := NewTopNCollector(10, 10, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
err := collector.Collect(context.Background(), searcher, &stubReader{})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
maxScore := collector.MaxScore()
|
||||
if maxScore != 99.0 {
|
||||
t.Errorf("expected max score 99.0, got %f", maxScore)
|
||||
}
|
||||
|
||||
total := collector.Total()
|
||||
if total != 14 {
|
||||
t.Errorf("expected 14 total results, got %d", total)
|
||||
}
|
||||
|
||||
results := collector.Results()
|
||||
|
||||
if len(results) != 4 {
|
||||
t.Fatalf("expected 4 results, got %d", len(results))
|
||||
}
|
||||
|
||||
if results[0].ID != "b" {
|
||||
t.Errorf("expected first result to have ID 'b', got %s", results[0].ID)
|
||||
}
|
||||
|
||||
if results[0].Score != 9.5 {
|
||||
t.Errorf("expected highest score to be 9.5, got %f", results[0].Score)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTop10ScoresSkip10Only9Hits(t *testing.T) {
|
||||
// a stub search with only 10 matches
|
||||
searcher := &stubSearcher{
|
||||
matches: []*search.DocumentMatch{
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("a"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("c"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("e"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("g"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("i"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("j"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("k"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("m"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("n"),
|
||||
Score: 11,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
collector := NewTopNCollector(10, 10, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
err := collector.Collect(context.Background(), searcher, &stubReader{})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
total := collector.Total()
|
||||
if total != 9 {
|
||||
t.Errorf("expected 9 total results, got %d", total)
|
||||
}
|
||||
|
||||
results := collector.Results()
|
||||
|
||||
if len(results) != 0 {
|
||||
t.Fatalf("expected 0 results, got %d", len(results))
|
||||
}
|
||||
}
|
||||
|
||||
func TestPaginationSameScores(t *testing.T) {
|
||||
// a stub search with more than 10 matches
|
||||
// all documents have the same score
|
||||
searcher := &stubSearcher{
|
||||
matches: []*search.DocumentMatch{
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("a"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("b"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("c"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("d"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("e"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("f"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("g"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("h"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("i"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("j"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("k"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("l"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("m"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("n"),
|
||||
Score: 5,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// first get first 5 hits
|
||||
collector := NewTopNCollector(5, 0, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
err := collector.Collect(context.Background(), searcher, &stubReader{})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
total := collector.Total()
|
||||
if total != 14 {
|
||||
t.Errorf("expected 14 total results, got %d", total)
|
||||
}
|
||||
|
||||
results := collector.Results()
|
||||
|
||||
if len(results) != 5 {
|
||||
t.Fatalf("expected 5 results, got %d", len(results))
|
||||
}
|
||||
|
||||
firstResults := make(map[string]struct{})
|
||||
for _, hit := range results {
|
||||
firstResults[hit.ID] = struct{}{}
|
||||
}
|
||||
|
||||
// a stub search with more than 10 matches
|
||||
// all documents have the same score
|
||||
searcher = &stubSearcher{
|
||||
matches: []*search.DocumentMatch{
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("a"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("b"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("c"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("d"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("e"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("f"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("g"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("h"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("i"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("j"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("k"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("l"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("m"),
|
||||
Score: 5,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("n"),
|
||||
Score: 5,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// now get next 5 hits
|
||||
collector = NewTopNCollector(5, 5, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
err = collector.Collect(context.Background(), searcher, &stubReader{})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
total = collector.Total()
|
||||
if total != 14 {
|
||||
t.Errorf("expected 14 total results, got %d", total)
|
||||
}
|
||||
|
||||
results = collector.Results()
|
||||
|
||||
if len(results) != 5 {
|
||||
t.Fatalf("expected 5 results, got %d", len(results))
|
||||
}
|
||||
|
||||
// make sure that none of these hits repeat ones we saw in the top 5
|
||||
for _, hit := range results {
|
||||
if _, ok := firstResults[hit.ID]; ok {
|
||||
t.Errorf("doc ID %s is in top 5 and next 5 result sets", hit.ID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestStreamResults verifies the search.DocumentMatchHandler
|
||||
func TestStreamResults(t *testing.T) {
|
||||
matches := []*search.DocumentMatch{
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("a"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("b"),
|
||||
Score: 1,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("c"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("d"),
|
||||
Score: 999,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("e"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("f"),
|
||||
Score: 9,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("g"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("h"),
|
||||
Score: 89,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("i"),
|
||||
Score: 101,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("j"),
|
||||
Score: 112,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("k"),
|
||||
Score: 10,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("l"),
|
||||
Score: 99,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("m"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("n"),
|
||||
Score: 111,
|
||||
},
|
||||
}
|
||||
|
||||
searcher := &stubSearcher{
|
||||
matches: matches,
|
||||
}
|
||||
ind := 0
|
||||
docMatchHandler := func(hit *search.DocumentMatch) error {
|
||||
if hit == nil {
|
||||
return nil // search completed
|
||||
}
|
||||
if !bytes.Equal(hit.IndexInternalID, matches[ind].IndexInternalID) {
|
||||
t.Errorf("%d hit IndexInternalID actual: %s, expected: %s",
|
||||
ind, hit.IndexInternalID, matches[ind].IndexInternalID)
|
||||
}
|
||||
if hit.Score != matches[ind].Score {
|
||||
t.Errorf("%d hit Score actual: %s, expected: %s",
|
||||
ind, hit.IndexInternalID, matches[ind].IndexInternalID)
|
||||
}
|
||||
ind++
|
||||
return nil
|
||||
}
|
||||
|
||||
var handlerMaker search.MakeDocumentMatchHandler = func(ctx *search.SearchContext) (search.DocumentMatchHandler, bool, error) {
|
||||
return docMatchHandler, false, nil
|
||||
}
|
||||
|
||||
ctx := context.WithValue(context.Background(), search.MakeDocumentMatchHandlerKey, handlerMaker)
|
||||
|
||||
collector := NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
err := collector.Collect(ctx, searcher, &stubReader{})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
maxScore := collector.MaxScore()
|
||||
if maxScore != 999.0 {
|
||||
t.Errorf("expected max score 99.0, got %f", maxScore)
|
||||
}
|
||||
|
||||
total := collector.Total()
|
||||
if int(total) != ind {
|
||||
t.Errorf("expected 14 total results, got %d", total)
|
||||
}
|
||||
|
||||
results := collector.Results()
|
||||
|
||||
if len(results) != 0 {
|
||||
t.Fatalf("expected 0 results, got %d", len(results))
|
||||
}
|
||||
}
|
||||
|
||||
// TestCollectorChaining verifies the chaining of collectors.
|
||||
// The custom DocumentMatchHandler can process every hit for
|
||||
// the search query and then pass the hit to the topn collector
|
||||
// to eventually have the sorted top `N` results.
|
||||
func TestCollectorChaining(t *testing.T) {
|
||||
matches := []*search.DocumentMatch{
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("a"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("b"),
|
||||
Score: 1,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("c"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("d"),
|
||||
Score: 999,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("e"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("f"),
|
||||
Score: 9,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("g"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("h"),
|
||||
Score: 89,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("i"),
|
||||
Score: 101,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("j"),
|
||||
Score: 112,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("k"),
|
||||
Score: 10,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("l"),
|
||||
Score: 99,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("m"),
|
||||
Score: 11,
|
||||
},
|
||||
{
|
||||
IndexInternalID: index.IndexInternalID("n"),
|
||||
Score: 111,
|
||||
},
|
||||
}
|
||||
|
||||
searcher := &stubSearcher{
|
||||
matches: matches,
|
||||
}
|
||||
|
||||
var topNHandler search.DocumentMatchHandler
|
||||
ind := 0
|
||||
docMatchHandler := func(hit *search.DocumentMatch) error {
|
||||
if hit == nil {
|
||||
return nil // search completed
|
||||
}
|
||||
if !bytes.Equal(hit.IndexInternalID, matches[ind].IndexInternalID) {
|
||||
t.Errorf("%d hit IndexInternalID actual: %s, expected: %s",
|
||||
ind, hit.IndexInternalID, matches[ind].IndexInternalID)
|
||||
}
|
||||
if hit.Score != matches[ind].Score {
|
||||
t.Errorf("%d hit Score actual: %s, expected: %s",
|
||||
ind, hit.IndexInternalID, matches[ind].IndexInternalID)
|
||||
}
|
||||
ind++
|
||||
// give the hit back to the topN collector
|
||||
err := topNHandler(hit)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected err: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var handlerMaker search.MakeDocumentMatchHandler = func(ctx *search.SearchContext) (search.DocumentMatchHandler, bool, error) {
|
||||
topNHandler, _, _ = MakeTopNDocumentMatchHandler(ctx)
|
||||
return docMatchHandler, false, nil
|
||||
}
|
||||
|
||||
ctx := context.WithValue(context.Background(), search.MakeDocumentMatchHandlerKey,
|
||||
handlerMaker)
|
||||
|
||||
collector := NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
err := collector.Collect(ctx, searcher, &stubReader{})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
maxScore := collector.MaxScore()
|
||||
if maxScore != 999.0 {
|
||||
t.Errorf("expected max score 99.0, got %f", maxScore)
|
||||
}
|
||||
|
||||
total := collector.Total()
|
||||
if int(total) != ind {
|
||||
t.Errorf("expected 14 total results, got %d", total)
|
||||
}
|
||||
|
||||
results := collector.Results()
|
||||
|
||||
if len(results) != 10 { // as it is paged
|
||||
t.Fatalf("expected 0 results, got %d", len(results))
|
||||
}
|
||||
|
||||
if results[0].ID != "d" {
|
||||
t.Errorf("expected first result to have ID 'l', got %s", results[0].ID)
|
||||
}
|
||||
|
||||
if results[0].Score != 999.0 {
|
||||
t.Errorf("expected highest score to be 999.0, got %f", results[0].Score)
|
||||
}
|
||||
|
||||
minScore := 1000.0
|
||||
for _, result := range results {
|
||||
if result.Score < minScore {
|
||||
minScore = result.Score
|
||||
}
|
||||
}
|
||||
|
||||
if minScore < 10 {
|
||||
t.Errorf("expected minimum score to be higher than 10, got %f", minScore)
|
||||
}
|
||||
}
|
||||
|
||||
func setupIndex(t *testing.T) index.Index {
|
||||
analysisQueue := index.NewAnalysisQueue(1)
|
||||
i, err := scorch.NewScorch(
|
||||
scorch.Name,
|
||||
map[string]interface{}{
|
||||
"path": "",
|
||||
},
|
||||
analysisQueue)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
err = i.Open()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
return i
|
||||
}
|
||||
|
||||
func TestSetFacetsBuilder(t *testing.T) {
|
||||
// Field common to both sorting and faceting.
|
||||
sortFacetsField := "locations"
|
||||
|
||||
coll := NewTopNCollector(10, 0, search.SortOrder{&search.SortField{Field: sortFacetsField}})
|
||||
|
||||
i := setupIndex(t)
|
||||
indexReader, err := i.Reader()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
fb := search.NewFacetsBuilder(indexReader)
|
||||
facetBuilder := facet.NewTermsFacetBuilder(sortFacetsField, 100)
|
||||
fb.Add("locations_facet", facetBuilder)
|
||||
coll.SetFacetsBuilder(fb)
|
||||
|
||||
// Should not duplicate the "locations" field in the collector.
|
||||
if len(coll.neededFields) != 1 || coll.neededFields[0] != sortFacetsField {
|
||||
t.Errorf("expected fields in collector: %v, observed: %v", []string{sortFacetsField}, coll.neededFields)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkTop10of0Scores(b *testing.B) {
|
||||
benchHelper(0, func() search.Collector {
|
||||
return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
}, b)
|
||||
}
|
||||
|
||||
func BenchmarkTop10of3Scores(b *testing.B) {
|
||||
benchHelper(3, func() search.Collector {
|
||||
return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
}, b)
|
||||
}
|
||||
|
||||
func BenchmarkTop10of10Scores(b *testing.B) {
|
||||
benchHelper(10, func() search.Collector {
|
||||
return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
}, b)
|
||||
}
|
||||
|
||||
func BenchmarkTop10of25Scores(b *testing.B) {
|
||||
benchHelper(25, func() search.Collector {
|
||||
return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
}, b)
|
||||
}
|
||||
|
||||
func BenchmarkTop10of50Scores(b *testing.B) {
|
||||
benchHelper(50, func() search.Collector {
|
||||
return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
}, b)
|
||||
}
|
||||
|
||||
func BenchmarkTop10of10000Scores(b *testing.B) {
|
||||
benchHelper(10000, func() search.Collector {
|
||||
return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
}, b)
|
||||
}
|
||||
|
||||
func BenchmarkTop100of0Scores(b *testing.B) {
|
||||
benchHelper(0, func() search.Collector {
|
||||
return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
}, b)
|
||||
}
|
||||
|
||||
func BenchmarkTop100of3Scores(b *testing.B) {
|
||||
benchHelper(3, func() search.Collector {
|
||||
return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
}, b)
|
||||
}
|
||||
|
||||
func BenchmarkTop100of10Scores(b *testing.B) {
|
||||
benchHelper(10, func() search.Collector {
|
||||
return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
}, b)
|
||||
}
|
||||
|
||||
func BenchmarkTop100of25Scores(b *testing.B) {
|
||||
benchHelper(25, func() search.Collector {
|
||||
return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
}, b)
|
||||
}
|
||||
|
||||
func BenchmarkTop100of50Scores(b *testing.B) {
|
||||
benchHelper(50, func() search.Collector {
|
||||
return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
}, b)
|
||||
}
|
||||
|
||||
func BenchmarkTop100of10000Scores(b *testing.B) {
|
||||
benchHelper(10000, func() search.Collector {
|
||||
return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
}, b)
|
||||
}
|
||||
|
||||
func BenchmarkTop1000of10000Scores(b *testing.B) {
|
||||
benchHelper(10000, func() search.Collector {
|
||||
return NewTopNCollector(1000, 0, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
}, b)
|
||||
}
|
||||
|
||||
func BenchmarkTop10000of100000Scores(b *testing.B) {
|
||||
benchHelper(100000, func() search.Collector {
|
||||
return NewTopNCollector(10000, 0, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
}, b)
|
||||
}
|
||||
|
||||
func BenchmarkTop10of100000Scores(b *testing.B) {
|
||||
benchHelper(100000, func() search.Collector {
|
||||
return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
}, b)
|
||||
}
|
||||
|
||||
func BenchmarkTop100of100000Scores(b *testing.B) {
|
||||
benchHelper(100000, func() search.Collector {
|
||||
return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
}, b)
|
||||
}
|
||||
|
||||
func BenchmarkTop1000of100000Scores(b *testing.B) {
|
||||
benchHelper(100000, func() search.Collector {
|
||||
return NewTopNCollector(1000, 0, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
}, b)
|
||||
}
|
||||
|
||||
func BenchmarkTop10000of1000000Scores(b *testing.B) {
|
||||
benchHelper(1000000, func() search.Collector {
|
||||
return NewTopNCollector(10000, 0, search.SortOrder{&search.SortScore{Desc: true}})
|
||||
}, b)
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue