1
0
Fork 0
golang-github-blevesearch-b.../search/collector/topn_test.go
Daniel Baumann 982828099e
Adding upstream version 2.5.1.
Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-05-19 00:20:02 +02:00

868 lines
21 KiB
Go

// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"bytes"
"context"
"testing"
"github.com/blevesearch/bleve/v2/index/scorch"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/facet"
index "github.com/blevesearch/bleve_index_api"
)
func TestTop10Scores(t *testing.T) {
// a stub search with more than 10 matches
// the top-10 scores are > 10
// everything else is less than 10
searcher := &stubSearcher{
matches: []*search.DocumentMatch{
{
IndexInternalID: index.IndexInternalID("a"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("b"),
Score: 9,
},
{
IndexInternalID: index.IndexInternalID("c"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("d"),
Score: 9,
},
{
IndexInternalID: index.IndexInternalID("e"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("f"),
Score: 9,
},
{
IndexInternalID: index.IndexInternalID("g"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("h"),
Score: 9,
},
{
IndexInternalID: index.IndexInternalID("i"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("j"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("k"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("l"),
Score: 99,
},
{
IndexInternalID: index.IndexInternalID("m"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("n"),
Score: 11,
},
},
}
collector := NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
err := collector.Collect(context.Background(), searcher, &stubReader{})
if err != nil {
t.Fatal(err)
}
maxScore := collector.MaxScore()
if maxScore != 99.0 {
t.Errorf("expected max score 99.0, got %f", maxScore)
}
total := collector.Total()
if total != 14 {
t.Errorf("expected 14 total results, got %d", total)
}
results := collector.Results()
if len(results) != 10 {
t.Logf("results: %v", results)
t.Fatalf("expected 10 results, got %d", len(results))
}
if results[0].ID != "l" {
t.Errorf("expected first result to have ID 'l', got %s", results[0].ID)
}
if results[0].Score != 99.0 {
t.Errorf("expected highest score to be 99.0, got %f", results[0].Score)
}
minScore := 1000.0
for _, result := range results {
if result.Score < minScore {
minScore = result.Score
}
}
if minScore < 10 {
t.Errorf("expected minimum score to be higher than 10, got %f", minScore)
}
}
func TestTop10ScoresSkip10(t *testing.T) {
// a stub search with more than 10 matches
// the top-10 scores are > 10
// everything else is less than 10
searcher := &stubSearcher{
matches: []*search.DocumentMatch{
{
IndexInternalID: index.IndexInternalID("a"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("b"),
Score: 9.5,
},
{
IndexInternalID: index.IndexInternalID("c"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("d"),
Score: 9,
},
{
IndexInternalID: index.IndexInternalID("e"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("f"),
Score: 9,
},
{
IndexInternalID: index.IndexInternalID("g"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("h"),
Score: 9,
},
{
IndexInternalID: index.IndexInternalID("i"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("j"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("k"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("l"),
Score: 99,
},
{
IndexInternalID: index.IndexInternalID("m"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("n"),
Score: 11,
},
},
}
collector := NewTopNCollector(10, 10, search.SortOrder{&search.SortScore{Desc: true}})
err := collector.Collect(context.Background(), searcher, &stubReader{})
if err != nil {
t.Fatal(err)
}
maxScore := collector.MaxScore()
if maxScore != 99.0 {
t.Errorf("expected max score 99.0, got %f", maxScore)
}
total := collector.Total()
if total != 14 {
t.Errorf("expected 14 total results, got %d", total)
}
results := collector.Results()
if len(results) != 4 {
t.Fatalf("expected 4 results, got %d", len(results))
}
if results[0].ID != "b" {
t.Errorf("expected first result to have ID 'b', got %s", results[0].ID)
}
if results[0].Score != 9.5 {
t.Errorf("expected highest score to be 9.5, got %f", results[0].Score)
}
}
func TestTop10ScoresSkip10Only9Hits(t *testing.T) {
// a stub search with only 10 matches
searcher := &stubSearcher{
matches: []*search.DocumentMatch{
{
IndexInternalID: index.IndexInternalID("a"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("c"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("e"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("g"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("i"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("j"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("k"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("m"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("n"),
Score: 11,
},
},
}
collector := NewTopNCollector(10, 10, search.SortOrder{&search.SortScore{Desc: true}})
err := collector.Collect(context.Background(), searcher, &stubReader{})
if err != nil {
t.Fatal(err)
}
total := collector.Total()
if total != 9 {
t.Errorf("expected 9 total results, got %d", total)
}
results := collector.Results()
if len(results) != 0 {
t.Fatalf("expected 0 results, got %d", len(results))
}
}
// TestPaginationSameScores pages through 14 matches that all share the same
// score. It collects hits 1-5 and then hits 6-10 from two identical stub
// searchers and checks that no document ID shows up on both pages.
func TestPaginationSameScores(t *testing.T) {
	// newSearcher returns a fresh stub searcher over documents "a".."n",
	// every one scored 5. Each collection run gets its own searcher and its
	// own match values.
	newSearcher := func() *stubSearcher {
		var hits []*search.DocumentMatch
		for c := byte('a'); c <= 'n'; c++ {
			hits = append(hits, &search.DocumentMatch{
				IndexInternalID: index.IndexInternalID([]byte{c}),
				Score:           5,
			})
		}
		return &stubSearcher{matches: hits}
	}

	// first get first 5 hits
	pageOne := NewTopNCollector(5, 0, search.SortOrder{&search.SortScore{Desc: true}})
	if err := pageOne.Collect(context.Background(), newSearcher(), &stubReader{}); err != nil {
		t.Fatal(err)
	}
	if got := pageOne.Total(); got != 14 {
		t.Errorf("expected 14 total results, got %d", got)
	}
	firstHits := pageOne.Results()
	if len(firstHits) != 5 {
		t.Fatalf("expected 5 results, got %d", len(firstHits))
	}

	// Remember which IDs were returned on the first page.
	seen := make(map[string]struct{}, len(firstHits))
	for i := range firstHits {
		seen[firstHits[i].ID] = struct{}{}
	}

	// now get next 5 hits
	pageTwo := NewTopNCollector(5, 5, search.SortOrder{&search.SortScore{Desc: true}})
	if err := pageTwo.Collect(context.Background(), newSearcher(), &stubReader{}); err != nil {
		t.Fatal(err)
	}
	if got := pageTwo.Total(); got != 14 {
		t.Errorf("expected 14 total results, got %d", got)
	}
	nextHits := pageTwo.Results()
	if len(nextHits) != 5 {
		t.Fatalf("expected 5 results, got %d", len(nextHits))
	}

	// make sure that none of these hits repeat ones we saw in the top 5
	for i := range nextHits {
		id := nextHits[i].ID
		if _, dup := seen[id]; dup {
			t.Errorf("doc ID %s is in top 5 and next 5 result sets", id)
		}
	}
}
// TestStreamResults verifies the search.DocumentMatchHandler: a custom
// handler installed through the context must receive every hit in the order
// the searcher produced it, and the collector must then report the max score
// and total while returning no results of its own.
func TestStreamResults(t *testing.T) {
	matches := []*search.DocumentMatch{
		{IndexInternalID: index.IndexInternalID("a"), Score: 11},
		{IndexInternalID: index.IndexInternalID("b"), Score: 1},
		{IndexInternalID: index.IndexInternalID("c"), Score: 11},
		{IndexInternalID: index.IndexInternalID("d"), Score: 999},
		{IndexInternalID: index.IndexInternalID("e"), Score: 11},
		{IndexInternalID: index.IndexInternalID("f"), Score: 9},
		{IndexInternalID: index.IndexInternalID("g"), Score: 11},
		{IndexInternalID: index.IndexInternalID("h"), Score: 89},
		{IndexInternalID: index.IndexInternalID("i"), Score: 101},
		{IndexInternalID: index.IndexInternalID("j"), Score: 112},
		{IndexInternalID: index.IndexInternalID("k"), Score: 10},
		{IndexInternalID: index.IndexInternalID("l"), Score: 99},
		{IndexInternalID: index.IndexInternalID("m"), Score: 11},
		{IndexInternalID: index.IndexInternalID("n"), Score: 111},
	}

	searcher := &stubSearcher{
		matches: matches,
	}

	// ind counts the hits the handler has seen; it doubles as the index of
	// the match the next hit is expected to equal.
	ind := 0
	docMatchHandler := func(hit *search.DocumentMatch) error {
		if hit == nil {
			return nil // search completed
		}
		if ind >= len(matches) {
			// Report an unexpected extra callback as a failure rather than
			// panicking on an out-of-range index.
			t.Errorf("unexpected extra hit %s after %d expected hits",
				hit.IndexInternalID, len(matches))
			ind++
			return nil
		}
		if !bytes.Equal(hit.IndexInternalID, matches[ind].IndexInternalID) {
			t.Errorf("%d hit IndexInternalID actual: %s, expected: %s",
				ind, hit.IndexInternalID, matches[ind].IndexInternalID)
		}
		if hit.Score != matches[ind].Score {
			t.Errorf("%d hit Score actual: %f, expected: %f",
				ind, hit.Score, matches[ind].Score)
		}
		ind++
		return nil
	}

	var handlerMaker search.MakeDocumentMatchHandler = func(ctx *search.SearchContext) (search.DocumentMatchHandler, bool, error) {
		return docMatchHandler, false, nil
	}

	// The collector looks up the handler maker under this context key.
	ctx := context.WithValue(context.Background(), search.MakeDocumentMatchHandlerKey, handlerMaker)

	collector := NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
	err := collector.Collect(ctx, searcher, &stubReader{})
	if err != nil {
		t.Fatal(err)
	}

	maxScore := collector.MaxScore()
	if maxScore != 999.0 {
		t.Errorf("expected max score 999.0, got %f", maxScore)
	}

	// Every match must have reached the handler, and the collector's total
	// must agree with the number of handler invocations.
	if ind != len(matches) {
		t.Errorf("expected handler to see %d hits, saw %d", len(matches), ind)
	}
	total := collector.Total()
	if int(total) != ind {
		t.Errorf("expected %d total results, got %d", ind, total)
	}

	// The custom handler never passes hits on, so nothing is retained.
	results := collector.Results()
	if len(results) != 0 {
		t.Fatalf("expected 0 results, got %d", len(results))
	}
}
// TestCollectorChaining verifies the chaining of collectors.
// The custom DocumentMatchHandler can process every hit for
// the search query and then pass the hit to the topn collector
// to eventually have the sorted top `N` results.
func TestCollectorChaining(t *testing.T) {
	matches := []*search.DocumentMatch{
		{IndexInternalID: index.IndexInternalID("a"), Score: 11},
		{IndexInternalID: index.IndexInternalID("b"), Score: 1},
		{IndexInternalID: index.IndexInternalID("c"), Score: 11},
		{IndexInternalID: index.IndexInternalID("d"), Score: 999},
		{IndexInternalID: index.IndexInternalID("e"), Score: 11},
		{IndexInternalID: index.IndexInternalID("f"), Score: 9},
		{IndexInternalID: index.IndexInternalID("g"), Score: 11},
		{IndexInternalID: index.IndexInternalID("h"), Score: 89},
		{IndexInternalID: index.IndexInternalID("i"), Score: 101},
		{IndexInternalID: index.IndexInternalID("j"), Score: 112},
		{IndexInternalID: index.IndexInternalID("k"), Score: 10},
		{IndexInternalID: index.IndexInternalID("l"), Score: 99},
		{IndexInternalID: index.IndexInternalID("m"), Score: 11},
		{IndexInternalID: index.IndexInternalID("n"), Score: 111},
	}

	searcher := &stubSearcher{
		matches: matches,
	}

	// topNHandler is assigned by handlerMaker below, before any hit arrives.
	var topNHandler search.DocumentMatchHandler

	// ind counts the hits the handler has seen; it doubles as the index of
	// the match the next hit is expected to equal.
	ind := 0
	docMatchHandler := func(hit *search.DocumentMatch) error {
		if hit == nil {
			return nil // search completed
		}
		if ind >= len(matches) {
			// Report an unexpected extra callback as a failure rather than
			// panicking on an out-of-range index.
			t.Errorf("unexpected extra hit %s after %d expected hits",
				hit.IndexInternalID, len(matches))
		} else {
			if !bytes.Equal(hit.IndexInternalID, matches[ind].IndexInternalID) {
				t.Errorf("%d hit IndexInternalID actual: %s, expected: %s",
					ind, hit.IndexInternalID, matches[ind].IndexInternalID)
			}
			if hit.Score != matches[ind].Score {
				t.Errorf("%d hit Score actual: %f, expected: %f",
					ind, hit.Score, matches[ind].Score)
			}
		}
		ind++
		// give the hit back to the topN collector
		err := topNHandler(hit)
		if err != nil {
			t.Errorf("unexpected err: %v", err)
		}
		return nil
	}

	var handlerMaker search.MakeDocumentMatchHandler = func(ctx *search.SearchContext) (search.DocumentMatchHandler, bool, error) {
		// Surface a failure to build the topN handler instead of leaving
		// topNHandler nil and panicking on the first hit.
		var err error
		topNHandler, _, err = MakeTopNDocumentMatchHandler(ctx)
		if err != nil {
			return nil, false, err
		}
		return docMatchHandler, false, nil
	}

	// The collector looks up the handler maker under this context key.
	ctx := context.WithValue(context.Background(), search.MakeDocumentMatchHandlerKey,
		handlerMaker)

	collector := NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
	err := collector.Collect(ctx, searcher, &stubReader{})
	if err != nil {
		t.Fatal(err)
	}

	maxScore := collector.MaxScore()
	if maxScore != 999.0 {
		t.Errorf("expected max score 999.0, got %f", maxScore)
	}

	// Every match must have reached the handler, and the collector's total
	// must agree with the number of handler invocations.
	if ind != len(matches) {
		t.Errorf("expected handler to see %d hits, saw %d", len(matches), ind)
	}
	total := collector.Total()
	if int(total) != ind {
		t.Errorf("expected %d total results, got %d", ind, total)
	}

	results := collector.Results()
	if len(results) != 10 { // as it is paged
		t.Fatalf("expected 10 results, got %d", len(results))
	}

	if results[0].ID != "d" {
		t.Errorf("expected first result to have ID 'd', got %s", results[0].ID)
	}

	if results[0].Score != 999.0 {
		t.Errorf("expected highest score to be 999.0, got %f", results[0].Score)
	}

	// The four lowest-scoring matches (1, 9, 10, 11) fall outside the top
	// 10, so the smallest score on the page is 11.
	minScore := 1000.0
	for _, result := range results {
		if result.Score < minScore {
			minScore = result.Score
		}
	}

	if minScore < 10 {
		t.Errorf("expected minimum score to be higher than 10, got %f", minScore)
	}
}
// setupIndex creates and opens a scorch index configured with an empty
// "path" and a single-worker analysis queue, failing the test immediately if
// either step returns an error. The caller owns the returned index.
func setupIndex(t *testing.T) index.Index {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()

	analysisQueue := index.NewAnalysisQueue(1)
	i, err := scorch.NewScorch(
		scorch.Name,
		map[string]interface{}{
			"path": "",
		},
		analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = i.Open()
	if err != nil {
		t.Fatal(err)
	}

	return i
}
// TestSetFacetsBuilder checks that when the same field is used both for
// sorting and for a terms facet, SetFacetsBuilder leaves the collector with
// that field listed exactly once in neededFields.
func TestSetFacetsBuilder(t *testing.T) {
	// Field common to both sorting and faceting.
	sortFacetsField := "locations"

	coll := NewTopNCollector(10, 0, search.SortOrder{&search.SortField{Field: sortFacetsField}})

	i := setupIndex(t)
	// Release the index when the test ends, including on t.Fatal.
	defer func() {
		if cerr := i.Close(); cerr != nil {
			t.Errorf("closing index: %v", cerr)
		}
	}()

	indexReader, err := i.Reader()
	if err != nil {
		t.Fatal(err)
	}
	// Deferred calls run last-in first-out, so the reader is closed before
	// the index.
	defer func() {
		if cerr := indexReader.Close(); cerr != nil {
			t.Errorf("closing index reader: %v", cerr)
		}
	}()

	fb := search.NewFacetsBuilder(indexReader)
	facetBuilder := facet.NewTermsFacetBuilder(sortFacetsField, 100)
	fb.Add("locations_facet", facetBuilder)
	coll.SetFacetsBuilder(fb)

	// Should not duplicate the "locations" field in the collector.
	if len(coll.neededFields) != 1 || coll.neededFields[0] != sortFacetsField {
		t.Errorf("expected fields in collector: %v, observed: %v", []string{sortFacetsField}, coll.neededFields)
	}
}
func BenchmarkTop10of0Scores(b *testing.B) {
benchHelper(0, func() search.Collector {
return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop10of3Scores(b *testing.B) {
benchHelper(3, func() search.Collector {
return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop10of10Scores(b *testing.B) {
benchHelper(10, func() search.Collector {
return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop10of25Scores(b *testing.B) {
benchHelper(25, func() search.Collector {
return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop10of50Scores(b *testing.B) {
benchHelper(50, func() search.Collector {
return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop10of10000Scores(b *testing.B) {
benchHelper(10000, func() search.Collector {
return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop100of0Scores(b *testing.B) {
benchHelper(0, func() search.Collector {
return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop100of3Scores(b *testing.B) {
benchHelper(3, func() search.Collector {
return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop100of10Scores(b *testing.B) {
benchHelper(10, func() search.Collector {
return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop100of25Scores(b *testing.B) {
benchHelper(25, func() search.Collector {
return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop100of50Scores(b *testing.B) {
benchHelper(50, func() search.Collector {
return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop100of10000Scores(b *testing.B) {
benchHelper(10000, func() search.Collector {
return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop1000of10000Scores(b *testing.B) {
benchHelper(10000, func() search.Collector {
return NewTopNCollector(1000, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop10000of100000Scores(b *testing.B) {
benchHelper(100000, func() search.Collector {
return NewTopNCollector(10000, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop10of100000Scores(b *testing.B) {
benchHelper(100000, func() search.Collector {
return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop100of100000Scores(b *testing.B) {
benchHelper(100000, func() search.Collector {
return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop1000of100000Scores(b *testing.B) {
benchHelper(100000, func() search.Collector {
return NewTopNCollector(1000, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop10000of1000000Scores(b *testing.B) {
benchHelper(1000000, func() search.Collector {
return NewTopNCollector(10000, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}