Adding upstream version 2.5.1.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
c71cb8b61d
commit
982828099e
783 changed files with 150650 additions and 0 deletions
72
search/scorer/scorer_conjunction.go
Normal file
72
search/scorer/scorer_conjunction.go
Normal file
|
@ -0,0 +1,72 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorer
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeConjunctionQueryScorer int
|
||||
|
||||
func init() {
|
||||
var cqs ConjunctionQueryScorer
|
||||
reflectStaticSizeConjunctionQueryScorer = int(reflect.TypeOf(cqs).Size())
|
||||
}
|
||||
|
||||
// ConjunctionQueryScorer combines the scores of the constituents of a
// conjunction (AND) by summing them.
type ConjunctionQueryScorer struct {
	options search.SearcherOptions
}

// Size reports the approximate memory footprint of the scorer in bytes.
func (s *ConjunctionQueryScorer) Size() int {
	return reflectStaticSizeConjunctionQueryScorer + size.SizeOfPtr
}
|
||||
|
||||
func NewConjunctionQueryScorer(options search.SearcherOptions) *ConjunctionQueryScorer {
|
||||
return &ConjunctionQueryScorer{
|
||||
options: options,
|
||||
}
|
||||
}
|
||||
|
||||
func (s *ConjunctionQueryScorer) Score(ctx *search.SearchContext, constituents []*search.DocumentMatch) *search.DocumentMatch {
|
||||
var sum float64
|
||||
var childrenExplanations []*search.Explanation
|
||||
if s.options.Explain {
|
||||
childrenExplanations = make([]*search.Explanation, len(constituents))
|
||||
}
|
||||
|
||||
for i, docMatch := range constituents {
|
||||
sum += docMatch.Score
|
||||
if s.options.Explain {
|
||||
childrenExplanations[i] = docMatch.Expl
|
||||
}
|
||||
}
|
||||
newScore := sum
|
||||
var newExpl *search.Explanation
|
||||
if s.options.Explain {
|
||||
newExpl = &search.Explanation{Value: sum, Message: "sum of:", Children: childrenExplanations}
|
||||
}
|
||||
|
||||
// reuse constituents[0] as the return value
|
||||
rv := constituents[0]
|
||||
rv.Score = newScore
|
||||
rv.Expl = newExpl
|
||||
rv.FieldTermLocations = search.MergeFieldTermLocations(
|
||||
rv.FieldTermLocations, constituents[1:])
|
||||
|
||||
return rv
|
||||
}
|
132
search/scorer/scorer_constant.go
Normal file
132
search/scorer/scorer_constant.go
Normal file
|
@ -0,0 +1,132 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
var reflectStaticSizeConstantScorer int
|
||||
|
||||
func init() {
|
||||
var cs ConstantScorer
|
||||
reflectStaticSizeConstantScorer = int(reflect.TypeOf(cs).Size())
|
||||
}
|
||||
|
||||
// ConstantScorer assigns the same (constant) score to every matched
// document, optionally adjusted by the query boost and query norm.
type ConstantScorer struct {
	constant               float64 // base score given to every hit
	boost                  float64 // query-time boost factor
	options                search.SearcherOptions
	queryNorm              float64 // normalization factor set via SetQueryNorm
	queryWeight            float64 // boost * queryNorm; 1.0 until SetQueryNorm is called
	queryWeightExplanation *search.Explanation // built in SetQueryNorm when options.Explain is set
	includeScore           bool // false when options.Score == "none"
}
|
||||
|
||||
func (s *ConstantScorer) Size() int {
|
||||
sizeInBytes := reflectStaticSizeConstantScorer + size.SizeOfPtr
|
||||
|
||||
if s.queryWeightExplanation != nil {
|
||||
sizeInBytes += s.queryWeightExplanation.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func NewConstantScorer(constant float64, boost float64, options search.SearcherOptions) *ConstantScorer {
|
||||
rv := ConstantScorer{
|
||||
options: options,
|
||||
queryWeight: 1.0,
|
||||
constant: constant,
|
||||
boost: boost,
|
||||
includeScore: options.Score != "none",
|
||||
}
|
||||
|
||||
return &rv
|
||||
}
|
||||
|
||||
func (s *ConstantScorer) Weight() float64 {
|
||||
sum := s.boost
|
||||
return sum * sum
|
||||
}
|
||||
|
||||
func (s *ConstantScorer) SetQueryNorm(qnorm float64) {
|
||||
s.queryNorm = qnorm
|
||||
|
||||
// update the query weight
|
||||
s.queryWeight = s.boost * s.queryNorm
|
||||
|
||||
if s.options.Explain {
|
||||
childrenExplanations := make([]*search.Explanation, 2)
|
||||
childrenExplanations[0] = &search.Explanation{
|
||||
Value: s.boost,
|
||||
Message: "boost",
|
||||
}
|
||||
childrenExplanations[1] = &search.Explanation{
|
||||
Value: s.queryNorm,
|
||||
Message: "queryNorm",
|
||||
}
|
||||
s.queryWeightExplanation = &search.Explanation{
|
||||
Value: s.queryWeight,
|
||||
Message: fmt.Sprintf("ConstantScore()^%f, product of:", s.boost),
|
||||
Children: childrenExplanations,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Score produces a DocumentMatch for id carrying the constant score
// (multiplied by the query weight when one has been set). The match is
// drawn from the context's DocumentMatchPool; scoring and explanation work
// is skipped entirely when includeScore is false.
func (s *ConstantScorer) Score(ctx *search.SearchContext, id index.IndexInternalID) *search.DocumentMatch {
	var scoreExplanation *search.Explanation

	rv := ctx.DocumentMatchPool.Get()
	rv.IndexInternalID = id

	if s.includeScore {
		score := s.constant

		if s.options.Explain {
			scoreExplanation = &search.Explanation{
				Value:   score,
				Message: "ConstantScore()",
			}
		}

		// if the query weight isn't 1, multiply
		if s.queryWeight != 1.0 {
			score = score * s.queryWeight
			if s.options.Explain {
				// wrap the base explanation in a product with the query weight
				childExplanations := make([]*search.Explanation, 2)
				childExplanations[0] = s.queryWeightExplanation
				childExplanations[1] = scoreExplanation
				scoreExplanation = &search.Explanation{
					Value:    score,
					Message:  fmt.Sprintf("weight(^%f), product of:", s.boost),
					Children: childExplanations,
				}
			}
		}

		rv.Score = score
		if s.options.Explain {
			rv.Expl = scoreExplanation
		}
	}

	return rv
}
|
131
search/scorer/scorer_constant_test.go
Normal file
131
search/scorer/scorer_constant_test.go
Normal file
|
@ -0,0 +1,131 @@
|
|||
// Copyright (c) 2013 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorer
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
// TestConstantScorer verifies that a ConstantScorer with constant 1,
// boost 1, and explanations enabled yields the expected score and
// explanation for a simple term match.
func TestConstantScorer(t *testing.T) {

	scorer := NewConstantScorer(1, 1, search.SearcherOptions{Explain: true})

	tests := []struct {
		termMatch *index.TermFieldDoc
		result    *search.DocumentMatch
	}{
		// test some simple math
		{
			termMatch: &index.TermFieldDoc{
				ID:   index.IndexInternalID("one"),
				Freq: 1,
				Norm: 1.0,
				Vectors: []*index.TermFieldVector{
					{
						Field: "desc",
						Pos:   1,
						Start: 0,
						End:   4,
					},
				},
			},
			result: &search.DocumentMatch{
				IndexInternalID: index.IndexInternalID("one"),
				Score:           1.0,
				Expl: &search.Explanation{
					Value:   1.0,
					Message: "ConstantScore()",
				},
				Sort: []string{},
			},
		},
	}

	for _, test := range tests {
		// fresh pool per case so pooled matches don't leak state between cases
		ctx := &search.SearchContext{
			DocumentMatchPool: search.NewDocumentMatchPool(1, 0),
		}
		actual := scorer.Score(ctx, test.termMatch.ID)

		if !reflect.DeepEqual(actual, test.result) {
			t.Errorf("expected %#v got %#v for %#v", test.result, actual, test.termMatch)
		}
	}

}
|
||||
|
||||
// TestConstantScorerWithQueryNorm verifies that applying a query norm of
// 2.0 doubles the constant score and produces the nested "product of:"
// explanation combining the query weight and the base constant score.
func TestConstantScorerWithQueryNorm(t *testing.T) {

	scorer := NewConstantScorer(1, 1, search.SearcherOptions{Explain: true})
	scorer.SetQueryNorm(2.0)

	tests := []struct {
		termMatch *index.TermFieldDoc
		result    *search.DocumentMatch
	}{
		{
			termMatch: &index.TermFieldDoc{
				ID:   index.IndexInternalID("one"),
				Freq: 1,
				Norm: 1.0,
			},
			result: &search.DocumentMatch{
				IndexInternalID: index.IndexInternalID("one"),
				Score:           2.0,
				Sort:            []string{},
				Expl: &search.Explanation{
					Value:   2.0,
					Message: "weight(^1.000000), product of:",
					Children: []*search.Explanation{
						{
							Value:   2.0,
							Message: "ConstantScore()^1.000000, product of:",
							Children: []*search.Explanation{
								{
									Value:   1,
									Message: "boost",
								},
								{
									Value:   2,
									Message: "queryNorm",
								},
							},
						},
						{
							Value:   1.0,
							Message: "ConstantScore()",
						},
					},
				},
			},
		},
	}

	for _, test := range tests {
		ctx := &search.SearchContext{
			DocumentMatchPool: search.NewDocumentMatchPool(1, 0),
		}
		actual := scorer.Score(ctx, test.termMatch.ID)

		if !reflect.DeepEqual(actual, test.result) {
			t.Errorf("expected %#v got %#v for %#v", test.result, actual, test.termMatch)
		}
	}

}
|
123
search/scorer/scorer_disjunction.go
Normal file
123
search/scorer/scorer_disjunction.go
Normal file
|
@ -0,0 +1,123 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
)
|
||||
|
||||
var reflectStaticSizeDisjunctionQueryScorer int
|
||||
|
||||
func init() {
|
||||
var dqs DisjunctionQueryScorer
|
||||
reflectStaticSizeDisjunctionQueryScorer = int(reflect.TypeOf(dqs).Size())
|
||||
}
|
||||
|
||||
// DisjunctionQueryScorer combines the scores of the constituents of a
// disjunction (OR), scaling the summed score by the coordination factor
// (matching constituents / total constituents).
type DisjunctionQueryScorer struct {
	options search.SearcherOptions
}

// Size reports the approximate memory footprint of the scorer in bytes.
func (s *DisjunctionQueryScorer) Size() int {
	return reflectStaticSizeDisjunctionQueryScorer + size.SizeOfPtr
}
|
||||
|
||||
func NewDisjunctionQueryScorer(options search.SearcherOptions) *DisjunctionQueryScorer {
|
||||
return &DisjunctionQueryScorer{
|
||||
options: options,
|
||||
}
|
||||
}
|
||||
|
||||
func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents []*search.DocumentMatch, countMatch, countTotal int) *search.DocumentMatch {
|
||||
var sum float64
|
||||
var childrenExplanations []*search.Explanation
|
||||
if s.options.Explain {
|
||||
childrenExplanations = make([]*search.Explanation, len(constituents))
|
||||
}
|
||||
|
||||
for i, docMatch := range constituents {
|
||||
sum += docMatch.Score
|
||||
if s.options.Explain {
|
||||
childrenExplanations[i] = docMatch.Expl
|
||||
}
|
||||
}
|
||||
|
||||
var rawExpl *search.Explanation
|
||||
if s.options.Explain {
|
||||
rawExpl = &search.Explanation{Value: sum, Message: "sum of:", Children: childrenExplanations}
|
||||
}
|
||||
|
||||
coord := float64(countMatch) / float64(countTotal)
|
||||
newScore := sum * coord
|
||||
var newExpl *search.Explanation
|
||||
if s.options.Explain {
|
||||
ce := make([]*search.Explanation, 2)
|
||||
ce[0] = rawExpl
|
||||
ce[1] = &search.Explanation{Value: coord, Message: fmt.Sprintf("coord(%d/%d)", countMatch, countTotal)}
|
||||
newExpl = &search.Explanation{Value: newScore, Message: "product of:", Children: ce, PartialMatch: countMatch != countTotal}
|
||||
}
|
||||
|
||||
// reuse constituents[0] as the return value
|
||||
rv := constituents[0]
|
||||
rv.Score = newScore
|
||||
rv.Expl = newExpl
|
||||
rv.FieldTermLocations = search.MergeFieldTermLocations(
|
||||
rv.FieldTermLocations, constituents[1:])
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
// This method is used only when disjunction searcher is used over multiple
|
||||
// KNN searchers, where only the score breakdown and the optional explanation breakdown
|
||||
// is required. The final score and explanation is set when we finalize the KNN hits.
|
||||
func (s *DisjunctionQueryScorer) ScoreAndExplBreakdown(ctx *search.SearchContext, constituents []*search.DocumentMatch,
|
||||
matchingIdxs []int, originalPositions []int, countTotal int) *search.DocumentMatch {
|
||||
|
||||
scoreBreakdown := make(map[int]float64)
|
||||
var childrenExplanations []*search.Explanation
|
||||
if s.options.Explain {
|
||||
// since we want to notify which expl belongs to which matched searcher within the disjunction searcher
|
||||
childrenExplanations = make([]*search.Explanation, countTotal)
|
||||
}
|
||||
|
||||
for i, docMatch := range constituents {
|
||||
var index int
|
||||
if originalPositions != nil {
|
||||
// scorer used in disjunction slice searcher
|
||||
index = originalPositions[matchingIdxs[i]]
|
||||
} else {
|
||||
// scorer used in disjunction heap searcher
|
||||
index = matchingIdxs[i]
|
||||
}
|
||||
scoreBreakdown[index] = docMatch.Score
|
||||
if s.options.Explain {
|
||||
childrenExplanations[index] = docMatch.Expl
|
||||
}
|
||||
}
|
||||
var explBreakdown *search.Explanation
|
||||
if s.options.Explain {
|
||||
explBreakdown = &search.Explanation{Children: childrenExplanations}
|
||||
}
|
||||
|
||||
rv := constituents[0]
|
||||
rv.ScoreBreakdown = scoreBreakdown
|
||||
rv.Expl = explBreakdown
|
||||
rv.FieldTermLocations = search.MergeFieldTermLocations(
|
||||
rv.FieldTermLocations, constituents[1:])
|
||||
return rv
|
||||
}
|
157
search/scorer/scorer_knn.go
Normal file
157
search/scorer/scorer_knn.go
Normal file
|
@ -0,0 +1,157 @@
|
|||
// Copyright (c) 2023 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//go:build vectors
|
||||
// +build vectors
|
||||
|
||||
package scorer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
var reflectStaticSizeKNNQueryScorer int
|
||||
|
||||
func init() {
|
||||
var sqs KNNQueryScorer
|
||||
reflectStaticSizeKNNQueryScorer = int(reflect.TypeOf(sqs).Size())
|
||||
}
|
||||
|
||||
// KNNQueryScorer scores hits returned by a KNN (vector) search over a
// single field, using the configured similarity metric.
type KNNQueryScorer struct {
	queryVector            []float32 // the query vector itself
	queryField             string
	queryWeight            float64 // boost * queryNorm; 1.0 until SetQueryNorm is called
	queryBoost             float64
	queryNorm              float64
	options                search.SearcherOptions
	similarityMetric       string
	queryWeightExplanation *search.Explanation // built in SetQueryNorm when options.Explain is set
}
|
||||
|
||||
func (s *KNNQueryScorer) Size() int {
|
||||
sizeInBytes := reflectStaticSizeKNNQueryScorer + size.SizeOfPtr +
|
||||
(len(s.queryVector) * size.SizeOfFloat32) + len(s.queryField) +
|
||||
len(s.similarityMetric)
|
||||
|
||||
if s.queryWeightExplanation != nil {
|
||||
sizeInBytes += s.queryWeightExplanation.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func NewKNNQueryScorer(queryVector []float32, queryField string, queryBoost float64,
|
||||
options search.SearcherOptions,
|
||||
similarityMetric string) *KNNQueryScorer {
|
||||
return &KNNQueryScorer{
|
||||
queryVector: queryVector,
|
||||
queryField: queryField,
|
||||
queryBoost: queryBoost,
|
||||
queryWeight: 1.0,
|
||||
options: options,
|
||||
similarityMetric: similarityMetric,
|
||||
}
|
||||
}
|
||||
|
||||
// maxKNNScore is the score used when knnMatch.Score = 0 ->
// the query and indexed vector are exactly the same (a perfect match
// under euclidean distance, which would otherwise divide by zero).
const maxKNNScore = math.MaxFloat32
|
||||
|
||||
// Score converts a raw KNN match into a DocumentMatch drawn from the
// context's pool. Euclidean distances are inverted (1/d) so that smaller
// distances score higher, with a distance of 0 mapped to maxKNNScore.
// The query weight (if not 1) is then multiplied in, except for exact
// matches already pinned to maxKNNScore.
func (sqs *KNNQueryScorer) Score(ctx *search.SearchContext,
	knnMatch *index.VectorDoc) *search.DocumentMatch {
	rv := ctx.DocumentMatchPool.Get()
	var scoreExplanation *search.Explanation
	score := knnMatch.Score
	if sqs.similarityMetric == index.EuclideanDistance {
		// in case of euclidean distance being the distance metric,
		// an exact vector (perfect match), would return distance = 0
		if score == 0 {
			score = maxKNNScore
		} else {
			// euclidean distances need to be inverted to work with
			// tf-idf scoring
			score = 1.0 / score
		}
	}
	if sqs.options.Explain {
		scoreExplanation = &search.Explanation{
			Value: score,
			Message: fmt.Sprintf("fieldWeight(%s in doc %s), score of:",
				sqs.queryField, knnMatch.ID),
			Children: []*search.Explanation{
				{
					Value: score,
					Message: fmt.Sprintf("vector(field(%s:%s) with similarity_metric(%s)=%e",
						sqs.queryField, knnMatch.ID, sqs.similarityMetric, score),
				},
			},
		}
	}
	// if the query weight isn't 1, multiply
	if sqs.queryWeight != 1.0 && score != maxKNNScore {
		score = score * sqs.queryWeight
		if sqs.options.Explain {
			scoreExplanation = &search.Explanation{
				Value: score,
				// Product of score * weight
				// Avoid adding the query vector to the explanation since vectors
				// can get quite large.
				Message: fmt.Sprintf("weight(%s:query Vector^%f in %s), product of:",
					sqs.queryField, sqs.queryBoost, knnMatch.ID),
				Children: []*search.Explanation{sqs.queryWeightExplanation, scoreExplanation},
			}
		}
	}
	rv.Score = score
	if sqs.options.Explain {
		rv.Expl = scoreExplanation
	}
	rv.IndexInternalID = append(rv.IndexInternalID, knnMatch.ID...)
	return rv
}
|
||||
|
||||
// Weight always returns 1.0 for a KNN scorer.
func (sqs *KNNQueryScorer) Weight() float64 {
	return 1.0
}
|
||||
|
||||
func (sqs *KNNQueryScorer) SetQueryNorm(qnorm float64) {
|
||||
sqs.queryNorm = qnorm
|
||||
|
||||
// update the query weight
|
||||
sqs.queryWeight = sqs.queryBoost * sqs.queryNorm
|
||||
|
||||
if sqs.options.Explain {
|
||||
childrenExplanations := make([]*search.Explanation, 2)
|
||||
childrenExplanations[0] = &search.Explanation{
|
||||
Value: sqs.queryBoost,
|
||||
Message: "boost",
|
||||
}
|
||||
childrenExplanations[1] = &search.Explanation{
|
||||
Value: sqs.queryNorm,
|
||||
Message: "queryNorm",
|
||||
}
|
||||
sqs.queryWeightExplanation = &search.Explanation{
|
||||
Value: sqs.queryWeight,
|
||||
Message: fmt.Sprintf("queryWeight(%s:query Vector^%f), product of:",
|
||||
sqs.queryField, sqs.queryBoost),
|
||||
Children: childrenExplanations,
|
||||
}
|
||||
}
|
||||
}
|
181
search/scorer/scorer_knn_test.go
Normal file
181
search/scorer/scorer_knn_test.go
Normal file
|
@ -0,0 +1,181 @@
|
|||
// Copyright (c) 2023 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//go:build vectors
|
||||
// +build vectors
|
||||
|
||||
package scorer
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
// TestKNNScorerExplanation exercises the KNN scorer's explanation output
// for euclidean distance (including score inversion and the exact-match
// maxKNNScore case) and inner product, with and without a query norm.
func TestKNNScorerExplanation(t *testing.T) {
	var queryVector []float32
	// arbitrary vector of dims: 64
	for i := 0; i < 64; i++ {
		queryVector = append(queryVector, float32(i))
	}

	var resVector []float32
	// arbitrary res vector.
	for i := 0; i < 64; i++ {
		resVector = append(resVector, float32(i))
	}

	tests := []struct {
		vectorMatch *index.VectorDoc
		scorer      *KNNQueryScorer
		norm        float64
		result      *search.DocumentMatch
	}{
		{
			vectorMatch: &index.VectorDoc{
				ID:     index.IndexInternalID("one"),
				Score:  0.5,
				Vector: resVector,
			},
			norm: 1.0,
			scorer: NewKNNQueryScorer(queryVector, "desc", 1.0,
				search.SearcherOptions{Explain: true}, index.EuclideanDistance),
			// Specifically testing EuclideanDistance since that involves score inversion.
			result: &search.DocumentMatch{
				IndexInternalID: index.IndexInternalID("one"),
				Score:           0.5,
				Expl: &search.Explanation{
					Value:   1 / 0.5,
					Message: "fieldWeight(desc in doc one), score of:",
					Children: []*search.Explanation{
						{
							Value:   1 / 0.5,
							Message: "vector(field(desc:one) with similarity_metric(l2_norm)=2.000000e+00",
						},
					},
				},
			},
		},
		{
			vectorMatch: &index.VectorDoc{
				ID:    index.IndexInternalID("one"),
				Score: 0.0,
				// Result vector is an exact match of an existing vector.
				Vector: queryVector,
			},
			norm: 1.0,
			scorer: NewKNNQueryScorer(queryVector, "desc", 1.0,
				search.SearcherOptions{Explain: true}, index.EuclideanDistance),
			// Specifically testing EuclideanDistance with 0 score.
			result: &search.DocumentMatch{
				IndexInternalID: index.IndexInternalID("one"),
				Score:           0.0,
				Expl: &search.Explanation{
					Value:   maxKNNScore,
					Message: "fieldWeight(desc in doc one), score of:",
					Children: []*search.Explanation{
						{
							Value:   maxKNNScore,
							Message: "vector(field(desc:one) with similarity_metric(l2_norm)=3.402823e+38",
						},
					},
				},
			},
		},
		{
			vectorMatch: &index.VectorDoc{
				ID:     index.IndexInternalID("one"),
				Score:  0.5,
				Vector: resVector,
			},
			norm: 1.0,
			scorer: NewKNNQueryScorer(queryVector, "desc", 1.0,
				search.SearcherOptions{Explain: true}, index.InnerProduct),
			result: &search.DocumentMatch{
				IndexInternalID: index.IndexInternalID("one"),
				Score:           0.5,
				Expl: &search.Explanation{
					Value:   0.5,
					Message: "fieldWeight(desc in doc one), score of:",
					Children: []*search.Explanation{
						{
							Value:   0.5,
							Message: "vector(field(desc:one) with similarity_metric(dot_product)=5.000000e-01",
						},
					},
				},
			},
		},
		{
			vectorMatch: &index.VectorDoc{
				ID:     index.IndexInternalID("one"),
				Score:  0.25,
				Vector: resVector,
			},
			// norm != 1 exercises the queryWeight "product of:" wrapping.
			norm: 0.5,
			scorer: NewKNNQueryScorer(queryVector, "desc", 1.0,
				search.SearcherOptions{Explain: true}, index.InnerProduct),
			result: &search.DocumentMatch{
				IndexInternalID: index.IndexInternalID("one"),
				Score:           0.25,
				Expl: &search.Explanation{
					Value:   0.125,
					Message: "weight(desc:query Vector^1.000000 in one), product of:",
					Children: []*search.Explanation{
						{
							Value:   0.5,
							Message: "queryWeight(desc:query Vector^1.000000), product of:",
							Children: []*search.Explanation{
								{
									Value:   1,
									Message: "boost",
								},
								{
									Value:   0.5,
									Message: "queryNorm",
								},
							},
						},
						{
							Value:   0.25,
							Message: "fieldWeight(desc in doc one), score of:",
							Children: []*search.Explanation{
								{
									Value:   0.25,
									Message: "vector(field(desc:one) with similarity_metric(dot_product)=2.500000e-01",
								},
							},
						},
					},
				},
			},
		},
	}

	for _, test := range tests {
		ctx := &search.SearchContext{
			DocumentMatchPool: search.NewDocumentMatchPool(1, 0),
		}
		test.scorer.SetQueryNorm(test.norm)
		actual := test.scorer.Score(ctx, test.vectorMatch)
		actual.Complete(nil)

		// only the explanation tree is compared here
		if !reflect.DeepEqual(actual.Expl, test.result.Expl) {
			t.Errorf("expected %#v got %#v for %#v", test.result.Expl,
				actual.Expl, test.vectorMatch)
		}
	}
}
|
276
search/scorer/scorer_term.go
Normal file
276
search/scorer/scorer_term.go
Normal file
|
@ -0,0 +1,276 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
var reflectStaticSizeTermQueryScorer int
|
||||
|
||||
func init() {
|
||||
var tqs TermQueryScorer
|
||||
reflectStaticSizeTermQueryScorer = int(reflect.TypeOf(tqs).Size())
|
||||
}
|
||||
|
||||
// TermQueryScorer scores documents matching a single term, using either
// tf-idf (the default) or BM25 (selected when avgDocLength > 0).
type TermQueryScorer struct {
	queryTerm              string
	queryField             string
	queryBoost             float64
	docTerm                uint64 // number of documents containing the term
	docTotal               uint64 // total number of documents in the index
	avgDocLength           float64 // > 0 only for bm25 scoring
	idf                    float64 // inverse document frequency, precomputed at construction
	options                search.SearcherOptions
	idfExplanation         *search.Explanation // built at construction when options.Explain is set
	includeScore           bool // false when options.Score == "none"
	queryNorm              float64
	queryWeight            float64 // boost * idf * queryNorm; 1.0 until SetQueryNorm is called
	queryWeightExplanation *search.Explanation
}
|
||||
|
||||
func (s *TermQueryScorer) Size() int {
|
||||
sizeInBytes := reflectStaticSizeTermQueryScorer + size.SizeOfPtr +
|
||||
len(s.queryTerm) + len(s.queryField)
|
||||
|
||||
if s.idfExplanation != nil {
|
||||
sizeInBytes += s.idfExplanation.Size()
|
||||
}
|
||||
|
||||
if s.queryWeightExplanation != nil {
|
||||
sizeInBytes += s.queryWeightExplanation.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (s *TermQueryScorer) computeIDF(avgDocLength float64, docTotal, docTerm uint64) float64 {
|
||||
var rv float64
|
||||
if avgDocLength > 0 {
|
||||
// avgDocLength is set only for bm25 scoring
|
||||
rv = math.Log(1 + (float64(docTotal)-float64(docTerm)+0.5)/
|
||||
(float64(docTerm)+0.5))
|
||||
} else {
|
||||
rv = 1.0 + math.Log(float64(docTotal)/
|
||||
float64(docTerm+1.0))
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
// queryTerm - the specific term being scored by this scorer object
|
||||
// queryField - the field in which the term is being searched
|
||||
// queryBoost - the boost value for the query term
|
||||
// docTotal - total number of documents in the index
|
||||
// docTerm - number of documents containing the term
|
||||
// avgDocLength - average document length in the index
|
||||
// options - search options such as explain scoring, include the location of the term etc.
|
||||
func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal,
|
||||
docTerm uint64, avgDocLength float64, options search.SearcherOptions) *TermQueryScorer {
|
||||
|
||||
rv := TermQueryScorer{
|
||||
queryTerm: string(queryTerm),
|
||||
queryField: queryField,
|
||||
queryBoost: queryBoost,
|
||||
docTerm: docTerm,
|
||||
docTotal: docTotal,
|
||||
avgDocLength: avgDocLength,
|
||||
options: options,
|
||||
queryWeight: 1.0,
|
||||
includeScore: options.Score != "none",
|
||||
}
|
||||
|
||||
rv.idf = rv.computeIDF(avgDocLength, docTotal, docTerm)
|
||||
if options.Explain {
|
||||
rv.idfExplanation = &search.Explanation{
|
||||
Value: rv.idf,
|
||||
Message: fmt.Sprintf("idf(docFreq=%d, maxDocs=%d)", docTerm, docTotal),
|
||||
}
|
||||
}
|
||||
|
||||
return &rv
|
||||
}
|
||||
|
||||
func (s *TermQueryScorer) Weight() float64 {
|
||||
sum := s.queryBoost * s.idf
|
||||
return sum * sum
|
||||
}
|
||||
|
||||
func (s *TermQueryScorer) SetQueryNorm(qnorm float64) {
|
||||
s.queryNorm = qnorm
|
||||
|
||||
// update the query weight
|
||||
s.queryWeight = s.queryBoost * s.idf * s.queryNorm
|
||||
|
||||
if s.options.Explain {
|
||||
childrenExplanations := make([]*search.Explanation, 3)
|
||||
childrenExplanations[0] = &search.Explanation{
|
||||
Value: s.queryBoost,
|
||||
Message: "boost",
|
||||
}
|
||||
childrenExplanations[1] = s.idfExplanation
|
||||
childrenExplanations[2] = &search.Explanation{
|
||||
Value: s.queryNorm,
|
||||
Message: "queryNorm",
|
||||
}
|
||||
s.queryWeightExplanation = &search.Explanation{
|
||||
Value: s.queryWeight,
|
||||
Message: fmt.Sprintf("queryWeight(%s:%s^%f), product of:", s.queryField, s.queryTerm, s.queryBoost),
|
||||
Children: childrenExplanations,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// docScore computes the score for one document given the term frequency tf
// and the stored field norm, returning the score and the name of the
// scoring model used (BM25 when avgDocLength > 0, tf-idf otherwise).
func (s *TermQueryScorer) docScore(tf, norm float64) (score float64, model string) {
	if s.avgDocLength > 0 {
		// bm25 scoring
		// using the posting's norm value to recompute the field length for the doc num
		fieldLength := 1 / (norm * norm)

		score = s.idf * (tf * search.BM25_k1) /
			(tf + search.BM25_k1*(1-search.BM25_b+(search.BM25_b*fieldLength/s.avgDocLength)))
		model = index.BM25Scoring
	} else {
		// tf-idf scoring by default
		score = tf * norm * s.idf
		model = index.DefaultScoringModel
	}
	return score, model
}
|
||||
|
||||
func (s *TermQueryScorer) scoreExplanation(tf float64, termMatch *index.TermFieldDoc) []*search.Explanation {
|
||||
var rv []*search.Explanation
|
||||
if s.avgDocLength > 0 {
|
||||
fieldLength := 1 / (termMatch.Norm * termMatch.Norm)
|
||||
fieldNormVal := 1 - search.BM25_b + (search.BM25_b * fieldLength / s.avgDocLength)
|
||||
fieldNormalizeExplanation := &search.Explanation{
|
||||
Value: fieldNormVal,
|
||||
Message: fmt.Sprintf("fieldNorm(field=%s), b=%f, fieldLength=%f, avgFieldLength=%f)",
|
||||
s.queryField, search.BM25_b, fieldLength, s.avgDocLength),
|
||||
}
|
||||
|
||||
saturationExplanation := &search.Explanation{
|
||||
Value: search.BM25_k1 / (tf + search.BM25_k1*fieldNormVal),
|
||||
Message: fmt.Sprintf("saturation(term:%s), k1=%f/(tf=%f + k1*fieldNorm=%f))",
|
||||
termMatch.Term, search.BM25_k1, tf, fieldNormVal),
|
||||
Children: []*search.Explanation{fieldNormalizeExplanation},
|
||||
}
|
||||
|
||||
rv = make([]*search.Explanation, 3)
|
||||
rv[0] = &search.Explanation{
|
||||
Value: tf,
|
||||
Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq),
|
||||
}
|
||||
rv[1] = saturationExplanation
|
||||
rv[2] = s.idfExplanation
|
||||
} else {
|
||||
rv = make([]*search.Explanation, 3)
|
||||
rv[0] = &search.Explanation{
|
||||
Value: tf,
|
||||
Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq),
|
||||
}
|
||||
rv[1] = &search.Explanation{
|
||||
Value: termMatch.Norm,
|
||||
Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID),
|
||||
}
|
||||
rv[2] = s.idfExplanation
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
// Score builds a DocumentMatch for a single term posting. It draws the match
// from the context's pool, fills in the score and (optionally) an explanation
// tree, appends the document's internal ID, and translates the posting's term
// vectors into field/term locations.
func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.TermFieldDoc) *search.DocumentMatch {
	rv := ctx.DocumentMatchPool.Get()
	// perform any score computations only when needed
	if s.includeScore || s.options.Explain {
		var scoreExplanation *search.Explanation
		var tf float64
		// tf = sqrt(freq); small frequencies come from the precomputed cache
		if termMatch.Freq < MaxSqrtCache {
			tf = SqrtCache[int(termMatch.Freq)]
		} else {
			tf = math.Sqrt(float64(termMatch.Freq))
		}

		score, scoringModel := s.docScore(tf, termMatch.Norm)
		if s.options.Explain {
			childrenExplanations := s.scoreExplanation(tf, termMatch)
			scoreExplanation = &search.Explanation{
				Value: score,
				Message: fmt.Sprintf("fieldWeight(%s:%s in %s), as per %s model, "+
					"product of:", s.queryField, s.queryTerm, termMatch.ID, scoringModel),
				Children: childrenExplanations,
			}
		}

		// if the query weight isn't 1, multiply
		if s.queryWeight != 1.0 {
			score = score * s.queryWeight
			if s.options.Explain {
				// wrap the field-weight explanation together with the
				// query-weight explanation built by SetQueryNorm
				childExplanations := make([]*search.Explanation, 2)
				childExplanations[0] = s.queryWeightExplanation
				childExplanations[1] = scoreExplanation
				scoreExplanation = &search.Explanation{
					Value:    score,
					Message:  fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID),
					Children: childExplanations,
				}
			}
		}

		if s.includeScore {
			rv.Score = score
		}

		if s.options.Explain {
			rv.Expl = scoreExplanation
		}
	}

	rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...)

	if len(termMatch.Vectors) > 0 {
		if cap(rv.FieldTermLocations) < len(termMatch.Vectors) {
			rv.FieldTermLocations = make([]search.FieldTermLocation, 0, len(termMatch.Vectors))
		}

		for _, v := range termMatch.Vectors {
			var ap search.ArrayPositions
			if len(v.ArrayPositions) > 0 {
				n := len(rv.FieldTermLocations)
				if n < cap(rv.FieldTermLocations) { // reuse ap slice if available
					// borrow the ArrayPositions backing array of the pooled
					// entry about to be overwritten, avoiding an allocation
					ap = rv.FieldTermLocations[:n+1][n].Location.ArrayPositions[:0]
				}
				ap = append(ap, v.ArrayPositions...)
			}
			rv.FieldTermLocations =
				append(rv.FieldTermLocations, search.FieldTermLocation{
					Field: v.Field,
					Term:  s.queryTerm,
					Location: search.Location{
						Pos:            v.Pos,
						Start:          v.Start,
						End:            v.End,
						ArrayPositions: ap,
					},
				})
		}
	}
	return rv
}
|
260
search/scorer/scorer_term_test.go
Normal file
260
search/scorer/scorer_term_test.go
Normal file
|
@ -0,0 +1,260 @@
|
|||
// Copyright (c) 2013 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorer
|
||||
|
||||
import (
|
||||
"math"
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
// TestTermScorer exercises scoring with Explain enabled (the expected
// messages correspond to the "tfidf" model), checking the score, the
// explanation tree, and term-vector location extraction against
// precomputed expected values via reflect.DeepEqual.
func TestTermScorer(t *testing.T) {

	var docTotal uint64 = 100
	var docTerm uint64 = 9
	var queryTerm = []byte("beer")
	var queryField = "desc"
	var queryBoost = 1.0
	// NOTE(review): the 0 argument presumably disables BM25 (avg doc length)
	// so tf-idf scoring applies — matches the "tfidf model" messages below.
	scorer := NewTermQueryScorer(queryTerm, queryField, queryBoost, docTotal, docTerm, 0, search.SearcherOptions{Explain: true})
	// idf formula mirrored from the scorer's own computation
	idf := 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0))

	tests := []struct {
		termMatch *index.TermFieldDoc
		result    *search.DocumentMatch
	}{
		// test some simple math
		{
			termMatch: &index.TermFieldDoc{
				ID:   index.IndexInternalID("one"),
				Freq: 1,
				Norm: 1.0,
				Vectors: []*index.TermFieldVector{
					{
						Field: "desc",
						Pos:   1,
						Start: 0,
						End:   4,
					},
				},
			},
			result: &search.DocumentMatch{
				IndexInternalID: index.IndexInternalID("one"),
				Score:           math.Sqrt(1.0) * idf,
				Sort:            []string{},
				Expl: &search.Explanation{
					Value:   math.Sqrt(1.0) * idf,
					Message: "fieldWeight(desc:beer in one), as per tfidf model, product of:",
					Children: []*search.Explanation{
						{
							Value:   1,
							Message: "tf(termFreq(desc:beer)=1",
						},
						{
							Value:   1,
							Message: "fieldNorm(field=desc, doc=one)",
						},
						{
							Value:   idf,
							Message: "idf(docFreq=9, maxDocs=100)",
						},
					},
				},
				Locations: search.FieldTermLocationMap{
					"desc": search.TermLocationMap{
						"beer": []*search.Location{
							{
								Pos:   1,
								Start: 0,
								End:   4,
							},
						},
					},
				},
			},
		},
		// test the same thing again (score should be cached this time)
		{
			termMatch: &index.TermFieldDoc{
				ID:   index.IndexInternalID("one"),
				Freq: 1,
				Norm: 1.0,
			},
			result: &search.DocumentMatch{
				IndexInternalID: index.IndexInternalID("one"),
				Score:           math.Sqrt(1.0) * idf,
				Sort:            []string{},
				Expl: &search.Explanation{
					Value:   math.Sqrt(1.0) * idf,
					Message: "fieldWeight(desc:beer in one), as per tfidf model, product of:",
					Children: []*search.Explanation{
						{
							Value:   1,
							Message: "tf(termFreq(desc:beer)=1",
						},
						{
							Value:   1,
							Message: "fieldNorm(field=desc, doc=one)",
						},
						{
							Value:   idf,
							Message: "idf(docFreq=9, maxDocs=100)",
						},
					},
				},
			},
		},
		// test a case where the sqrt isn't precalculated
		// (Freq=65 exceeds the MaxSqrtCache=64 precomputed range)
		{
			termMatch: &index.TermFieldDoc{
				ID:   index.IndexInternalID("one"),
				Freq: 65,
				Norm: 1.0,
			},
			result: &search.DocumentMatch{
				IndexInternalID: index.IndexInternalID("one"),
				Score:           math.Sqrt(65) * idf,
				Sort:            []string{},
				Expl: &search.Explanation{
					Value:   math.Sqrt(65) * idf,
					Message: "fieldWeight(desc:beer in one), as per tfidf model, product of:",
					Children: []*search.Explanation{
						{
							Value:   math.Sqrt(65),
							Message: "tf(termFreq(desc:beer)=65",
						},
						{
							Value:   1,
							Message: "fieldNorm(field=desc, doc=one)",
						},
						{
							Value:   idf,
							Message: "idf(docFreq=9, maxDocs=100)",
						},
					},
				},
			},
		},
	}

	for _, test := range tests {
		ctx := &search.SearchContext{
			DocumentMatchPool: search.NewDocumentMatchPool(1, 0),
		}
		actual := scorer.Score(ctx, test.termMatch)
		actual.Complete(nil)
		// normalize empty-but-non-nil locations so DeepEqual matches the
		// expected nil field in cases without term vectors
		if len(actual.FieldTermLocations) == 0 {
			actual.FieldTermLocations = nil
		}

		if !reflect.DeepEqual(actual, test.result) {
			t.Errorf("expected %#v got %#v for %#v", test.result, actual, test.termMatch)
		}
	}

}
|
||||
|
||||
// TestTermScorerWithQueryNorm verifies that a non-unit boost (3.0) combined
// with SetQueryNorm(2.0) multiplies the field score by the query weight and
// wraps the explanation in an extra "weight(...)" node whose children are the
// queryWeight and fieldWeight explanations.
func TestTermScorerWithQueryNorm(t *testing.T) {

	var docTotal uint64 = 100
	var docTerm uint64 = 9
	var queryTerm = []byte("beer")
	var queryField = "desc"
	var queryBoost = 3.0
	scorer := NewTermQueryScorer(queryTerm, queryField, queryBoost, docTotal, docTerm, 0, search.SearcherOptions{Explain: true})
	// idf formula mirrored from the scorer's own computation
	idf := 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0))

	scorer.SetQueryNorm(2.0)

	// Weight() is (boost*idf)^2 and is independent of the query norm
	expectedQueryWeight := 3 * idf * 3 * idf
	actualQueryWeight := scorer.Weight()
	if expectedQueryWeight != actualQueryWeight {
		t.Errorf("expected query weight %f, got %f", expectedQueryWeight, actualQueryWeight)
	}

	tests := []struct {
		termMatch *index.TermFieldDoc
		result    *search.DocumentMatch
	}{
		{
			termMatch: &index.TermFieldDoc{
				ID:   index.IndexInternalID("one"),
				Freq: 1,
				Norm: 1.0,
			},
			result: &search.DocumentMatch{
				IndexInternalID: index.IndexInternalID("one"),
				// fieldWeight (sqrt(1)*idf) times queryWeight (3*idf*2)
				Score: math.Sqrt(1.0) * idf * 3.0 * idf * 2.0,
				Sort:  []string{},
				Expl: &search.Explanation{
					Value:   math.Sqrt(1.0) * idf * 3.0 * idf * 2.0,
					Message: "weight(desc:beer^3.000000 in one), product of:",
					Children: []*search.Explanation{
						{
							Value:   2.0 * idf * 3.0,
							Message: "queryWeight(desc:beer^3.000000), product of:",
							Children: []*search.Explanation{
								{
									Value:   3,
									Message: "boost",
								},
								{
									Value:   idf,
									Message: "idf(docFreq=9, maxDocs=100)",
								},
								{
									Value:   2,
									Message: "queryNorm",
								},
							},
						},
						{
							Value:   math.Sqrt(1.0) * idf,
							Message: "fieldWeight(desc:beer in one), as per tfidf model, product of:",
							Children: []*search.Explanation{
								{
									Value:   1,
									Message: "tf(termFreq(desc:beer)=1",
								},
								{
									Value:   1,
									Message: "fieldNorm(field=desc, doc=one)",
								},
								{
									Value:   idf,
									Message: "idf(docFreq=9, maxDocs=100)",
								},
							},
						},
					},
				},
			},
		},
	}

	for _, test := range tests {
		ctx := &search.SearchContext{
			DocumentMatchPool: search.NewDocumentMatchPool(1, 0),
		}
		actual := scorer.Score(ctx, test.termMatch)

		if !reflect.DeepEqual(actual, test.result) {
			t.Errorf("expected %#v got %#v for %#v", test.result, actual, test.termMatch)
		}
	}

}
|
30
search/scorer/sqrt_cache.go
Normal file
30
search/scorer/sqrt_cache.go
Normal file
|
@ -0,0 +1,30 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorer
|
||||
|
||||
import (
|
||||
"math"
|
||||
)
|
||||
|
||||
// SqrtCache memoizes the square roots of small integers so that term
// scoring can avoid calling math.Sqrt for common (low) term frequencies.
var SqrtCache []float64

// MaxSqrtCache is the number of precomputed square roots; indices
// 0 through MaxSqrtCache-1 are valid in SqrtCache.
const MaxSqrtCache = 64

func init() {
	SqrtCache = make([]float64, MaxSqrtCache)
	for i := range SqrtCache {
		SqrtCache[i] = math.Sqrt(float64(i))
	}
}
|
Loading…
Add table
Add a link
Reference in a new issue