282 lines
7.6 KiB
Go
282 lines
7.6 KiB
Go
//  Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
package searcher
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"math"
|
|
"reflect"
|
|
|
|
"github.com/blevesearch/bleve/v2/search"
|
|
"github.com/blevesearch/bleve/v2/search/scorer"
|
|
"github.com/blevesearch/bleve/v2/size"
|
|
index "github.com/blevesearch/bleve_index_api"
|
|
)
|
|
|
|
var reflectStaticSizeTermSearcher int
|
|
|
|
func init() {
|
|
var ts TermSearcher
|
|
reflectStaticSizeTermSearcher = int(reflect.TypeOf(ts).Size())
|
|
}
|
|
|
|
type TermSearcher struct {
|
|
indexReader index.IndexReader
|
|
reader index.TermFieldReader
|
|
scorer *scorer.TermQueryScorer
|
|
tfd index.TermFieldDoc
|
|
}
|
|
|
|
func NewTermSearcher(ctx context.Context, indexReader index.IndexReader,
|
|
term string, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) {
|
|
if isTermQuery(ctx) {
|
|
ctx = context.WithValue(ctx, search.QueryTypeKey, search.Term)
|
|
}
|
|
return NewTermSearcherBytes(ctx, indexReader, []byte(term), field, boost, options)
|
|
}
|
|
|
|
func NewTermSearcherBytes(ctx context.Context, indexReader index.IndexReader,
|
|
term []byte, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) {
|
|
if ctx != nil {
|
|
if fts, ok := ctx.Value(search.FieldTermSynonymMapKey).(search.FieldTermSynonymMap); ok {
|
|
if ts, exists := fts[field]; exists {
|
|
if s, found := ts[string(term)]; found {
|
|
return NewSynonymSearcher(ctx, indexReader, term, s, field, boost, options)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
needFreqNorm := options.Score != "none"
|
|
reader, err := indexReader.TermFieldReader(ctx, term, field, needFreqNorm, needFreqNorm, options.IncludeTermVectors)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return newTermSearcherFromReader(ctx, indexReader, reader, term, field, boost, options)
|
|
}
|
|
|
|
func tfIDFScoreMetrics(indexReader index.IndexReader) (uint64, error) {
|
|
// default tf-idf stats
|
|
count, err := indexReader.DocCount()
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
if count == 0 {
|
|
return 0, nil
|
|
}
|
|
return count, nil
|
|
}
|
|
|
|
func bm25ScoreMetrics(ctx context.Context, field string,
|
|
indexReader index.IndexReader) (uint64, float64, error) {
|
|
var count uint64
|
|
var fieldCardinality int
|
|
var err error
|
|
|
|
bm25Stats, ok := ctx.Value(search.BM25StatsKey).(*search.BM25Stats)
|
|
if !ok {
|
|
count, err = indexReader.DocCount()
|
|
if err != nil {
|
|
return 0, 0, err
|
|
}
|
|
if bm25Reader, ok := indexReader.(index.BM25Reader); ok {
|
|
fieldCardinality, err = bm25Reader.FieldCardinality(field)
|
|
if err != nil {
|
|
return 0, 0, err
|
|
}
|
|
}
|
|
} else {
|
|
count = uint64(bm25Stats.DocCount)
|
|
fieldCardinality, ok = bm25Stats.FieldCardinality[field]
|
|
if !ok {
|
|
return 0, 0, fmt.Errorf("field stat for bm25 not present %s", field)
|
|
}
|
|
}
|
|
|
|
if count == 0 && fieldCardinality == 0 {
|
|
return 0, 0, nil
|
|
}
|
|
return count, math.Ceil(float64(fieldCardinality) / float64(count)), nil
|
|
}
|
|
|
|
func newTermSearcherFromReader(ctx context.Context, indexReader index.IndexReader,
|
|
reader index.TermFieldReader, term []byte, field string, boost float64,
|
|
options search.SearcherOptions) (*TermSearcher, error) {
|
|
var count uint64
|
|
var avgDocLength float64
|
|
var err error
|
|
var similarityModel string
|
|
|
|
// as a fallback case we track certain stats for tf-idf scoring
|
|
if ctx != nil {
|
|
if similarityModelCallback, ok := ctx.Value(search.
|
|
GetScoringModelCallbackKey).(search.GetScoringModelCallbackFn); ok {
|
|
similarityModel = similarityModelCallback()
|
|
}
|
|
}
|
|
switch similarityModel {
|
|
case index.BM25Scoring:
|
|
count, avgDocLength, err = bm25ScoreMetrics(ctx, field, indexReader)
|
|
if err != nil {
|
|
_ = reader.Close()
|
|
return nil, err
|
|
}
|
|
case index.TFIDFScoring:
|
|
fallthrough
|
|
default:
|
|
count, err = tfIDFScoreMetrics(indexReader)
|
|
if err != nil {
|
|
_ = reader.Close()
|
|
return nil, err
|
|
}
|
|
}
|
|
scorer := scorer.NewTermQueryScorer(term, field, boost, count, reader.Count(), avgDocLength, options)
|
|
return &TermSearcher{
|
|
indexReader: indexReader,
|
|
reader: reader,
|
|
scorer: scorer,
|
|
}, nil
|
|
}
|
|
|
|
func NewSynonymSearcher(ctx context.Context, indexReader index.IndexReader, term []byte, synonyms []string, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) {
|
|
createTermSearcher := func(term []byte, boostVal float64) (search.Searcher, error) {
|
|
needFreqNorm := options.Score != "none"
|
|
reader, err := indexReader.TermFieldReader(ctx, term, field, needFreqNorm, needFreqNorm, options.IncludeTermVectors)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return newTermSearcherFromReader(ctx, indexReader, reader, term, field, boostVal, options)
|
|
}
|
|
// create a searcher for the term itself
|
|
termSearcher, err := createTermSearcher(term, boost)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
// constituent searchers of the disjunction
|
|
qsearchers := make([]search.Searcher, 0, len(synonyms)+1)
|
|
// helper method to close all the searchers we've created
|
|
// in case of an error
|
|
qsearchersClose := func() {
|
|
for _, searcher := range qsearchers {
|
|
if searcher != nil {
|
|
_ = searcher.Close()
|
|
}
|
|
}
|
|
}
|
|
qsearchers = append(qsearchers, termSearcher)
|
|
// create a searcher for each synonym
|
|
for _, synonym := range synonyms {
|
|
synonymSearcher, err := createTermSearcher([]byte(synonym), boost/2.0)
|
|
if err != nil {
|
|
qsearchersClose()
|
|
return nil, err
|
|
}
|
|
qsearchers = append(qsearchers, synonymSearcher)
|
|
}
|
|
// create a disjunction searcher
|
|
rv, err := NewDisjunctionSearcher(ctx, indexReader, qsearchers, 0, options)
|
|
if err != nil {
|
|
qsearchersClose()
|
|
return nil, err
|
|
}
|
|
return rv, nil
|
|
}
|
|
|
|
func (s *TermSearcher) Size() int {
|
|
return reflectStaticSizeTermSearcher + size.SizeOfPtr +
|
|
s.reader.Size() +
|
|
s.tfd.Size() +
|
|
s.scorer.Size()
|
|
}
|
|
|
|
func (s *TermSearcher) Count() uint64 {
|
|
return s.reader.Count()
|
|
}
|
|
|
|
func (s *TermSearcher) Weight() float64 {
|
|
return s.scorer.Weight()
|
|
}
|
|
|
|
func (s *TermSearcher) SetQueryNorm(qnorm float64) {
|
|
s.scorer.SetQueryNorm(qnorm)
|
|
}
|
|
|
|
func (s *TermSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
|
|
termMatch, err := s.reader.Next(s.tfd.Reset())
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if termMatch == nil {
|
|
return nil, nil
|
|
}
|
|
|
|
// score match
|
|
docMatch := s.scorer.Score(ctx, termMatch)
|
|
// return doc match
|
|
return docMatch, nil
|
|
|
|
}
|
|
|
|
func (s *TermSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
|
|
termMatch, err := s.reader.Advance(ID, s.tfd.Reset())
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if termMatch == nil {
|
|
return nil, nil
|
|
}
|
|
|
|
// score match
|
|
docMatch := s.scorer.Score(ctx, termMatch)
|
|
|
|
// return doc match
|
|
return docMatch, nil
|
|
}
|
|
|
|
func (s *TermSearcher) Close() error {
|
|
return s.reader.Close()
|
|
}
|
|
|
|
func (s *TermSearcher) Min() int {
|
|
return 0
|
|
}
|
|
|
|
func (s *TermSearcher) DocumentMatchPoolSize() int {
|
|
return 1
|
|
}
|
|
|
|
func (s *TermSearcher) Optimize(kind string, octx index.OptimizableContext) (
|
|
index.OptimizableContext, error) {
|
|
o, ok := s.reader.(index.Optimizable)
|
|
if ok {
|
|
return o.Optimize(kind, octx)
|
|
}
|
|
|
|
return nil, nil
|
|
}
|
|
|
|
func isTermQuery(ctx context.Context) bool {
|
|
if ctx != nil {
|
|
// if the ctx already has a value set for query type
|
|
// it would've been done at a non term searcher level.
|
|
_, ok := ctx.Value(search.QueryTypeKey).(string)
|
|
return !ok
|
|
}
|
|
// if the context is nil, then don't set the query type
|
|
return false
|
|
}
|