// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package searcher

import (
	"context"
	"fmt"
	"math"
	"reflect"

	"github.com/blevesearch/bleve/v2/search"
	"github.com/blevesearch/bleve/v2/search/scorer"
	"github.com/blevesearch/bleve/v2/size"

	index "github.com/blevesearch/bleve_index_api"
)

var reflectStaticSizeTermSearcher int

func init() {
	var ts TermSearcher
	reflectStaticSizeTermSearcher = int(reflect.TypeOf(ts).Size())
}
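
// TermSearcher implements search.Searcher for a single term in a single
// field, scoring each matching document with a TermQueryScorer.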
type TermSearcher struct {
	indexReader index.IndexReader
	reader      index.TermFieldReader
	scorer      *scorer.TermQueryScorer
	tfd         index.TermFieldDoc
}
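
// NewTermSearcher builds a TermSearcher for the given term and field,
// tagging the context as a term query when no query type has been set yet.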
func NewTermSearcher(ctx context.Context, indexReader index.IndexReader,
	term string, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) {
	if isTermQuery(ctx) {
		ctx = context.WithValue(ctx, search.QueryTypeKey, search.Term)
	}
	return NewTermSearcherBytes(ctx, indexReader, []byte(term), field, boost, options)
}
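
// NewTermSearcherBytes builds a TermSearcher for a term given as raw bytes.
// If the context carries a synonym map for the field and the term has
// synonyms, a disjunction over the term and its synonyms is returned instead.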
func NewTermSearcherBytes(ctx context.Context, indexReader index.IndexReader,
	term []byte, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) {
	if ctx != nil {
		if fts, ok := ctx.Value(search.FieldTermSynonymMapKey).(search.FieldTermSynonymMap); ok {
			if ts, exists := fts[field]; exists {
				if s, found := ts[string(term)]; found {
					return NewSynonymSearcher(ctx, indexReader, term, s, field, boost, options)
				}
			}
		}
	}
	needFreqNorm := options.Score != "none"
	reader, err := indexReader.TermFieldReader(ctx, term, field, needFreqNorm, needFreqNorm, options.IncludeTermVectors)
	if err != nil {
		return nil, err
	}
	return newTermSearcherFromReader(ctx, indexReader, reader, term, field, boost, options)
}
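
// tfIDFScoreMetrics returns the document count used as the corpus size
// for tf-idf scoring.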
func tfIDFScoreMetrics(indexReader index.IndexReader) (uint64, error) {
	// default tf-idf stats
	count, err := indexReader.DocCount()
	if err != nil {
		return 0, err
	}
	if count == 0 {
		return 0, nil
	}
	return count, nil
}
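
// bm25ScoreMetrics returns the document count and an average field length
// estimate for the field, taken from precomputed BM25 stats on the context
// when available, otherwise computed from the index reader directly.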
func bm25ScoreMetrics(ctx context.Context, field string,
	indexReader index.IndexReader) (uint64, float64, error) {
	var count uint64
	var fieldCardinality int
	var err error

	bm25Stats, ok := ctx.Value(search.BM25StatsKey).(*search.BM25Stats)
	if !ok {
		count, err = indexReader.DocCount()
		if err != nil {
			return 0, 0, err
		}
		if bm25Reader, ok := indexReader.(index.BM25Reader); ok {
			fieldCardinality, err = bm25Reader.FieldCardinality(field)
			if err != nil {
				return 0, 0, err
			}
		}
	} else {
		count = uint64(bm25Stats.DocCount)
		fieldCardinality, ok = bm25Stats.FieldCardinality[field]
		if !ok {
			return 0, 0, fmt.Errorf("field stat for bm25 not present %s", field)
		}
	}

	if count == 0 && fieldCardinality == 0 {
		return 0, 0, nil
	}
	return count, math.Ceil(float64(fieldCardinality) / float64(count)), nil
}
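
// newTermSearcherFromReader wraps an already-open TermFieldReader in a
// TermSearcher, selecting BM25 or tf-idf statistics based on the scoring
// model callback found on the context (tf-idf is the fallback).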
func newTermSearcherFromReader(ctx context.Context, indexReader index.IndexReader,
	reader index.TermFieldReader, term []byte, field string, boost float64,
	options search.SearcherOptions) (*TermSearcher, error) {
	var count uint64
	var avgDocLength float64
	var err error
	var similarityModel string

	// as a fallback case we track certain stats for tf-idf scoring
	if ctx != nil {
		if similarityModelCallback, ok := ctx.Value(search.
			GetScoringModelCallbackKey).(search.GetScoringModelCallbackFn); ok {
			similarityModel = similarityModelCallback()
		}
	}
	switch similarityModel {
	case index.BM25Scoring:
		count, avgDocLength, err = bm25ScoreMetrics(ctx, field, indexReader)
		if err != nil {
			_ = reader.Close()
			return nil, err
		}
	case index.TFIDFScoring:
		fallthrough
	default:
		count, err = tfIDFScoreMetrics(indexReader)
		if err != nil {
			_ = reader.Close()
			return nil, err
		}
	}
	scorer := scorer.NewTermQueryScorer(term, field, boost, count, reader.Count(), avgDocLength, options)
	return &TermSearcher{
		indexReader: indexReader,
		reader:      reader,
		scorer:      scorer,
	}, nil
}
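
// NewSynonymSearcher builds a disjunction over the original term and each of
// its synonyms, with synonym matches weighted at half the original boost.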
func NewSynonymSearcher(ctx context.Context, indexReader index.IndexReader, term []byte, synonyms []string, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) {
	createTermSearcher := func(term []byte, boostVal float64) (search.Searcher, error) {
		needFreqNorm := options.Score != "none"
		reader, err := indexReader.TermFieldReader(ctx, term, field, needFreqNorm, needFreqNorm, options.IncludeTermVectors)
		if err != nil {
			return nil, err
		}
		return newTermSearcherFromReader(ctx, indexReader, reader, term, field, boostVal, options)
	}
	// create a searcher for the term itself
	termSearcher, err := createTermSearcher(term, boost)
	if err != nil {
		return nil, err
	}
	// constituent searchers of the disjunction
	qsearchers := make([]search.Searcher, 0, len(synonyms)+1)
	// helper method to close all the searchers we've created
	// in case of an error
	qsearchersClose := func() {
		for _, searcher := range qsearchers {
			if searcher != nil {
				_ = searcher.Close()
			}
		}
	}
	qsearchers = append(qsearchers, termSearcher)
	// create a searcher for each synonym
	for _, synonym := range synonyms {
		synonymSearcher, err := createTermSearcher([]byte(synonym), boost/2.0)
		if err != nil {
			qsearchersClose()
			return nil, err
		}
		qsearchers = append(qsearchers, synonymSearcher)
	}
	// create a disjunction searcher
	rv, err := NewDisjunctionSearcher(ctx, indexReader, qsearchers, 0, options)
	if err != nil {
		qsearchersClose()
		return nil, err
	}
	return rv, nil
}

func (s *TermSearcher) Size() int {
	return reflectStaticSizeTermSearcher + size.SizeOfPtr +
		s.reader.Size() +
		s.tfd.Size() +
		s.scorer.Size()
}

func (s *TermSearcher) Count() uint64 {
	return s.reader.Count()
}

func (s *TermSearcher) Weight() float64 {
	return s.scorer.Weight()
}

func (s *TermSearcher) SetQueryNorm(qnorm float64) {
	s.scorer.SetQueryNorm(qnorm)
}
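
// Next returns the next document containing the term, scored against the
// current search context, or nil when the postings are exhausted.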
func (s *TermSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
	termMatch, err := s.reader.Next(s.tfd.Reset())
	if err != nil {
		return nil, err
	}
	if termMatch == nil {
		return nil, nil
	}
	// score match
	docMatch := s.scorer.Score(ctx, termMatch)
	// return doc match
	return docMatch, nil
}
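
// Advance skips ahead to the first matching document whose internal ID is at
// or after the given ID, returning nil when no such document exists.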
func (s *TermSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
	termMatch, err := s.reader.Advance(ID, s.tfd.Reset())
	if err != nil {
		return nil, err
	}
	if termMatch == nil {
		return nil, nil
	}
	// score match
	docMatch := s.scorer.Score(ctx, termMatch)
	// return doc match
	return docMatch, nil
}

func (s *TermSearcher) Close() error {
	return s.reader.Close()
}

func (s *TermSearcher) Min() int {
	return 0
}

func (s *TermSearcher) DocumentMatchPoolSize() int {
	return 1
}
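
// Optimize delegates optimization to the underlying TermFieldReader when it
// supports the index.Optimizable interface.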
func (s *TermSearcher) Optimize(kind string, octx index.OptimizableContext) (
	index.OptimizableContext, error) {
	o, ok := s.reader.(index.Optimizable)
	if ok {
		return o.Optimize(kind, octx)
	}
	return nil, nil
}
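
// isTermQuery reports whether the query type should be recorded as a plain
// term query, i.e. the context is non-nil and no query type has been set yet.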
func isTermQuery(ctx context.Context) bool {
	if ctx != nil {
		// if the ctx already has a value set for query type
		// it would've been done at a non term searcher level.
		_, ok := ctx.Value(search.QueryTypeKey).(string)
		return !ok
	}
	// if the context is nil, then don't set the query type
	return false
}