Adding upstream version 2.5.1.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
c71cb8b61d
commit
982828099e
783 changed files with 150650 additions and 0 deletions
282
search/searcher/search_term.go
Normal file
282
search/searcher/search_term.go
Normal file
|
@ -0,0 +1,282 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package searcher
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/search/scorer"
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
var reflectStaticSizeTermSearcher int
|
||||
|
||||
func init() {
|
||||
var ts TermSearcher
|
||||
reflectStaticSizeTermSearcher = int(reflect.TypeOf(ts).Size())
|
||||
}
|
||||
|
||||
type TermSearcher struct {
|
||||
indexReader index.IndexReader
|
||||
reader index.TermFieldReader
|
||||
scorer *scorer.TermQueryScorer
|
||||
tfd index.TermFieldDoc
|
||||
}
|
||||
|
||||
func NewTermSearcher(ctx context.Context, indexReader index.IndexReader,
|
||||
term string, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) {
|
||||
if isTermQuery(ctx) {
|
||||
ctx = context.WithValue(ctx, search.QueryTypeKey, search.Term)
|
||||
}
|
||||
return NewTermSearcherBytes(ctx, indexReader, []byte(term), field, boost, options)
|
||||
}
|
||||
|
||||
func NewTermSearcherBytes(ctx context.Context, indexReader index.IndexReader,
|
||||
term []byte, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) {
|
||||
if ctx != nil {
|
||||
if fts, ok := ctx.Value(search.FieldTermSynonymMapKey).(search.FieldTermSynonymMap); ok {
|
||||
if ts, exists := fts[field]; exists {
|
||||
if s, found := ts[string(term)]; found {
|
||||
return NewSynonymSearcher(ctx, indexReader, term, s, field, boost, options)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
needFreqNorm := options.Score != "none"
|
||||
reader, err := indexReader.TermFieldReader(ctx, term, field, needFreqNorm, needFreqNorm, options.IncludeTermVectors)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return newTermSearcherFromReader(ctx, indexReader, reader, term, field, boost, options)
|
||||
}
|
||||
|
||||
func tfIDFScoreMetrics(indexReader index.IndexReader) (uint64, error) {
|
||||
// default tf-idf stats
|
||||
count, err := indexReader.DocCount()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if count == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
return count, nil
|
||||
}
|
||||
|
||||
func bm25ScoreMetrics(ctx context.Context, field string,
|
||||
indexReader index.IndexReader) (uint64, float64, error) {
|
||||
var count uint64
|
||||
var fieldCardinality int
|
||||
var err error
|
||||
|
||||
bm25Stats, ok := ctx.Value(search.BM25StatsKey).(*search.BM25Stats)
|
||||
if !ok {
|
||||
count, err = indexReader.DocCount()
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
if bm25Reader, ok := indexReader.(index.BM25Reader); ok {
|
||||
fieldCardinality, err = bm25Reader.FieldCardinality(field)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
count = uint64(bm25Stats.DocCount)
|
||||
fieldCardinality, ok = bm25Stats.FieldCardinality[field]
|
||||
if !ok {
|
||||
return 0, 0, fmt.Errorf("field stat for bm25 not present %s", field)
|
||||
}
|
||||
}
|
||||
|
||||
if count == 0 && fieldCardinality == 0 {
|
||||
return 0, 0, nil
|
||||
}
|
||||
return count, math.Ceil(float64(fieldCardinality) / float64(count)), nil
|
||||
}
|
||||
|
||||
func newTermSearcherFromReader(ctx context.Context, indexReader index.IndexReader,
|
||||
reader index.TermFieldReader, term []byte, field string, boost float64,
|
||||
options search.SearcherOptions) (*TermSearcher, error) {
|
||||
var count uint64
|
||||
var avgDocLength float64
|
||||
var err error
|
||||
var similarityModel string
|
||||
|
||||
// as a fallback case we track certain stats for tf-idf scoring
|
||||
if ctx != nil {
|
||||
if similarityModelCallback, ok := ctx.Value(search.
|
||||
GetScoringModelCallbackKey).(search.GetScoringModelCallbackFn); ok {
|
||||
similarityModel = similarityModelCallback()
|
||||
}
|
||||
}
|
||||
switch similarityModel {
|
||||
case index.BM25Scoring:
|
||||
count, avgDocLength, err = bm25ScoreMetrics(ctx, field, indexReader)
|
||||
if err != nil {
|
||||
_ = reader.Close()
|
||||
return nil, err
|
||||
}
|
||||
case index.TFIDFScoring:
|
||||
fallthrough
|
||||
default:
|
||||
count, err = tfIDFScoreMetrics(indexReader)
|
||||
if err != nil {
|
||||
_ = reader.Close()
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
scorer := scorer.NewTermQueryScorer(term, field, boost, count, reader.Count(), avgDocLength, options)
|
||||
return &TermSearcher{
|
||||
indexReader: indexReader,
|
||||
reader: reader,
|
||||
scorer: scorer,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func NewSynonymSearcher(ctx context.Context, indexReader index.IndexReader, term []byte, synonyms []string, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) {
|
||||
createTermSearcher := func(term []byte, boostVal float64) (search.Searcher, error) {
|
||||
needFreqNorm := options.Score != "none"
|
||||
reader, err := indexReader.TermFieldReader(ctx, term, field, needFreqNorm, needFreqNorm, options.IncludeTermVectors)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return newTermSearcherFromReader(ctx, indexReader, reader, term, field, boostVal, options)
|
||||
}
|
||||
// create a searcher for the term itself
|
||||
termSearcher, err := createTermSearcher(term, boost)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// constituent searchers of the disjunction
|
||||
qsearchers := make([]search.Searcher, 0, len(synonyms)+1)
|
||||
// helper method to close all the searchers we've created
|
||||
// in case of an error
|
||||
qsearchersClose := func() {
|
||||
for _, searcher := range qsearchers {
|
||||
if searcher != nil {
|
||||
_ = searcher.Close()
|
||||
}
|
||||
}
|
||||
}
|
||||
qsearchers = append(qsearchers, termSearcher)
|
||||
// create a searcher for each synonym
|
||||
for _, synonym := range synonyms {
|
||||
synonymSearcher, err := createTermSearcher([]byte(synonym), boost/2.0)
|
||||
if err != nil {
|
||||
qsearchersClose()
|
||||
return nil, err
|
||||
}
|
||||
qsearchers = append(qsearchers, synonymSearcher)
|
||||
}
|
||||
// create a disjunction searcher
|
||||
rv, err := NewDisjunctionSearcher(ctx, indexReader, qsearchers, 0, options)
|
||||
if err != nil {
|
||||
qsearchersClose()
|
||||
return nil, err
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (s *TermSearcher) Size() int {
|
||||
return reflectStaticSizeTermSearcher + size.SizeOfPtr +
|
||||
s.reader.Size() +
|
||||
s.tfd.Size() +
|
||||
s.scorer.Size()
|
||||
}
|
||||
|
||||
func (s *TermSearcher) Count() uint64 {
|
||||
return s.reader.Count()
|
||||
}
|
||||
|
||||
func (s *TermSearcher) Weight() float64 {
|
||||
return s.scorer.Weight()
|
||||
}
|
||||
|
||||
func (s *TermSearcher) SetQueryNorm(qnorm float64) {
|
||||
s.scorer.SetQueryNorm(qnorm)
|
||||
}
|
||||
|
||||
func (s *TermSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
|
||||
termMatch, err := s.reader.Next(s.tfd.Reset())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if termMatch == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// score match
|
||||
docMatch := s.scorer.Score(ctx, termMatch)
|
||||
// return doc match
|
||||
return docMatch, nil
|
||||
|
||||
}
|
||||
|
||||
func (s *TermSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
|
||||
termMatch, err := s.reader.Advance(ID, s.tfd.Reset())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if termMatch == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// score match
|
||||
docMatch := s.scorer.Score(ctx, termMatch)
|
||||
|
||||
// return doc match
|
||||
return docMatch, nil
|
||||
}
|
||||
|
||||
func (s *TermSearcher) Close() error {
|
||||
return s.reader.Close()
|
||||
}
|
||||
|
||||
func (s *TermSearcher) Min() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (s *TermSearcher) DocumentMatchPoolSize() int {
|
||||
return 1
|
||||
}
|
||||
|
||||
func (s *TermSearcher) Optimize(kind string, octx index.OptimizableContext) (
|
||||
index.OptimizableContext, error) {
|
||||
o, ok := s.reader.(index.Optimizable)
|
||||
if ok {
|
||||
return o.Optimize(kind, octx)
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func isTermQuery(ctx context.Context) bool {
|
||||
if ctx != nil {
|
||||
// if the ctx already has a value set for query type
|
||||
// it would've been done at a non term searcher level.
|
||||
_, ok := ctx.Value(search.QueryTypeKey).(string)
|
||||
return !ok
|
||||
}
|
||||
// if the context is nil, then don't set the query type
|
||||
return false
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue