783 lines
19 KiB
Go
783 lines
19 KiB
Go
// Copyright (c) 2014 Couchbase, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package query
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"log"
|
|
"strings"
|
|
|
|
"github.com/blevesearch/bleve/v2/analysis"
|
|
"github.com/blevesearch/bleve/v2/mapping"
|
|
"github.com/blevesearch/bleve/v2/search"
|
|
"github.com/blevesearch/bleve/v2/search/searcher"
|
|
"github.com/blevesearch/bleve/v2/util"
|
|
index "github.com/blevesearch/bleve_index_api"
|
|
)
|
|
|
|
// logger is the package-level logger; it discards all output until SetLog
// installs a real destination.
var logger = log.New(io.Discard, "bleve mapping ", log.LstdFlags)

// SetLog sets the logger used for logging
// by default log messages are sent to io.Discard
func SetLog(l *log.Logger) {
	logger = l
}
|
|
|
|
// A Query represents a description of the type
// and parameters for a query into the index.
type Query interface {
	// Searcher builds a search.Searcher that executes this query against
	// the given index reader, using the index mapping and searcher options.
	Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping,
		options search.SearcherOptions) (search.Searcher, error)
}
|
|
|
|
// A BoostableQuery represents a Query which can be boosted
// relative to other queries.
type BoostableQuery interface {
	Query
	// SetBoost sets the boost (score multiplier) for this query.
	SetBoost(b float64)
	// Boost returns the current boost for this query.
	Boost() float64
}
|
|
|
|
// A FieldableQuery represents a Query which can be restricted
// to a single field.
type FieldableQuery interface {
	Query
	// SetField restricts the query to the named field.
	SetField(f string)
	// Field returns the field this query is restricted to
	// (empty if unrestricted).
	Field() string
}
|
|
|
|
// A ValidatableQuery represents a Query which can be validated
// prior to execution.
type ValidatableQuery interface {
	Query
	// Validate returns a non-nil error if the query parameters are invalid.
	Validate() error
}
|
|
|
|
// ParsePreSearchData deserializes a JSON representation of
|
|
// a PreSearchData object.
|
|
func ParsePreSearchData(input []byte) (map[string]interface{}, error) {
|
|
var rv map[string]interface{}
|
|
|
|
var tmp map[string]json.RawMessage
|
|
err := util.UnmarshalJSON(input, &tmp)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
for k, v := range tmp {
|
|
switch k {
|
|
case search.KnnPreSearchDataKey:
|
|
var value []*search.DocumentMatch
|
|
if v != nil {
|
|
err := util.UnmarshalJSON(v, &value)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
if rv == nil {
|
|
rv = make(map[string]interface{})
|
|
}
|
|
rv[search.KnnPreSearchDataKey] = value
|
|
case search.SynonymPreSearchDataKey:
|
|
var value search.FieldTermSynonymMap
|
|
if v != nil {
|
|
err := util.UnmarshalJSON(v, &value)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
if rv == nil {
|
|
rv = make(map[string]interface{})
|
|
}
|
|
rv[search.SynonymPreSearchDataKey] = value
|
|
case search.BM25PreSearchDataKey:
|
|
var value *search.BM25Stats
|
|
if v != nil {
|
|
err := util.UnmarshalJSON(v, &value)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
if rv == nil {
|
|
rv = make(map[string]interface{})
|
|
}
|
|
rv[search.BM25PreSearchDataKey] = value
|
|
|
|
}
|
|
}
|
|
return rv, nil
|
|
}
|
|
|
|
// ParseQuery deserializes a JSON representation of
|
|
// a Query object.
|
|
func ParseQuery(input []byte) (Query, error) {
|
|
if len(input) == 0 {
|
|
// interpret as a match_none query
|
|
return NewMatchNoneQuery(), nil
|
|
}
|
|
|
|
var tmp map[string]interface{}
|
|
err := util.UnmarshalJSON(input, &tmp)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if len(tmp) == 0 {
|
|
// interpret as a match_none query
|
|
return NewMatchNoneQuery(), nil
|
|
}
|
|
|
|
_, hasFuzziness := tmp["fuzziness"]
|
|
_, isMatchQuery := tmp["match"]
|
|
_, isMatchPhraseQuery := tmp["match_phrase"]
|
|
_, hasTerms := tmp["terms"]
|
|
if hasFuzziness && !isMatchQuery && !isMatchPhraseQuery && !hasTerms {
|
|
var rv FuzzyQuery
|
|
err := util.UnmarshalJSON(input, &rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &rv, nil
|
|
}
|
|
if isMatchQuery {
|
|
var rv MatchQuery
|
|
err := util.UnmarshalJSON(input, &rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &rv, nil
|
|
}
|
|
if isMatchPhraseQuery {
|
|
var rv MatchPhraseQuery
|
|
err := util.UnmarshalJSON(input, &rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &rv, nil
|
|
}
|
|
if hasTerms {
|
|
var rv PhraseQuery
|
|
err := util.UnmarshalJSON(input, &rv)
|
|
if err != nil {
|
|
// now try multi-phrase
|
|
var rv2 MultiPhraseQuery
|
|
err = util.UnmarshalJSON(input, &rv2)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &rv2, nil
|
|
}
|
|
return &rv, nil
|
|
}
|
|
_, isTermQuery := tmp["term"]
|
|
if isTermQuery {
|
|
var rv TermQuery
|
|
err := util.UnmarshalJSON(input, &rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &rv, nil
|
|
}
|
|
_, hasMust := tmp["must"]
|
|
_, hasShould := tmp["should"]
|
|
_, hasMustNot := tmp["must_not"]
|
|
if hasMust || hasShould || hasMustNot {
|
|
var rv BooleanQuery
|
|
err := util.UnmarshalJSON(input, &rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &rv, nil
|
|
}
|
|
_, hasConjuncts := tmp["conjuncts"]
|
|
if hasConjuncts {
|
|
var rv ConjunctionQuery
|
|
err := util.UnmarshalJSON(input, &rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &rv, nil
|
|
}
|
|
_, hasDisjuncts := tmp["disjuncts"]
|
|
if hasDisjuncts {
|
|
var rv DisjunctionQuery
|
|
err := util.UnmarshalJSON(input, &rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &rv, nil
|
|
}
|
|
|
|
_, hasSyntaxQuery := tmp["query"]
|
|
if hasSyntaxQuery {
|
|
var rv QueryStringQuery
|
|
err := util.UnmarshalJSON(input, &rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &rv, nil
|
|
}
|
|
_, hasMin := tmp["min"].(float64)
|
|
_, hasMax := tmp["max"].(float64)
|
|
if hasMin || hasMax {
|
|
var rv NumericRangeQuery
|
|
err := util.UnmarshalJSON(input, &rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &rv, nil
|
|
}
|
|
_, hasMinStr := tmp["min"].(string)
|
|
_, hasMaxStr := tmp["max"].(string)
|
|
if hasMinStr || hasMaxStr {
|
|
var rv TermRangeQuery
|
|
err := util.UnmarshalJSON(input, &rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &rv, nil
|
|
}
|
|
_, hasStart := tmp["start"]
|
|
_, hasEnd := tmp["end"]
|
|
if hasStart || hasEnd {
|
|
var rv DateRangeStringQuery
|
|
err := util.UnmarshalJSON(input, &rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &rv, nil
|
|
}
|
|
_, hasPrefix := tmp["prefix"]
|
|
if hasPrefix {
|
|
var rv PrefixQuery
|
|
err := util.UnmarshalJSON(input, &rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &rv, nil
|
|
}
|
|
_, hasRegexp := tmp["regexp"]
|
|
if hasRegexp {
|
|
var rv RegexpQuery
|
|
err := util.UnmarshalJSON(input, &rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &rv, nil
|
|
}
|
|
_, hasWildcard := tmp["wildcard"]
|
|
if hasWildcard {
|
|
var rv WildcardQuery
|
|
err := util.UnmarshalJSON(input, &rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &rv, nil
|
|
}
|
|
_, hasMatchAll := tmp["match_all"]
|
|
if hasMatchAll {
|
|
var rv MatchAllQuery
|
|
err := util.UnmarshalJSON(input, &rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &rv, nil
|
|
}
|
|
_, hasMatchNone := tmp["match_none"]
|
|
if hasMatchNone {
|
|
var rv MatchNoneQuery
|
|
err := util.UnmarshalJSON(input, &rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &rv, nil
|
|
}
|
|
_, hasDocIds := tmp["ids"]
|
|
if hasDocIds {
|
|
var rv DocIDQuery
|
|
err := util.UnmarshalJSON(input, &rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &rv, nil
|
|
}
|
|
_, hasBool := tmp["bool"]
|
|
if hasBool {
|
|
var rv BoolFieldQuery
|
|
err := util.UnmarshalJSON(input, &rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &rv, nil
|
|
}
|
|
_, hasTopLeft := tmp["top_left"]
|
|
_, hasBottomRight := tmp["bottom_right"]
|
|
if hasTopLeft && hasBottomRight {
|
|
var rv GeoBoundingBoxQuery
|
|
err := util.UnmarshalJSON(input, &rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &rv, nil
|
|
}
|
|
_, hasDistance := tmp["distance"]
|
|
if hasDistance {
|
|
var rv GeoDistanceQuery
|
|
err := util.UnmarshalJSON(input, &rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &rv, nil
|
|
}
|
|
_, hasPoints := tmp["polygon_points"]
|
|
if hasPoints {
|
|
var rv GeoBoundingPolygonQuery
|
|
err := util.UnmarshalJSON(input, &rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &rv, nil
|
|
}
|
|
|
|
_, hasGeo := tmp["geometry"]
|
|
if hasGeo {
|
|
var rv GeoShapeQuery
|
|
err := util.UnmarshalJSON(input, &rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &rv, nil
|
|
}
|
|
|
|
_, hasCIDR := tmp["cidr"]
|
|
if hasCIDR {
|
|
var rv IPRangeQuery
|
|
err := util.UnmarshalJSON(input, &rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &rv, nil
|
|
}
|
|
|
|
return nil, fmt.Errorf("unknown query type")
|
|
}
|
|
|
|
// expandQuery traverses the input query tree and returns a new tree where
|
|
// query string queries have been expanded into base queries. Returned tree may
|
|
// reference queries from the input tree or new queries.
|
|
func expandQuery(m mapping.IndexMapping, query Query) (Query, error) {
|
|
var expand func(query Query) (Query, error)
|
|
var expandSlice func(queries []Query) ([]Query, error) = func(queries []Query) ([]Query, error) {
|
|
expanded := []Query{}
|
|
for _, q := range queries {
|
|
exp, err := expand(q)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
expanded = append(expanded, exp)
|
|
}
|
|
return expanded, nil
|
|
}
|
|
|
|
expand = func(query Query) (Query, error) {
|
|
switch q := query.(type) {
|
|
case *QueryStringQuery:
|
|
parsed, err := parseQuerySyntax(q.Query)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("could not parse '%s': %s", q.Query, err)
|
|
}
|
|
return expand(parsed)
|
|
case *ConjunctionQuery:
|
|
children, err := expandSlice(q.Conjuncts)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
q.Conjuncts = children
|
|
return q, nil
|
|
case *DisjunctionQuery:
|
|
children, err := expandSlice(q.Disjuncts)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
q.Disjuncts = children
|
|
return q, nil
|
|
case *BooleanQuery:
|
|
var err error
|
|
q.Must, err = expand(q.Must)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
q.Should, err = expand(q.Should)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
q.MustNot, err = expand(q.MustNot)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return q, nil
|
|
default:
|
|
return query, nil
|
|
}
|
|
}
|
|
return expand(query)
|
|
}
|
|
|
|
// DumpQuery returns a string representation of the query tree, where query
|
|
// string queries have been expanded into base queries. The output format is
|
|
// meant for debugging purpose and may change in the future.
|
|
func DumpQuery(m mapping.IndexMapping, query Query) (string, error) {
|
|
q, err := expandQuery(m, query)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
data, err := json.MarshalIndent(q, "", " ")
|
|
return string(data), err
|
|
}
|
|
|
|
// FieldSet represents a set of queried fields.
// Values are empty structs so set entries carry no payload.
type FieldSet map[string]struct{}
|
|
|
|
// ExtractFields returns a set of fields referenced by the query.
|
|
// The returned set may be nil if the query does not explicitly reference any field
|
|
// and the DefaultSearchField is unset in the index mapping.
|
|
func ExtractFields(q Query, m mapping.IndexMapping, fs FieldSet) (FieldSet, error) {
|
|
if q == nil || m == nil {
|
|
return fs, nil
|
|
}
|
|
var err error
|
|
switch q := q.(type) {
|
|
case FieldableQuery:
|
|
f := q.Field()
|
|
if f == "" {
|
|
f = m.DefaultSearchField()
|
|
}
|
|
if f != "" {
|
|
if fs == nil {
|
|
fs = make(FieldSet)
|
|
}
|
|
fs[f] = struct{}{}
|
|
}
|
|
case *QueryStringQuery:
|
|
var expandedQuery Query
|
|
expandedQuery, err = expandQuery(m, q)
|
|
if err == nil {
|
|
fs, err = ExtractFields(expandedQuery, m, fs)
|
|
}
|
|
case *BooleanQuery:
|
|
for _, subq := range []Query{q.Must, q.Should, q.MustNot} {
|
|
fs, err = ExtractFields(subq, m, fs)
|
|
if err != nil {
|
|
break
|
|
}
|
|
}
|
|
case *ConjunctionQuery:
|
|
for _, subq := range q.Conjuncts {
|
|
fs, err = ExtractFields(subq, m, fs)
|
|
if err != nil {
|
|
break
|
|
}
|
|
}
|
|
case *DisjunctionQuery:
|
|
for _, subq := range q.Disjuncts {
|
|
fs, err = ExtractFields(subq, m, fs)
|
|
if err != nil {
|
|
break
|
|
}
|
|
}
|
|
}
|
|
return fs, err
|
|
}
|
|
|
|
// Match types used by addSynonymsForTermWithMatchType to select how a term
// is matched against thesaurus keys.
const (
	FuzzyMatchType = iota // match with an edit-distance (fuzziness) budget
	RegexpMatchType       // match the term as a regular expression
	PrefixMatchType       // match the term as a prefix
)
|
|
|
|
// ExtractSynonyms extracts synonyms from the query tree and returns a map of
// field-term pairs to their synonyms. The input query tree is traversed and
// for each term query, the synonyms are extracted from the synonym source
// associated with the field. The synonyms are then added to the provided map.
// The map is returned and may be nil if no synonyms were found.
func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.ThesaurusReader,
	query Query, rv search.FieldTermSynonymMap,
) (search.FieldTermSynonymMap, error) {
	// Nothing to do without a thesaurus reader, a mapping, or a query.
	if r == nil || m == nil || query == nil {
		return rv, nil
	}
	var err error
	// resolveFieldAndSource maps an (optionally empty) field name to the
	// effective field and its synonym source; an empty field falls back to
	// the mapping's default search field.
	resolveFieldAndSource := func(field string) (string, string) {
		if field == "" {
			field = m.DefaultSearchField()
		}
		return field, m.SynonymSourceForPath(field)
	}
	// handleAnalyzer resolves the named analyzer, defaulting to the
	// mapping's analyzer for the field when no name is given.
	handleAnalyzer := func(analyzerName, field string) (analysis.Analyzer, error) {
		if analyzerName == "" {
			analyzerName = m.AnalyzerNameForPath(field)
		}
		analyzer := m.AnalyzerNamed(analyzerName)
		if analyzer == nil {
			return nil, fmt.Errorf("no analyzer named '%s' registered", analyzerName)
		}
		return analyzer, nil
	}
	switch q := query.(type) {
	case *BooleanQuery:
		// Recurse into all three clauses.
		rv, err = ExtractSynonyms(ctx, m, r, q.Must, rv)
		if err != nil {
			return nil, err
		}
		rv, err = ExtractSynonyms(ctx, m, r, q.Should, rv)
		if err != nil {
			return nil, err
		}
		rv, err = ExtractSynonyms(ctx, m, r, q.MustNot, rv)
		if err != nil {
			return nil, err
		}
	case *ConjunctionQuery:
		for _, child := range q.Conjuncts {
			rv, err = ExtractSynonyms(ctx, m, r, child, rv)
			if err != nil {
				return nil, err
			}
		}
	case *DisjunctionQuery:
		for _, child := range q.Disjuncts {
			rv, err = ExtractSynonyms(ctx, m, r, child, rv)
			if err != nil {
				return nil, err
			}
		}
	case *FuzzyQuery:
		field, source := resolveFieldAndSource(q.FieldVal)
		if source != "" {
			fuzziness := q.Fuzziness
			if q.autoFuzzy {
				// Derive fuzziness from the term length.
				fuzziness = searcher.GetAutoFuzziness(q.Term)
			}
			rv, err = addSynonymsForTermWithMatchType(ctx, FuzzyMatchType, source, field, q.Term, fuzziness, q.Prefix, r, rv)
			if err != nil {
				return nil, err
			}
		}
	case *MatchQuery, *MatchPhraseQuery:
		// Both types are analyzed the same way; pull out the parameters
		// that differ between them. Note MatchPhraseQuery has no prefix.
		var analyzerName, matchString, fieldVal string
		var fuzziness, prefix int
		var autoFuzzy bool
		if mq, ok := q.(*MatchQuery); ok {
			analyzerName, fieldVal, matchString, fuzziness, prefix, autoFuzzy = mq.Analyzer, mq.FieldVal, mq.Match, mq.Fuzziness, mq.Prefix, mq.autoFuzzy
		} else if mpq, ok := q.(*MatchPhraseQuery); ok {
			analyzerName, fieldVal, matchString, fuzziness, autoFuzzy = mpq.Analyzer, mpq.FieldVal, mpq.MatchPhrase, mpq.Fuzziness, mpq.autoFuzzy
		}
		field, source := resolveFieldAndSource(fieldVal)
		if source != "" {
			analyzer, err := handleAnalyzer(analyzerName, field)
			if err != nil {
				return nil, err
			}
			// Analyze the match string and look up synonyms per token.
			tokens := analyzer.Analyze([]byte(matchString))
			for _, token := range tokens {
				if autoFuzzy {
					fuzziness = searcher.GetAutoFuzziness(string(token.Term))
				}
				rv, err = addSynonymsForTermWithMatchType(ctx, FuzzyMatchType, source, field, string(token.Term), fuzziness, prefix, r, rv)
				if err != nil {
					return nil, err
				}
			}
		}
	case *MultiPhraseQuery, *PhraseQuery:
		// Phrase variants carry pre-tokenized terms; no analyzer needed.
		var fieldVal string
		var fuzziness int
		var autoFuzzy bool
		if mpq, ok := q.(*MultiPhraseQuery); ok {
			fieldVal, fuzziness, autoFuzzy = mpq.FieldVal, mpq.Fuzziness, mpq.autoFuzzy
		} else if pq, ok := q.(*PhraseQuery); ok {
			fieldVal, fuzziness, autoFuzzy = pq.FieldVal, pq.Fuzziness, pq.autoFuzzy
		}
		field, source := resolveFieldAndSource(fieldVal)
		if source != "" {
			// Flatten multi-phrase term groups into one list.
			var terms []string
			if mpq, ok := q.(*MultiPhraseQuery); ok {
				for _, termGroup := range mpq.Terms {
					terms = append(terms, termGroup...)
				}
			} else if pq, ok := q.(*PhraseQuery); ok {
				terms = pq.Terms
			}
			for _, term := range terms {
				if autoFuzzy {
					fuzziness = searcher.GetAutoFuzziness(term)
				}
				rv, err = addSynonymsForTermWithMatchType(ctx, FuzzyMatchType, source, field, term, fuzziness, 0, r, rv)
				if err != nil {
					return nil, err
				}
			}
		}
	case *PrefixQuery:
		field, source := resolveFieldAndSource(q.FieldVal)
		if source != "" {
			rv, err = addSynonymsForTermWithMatchType(ctx, PrefixMatchType, source, field, q.Prefix, 0, 0, r, rv)
			if err != nil {
				return nil, err
			}
		}
	case *QueryStringQuery:
		// Expand the query string, then extract from the expansion.
		expanded, err := expandQuery(m, q)
		if err != nil {
			return nil, err
		}
		rv, err = ExtractSynonyms(ctx, m, r, expanded, rv)
		if err != nil {
			return nil, err
		}
	case *TermQuery:
		field, source := resolveFieldAndSource(q.FieldVal)
		if source != "" {
			// Exact term: direct thesaurus lookup, no key enumeration.
			rv, err = addSynonymsForTerm(ctx, source, field, q.Term, r, rv)
			if err != nil {
				return nil, err
			}
		}
	case *RegexpQuery:
		field, source := resolveFieldAndSource(q.FieldVal)
		if source != "" {
			// Leading "^" is implicit in the thesaurus key match.
			rv, err = addSynonymsForTermWithMatchType(ctx, RegexpMatchType, source, field, strings.TrimPrefix(q.Regexp, "^"), 0, 0, r, rv)
			if err != nil {
				return nil, err
			}
		}
	case *WildcardQuery:
		field, source := resolveFieldAndSource(q.FieldVal)
		if source != "" {
			// Wildcards are rewritten to an equivalent regexp first.
			rv, err = addSynonymsForTermWithMatchType(ctx, RegexpMatchType, source, field, wildcardRegexpReplacer.Replace(q.Wildcard), 0, 0, r, rv)
			if err != nil {
				return nil, err
			}
		}
	}
	return rv, nil
}
|
|
|
|
// addFuzzySynonymsForTerm finds all terms that match the given term with the
|
|
// given fuzziness and adds their synonyms to the provided map.
|
|
func addSynonymsForTermWithMatchType(ctx context.Context, matchType int, src, field, term string, fuzziness, prefix int,
|
|
r index.ThesaurusReader, rv search.FieldTermSynonymMap,
|
|
) (search.FieldTermSynonymMap, error) {
|
|
// Determine the terms based on the match type (fuzzy, prefix, or regexp)
|
|
var thesKeys index.ThesaurusKeys
|
|
var err error
|
|
var terms []string
|
|
switch matchType {
|
|
case FuzzyMatchType:
|
|
// Ensure valid fuzziness
|
|
if fuzziness == 0 {
|
|
rv, err = addSynonymsForTerm(ctx, src, field, term, r, rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return rv, nil
|
|
}
|
|
if fuzziness > searcher.MaxFuzziness {
|
|
return nil, fmt.Errorf("fuzziness exceeds max (%d)", searcher.MaxFuzziness)
|
|
}
|
|
if fuzziness < 0 {
|
|
return nil, fmt.Errorf("invalid fuzziness, negative")
|
|
}
|
|
// Handle fuzzy match
|
|
prefixTerm := ""
|
|
for i, r := range term {
|
|
if i < prefix {
|
|
prefixTerm += string(r)
|
|
} else {
|
|
break
|
|
}
|
|
}
|
|
thesKeys, err = r.ThesaurusKeysFuzzy(src, term, fuzziness, prefixTerm)
|
|
case RegexpMatchType:
|
|
// Handle regexp match
|
|
thesKeys, err = r.ThesaurusKeysRegexp(src, term)
|
|
case PrefixMatchType:
|
|
// Handle prefix match
|
|
thesKeys, err = r.ThesaurusKeysPrefix(src, []byte(term))
|
|
default:
|
|
return nil, fmt.Errorf("invalid match type: %d", matchType)
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer func() {
|
|
if cerr := thesKeys.Close(); cerr != nil && err == nil {
|
|
err = cerr
|
|
}
|
|
}()
|
|
// Collect the matching terms
|
|
terms = []string{}
|
|
tfd, err := thesKeys.Next()
|
|
for err == nil && tfd != nil {
|
|
terms = append(terms, tfd.Term)
|
|
tfd, err = thesKeys.Next()
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
for _, synTerm := range terms {
|
|
rv, err = addSynonymsForTerm(ctx, src, field, synTerm, r, rv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
return rv, nil
|
|
}
|
|
|
|
func addSynonymsForTerm(ctx context.Context, src, field, term string,
|
|
r index.ThesaurusReader, rv search.FieldTermSynonymMap,
|
|
) (search.FieldTermSynonymMap, error) {
|
|
termReader, err := r.ThesaurusTermReader(ctx, src, []byte(term))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer func() {
|
|
if cerr := termReader.Close(); cerr != nil && err == nil {
|
|
err = cerr
|
|
}
|
|
}()
|
|
var synonyms []string
|
|
synonym, err := termReader.Next()
|
|
for err == nil && synonym != "" {
|
|
synonyms = append(synonyms, synonym)
|
|
synonym, err = termReader.Next()
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if len(synonyms) > 0 {
|
|
if rv == nil {
|
|
rv = make(search.FieldTermSynonymMap)
|
|
}
|
|
if _, exists := rv[field]; !exists {
|
|
rv[field] = make(map[string][]string)
|
|
}
|
|
rv[field][term] = synonyms
|
|
}
|
|
return rv, nil
|
|
}
|