1
0
Fork 0
golang-github-blevesearch-b.../document/field_synonym.go
Daniel Baumann 982828099e
Adding upstream version 2.5.1.
Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-05-19 00:20:02 +02:00

149 lines
3.8 KiB
Go

// Copyright (c) 2024 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"reflect"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeSynonymField holds the shallow size, in bytes, of a
// SynonymField value as reported by reflect. Size uses it as its base figure.
var reflectStaticSizeSynonymField int

func init() {
	// Measure the struct once at package load so Size never has to go
	// through reflection itself.
	reflectStaticSizeSynonymField = int(reflect.TypeOf(SynonymField{}).Size())
}
// DefaultSynonymIndexingOptions is the set of indexing options that
// NewSynonymField assigns to every synonym field it creates.
const DefaultSynonymIndexingOptions = index.IndexField
// SynonymField is a document field that carries a synonym definition: an
// optional list of input terms plus a list of synonym terms. Analyze turns
// the raw terms into the term -> synonyms table exposed by IterateSynonyms.
type SynonymField struct {
	// name is the field name reported by Name.
	name string

	// analyzer is applied to every input and synonym term by Analyze.
	analyzer analysis.Analyzer

	// options holds the indexing options reported by Options.
	options index.FieldIndexingOptions

	// input and synonyms are the raw terms supplied to NewSynonymField,
	// kept as given until Analyze runs.
	input, synonyms []string

	// numPlainTextBytes is the value reported by NumPlainTextBytes.
	numPlainTextBytes uint64

	// synonymMap is populated during analysis; it stays nil until Analyze
	// has been called.
	synonymMap map[string][]string
}
// Size returns an estimate of the memory used by the field, in bytes: the
// static struct size, one pointer, and the bytes of the field name. The
// contents of input, synonyms and synonymMap are not included.
func (s *SynonymField) Size() int {
	total := reflectStaticSizeSynonymField + size.SizeOfPtr
	total += len(s.name)
	return total
}
// Name returns the name the field was created with.
func (s *SynonymField) Name() string { return s.name }
// ArrayPositions always returns nil; a synonym field records no array
// positions.
func (s *SynonymField) ArrayPositions() []uint64 { return nil }
// Options returns the indexing options stored on the field.
func (s *SynonymField) Options() index.FieldIndexingOptions { return s.options }
// NumPlainTextBytes returns the plain-text byte count stored on the field.
// NewSynonymField does not set it, so it is zero unless assigned elsewhere.
func (s *SynonymField) NumPlainTextBytes() uint64 { return s.numPlainTextBytes }
// AnalyzedLength always returns 0 for a synonym field.
func (s *SynonymField) AnalyzedLength() int { return 0 }
// EncodedFieldType returns the single-byte type tag for synonym fields, 'y'.
func (s *SynonymField) EncodedFieldType() byte { return 'y' }
// AnalyzedTokenFrequencies always returns nil; the result of analysing a
// synonym field is exposed through IterateSynonyms instead.
func (s *SynonymField) AnalyzedTokenFrequencies() index.TokenFrequencies { return nil }
// Analyze runs every raw input and synonym term through the field's analyzer
// and stores the resulting term -> synonyms table in s.synonymMap.
//
// Terms for which analyzeSynonymTerm returns "" (anything that does not
// analyze to exactly one token) are dropped.
//
// NOTE(review): if input terms were supplied but all of them are dropped,
// processSynonymData sees an empty input list and cross-links the synonyms
// with each other instead; confirm that this fallback is intended.
func (s *SynonymField) Analyze() {
	// analyzeAll returns the non-empty analyzed forms of terms, in order.
	// The result is always allocated, even when nothing survives.
	analyzeAll := func(terms []string) []string {
		kept := make([]string, 0, len(terms))
		for _, raw := range terms {
			if analyzed := analyzeSynonymTerm(raw, s.analyzer); analyzed != "" {
				kept = append(kept, analyzed)
			}
		}
		return kept
	}

	// Leave the input list nil when no input terms were configured.
	var analyzedInput []string
	if len(s.input) > 0 {
		analyzedInput = analyzeAll(s.input)
	}

	s.synonymMap = processSynonymData(analyzedInput, analyzeAll(s.synonyms))
}
// Value always returns nil; a synonym field exposes no raw byte value.
func (s *SynonymField) Value() []byte { return nil }
// IterateSynonyms calls visitor once for every entry of the synonym table,
// passing the term and its synonym list. Entries are visited in Go's
// unspecified map iteration order. Nothing is visited if the table has not
// been populated by Analyze.
func (s *SynonymField) IterateSynonyms(visitor func(term string, synonyms []string)) {
	for key, equivalents := range s.synonymMap {
		visitor(key, equivalents)
	}
}
// NewSynonymField returns a synonym field with the given name, analyzer and
// raw terms, using DefaultSynonymIndexingOptions as its indexing options.
// The input and synonyms slices are stored as passed, not copied. The
// synonym table stays empty until Analyze is called.
func NewSynonymField(name string, analyzer analysis.Analyzer, input []string, synonyms []string) *SynonymField {
	field := new(SynonymField)
	field.name = name
	field.analyzer = analyzer
	field.options = DefaultSynonymIndexingOptions
	field.input = input
	field.synonyms = synonyms
	return field
}
// processSynonymData builds the term -> synonyms table from already analyzed
// terms.
//
// When input is non-empty, every input term is mapped to the same synonyms
// slice; the slice is shared between entries, not copied.
//
// When input is empty, the synonyms are treated as mutually equivalent: each
// distinct synonym is mapped to all other distinct synonyms, in order of
// first occurrence. Duplicates are collapsed first, because different raw
// terms can analyze to the same token and a term must not end up listed as a
// synonym of itself.
//
// The returned map is never nil.
func processSynonymData(input []string, synonyms []string) map[string][]string {
	if len(input) > 0 {
		// Map each term to the same list of synonyms.
		synonymMap := make(map[string][]string, len(input))
		for _, term := range input {
			synonymMap[term] = synonyms
		}
		return synonymMap
	}

	// Collapse repeated synonyms while preserving first-occurrence order.
	unique := make([]string, 0, len(synonyms))
	seen := make(map[string]struct{}, len(synonyms))
	for _, term := range synonyms {
		if _, dup := seen[term]; dup {
			continue
		}
		seen[term] = struct{}{}
		unique = append(unique, term)
	}

	// Precompute a map where each synonym points to all other synonyms.
	synonymMap := make(map[string][]string, len(unique))
	for i, term := range unique {
		others := make([]string, 0, len(unique)-1)
		others = append(others, unique[:i]...)
		others = append(others, unique[i+1:]...)
		synonymMap[term] = others
	}
	return synonymMap
}
// analyzeSynonymTerm runs term through analyzer and returns the analyzed
// form. Only terms that analyze to exactly one token are accepted; if the
// analyzer yields zero tokens or more than one, the result is "".
func analyzeSynonymTerm(term string, analyzer analysis.Analyzer) string {
	tokens := analyzer.Analyze([]byte(term))
	if len(tokens) != 1 {
		return ""
	}
	return string(tokens[0].Term)
}