1
0
Fork 0
golang-github-blevesearch-b.../analysis/token/hierarchy/hierarchy.go
Daniel Baumann 982828099e
Adding upstream version 2.5.1.
Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-05-19 00:20:02 +02:00

95 lines
2 KiB
Go

package hierarchy
import (
"bytes"
"fmt"
"math"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the identifier under which this package's init function registers
// the hierarchy token filter constructor.
const Name = "hierarchy"
// HierarchyFilter is a token filter that turns a sequence of path segments
// into cumulative, delimiter-joined path tokens (one token per level).
type HierarchyFilter struct {
	// delimiter separates levels; it is used both to split incoming terms
	// (when splitInput is set) and to join the levels of emitted terms.
	delimiter []byte
	// maxLevels is the number of accumulated levels at which Filter stops
	// and returns.
	maxLevels int
	// splitInput selects whether each incoming term is split on delimiter
	// or taken as a single level.
	splitInput bool
}
// NewHierarchyFilter returns a HierarchyFilter configured with the given
// level delimiter, maximum number of levels, and input-splitting mode.
// The delimiter slice is stored as passed, not copied.
func NewHierarchyFilter(delimiter []byte, maxLevels int, splitInput bool) *HierarchyFilter {
	filter := new(HierarchyFilter)
	filter.delimiter = delimiter
	filter.maxLevels = maxLevels
	filter.splitInput = splitInput
	return filter
}
// Filter walks the input tokens in order, accumulating hierarchy levels, and
// returns a new stream holding one token per accumulated level. Each output
// term is every level seen so far joined with the filter's delimiter.
//
// When splitInput is set, each input term is first split on the delimiter and
// every piece counts as a level; otherwise each input term is a single level.
// Processing stops as soon as the number of accumulated levels reaches
// maxLevels. Because that check runs after a level has been added, at least
// one token is produced for any non-empty input.
func (s *HierarchyFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	// maxLevels is only a capacity hint here. It may be enormous (the
	// registered constructor defaults it to the largest integer) or negative,
	// and passing either straight to make would panic, so clamp it to the
	// input length and to zero.
	capHint := len(input)
	if s.maxLevels < capHint {
		capHint = s.maxLevels
	}
	if capHint < 0 {
		capHint = 0
	}
	rv := make(analysis.TokenStream, 0, capHint)

	var soFar [][]byte
	for _, token := range input {
		// Without splitting, the whole term is treated as one level.
		parts := [][]byte{token.Term}
		if s.splitInput {
			parts = bytes.Split(token.Term, s.delimiter)
		}
		for _, part := range parts {
			soFar, rv = s.buildToken(rv, soFar, part)
			if len(soFar) >= s.maxLevels {
				return rv
			}
		}
	}
	return rv
}
// buildToken appends part to the accumulated levels in soFar, joins all
// levels with the filter's delimiter, and appends a Shingle token carrying
// that joined term to tokenStream. The new token always has Start 0, End
// equal to the byte length of the joined term, and Position 1. It returns
// the extended level list and the extended token stream.
func (s *HierarchyFilter) buildToken(tokenStream analysis.TokenStream, soFar [][]byte, part []byte) (
	[][]byte, analysis.TokenStream) {
	levels := append(soFar, part)
	joined := bytes.Join(levels, s.delimiter)

	tok := &analysis.Token{
		Term:     joined,
		Position: 1,
		Start:    0,
		End:      len(joined),
		Type:     analysis.Shingle,
	}
	return levels, append(tokenStream, tok)
}
// HierarchyFilterConstructor builds a HierarchyFilter from a generic config
// map. Recognized keys:
//
//   - "delimiter" (string, required): the level separator.
//   - "max" (float64, optional): maximum number of levels; defaults to the
//     largest int value, i.e. effectively unlimited.
//   - "split_input" (bool, optional): whether input terms are split on the
//     delimiter; defaults to true.
//
// Keys that are absent or hold a value of a different type are treated as
// unspecified. An error is returned when "delimiter" is unspecified. The cache
// argument is not used.
func HierarchyFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	delimiter, ok := config["delimiter"].(string)
	if !ok {
		return nil, fmt.Errorf("must specify delimiter")
	}

	// math.MaxInt (rather than math.MaxInt64) keeps this compiling where int
	// is 32 bits wide. Values at or above the int range, and NaN, keep the
	// unlimited default because converting them to int is
	// implementation-defined.
	maxLevels := math.MaxInt
	if maxVal, ok := config["max"].(float64); ok && maxVal < float64(math.MaxInt) {
		maxLevels = int(maxVal)
	}

	splitInput := true
	if splitInputVal, ok := config["split_input"].(bool); ok {
		splitInput = splitInputVal
	}

	return NewHierarchyFilter([]byte(delimiter), maxLevels, splitInput), nil
}
// init registers HierarchyFilterConstructor with the registry under Name and
// panics if the registration returns an error.
func init() {
	if err := registry.RegisterTokenFilter(Name, HierarchyFilterConstructor); err != nil {
		panic(err)
	}
}