95 lines
2 KiB
Go
95 lines
2 KiB
Go
package hierarchy
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"math"
|
|
|
|
"github.com/blevesearch/bleve/v2/analysis"
|
|
"github.com/blevesearch/bleve/v2/registry"
|
|
)
|
|
|
|
const Name = "hierarchy"
|
|
|
|
type HierarchyFilter struct {
|
|
maxLevels int
|
|
delimiter []byte
|
|
splitInput bool
|
|
}
|
|
|
|
func NewHierarchyFilter(delimiter []byte, maxLevels int, splitInput bool) *HierarchyFilter {
|
|
return &HierarchyFilter{
|
|
maxLevels: maxLevels,
|
|
delimiter: delimiter,
|
|
splitInput: splitInput,
|
|
}
|
|
}
|
|
|
|
func (s *HierarchyFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
|
rv := make(analysis.TokenStream, 0, s.maxLevels)
|
|
|
|
var soFar [][]byte
|
|
for _, token := range input {
|
|
if s.splitInput {
|
|
parts := bytes.Split(token.Term, s.delimiter)
|
|
for _, part := range parts {
|
|
soFar, rv = s.buildToken(rv, soFar, part)
|
|
if len(soFar) >= s.maxLevels {
|
|
return rv
|
|
}
|
|
}
|
|
} else {
|
|
soFar, rv = s.buildToken(rv, soFar, token.Term)
|
|
if len(soFar) >= s.maxLevels {
|
|
return rv
|
|
}
|
|
}
|
|
}
|
|
|
|
return rv
|
|
}
|
|
|
|
func (s *HierarchyFilter) buildToken(tokenStream analysis.TokenStream, soFar [][]byte, part []byte) (
|
|
[][]byte, analysis.TokenStream) {
|
|
|
|
soFar = append(soFar, part)
|
|
term := bytes.Join(soFar, s.delimiter)
|
|
|
|
tokenStream = append(tokenStream, &analysis.Token{
|
|
Type: analysis.Shingle,
|
|
Term: term,
|
|
Start: 0,
|
|
End: len(term),
|
|
Position: 1,
|
|
})
|
|
|
|
return soFar, tokenStream
|
|
}
|
|
|
|
func HierarchyFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
|
max := math.MaxInt64
|
|
maxVal, ok := config["max"].(float64)
|
|
if ok {
|
|
max = int(maxVal)
|
|
}
|
|
|
|
splitInput := true
|
|
splitInputVal, ok := config["split_input"].(bool)
|
|
if ok {
|
|
splitInput = splitInputVal
|
|
}
|
|
|
|
delimiter, ok := config["delimiter"].(string)
|
|
if !ok {
|
|
return nil, fmt.Errorf("must specify delimiter")
|
|
}
|
|
|
|
return NewHierarchyFilter([]byte(delimiter), max, splitInput), nil
|
|
}
|
|
|
|
func init() {
|
|
err := registry.RegisterTokenFilter(Name, HierarchyFilterConstructor)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
}
|