1
0
Fork 0

Adding upstream version 2.5.1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-05-19 00:20:02 +02:00
parent c71cb8b61d
commit 982828099e
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
783 changed files with 150650 additions and 0 deletions

View file

@ -0,0 +1,78 @@
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package reverse
import (
"unicode"
"unicode/utf8"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name used to register ReverseFilter in the bleve registry
const Name = "reverse"
type ReverseFilter struct {
}
func NewReverseFilter() *ReverseFilter {
return &ReverseFilter{}
}
func (f *ReverseFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
token.Term = reverse(token.Term)
}
return input
}
func ReverseFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewReverseFilter(), nil
}
func init() {
err := registry.RegisterTokenFilter(Name, ReverseFilterConstructor)
if err != nil {
panic(err)
}
}
// reverse(..) will generate a reversed version of the provided
// unicode array and return it back to its caller.
func reverse(s []byte) []byte {
cursorIn := 0
inputRunes := []rune(string(s))
cursorOut := len(s)
output := make([]byte, len(s))
for i := 0; i < len(inputRunes); {
wid := utf8.RuneLen(inputRunes[i])
i++
for i < len(inputRunes) {
r := inputRunes[i]
if unicode.Is(unicode.Mn, r) || unicode.Is(unicode.Me, r) || unicode.Is(unicode.Mc, r) {
wid += utf8.RuneLen(r)
i++
} else {
break
}
}
copy(output[cursorOut-wid:cursorOut], s[cursorIn:cursorIn+wid])
cursorIn += wid
cursorOut -= wid
}
return output
}

View file

@ -0,0 +1,184 @@
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package reverse
import (
"bytes"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
func TestReverseFilter(t *testing.T) {
inputTokenStream := analysis.TokenStream{
&analysis.Token{},
&analysis.Token{
Term: []byte("one"),
},
&analysis.Token{
Term: []byte("TWo"),
},
&analysis.Token{
Term: []byte("thRee"),
},
&analysis.Token{
Term: []byte("four's"),
},
&analysis.Token{
Term: []byte("what's this in reverse"),
},
&analysis.Token{
Term: []byte("œ∑´®†"),
},
&analysis.Token{
Term: []byte("İȺȾCAT÷≥≤µ123"),
},
&analysis.Token{
Term: []byte("!@#$%^&*()"),
},
&analysis.Token{
Term: []byte("cafés"),
},
&analysis.Token{
Term: []byte("¿Dónde estás?"),
},
&analysis.Token{
Term: []byte("Me gustaría una cerveza."),
},
}
expectedTokenStream := analysis.TokenStream{
&analysis.Token{},
&analysis.Token{
Term: []byte("eno"),
},
&analysis.Token{
Term: []byte("oWT"),
},
&analysis.Token{
Term: []byte("eeRht"),
},
&analysis.Token{
Term: []byte("s'ruof"),
},
&analysis.Token{
Term: []byte("esrever ni siht s'tahw"),
},
&analysis.Token{
Term: []byte("†®´∑œ"),
},
&analysis.Token{
Term: []byte("321µ≤≥÷TACȾȺİ"),
},
&analysis.Token{
Term: []byte(")(*&^%$#@!"),
},
&analysis.Token{
Term: []byte("séfac"),
},
&analysis.Token{
Term: []byte("?sátse ednóD¿"),
},
&analysis.Token{
Term: []byte(".azevrec anu aíratsug eM"),
},
}
filter := NewReverseFilter()
outputTokenStream := filter.Filter(inputTokenStream)
for i := 0; i < len(expectedTokenStream); i++ {
if !bytes.Equal(outputTokenStream[i].Term, expectedTokenStream[i].Term) {
t.Errorf("[%d] expected %s got %s",
i+1, expectedTokenStream[i].Term, outputTokenStream[i].Term)
}
}
}
func BenchmarkReverseFilter(b *testing.B) {
input := analysis.TokenStream{
&analysis.Token{
Term: []byte("A"),
},
&analysis.Token{
Term: []byte("boiling"),
},
&analysis.Token{
Term: []byte("liquid"),
},
&analysis.Token{
Term: []byte("expanding"),
},
&analysis.Token{
Term: []byte("vapor"),
},
&analysis.Token{
Term: []byte("explosion"),
},
&analysis.Token{
Term: []byte("caused"),
},
&analysis.Token{
Term: []byte("by"),
},
&analysis.Token{
Term: []byte("the"),
},
&analysis.Token{
Term: []byte("rupture"),
},
&analysis.Token{
Term: []byte("of"),
},
&analysis.Token{
Term: []byte("a"),
},
&analysis.Token{
Term: []byte("vessel"),
},
&analysis.Token{
Term: []byte("containing"),
},
&analysis.Token{
Term: []byte("pressurized"),
},
&analysis.Token{
Term: []byte("liquid"),
},
&analysis.Token{
Term: []byte("above"),
},
&analysis.Token{
Term: []byte("its"),
},
&analysis.Token{
Term: []byte("boiling"),
},
&analysis.Token{
Term: []byte("point"),
},
&analysis.Token{
Term: []byte("İȺȾCAT"),
},
&analysis.Token{
Term: []byte("Me gustaría una cerveza."),
},
}
filter := NewReverseFilter()
b.ResetTimer()
for i := 0; i < b.N; i++ {
filter.Filter(input)
}
}