Adding upstream version 2.5.1.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
c71cb8b61d
commit
982828099e
783 changed files with 150650 additions and 0 deletions
112
search/highlight/format/ansi/ansi.go
Normal file
112
search/highlight/format/ansi/ansi.go
Normal file
|
@ -0,0 +1,112 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package ansi
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
"github.com/blevesearch/bleve/v2/search/highlight"
|
||||
)
|
||||
|
||||
const Name = "ansi"
|
||||
|
||||
const DefaultAnsiHighlight = BgYellow
|
||||
|
||||
// FragmentFormatter marks highlighted terms by writing an escape sequence
// before each term and Reset after it.
type FragmentFormatter struct {
	color string // sequence emitted immediately before every highlighted term
}

// NewFragmentFormatter builds a FragmentFormatter that prefixes each
// highlighted term with color.
func NewFragmentFormatter(color string) *FragmentFormatter {
	formatter := new(FragmentFormatter)
	formatter.color = color
	return formatter
}
|
||||
|
||||
func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations highlight.TermLocations) string {
|
||||
rv := ""
|
||||
curr := f.Start
|
||||
for _, termLocation := range orderedTermLocations {
|
||||
if termLocation == nil {
|
||||
continue
|
||||
}
|
||||
// make sure the array positions match
|
||||
if !termLocation.ArrayPositions.Equals(f.ArrayPositions) {
|
||||
continue
|
||||
}
|
||||
if termLocation.Start < curr {
|
||||
continue
|
||||
}
|
||||
if termLocation.End > f.End {
|
||||
break
|
||||
}
|
||||
// add the stuff before this location
|
||||
rv += string(f.Orig[curr:termLocation.Start])
|
||||
// add the color
|
||||
rv += a.color
|
||||
// add the term itself
|
||||
rv += string(f.Orig[termLocation.Start:termLocation.End])
|
||||
// reset the color
|
||||
rv += Reset
|
||||
// update current
|
||||
curr = termLocation.End
|
||||
}
|
||||
// add any remaining text after the last token
|
||||
rv += string(f.Orig[curr:f.End])
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
// ANSI color control escape sequences.
// Shamelessly copied from https://github.com/sqp/godock/blob/master/libs/log/colors.go

// Text attribute sequences.
const (
	Reset      = "\x1b[0m"
	Bright     = "\x1b[1m"
	Dim        = "\x1b[2m"
	Underscore = "\x1b[4m"
	Blink      = "\x1b[5m"
	Reverse    = "\x1b[7m"
	Hidden     = "\x1b[8m"
)

// Foreground color sequences.
const (
	FgBlack   = "\x1b[30m"
	FgRed     = "\x1b[31m"
	FgGreen   = "\x1b[32m"
	FgYellow  = "\x1b[33m"
	FgBlue    = "\x1b[34m"
	FgMagenta = "\x1b[35m"
	FgCyan    = "\x1b[36m"
	FgWhite   = "\x1b[37m"
)

// Background color sequences.
const (
	BgBlack   = "\x1b[40m"
	BgRed     = "\x1b[41m"
	BgGreen   = "\x1b[42m"
	BgYellow  = "\x1b[43m"
	BgBlue    = "\x1b[44m"
	BgMagenta = "\x1b[45m"
	BgCyan    = "\x1b[46m"
	BgWhite   = "\x1b[47m"
)
|
||||
|
||||
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.FragmentFormatter, error) {
|
||||
color := DefaultAnsiHighlight
|
||||
colorVal, ok := config["color"].(string)
|
||||
if ok {
|
||||
color = colorVal
|
||||
}
|
||||
return NewFragmentFormatter(color), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterFragmentFormatter(Name, Constructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
94
search/highlight/format/html/html.go
Normal file
94
search/highlight/format/html/html.go
Normal file
|
@ -0,0 +1,94 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"html"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
"github.com/blevesearch/bleve/v2/search/highlight"
|
||||
)
|
||||
|
||||
const (
	// Name is the key under which this fragment formatter is registered.
	Name = "html"

	// Markup placed around a highlighted term when the configuration does
	// not override "before" / "after".
	defaultHTMLHighlightBefore = "<mark>"
	defaultHTMLHighlightAfter  = "</mark>"
)
|
||||
|
||||
// FragmentFormatter wraps highlighted terms in configurable markup and
// HTML-escapes the text it copies from the fragment.
type FragmentFormatter struct {
	before string // written immediately before each highlighted term
	after  string // written immediately after each highlighted term
}

// NewFragmentFormatter builds a FragmentFormatter that surrounds each
// highlighted term with before and after.
func NewFragmentFormatter(before, after string) *FragmentFormatter {
	formatter := new(FragmentFormatter)
	formatter.before, formatter.after = before, after
	return formatter
}
|
||||
|
||||
func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations highlight.TermLocations) string {
|
||||
rv := ""
|
||||
curr := f.Start
|
||||
for _, termLocation := range orderedTermLocations {
|
||||
if termLocation == nil {
|
||||
continue
|
||||
}
|
||||
// make sure the array positions match
|
||||
if !termLocation.ArrayPositions.Equals(f.ArrayPositions) {
|
||||
continue
|
||||
}
|
||||
if termLocation.Start < curr {
|
||||
continue
|
||||
}
|
||||
if termLocation.End > f.End {
|
||||
break
|
||||
}
|
||||
// add the stuff before this location
|
||||
rv += html.EscapeString(string(f.Orig[curr:termLocation.Start]))
|
||||
// start the <mark> tag
|
||||
rv += a.before
|
||||
// add the term itself
|
||||
rv += html.EscapeString(string(f.Orig[termLocation.Start:termLocation.End]))
|
||||
// end the <mark> tag
|
||||
rv += a.after
|
||||
// update current
|
||||
curr = termLocation.End
|
||||
}
|
||||
// add any remaining text after the last token
|
||||
rv += html.EscapeString(string(f.Orig[curr:f.End]))
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.FragmentFormatter, error) {
|
||||
before := defaultHTMLHighlightBefore
|
||||
beforeVal, ok := config["before"].(string)
|
||||
if ok {
|
||||
before = beforeVal
|
||||
}
|
||||
after := defaultHTMLHighlightAfter
|
||||
afterVal, ok := config["after"].(string)
|
||||
if ok {
|
||||
after = afterVal
|
||||
}
|
||||
return NewFragmentFormatter(before, after), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterFragmentFormatter(Name, Constructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
120
search/highlight/format/html/html_test.go
Normal file
120
search/highlight/format/html/html_test.go
Normal file
|
@ -0,0 +1,120 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/search/highlight"
|
||||
)
|
||||
|
||||
func TestHTMLFragmentFormatter(t *testing.T) {
|
||||
tests := []struct {
|
||||
fragment *highlight.Fragment
|
||||
tlm search.TermLocationMap
|
||||
output string
|
||||
start string
|
||||
end string
|
||||
}{
|
||||
{
|
||||
fragment: &highlight.Fragment{
|
||||
Orig: []byte("the quick brown fox"),
|
||||
Start: 0,
|
||||
End: 19,
|
||||
},
|
||||
tlm: search.TermLocationMap{
|
||||
"quick": []*search.Location{
|
||||
{
|
||||
Pos: 2,
|
||||
Start: 4,
|
||||
End: 9,
|
||||
},
|
||||
},
|
||||
},
|
||||
output: "the <b>quick</b> brown fox",
|
||||
start: "<b>",
|
||||
end: "</b>",
|
||||
},
|
||||
{
|
||||
fragment: &highlight.Fragment{
|
||||
Orig: []byte("the quick brown fox"),
|
||||
Start: 0,
|
||||
End: 19,
|
||||
},
|
||||
tlm: search.TermLocationMap{
|
||||
"quick": []*search.Location{
|
||||
{
|
||||
Pos: 2,
|
||||
Start: 4,
|
||||
End: 9,
|
||||
},
|
||||
},
|
||||
},
|
||||
output: "the <em>quick</em> brown fox",
|
||||
start: "<em>",
|
||||
end: "</em>",
|
||||
},
|
||||
// test html escaping
|
||||
{
|
||||
fragment: &highlight.Fragment{
|
||||
Orig: []byte("<the> quick brown & fox"),
|
||||
Start: 0,
|
||||
End: 23,
|
||||
},
|
||||
tlm: search.TermLocationMap{
|
||||
"quick": []*search.Location{
|
||||
{
|
||||
Pos: 2,
|
||||
Start: 6,
|
||||
End: 11,
|
||||
},
|
||||
},
|
||||
},
|
||||
output: "<the> <em>quick</em> brown & fox",
|
||||
start: "<em>",
|
||||
end: "</em>",
|
||||
},
|
||||
// test html escaping inside search term
|
||||
{
|
||||
fragment: &highlight.Fragment{
|
||||
Orig: []byte("<the> qu&ick brown & fox"),
|
||||
Start: 0,
|
||||
End: 24,
|
||||
},
|
||||
tlm: search.TermLocationMap{
|
||||
"qu&ick": []*search.Location{
|
||||
{
|
||||
Pos: 2,
|
||||
Start: 6,
|
||||
End: 12,
|
||||
},
|
||||
},
|
||||
},
|
||||
output: "<the> <em>qu&ick</em> brown & fox",
|
||||
start: "<em>",
|
||||
end: "</em>",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
emHTMLFormatter := NewFragmentFormatter(test.start, test.end)
|
||||
otl := highlight.OrderTermLocations(test.tlm)
|
||||
result := emHTMLFormatter.Format(test.fragment, otl)
|
||||
if result != test.output {
|
||||
t.Errorf("expected `%s`, got `%s`", test.output, result)
|
||||
}
|
||||
}
|
||||
}
|
92
search/highlight/format/plain/plain.go
Normal file
92
search/highlight/format/plain/plain.go
Normal file
|
@ -0,0 +1,92 @@
|
|||
// Copyright (c) 2022 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package plain
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
"github.com/blevesearch/bleve/v2/search/highlight"
|
||||
)
|
||||
|
||||
const (
	// Name is the key under which this fragment formatter is registered.
	Name = "plain"

	// Markers placed around a highlighted term when the configuration does
	// not override "before" / "after".
	defaultPlainHighlightBefore = "<start>"
	defaultPlainHighlightAfter  = "<end>"
)
|
||||
|
||||
// FragmentFormatter wraps highlighted terms in configurable markers and
// copies all other fragment text through unchanged.
type FragmentFormatter struct {
	before string // written immediately before each highlighted term
	after  string // written immediately after each highlighted term
}

// NewFragmentFormatter builds a FragmentFormatter that surrounds each
// highlighted term with before and after.
func NewFragmentFormatter(before, after string) *FragmentFormatter {
	formatter := new(FragmentFormatter)
	formatter.before, formatter.after = before, after
	return formatter
}
|
||||
|
||||
func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations highlight.TermLocations) string {
|
||||
rv := ""
|
||||
curr := f.Start
|
||||
for _, termLocation := range orderedTermLocations {
|
||||
if termLocation == nil {
|
||||
continue
|
||||
}
|
||||
// make sure the array positions match
|
||||
if !termLocation.ArrayPositions.Equals(f.ArrayPositions) {
|
||||
continue
|
||||
}
|
||||
if termLocation.Start < curr {
|
||||
continue
|
||||
}
|
||||
if termLocation.End > f.End {
|
||||
break
|
||||
}
|
||||
// add the stuff before this location
|
||||
rv += string(f.Orig[curr:termLocation.Start])
|
||||
// start the highlight tag
|
||||
rv += a.before
|
||||
// add the term itself
|
||||
rv += string(f.Orig[termLocation.Start:termLocation.End])
|
||||
// end the highlight tag
|
||||
rv += a.after
|
||||
// update current
|
||||
curr = termLocation.End
|
||||
}
|
||||
// add any remaining text after the last token
|
||||
rv += string(f.Orig[curr:f.End])
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.FragmentFormatter, error) {
|
||||
before := defaultPlainHighlightBefore
|
||||
beforeVal, ok := config["before"].(string)
|
||||
if ok {
|
||||
before = beforeVal
|
||||
}
|
||||
after := defaultPlainHighlightAfter
|
||||
afterVal, ok := config["after"].(string)
|
||||
if ok {
|
||||
after = afterVal
|
||||
}
|
||||
return NewFragmentFormatter(before, after), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterFragmentFormatter(Name, Constructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
80
search/highlight/format/plain/plain_test.go
Normal file
80
search/highlight/format/plain/plain_test.go
Normal file
|
@ -0,0 +1,80 @@
|
|||
// Copyright (c) 2022 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package plain
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/search/highlight"
|
||||
)
|
||||
|
||||
func TestPlainFragmentFormatter(t *testing.T) {
|
||||
tests := []struct {
|
||||
fragment *highlight.Fragment
|
||||
tlm search.TermLocationMap
|
||||
output string
|
||||
start string
|
||||
end string
|
||||
}{
|
||||
{
|
||||
fragment: &highlight.Fragment{
|
||||
Orig: []byte("the quick brown fox"),
|
||||
Start: 0,
|
||||
End: 19,
|
||||
},
|
||||
tlm: search.TermLocationMap{
|
||||
"quick": []*search.Location{
|
||||
{
|
||||
Pos: 2,
|
||||
Start: 4,
|
||||
End: 9,
|
||||
},
|
||||
},
|
||||
},
|
||||
output: "the <b>quick</b> brown fox",
|
||||
start: "<b>",
|
||||
end: "</b>",
|
||||
},
|
||||
{
|
||||
fragment: &highlight.Fragment{
|
||||
Orig: []byte("the quick brown fox"),
|
||||
Start: 0,
|
||||
End: 19,
|
||||
},
|
||||
tlm: search.TermLocationMap{
|
||||
"quick": []*search.Location{
|
||||
{
|
||||
Pos: 2,
|
||||
Start: 4,
|
||||
End: 9,
|
||||
},
|
||||
},
|
||||
},
|
||||
output: "the <em>quick</em> brown fox",
|
||||
start: "<em>",
|
||||
end: "</em>",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
plainFormatter := NewFragmentFormatter(test.start, test.end)
|
||||
otl := highlight.OrderTermLocations(test.tlm)
|
||||
result := plainFormatter.Format(test.fragment, otl)
|
||||
if result != test.output {
|
||||
t.Errorf("expected `%s`, got `%s`", test.output, result)
|
||||
}
|
||||
}
|
||||
}
|
156
search/highlight/fragmenter/simple/simple.go
Normal file
156
search/highlight/fragmenter/simple/simple.go
Normal file
|
@ -0,0 +1,156 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package simple
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
"github.com/blevesearch/bleve/v2/search/highlight"
|
||||
)
|
||||
|
||||
const (
	// Name is the key under which this fragmenter is registered.
	Name = "simple"

	// defaultFragmentSize is the fragment size Constructor uses when the
	// configuration carries no "size" value.
	defaultFragmentSize = 200
)
|
||||
|
||||
// Fragmenter cuts fragments of a bounded size around term locations.
type Fragmenter struct {
	fragmentSize int // upper bound on fragment length, counted in runes
}

// NewFragmenter builds a Fragmenter that produces fragments of at most
// fragmentSize runes.
func NewFragmenter(fragmentSize int) *Fragmenter {
	fr := new(Fragmenter)
	fr.fragmentSize = fragmentSize
	return fr
}
|
||||
|
||||
func (s *Fragmenter) Fragment(orig []byte, ot highlight.TermLocations) []*highlight.Fragment {
|
||||
var rv []*highlight.Fragment
|
||||
maxbegin := 0
|
||||
OUTER:
|
||||
for currTermIndex, termLocation := range ot {
|
||||
// start with this
|
||||
// it should be the highest scoring fragment with this term first
|
||||
start := termLocation.Start
|
||||
end := start
|
||||
used := 0
|
||||
for end < len(orig) && used < s.fragmentSize {
|
||||
r, size := utf8.DecodeRune(orig[end:])
|
||||
if r == utf8.RuneError {
|
||||
continue OUTER // bail
|
||||
}
|
||||
end += size
|
||||
used++
|
||||
}
|
||||
|
||||
// if we still have more characters available to us
|
||||
// push back towards beginning
|
||||
// without cross maxbegin
|
||||
for start > 0 && used < s.fragmentSize {
|
||||
if start > len(orig) {
|
||||
// bail if out of bounds, possibly due to token replacement
|
||||
// e.g with a regexp replacement
|
||||
continue OUTER
|
||||
}
|
||||
r, size := utf8.DecodeLastRune(orig[0:start])
|
||||
if r == utf8.RuneError {
|
||||
continue OUTER // bail
|
||||
}
|
||||
if start-size >= maxbegin {
|
||||
start -= size
|
||||
used++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// however, we'd rather have the tokens centered more in the frag
|
||||
// lets try to do that as best we can, without affecting the score
|
||||
// find the end of the last term in this fragment
|
||||
minend := end
|
||||
for _, innerTermLocation := range ot[currTermIndex:] {
|
||||
if innerTermLocation.End > end {
|
||||
break
|
||||
}
|
||||
minend = innerTermLocation.End
|
||||
}
|
||||
|
||||
// find the smaller of the two rooms to move
|
||||
roomToMove := utf8.RuneCount(orig[minend:end])
|
||||
roomToMoveStart := 0
|
||||
if start >= maxbegin {
|
||||
roomToMoveStart = utf8.RuneCount(orig[maxbegin:start])
|
||||
}
|
||||
if roomToMoveStart < roomToMove {
|
||||
roomToMove = roomToMoveStart
|
||||
}
|
||||
|
||||
offset := roomToMove / 2
|
||||
|
||||
for offset > 0 {
|
||||
r, size := utf8.DecodeLastRune(orig[0:start])
|
||||
if r == utf8.RuneError {
|
||||
continue OUTER // bail
|
||||
}
|
||||
start -= size
|
||||
|
||||
r, size = utf8.DecodeLastRune(orig[0:end])
|
||||
if r == utf8.RuneError {
|
||||
continue OUTER // bail
|
||||
}
|
||||
end -= size
|
||||
offset--
|
||||
}
|
||||
|
||||
rv = append(rv, &highlight.Fragment{Orig: orig, Start: start - offset, End: end - offset})
|
||||
// set maxbegin to the end of the current term location
|
||||
// so that next one won't back up to include it
|
||||
maxbegin = termLocation.End
|
||||
|
||||
}
|
||||
if len(ot) == 0 {
|
||||
// if there were no terms to highlight
|
||||
// produce a single fragment from the beginning
|
||||
start := 0
|
||||
end := start
|
||||
used := 0
|
||||
for end < len(orig) && used < s.fragmentSize {
|
||||
r, size := utf8.DecodeRune(orig[end:])
|
||||
if r == utf8.RuneError {
|
||||
break
|
||||
}
|
||||
end += size
|
||||
used++
|
||||
}
|
||||
rv = append(rv, &highlight.Fragment{Orig: orig, Start: start, End: end})
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.Fragmenter, error) {
|
||||
size := defaultFragmentSize
|
||||
sizeVal, ok := config["size"].(float64)
|
||||
if ok {
|
||||
size = int(sizeVal)
|
||||
}
|
||||
return NewFragmenter(size), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterFragmenter(Name, Constructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
311
search/highlight/fragmenter/simple/simple_test.go
Normal file
311
search/highlight/fragmenter/simple/simple_test.go
Normal file
|
@ -0,0 +1,311 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package simple
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search/highlight"
|
||||
)
|
||||
|
||||
func TestSimpleFragmenter(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
orig []byte
|
||||
fragments []*highlight.Fragment
|
||||
ot highlight.TermLocations
|
||||
size int
|
||||
}{
|
||||
{
|
||||
orig: []byte("this is a test"),
|
||||
fragments: []*highlight.Fragment{
|
||||
{
|
||||
Orig: []byte("this is a test"),
|
||||
Start: 0,
|
||||
End: 14,
|
||||
},
|
||||
},
|
||||
ot: highlight.TermLocations{
|
||||
&highlight.TermLocation{
|
||||
Term: "test",
|
||||
Pos: 4,
|
||||
Start: 10,
|
||||
End: 14,
|
||||
},
|
||||
},
|
||||
size: 100,
|
||||
},
|
||||
{
|
||||
orig: []byte("0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"),
|
||||
fragments: []*highlight.Fragment{
|
||||
{
|
||||
Orig: []byte("0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"),
|
||||
Start: 0,
|
||||
End: 100,
|
||||
},
|
||||
},
|
||||
ot: highlight.TermLocations{
|
||||
&highlight.TermLocation{
|
||||
Term: "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789",
|
||||
Pos: 1,
|
||||
Start: 0,
|
||||
End: 100,
|
||||
},
|
||||
},
|
||||
size: 100,
|
||||
},
|
||||
{
|
||||
orig: []byte("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"),
|
||||
fragments: []*highlight.Fragment{
|
||||
{
|
||||
Orig: []byte("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"),
|
||||
Start: 0,
|
||||
End: 100,
|
||||
},
|
||||
{
|
||||
Orig: []byte("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"),
|
||||
Start: 10,
|
||||
End: 101,
|
||||
},
|
||||
{
|
||||
Orig: []byte("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"),
|
||||
Start: 20,
|
||||
End: 101,
|
||||
},
|
||||
{
|
||||
Orig: []byte("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"),
|
||||
Start: 30,
|
||||
End: 101,
|
||||
},
|
||||
{
|
||||
Orig: []byte("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"),
|
||||
Start: 40,
|
||||
End: 101,
|
||||
},
|
||||
{
|
||||
Orig: []byte("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"),
|
||||
Start: 50,
|
||||
End: 101,
|
||||
},
|
||||
{
|
||||
Orig: []byte("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"),
|
||||
Start: 60,
|
||||
End: 101,
|
||||
},
|
||||
{
|
||||
Orig: []byte("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"),
|
||||
Start: 70,
|
||||
End: 101,
|
||||
},
|
||||
{
|
||||
Orig: []byte("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"),
|
||||
Start: 80,
|
||||
End: 101,
|
||||
},
|
||||
{
|
||||
Orig: []byte("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"),
|
||||
Start: 90,
|
||||
End: 101,
|
||||
},
|
||||
},
|
||||
ot: highlight.TermLocations{
|
||||
&highlight.TermLocation{
|
||||
Term: "0123456789",
|
||||
Pos: 1,
|
||||
Start: 0,
|
||||
End: 10,
|
||||
},
|
||||
&highlight.TermLocation{
|
||||
Term: "0123456789",
|
||||
Pos: 2,
|
||||
Start: 10,
|
||||
End: 20,
|
||||
},
|
||||
&highlight.TermLocation{
|
||||
Term: "0123456789",
|
||||
Pos: 3,
|
||||
Start: 20,
|
||||
End: 30,
|
||||
},
|
||||
&highlight.TermLocation{
|
||||
Term: "0123456789",
|
||||
Pos: 4,
|
||||
Start: 30,
|
||||
End: 40,
|
||||
},
|
||||
&highlight.TermLocation{
|
||||
Term: "0123456789",
|
||||
Pos: 5,
|
||||
Start: 40,
|
||||
End: 50,
|
||||
},
|
||||
&highlight.TermLocation{
|
||||
Term: "0123456789",
|
||||
Pos: 6,
|
||||
Start: 50,
|
||||
End: 60,
|
||||
},
|
||||
&highlight.TermLocation{
|
||||
Term: "0123456789",
|
||||
Pos: 7,
|
||||
Start: 60,
|
||||
End: 70,
|
||||
},
|
||||
&highlight.TermLocation{
|
||||
Term: "0123456789",
|
||||
Pos: 8,
|
||||
Start: 70,
|
||||
End: 80,
|
||||
},
|
||||
&highlight.TermLocation{
|
||||
Term: "0123456789",
|
||||
Pos: 9,
|
||||
Start: 80,
|
||||
End: 90,
|
||||
},
|
||||
&highlight.TermLocation{
|
||||
Term: "0123456789",
|
||||
Pos: 10,
|
||||
Start: 90,
|
||||
End: 100,
|
||||
},
|
||||
},
|
||||
size: 100,
|
||||
},
|
||||
{
|
||||
orig: []byte("[[पानी का स्वाद]] [[नीलेश रघुवंशी]] का कविता संग्रह हैं। इस कृति के लिए उन्हें २००४ में [[केदार सम्मान]] से सम्मानित किया गया है।{{केदार सम्मान से सम्मानित कृतियाँ}}"),
|
||||
fragments: []*highlight.Fragment{
|
||||
{
|
||||
Orig: []byte("[[पानी का स्वाद]] [[नीलेश रघुवंशी]] का कविता संग्रह हैं। इस कृति के लिए उन्हें २००४ में [[केदार सम्मान]] से सम्मानित किया गया है।{{केदार सम्मान से सम्मानित कृतियाँ}}"),
|
||||
Start: 0,
|
||||
End: 411,
|
||||
},
|
||||
},
|
||||
ot: highlight.TermLocations{
|
||||
&highlight.TermLocation{
|
||||
Term: "पानी",
|
||||
Pos: 1,
|
||||
Start: 2,
|
||||
End: 14,
|
||||
},
|
||||
},
|
||||
size: 200,
|
||||
},
|
||||
{
|
||||
orig: []byte("交换机"),
|
||||
fragments: []*highlight.Fragment{
|
||||
{
|
||||
Orig: []byte("交换机"),
|
||||
Start: 0,
|
||||
End: 9,
|
||||
},
|
||||
{
|
||||
Orig: []byte("交换机"),
|
||||
Start: 3,
|
||||
End: 9,
|
||||
},
|
||||
},
|
||||
ot: highlight.TermLocations{
|
||||
&highlight.TermLocation{
|
||||
Term: "交换",
|
||||
Pos: 1,
|
||||
Start: 0,
|
||||
End: 6,
|
||||
},
|
||||
&highlight.TermLocation{
|
||||
Term: "换机",
|
||||
Pos: 2,
|
||||
Start: 3,
|
||||
End: 9,
|
||||
},
|
||||
},
|
||||
size: 200,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
fragmenter := NewFragmenter(test.size)
|
||||
fragments := fragmenter.Fragment(test.orig, test.ot)
|
||||
if !reflect.DeepEqual(fragments, test.fragments) {
|
||||
t.Errorf("expected %#v, got %#v", test.fragments, fragments)
|
||||
for _, fragment := range fragments {
|
||||
t.Logf("frag: %s", fragment.Orig[fragment.Start:fragment.End])
|
||||
t.Logf("frag: %d - %d", fragment.Start, fragment.End)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSimpleFragmenterWithSize(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
orig []byte
|
||||
fragments []*highlight.Fragment
|
||||
ot highlight.TermLocations
|
||||
}{
|
||||
{
|
||||
orig: []byte("this is a test"),
|
||||
fragments: []*highlight.Fragment{
|
||||
{
|
||||
Orig: []byte("this is a test"),
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
{
|
||||
Orig: []byte("this is a test"),
|
||||
Start: 9,
|
||||
End: 14,
|
||||
},
|
||||
},
|
||||
ot: highlight.TermLocations{
|
||||
&highlight.TermLocation{
|
||||
Term: "this",
|
||||
Pos: 1,
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
&highlight.TermLocation{
|
||||
Term: "test",
|
||||
Pos: 4,
|
||||
Start: 10,
|
||||
End: 14,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
orig: []byte("避免出现 rune 越界问题"),
|
||||
fragments: []*highlight.Fragment{
|
||||
{
|
||||
Orig: []byte("避免出现 rune 越界问题"),
|
||||
Start: 0,
|
||||
End: 13,
|
||||
},
|
||||
},
|
||||
ot: nil,
|
||||
},
|
||||
}
|
||||
|
||||
fragmenter := NewFragmenter(5)
|
||||
for _, test := range tests {
|
||||
fragments := fragmenter.Fragment(test.orig, test.ot)
|
||||
if !reflect.DeepEqual(fragments, test.fragments) {
|
||||
t.Errorf("expected %#v, got %#v", test.fragments, fragments)
|
||||
for _, fragment := range fragments {
|
||||
t.Logf("frag: %#v", fragment)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
64
search/highlight/highlighter.go
Normal file
64
search/highlight/highlighter.go
Normal file
|
@ -0,0 +1,64 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package highlight
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
// Fragment describes the half-open byte range [Start, End) of Orig that is a
// candidate for display.
type Fragment struct {
	Orig           []byte   // text the fragment is cut from
	ArrayPositions []uint64 // compared against a term location's array positions when formatting
	Start          int      // offset of the first byte of the fragment in Orig
	End            int      // offset just past the last byte of the fragment in Orig
	Score          float64
	Index          int // used by heap
}

// Overlaps reports whether f and other share at least one byte position,
// i.e. whether either fragment starts inside the other's [Start, End) range.
func (f *Fragment) Overlaps(other *Fragment) bool {
	otherStartsInside := f.Start <= other.Start && other.Start < f.End
	selfStartsInside := other.Start <= f.Start && f.Start < other.End
	return otherStartsInside || selfStartsInside
}
|
||||
|
||||
// Fragmenter turns a piece of text and the term locations found in it into a
// list of candidate fragments.
type Fragmenter interface {
	// Fragment returns the fragments cut from the given bytes for the given
	// term locations.
	Fragment([]byte, TermLocations) []*Fragment
}
|
||||
|
||||
// FragmentFormatter renders a fragment as a display string.
type FragmentFormatter interface {
	// Format returns the text of f with the given term locations marked up.
	// NOTE(review): the parameter name suggests callers pass the locations
	// already sorted; confirm against the callers.
	Format(f *Fragment, orderedTermLocations TermLocations) string
}
|
||||
|
||||
// FragmentScorer computes a numeric score for a fragment.
type FragmentScorer interface {
	// Score returns the score for f.
	// NOTE(review): presumably higher means a better fragment; confirm
	// against the implementations.
	Score(f *Fragment) float64
}
|
||||
|
||||
// Highlighter combines a Fragmenter, a FragmentFormatter and a separator
// string, and produces formatted fragments for a field of a matched document.
type Highlighter interface {
	// Fragmenter and SetFragmenter get and set the Fragmenter in use.
	Fragmenter() Fragmenter
	SetFragmenter(Fragmenter)

	// FragmentFormatter and SetFragmentFormatter get and set the
	// FragmentFormatter in use.
	FragmentFormatter() FragmentFormatter
	SetFragmentFormatter(FragmentFormatter)

	// Separator and SetSeparator get and set the separator string.
	// NOTE(review): how the separator is applied is up to the
	// implementation; confirm there.
	Separator() string
	SetSeparator(string)

	// BestFragmentInField returns one formatted fragment and
	// BestFragmentsInField returns a list of them.
	// NOTE(review): presumably the string argument is the field name and the
	// int is the number of fragments wanted; confirm against implementations.
	BestFragmentInField(*search.DocumentMatch, index.Document, string) string
	BestFragmentsInField(*search.DocumentMatch, index.Document, string, int) []string
}
|
53
search/highlight/highlighter/ansi/ansi.go
Normal file
53
search/highlight/highlighter/ansi/ansi.go
Normal file
|
@ -0,0 +1,53 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package ansi
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
"github.com/blevesearch/bleve/v2/search/highlight"
|
||||
ansiFormatter "github.com/blevesearch/bleve/v2/search/highlight/format/ansi"
|
||||
simpleFragmenter "github.com/blevesearch/bleve/v2/search/highlight/fragmenter/simple"
|
||||
simpleHighlighter "github.com/blevesearch/bleve/v2/search/highlight/highlighter/simple"
|
||||
)
|
||||
|
||||
// Name is the key under which this package registers its highlighter
// Constructor with the registry.
const Name = "ansi"
|
||||
|
||||
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.Highlighter, error) {
|
||||
|
||||
fragmenter, err := cache.FragmenterNamed(simpleFragmenter.Name)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error building fragmenter: %v", err)
|
||||
}
|
||||
|
||||
formatter, err := cache.FragmentFormatterNamed(ansiFormatter.Name)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error building fragment formatter: %v", err)
|
||||
}
|
||||
|
||||
return simpleHighlighter.NewHighlighter(
|
||||
fragmenter,
|
||||
formatter,
|
||||
simpleHighlighter.DefaultSeparator),
|
||||
nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterHighlighter(Name, Constructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
53
search/highlight/highlighter/html/html.go
Normal file
53
search/highlight/highlighter/html/html.go
Normal file
|
@ -0,0 +1,53 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
"github.com/blevesearch/bleve/v2/search/highlight"
|
||||
htmlFormatter "github.com/blevesearch/bleve/v2/search/highlight/format/html"
|
||||
simpleFragmenter "github.com/blevesearch/bleve/v2/search/highlight/fragmenter/simple"
|
||||
simpleHighlighter "github.com/blevesearch/bleve/v2/search/highlight/highlighter/simple"
|
||||
)
|
||||
|
||||
// Name is the key under which this package registers its highlighter
// Constructor with the registry.
const Name = "html"
|
||||
|
||||
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.Highlighter, error) {
|
||||
|
||||
fragmenter, err := cache.FragmenterNamed(simpleFragmenter.Name)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error building fragmenter: %v", err)
|
||||
}
|
||||
|
||||
formatter, err := cache.FragmentFormatterNamed(htmlFormatter.Name)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error building fragment formatter: %v", err)
|
||||
}
|
||||
|
||||
return simpleHighlighter.NewHighlighter(
|
||||
fragmenter,
|
||||
formatter,
|
||||
simpleHighlighter.DefaultSeparator),
|
||||
nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterHighlighter(Name, Constructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package simple
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/search/highlight"
|
||||
)
|
||||
|
||||
// FragmentScorer will score fragments by how many
|
||||
// unique terms occur in the fragment with no regard for
|
||||
// any boost values used in the original query
|
||||
type FragmentScorer struct {
|
||||
tlm search.TermLocationMap
|
||||
}
|
||||
|
||||
func NewFragmentScorer(tlm search.TermLocationMap) *FragmentScorer {
|
||||
return &FragmentScorer{
|
||||
tlm: tlm,
|
||||
}
|
||||
}
|
||||
|
||||
func (s *FragmentScorer) Score(f *highlight.Fragment) {
|
||||
score := 0.0
|
||||
OUTER:
|
||||
for _, locations := range s.tlm {
|
||||
for _, location := range locations {
|
||||
if location.ArrayPositions.Equals(f.ArrayPositions) && int(location.Start) >= f.Start && int(location.End) <= f.End {
|
||||
score += 1.0
|
||||
// once we find a term in the fragment
|
||||
// don't care about additional matches
|
||||
continue OUTER
|
||||
}
|
||||
}
|
||||
}
|
||||
f.Score = score
|
||||
}
|
|
@ -0,0 +1,82 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package simple
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/search/highlight"
|
||||
)
|
||||
|
||||
func TestSimpleFragmentScorer(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
fragment *highlight.Fragment
|
||||
tlm search.TermLocationMap
|
||||
score float64
|
||||
}{
|
||||
{
|
||||
fragment: &highlight.Fragment{
|
||||
Orig: []byte("cat in the hat"),
|
||||
Start: 0,
|
||||
End: 14,
|
||||
},
|
||||
tlm: search.TermLocationMap{
|
||||
"cat": []*search.Location{
|
||||
{
|
||||
Pos: 0,
|
||||
Start: 0,
|
||||
End: 3,
|
||||
},
|
||||
},
|
||||
},
|
||||
score: 1,
|
||||
},
|
||||
{
|
||||
fragment: &highlight.Fragment{
|
||||
Orig: []byte("cat in the hat"),
|
||||
Start: 0,
|
||||
End: 14,
|
||||
},
|
||||
tlm: search.TermLocationMap{
|
||||
"cat": []*search.Location{
|
||||
{
|
||||
Pos: 1,
|
||||
Start: 0,
|
||||
End: 3,
|
||||
},
|
||||
},
|
||||
"hat": []*search.Location{
|
||||
{
|
||||
Pos: 4,
|
||||
Start: 11,
|
||||
End: 14,
|
||||
},
|
||||
},
|
||||
},
|
||||
score: 2,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
scorer := NewFragmentScorer(test.tlm)
|
||||
scorer.Score(test.fragment)
|
||||
if test.fragment.Score != test.score {
|
||||
t.Errorf("expected score %f, got %f", test.score, test.fragment.Score)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
225
search/highlight/highlighter/simple/highlighter_simple.go
Normal file
225
search/highlight/highlighter/simple/highlighter_simple.go
Normal file
|
@ -0,0 +1,225 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package simple
|
||||
|
||||
import (
|
||||
"container/heap"
|
||||
"fmt"
|
||||
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/search/highlight"
|
||||
)
|
||||
|
||||
const (
	// Name is the key under which this package registers its highlighter
	// Constructor with the registry.
	Name = "simple"

	// DefaultSeparator is the separator Constructor uses when the config
	// does not supply a "separator" string.
	DefaultSeparator = "…"
)
|
||||
|
||||
type Highlighter struct {
|
||||
fragmenter highlight.Fragmenter
|
||||
formatter highlight.FragmentFormatter
|
||||
sep string
|
||||
}
|
||||
|
||||
func NewHighlighter(fragmenter highlight.Fragmenter, formatter highlight.FragmentFormatter, separator string) *Highlighter {
|
||||
return &Highlighter{
|
||||
fragmenter: fragmenter,
|
||||
formatter: formatter,
|
||||
sep: separator,
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Highlighter) Fragmenter() highlight.Fragmenter {
|
||||
return s.fragmenter
|
||||
}
|
||||
|
||||
func (s *Highlighter) SetFragmenter(f highlight.Fragmenter) {
|
||||
s.fragmenter = f
|
||||
}
|
||||
|
||||
func (s *Highlighter) FragmentFormatter() highlight.FragmentFormatter {
|
||||
return s.formatter
|
||||
}
|
||||
|
||||
func (s *Highlighter) SetFragmentFormatter(f highlight.FragmentFormatter) {
|
||||
s.formatter = f
|
||||
}
|
||||
|
||||
func (s *Highlighter) Separator() string {
|
||||
return s.sep
|
||||
}
|
||||
|
||||
func (s *Highlighter) SetSeparator(sep string) {
|
||||
s.sep = sep
|
||||
}
|
||||
|
||||
func (s *Highlighter) BestFragmentInField(dm *search.DocumentMatch, doc index.Document, field string) string {
|
||||
fragments := s.BestFragmentsInField(dm, doc, field, 1)
|
||||
if len(fragments) > 0 {
|
||||
return fragments[0]
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (s *Highlighter) BestFragmentsInField(dm *search.DocumentMatch, doc index.Document, field string, num int) []string {
|
||||
tlm := dm.Locations[field]
|
||||
orderedTermLocations := highlight.OrderTermLocations(tlm)
|
||||
scorer := NewFragmentScorer(tlm)
|
||||
|
||||
// score the fragments and put them into a priority queue ordered by score
|
||||
fq := make(FragmentQueue, 0)
|
||||
heap.Init(&fq)
|
||||
doc.VisitFields(func(f index.Field) {
|
||||
if f.Name() == field {
|
||||
_, ok := f.(index.TextField)
|
||||
if ok {
|
||||
termLocationsSameArrayPosition := make(highlight.TermLocations, 0)
|
||||
for _, otl := range orderedTermLocations {
|
||||
if otl.ArrayPositions.Equals(f.ArrayPositions()) {
|
||||
termLocationsSameArrayPosition = append(termLocationsSameArrayPosition, otl)
|
||||
}
|
||||
}
|
||||
|
||||
fieldData := f.Value()
|
||||
fragments := s.fragmenter.Fragment(fieldData, termLocationsSameArrayPosition)
|
||||
for _, fragment := range fragments {
|
||||
fragment.ArrayPositions = f.ArrayPositions()
|
||||
scorer.Score(fragment)
|
||||
heap.Push(&fq, fragment)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
// now find the N best non-overlapping fragments
|
||||
var bestFragments []*highlight.Fragment
|
||||
if len(fq) > 0 {
|
||||
candidate := heap.Pop(&fq)
|
||||
OUTER:
|
||||
for candidate != nil && len(bestFragments) < num {
|
||||
// see if this overlaps with any of the best already identified
|
||||
if len(bestFragments) > 0 {
|
||||
for _, frag := range bestFragments {
|
||||
if candidate.(*highlight.Fragment).Overlaps(frag) {
|
||||
if len(fq) < 1 {
|
||||
break OUTER
|
||||
}
|
||||
candidate = heap.Pop(&fq)
|
||||
continue OUTER
|
||||
}
|
||||
}
|
||||
bestFragments = append(bestFragments, candidate.(*highlight.Fragment))
|
||||
} else {
|
||||
bestFragments = append(bestFragments, candidate.(*highlight.Fragment))
|
||||
}
|
||||
|
||||
if len(fq) < 1 {
|
||||
break
|
||||
}
|
||||
candidate = heap.Pop(&fq)
|
||||
}
|
||||
}
|
||||
|
||||
// now that we have the best fragments, we can format them
|
||||
orderedTermLocations.MergeOverlapping()
|
||||
formattedFragments := make([]string, len(bestFragments))
|
||||
for i, fragment := range bestFragments {
|
||||
formattedFragments[i] = ""
|
||||
if fragment.Start != 0 {
|
||||
formattedFragments[i] += s.sep
|
||||
}
|
||||
formattedFragments[i] += s.formatter.Format(fragment, orderedTermLocations)
|
||||
if fragment.End != len(fragment.Orig) {
|
||||
formattedFragments[i] += s.sep
|
||||
}
|
||||
}
|
||||
|
||||
if dm.Fragments == nil {
|
||||
dm.Fragments = make(search.FieldFragmentMap, 0)
|
||||
}
|
||||
if len(formattedFragments) > 0 {
|
||||
dm.Fragments[field] = formattedFragments
|
||||
}
|
||||
|
||||
return formattedFragments
|
||||
}
|
||||
|
||||
// FragmentQueue implements heap.Interface and holds Items.
|
||||
type FragmentQueue []*highlight.Fragment
|
||||
|
||||
func (fq FragmentQueue) Len() int { return len(fq) }
|
||||
|
||||
func (fq FragmentQueue) Less(i, j int) bool {
|
||||
// We want Pop to give us the highest, not lowest, priority so we use greater-than here.
|
||||
return fq[i].Score > fq[j].Score
|
||||
}
|
||||
|
||||
func (fq FragmentQueue) Swap(i, j int) {
|
||||
fq[i], fq[j] = fq[j], fq[i]
|
||||
fq[i].Index = i
|
||||
fq[j].Index = j
|
||||
}
|
||||
|
||||
func (fq *FragmentQueue) Push(x interface{}) {
|
||||
n := len(*fq)
|
||||
item := x.(*highlight.Fragment)
|
||||
item.Index = n
|
||||
*fq = append(*fq, item)
|
||||
}
|
||||
|
||||
func (fq *FragmentQueue) Pop() interface{} {
|
||||
old := *fq
|
||||
n := len(old)
|
||||
item := old[n-1]
|
||||
item.Index = -1 // for safety
|
||||
*fq = old[0 : n-1]
|
||||
return item
|
||||
}
|
||||
|
||||
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.Highlighter, error) {
|
||||
separator := DefaultSeparator
|
||||
separatorVal, ok := config["separator"].(string)
|
||||
if ok {
|
||||
separator = separatorVal
|
||||
}
|
||||
|
||||
fragmenterName, ok := config["fragmenter"].(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("must specify fragmenter")
|
||||
}
|
||||
fragmenter, err := cache.FragmenterNamed(fragmenterName)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error building fragmenter: %v", err)
|
||||
}
|
||||
|
||||
formatterName, ok := config["formatter"].(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("must specify formatter")
|
||||
}
|
||||
formatter, err := cache.FragmentFormatterNamed(formatterName)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error building fragment formatter: %v", err)
|
||||
}
|
||||
|
||||
return NewHighlighter(fragmenter, formatter, separator), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterHighlighter(Name, Constructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
169
search/highlight/highlighter/simple/highlighter_simple_test.go
Normal file
169
search/highlight/highlighter/simple/highlighter_simple_test.go
Normal file
|
@ -0,0 +1,169 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package simple
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/document"
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/search/highlight/format/ansi"
|
||||
sfrag "github.com/blevesearch/bleve/v2/search/highlight/fragmenter/simple"
|
||||
)
|
||||
|
||||
// ANSI escape sequences used to spell out the expected highlighter output.
const (
	// reset is the escape sequence expected after each highlighted term.
	reset = "\x1b[0m"
	// DefaultAnsiHighlight is the escape sequence expected before each
	// highlighted term.
	DefaultAnsiHighlight = "\x1b[43m"
)
|
||||
|
||||
func TestSimpleHighlighter(t *testing.T) {
|
||||
fragmenter := sfrag.NewFragmenter(100)
|
||||
formatter := ansi.NewFragmentFormatter(ansi.DefaultAnsiHighlight)
|
||||
highlighter := NewHighlighter(fragmenter, formatter, DefaultSeparator)
|
||||
|
||||
docMatch := search.DocumentMatch{
|
||||
ID: "a",
|
||||
Score: 1.0,
|
||||
Locations: search.FieldTermLocationMap{
|
||||
"desc": search.TermLocationMap{
|
||||
"quick": []*search.Location{
|
||||
{
|
||||
Pos: 2,
|
||||
Start: 4,
|
||||
End: 9,
|
||||
},
|
||||
},
|
||||
"fox": []*search.Location{
|
||||
{
|
||||
Pos: 4,
|
||||
Start: 16,
|
||||
End: 19,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
expectedFragment := "the " + DefaultAnsiHighlight + "quick" + reset + " brown " + DefaultAnsiHighlight + "fox" + reset + " jumps over the lazy dog"
|
||||
doc := document.NewDocument("a").AddField(document.NewTextField("desc", []uint64{}, []byte("the quick brown fox jumps over the lazy dog")))
|
||||
|
||||
fragment := highlighter.BestFragmentInField(&docMatch, doc, "desc")
|
||||
if fragment != expectedFragment {
|
||||
t.Errorf("expected `%s`, got `%s`", expectedFragment, fragment)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSimpleHighlighterLonger(t *testing.T) {
|
||||
|
||||
fieldBytes := []byte(`Lorem ipsum dolor sit amet, consectetur adipiscing elit. Mauris sed semper nulla, sed pellentesque urna. Suspendisse potenti. Aliquam dignissim pulvinar erat vel ullamcorper. Nullam sed diam at dolor dapibus varius. Vestibulum at semper nunc. Integer ullamcorper enim ut nisi condimentum lacinia. Nulla ipsum ipsum, dictum in dapibus non, bibendum eget neque. Vestibulum malesuada erat quis malesuada dictum. Mauris luctus viverra lorem, nec hendrerit lacus lacinia ut. Donec suscipit sit amet nisi et dictum. Maecenas ultrices mollis diam, vel commodo libero lobortis nec. Nunc non dignissim dolor. Nulla non tempus risus, eget porttitor lectus. Suspendisse vitae gravida magna, a sagittis urna. Curabitur nec dui volutpat, hendrerit nisi non, adipiscing erat. Maecenas aliquet sem sit amet nibh ultrices accumsan.
|
||||
|
||||
Mauris lobortis sem sed blandit bibendum. In scelerisque eros sed metus aliquet convallis ac eget metus. Donec eget feugiat sem. Quisque venenatis, augue et blandit vulputate, velit odio viverra dolor, eu iaculis eros urna ut nunc. Duis faucibus mattis enim ut ultricies. Donec scelerisque volutpat elit, vel varius ante porttitor vel. Duis neque nulla, ultrices vel est id, molestie semper odio. Maecenas condimentum felis vitae nibh venenatis, ut feugiat risus vehicula. Suspendisse non sapien neque. Etiam et lorem consequat lorem aliquam ullamcorper. Pellentesque id vestibulum neque, at aliquam turpis. Aenean ultrices nec erat sit amet aliquam. Morbi eu sem in augue cursus ullamcorper a sed dolor. Integer et lobortis nulla, sit amet laoreet elit. In elementum, nibh nec volutpat pretium, lectus est pulvinar arcu, vehicula lobortis tellus sem id mauris. Maecenas ac blandit purus, sit amet scelerisque magna.
|
||||
|
||||
In hac habitasse platea dictumst. In lacinia elit non risus venenatis viverra. Nulla vestibulum laoreet turpis ac accumsan. Vivamus eros felis, rhoncus vel interdum bibendum, imperdiet nec diam. Etiam sed eros sed orci pellentesque sagittis. Praesent a fermentum leo. Vivamus ipsum risus, faucibus a dignissim ut, ullamcorper nec risus. Etiam quis adipiscing velit. Nam ac cursus arcu. Sed bibendum lectus quis massa dapibus dapibus. Vestibulum fermentum eros vitae hendrerit condimentum.
|
||||
|
||||
Fusce viverra eleifend iaculis. Maecenas tempor dictum cursus. Mauris faucibus, tortor in bibendum ornare, nibh lorem sollicitudin est, sed consectetur nulla dui imperdiet urna. Fusce aliquet odio fermentum massa mollis, id feugiat lacus egestas. Integer et eleifend metus. Duis neque tellus, vulputate nec dui eu, euismod sodales orci. Vivamus turpis erat, consectetur et pulvinar nec, ornare a quam. Maecenas fermentum, ligula vitae consectetur lobortis, mi lacus fermentum ante, ut semper lacus lectus porta orci. Nulla vehicula sodales eros, in iaculis ante laoreet at. Sed venenatis interdum metus, egestas scelerisque orci laoreet ut. Donec fermentum enim eget nibh blandit laoreet. Proin lacinia adipiscing lorem vel ornare. Donec ullamcorper massa elementum urna varius viverra. Proin pharetra, erat at feugiat rhoncus, velit eros condimentum mi, ac mattis sapien dolor non elit. Aenean viverra purus id tincidunt vulputate.
|
||||
|
||||
Etiam vel augue vel nisl commodo suscipit et ac nisl. Quisque eros diam, porttitor et aliquet sed, vulputate in odio. Aenean feugiat est quis neque vehicula, eget vulputate nunc tempor. Donec quis nulla ut quam feugiat consectetur ut et justo. Nulla congue, metus auctor facilisis scelerisque, nunc risus vulputate urna, in blandit urna nibh et neque. Etiam quis tortor ut nulla dignissim dictum non sed ligula. Vivamus accumsan ligula eget ipsum ultrices, a tincidunt urna blandit. In hac habitasse platea dictumst.`)
|
||||
|
||||
doc := document.NewDocument("a").AddField(document.NewTextField("full", []uint64{}, fieldBytes))
|
||||
docMatch := search.DocumentMatch{
|
||||
ID: "a",
|
||||
Score: 1.0,
|
||||
Locations: search.FieldTermLocationMap{
|
||||
"full": search.TermLocationMap{
|
||||
"metus": []*search.Location{
|
||||
{
|
||||
Pos: 0,
|
||||
Start: 883,
|
||||
End: 888,
|
||||
},
|
||||
{
|
||||
Pos: 0,
|
||||
Start: 915,
|
||||
End: 920,
|
||||
},
|
||||
{
|
||||
Pos: 0,
|
||||
Start: 2492,
|
||||
End: 2497,
|
||||
},
|
||||
{
|
||||
Pos: 0,
|
||||
Start: 2822,
|
||||
End: 2827,
|
||||
},
|
||||
{
|
||||
Pos: 0,
|
||||
Start: 3417,
|
||||
End: 3422,
|
||||
},
|
||||
},
|
||||
"interdum": []*search.Location{
|
||||
{
|
||||
Pos: 0,
|
||||
Start: 1891,
|
||||
End: 1899,
|
||||
},
|
||||
{
|
||||
Pos: 0,
|
||||
Start: 2813,
|
||||
End: 2821,
|
||||
},
|
||||
},
|
||||
"venenatis": []*search.Location{
|
||||
{
|
||||
Pos: 0,
|
||||
Start: 954,
|
||||
End: 963,
|
||||
},
|
||||
{
|
||||
Pos: 0,
|
||||
Start: 1252,
|
||||
End: 1261,
|
||||
},
|
||||
{
|
||||
Pos: 0,
|
||||
Start: 1795,
|
||||
End: 1804,
|
||||
},
|
||||
{
|
||||
Pos: 0,
|
||||
Start: 2803,
|
||||
End: 2812,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
expectedFragments := []string{
|
||||
"…eros, in iaculis ante laoreet at. Sed " + DefaultAnsiHighlight + "venenatis" + reset + " " + DefaultAnsiHighlight + "interdum" + reset + " " + DefaultAnsiHighlight + "metus" + reset + ", egestas scelerisque orci laoreet ut.…",
|
||||
"… eros sed " + DefaultAnsiHighlight + "metus" + reset + " aliquet convallis ac eget " + DefaultAnsiHighlight + "metus" + reset + ". Donec eget feugiat sem. Quisque " + DefaultAnsiHighlight + "venenatis" + reset + ", augue et…",
|
||||
"… odio. Maecenas condimentum felis vitae nibh " + DefaultAnsiHighlight + "venenatis" + reset + ", ut feugiat risus vehicula. Suspendisse non s…",
|
||||
"… id feugiat lacus egestas. Integer et eleifend " + DefaultAnsiHighlight + "metus" + reset + ". Duis neque tellus, vulputate nec dui eu, euism…",
|
||||
"… accumsan. Vivamus eros felis, rhoncus vel " + DefaultAnsiHighlight + "interdum" + reset + " bibendum, imperdiet nec diam. Etiam sed eros sed…",
|
||||
}
|
||||
|
||||
fragmenter := sfrag.NewFragmenter(100)
|
||||
formatter := ansi.NewFragmentFormatter(ansi.DefaultAnsiHighlight)
|
||||
highlighter := NewHighlighter(fragmenter, formatter, DefaultSeparator)
|
||||
fragments := highlighter.BestFragmentsInField(&docMatch, doc, "full", 5)
|
||||
|
||||
if !reflect.DeepEqual(fragments, expectedFragments) {
|
||||
t.Errorf("expected %#v, got %#v", expectedFragments, fragments)
|
||||
}
|
||||
|
||||
}
|
105
search/highlight/term_locations.go
Normal file
105
search/highlight/term_locations.go
Normal file
|
@ -0,0 +1,105 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package highlight
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
)
|
||||
|
||||
type TermLocation struct {
|
||||
Term string
|
||||
ArrayPositions search.ArrayPositions
|
||||
Pos int
|
||||
Start int
|
||||
End int
|
||||
}
|
||||
|
||||
func (tl *TermLocation) Overlaps(other *TermLocation) bool {
|
||||
if reflect.DeepEqual(tl.ArrayPositions, other.ArrayPositions) {
|
||||
if other.Start >= tl.Start && other.Start < tl.End {
|
||||
return true
|
||||
} else if tl.Start >= other.Start && tl.Start < other.End {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
type TermLocations []*TermLocation
|
||||
|
||||
func (t TermLocations) Len() int { return len(t) }
|
||||
func (t TermLocations) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
|
||||
func (t TermLocations) Less(i, j int) bool {
|
||||
|
||||
shortestArrayPositions := len(t[i].ArrayPositions)
|
||||
if len(t[j].ArrayPositions) < shortestArrayPositions {
|
||||
shortestArrayPositions = len(t[j].ArrayPositions)
|
||||
}
|
||||
|
||||
// compare all the common array positions
|
||||
for api := 0; api < shortestArrayPositions; api++ {
|
||||
if t[i].ArrayPositions[api] < t[j].ArrayPositions[api] {
|
||||
return true
|
||||
}
|
||||
if t[i].ArrayPositions[api] > t[j].ArrayPositions[api] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
// all the common array positions are the same
|
||||
if len(t[i].ArrayPositions) < len(t[j].ArrayPositions) {
|
||||
return true // j array positions, longer so greater
|
||||
} else if len(t[i].ArrayPositions) > len(t[j].ArrayPositions) {
|
||||
return false // j array positions, shorter so less
|
||||
}
|
||||
|
||||
// array positions the same, compare starts
|
||||
return t[i].Start < t[j].Start
|
||||
}
|
||||
|
||||
func (t TermLocations) MergeOverlapping() {
|
||||
var lastTl *TermLocation
|
||||
for i, tl := range t {
|
||||
if lastTl == nil && tl != nil {
|
||||
lastTl = tl
|
||||
} else if lastTl != nil && tl != nil {
|
||||
if lastTl.Overlaps(tl) {
|
||||
// ok merge this with previous
|
||||
lastTl.End = tl.End
|
||||
t[i] = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func OrderTermLocations(tlm search.TermLocationMap) TermLocations {
|
||||
rv := make(TermLocations, 0)
|
||||
for term, locations := range tlm {
|
||||
for _, location := range locations {
|
||||
tl := TermLocation{
|
||||
Term: term,
|
||||
ArrayPositions: location.ArrayPositions,
|
||||
Pos: int(location.Pos),
|
||||
Start: int(location.Start),
|
||||
End: int(location.End),
|
||||
}
|
||||
rv = append(rv, &tl)
|
||||
}
|
||||
}
|
||||
sort.Sort(rv)
|
||||
return rv
|
||||
}
|
512
search/highlight/term_locations_test.go
Normal file
512
search/highlight/term_locations_test.go
Normal file
|
@ -0,0 +1,512 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package highlight
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
)
|
||||
|
||||
func TestTermLocationOverlaps(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
left *TermLocation
|
||||
right *TermLocation
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
left: &TermLocation{
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
right: &TermLocation{
|
||||
Start: 3,
|
||||
End: 7,
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
left: &TermLocation{
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
right: &TermLocation{
|
||||
Start: 5,
|
||||
End: 7,
|
||||
},
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
left: &TermLocation{
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
right: &TermLocation{
|
||||
Start: 7,
|
||||
End: 11,
|
||||
},
|
||||
expected: false,
|
||||
},
|
||||
// with array positions
|
||||
{
|
||||
left: &TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
right: &TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{1},
|
||||
Start: 7,
|
||||
End: 11,
|
||||
},
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
left: &TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
right: &TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{1},
|
||||
Start: 3,
|
||||
End: 11,
|
||||
},
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
left: &TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
right: &TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Start: 3,
|
||||
End: 11,
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
left: &TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
right: &TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Start: 7,
|
||||
End: 11,
|
||||
},
|
||||
expected: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
actual := test.left.Overlaps(test.right)
|
||||
if actual != test.expected {
|
||||
t.Errorf("expected %t got %t for %#v", test.expected, actual, test)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestTermLocationsMergeOverlapping(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
input TermLocations
|
||||
output TermLocations
|
||||
}{
|
||||
{
|
||||
input: TermLocations{},
|
||||
output: TermLocations{},
|
||||
},
|
||||
{
|
||||
input: TermLocations{
|
||||
&TermLocation{
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
&TermLocation{
|
||||
Start: 7,
|
||||
End: 11,
|
||||
},
|
||||
},
|
||||
output: TermLocations{
|
||||
&TermLocation{
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
&TermLocation{
|
||||
Start: 7,
|
||||
End: 11,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: TermLocations{
|
||||
&TermLocation{
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
&TermLocation{
|
||||
Start: 4,
|
||||
End: 11,
|
||||
},
|
||||
},
|
||||
output: TermLocations{
|
||||
&TermLocation{
|
||||
Start: 0,
|
||||
End: 11,
|
||||
},
|
||||
nil,
|
||||
},
|
||||
},
|
||||
{
|
||||
input: TermLocations{
|
||||
&TermLocation{
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
&TermLocation{
|
||||
Start: 4,
|
||||
End: 11,
|
||||
},
|
||||
&TermLocation{
|
||||
Start: 9,
|
||||
End: 13,
|
||||
},
|
||||
},
|
||||
output: TermLocations{
|
||||
&TermLocation{
|
||||
Start: 0,
|
||||
End: 13,
|
||||
},
|
||||
nil,
|
||||
nil,
|
||||
},
|
||||
},
|
||||
{
|
||||
input: TermLocations{
|
||||
&TermLocation{
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
&TermLocation{
|
||||
Start: 4,
|
||||
End: 11,
|
||||
},
|
||||
&TermLocation{
|
||||
Start: 9,
|
||||
End: 13,
|
||||
},
|
||||
&TermLocation{
|
||||
Start: 15,
|
||||
End: 21,
|
||||
},
|
||||
},
|
||||
output: TermLocations{
|
||||
&TermLocation{
|
||||
Start: 0,
|
||||
End: 13,
|
||||
},
|
||||
nil,
|
||||
nil,
|
||||
&TermLocation{
|
||||
Start: 15,
|
||||
End: 21,
|
||||
},
|
||||
},
|
||||
},
|
||||
// with array positions
|
||||
{
|
||||
input: TermLocations{
|
||||
&TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
&TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{1},
|
||||
Start: 7,
|
||||
End: 11,
|
||||
},
|
||||
},
|
||||
output: TermLocations{
|
||||
&TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
&TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{1},
|
||||
Start: 7,
|
||||
End: 11,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: TermLocations{
|
||||
&TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
&TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Start: 7,
|
||||
End: 11,
|
||||
},
|
||||
},
|
||||
output: TermLocations{
|
||||
&TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
&TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Start: 7,
|
||||
End: 11,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: TermLocations{
|
||||
&TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
&TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Start: 3,
|
||||
End: 11,
|
||||
},
|
||||
},
|
||||
output: TermLocations{
|
||||
&TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Start: 0,
|
||||
End: 11,
|
||||
},
|
||||
nil,
|
||||
},
|
||||
},
|
||||
{
|
||||
input: TermLocations{
|
||||
&TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
&TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{1},
|
||||
Start: 3,
|
||||
End: 11,
|
||||
},
|
||||
},
|
||||
output: TermLocations{
|
||||
&TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Start: 0,
|
||||
End: 5,
|
||||
},
|
||||
&TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{1},
|
||||
Start: 3,
|
||||
End: 11,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
test.input.MergeOverlapping()
|
||||
if !reflect.DeepEqual(test.input, test.output) {
|
||||
t.Errorf("expected: %#v got %#v", test.output, test.input)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestTermLocationsOrder(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
input search.TermLocationMap
|
||||
output TermLocations
|
||||
}{
|
||||
{
|
||||
input: search.TermLocationMap{},
|
||||
output: TermLocations{},
|
||||
},
|
||||
{
|
||||
input: search.TermLocationMap{
|
||||
"term": []*search.Location{
|
||||
{
|
||||
Start: 0,
|
||||
},
|
||||
{
|
||||
Start: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
output: TermLocations{
|
||||
&TermLocation{
|
||||
Term: "term",
|
||||
Start: 0,
|
||||
},
|
||||
&TermLocation{
|
||||
Term: "term",
|
||||
Start: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: search.TermLocationMap{
|
||||
"term": []*search.Location{
|
||||
{
|
||||
Start: 5,
|
||||
},
|
||||
{
|
||||
Start: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
output: TermLocations{
|
||||
&TermLocation{
|
||||
Term: "term",
|
||||
Start: 0,
|
||||
},
|
||||
&TermLocation{
|
||||
Term: "term",
|
||||
Start: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
// with array positions
|
||||
{
|
||||
input: search.TermLocationMap{
|
||||
"term": []*search.Location{
|
||||
{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Start: 0,
|
||||
},
|
||||
{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Start: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
output: TermLocations{
|
||||
&TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Term: "term",
|
||||
Start: 0,
|
||||
},
|
||||
&TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Term: "term",
|
||||
Start: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: search.TermLocationMap{
|
||||
"term": []*search.Location{
|
||||
{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Start: 5,
|
||||
},
|
||||
{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Start: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
output: TermLocations{
|
||||
&TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Term: "term",
|
||||
Start: 0,
|
||||
},
|
||||
&TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Term: "term",
|
||||
Start: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: search.TermLocationMap{
|
||||
"term": []*search.Location{
|
||||
{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Start: 5,
|
||||
},
|
||||
{
|
||||
ArrayPositions: search.ArrayPositions{1},
|
||||
Start: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
output: TermLocations{
|
||||
&TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Term: "term",
|
||||
Start: 5,
|
||||
},
|
||||
&TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{1},
|
||||
Term: "term",
|
||||
Start: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: search.TermLocationMap{
|
||||
"term": []*search.Location{
|
||||
{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Start: 5,
|
||||
},
|
||||
{
|
||||
ArrayPositions: search.ArrayPositions{0, 1},
|
||||
Start: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
output: TermLocations{
|
||||
&TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{0},
|
||||
Term: "term",
|
||||
Start: 5,
|
||||
},
|
||||
&TermLocation{
|
||||
ArrayPositions: search.ArrayPositions{0, 1},
|
||||
Term: "term",
|
||||
Start: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
actual := OrderTermLocations(test.input)
|
||||
if !reflect.DeepEqual(actual, test.output) {
|
||||
t.Errorf("expected: %#v got %#v", test.output, actual)
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue