
Adding upstream version 2.5.1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann 2025-05-19 00:20:02 +02:00
parent c71cb8b61d
commit 982828099e
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
783 changed files with 150650 additions and 0 deletions

@@ -0,0 +1,65 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
const AnalyzerName = "fr"
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
tokenizer, err := cache.TokenizerNamed(unicode.Name)
if err != nil {
return nil, err
}
elisionFilter, err := cache.TokenFilterNamed(ElisionName)
if err != nil {
return nil, err
}
toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
if err != nil {
return nil, err
}
stopFrFilter, err := cache.TokenFilterNamed(StopName)
if err != nil {
return nil, err
}
stemmerFrFilter, err := cache.TokenFilterNamed(LightStemmerName)
if err != nil {
return nil, err
}
rv := analysis.DefaultAnalyzer{
Tokenizer: tokenizer,
TokenFilters: []analysis.TokenFilter{
toLowerFilter,
elisionFilter,
stopFrFilter,
stemmerFrFilter,
},
}
return &rv, nil
}
func init() {
err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
if err != nil {
panic(err)
}
}
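
For orientation, here is a minimal sketch (not part of the diff) of how an application might select the "fr" analyzer registered above, assuming the package lives at its usual import path under analysis/lang/fr and using the standard bleve v2 index-mapping API; "example.bleve" and the sample document are placeholders.

package main

import (
    "log"

    "github.com/blevesearch/bleve/v2"
    // Blank-import the French language package so its init functions register
    // the "fr" analyzer, token maps and token filters with bleve's registry.
    _ "github.com/blevesearch/bleve/v2/analysis/lang/fr"
)

func main() {
    // Use the French analyzer for every text field by default.
    indexMapping := bleve.NewIndexMapping()
    indexMapping.DefaultAnalyzer = "fr"

    // "example.bleve" is just a placeholder path for the on-disk index.
    index, err := bleve.New("example.bleve", indexMapping)
    if err != nil {
        log.Fatal(err)
    }
    defer index.Close()

    // Terms are tokenized, lowercased, elided, stop-filtered and stemmed,
    // so "chevaux" and "cheval" end up indexed under the same term.
    if err := index.Index("doc1", map[string]interface{}{"texte": "Les chevaux galopent"}); err != nil {
        log.Fatal(err)
    }
}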

@@ -0,0 +1,209 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestFrenchAnalyzer(t *testing.T) {
tests := []struct {
input []byte
output analysis.TokenStream
}{
{
input: []byte(""),
output: analysis.TokenStream{},
},
{
input: []byte("chien chat cheval"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("chien"),
},
&analysis.Token{
Term: []byte("chat"),
},
&analysis.Token{
Term: []byte("cheval"),
},
},
},
{
input: []byte("chien CHAT CHEVAL"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("chien"),
},
&analysis.Token{
Term: []byte("chat"),
},
&analysis.Token{
Term: []byte("cheval"),
},
},
},
{
input: []byte(" chien ,? + = - CHAT /: > CHEVAL"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("chien"),
},
&analysis.Token{
Term: []byte("chat"),
},
&analysis.Token{
Term: []byte("cheval"),
},
},
},
{
input: []byte("chien++"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("chien"),
},
},
},
{
input: []byte("mot \"entreguillemet\""),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("mot"),
},
&analysis.Token{
Term: []byte("entreguilemet"),
},
},
},
{
input: []byte("Jean-François"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("jean"),
},
&analysis.Token{
Term: []byte("francoi"),
},
},
},
// stop words
{
input: []byte("le la chien les aux chat du des à cheval"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("chien"),
},
&analysis.Token{
Term: []byte("chat"),
},
&analysis.Token{
Term: []byte("cheval"),
},
},
},
// nouns and adjectives
{
input: []byte("lances chismes habitable chiste éléments captifs"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("lanc"),
},
&analysis.Token{
Term: []byte("chism"),
},
&analysis.Token{
Term: []byte("habitabl"),
},
&analysis.Token{
Term: []byte("chist"),
},
&analysis.Token{
Term: []byte("element"),
},
&analysis.Token{
Term: []byte("captif"),
},
},
},
// verbs
{
input: []byte("finissions souffrirent rugissante"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("finision"),
},
&analysis.Token{
Term: []byte("soufrirent"),
},
&analysis.Token{
Term: []byte("rugisant"),
},
},
},
{
input: []byte("C3PO aujourd'hui oeuf ïâöûàä anticonstitutionnellement Java++ "),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("c3po"),
},
&analysis.Token{
Term: []byte("aujourd'hui"),
},
&analysis.Token{
Term: []byte("oeuf"),
},
&analysis.Token{
Term: []byte("ïaöuaä"),
},
&analysis.Token{
Term: []byte("anticonstitutionel"),
},
&analysis.Token{
Term: []byte("java"),
},
},
},
{
input: []byte("propriétaire"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("proprietair"),
},
},
},
}
cache := registry.NewCache()
analyzer, err := cache.AnalyzerNamed(AnalyzerName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
if len(actual) != len(test.output) {
t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
}
for i, tok := range actual {
if !reflect.DeepEqual(tok.Term, test.output[i].Term) {
t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term)
}
}
}
}

@@ -0,0 +1,40 @@
package fr
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const ArticlesName = "articles_fr"
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis
var FrenchArticles = []byte(`
l
m
t
qu
n
s
j
d
c
jusqu
quoiqu
lorsqu
puisqu
`)
func ArticlesTokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
rv := analysis.NewTokenMap()
err := rv.LoadBytes(FrenchArticles)
return rv, err
}
func init() {
err := registry.RegisterTokenMap(ArticlesName, ArticlesTokenMapConstructor)
if err != nil {
panic(err)
}
}
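
As a rough illustration (not part of the diff) of what the constructor above produces: the loaded token map behaves like a set keyed by the elided forms listed in FrenchArticles, assuming bleve v2's analysis.TokenMap is the usual map[string]bool type.

package fr

import (
    "fmt"

    "github.com/blevesearch/bleve/v2/analysis"
)

// ExampleArticlesTokenMap is a hypothetical sketch: each non-empty line of
// FrenchArticles becomes one key in the map, so the elision filter can test
// membership of the prefix it finds before an apostrophe.
func ExampleArticlesTokenMap() {
    articles := analysis.NewTokenMap()
    if err := articles.LoadBytes(FrenchArticles); err != nil {
        panic(err)
    }
    fmt.Println(articles["qu"], articles["jusqu"], articles["le"])
    // Output: true true false
}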

@@ -0,0 +1,40 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/elision"
"github.com/blevesearch/bleve/v2/registry"
)
const ElisionName = "elision_fr"
func ElisionFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
articlesTokenMap, err := cache.TokenMapNamed(ArticlesName)
if err != nil {
return nil, fmt.Errorf("error building elision filter: %v", err)
}
return elision.NewElisionFilter(articlesTokenMap), nil
}
func init() {
err := registry.RegisterTokenFilter(ElisionName, ElisionFilterConstructor)
if err != nil {
panic(err)
}
}
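
A short usage sketch (not part of the diff): pulled from a registry cache exactly as the test below does, the filter strips a leading article that is joined to the word by an apostrophe. "d'avion" is an illustrative input; the expected behaviour is inferred from the articles list above and the test that follows.

package fr

import (
    "fmt"

    "github.com/blevesearch/bleve/v2/analysis"
    "github.com/blevesearch/bleve/v2/registry"
)

// ExampleElisionFilter sketches the elision_fr filter in isolation.
func ExampleElisionFilter() {
    cache := registry.NewCache()
    elisionFilter, err := cache.TokenFilterNamed(ElisionName)
    if err != nil {
        panic(err)
    }
    stream := analysis.TokenStream{
        &analysis.Token{Term: []byte("d'avion")},
    }
    for _, tok := range elisionFilter.Filter(stream) {
        fmt.Println(string(tok.Term))
    }
    // Output: avion
}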

@@ -0,0 +1,55 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestFrenchElision(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("l'avion"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("avion"),
},
},
},
}
cache := registry.NewCache()
elisionFilter, err := cache.TokenFilterNamed(ElisionName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := elisionFilter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
}
}
}

@@ -0,0 +1,309 @@
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"bytes"
"unicode"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const LightStemmerName = "stemmer_fr_light"
type FrenchLightStemmerFilter struct {
}
func NewFrenchLightStemmerFilter() *FrenchLightStemmerFilter {
return &FrenchLightStemmerFilter{}
}
func (s *FrenchLightStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
runes := bytes.Runes(token.Term)
runes = stem(runes)
token.Term = analysis.BuildTermFromRunes(runes)
}
return input
}
func stem(input []rune) []rune {
inputLen := len(input)
if inputLen > 5 && input[inputLen-1] == 'x' {
if input[inputLen-3] == 'a' && input[inputLen-2] == 'u' && input[inputLen-4] != 'e' {
input[inputLen-2] = 'l'
}
input = input[0 : inputLen-1]
inputLen = len(input)
}
if inputLen > 3 && input[inputLen-1] == 'x' {
input = input[0 : inputLen-1]
inputLen = len(input)
}
if inputLen > 3 && input[inputLen-1] == 's' {
input = input[0 : inputLen-1]
inputLen = len(input)
}
if inputLen > 9 && analysis.RunesEndsWith(input, "issement") {
input = input[0 : inputLen-6]
inputLen = len(input)
input[inputLen-1] = 'r'
return norm(input)
}
if inputLen > 8 && analysis.RunesEndsWith(input, "issant") {
input = input[0 : inputLen-4]
inputLen = len(input)
input[inputLen-1] = 'r'
return norm(input)
}
if inputLen > 6 && analysis.RunesEndsWith(input, "ement") {
input = input[0 : inputLen-4]
inputLen = len(input)
if inputLen > 3 && analysis.RunesEndsWith(input, "ive") {
input = input[0 : inputLen-1]
inputLen = len(input)
input[inputLen-1] = 'f'
}
return norm(input)
}
if inputLen > 11 && analysis.RunesEndsWith(input, "ficatrice") {
input = input[0 : inputLen-5]
inputLen = len(input)
input[inputLen-2] = 'e'
input[inputLen-1] = 'r'
return norm(input)
}
if inputLen > 10 && analysis.RunesEndsWith(input, "ficateur") {
input = input[0 : inputLen-4]
inputLen = len(input)
input[inputLen-2] = 'e'
input[inputLen-1] = 'r'
return norm(input)
}
if inputLen > 9 && analysis.RunesEndsWith(input, "catrice") {
input = input[0 : inputLen-3]
inputLen = len(input)
input[inputLen-4] = 'q'
input[inputLen-3] = 'u'
input[inputLen-2] = 'e'
//s[len-1] = 'r' <-- unnecessary, already 'r'.
return norm(input)
}
if inputLen > 8 && analysis.RunesEndsWith(input, "cateur") {
input = input[0 : inputLen-2]
inputLen = len(input)
input[inputLen-4] = 'q'
input[inputLen-3] = 'u'
input[inputLen-2] = 'e'
input[inputLen-1] = 'r'
return norm(input)
}
if inputLen > 8 && analysis.RunesEndsWith(input, "atrice") {
input = input[0 : inputLen-4]
inputLen = len(input)
input[inputLen-2] = 'e'
input[inputLen-1] = 'r'
return norm(input)
}
if inputLen > 7 && analysis.RunesEndsWith(input, "ateur") {
input = input[0 : inputLen-3]
inputLen = len(input)
input[inputLen-2] = 'e'
input[inputLen-1] = 'r'
return norm(input)
}
if inputLen > 6 && analysis.RunesEndsWith(input, "trice") {
input = input[0 : inputLen-1]
inputLen = len(input)
input[inputLen-3] = 'e'
input[inputLen-2] = 'u'
input[inputLen-1] = 'r'
}
if inputLen > 5 && analysis.RunesEndsWith(input, "ième") {
return norm(input[0 : inputLen-4])
}
if inputLen > 7 && analysis.RunesEndsWith(input, "teuse") {
input = input[0 : inputLen-2]
inputLen = len(input)
input[inputLen-1] = 'r'
return norm(input)
}
if inputLen > 6 && analysis.RunesEndsWith(input, "teur") {
input = input[0 : inputLen-1]
inputLen = len(input)
input[inputLen-1] = 'r'
return norm(input)
}
if inputLen > 5 && analysis.RunesEndsWith(input, "euse") {
return norm(input[0 : inputLen-2])
}
if inputLen > 8 && analysis.RunesEndsWith(input, "ère") {
input = input[0 : inputLen-1]
inputLen = len(input)
input[inputLen-2] = 'e'
return norm(input)
}
if inputLen > 7 && analysis.RunesEndsWith(input, "ive") {
input = input[0 : inputLen-1]
inputLen = len(input)
input[inputLen-1] = 'f'
return norm(input)
}
if inputLen > 4 &&
(analysis.RunesEndsWith(input, "folle") ||
analysis.RunesEndsWith(input, "molle")) {
input = input[0 : inputLen-2]
inputLen = len(input)
input[inputLen-1] = 'u'
return norm(input)
}
if inputLen > 9 && analysis.RunesEndsWith(input, "nnelle") {
return norm(input[0 : inputLen-5])
}
if inputLen > 9 && analysis.RunesEndsWith(input, "nnel") {
return norm(input[0 : inputLen-3])
}
if inputLen > 4 && analysis.RunesEndsWith(input, "ète") {
input = input[0 : inputLen-1]
inputLen = len(input)
input[inputLen-2] = 'e'
}
if inputLen > 8 && analysis.RunesEndsWith(input, "ique") {
input = input[0 : inputLen-4]
inputLen = len(input)
}
if inputLen > 8 && analysis.RunesEndsWith(input, "esse") {
return norm(input[0 : inputLen-3])
}
if inputLen > 7 && analysis.RunesEndsWith(input, "inage") {
return norm(input[0 : inputLen-3])
}
if inputLen > 9 && analysis.RunesEndsWith(input, "isation") {
input = input[0 : inputLen-7]
inputLen = len(input)
if inputLen > 5 && analysis.RunesEndsWith(input, "ual") {
input[inputLen-2] = 'e'
}
return norm(input)
}
if inputLen > 9 && analysis.RunesEndsWith(input, "isateur") {
return norm(input[0 : inputLen-7])
}
if inputLen > 8 && analysis.RunesEndsWith(input, "ation") {
return norm(input[0 : inputLen-5])
}
if inputLen > 8 && analysis.RunesEndsWith(input, "ition") {
return norm(input[0 : inputLen-5])
}
return norm(input)
}
func norm(input []rune) []rune {
if len(input) > 4 {
for i := 0; i < len(input); i++ {
switch input[i] {
case 'à', 'á', 'â':
input[i] = 'a'
case 'ô':
input[i] = 'o'
case 'è', 'é', 'ê':
input[i] = 'e'
case 'ù', 'û':
input[i] = 'u'
case 'î':
input[i] = 'i'
case 'ç':
input[i] = 'c'
}
ch := input[0]
for i := 1; i < len(input); i++ {
if input[i] == ch && unicode.IsLetter(ch) {
input = analysis.DeleteRune(input, i)
i -= 1
} else {
ch = input[i]
}
}
}
}
if len(input) > 4 && analysis.RunesEndsWith(input, "ie") {
input = input[0 : len(input)-2]
}
if len(input) > 4 {
if input[len(input)-1] == 'r' {
input = input[0 : len(input)-1]
}
if input[len(input)-1] == 'e' {
input = input[0 : len(input)-1]
}
if input[len(input)-1] == 'e' {
input = input[0 : len(input)-1]
}
if input[len(input)-1] == input[len(input)-2] && unicode.IsLetter(input[len(input)-1]) {
input = input[0 : len(input)-1]
}
}
return input
}
func FrenchLightStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewFrenchLightStemmerFilter(), nil
}
func init() {
err := registry.RegisterTokenFilter(LightStemmerName, FrenchLightStemmerFilterConstructor)
if err != nil {
panic(err)
}
}
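
To make the suffix rewriting above concrete, here is a small sketch (not part of the diff) that calls the package's unexported stem helper directly; the inputs are already lowercase, and the expected outputs match the analyzer test earlier in this commit.

package fr

import "fmt"

// ExampleFrenchLightStemmerFilter shows a few of the light stemmer's
// rewrites: plural stripping, accent folding, double-letter collapsing
// and trailing -e removal.
func ExampleFrenchLightStemmerFilter() {
    for _, term := range []string{"éléments", "habitable", "finissions", "propriétaire"} {
        fmt.Println(string(stem([]rune(term))))
    }
    // Output:
    // element
    // habitabl
    // finision
    // proprietair
}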

File diff suppressed because it is too large.

@@ -0,0 +1,82 @@
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"bytes"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const MinimalStemmerName = "stemmer_fr_min"
type FrenchMinimalStemmerFilter struct {
}
func NewFrenchMinimalStemmerFilter() *FrenchMinimalStemmerFilter {
return &FrenchMinimalStemmerFilter{}
}
func (s *FrenchMinimalStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
runes := bytes.Runes(token.Term)
runes = minstem(runes)
token.Term = analysis.BuildTermFromRunes(runes)
}
return input
}
func minstem(input []rune) []rune {
if len(input) < 6 {
return input
}
if input[len(input)-1] == 'x' {
if input[len(input)-3] == 'a' && input[len(input)-2] == 'u' {
input[len(input)-2] = 'l'
}
return input[0 : len(input)-1]
}
if input[len(input)-1] == 's' {
input = input[0 : len(input)-1]
}
if input[len(input)-1] == 'r' {
input = input[0 : len(input)-1]
}
if input[len(input)-1] == 'e' {
input = input[0 : len(input)-1]
}
if input[len(input)-1] == 'é' {
input = input[0 : len(input)-1]
}
if input[len(input)-1] == input[len(input)-2] {
input = input[0 : len(input)-1]
}
return input
}
func FrenchMinimalStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewFrenchMinimalStemmerFilter(), nil
}
func init() {
err := registry.RegisterTokenFilter(MinimalStemmerName, FrenchMinimalStemmerFilterConstructor)
if err != nil {
panic(err)
}
}
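
The default "fr" analyzer registered in the first file of this commit uses the light stemmer, so the minimal stemmer above is registered but not wired into that analyzer. A hypothetical sketch of a variant that swaps it in, mirroring AnalyzerConstructor, might look like this (the function name and the choice to keep the same filter order are assumptions, not part of the diff):

package fr

import (
    "github.com/blevesearch/bleve/v2/analysis"
    "github.com/blevesearch/bleve/v2/analysis/token/lowercase"
    "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
    "github.com/blevesearch/bleve/v2/registry"
)

// newMinimalFrenchAnalyzer is a hypothetical variant of AnalyzerConstructor
// that uses the minimal stemmer instead of the light stemmer.
func newMinimalFrenchAnalyzer(cache *registry.Cache) (analysis.Analyzer, error) {
    tokenizer, err := cache.TokenizerNamed(unicode.Name)
    if err != nil {
        return nil, err
    }
    toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
    if err != nil {
        return nil, err
    }
    elisionFilter, err := cache.TokenFilterNamed(ElisionName)
    if err != nil {
        return nil, err
    }
    stopFrFilter, err := cache.TokenFilterNamed(StopName)
    if err != nil {
        return nil, err
    }
    minimalStemmer, err := cache.TokenFilterNamed(MinimalStemmerName)
    if err != nil {
        return nil, err
    }
    rv := analysis.DefaultAnalyzer{
        Tokenizer: tokenizer,
        TokenFilters: []analysis.TokenFilter{
            toLowerFilter,
            elisionFilter,
            stopFrFilter,
            minimalStemmer,
        },
    }
    return &rv, nil
}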

@@ -0,0 +1,139 @@
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestFrenchMinimalStemmer(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("chevaux"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("cheval"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("hiboux"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("hibou"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("chantés"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("chant"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("chanter"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("chant"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("chante"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("chant"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("baronnes"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("baron"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("barons"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("baron"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("baron"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("baron"),
},
},
},
}
cache := registry.NewCache()
filter, err := cache.TokenFilterNamed(MinimalStemmerName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := filter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
}
}
}

@@ -0,0 +1,52 @@
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/snowballstem"
"github.com/blevesearch/snowballstem/french"
)
const SnowballStemmerName = "stemmer_fr_snowball"
type FrenchStemmerFilter struct {
}
func NewFrenchStemmerFilter() *FrenchStemmerFilter {
return &FrenchStemmerFilter{}
}
func (s *FrenchStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
env := snowballstem.NewEnv(string(token.Term))
french.Stem(env)
token.Term = []byte(env.Current())
}
return input
}
func FrenchStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewFrenchStemmerFilter(), nil
}
func init() {
err := registry.RegisterTokenFilter(SnowballStemmerName, FrenchStemmerFilterConstructor)
if err != nil {
panic(err)
}
}
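
For comparison with the light and minimal stemmers, here is a sketch (not part of the diff) driving the snowballstem package directly, exactly as the Filter method above does; the expected outputs are the ones exercised by the test that follows.

package fr

import (
    "fmt"

    "github.com/blevesearch/snowballstem"
    "github.com/blevesearch/snowballstem/french"
)

// ExampleFrenchStemmerFilter shows the underlying snowball calls used by the
// stemmer_fr_snowball filter.
func ExampleFrenchStemmerFilter() {
    for _, term := range []string{"antagoniste", "barbouillait", "calculateur"} {
        env := snowballstem.NewEnv(term)
        french.Stem(env)
        fmt.Println(env.Current())
    }
    // Output:
    // antagon
    // barbouill
    // calcul
}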

@@ -0,0 +1,79 @@
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestSnowballFrenchStemmer(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("antagoniste"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("antagon"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("barbouillait"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("barbouill"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("calculateur"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("calcul"),
},
},
},
}
cache := registry.NewCache()
filter, err := cache.TokenFilterNamed(SnowballStemmerName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := filter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
}
}
}

@@ -0,0 +1,36 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
tokenMap, err := cache.TokenMapNamed(StopName)
if err != nil {
return nil, err
}
return stop.NewStopTokensFilter(tokenMap), nil
}
func init() {
err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
if err != nil {
panic(err)
}
}
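
A brief sketch (not part of the diff) of the stop filter in isolation: tokens whose terms appear in the stop_fr token map (the word list in the next file) are dropped from the stream. The inputs below are illustrative; the behaviour matches the stop-word case in the analyzer test.

package fr

import (
    "fmt"

    "github.com/blevesearch/bleve/v2/analysis"
    "github.com/blevesearch/bleve/v2/registry"
)

// ExampleStopTokenFilter drops French stop words such as "le" and "aux".
func ExampleStopTokenFilter() {
    cache := registry.NewCache()
    stopFilter, err := cache.TokenFilterNamed(StopName)
    if err != nil {
        panic(err)
    }
    stream := analysis.TokenStream{
        &analysis.Token{Term: []byte("le")},
        &analysis.Token{Term: []byte("chien")},
        &analysis.Token{Term: []byte("aux")},
    }
    for _, tok := range stopFilter.Filter(stream) {
        fmt.Println(string(tok.Term))
    }
    // Output: chien
}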

@@ -0,0 +1,213 @@
package fr
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const StopName = "stop_fr"
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// ` was changed to ' to allow for literal string
var FrenchStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A French stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
au | a + le
aux | a + les
avec | with
ce | this
ces | these
dans | with
de | of
des | de + les
du | de + le
elle | she
en | 'of them' etc
et | and
eux | them
il | he
je | I
la | the
le | the
leur | their
lui | him
ma | my (fem)
mais | but
me | me
même | same; as in moi-même (myself) etc
mes | me (pl)
moi | me
mon | my (masc)
ne | not
nos | our (pl)
notre | our
nous | we
on | one
ou | where
par | by
pas | not
pour | for
qu | que before vowel
que | that
qui | who
sa | his, her (fem)
se | oneself
ses | his (pl)
son | his, her (masc)
sur | on
ta | thy (fem)
te | thee
tes | thy (pl)
toi | thee
ton | thy (masc)
tu | thou
un | a
une | a
vos | your (pl)
votre | your
vous | you
| single letter forms
c | c'
d | d'
j | j'
l | l'
à | to, at
m | m'
n | n'
s | s'
t | t'
y | there
| forms of être (not including the infinitive):
été
étée
étées
étés
étant
suis
es
est
sommes
êtes
sont
serai
seras
sera
serons
serez
seront
serais
serait
serions
seriez
seraient
étais
était
étions
étiez
étaient
fus
fut
fûmes
fûtes
furent
sois
soit
soyons
soyez
soient
fusse
fusses
fût
fussions
fussiez
fussent
| forms of avoir (not including the infinitive):
ayant
eu
eue
eues
eus
ai
as
avons
avez
ont
aurai
auras
aura
aurons
aurez
auront
aurais
aurait
aurions
auriez
auraient
avais
avait
avions
aviez
avaient
eut
eûmes
eûtes
eurent
aie
aies
ait
ayons
ayez
aient
eusse
eusses
eût
eussions
eussiez
eussent
| Later additions (from Jean-Christophe Deschamps)
ceci | this
cela | that
celà | that
cet | this
cette | this
ici | here
ils | they
les | the (pl)
leurs | their (pl)
quel | which
quels | which
quelle | which
quelles | which
sans | without
soi | oneself
`)
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
rv := analysis.NewTokenMap()
err := rv.LoadBytes(FrenchStopWords)
return rv, err
}
func init() {
err := registry.RegisterTokenMap(StopName, TokenMapConstructor)
if err != nil {
panic(err)
}
}