
Adding upstream version 2.5.1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann 2025-05-19 00:20:02 +02:00
parent c71cb8b61d
commit 982828099e
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
783 changed files with 150650 additions and 0 deletions

@@ -0,0 +1,68 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ar
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
const AnalyzerName = "ar"
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
tokenizer, err := cache.TokenizerNamed(unicode.Name)
if err != nil {
return nil, err
}
toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
if err != nil {
return nil, err
}
normalizeFilter := unicodenorm.MustNewUnicodeNormalizeFilter(unicodenorm.NFKC)
stopArFilter, err := cache.TokenFilterNamed(StopName)
if err != nil {
return nil, err
}
normalizeArFilter, err := cache.TokenFilterNamed(NormalizeName)
if err != nil {
return nil, err
}
stemmerArFilter, err := cache.TokenFilterNamed(StemmerName)
if err != nil {
return nil, err
}
rv := analysis.DefaultAnalyzer{
Tokenizer: tokenizer,
TokenFilters: []analysis.TokenFilter{
toLowerFilter,
normalizeFilter,
stopArFilter,
normalizeArFilter,
stemmerArFilter,
},
}
return &rv, nil
}
func init() {
err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
if err != nil {
panic(err)
}
}
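
A minimal usage sketch for the analyzer above (an illustration, not part of this commit): it resolves the registered "ar" analyzer through the registry cache, exactly as the test file below does. The blank import path is an assumption about where this package sits in the bleve v2 tree.

package main

import (
	"fmt"

	_ "github.com/blevesearch/bleve/v2/analysis/lang/ar" // assumed path; registers "ar" via init()
	"github.com/blevesearch/bleve/v2/registry"
)

func main() {
	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed("ar")
	if err != nil {
		panic(err)
	}
	// The definite article is stripped and the term stemmed: الكتاب -> كتاب
	for _, token := range analyzer.Analyze([]byte("الكتاب")) {
		fmt.Println(string(token.Term))
	}
}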

@@ -0,0 +1,184 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ar
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestArabicAnalyzer(t *testing.T) {
tests := []struct {
input []byte
output analysis.TokenStream
}{
{
input: []byte("كبير"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("كبير"),
Position: 1,
Start: 0,
End: 8,
},
},
},
// feminine marker
{
input: []byte("كبيرة"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("كبير"),
Position: 1,
Start: 0,
End: 10,
},
},
},
{
input: []byte("مشروب"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("مشروب"),
Position: 1,
Start: 0,
End: 10,
},
},
},
// plural -at
{
input: []byte("مشروبات"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("مشروب"),
Position: 1,
Start: 0,
End: 14,
},
},
},
// plural -in
{
input: []byte("أمريكيين"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("امريك"),
Position: 1,
Start: 0,
End: 16,
},
},
},
// singular with bare alif
{
input: []byte("امريكي"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("امريك"),
Position: 1,
Start: 0,
End: 12,
},
},
},
{
input: []byte("كتاب"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("كتاب"),
Position: 1,
Start: 0,
End: 8,
},
},
},
// definite article
{
input: []byte("الكتاب"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("كتاب"),
Position: 1,
Start: 0,
End: 12,
},
},
},
{
input: []byte("ما ملكت أيمانكم"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("ملكت"),
Position: 2,
Start: 5,
End: 13,
},
&analysis.Token{
Term: []byte("ايمانكم"),
Position: 3,
Start: 14,
End: 28,
},
},
},
// stopwords
{
input: []byte("الذين ملكت أيمانكم"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("ملكت"),
Position: 2,
Start: 11,
End: 19,
},
&analysis.Token{
Term: []byte("ايمانكم"),
Position: 3,
Start: 20,
End: 34,
},
},
},
// presentation form normalization
{
input: []byte("ﺍﻟﺴﻼﻢ"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("سلام"),
Position: 1,
Start: 0,
End: 15,
},
},
},
}
cache := registry.NewCache()
analyzer, err := cache.AnalyzerNamed(AnalyzerName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %v, got %v", test.output, actual)
t.Errorf("expected % x, got % x", test.output[0].Term, actual[0].Term)
}
}
}

@@ -0,0 +1,88 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ar
import (
"bytes"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const NormalizeName = "normalize_ar"
const (
Alef = '\u0627'
AlefMadda = '\u0622'
AlefHamzaAbove = '\u0623'
AlefHamzaBelow = '\u0625'
Yeh = '\u064A'
DotlessYeh = '\u0649'
TehMarbuta = '\u0629'
Heh = '\u0647'
Tatweel = '\u0640'
Fathatan = '\u064B'
Dammatan = '\u064C'
Kasratan = '\u064D'
Fatha = '\u064E'
Damma = '\u064F'
Kasra = '\u0650'
Shadda = '\u0651'
Sukun = '\u0652'
)
type ArabicNormalizeFilter struct {
}
func NewArabicNormalizeFilter() *ArabicNormalizeFilter {
return &ArabicNormalizeFilter{}
}
func (s *ArabicNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
term := normalize(token.Term)
token.Term = term
}
return input
}
func normalize(input []byte) []byte {
runes := bytes.Runes(input)
for i := 0; i < len(runes); i++ {
switch runes[i] {
case AlefMadda, AlefHamzaAbove, AlefHamzaBelow:
runes[i] = Alef
case DotlessYeh:
runes[i] = Yeh
case TehMarbuta:
runes[i] = Heh
case Tatweel, Kasratan, Dammatan, Fathatan, Fatha, Damma, Kasra, Shadda, Sukun:
runes = analysis.DeleteRune(runes, i)
i--
}
}
return analysis.BuildTermFromRunes(runes)
}
func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewArabicNormalizeFilter(), nil
}
func init() {
err := registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor)
if err != nil {
panic(err)
}
}
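
A small worked sketch of the normalizer above (an illustration, assuming it sits in this package with "fmt" imported): hamza forms fold to bare alef, matching the test file below.

func ExampleArabicNormalizeFilter() {
	filter := NewArabicNormalizeFilter()
	ts := filter.Filter(analysis.TokenStream{
		&analysis.Token{Term: []byte("أحمد")}, // alef with hamza above
	})
	fmt.Println(string(ts[0].Term))
	// Output: احمد
}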

@@ -0,0 +1,234 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ar
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
func TestArabicNormalizeFilter(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
// AlifMadda
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("آجن"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("اجن"),
},
},
},
// AlifHamzaAbove
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("أحمد"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("احمد"),
},
},
},
// AlifHamzaBelow
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("إعاذ"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("اعاذ"),
},
},
},
// AlifMaksura
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("بنى"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("بني"),
},
},
},
// TehMarbuta
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("فاطمة"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("فاطمه"),
},
},
},
// Tatweel
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("روبرـــــت"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("روبرت"),
},
},
},
// Fatha
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("مَبنا"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("مبنا"),
},
},
},
// Kasra
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("علِي"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("علي"),
},
},
},
// Damma
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("بُوات"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("بوات"),
},
},
},
// Fathatan
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("ولداً"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("ولدا"),
},
},
},
// Kasratan
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("ولدٍ"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("ولد"),
},
},
},
// Dammatan
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("ولدٌ"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("ولد"),
},
},
},
// Sukun
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("نلْسون"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("نلسون"),
},
},
},
// Shaddah
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("هتميّ"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("هتمي"),
},
},
},
// empty
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
}
arabicNormalizeFilter := NewArabicNormalizeFilter()
for _, test := range tests {
actual := arabicNormalizeFilter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %#v, got %#v", test.output, actual)
t.Errorf("expected % x, got % x", test.output[0].Term, actual[0].Term)
}
}
}

@@ -0,0 +1,121 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ar
import (
"bytes"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const StemmerName = "stemmer_ar"
// These were obtained from org.apache.lucene.analysis.ar.ArabicStemmer
var prefixes = [][]rune{
[]rune("ال"),
[]rune("وال"),
[]rune("بال"),
[]rune("كال"),
[]rune("فال"),
[]rune("لل"),
[]rune("و"),
}
var suffixes = [][]rune{
[]rune("ها"),
[]rune("ان"),
[]rune("ات"),
[]rune("ون"),
[]rune("ين"),
[]rune("يه"),
[]rune("ية"),
[]rune("ه"),
[]rune("ة"),
[]rune("ي"),
}
type ArabicStemmerFilter struct{}
func NewArabicStemmerFilter() *ArabicStemmerFilter {
return &ArabicStemmerFilter{}
}
func (s *ArabicStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
term := stem(token.Term)
token.Term = term
}
return input
}
func canStemPrefix(input, prefix []rune) bool {
// Wa- prefix requires at least 3 characters.
if len(prefix) == 1 && len(input) < 4 {
return false
}
// Other prefixes require only 2.
if len(input)-len(prefix) < 2 {
return false
}
for i := range prefix {
if prefix[i] != input[i] {
return false
}
}
return true
}
func canStemSuffix(input, suffix []rune) bool {
// All suffixes require at least 2 characters after stemming.
if len(input)-len(suffix) < 2 {
return false
}
stemEnd := len(input) - len(suffix)
for i := range suffix {
if suffix[i] != input[stemEnd+i] {
return false
}
}
return true
}
func stem(input []byte) []byte {
runes := bytes.Runes(input)
// Strip a single prefix.
for _, p := range prefixes {
if canStemPrefix(runes, p) {
runes = runes[len(p):]
break
}
}
// Strip off multiple suffixes, in their order in the suffixes array.
for _, s := range suffixes {
if canStemSuffix(runes, s) {
runes = runes[:len(runes)-len(s)]
}
}
return analysis.BuildTermFromRunes(runes)
}
func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewArabicStemmerFilter(), nil
}
func init() {
err := registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor)
if err != nil {
panic(err)
}
}
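
A worked sketch of the rules above (an illustration, assuming this package plus "fmt"): at most one prefix is stripped, then every matching suffix in array order, while the length guards leave short terms such as "الو" untouched.

func ExampleArabicStemmerFilter() {
	filter := NewArabicStemmerFilter()
	ts := filter.Filter(analysis.TokenStream{
		&analysis.Token{Term: []byte("والحسن")}, // wal- prefix + stem
		&analysis.Token{Term: []byte("الو")},    // too short to stem safely
	})
	fmt.Println(string(ts[0].Term), string(ts[1].Term))
	// Output: حسن الو
}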

@@ -0,0 +1,397 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ar
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
func TestArabicStemmerFilter(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
// AlPrefix
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("الحسن"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("حسن"),
},
},
},
// WalPrefix
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("والحسن"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("حسن"),
},
},
},
// BalPrefix
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("بالحسن"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("حسن"),
},
},
},
// KalPrefix
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("كالحسن"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("حسن"),
},
},
},
// FalPrefix
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("فالحسن"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("حسن"),
},
},
},
// LlPrefix
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("للاخر"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("اخر"),
},
},
},
// WaPrefix
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("وحسن"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("حسن"),
},
},
},
// AhSuffix
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("زوجها"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("زوج"),
},
},
},
// AnSuffix
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("ساهدان"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("ساهد"),
},
},
},
// AtSuffix
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("ساهدات"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("ساهد"),
},
},
},
// WnSuffix
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("ساهدون"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("ساهد"),
},
},
},
// YnSuffix
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("ساهدين"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("ساهد"),
},
},
},
// YhSuffix
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("ساهديه"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("ساهد"),
},
},
},
// YpSuffix
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("ساهدية"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("ساهد"),
},
},
},
// HSuffix
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("ساهده"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("ساهد"),
},
},
},
// PSuffix
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("ساهدة"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("ساهد"),
},
},
},
// YSuffix
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("ساهدي"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("ساهد"),
},
},
},
// ComboPrefSuf
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("وساهدون"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("ساهد"),
},
},
},
// ComboSuf
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("ساهدهات"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("ساهد"),
},
},
},
// Shouldn't Stem
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("الو"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("الو"),
},
},
},
// NonArabic
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("English"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("English"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("سلام"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("سلام"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("السلام"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("سلام"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("سلامة"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("سلام"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("السلامة"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("سلام"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("الوصل"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("وصل"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("والصل"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("صل"),
},
},
},
// Empty
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
}
arabicStemmerFilter := NewArabicStemmerFilter()
for _, test := range tests {
actual := arabicStemmerFilter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %#v, got %#v", test.output, actual)
t.Errorf("expected % x, got % x", test.output[0].Term, actual[0].Term)
}
}
}

@@ -0,0 +1,36 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ar
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
tokenMap, err := cache.TokenMapNamed(StopName)
if err != nil {
return nil, err
}
return stop.NewStopTokensFilter(tokenMap), nil
}
func init() {
err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
if err != nil {
panic(err)
}
}

@@ -0,0 +1,152 @@
package ar
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const StopName = "stop_ar"
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis
// ` was changed to ' to allow for literal string
var ArabicStopWords = []byte(`# This file was created by Jacques Savoy and is distributed under the BSD license.
# See http://members.unine.ch/jacques.savoy/clef/index.html.
# Also see http://www.opensource.org/licenses/bsd-license.html
# Cleaned on October 11, 2009 (not normalized, so use before normalization)
# This means that when modifying this list, you might need to add some
# redundant entries, for example containing forms with both أ and ا
من
ومن
منها
منه
في
وفي
فيها
فيه
و
ف
ثم
او
أو
ب
بها
به
ا
أ
اى
اي
أي
أى
لا
ولا
الا
ألا
إلا
لكن
ما
وما
كما
فما
عن
مع
اذا
إذا
ان
أن
إن
انها
أنها
إنها
انه
أنه
إنه
بان
بأن
فان
فأن
وان
وأن
وإن
التى
التي
الذى
الذي
الذين
الى
الي
إلى
إلي
على
عليها
عليه
اما
أما
إما
ايضا
أيضا
كل
وكل
لم
ولم
لن
ولن
هى
هي
هو
وهى
وهي
وهو
فهى
فهي
فهو
انت
أنت
لك
لها
له
هذه
هذا
تلك
ذلك
هناك
كانت
كان
يكون
تكون
وكانت
وكان
غير
بعض
قد
نحو
بين
بينما
منذ
ضمن
حيث
الان
الآن
خلال
بعد
قبل
حتى
عند
عندما
لدى
جميع
`)
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
rv := analysis.NewTokenMap()
err := rv.LoadBytes(ArabicStopWords)
return rv, err
}
func init() {
err := registry.RegisterTokenMap(StopName, TokenMapConstructor)
if err != nil {
panic(err)
}
}

@@ -0,0 +1,36 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bg
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
tokenMap, err := cache.TokenMapNamed(StopName)
if err != nil {
return nil, err
}
return stop.NewStopTokensFilter(tokenMap), nil
}
func init() {
err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
if err != nil {
panic(err)
}
}

@@ -0,0 +1,220 @@
package bg
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const StopName = "stop_bg"
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// ` was changed to ' to allow for literal string
var BulgarianStopWords = []byte(`# This file was created by Jacques Savoy and is distributed under the BSD license.
# See http://members.unine.ch/jacques.savoy/clef/index.html.
# Also see http://www.opensource.org/licenses/bsd-license.html
а
аз
ако
ала
бе
без
беше
би
бил
била
били
било
близо
бъдат
бъде
бяха
в
вас
ваш
ваша
вероятно
вече
взема
ви
вие
винаги
все
всеки
всички
всичко
всяка
във
въпреки
върху
г
ги
главно
го
д
да
дали
до
докато
докога
дори
досега
доста
е
едва
един
ето
за
зад
заедно
заради
засега
затова
защо
защото
и
из
или
им
има
имат
иска
й
каза
как
каква
какво
както
какъв
като
кога
когато
което
които
кой
който
колко
която
къде
където
към
ли
м
ме
между
мен
ми
мнозина
мога
могат
може
моля
момента
му
н
на
над
назад
най
направи
напред
например
нас
не
него
нея
ни
ние
никой
нито
но
някои
някой
няма
обаче
около
освен
особено
от
отгоре
отново
още
пак
по
повече
повечето
под
поне
поради
после
почти
прави
пред
преди
през
при
пък
първо
с
са
само
се
сега
си
скоро
след
сме
според
сред
срещу
сте
съм
със
също
т
тази
така
такива
такъв
там
твой
те
тези
ти
тн
то
това
тогава
този
той
толкова
точно
трябва
тук
тъй
тя
тях
у
харесва
ч
че
често
чрез
ще
щом
я
`)
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
rv := analysis.NewTokenMap()
err := rv.LoadBytes(BulgarianStopWords)
return rv, err
}
func init() {
err := registry.RegisterTokenMap(StopName, TokenMapConstructor)
if err != nil {
panic(err)
}
}

@@ -0,0 +1,33 @@
package ca
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const ArticlesName = "articles_ca"
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis
var CatalanArticles = []byte(`
d
l
m
n
s
t
`)
func ArticlesTokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
rv := analysis.NewTokenMap()
err := rv.LoadBytes(CatalanArticles)
return rv, err
}
func init() {
err := registry.RegisterTokenMap(ArticlesName, ArticlesTokenMapConstructor)
if err != nil {
panic(err)
}
}

@@ -0,0 +1,40 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ca
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/elision"
"github.com/blevesearch/bleve/v2/registry"
)
const ElisionName = "elision_ca"
func ElisionFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
articlesTokenMap, err := cache.TokenMapNamed(ArticlesName)
if err != nil {
return nil, fmt.Errorf("error building elision filter: %v", err)
}
return elision.NewElisionFilter(articlesTokenMap), nil
}
func init() {
err := registry.RegisterTokenFilter(ElisionName, ElisionFilterConstructor)
if err != nil {
panic(err)
}
}

@@ -0,0 +1,61 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ca
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestCatalanElision(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("l'Institut"),
},
&analysis.Token{
Term: []byte("d'Estudis"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("Institut"),
},
&analysis.Token{
Term: []byte("Estudis"),
},
},
},
}
cache := registry.NewCache()
elisionFilter, err := cache.TokenFilterNamed(ElisionName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := elisionFilter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
}
}
}

@@ -0,0 +1,36 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ca
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
tokenMap, err := cache.TokenMapNamed(StopName)
if err != nil {
return nil, err
}
return stop.NewStopTokensFilter(tokenMap), nil
}
func init() {
err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
if err != nil {
panic(err)
}
}

@@ -0,0 +1,247 @@
package ca
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const StopName = "stop_ca"
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// ` was changed to ' to allow for literal string
var CatalanStopWords = []byte(`# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
a
abans
ací
ah
així
això
al
als
aleshores
algun
alguna
algunes
alguns
alhora
allà
allí
allò
altra
altre
altres
amb
ambdós
ambdues
apa
aquell
aquella
aquelles
aquells
aquest
aquesta
aquestes
aquests
aquí
baix
cada
cadascú
cadascuna
cadascunes
cadascuns
com
contra
d'un
d'una
d'unes
d'uns
dalt
de
del
dels
des
després
dins
dintre
donat
doncs
durant
e
eh
el
els
em
en
encara
ens
entre
érem
eren
éreu
es
és
esta
està
estàvem
estaven
estàveu
esteu
et
etc
ets
fins
fora
gairebé
ha
han
has
havia
he
hem
heu
hi
ho
i
igual
iguals
ja
l'hi
la
les
li
li'n
llavors
m'he
ma
mal
malgrat
mateix
mateixa
mateixes
mateixos
me
mentre
més
meu
meus
meva
meves
molt
molta
moltes
molts
mon
mons
n'he
n'hi
ne
ni
no
nogensmenys
només
nosaltres
nostra
nostre
nostres
o
oh
oi
on
pas
pel
pels
per
però
perquè
poc
poca
pocs
poques
potser
propi
qual
quals
quan
quant
que
què
quelcom
qui
quin
quina
quines
quins
s'ha
s'han
sa
semblant
semblants
ses
seu
seus
seva
seves
si
sobre
sobretot
sóc
solament
sols
son
són
sons
sota
sou
t'ha
t'han
t'he
ta
tal
també
tampoc
tan
tant
tanta
tantes
teu
teus
teva
teves
ton
tons
tot
tota
totes
tots
un
una
unes
uns
us
va
vaig
vam
van
vas
veu
vosaltres
vostra
vostre
vostres
`)
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
rv := analysis.NewTokenMap()
err := rv.LoadBytes(CatalanStopWords)
return rv, err
}
func init() {
err := registry.RegisterTokenMap(StopName, TokenMapConstructor)
if err != nil {
panic(err)
}
}

@@ -0,0 +1,60 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cjk
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
const AnalyzerName = "cjk"
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
tokenizer, err := cache.TokenizerNamed(unicode.Name)
if err != nil {
return nil, err
}
widthFilter, err := cache.TokenFilterNamed(WidthName)
if err != nil {
return nil, err
}
toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
if err != nil {
return nil, err
}
bigramFilter, err := cache.TokenFilterNamed(BigramName)
if err != nil {
return nil, err
}
rv := analysis.DefaultAnalyzer{
Tokenizer: tokenizer,
TokenFilters: []analysis.TokenFilter{
widthFilter,
toLowerFilter,
bigramFilter,
},
}
return &rv, nil
}
func init() {
err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
if err != nil {
panic(err)
}
}

@@ -0,0 +1,642 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cjk
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestCJKAnalyzer(t *testing.T) {
tests := []struct {
input []byte
output analysis.TokenStream
}{
{
input: []byte("こんにちは世界"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("こん"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
&analysis.Token{
Term: []byte("んに"),
Type: analysis.Double,
Position: 2,
Start: 3,
End: 9,
},
&analysis.Token{
Term: []byte("にち"),
Type: analysis.Double,
Position: 3,
Start: 6,
End: 12,
},
&analysis.Token{
Term: []byte("ちは"),
Type: analysis.Double,
Position: 4,
Start: 9,
End: 15,
},
&analysis.Token{
Term: []byte("は世"),
Type: analysis.Double,
Position: 5,
Start: 12,
End: 18,
},
&analysis.Token{
Term: []byte("世界"),
Type: analysis.Double,
Position: 6,
Start: 15,
End: 21,
},
},
},
{
input: []byte("一二三四五六七八九十"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("一二"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
&analysis.Token{
Term: []byte("二三"),
Type: analysis.Double,
Position: 2,
Start: 3,
End: 9,
},
&analysis.Token{
Term: []byte("三四"),
Type: analysis.Double,
Position: 3,
Start: 6,
End: 12,
},
&analysis.Token{
Term: []byte("四五"),
Type: analysis.Double,
Position: 4,
Start: 9,
End: 15,
},
&analysis.Token{
Term: []byte("五六"),
Type: analysis.Double,
Position: 5,
Start: 12,
End: 18,
},
&analysis.Token{
Term: []byte("六七"),
Type: analysis.Double,
Position: 6,
Start: 15,
End: 21,
},
&analysis.Token{
Term: []byte("七八"),
Type: analysis.Double,
Position: 7,
Start: 18,
End: 24,
},
&analysis.Token{
Term: []byte("八九"),
Type: analysis.Double,
Position: 8,
Start: 21,
End: 27,
},
&analysis.Token{
Term: []byte("九十"),
Type: analysis.Double,
Position: 9,
Start: 24,
End: 30,
},
},
},
{
input: []byte("一 二三四 五六七八九 十"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("一"),
Type: analysis.Single,
Position: 1,
Start: 0,
End: 3,
},
&analysis.Token{
Term: []byte("二三"),
Type: analysis.Double,
Position: 2,
Start: 4,
End: 10,
},
&analysis.Token{
Term: []byte("三四"),
Type: analysis.Double,
Position: 3,
Start: 7,
End: 13,
},
&analysis.Token{
Term: []byte("五六"),
Type: analysis.Double,
Position: 4,
Start: 14,
End: 20,
},
&analysis.Token{
Term: []byte("六七"),
Type: analysis.Double,
Position: 5,
Start: 17,
End: 23,
},
&analysis.Token{
Term: []byte("七八"),
Type: analysis.Double,
Position: 6,
Start: 20,
End: 26,
},
&analysis.Token{
Term: []byte("八九"),
Type: analysis.Double,
Position: 7,
Start: 23,
End: 29,
},
&analysis.Token{
Term: []byte("十"),
Type: analysis.Single,
Position: 8,
Start: 30,
End: 33,
},
},
},
{
input: []byte("abc defgh ijklmn opqrstu vwxy z"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("abc"),
Type: analysis.AlphaNumeric,
Position: 1,
Start: 0,
End: 3,
},
&analysis.Token{
Term: []byte("defgh"),
Type: analysis.AlphaNumeric,
Position: 2,
Start: 4,
End: 9,
},
&analysis.Token{
Term: []byte("ijklmn"),
Type: analysis.AlphaNumeric,
Position: 3,
Start: 10,
End: 16,
},
&analysis.Token{
Term: []byte("opqrstu"),
Type: analysis.AlphaNumeric,
Position: 4,
Start: 17,
End: 24,
},
&analysis.Token{
Term: []byte("vwxy"),
Type: analysis.AlphaNumeric,
Position: 5,
Start: 25,
End: 29,
},
&analysis.Token{
Term: []byte("z"),
Type: analysis.AlphaNumeric,
Position: 6,
Start: 30,
End: 31,
},
},
},
{
input: []byte("あい"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("あい"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
},
},
{
input: []byte("あい "),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("あい"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
},
},
{
input: []byte("test"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("test"),
Type: analysis.AlphaNumeric,
Position: 1,
Start: 0,
End: 4,
},
},
},
{
input: []byte("test "),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("test"),
Type: analysis.AlphaNumeric,
Position: 1,
Start: 0,
End: 4,
},
},
},
{
input: []byte("あいtest"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("あい"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
&analysis.Token{
Term: []byte("test"),
Type: analysis.AlphaNumeric,
Position: 2,
Start: 6,
End: 10,
},
},
},
{
input: []byte("testあい "),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("test"),
Type: analysis.AlphaNumeric,
Position: 1,
Start: 0,
End: 4,
},
&analysis.Token{
Term: []byte("あい"),
Type: analysis.Double,
Position: 2,
Start: 4,
End: 10,
},
},
},
{
input: []byte("あいうえおabcかきくけこ"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("あい"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
&analysis.Token{
Term: []byte("いう"),
Type: analysis.Double,
Position: 2,
Start: 3,
End: 9,
},
&analysis.Token{
Term: []byte("うえ"),
Type: analysis.Double,
Position: 3,
Start: 6,
End: 12,
},
&analysis.Token{
Term: []byte("えお"),
Type: analysis.Double,
Position: 4,
Start: 9,
End: 15,
},
&analysis.Token{
Term: []byte("abc"),
Type: analysis.AlphaNumeric,
Position: 5,
Start: 15,
End: 18,
},
&analysis.Token{
Term: []byte("かき"),
Type: analysis.Double,
Position: 6,
Start: 18,
End: 24,
},
&analysis.Token{
Term: []byte("きく"),
Type: analysis.Double,
Position: 7,
Start: 21,
End: 27,
},
&analysis.Token{
Term: []byte("くけ"),
Type: analysis.Double,
Position: 8,
Start: 24,
End: 30,
},
&analysis.Token{
Term: []byte("けこ"),
Type: analysis.Double,
Position: 9,
Start: 27,
End: 33,
},
},
},
{
input: []byte("あいうえおabんcかきくけ こ"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("あい"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
&analysis.Token{
Term: []byte("いう"),
Type: analysis.Double,
Position: 2,
Start: 3,
End: 9,
},
&analysis.Token{
Term: []byte("うえ"),
Type: analysis.Double,
Position: 3,
Start: 6,
End: 12,
},
&analysis.Token{
Term: []byte("えお"),
Type: analysis.Double,
Position: 4,
Start: 9,
End: 15,
},
&analysis.Token{
Term: []byte("ab"),
Type: analysis.AlphaNumeric,
Position: 5,
Start: 15,
End: 17,
},
&analysis.Token{
Term: []byte("ん"),
Type: analysis.Single,
Position: 6,
Start: 17,
End: 20,
},
&analysis.Token{
Term: []byte("c"),
Type: analysis.AlphaNumeric,
Position: 7,
Start: 20,
End: 21,
},
&analysis.Token{
Term: []byte("かき"),
Type: analysis.Double,
Position: 8,
Start: 21,
End: 27,
},
&analysis.Token{
Term: []byte("きく"),
Type: analysis.Double,
Position: 9,
Start: 24,
End: 30,
},
&analysis.Token{
Term: []byte("くけ"),
Type: analysis.Double,
Position: 10,
Start: 27,
End: 33,
},
&analysis.Token{
Term: []byte("こ"),
Type: analysis.Single,
Position: 11,
Start: 34,
End: 37,
},
},
},
{
input: []byte("一 روبرت موير"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("一"),
Type: analysis.Single,
Position: 1,
Start: 0,
End: 3,
},
&analysis.Token{
Term: []byte("روبرت"),
Type: analysis.AlphaNumeric,
Position: 2,
Start: 4,
End: 14,
},
&analysis.Token{
Term: []byte("موير"),
Type: analysis.AlphaNumeric,
Position: 3,
Start: 15,
End: 23,
},
},
},
{
input: []byte("一 رُوبرت موير"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("一"),
Type: analysis.Single,
Position: 1,
Start: 0,
End: 3,
},
&analysis.Token{
Term: []byte("رُوبرت"),
Type: analysis.AlphaNumeric,
Position: 2,
Start: 4,
End: 16,
},
&analysis.Token{
Term: []byte("موير"),
Type: analysis.AlphaNumeric,
Position: 3,
Start: 17,
End: 25,
},
},
},
{
input: []byte("𩬅艱鍟䇹愯瀛"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("𩬅艱"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 7,
},
&analysis.Token{
Term: []byte("艱鍟"),
Type: analysis.Double,
Position: 2,
Start: 4,
End: 10,
},
&analysis.Token{
Term: []byte("鍟䇹"),
Type: analysis.Double,
Position: 3,
Start: 7,
End: 13,
},
&analysis.Token{
Term: []byte("䇹愯"),
Type: analysis.Double,
Position: 4,
Start: 10,
End: 16,
},
&analysis.Token{
Term: []byte("愯瀛"),
Type: analysis.Double,
Position: 5,
Start: 13,
End: 19,
},
},
},
{
input: []byte("一"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("一"),
Type: analysis.Single,
Position: 1,
Start: 0,
End: 3,
},
},
},
{
input: []byte("一丁丂"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("一丁"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
&analysis.Token{
Term: []byte("丁丂"),
Type: analysis.Double,
Position: 2,
Start: 3,
End: 9,
},
},
},
}
cache := registry.NewCache()
for _, test := range tests {
analyzer, err := cache.AnalyzerNamed(AnalyzerName)
if err != nil {
t.Fatal(err)
}
actual := analyzer.Analyze(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %v, got %v", test.output, actual)
}
}
}
func BenchmarkCJKAnalyzer(b *testing.B) {
cache := registry.NewCache()
analyzer, err := cache.AnalyzerNamed(AnalyzerName)
if err != nil {
b.Fatal(err)
}
for i := 0; i < b.N; i++ {
analyzer.Analyze(bleveWikiArticleJapanese)
}
}
var bleveWikiArticleJapanese = []byte(`加圧容器に貯蔵されている液体物質はその時の気液平衡状態にあるが火災により容器が加熱されていると容器内の液体はその物質の大気圧のもとでの沸点より十分に高い温度まで加熱され圧力も高くなるこの状態で容器が破裂すると容器内部の圧力は瞬間的に大気圧にまで低下する
この時に容器内の平衡状態が破られ液体は突沸し気体になることで爆発現象を起こす液化石油ガスなどではさらに拡散して空気と混ざったガスが自由空間蒸気雲爆発を起こす液化石油ガスなどの常温常圧で気体になる物を高い圧力で液化して収納している容器あるいはそのような液体を輸送するためのパイプラインや配管などが火災などによって破壊されたときに起きる
ブリーブという現象が明らかになったのはフランスリヨンの郊外にあるフェザンという町のフェザン製油所ウニオンゼネラルペトロールで大規模な爆発火災事故が発生したときだと言われている
中身の液体が高温高圧の水である場合には水蒸気爆発と呼ばれる`)

@@ -0,0 +1,210 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cjk
import (
"bytes"
"container/ring"
"unicode/utf8"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const BigramName = "cjk_bigram"
type CJKBigramFilter struct {
outputUnigram bool
}
func NewCJKBigramFilter(outputUnigram bool) *CJKBigramFilter {
return &CJKBigramFilter{
outputUnigram: outputUnigram,
}
}
func (s *CJKBigramFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
r := ring.New(2)
itemsInRing := 0
pos := 1
outputPos := 1
rv := make(analysis.TokenStream, 0, len(input))
for _, tokout := range input {
if tokout.Type == analysis.Ideographic {
runes := bytes.Runes(tokout.Term)
sofar := 0
for _, run := range runes {
rlen := utf8.RuneLen(run)
token := &analysis.Token{
Term: tokout.Term[sofar : sofar+rlen],
Start: tokout.Start + sofar,
End: tokout.Start + sofar + rlen,
Position: pos,
Type: tokout.Type,
KeyWord: tokout.KeyWord,
}
pos++
sofar += rlen
if itemsInRing > 0 {
// if items already buffered
// check to see if this is aligned
curr := r.Value.(*analysis.Token)
if token.Start-curr.End != 0 {
// not aligned flush
flushToken := s.flush(r, &itemsInRing, outputPos)
if flushToken != nil {
outputPos++
rv = append(rv, flushToken)
}
}
}
// now we can add this token to the buffer
r = r.Next()
r.Value = token
if itemsInRing < 2 {
itemsInRing++
}
builtUnigram := false
if itemsInRing > 1 && s.outputUnigram {
unigram := s.buildUnigram(r, &itemsInRing, outputPos)
if unigram != nil {
builtUnigram = true
rv = append(rv, unigram)
}
}
bigramToken := s.outputBigram(r, &itemsInRing, outputPos)
if bigramToken != nil {
rv = append(rv, bigramToken)
outputPos++
}
// prev token should be removed if unigram was built
if builtUnigram {
itemsInRing--
}
}
} else {
// flush anything already buffered
flushToken := s.flush(r, &itemsInRing, outputPos)
if flushToken != nil {
rv = append(rv, flushToken)
outputPos++
}
// output this token as is
tokout.Position = outputPos
rv = append(rv, tokout)
outputPos++
}
}
// deal with possible trailing unigram
if itemsInRing == 1 || s.outputUnigram {
if itemsInRing == 2 {
r = r.Next()
}
unigram := s.buildUnigram(r, &itemsInRing, outputPos)
if unigram != nil {
rv = append(rv, unigram)
}
}
return rv
}
func (s *CJKBigramFilter) flush(r *ring.Ring, itemsInRing *int, pos int) *analysis.Token {
var rv *analysis.Token
if *itemsInRing == 1 {
rv = s.buildUnigram(r, itemsInRing, pos)
}
r.Value = nil
*itemsInRing = 0
return rv
}
func (s *CJKBigramFilter) outputBigram(r *ring.Ring, itemsInRing *int, pos int) *analysis.Token {
if *itemsInRing == 2 {
thisShingleRing := r.Move(-1)
shingledBytes := make([]byte, 0)
// do first token
prev := thisShingleRing.Value.(*analysis.Token)
shingledBytes = append(shingledBytes, prev.Term...)
// do second token
thisShingleRing = thisShingleRing.Next()
curr := thisShingleRing.Value.(*analysis.Token)
shingledBytes = append(shingledBytes, curr.Term...)
token := analysis.Token{
Type: analysis.Double,
Term: shingledBytes,
Position: pos,
Start: prev.Start,
End: curr.End,
}
return &token
}
return nil
}
func (s *CJKBigramFilter) buildUnigram(r *ring.Ring, itemsInRing *int, pos int) *analysis.Token {
switch *itemsInRing {
case 2:
thisShingleRing := r.Move(-1)
// do first token
prev := thisShingleRing.Value.(*analysis.Token)
token := analysis.Token{
Type: analysis.Single,
Term: prev.Term,
Position: pos,
Start: prev.Start,
End: prev.End,
}
return &token
case 1:
// do first token
prev := r.Value.(*analysis.Token)
token := analysis.Token{
Type: analysis.Single,
Term: prev.Term,
Position: pos,
Start: prev.Start,
End: prev.End,
}
return &token
}
return nil
}
func CJKBigramFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
outputUnigram := false
outVal, ok := config["output_unigram"].(bool)
if ok {
outputUnigram = outVal
}
return NewCJKBigramFilter(outputUnigram), nil
}
func init() {
err := registry.RegisterTokenFilter(BigramName, CJKBigramFilterConstructor)
if err != nil {
panic(err)
}
}
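
A configuration sketch (an illustration, not part of this commit; the helper name is hypothetical): the constructor above honors an optional "output_unigram" flag, and it never touches the registry cache, so nil is acceptable here.

func newUnigramEmittingBigramFilter() analysis.TokenFilter {
	// With output_unigram set, Single tokens are emitted alongside each Double bigram.
	filter, err := CJKBigramFilterConstructor(
		map[string]interface{}{"output_unigram": true}, nil)
	if err != nil {
		panic(err)
	}
	return filter
}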

@@ -0,0 +1,848 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cjk
import (
"container/ring"
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// Helper function to create a token
func makeToken(term string, start, end, pos int) *analysis.Token {
return &analysis.Token{
Term: []byte(term),
Start: start,
End: end,
Position: pos, // Note: buildUnigram uses the 'pos' argument, not the token's original pos
Type: analysis.Ideographic,
}
}
func TestCJKBigramFilter_buildUnigram(t *testing.T) {
filter := NewCJKBigramFilter(false)
tests := []struct {
name string
ringSetup func() (*ring.Ring, int) // Function to set up the ring and itemsInRing
inputPos int // Position to pass to buildUnigram
expectToken *analysis.Token
}{
{
name: "itemsInRing == 2",
ringSetup: func() (*ring.Ring, int) {
r := ring.New(2)
token1 := makeToken("一", 0, 3, 1) // Original pos 1
token2 := makeToken("二", 3, 6, 2) // Original pos 2
r.Value = token1
r = r.Next()
r.Value = token2
// r currently points to token2, r.Move(-1) points to token1
return r, 2
},
inputPos: 10, // Expected output position
expectToken: &analysis.Token{
Type: analysis.Single,
Term: []byte("一"),
Position: 10, // Should use inputPos
Start: 0,
End: 3,
},
},
{
name: "itemsInRing == 1 (ring points to the single item)",
ringSetup: func() (*ring.Ring, int) {
r := ring.New(2)
token1 := makeToken("三", 6, 9, 3)
r.Value = token1
// r points to token1
return r, 1
},
inputPos: 11,
expectToken: &analysis.Token{
Type: analysis.Single,
Term: []byte("三"),
Position: 11, // Should use inputPos
Start: 6,
End: 9,
},
},
{
name: "itemsInRing == 1 (ring points to nil, next is the single item)",
ringSetup: func() (*ring.Ring, int) {
r := ring.New(2)
token1 := makeToken("四", 9, 12, 4)
r = r.Next() // r points to nil initially
r.Value = token1
// r points to token1
return r, 1
},
inputPos: 12,
expectToken: &analysis.Token{
Type: analysis.Single,
Term: []byte("四"),
Position: 12, // Should use inputPos
Start: 9,
End: 12,
},
},
{
name: "itemsInRing == 0",
ringSetup: func() (*ring.Ring, int) {
r := ring.New(2)
// Ring is empty
return r, 0
},
inputPos: 13,
expectToken: nil, // Expect nil when itemsInRing is not 1 or 2
},
{
name: "itemsInRing > 2 (should behave like 0)",
ringSetup: func() (*ring.Ring, int) {
r := ring.New(2)
token1 := makeToken("五", 12, 15, 5)
token2 := makeToken("六", 15, 18, 6)
r.Value = token1
r = r.Next()
r.Value = token2
// Simulate incorrect itemsInRing count
return r, 3
},
inputPos: 14,
expectToken: nil, // Expect nil when itemsInRing is not 1 or 2
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ringPtr, itemsInRing := tt.ringSetup()
itemsInRingCopy := itemsInRing // Pass a pointer to a copy
gotToken := filter.buildUnigram(ringPtr, &itemsInRingCopy, tt.inputPos)
if !reflect.DeepEqual(gotToken, tt.expectToken) {
t.Errorf("buildUnigram() got = %v, want %v", gotToken, tt.expectToken)
}
// Check if itemsInRing was modified (it shouldn't be by buildUnigram)
if itemsInRingCopy != itemsInRing {
t.Errorf("buildUnigram() modified itemsInRing, got = %d, want %d", itemsInRingCopy, itemsInRing)
}
})
}
}
func TestCJKBigramFilter_outputBigram(t *testing.T) {
// Create a filter instance (outputUnigram value doesn't matter for outputBigram)
filter := NewCJKBigramFilter(false)
tests := []struct {
name string
ringSetup func() (*ring.Ring, int) // Function to set up the ring and itemsInRing
inputPos int // Position to pass to outputBigram
expectToken *analysis.Token
}{
{
name: "itemsInRing == 2",
ringSetup: func() (*ring.Ring, int) {
r := ring.New(2)
token1 := makeToken("一", 0, 3, 1) // Original pos 1
token2 := makeToken("二", 3, 6, 2) // Original pos 2
r.Value = token1
r = r.Next()
r.Value = token2
// r currently points to token2, r.Move(-1) points to token1
return r, 2
},
inputPos: 10, // Expected output position
expectToken: &analysis.Token{
Type: analysis.Double,
Term: []byte("一二"), // Combined term
Position: 10, // Should use inputPos
Start: 0, // Start of first token
End: 6, // End of second token
},
},
{
name: "itemsInRing == 2 with different terms",
ringSetup: func() (*ring.Ring, int) {
r := ring.New(2)
token1 := makeToken("你好", 0, 6, 1)
token2 := makeToken("世界", 6, 12, 2)
r.Value = token1
r = r.Next()
r.Value = token2
return r, 2
},
inputPos: 5,
expectToken: &analysis.Token{
Type: analysis.Double,
Term: []byte("你好世界"),
Position: 5,
Start: 0,
End: 12,
},
},
{
name: "itemsInRing == 1",
ringSetup: func() (*ring.Ring, int) {
r := ring.New(2)
token1 := makeToken("三", 6, 9, 3)
r.Value = token1
return r, 1
},
inputPos: 11,
expectToken: nil, // Expect nil when itemsInRing is not 2
},
{
name: "itemsInRing == 0",
ringSetup: func() (*ring.Ring, int) {
r := ring.New(2)
// Ring is empty
return r, 0
},
inputPos: 13,
expectToken: nil, // Expect nil when itemsInRing is not 2
},
{
name: "itemsInRing > 2 (should behave like 0)",
ringSetup: func() (*ring.Ring, int) {
r := ring.New(2)
token1 := makeToken("五", 12, 15, 5)
token2 := makeToken("六", 15, 18, 6)
r.Value = token1
r = r.Next()
r.Value = token2
// Simulate incorrect itemsInRing count
return r, 3
},
inputPos: 14,
expectToken: nil, // Expect nil when itemsInRing is not 2
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ringPtr, itemsInRing := tt.ringSetup()
itemsInRingCopy := itemsInRing // Pass a pointer to a copy
gotToken := filter.outputBigram(ringPtr, &itemsInRingCopy, tt.inputPos)
if !reflect.DeepEqual(gotToken, tt.expectToken) {
t.Errorf("outputBigram() got = %v, want %v", gotToken, tt.expectToken)
}
// Check if itemsInRing was modified (it shouldn't be by outputBigram)
if itemsInRingCopy != itemsInRing {
t.Errorf("outputBigram() modified itemsInRing, got = %d, want %d", itemsInRingCopy, itemsInRing)
}
})
}
}
func TestCJKBigramFilter(t *testing.T) {
tests := []struct {
outputUnigram bool
input analysis.TokenStream
output analysis.TokenStream
}{
// first test that non-adjacent terms are not combined
{
outputUnigram: false,
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("こ"),
Type: analysis.Ideographic,
Position: 1,
Start: 0,
End: 3,
},
&analysis.Token{
Term: []byte("ん"),
Type: analysis.Ideographic,
Position: 2,
Start: 5,
End: 8,
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("こ"),
Type: analysis.Single,
Position: 1,
Start: 0,
End: 3,
},
&analysis.Token{
Term: []byte("ん"),
Type: analysis.Single,
Position: 2,
Start: 5,
End: 8,
},
},
},
{
outputUnigram: false,
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("こ"),
Type: analysis.Ideographic,
Position: 1,
Start: 0,
End: 3,
},
&analysis.Token{
Term: []byte("ん"),
Type: analysis.Ideographic,
Position: 2,
Start: 3,
End: 6,
},
&analysis.Token{
Term: []byte("に"),
Type: analysis.Ideographic,
Position: 3,
Start: 6,
End: 9,
},
&analysis.Token{
Term: []byte("ち"),
Type: analysis.Ideographic,
Position: 4,
Start: 9,
End: 12,
},
&analysis.Token{
Term: []byte("は"),
Type: analysis.Ideographic,
Position: 5,
Start: 12,
End: 15,
},
&analysis.Token{
Term: []byte("世"),
Type: analysis.Ideographic,
Position: 6,
Start: 15,
End: 18,
},
&analysis.Token{
Term: []byte("界"),
Type: analysis.Ideographic,
Position: 7,
Start: 18,
End: 21,
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("こん"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
&analysis.Token{
Term: []byte("んに"),
Type: analysis.Double,
Position: 2,
Start: 3,
End: 9,
},
&analysis.Token{
Term: []byte("にち"),
Type: analysis.Double,
Position: 3,
Start: 6,
End: 12,
},
&analysis.Token{
Term: []byte("ちは"),
Type: analysis.Double,
Position: 4,
Start: 9,
End: 15,
},
&analysis.Token{
Term: []byte("は世"),
Type: analysis.Double,
Position: 5,
Start: 12,
End: 18,
},
&analysis.Token{
Term: []byte("世界"),
Type: analysis.Double,
Position: 6,
Start: 15,
End: 21,
},
},
},
{
outputUnigram: true,
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("こ"),
Type: analysis.Ideographic,
Position: 1,
Start: 0,
End: 3,
},
&analysis.Token{
Term: []byte("ん"),
Type: analysis.Ideographic,
Position: 2,
Start: 3,
End: 6,
},
&analysis.Token{
Term: []byte("に"),
Type: analysis.Ideographic,
Position: 3,
Start: 6,
End: 9,
},
&analysis.Token{
Term: []byte("ち"),
Type: analysis.Ideographic,
Position: 4,
Start: 9,
End: 12,
},
&analysis.Token{
Term: []byte("は"),
Type: analysis.Ideographic,
Position: 5,
Start: 12,
End: 15,
},
&analysis.Token{
Term: []byte("世"),
Type: analysis.Ideographic,
Position: 6,
Start: 15,
End: 18,
},
&analysis.Token{
Term: []byte("界"),
Type: analysis.Ideographic,
Position: 7,
Start: 18,
End: 21,
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("こ"),
Type: analysis.Single,
Position: 1,
Start: 0,
End: 3,
},
&analysis.Token{
Term: []byte("こん"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
&analysis.Token{
Term: []byte("ん"),
Type: analysis.Single,
Position: 2,
Start: 3,
End: 6,
},
&analysis.Token{
Term: []byte("んに"),
Type: analysis.Double,
Position: 2,
Start: 3,
End: 9,
},
&analysis.Token{
Term: []byte("に"),
Type: analysis.Single,
Position: 3,
Start: 6,
End: 9,
},
&analysis.Token{
Term: []byte("にち"),
Type: analysis.Double,
Position: 3,
Start: 6,
End: 12,
},
&analysis.Token{
Term: []byte("ち"),
Type: analysis.Single,
Position: 4,
Start: 9,
End: 12,
},
&analysis.Token{
Term: []byte("ちは"),
Type: analysis.Double,
Position: 4,
Start: 9,
End: 15,
},
&analysis.Token{
Term: []byte("は"),
Type: analysis.Single,
Position: 5,
Start: 12,
End: 15,
},
&analysis.Token{
Term: []byte("は世"),
Type: analysis.Double,
Position: 5,
Start: 12,
End: 18,
},
&analysis.Token{
Term: []byte("世"),
Type: analysis.Single,
Position: 6,
Start: 15,
End: 18,
},
&analysis.Token{
Term: []byte("世界"),
Type: analysis.Double,
Position: 6,
Start: 15,
End: 21,
},
&analysis.Token{
Term: []byte("界"),
Type: analysis.Single,
Position: 7,
Start: 18,
End: 21,
},
},
},
{
// Assumes that `、` has been removed by the unicode tokenizer from `こんにちは、世界`
outputUnigram: true,
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("こ"),
Type: analysis.Ideographic,
Position: 1,
Start: 0,
End: 3,
},
&analysis.Token{
Term: []byte("ん"),
Type: analysis.Ideographic,
Position: 2,
Start: 3,
End: 6,
},
&analysis.Token{
Term: []byte("に"),
Type: analysis.Ideographic,
Position: 3,
Start: 6,
End: 9,
},
&analysis.Token{
Term: []byte("ち"),
Type: analysis.Ideographic,
Position: 4,
Start: 9,
End: 12,
},
&analysis.Token{
Term: []byte("は"),
Type: analysis.Ideographic,
Position: 5,
Start: 12,
End: 15,
},
&analysis.Token{
Term: []byte("世"),
Type: analysis.Ideographic,
Position: 7,
Start: 18,
End: 21,
},
&analysis.Token{
Term: []byte("界"),
Type: analysis.Ideographic,
Position: 8,
Start: 21,
End: 24,
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("こ"),
Type: analysis.Single,
Position: 1,
Start: 0,
End: 3,
},
&analysis.Token{
Term: []byte("こん"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
&analysis.Token{
Term: []byte("ん"),
Type: analysis.Single,
Position: 2,
Start: 3,
End: 6,
},
&analysis.Token{
Term: []byte("んに"),
Type: analysis.Double,
Position: 2,
Start: 3,
End: 9,
},
&analysis.Token{
Term: []byte("に"),
Type: analysis.Single,
Position: 3,
Start: 6,
End: 9,
},
&analysis.Token{
Term: []byte("にち"),
Type: analysis.Double,
Position: 3,
Start: 6,
End: 12,
},
&analysis.Token{
Term: []byte("ち"),
Type: analysis.Single,
Position: 4,
Start: 9,
End: 12,
},
&analysis.Token{
Term: []byte("ちは"),
Type: analysis.Double,
Position: 4,
Start: 9,
End: 15,
},
&analysis.Token{
Term: []byte("は"),
Type: analysis.Single,
Position: 5,
Start: 12,
End: 15,
},
&analysis.Token{
Term: []byte("世"),
Type: analysis.Single,
Position: 6,
Start: 18,
End: 21,
},
&analysis.Token{
Term: []byte("世界"),
Type: analysis.Double,
Position: 6,
Start: 18,
End: 24,
},
&analysis.Token{
Term: []byte("界"),
Type: analysis.Single,
Position: 7,
Start: 21,
End: 24,
},
},
},
{
outputUnigram: false,
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("こ"),
Type: analysis.Ideographic,
Position: 1,
Start: 0,
End: 3,
},
&analysis.Token{
Term: []byte("ん"),
Type: analysis.Ideographic,
Position: 2,
Start: 3,
End: 6,
},
&analysis.Token{
Term: []byte("に"),
Type: analysis.Ideographic,
Position: 3,
Start: 6,
End: 9,
},
&analysis.Token{
Term: []byte("ち"),
Type: analysis.Ideographic,
Position: 4,
Start: 9,
End: 12,
},
&analysis.Token{
Term: []byte("は"),
Type: analysis.Ideographic,
Position: 5,
Start: 12,
End: 15,
},
&analysis.Token{
Term: []byte("cat"),
Type: analysis.AlphaNumeric,
Position: 6,
Start: 12,
End: 15,
},
&analysis.Token{
Term: []byte("世"),
Type: analysis.Ideographic,
Position: 7,
Start: 18,
End: 21,
},
&analysis.Token{
Term: []byte("界"),
Type: analysis.Ideographic,
Position: 8,
Start: 21,
End: 24,
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("こん"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
&analysis.Token{
Term: []byte("んに"),
Type: analysis.Double,
Position: 2,
Start: 3,
End: 9,
},
&analysis.Token{
Term: []byte("にち"),
Type: analysis.Double,
Position: 3,
Start: 6,
End: 12,
},
&analysis.Token{
Term: []byte("ちは"),
Type: analysis.Double,
Position: 4,
Start: 9,
End: 15,
},
&analysis.Token{
Term: []byte("cat"),
Type: analysis.AlphaNumeric,
Position: 5,
Start: 12,
End: 15,
},
&analysis.Token{
Term: []byte("世界"),
Type: analysis.Double,
Position: 6,
Start: 18,
End: 24,
},
},
},
{
outputUnigram: false,
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("パイプライン"),
Type: analysis.Ideographic,
Position: 1,
Start: 0,
End: 18,
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("パイ"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
&analysis.Token{
Term: []byte("イプ"),
Type: analysis.Double,
Position: 2,
Start: 3,
End: 9,
},
&analysis.Token{
Term: []byte("プラ"),
Type: analysis.Double,
Position: 3,
Start: 6,
End: 12,
},
&analysis.Token{
Term: []byte("ライ"),
Type: analysis.Double,
Position: 4,
Start: 9,
End: 15,
},
&analysis.Token{
Term: []byte("イン"),
Type: analysis.Double,
Position: 5,
Start: 12,
End: 18,
},
},
},
}
for _, test := range tests {
cjkBigramFilter := NewCJKBigramFilter(test.outputUnigram)
actual := cjkBigramFilter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %s, got %s", test.output, actual)
}
}
}
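// A minimal usage sketch (illustrative only; the chain below is an
// assumption modeled on the analyzer files in this changeset, not the
// registered CJK analyzer):
//
//	cache := registry.NewCache()
//	tokenizer, _ := cache.TokenizerNamed(unicode.Name)
//	width, _ := cache.TokenFilterNamed(WidthName)
//	analyzer := analysis.DefaultAnalyzer{
//		Tokenizer:    tokenizer,
//		TokenFilters: []analysis.TokenFilter{width, NewCJKBigramFilter(false)},
//	}
//	tokens := analyzer.Analyze([]byte("こんにちは世界"))
//	// tokens: こん, んに, にち, ちは, は世, 世界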

View file

@ -0,0 +1,104 @@
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cjk
import (
"bytes"
"unicode/utf8"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const WidthName = "cjk_width"
type CJKWidthFilter struct{}
func NewCJKWidthFilter() *CJKWidthFilter {
return &CJKWidthFilter{}
}
func (s *CJKWidthFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
runeCount := utf8.RuneCount(token.Term)
runes := bytes.Runes(token.Term)
for i := 0; i < runeCount; i++ {
ch := runes[i]
if ch >= 0xFF01 && ch <= 0xFF5E {
// fullwidth ASCII variants
runes[i] -= 0xFEE0
} else if ch >= 0xFF65 && ch <= 0xFF9F {
// halfwidth Katakana variants
if (ch == 0xFF9E || ch == 0xFF9F) && i > 0 && combine(runes, i, ch) {
runes = analysis.DeleteRune(runes, i)
i--
runeCount = len(runes)
} else {
runes[i] = kanaNorm[ch-0xFF65]
}
}
}
token.Term = analysis.BuildTermFromRunes(runes)
}
return input
}
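// kanaNorm maps each halfwidth code point in the range U+FF65..U+FF9F
// (indexed by ch-0xFF65) to its fullwidth equivalent, e.g. the halfwidth
// middle dot U+FF65 to U+30FB and halfwidth ｶ (U+FF76) to カ (U+30AB). The
// final two entries map the halfwidth voiced and semi-voiced sound marks to
// the combining marks U+3099 and U+309A, used when they cannot merge into
// the preceding kana.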
var kanaNorm = []rune{
0x30fb, 0x30f2, 0x30a1, 0x30a3, 0x30a5, 0x30a7, 0x30a9, 0x30e3, 0x30e5,
0x30e7, 0x30c3, 0x30fc, 0x30a2, 0x30a4, 0x30a6, 0x30a8, 0x30aa, 0x30ab,
0x30ad, 0x30af, 0x30b1, 0x30b3, 0x30b5, 0x30b7, 0x30b9, 0x30bb, 0x30bd,
0x30bf, 0x30c1, 0x30c4, 0x30c6, 0x30c8, 0x30ca, 0x30cb, 0x30cc, 0x30cd,
0x30ce, 0x30cf, 0x30d2, 0x30d5, 0x30d8, 0x30db, 0x30de, 0x30df, 0x30e0,
0x30e1, 0x30e2, 0x30e4, 0x30e6, 0x30e8, 0x30e9, 0x30ea, 0x30eb, 0x30ec,
0x30ed, 0x30ef, 0x30f3, 0x3099, 0x309A,
}
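// kanaCombineVoiced and kanaCombineHalfVoiced are indexed by prev-0x30A6,
// where prev is the fullwidth kana preceding a halfwidth voiced (ﾞ, U+FF9E)
// or semi-voiced (ﾟ, U+FF9F) sound mark. Each entry is the code point offset
// that turns the base kana into its combined form, e.g. カ (U+30AB) + ﾞ
// becomes ガ (U+30AC) via offset 1, ハ (U+30CF) + ﾟ becomes パ (U+30D1) via
// offset 2, and ウ (U+30A6) + ﾞ becomes ヴ (U+30F4) via offset 78; a zero
// offset means the pair does not combine and the mark is normalized on its
// own.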
var kanaCombineVoiced = []rune{
78, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1,
0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
}
var kanaCombineHalfVoiced = []rune{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 2,
0, 0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
}
func combine(text []rune, pos int, r rune) bool {
prev := text[pos-1]
if prev >= 0x30A6 && prev <= 0x30FD {
if r == 0xFF9F {
text[pos-1] += kanaCombineHalfVoiced[prev-0x30A6]
} else {
text[pos-1] += kanaCombineVoiced[prev-0x30A6]
}
return text[pos-1] != prev
}
return false
}
func CJKWidthFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewCJKWidthFilter(), nil
}
func init() {
err := registry.RegisterTokenFilter(WidthName, CJKWidthFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,93 @@
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cjk
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
func TestCJKWidthFilter(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("Ｔｅｓｔ"),
},
&analysis.Token{
Term: []byte("１２３４"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("Test"),
},
&analysis.Token{
Term: []byte("1234"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("カタカナ"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("カタカナ"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("ヴィッツ"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("ヴィッツ"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("パナソニック"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("パナソニック"),
},
},
},
}
for _, test := range tests {
cjkWidthFilter := NewCJKWidthFilter()
actual := cjkWidthFilter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %s, got %s", test.output, actual)
}
}
}

View file

@ -0,0 +1,64 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ckb
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
"github.com/blevesearch/bleve/v2/registry"
)
const AnalyzerName = "ckb"
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name)
if err != nil {
return nil, err
}
normCkbFilter, err := cache.TokenFilterNamed(NormalizeName)
if err != nil {
return nil, err
}
toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
if err != nil {
return nil, err
}
stopCkbFilter, err := cache.TokenFilterNamed(StopName)
if err != nil {
return nil, err
}
stemmerCkbFilter, err := cache.TokenFilterNamed(StemmerName)
if err != nil {
return nil, err
}
rv := analysis.DefaultAnalyzer{
Tokenizer: unicodeTokenizer,
TokenFilters: []analysis.TokenFilter{
normCkbFilter,
toLowerFilter,
stopCkbFilter,
stemmerCkbFilter,
},
}
return &rv, nil
}
func init() {
err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,77 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ckb
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestSoraniAnalyzer(t *testing.T) {
tests := []struct {
input []byte
output analysis.TokenStream
}{
// stop word removal
{
input: []byte("ئەم پیاوە"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("پیاو"),
Position: 2,
Start: 7,
End: 17,
},
},
},
{
input: []byte("پیاوە"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("پیاو"),
Position: 1,
Start: 0,
End: 10,
},
},
},
{
input: []byte("پیاو"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("پیاو"),
Position: 1,
Start: 0,
End: 8,
},
},
},
}
cache := registry.NewCache()
analyzer, err := cache.AnalyzerNamed(AnalyzerName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %v, got %v", test.output, actual)
}
}
}

View file

@ -0,0 +1,121 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ckb
import (
"bytes"
"unicode"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const NormalizeName = "normalize_ckb"
const (
Yeh = '\u064A'
DotlessYeh = '\u0649'
FarsiYeh = '\u06CC'
Kaf = '\u0643'
Keheh = '\u06A9'
Heh = '\u0647'
Ae = '\u06D5'
Zwnj = '\u200C'
HehDoachashmee = '\u06BE'
TehMarbuta = '\u0629'
Reh = '\u0631'
Rreh = '\u0695'
RrehAbove = '\u0692'
Tatweel = '\u0640'
Fathatan = '\u064B'
Dammatan = '\u064C'
Kasratan = '\u064D'
Fatha = '\u064E'
Damma = '\u064F'
Kasra = '\u0650'
Shadda = '\u0651'
Sukun = '\u0652'
)
type SoraniNormalizeFilter struct {
}
func NewSoraniNormalizeFilter() *SoraniNormalizeFilter {
return &SoraniNormalizeFilter{}
}
func (s *SoraniNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
term := normalize(token.Term)
token.Term = term
}
return input
}
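// normalize applies the Sorani-specific folding used by the "ckb" analyzer
// above: Yeh and Dotless Yeh fold to Farsi Yeh, Kaf to Keheh, Heh
// Doachashmee to Heh, Teh Marbuta to Ae, a word-final or Zwnj-adjacent Heh
// to Ae, a word-initial Reh to Rreh, and Rreh-above to Rreh; the tatweel,
// the harakat (short-vowel and gemination marks), and any other Unicode
// format (Cf) characters are removed.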
func normalize(input []byte) []byte {
runes := bytes.Runes(input)
for i := 0; i < len(runes); i++ {
switch runes[i] {
case Yeh, DotlessYeh:
runes[i] = FarsiYeh
case Kaf:
runes[i] = Keheh
case Zwnj:
if i > 0 && runes[i-1] == Heh {
runes[i-1] = Ae
}
runes = analysis.DeleteRune(runes, i)
i--
case Heh:
if i == len(runes)-1 {
runes[i] = Ae
}
case TehMarbuta:
runes[i] = Ae
case HehDoachashmee:
runes[i] = Heh
case Reh:
if i == 0 {
runes[i] = Rreh
}
case RrehAbove:
runes[i] = Rreh
case Tatweel, Kasratan, Dammatan, Fathatan, Fatha, Damma, Kasra, Shadda, Sukun:
runes = analysis.DeleteRune(runes, i)
i--
default:
if unicode.In(runes[i], unicode.Cf) {
runes = analysis.DeleteRune(runes, i)
i--
}
}
}
return analysis.BuildTermFromRunes(runes)
}
func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewSoraniNormalizeFilter(), nil
}
func init() {
err := registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,323 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ckb
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
func TestSoraniNormalizeFilter(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
// test Y
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u064A"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u06CC"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0649"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u06CC"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u06CC"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u06CC"),
},
},
},
// test K
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0643"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u06A9"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u06A9"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u06A9"),
},
},
},
// test H
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0647\u200C"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u06D5"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0647\u200C\u06A9"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u06D5\u06A9"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u06BE"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0647"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0629"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u06D5"),
},
},
},
// test final H
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0647\u0647\u0647"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0647\u0647\u06D5"),
},
},
},
// test RR
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0692"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0695"),
},
},
},
// test initial RR
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0631\u0631\u0631"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0695\u0631\u0631"),
},
},
},
// test remove
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0640"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u064B"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u064C"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u064D"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u064E"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u064F"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0650"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0651"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0652"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u200C"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
// empty
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
}
soraniNormalizeFilter := NewSoraniNormalizeFilter()
for _, test := range tests {
actual := soraniNormalizeFilter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %#v, got %#v", test.output, actual)
t.Errorf("expected % x, got % x", test.output[0].Term, actual[0].Term)
}
}
}

View file

@ -0,0 +1,151 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ckb
import (
"bytes"
"unicode/utf8"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const StemmerName = "stemmer_ckb"
type SoraniStemmerFilter struct {
}
func NewSoraniStemmerFilter() *SoraniStemmerFilter {
return &SoraniStemmerFilter{}
}
func (s *SoraniStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
// if not protected keyword, stem it
if !token.KeyWord {
stemmed := stem(token.Term)
token.Term = stemmed
}
}
return input
}
func stem(input []byte) []byte {
inputLen := utf8.RuneCount(input)
// postposition
if inputLen > 5 && bytes.HasSuffix(input, []byte("دا")) {
input = truncateRunes(input, 2)
inputLen = utf8.RuneCount(input)
} else if inputLen > 4 && bytes.HasSuffix(input, []byte("نا")) {
input = truncateRunes(input, 1)
inputLen = utf8.RuneCount(input)
} else if inputLen > 6 && bytes.HasSuffix(input, []byte("ەوە")) {
input = truncateRunes(input, 3)
inputLen = utf8.RuneCount(input)
}
// possessive pronoun
if inputLen > 6 &&
(bytes.HasSuffix(input, []byte("مان")) ||
bytes.HasSuffix(input, []byte("یان")) ||
bytes.HasSuffix(input, []byte("تان"))) {
input = truncateRunes(input, 3)
inputLen = utf8.RuneCount(input)
}
// indefinite singular ezafe
if inputLen > 6 && bytes.HasSuffix(input, []byte("ێکی")) {
return truncateRunes(input, 3)
} else if inputLen > 7 && bytes.HasSuffix(input, []byte("یەکی")) {
return truncateRunes(input, 4)
}
if inputLen > 5 && bytes.HasSuffix(input, []byte("ێک")) {
// indefinite singular
return truncateRunes(input, 2)
} else if inputLen > 6 && bytes.HasSuffix(input, []byte("یەک")) {
// indefinite singular
return truncateRunes(input, 3)
} else if inputLen > 6 && bytes.HasSuffix(input, []byte("ەکە")) {
// definite singular
return truncateRunes(input, 3)
} else if inputLen > 5 && bytes.HasSuffix(input, []byte("کە")) {
// definite singular
return truncateRunes(input, 2)
} else if inputLen > 7 && bytes.HasSuffix(input, []byte("ەکان")) {
// definite plural
return truncateRunes(input, 4)
} else if inputLen > 6 && bytes.HasSuffix(input, []byte("کان")) {
// definite plural
return truncateRunes(input, 3)
} else if inputLen > 7 && bytes.HasSuffix(input, []byte("یانی")) {
// indefinite plural ezafe
return truncateRunes(input, 4)
} else if inputLen > 6 && bytes.HasSuffix(input, []byte("انی")) {
// indefinite plural ezafe
return truncateRunes(input, 3)
} else if inputLen > 6 && bytes.HasSuffix(input, []byte("یان")) {
// indefinite plural
return truncateRunes(input, 3)
} else if inputLen > 5 && bytes.HasSuffix(input, []byte("ان")) {
// indefinite plural
return truncateRunes(input, 2)
} else if inputLen > 7 && bytes.HasSuffix(input, []byte("یانە")) {
// demonstrative plural
return truncateRunes(input, 4)
} else if inputLen > 6 && bytes.HasSuffix(input, []byte("انە")) {
// demonstrative plural
return truncateRunes(input, 3)
} else if inputLen > 5 && (bytes.HasSuffix(input, []byte("ایە")) || bytes.HasSuffix(input, []byte("ەیە"))) {
// demonstrative singular
return truncateRunes(input, 2)
} else if inputLen > 4 && bytes.HasSuffix(input, []byte("ە")) {
// demonstrative singular
return truncateRunes(input, 1)
} else if inputLen > 4 && bytes.HasSuffix(input, []byte("ی")) {
// absolute singular ezafe
return truncateRunes(input, 1)
}
return input
}
func truncateRunes(input []byte, num int) []byte {
runes := bytes.Runes(input)
runes = runes[:len(runes)-num]
out := buildTermFromRunes(runes)
return out
}
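// buildTermFromRunes re-encodes the remaining runes as UTF-8; it is a local
// equivalent of the analysis.BuildTermFromRunes helper used by the other
// filters in this changeset.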
func buildTermFromRunes(runes []rune) []byte {
rv := make([]byte, 0, len(runes)*4)
for _, r := range runes {
runeBytes := make([]byte, utf8.RuneLen(r))
utf8.EncodeRune(runeBytes, r)
rv = append(rv, runeBytes...)
}
return rv
}
func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewSoraniStemmerFilter(), nil
}
func init() {
err := registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor)
if err != nil {
panic(err)
}
}
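// A worked trace of the suffix stripping above: for پیاوەکان ("the men",
// 8 runes) the postposition and possessive checks fail, then the
// definite-plural rule (rune count > 7 with suffix ەکان) fires and
// truncateRunes drops the last 4 runes, leaving پیاو. Note that each branch
// measures length in runes but matches suffixes with bytes.HasSuffix, so the
// byte-level suffix and the rune-level truncation count must stay in sync.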

View file

@ -0,0 +1,299 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ckb
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/single"
)
func TestSoraniStemmerFilter(t *testing.T) {
// in order to match the lucene tests
// we will test with an analyzer, not just the stemmer
analyzer := analysis.DefaultAnalyzer{
Tokenizer: single.NewSingleTokenTokenizer(),
TokenFilters: []analysis.TokenFilter{
NewSoraniNormalizeFilter(),
NewSoraniStemmerFilter(),
},
}
tests := []struct {
input []byte
output analysis.TokenStream
}{
{ // -ek
input: []byte("پیاوێک"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("پیاو"),
Position: 1,
Start: 0,
End: 12,
},
},
},
{ // -yek
input: []byte("دەرگایەک"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("دەرگا"),
Position: 1,
Start: 0,
End: 16,
},
},
},
{ // -aka
input: []byte("پیاوەكە"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("پیاو"),
Position: 1,
Start: 0,
End: 14,
},
},
},
{ // -ka
input: []byte("دەرگاكە"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("دەرگا"),
Position: 1,
Start: 0,
End: 14,
},
},
},
{ // -a
input: []byte("کتاویە"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("کتاوی"),
Position: 1,
Start: 0,
End: 12,
},
},
},
{ // -ya
input: []byte("دەرگایە"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("دەرگا"),
Position: 1,
Start: 0,
End: 14,
},
},
},
{ // -An
input: []byte("پیاوان"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("پیاو"),
Position: 1,
Start: 0,
End: 12,
},
},
},
{ // -yAn
input: []byte("دەرگایان"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("دەرگا"),
Position: 1,
Start: 0,
End: 16,
},
},
},
{ // -akAn
input: []byte("پیاوەکان"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("پیاو"),
Position: 1,
Start: 0,
End: 16,
},
},
},
{ // -kAn
input: []byte("دەرگاکان"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("دەرگا"),
Position: 1,
Start: 0,
End: 16,
},
},
},
{ // -Ana
input: []byte("پیاوانە"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("پیاو"),
Position: 1,
Start: 0,
End: 14,
},
},
},
{ // -yAna
input: []byte("دەرگایانە"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("دەرگا"),
Position: 1,
Start: 0,
End: 18,
},
},
},
{ // Ezafe singular
input: []byte("هۆتیلی"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("هۆتیل"),
Position: 1,
Start: 0,
End: 12,
},
},
},
{ // Ezafe indefinite
input: []byte("هۆتیلێکی"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("هۆتیل"),
Position: 1,
Start: 0,
End: 16,
},
},
},
{ // Ezafe plural
input: []byte("هۆتیلانی"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("هۆتیل"),
Position: 1,
Start: 0,
End: 16,
},
},
},
{ // -awa
input: []byte("دوورەوە"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("دوور"),
Position: 1,
Start: 0,
End: 14,
},
},
},
{ // -dA
input: []byte("نیوەشەودا"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("نیوەشەو"),
Position: 1,
Start: 0,
End: 18,
},
},
},
{ // -A
input: []byte("سۆرانا"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("سۆران"),
Position: 1,
Start: 0,
End: 12,
},
},
},
{ // -mAn
input: []byte("پارەمان"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("پارە"),
Position: 1,
Start: 0,
End: 14,
},
},
},
{ // -tAn
input: []byte("پارەتان"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("پارە"),
Position: 1,
Start: 0,
End: 14,
},
},
},
{ // -yAn
input: []byte("پارەیان"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("پارە"),
Position: 1,
Start: 0,
End: 14,
},
},
},
{ // empty
input: []byte(""),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
Position: 1,
Start: 0,
End: 0,
},
},
},
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("for input %s(% x)", test.input, test.input)
t.Errorf("\texpected:")
for _, token := range test.output {
t.Errorf("\t\t%v %s(% x)", token, token.Term, token.Term)
}
t.Errorf("\tactual:")
for _, token := range actual {
t.Errorf("\t\t%v %s(% x)", token, token.Term, token.Term)
}
}
}
}

View file

@ -0,0 +1,36 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ckb
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
tokenMap, err := cache.TokenMapNamed(StopName)
if err != nil {
return nil, err
}
return stop.NewStopTokensFilter(tokenMap), nil
}
func init() {
err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,163 @@
package ckb
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const StopName = "stop_ckb"
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// ` was changed to ' to allow for literal string
var SoraniStopWords = []byte(`# set of kurdish stopwords
# note these have been normalized with our scheme (e represented with U+06D5, etc)
# constructed from:
# * Fig 5 of "Building A Test Collection For Sorani Kurdish" (Esmaili et al)
# * "Sorani Kurdish: A Reference Grammar with selected readings" (Thackston)
# * Corpus-based analysis of 77M word Sorani collection: wikipedia, news, blogs, etc
# and
و
# which
کە
# of
ی
# made/did
کرد
# that/which
ئەوەی
# on/head
سەر
# two
دوو
# also
هەروەها
# from/that
لەو
# makes/does
دەکات
# some
چەند
# every
هەر
# demonstratives
# that
ئەو
# this
ئەم
# personal pronouns
# I
من
# we
ئێمە
# you
تۆ
# you
ئێوە
# he/she/it
ئەو
# they
ئەوان
# prepositions
# to/with/by
بە
پێ
# without
بەبێ
# along with/while/during
بەدەم
# in the opinion of
بەلای
# according to
بەپێی
# before
بەرلە
# in the direction of
بەرەوی
# in front of/toward
بەرەوە
# before/in the face of
بەردەم
# without
بێ
# except for
بێجگە
# for
بۆ
# on/in
دە
تێ
# with
دەگەڵ
# after
دوای
# except for/aside from
جگە
# in/from
لە
لێ
# in front of/before/because of
لەبەر
# between/among
لەبەینی
# concerning/about
لەبابەت
# concerning
لەبارەی
# instead of
لەباتی
# beside
لەبن
# instead of
لەبرێتی
# behind
لەدەم
# with/together with
لەگەڵ
# by
لەلایەن
# within
لەناو
# between/among
لەنێو
# for the sake of
لەپێناوی
# with respect to
لەرەوی
# by means of/for
لەرێ
# for the sake of
لەرێگا
# on/on top of/according to
لەسەر
# under
لەژێر
# between/among
ناو
# between/among
نێوان
# after
پاش
# before
پێش
# like
وەک
`)
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
rv := analysis.NewTokenMap()
err := rv.LoadBytes(SoraniStopWords)
return rv, err
}
func init() {
err := registry.RegisterTokenMap(StopName, TokenMapConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,36 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cs
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
tokenMap, err := cache.TokenMapNamed(StopName)
if err != nil {
return nil, err
}
return stop.NewStopTokensFilter(tokenMap), nil
}
func init() {
err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,199 @@
package cs
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const StopName = "stop_cs"
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// ` was changed to ' to allow for literal string
var CzechStopWords = []byte(`a
s
k
o
i
u
v
z
dnes
cz
tímto
budeš
budem
byli
jseš
můj
svým
ta
tomto
tohle
tuto
tyto
jej
zda
proč
máte
tato
kam
tohoto
kdo
kteří
mi
nám
tom
tomuto
mít
nic
proto
kterou
byla
toho
protože
asi
ho
naši
napište
re
což
tím
takže
svých
její
svými
jste
aj
tu
tedy
teto
bylo
kde
ke
pravé
ji
nad
nejsou
či
pod
téma
mezi
přes
ty
pak
vám
ani
když
však
neg
jsem
tento
článku
články
aby
jsme
před
pta
jejich
byl
ještě
bez
také
pouze
první
vaše
která
nás
nový
tipy
pokud
může
strana
jeho
své
jiné
zprávy
nové
není
vás
jen
podle
zde
být
více
bude
již
než
který
by
které
co
nebo
ten
tak
při
od
po
jsou
jak
další
ale
si
se
ve
to
jako
za
zpět
ze
do
pro
je
na
atd
atp
jakmile
přičemž
on
ona
ono
oni
ony
my
vy
ji
mne
jemu
tomu
těm
těmu
němu
němuž
jehož
jíž
jelikož
jež
jakož
načež
`)
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
rv := analysis.NewTokenMap()
err := rv.LoadBytes(CzechStopWords)
return rv, err
}
func init() {
err := registry.RegisterTokenMap(StopName, TokenMapConstructor)
if err != nil {
panic(err)
}
}
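// This changeset registers only the Czech stop-word token map and filter; a
// minimal sketch of wiring them into an analyzer (the chain below is an
// assumption modeled on the other language packages here, not a registered
// "cs" analyzer):
//
//	cache := registry.NewCache()
//	tokenizer, _ := cache.TokenizerNamed(unicode.Name)
//	toLower, _ := cache.TokenFilterNamed(lowercase.Name)
//	stopCs, _ := cache.TokenFilterNamed(StopName) // "stop_cs"
//	analyzer := analysis.DefaultAnalyzer{
//		Tokenizer:    tokenizer,
//		TokenFilters: []analysis.TokenFilter{toLower, stopCs},
//	}
//	tokens := analyzer.Analyze([]byte("dnes je nový den")) // leaves only "den"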

View file

@ -0,0 +1,59 @@
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package da
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
"github.com/blevesearch/bleve/v2/registry"
)
const AnalyzerName = "da"
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name)
if err != nil {
return nil, err
}
toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
if err != nil {
return nil, err
}
stopDaFilter, err := cache.TokenFilterNamed(StopName)
if err != nil {
return nil, err
}
stemmerDaFilter, err := cache.TokenFilterNamed(SnowballStemmerName)
if err != nil {
return nil, err
}
rv := analysis.DefaultAnalyzer{
Tokenizer: unicodeTokenizer,
TokenFilters: []analysis.TokenFilter{
toLowerFilter,
stopDaFilter,
stemmerDaFilter,
},
}
return &rv, nil
}
func init() {
err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,71 @@
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package da
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestDanishAnalyzer(t *testing.T) {
tests := []struct {
input []byte
output analysis.TokenStream
}{
// stemming
{
input: []byte("undersøg"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("undersøg"),
Position: 1,
Start: 0,
End: 9,
},
},
},
{
input: []byte("undersøgelse"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("undersøg"),
Position: 1,
Start: 0,
End: 13,
},
},
},
// stop word
{
input: []byte("på"),
output: analysis.TokenStream{},
},
}
cache := registry.NewCache()
analyzer, err := cache.AnalyzerNamed(AnalyzerName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %v, got %v", test.output, actual)
}
}
}

View file

@ -0,0 +1,52 @@
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package da
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/snowballstem"
"github.com/blevesearch/snowballstem/danish"
)
const SnowballStemmerName = "stemmer_da_snowball"
type DanishStemmerFilter struct {
}
func NewDanishStemmerFilter() *DanishStemmerFilter {
return &DanishStemmerFilter{}
}
func (s *DanishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
env := snowballstem.NewEnv(string(token.Term))
danish.Stem(env)
token.Term = []byte(env.Current())
}
return input
}
func DanishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewDanishStemmerFilter(), nil
}
func init() {
err := registry.RegisterTokenFilter(SnowballStemmerName, DanishStemmerFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,36 @@
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package da
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
tokenMap, err := cache.TokenMapNamed(StopName)
if err != nil {
return nil, err
}
return stop.NewStopTokensFilter(tokenMap), nil
}
func init() {
err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,137 @@
package da
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const StopName = "stop_da"
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// ` was changed to ' to allow for literal string
var DanishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Danish stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
| This is a ranked list (commonest to rarest) of stopwords derived from
| a large text sample.
og | and
i | in
jeg | I
det | that (dem. pronoun)/it (pers. pronoun)
at | that (in front of a sentence)/to (with infinitive)
en | a/an
den | it (pers. pronoun)/that (dem. pronoun)
til | to/at/for/until/against/by/of/into, more
er | present tense of "to be"
som | who, as
på | on/upon/in/on/at/to/after/of/with/for, on
de | they
med | with/by/in, along
han | he
af | of/by/from/off/for/in/with/on, off
for | at/for/to/from/by/of/ago, in front/before, because
ikke | not
der | who/which, there/those
var | past tense of "to be"
mig | me/myself
sig | oneself/himself/herself/itself/themselves
men | but
et | a/an/one, one (number), someone/somebody/one
har | present tense of "to have"
om | round/about/for/in/a, about/around/down, if
vi | we
min | my
havde | past tense of "to have"
ham | him
hun | she
nu | now
over | over/above/across/by/beyond/past/on/about, over/past
da | then, when/as/since
fra | from/off/since, off, since
du | you
ud | out
sin | his/her/its/one's
dem | them
os | us/ourselves
op | up
man | you/one
hans | his
hvor | where
eller | or
hvad | what
skal | must/shall etc.
selv | myself/youself/herself/ourselves etc., even
her | here
alle | all/everyone/everybody etc.
vil | will (verb)
blev | past tense of "to stay/to remain/to get/to become"
kunne | could
ind | in
når | when
være | present tense of "to be"
dog | however/yet/after all
noget | something
ville | would
jo | you know/you see (adv), yes
deres | their/theirs
efter | after/behind/according to/for/by/from, later/afterwards
ned | down
skulle | should
denne | this
end | than
dette | this
mit | my/mine
også | also
under | under/beneath/below/during, below/underneath
have | have
dig | you
anden | other
hende | her
mine | my
alt | everything
meget | much/very, plenty of
sit | his, her, its, one's
sine | his, her, its, one's
vor | our
mod | against
disse | these
hvis | if
din | your/yours
nogle | some
hos | by/at
blive | be/become
mange | many
ad | by/through
bliver | present tense of "to be/to become"
hendes | her/hers
været | be
thi | for (conj)
jer | you
sådan | such, like this/like that
`)
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
rv := analysis.NewTokenMap()
err := rv.LoadBytes(DanishStopWords)
return rv, err
}
func init() {
err := registry.RegisterTokenMap(StopName, TokenMapConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,64 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package de
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
"github.com/blevesearch/bleve/v2/registry"
)
const AnalyzerName = "de"
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name)
if err != nil {
return nil, err
}
toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
if err != nil {
return nil, err
}
stopDeFilter, err := cache.TokenFilterNamed(StopName)
if err != nil {
return nil, err
}
normalizeDeFilter, err := cache.TokenFilterNamed(NormalizeName)
if err != nil {
return nil, err
}
lightStemmerDeFilter, err := cache.TokenFilterNamed(LightStemmerName)
if err != nil {
return nil, err
}
rv := analysis.DefaultAnalyzer{
Tokenizer: unicodeTokenizer,
TokenFilters: []analysis.TokenFilter{
toLowerFilter,
stopDeFilter,
normalizeDeFilter,
lightStemmerDeFilter,
},
}
return &rv, nil
}
func init() {
err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,155 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package de
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestGermanAnalyzer(t *testing.T) {
tests := []struct {
input []byte
output analysis.TokenStream
}{
{
input: []byte("Tisch"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("tisch"),
Position: 1,
Start: 0,
End: 5,
},
},
},
{
input: []byte("Tische"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("tisch"),
Position: 1,
Start: 0,
End: 6,
},
},
},
{
input: []byte("Tischen"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("tisch"),
Position: 1,
Start: 0,
End: 7,
},
},
},
// german specials
{
input: []byte("Schaltflächen"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("schaltflach"),
Position: 1,
Start: 0,
End: 14,
},
},
},
{
input: []byte("Schaltflaechen"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("schaltflach"),
Position: 1,
Start: 0,
End: 14,
},
},
},
// tests added by marty to increase coverage
{
input: []byte("Blechern"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("blech"),
Position: 1,
Start: 0,
End: 8,
},
},
},
{
input: []byte("Klecks"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("kleck"),
Position: 1,
Start: 0,
End: 6,
},
},
},
{
input: []byte("Mindestens"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("mindest"),
Position: 1,
Start: 0,
End: 10,
},
},
},
{
input: []byte("Kugelfest"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("kugelf"),
Position: 1,
Start: 0,
End: 9,
},
},
},
{
input: []byte("Baldigst"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("baldig"),
Position: 1,
Start: 0,
End: 8,
},
},
},
}
cache := registry.NewCache()
analyzer, err := cache.AnalyzerNamed(AnalyzerName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %v, got %v", test.output, actual)
}
}
}

View file

@ -0,0 +1,98 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package de
import (
"bytes"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const NormalizeName = "normalize_de"
const (
N = 0 /* ordinary state */
V = 1 /* stops 'u' from entering umlaut state */
U = 2 /* umlaut state, allows e-deletion */
)
type GermanNormalizeFilter struct {
}
func NewGermanNormalizeFilter() *GermanNormalizeFilter {
return &GermanNormalizeFilter{}
}
func (s *GermanNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
term := normalize(token.Term)
token.Term = term
}
return input
}
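// normalize folds umlauts to their base vowels, expands ß to ss, and deletes
// an 'e' that follows a/o/u so that the "ae"/"oe"/"ue" spellings match the
// folded "ä"/"ö"/"ü" forms. The state machine guards that deletion: U means
// the previous letter could start an umlaut digraph, V means a following
// 'u' or 'e' is an ordinary letter. Tracing "flaechen": 'a' enters state U,
// the 'e' is deleted, giving "flachen". Tracing "dauer": the 'u' is reached
// in state U and moves to V, so the following 'e' survives.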
func normalize(input []byte) []byte {
state := N
runes := bytes.Runes(input)
for i := 0; i < len(runes); i++ {
switch runes[i] {
case 'a', 'o':
state = U
case 'u':
if state == N {
state = U
} else {
state = V
}
case 'e':
if state == U {
runes = analysis.DeleteRune(runes, i)
i--
}
state = V
case 'i', 'q', 'y':
state = V
case 'ä':
runes[i] = 'a'
state = V
case 'ö':
runes[i] = 'o'
state = V
case 'ü':
runes[i] = 'u'
state = V
case 'ß':
runes[i] = 's'
i++
runes = analysis.InsertRune(runes, i, 's')
state = N
default:
state = N
}
}
return analysis.BuildTermFromRunes(runes)
}
func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewGermanNormalizeFilter(), nil
}
func init() {
err := registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,103 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package de
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
func TestGermanNormalizeFilter(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
// Tests that a/o/u + e is equivalent to the umlaut form
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("Schaltflächen"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("Schaltflachen"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("Schaltflaechen"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("Schaltflachen"),
},
},
},
// Tests the specific heuristic that ue is not folded after a vowel or q.
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("dauer"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("dauer"),
},
},
},
// Tests german specific folding of sharp-s
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("weißbier"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("weissbier"),
},
},
},
// empty
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
}
germanNormalizeFilter := NewGermanNormalizeFilter()
for _, test := range tests {
actual := germanNormalizeFilter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %#v, got %#v", test.output, actual)
t.Errorf("expected %s(% x), got %s(% x)", test.output[0].Term, test.output[0].Term, actual[0].Term, actual[0].Term)
}
}
}

View file

@ -0,0 +1,119 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package de
import (
"bytes"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const LightStemmerName = "stemmer_de_light"
type GermanLightStemmerFilter struct {
}
func NewGermanLightStemmerFilter() *GermanLightStemmerFilter {
return &GermanLightStemmerFilter{}
}
func (s *GermanLightStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
runes := bytes.Runes(token.Term)
runes = stem(runes)
token.Term = analysis.BuildTermFromRunes(runes)
}
return input
}
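// stem first folds accented vowels (including umlauts, matching the
// normalizer in this package) to their base letters, then runs two suffix-
// stripping passes: step1 removes declension endings (-ern, -em/-en/-er/-es,
// -e, and a final -s after a typical stem-final consonant), step2 removes
// -est, -er/-en, and -st. Tracing "kugelfest": step1 leaves it untouched and
// step2 strips "est", yielding "kugelf", the term the analyzer test above
// expects.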
func stem(input []rune) []rune {
for i, r := range input {
switch r {
case 'ä', 'à', 'á', 'â':
input[i] = 'a'
case 'ö', 'ò', 'ó', 'ô':
input[i] = 'o'
case 'ï', 'ì', 'í', 'î':
input[i] = 'i'
case 'ü', 'ù', 'ú', 'û':
input[i] = 'u'
}
}
input = step1(input)
return step2(input)
}
func stEnding(ch rune) bool {
switch ch {
case 'b', 'd', 'f', 'g', 'h', 'k', 'l', 'm', 'n', 't':
return true
}
return false
}
func step1(s []rune) []rune {
l := len(s)
if l > 5 && s[l-3] == 'e' && s[l-2] == 'r' && s[l-1] == 'n' {
return s[:l-3]
}
if l > 4 && s[l-2] == 'e' {
switch s[l-1] {
case 'm', 'n', 'r', 's':
return s[:l-2]
}
}
if l > 3 && s[l-1] == 'e' {
return s[:l-1]
}
if l > 3 && s[l-1] == 's' && stEnding(s[l-2]) {
return s[:l-1]
}
return s
}
func step2(s []rune) []rune {
l := len(s)
if l > 5 && s[l-3] == 'e' && s[l-2] == 's' && s[l-1] == 't' {
return s[:l-3]
}
if l > 4 && s[l-2] == 'e' && (s[l-1] == 'r' || s[l-1] == 'n') {
return s[:l-2]
}
if l > 4 && s[l-2] == 's' && s[l-1] == 't' && stEnding(s[l-3]) {
return s[:l-2]
}
return s
}
func GermanLightStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewGermanLightStemmerFilter(), nil
}
func init() {
err := registry.RegisterTokenFilter(LightStemmerName, GermanLightStemmerFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,52 @@
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package de
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/snowballstem"
"github.com/blevesearch/snowballstem/german"
)
const SnowballStemmerName = "stemmer_de_snowball"
type GermanStemmerFilter struct {
}
func NewGermanStemmerFilter() *GermanStemmerFilter {
return &GermanStemmerFilter{}
}
func (s *GermanStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
env := snowballstem.NewEnv(string(token.Term))
german.Stem(env)
token.Term = []byte(env.Current())
}
return input
}
func GermanStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewGermanStemmerFilter(), nil
}
func init() {
err := registry.RegisterTokenFilter(SnowballStemmerName, GermanStemmerFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,91 @@
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package de
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestSnowballGermanStemmer(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("abzuschrecken"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("abzuschreck"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("abzuwarten"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("abzuwart"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("zwirnfabrik"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("zwirnfabr"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("zyniker"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("zynik"),
},
},
},
}
cache := registry.NewCache()
filter, err := cache.TokenFilterNamed(SnowballStemmerName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := filter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
}
}
}

View file

@ -0,0 +1,36 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package de
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
tokenMap, err := cache.TokenMapNamed(StopName)
if err != nil {
return nil, err
}
return stop.NewStopTokensFilter(tokenMap), nil
}
func init() {
err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,321 @@
package de
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const StopName = "stop_de"
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// ` was changed to ' to allow for literal string
var GermanStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A German stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
| The number of forms in this list is reduced significantly by passing it
| through the German stemmer.
aber | but
alle | all
allem
allen
aller
alles
als | than, as
also | so
am | an + dem
an | at
ander | other
andere
anderem
anderen
anderer
anderes
anderm
andern
anderr
anders
auch | also
auf | on
aus | out of
bei | by
bin | am
bis | until
bist | art
da | there
damit | with it
dann | then
der | the
den
des
dem
die
das
daß | that
derselbe | the same
derselben
denselben
desselben
demselben
dieselbe
dieselben
dasselbe
dazu | to that
dein | thy
deine
deinem
deinen
deiner
deines
denn | because
derer | of those
dessen | of him
dich | thee
dir | to thee
du | thou
dies | this
diese
diesem
diesen
dieser
dieses
doch | (several meanings)
dort | (over) there
durch | through
ein | a
eine
einem
einen
einer
eines
einig | some
einige
einigem
einigen
einiger
einiges
einmal | once
er | he
ihn | him
ihm | to him
es | it
etwas | something
euer | your
eure
eurem
euren
eurer
eures
für | for
gegen | towards
gewesen | p.p. of sein
hab | have
habe | have
haben | have
hat | has
hatte | had
hatten | had
hier | here
hin | there
hinter | behind
ich | I
mich | me
mir | to me
ihr | you, to her
ihre
ihrem
ihren
ihrer
ihres
euch | to you
im | in + dem
in | in
indem | while
ins | in + das
ist | is
jede | each, every
jedem
jeden
jeder
jedes
jene | that
jenem
jenen
jener
jenes
jetzt | now
kann | can
kein | no
keine
keinem
keinen
keiner
keines
können | can
könnte | could
machen | do
man | one
manche | some, many a
manchem
manchen
mancher
manches
mein | my
meine
meinem
meinen
meiner
meines
mit | with
muss | must
musste | had to
nach | to(wards)
nicht | not
nichts | nothing
noch | still, yet
nun | now
nur | only
ob | whether
oder | or
ohne | without
sehr | very
sein | his
seine
seinem
seinen
seiner
seines
selbst | self
sich | herself
sie | they, she
ihnen | to them
sind | are
so | so
solche | such
solchem
solchen
solcher
solches
soll | shall
sollte | should
sondern | but
sonst | else
über | over
um | about, around
und | and
uns | us
unse
unsem
unsen
unser
unses
unter | under
viel | much
vom | von + dem
von | from
vor | before
während | while
war | was
waren | were
warst | wast
was | what
weg | away, off
weil | because
weiter | further
welche | which
welchem
welchen
welcher
welches
wenn | when
werde | will
werden | will
wie | how
wieder | again
will | want
wir | we
wird | will
wirst | willst
wo | where
wollen | want
wollte | wanted
würde | would
würden | would
zu | to
zum | zu + dem
zur | zu + der
zwar | indeed
zwischen | between
`)
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
rv := analysis.NewTokenMap()
err := rv.LoadBytes(GermanStopWords)
return rv, err
}
func init() {
err := registry.RegisterTokenMap(StopName, TokenMapConstructor)
if err != nil {
panic(err)
}
}
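
The name "stop_de" is registered twice on purpose: once as the token map above and once as the token filter built from it earlier in this diff. A minimal sketch of the wiring (the sample tokens are illustrative):

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/v2/analysis"
	"github.com/blevesearch/bleve/v2/analysis/lang/de"
	"github.com/blevesearch/bleve/v2/registry"
)

func main() {
	cache := registry.NewCache()
	stopFilter, err := cache.TokenFilterNamed(de.StopName) // "stop_de"
	if err != nil {
		panic(err)
	}
	out := stopFilter.Filter(analysis.TokenStream{
		&analysis.Token{Term: []byte("der")},  // in GermanStopWords, dropped
		&analysis.Token{Term: []byte("hund")}, // kept
	})
	for _, tok := range out {
		fmt.Println(string(tok.Term)) // hund
	}
}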

View file

@ -0,0 +1,36 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package el
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
tokenMap, err := cache.TokenMapNamed(StopName)
if err != nil {
return nil, err
}
return stop.NewStopTokensFilter(tokenMap), nil
}
func init() {
err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,105 @@
package el
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const StopName = "stop_el"
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// ` was changed to ' to allow for literal string
var GreekStopWords = []byte(`# Lucene Greek Stopwords list
# Note: by default this file is used after GreekLowerCaseFilter,
# so when modifying this file use 'σ' instead of 'ς'
ο
η
το
οι
τα
του
τησ
των
τον
την
και
κι
κ
ειμαι
εισαι
ειναι
ειμαστε
ειστε
στο
στον
στη
στην
μα
αλλα
απο
για
προσ
με
σε
ωσ
παρα
αντι
κατα
μετα
θα
να
δε
δεν
μη
μην
επι
ενω
εαν
αν
τοτε
που
πωσ
ποιοσ
ποια
ποιο
ποιοι
ποιεσ
ποιων
ποιουσ
αυτοσ
αυτη
αυτο
αυτοι
αυτων
αυτουσ
αυτεσ
αυτα
εκεινοσ
εκεινη
εκεινο
εκεινοι
εκεινεσ
εκεινα
εκεινων
εκεινουσ
οπωσ
ομωσ
ισωσ
οσο
οτι
`)
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
rv := analysis.NewTokenMap()
err := rv.LoadBytes(GreekStopWords)
return rv, err
}
func init() {
err := registry.RegisterTokenMap(StopName, TokenMapConstructor)
if err != nil {
panic(err)
}
}
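
The 'σ'-instead-of-'ς' note in the header is why entries such as αυτοσ look misspelled: in the upstream Lucene pipeline a Greek-aware lowercase filter folds final sigma to medial sigma before stop filtering, so the map must store the folded form. A rough sketch of that matching assumption (the folding step here is hand-rolled for illustration; it is not the filter bleve actually uses):

package main

import (
	"fmt"
	"strings"

	"github.com/blevesearch/bleve/v2/analysis/lang/el"
	"github.com/blevesearch/bleve/v2/registry"
)

func main() {
	cache := registry.NewCache()
	tokenMap, err := cache.TokenMapNamed(el.StopName) // "stop_el"
	if err != nil {
		panic(err)
	}
	// "αυτός" only hits the map once final 'ς' is folded to 'σ'
	folded := strings.ReplaceAll(strings.ToLower("αυτός"), "ς", "σ")
	fmt.Println(folded, tokenMap[folded]) // αυτοσ true
}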

View file

@ -0,0 +1,73 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package en implements an analyzer with reasonable defaults for processing
// English text.
//
// It strips possessive suffixes ('s), transforms tokens to lower case,
// removes stopwords from a built-in list, and applies porter stemming.
//
// The built-in stopwords list is defined in EnglishStopWords.
package en
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/token/porter"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
const AnalyzerName = "en"
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
tokenizer, err := cache.TokenizerNamed(unicode.Name)
if err != nil {
return nil, err
}
possEnFilter, err := cache.TokenFilterNamed(PossessiveName)
if err != nil {
return nil, err
}
toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
if err != nil {
return nil, err
}
stopEnFilter, err := cache.TokenFilterNamed(StopName)
if err != nil {
return nil, err
}
stemmerEnFilter, err := cache.TokenFilterNamed(porter.Name)
if err != nil {
return nil, err
}
rv := analysis.DefaultAnalyzer{
Tokenizer: tokenizer,
TokenFilters: []analysis.TokenFilter{
possEnFilter,
toLowerFilter,
stopEnFilter,
stemmerEnFilter,
},
}
return &rv, nil
}
func init() {
err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
if err != nil {
panic(err)
}
}
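
A hedged sketch of how the "en" analyzer is typically selected when building an index (the field and document names are invented for illustration):

package main

import (
	"github.com/blevesearch/bleve/v2"
	"github.com/blevesearch/bleve/v2/analysis/lang/en"
)

func main() {
	// use the "en" analyzer (possessive -> lowercase -> stop -> porter) by default
	mapping := bleve.NewIndexMapping()
	mapping.DefaultAnalyzer = en.AnalyzerName
	idx, err := bleve.NewMemOnly(mapping)
	if err != nil {
		panic(err)
	}
	defer idx.Close()
	// "Steven's books" is indexed as the terms "steven" and "book"
	if err := idx.Index("doc1", map[string]interface{}{"title": "Steven's books"}); err != nil {
		panic(err)
	}
}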

View file

@ -0,0 +1,105 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package en
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestEnglishAnalyzer(t *testing.T) {
tests := []struct {
input []byte
output analysis.TokenStream
}{
// stemming
{
input: []byte("books"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("book"),
Position: 1,
Start: 0,
End: 5,
},
},
},
{
input: []byte("book"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("book"),
Position: 1,
Start: 0,
End: 4,
},
},
},
// stop word removal
{
input: []byte("the"),
output: analysis.TokenStream{},
},
// possessive removal
{
input: []byte("steven's"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("steven"),
Position: 1,
Start: 0,
End: 8,
},
},
},
{
input: []byte("steven\u2019s"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("steven"),
Position: 1,
Start: 0,
End: 10,
},
},
},
{
input: []byte("steven\uFF07s"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("steven"),
Position: 1,
Start: 0,
End: 10,
},
},
},
}
cache := registry.NewCache()
analyzer, err := cache.AnalyzerNamed(AnalyzerName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %v, got %v", test.output, actual)
}
}
}

View file

@ -0,0 +1,177 @@
/*
This code was ported from the Open Search Project
https://github.com/opensearch-project/OpenSearch/blob/main/modules/analysis-common/src/main/java/org/opensearch/analysis/common/EnglishPluralStemFilter.java
The algorithm itself was created by Mark Harwood
https://github.com/markharwood
*/
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package en
import (
"strings"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const PluralStemmerName = "stemmer_en_plural"
type EnglishPluralStemmerFilter struct {
}
func NewEnglishPluralStemmerFilter() *EnglishPluralStemmerFilter {
return &EnglishPluralStemmerFilter{}
}
func (s *EnglishPluralStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
token.Term = []byte(stem(string(token.Term)))
}
return input
}
func EnglishPluralStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewEnglishPluralStemmerFilter(), nil
}
func init() {
err := registry.RegisterTokenFilter(PluralStemmerName, EnglishPluralStemmerFilterConstructor)
if err != nil {
panic(err)
}
}
// ----------------------------------------------------------------------------
// Words ending in oes that retain the e when stemmed
var oesExceptions = []string{"shoes", "canoes", "oboes"}
// Words ending in ches that retain the e when stemmed
var chesExceptions = []string{
"cliches",
"avalanches",
"mustaches",
"moustaches",
"quiches",
"headaches",
"heartaches",
"porsches",
"tranches",
"caches",
}
func stem(word string) string {
runes := []rune(strings.ToLower(word))
if len(runes) < 3 || runes[len(runes)-1] != 's' {
return string(runes)
}
switch runes[len(runes)-2] {
case 'u':
fallthrough
case 's':
return string(runes)
case 'e':
// Modified ies->y logic from original s-stemmer - only work on strings > 4
// so spies -> spy still but pies->pie.
// The original code also special-cased aies and eies for no good reason as far as I can tell.
// ( no words of consequence - eg http://www.thefreedictionary.com/words-that-end-in-aies )
if len(runes) > 4 && runes[len(runes)-3] == 'i' {
runes[len(runes)-3] = 'y'
return string(runes[0 : len(runes)-2])
}
// Suffix rules to remove any dangling "e"
if len(runes) > 3 {
// xes (but >1 prefix so we can stem "boxes->box" but keep "axes->axe")
if len(runes) > 4 && runes[len(runes)-3] == 'x' {
return string(runes[0 : len(runes)-2])
}
// oes
if len(runes) > 3 && runes[len(runes)-3] == 'o' {
if isException(runes, oesExceptions) {
// Only remove the S
return string(runes[0 : len(runes)-1])
}
// Remove the es
return string(runes[0 : len(runes)-2])
}
if len(runes) > 4 {
// shes/sses
if runes[len(runes)-4] == 's' && (runes[len(runes)-3] == 'h' || runes[len(runes)-3] == 's') {
return string(runes[0 : len(runes)-2])
}
// ches
if len(runes) > 4 {
if runes[len(runes)-4] == 'c' && runes[len(runes)-3] == 'h' {
if isException(runes, chesExceptions) {
// Only remove the S
return string(runes[0 : len(runes)-1])
}
// Remove the es
return string(runes[0 : len(runes)-2])
}
}
}
}
fallthrough
default:
return string(runes[0 : len(runes)-1])
}
}
func isException(word []rune, exceptions []string) bool {
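	// compare the exception and the word rune-by-rune from the end;
	// it counts as a match if no rune differs before either side is exhausted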
for _, exception := range exceptions {
exceptionRunes := []rune(exception)
exceptionPos := len(exceptionRunes) - 1
wordPos := len(word) - 1
matched := true
for exceptionPos >= 0 && wordPos >= 0 {
if exceptionRunes[exceptionPos] != word[wordPos] {
matched = false
break
}
exceptionPos--
wordPos--
}
if matched {
return true
}
}
return false
}

View file

@ -0,0 +1,46 @@
package en
import "testing"
func TestEnglishPluralStemmer(t *testing.T) {
data := []struct {
In, Out string
}{
{"dresses", "dress"},
{"dress", "dress"},
{"axes", "axe"},
{"ad", "ad"},
{"ads", "ad"},
{"gas", "ga"},
{"sass", "sass"},
{"berries", "berry"},
{"dresses", "dress"},
{"spies", "spy"},
{"shoes", "shoe"},
{"headaches", "headache"},
{"computer", "computer"},
{"dressing", "dressing"},
{"clothes", "clothe"},
{"DRESSES", "dress"},
{"frog", "frog"},
{"dress", "dress"},
{"runs", "run"},
{"pies", "pie"},
{"foxes", "fox"},
{"axes", "axe"},
{"foes", "fo"},
{"dishes", "dish"},
{"snitches", "snitch"},
{"cliches", "cliche"},
{"forests", "forest"},
{"yes", "ye"},
}
for _, datum := range data {
stemmed := stem(datum.In)
if stemmed != datum.Out {
t.Errorf("expected %v but got %v", datum.Out, stemmed)
}
}
}

View file

@ -0,0 +1,70 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package en
import (
"unicode/utf8"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// PossessiveName is the name PossessiveFilter is registered as
// in the bleve registry.
const PossessiveName = "possessive_en"
const rightSingleQuotationMark = '’' // U+2019
const apostrophe = '\''
const fullWidthApostrophe = ''' // U+FF07
const apostropheChars = rightSingleQuotationMark + apostrophe + fullWidthApostrophe
// PossessiveFilter implements a TokenFilter which
// strips the English possessive suffix ('s) from tokens.
// It handles a variety of apostrophe types, is case-insensitive,
// and doesn't distinguish between possessive and contraction
// (i.e. "She's So Rad" becomes "She So Rad").
type PossessiveFilter struct {
}
func NewPossessiveFilter() *PossessiveFilter {
return &PossessiveFilter{}
}
func (s *PossessiveFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
lastRune, lastRuneSize := utf8.DecodeLastRune(token.Term)
if lastRune == 's' || lastRune == 'S' {
nextLastRune, nextLastRuneSize := utf8.DecodeLastRune(token.Term[:len(token.Term)-lastRuneSize])
if nextLastRune == rightSingleQuotationMark ||
nextLastRune == apostrophe ||
nextLastRune == fullWidthApostrophe {
token.Term = token.Term[:len(token.Term)-lastRuneSize-nextLastRuneSize]
}
}
}
return input
}
func PossessiveFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewPossessiveFilter(), nil
}
func init() {
err := registry.RegisterTokenFilter(PossessiveName, PossessiveFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,142 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package en
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestEnglishPossessiveFilter(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("marty's"),
},
&analysis.Token{
Term: []byte("MARTY'S"),
},
&analysis.Token{
Term: []byte("martys"),
},
&analysis.Token{
Term: []byte("MARTYS"),
},
&analysis.Token{
Term: []byte("martys"),
},
&analysis.Token{
Term: []byte("MARTYS"),
},
&analysis.Token{
Term: []byte("m"),
},
&analysis.Token{
Term: []byte("s"),
},
&analysis.Token{
Term: []byte("'s"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("marty"),
},
&analysis.Token{
Term: []byte("MARTY"),
},
&analysis.Token{
Term: []byte("marty"),
},
&analysis.Token{
Term: []byte("MARTY"),
},
&analysis.Token{
Term: []byte("marty"),
},
&analysis.Token{
Term: []byte("MARTY"),
},
&analysis.Token{
Term: []byte("m"),
},
&analysis.Token{
Term: []byte("s"),
},
&analysis.Token{
Term: []byte(""),
},
},
},
}
cache := registry.NewCache()
stemmerFilter, err := cache.TokenFilterNamed(PossessiveName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := stemmerFilter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %s, got %s", test.output, actual)
}
}
}
func BenchmarkEnglishPossessiveFilter(b *testing.B) {
input := analysis.TokenStream{
&analysis.Token{
Term: []byte("marty's"),
},
&analysis.Token{
Term: []byte("MARTY'S"),
},
&analysis.Token{
Term: []byte("martys"),
},
&analysis.Token{
Term: []byte("MARTYS"),
},
&analysis.Token{
Term: []byte("martys"),
},
&analysis.Token{
Term: []byte("MARTYS"),
},
&analysis.Token{
Term: []byte("m"),
},
}
cache := registry.NewCache()
stemmerFilter, err := cache.TokenFilterNamed(PossessiveName)
if err != nil {
b.Fatal(err)
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
stemmerFilter.Filter(input)
}
}

View file

@ -0,0 +1,52 @@
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package en
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/snowballstem"
"github.com/blevesearch/snowballstem/english"
)
const SnowballStemmerName = "stemmer_en_snowball"
type EnglishStemmerFilter struct {
}
func NewEnglishStemmerFilter() *EnglishStemmerFilter {
return &EnglishStemmerFilter{}
}
func (s *EnglishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
env := snowballstem.NewEnv(string(token.Term))
english.Stem(env)
token.Term = []byte(env.Current())
}
return input
}
func EnglishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewEnglishStemmerFilter(), nil
}
func init() {
err := registry.RegisterTokenFilter(SnowballStemmerName, EnglishStemmerFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,79 @@
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package en
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestSnowballEnglishStemmer(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("enjoy"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("enjoy"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("enjoyed"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("enjoy"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("enjoyable"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("enjoy"),
},
},
},
}
cache := registry.NewCache()
filter, err := cache.TokenFilterNamed(SnowballStemmerName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := filter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
}
}
}

View file

@ -0,0 +1,36 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package en
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
tokenMap, err := cache.TokenMapNamed(StopName)
if err != nil {
return nil, err
}
return stop.NewStopTokensFilter(tokenMap), nil
}
func init() {
err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,347 @@
package en
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const StopName = "stop_en"
// EnglishStopWords is the built-in list of stopwords used by the "stop_en" TokenFilter.
//
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// ` was changed to ' to allow for literal string
var EnglishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/english/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| An English stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
| Many of the forms below are quite rare (e.g. "yourselves") but included for
| completeness.
| PRONOUNS FORMS
| 1st person sing
i | subject, always in upper case of course
me | object
my | possessive adjective
| the possessive pronoun 'mine' is best suppressed, because of the
| sense of coal-mine etc.
myself | reflexive
| 1st person plural
we | subject
| us | object
| care is required here because US = United States. It is usually
| safe to remove it if it is in lower case.
our | possessive adjective
ours | possessive pronoun
ourselves | reflexive
| second person (archaic 'thou' forms not included)
you | subject and object
your | possessive adjective
yours | possessive pronoun
yourself | reflexive (singular)
yourselves | reflexive (plural)
| third person singular
he | subject
him | object
his | possessive adjective and pronoun
himself | reflexive
she | subject
her | object and possessive adjective
hers | possessive pronoun
herself | reflexive
it | subject and object
its | possessive adjective
itself | reflexive
| third person plural
they | subject
them | object
their | possessive adjective
theirs | possessive pronoun
themselves | reflexive
| other forms (demonstratives, interrogatives)
what
which
who
whom
this
that
these
those
| VERB FORMS (using F.R. Palmer's nomenclature)
| BE
am | 1st person, present
is | -s form (3rd person, present)
are | present
was | 1st person, past
were | past
be | infinitive
been | past participle
being | -ing form
| HAVE
have | simple
has | -s form
had | past
having | -ing form
| DO
do | simple
does | -s form
did | past
doing | -ing form
| The forms below are, I believe, best omitted, because of the significant
| homonym forms:
| He made a WILL
| old tin CAN
| merry month of MAY
| a smell of MUST
| fight the good fight with all thy MIGHT
| would, could, should, ought might however be included
| | AUXILIARIES
| | WILL
|will
would
| | SHALL
|shall
should
| | CAN
|can
could
| | MAY
|may
|might
| | MUST
|must
| | OUGHT
ought
| COMPOUND FORMS, increasingly encountered nowadays in 'formal' writing
| pronoun + verb
i'm
you're
he's
she's
it's
we're
they're
i've
you've
we've
they've
i'd
you'd
he'd
she'd
we'd
they'd
i'll
you'll
he'll
she'll
we'll
they'll
| verb + negation
isn't
aren't
wasn't
weren't
hasn't
haven't
hadn't
doesn't
don't
didn't
| auxiliary + negation
won't
wouldn't
shan't
shouldn't
can't
cannot
couldn't
mustn't
| miscellaneous forms
let's
that's
who's
what's
here's
there's
when's
where's
why's
how's
| rarer forms
| daren't needn't
| doubtful forms
| oughtn't mightn't
| ARTICLES
a
an
the
| THE REST (Overlap among prepositions, conjunctions, adverbs etc is so
| high, that classification is pointless.)
and
but
if
or
because
as
until
while
of
at
by
for
with
about
against
between
into
through
during
before
after
above
below
to
from
up
down
in
out
on
off
over
under
again
further
then
once
here
there
when
where
why
how
all
any
both
each
few
more
most
other
some
such
no
nor
not
only
own
same
so
than
too
very
| Just for the record, the following words are among the commonest in English
| one
| every
| least
| less
| many
| now
| ever
| never
| say
| says
| said
| also
| get
| go
| goes
| just
| made
| make
| put
| see
| seen
| whether
| like
| well
| back
| even
| still
| way
| take
| since
| another
| however
| two
| three
| four
| five
| first
| second
| new
| old
| high
| long
`)
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
rv := analysis.NewTokenMap()
err := rv.LoadBytes(EnglishStopWords)
return rv, err
}
func init() {
err := registry.RegisterTokenMap(StopName, TokenMapConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,66 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package es
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
const AnalyzerName = "es"
func AnalyzerConstructor(config map[string]interface{},
cache *registry.Cache) (analysis.Analyzer, error) {
unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name)
if err != nil {
return nil, err
}
toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
if err != nil {
return nil, err
}
normalizeEsFilter, err := cache.TokenFilterNamed(NormalizeName)
if err != nil {
return nil, err
}
stopEsFilter, err := cache.TokenFilterNamed(StopName)
if err != nil {
return nil, err
}
lightStemmerEsFilter, err := cache.TokenFilterNamed(LightStemmerName)
if err != nil {
return nil, err
}
rv := analysis.DefaultAnalyzer{
Tokenizer: unicodeTokenizer,
TokenFilters: []analysis.TokenFilter{
toLowerFilter,
stopEsFilter,
normalizeEsFilter,
lightStemmerEsFilter,
},
}
return &rv, nil
}
func init() {
err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
if err != nil {
panic(err)
}
}
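
One ordering detail worth noting above: stop_es runs before normalize_es, because the stop list keeps its accents (más, él, qué); if accent folding ran first, those entries would never match. A minimal sketch (the sample input is illustrative; "chican" follows the light stemmer defined later in this diff):

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/v2/analysis/lang/es"
	"github.com/blevesearch/bleve/v2/registry"
)

func main() {
	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(es.AnalyzerName) // "es"
	if err != nil {
		panic(err)
	}
	// "más" is dropped by stop_es while still accented;
	// "chicanos" is then normalized and light-stemmed
	for _, tok := range analyzer.Analyze([]byte("más chicanos")) {
		fmt.Println(string(tok.Term)) // chican
	}
}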

View file

@ -0,0 +1,122 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package es
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestSpanishAnalyzer(t *testing.T) {
tests := []struct {
input []byte
output analysis.TokenStream
}{
// stemming
{
input: []byte("chicana"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("chican"),
Position: 1,
Start: 0,
End: 7,
},
},
},
{
input: []byte("chicano"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("chican"),
Position: 1,
Start: 0,
End: 7,
},
},
},
// added by marty for better coverage
{
input: []byte("yeses"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("yes"),
Position: 1,
Start: 0,
End: 5,
},
},
},
{
input: []byte("jaeces"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("jaez"),
Position: 1,
Start: 0,
End: 6,
},
},
},
{
input: []byte("arcos"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("arc"),
Position: 1,
Start: 0,
End: 5,
},
},
},
{
input: []byte("caos"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("caos"),
Position: 1,
Start: 0,
End: 4,
},
},
},
{
input: []byte("parecer"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("parecer"),
Position: 1,
Start: 0,
End: 7,
},
},
},
}
cache := registry.NewCache()
analyzer, err := cache.AnalyzerNamed(AnalyzerName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %v, got %v", test.output, actual)
}
}
}

View file

@ -0,0 +1,78 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package es
import (
"bytes"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const LightStemmerName = "stemmer_es_light"
type SpanishLightStemmerFilter struct {
}
func NewSpanishLightStemmerFilter() *SpanishLightStemmerFilter {
return &SpanishLightStemmerFilter{}
}
func (s *SpanishLightStemmerFilter) Filter(
input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
runes := bytes.Runes(token.Term)
runes = stem(runes)
token.Term = analysis.BuildTermFromRunes(runes)
}
return input
}
func stem(input []rune) []rune {
l := len(input)
if l < 5 {
return input
}
switch input[l-1] {
case 'o', 'a', 'e':
return input[:l-1]
case 's':
if input[l-2] == 'e' && input[l-3] == 's' && input[l-4] == 'e' {
return input[:l-2]
}
if input[l-2] == 'e' && input[l-3] == 'c' {
input[l-3] = 'z'
return input[:l-2]
}
if input[l-2] == 'o' || input[l-2] == 'a' || input[l-2] == 'e' {
return input[:l-2]
}
}
return input
}
func SpanishLightStemmerFilterConstructor(config map[string]interface{},
cache *registry.Cache) (analysis.TokenFilter, error) {
return NewSpanishLightStemmerFilter(), nil
}
func init() {
err := registry.RegisterTokenFilter(LightStemmerName, SpanishLightStemmerFilterConstructor)
if err != nil {
panic(err)
}
}
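
The only non-obvious branch above is the -ces handling: Spanish spells z as c before e, so plurals like peces and jaeces stem back to pez and jaez. A direct check of that branch (the sample words are illustrative):

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/v2/analysis"
	"github.com/blevesearch/bleve/v2/analysis/lang/es"
)

func main() {
	filter := es.NewSpanishLightStemmerFilter()
	out := filter.Filter(analysis.TokenStream{
		&analysis.Token{Term: []byte("peces")},  // -ces -> -z
		&analysis.Token{Term: []byte("jaeces")}, // -ces -> -z
	})
	for _, tok := range out {
		fmt.Println(string(tok.Term)) // pez, jaez
	}
}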

View file

@ -0,0 +1,70 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package es
import (
"bytes"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const NormalizeName = "normalize_es"
type SpanishNormalizeFilter struct {
}
func NewSpanishNormalizeFilter() *SpanishNormalizeFilter {
return &SpanishNormalizeFilter{}
}
func (s *SpanishNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
term := normalize(token.Term)
token.Term = term
}
return input
}
func normalize(input []byte) []byte {
runes := bytes.Runes(input)
for i := 0; i < len(runes); i++ {
switch runes[i] {
case 'à', 'á', 'â', 'ä':
runes[i] = 'a'
case 'ò', 'ó', 'ô', 'ö':
runes[i] = 'o'
case 'è', 'é', 'ê', 'ë':
runes[i] = 'e'
case 'ù', 'ú', 'û', 'ü':
runes[i] = 'u'
case 'ì', 'í', 'î', 'ï':
runes[i] = 'i'
}
}
return analysis.BuildTermFromRunes(runes)
}
func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewSpanishNormalizeFilter(), nil
}
func init() {
err := registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,112 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package es
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
func TestSpanishNormalizeFilter(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("Guía"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("Guia"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("Belcebú"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("Belcebu"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("Limón"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("Limon"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("agüero"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("aguero"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("laúd"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("laud"),
},
},
},
// empty
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
}
spanishNormalizeFilter := NewSpanishNormalizeFilter()
for _, test := range tests {
actual := spanishNormalizeFilter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %#v, got %#v", test.output, actual)
t.Errorf("expected %s(% x), got %s(% x)", test.output[0].Term, test.output[0].Term, actual[0].Term, actual[0].Term)
}
}
}

View file

@ -0,0 +1,52 @@
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package es
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/snowballstem"
"github.com/blevesearch/snowballstem/spanish"
)
const SnowballStemmerName = "stemmer_es_snowball"
type SpanishStemmerFilter struct {
}
func NewSpanishStemmerFilter() *SpanishStemmerFilter {
return &SpanishStemmerFilter{}
}
func (s *SpanishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
env := snowballstem.NewEnv(string(token.Term))
spanish.Stem(env)
token.Term = []byte(env.Current())
}
return input
}
func SpanishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewSpanishStemmerFilter(), nil
}
func init() {
err := registry.RegisterTokenFilter(SnowballStemmerName, SpanishStemmerFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,79 @@
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package es
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestSnowballSpanishStemmer(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("agresivos"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("agres"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("agresivamente"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("agres"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("agresividad"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("agres"),
},
},
},
}
cache := registry.NewCache()
filter, err := cache.TokenFilterNamed(SnowballStemmerName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := filter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
}
}
}

View file

@ -0,0 +1,36 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package es
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
func StopTokenFilterConstructor(config map[string]interface{},
cache *registry.Cache) (analysis.TokenFilter, error) {
tokenMap, err := cache.TokenMapNamed(StopName)
if err != nil {
return nil, err
}
return stop.NewStopTokensFilter(tokenMap), nil
}
func init() {
err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,383 @@
package es
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const StopName = "stop_es"
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// ` was changed to ' to allow for literal string
var SpanishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Spanish stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
| The following is a ranked list (commonest to rarest) of stopwords
| deriving from a large sample of text.
| Extra words have been added at the end.
de | from, of
la | the, her
que | who, that
el | the
en | in
y | and
a | to
los | the, them
del | de + el
se | himself, from him etc
las | the, them
por | for, by, etc
un | a
para | for
con | with
no | no
una | a
su | his, her
al | a + el
| es from SER
lo | him
como | how
más | more
pero | pero
sus | su plural
le | to him, her
ya | already
o | or
| fue from SER
este | this
| ha from HABER
| himself etc
porque | because
esta | this
| son from SER
entre | between
| está from ESTAR
cuando | when
muy | very
sin | without
sobre | on
| ser from SER
| tiene from TENER
también | also
me | me
hasta | until
hay | there is/are
donde | where
| han from HABER
quien | whom, that
| están from ESTAR
| estado from ESTAR
desde | from
todo | all
nos | us
durante | during
| estados from ESTAR
todos | all
uno | a
les | to them
ni | nor
contra | against
otros | other
| fueron from SER
ese | that
eso | that
| había from HABER
ante | before
ellos | they
e | and (variant of y)
esto | this
| me
antes | before
algunos | some
qué | what?
unos | a
yo | I
otro | other
otras | other
otra | other
él | he
tanto | so much, many
esa | that
estos | these
mucho | much, many
quienes | who
nada | nothing
muchos | many
cual | who
| sea from SER
poco | few
ella | she
estar | to be
| haber from HABER
estas | these
| estaba from ESTAR
| estamos from ESTAR
algunas | some
algo | something
nosotros | we
| other forms
mi | me
mis | mi plural
| thou
te | thee
ti | thee
tu | thy
tus | tu plural
ellas | they
nosotras | we
vosotros | you
vosotras | you
os | you
mío | mine
mía |
míos |
mías |
tuyo | thine
tuya |
tuyos |
tuyas |
suyo | his, hers, theirs
suya |
suyos |
suyas |
nuestro | ours
nuestra |
nuestros |
nuestras |
vuestro | yours
vuestra |
vuestros |
vuestras |
esos | those
esas | those
| forms of estar, to be (not including the infinitive):
estoy
estás
está
estamos
estáis
están
esté
estés
estemos
estéis
estén
estaré
estarás
estará
estaremos
estaréis
estarán
estaría
estarías
estaríamos
estaríais
estarían
estaba
estabas
estábamos
estabais
estaban
estuve
estuviste
estuvo
estuvimos
estuvisteis
estuvieron
estuviera
estuvieras
estuviéramos
estuvierais
estuvieran
estuviese
estuvieses
estuviésemos
estuvieseis
estuviesen
estando
estado
estada
estados
estadas
estad
| forms of haber, to have (not including the infinitive):
he
has
ha
hemos
habéis
han
haya
hayas
hayamos
hayáis
hayan
habré
habrás
habrá
habremos
habréis
habrán
habría
habrías
habríamos
habríais
habrían
había
habías
habíamos
habíais
habían
hube
hubiste
hubo
hubimos
hubisteis
hubieron
hubiera
hubieras
hubiéramos
hubierais
hubieran
hubiese
hubieses
hubiésemos
hubieseis
hubiesen
habiendo
habido
habida
habidos
habidas
| forms of ser, to be (not including the infinitive):
soy
eres
es
somos
sois
son
sea
seas
seamos
seáis
sean
seré
serás
será
seremos
seréis
serán
sería
serías
seríamos
seríais
serían
era
eras
éramos
erais
eran
fui
fuiste
fue
fuimos
fuisteis
fueron
fuera
fueras
fuéramos
fuerais
fueran
fuese
fueses
fuésemos
fueseis
fuesen
siendo
sido
| sed also means 'thirst'
| forms of tener, to have (not including the infinitive):
tengo
tienes
tiene
tenemos
tenéis
tienen
tenga
tengas
tengamos
tengáis
tengan
tendré
tendrás
tendrá
tendremos
tendréis
tendrán
tendría
tendrías
tendríamos
tendríais
tendrían
tenía
tenías
teníamos
teníais
tenían
tuve
tuviste
tuvo
tuvimos
tuvisteis
tuvieron
tuviera
tuvieras
tuviéramos
tuvierais
tuvieran
tuviese
tuvieses
tuviésemos
tuvieseis
tuviesen
teniendo
tenido
tenida
tenidos
tenidas
tened
`)
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
rv := analysis.NewTokenMap()
err := rv.LoadBytes(SpanishStopWords)
return rv, err
}
func init() {
err := registry.RegisterTokenMap(StopName, TokenMapConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,36 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package eu
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
tokenMap, err := cache.TokenMapNamed(StopName)
if err != nil {
return nil, err
}
return stop.NewStopTokensFilter(tokenMap), nil
}
func init() {
err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,126 @@
package eu
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const StopName = "stop_eu"
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// ` was changed to ' to allow for literal string
var BasqueStopWords = []byte(`# example set of basque stopwords
al
anitz
arabera
asko
baina
bat
batean
batek
bati
batzuei
batzuek
batzuetan
batzuk
bera
beraiek
berau
berauek
bere
berori
beroriek
beste
bezala
da
dago
dira
ditu
du
dute
edo
egin
ere
eta
eurak
ez
gainera
gu
gutxi
guzti
haiei
haiek
haietan
hainbeste
hala
han
handik
hango
hara
hari
hark
hartan
hau
hauei
hauek
hauetan
hemen
hemendik
hemengo
hi
hona
honek
honela
honetan
honi
hor
hori
horiei
horiek
horietan
horko
horra
horrek
horrela
horretan
horri
hortik
hura
izan
ni
noiz
nola
non
nondik
nongo
nor
nora
ze
zein
zen
zenbait
zenbat
zer
zergatik
ziren
zituen
zu
zuek
zuen
zuten
`)
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
rv := analysis.NewTokenMap()
err := rv.LoadBytes(BasqueStopWords)
return rv, err
}
func init() {
err := registry.RegisterTokenMap(StopName, TokenMapConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,74 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fa
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/char/zerowidthnonjoiner"
"github.com/blevesearch/bleve/v2/analysis/lang/ar"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
const AnalyzerName = "fa"
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
zFilter, err := cache.CharFilterNamed(zerowidthnonjoiner.Name)
if err != nil {
return nil, err
}
unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name)
if err != nil {
return nil, err
}
normArFilter, err := cache.TokenFilterNamed(ar.NormalizeName)
if err != nil {
return nil, err
}
normFaFilter, err := cache.TokenFilterNamed(NormalizeName)
if err != nil {
return nil, err
}
toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
if err != nil {
return nil, err
}
stopFaFilter, err := cache.TokenFilterNamed(StopName)
if err != nil {
return nil, err
}
rv := analysis.DefaultAnalyzer{
CharFilters: []analysis.CharFilter{
zFilter,
},
Tokenizer: unicodeTokenizer,
TokenFilters: []analysis.TokenFilter{
toLowerFilter,
normArFilter,
normFaFilter,
stopFaFilter,
},
}
return &rv, nil
}
func init() {
err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,684 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fa
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestPersianAnalyzerVerbs(t *testing.T) {
tests := []struct {
input []byte
output analysis.TokenStream
}{
// active present indicative
{
input: []byte("می‌خورد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورد"),
},
},
},
// active preterite indicative
{
input: []byte("خورد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورد"),
},
},
},
// active imperfective preterite indicative
{
input: []byte("می‌خورد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورد"),
},
},
},
// active future indicative
{
input: []byte("خواهد خورد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورد"),
},
},
},
// active present progressive indicative
{
input: []byte("دارد می‌خورد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورد"),
},
},
},
// active preterite progressive indicative
{
input: []byte("داشت می‌خورد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورد"),
},
},
},
// active perfect indicative
{
input: []byte("خورده‌است"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// active imperfective perfect indicative
{
input: []byte("می‌خورده‌است"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// active pluperfect indicative
{
input: []byte("خورده بود"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// active imperfective pluperfect indicative
{
input: []byte("می‌خورده بود"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// active preterite subjunctive
{
input: []byte("خورده باشد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// active imperfective preterite subjunctive
{
input: []byte("می‌خورده باشد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// active pluperfect subjunctive
{
input: []byte("خورده بوده باشد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// active imperfective pluperfect subjunctive
{
input: []byte("می‌خورده بوده باشد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive present indicative
{
input: []byte("خورده می‌شود"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive preterite indicative
{
input: []byte("خورده شد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive imperfective preterite indicative
{
input: []byte("خورده می‌شد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive perfect indicative
{
input: []byte("خورده شده‌است"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive imperfective perfect indicative
{
input: []byte("خورده می‌شده‌است"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive pluperfect indicative
{
input: []byte("خورده شده بود"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive imperfective pluperfect indicative
{
input: []byte("خورده می‌شده بود"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive future indicative
{
input: []byte("خورده خواهد شد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive present progressive indicative
{
input: []byte("دارد خورده می‌شود"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive preterite progressive indicative
{
input: []byte("داشت خورده می‌شد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive present subjunctive
{
input: []byte("خورده شود"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive preterite subjunctive
{
input: []byte("خورده شده باشد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive imperfective preterite subjunctive
{
input: []byte("خورده می‌شده باشد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive pluperfect subjunctive
{
input: []byte("خورده شده بوده باشد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive imperfective pluperfect subjunctive
{
input: []byte("خورده می‌شده بوده باشد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// active present subjunctive
{
input: []byte("بخورد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("بخورد"),
},
},
},
}
cache := registry.NewCache()
analyzer, err := cache.AnalyzerNamed(AnalyzerName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
if len(actual) != len(test.output) {
t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
}
for i, tok := range actual {
if !reflect.DeepEqual(tok.Term, test.output[i].Term) {
t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term)
}
}
}
}
func TestPersianAnalyzerVerbsDefective(t *testing.T) {
tests := []struct {
input []byte
output analysis.TokenStream
}{
// active present indicative
{
input: []byte("مي خورد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورد"),
},
},
},
// active preterite indicative
{
input: []byte("خورد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورد"),
},
},
},
// active imperfective preterite indicative
{
input: []byte("مي خورد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورد"),
},
},
},
// active future indicative
{
input: []byte("خواهد خورد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورد"),
},
},
},
// active present progressive indicative
{
input: []byte("دارد مي خورد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورد"),
},
},
},
// active preterite progressive indicative
{
input: []byte("داشت مي خورد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورد"),
},
},
},
// active perfect indicative
{
input: []byte("خورده است"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// active imperfective perfect indicative
{
input: []byte("مي خورده است"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// active pluperfect indicative
{
input: []byte("خورده بود"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// active imperfective pluperfect indicative
{
input: []byte("مي خورده بود"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// active preterite subjunctive
{
input: []byte("خورده باشد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// active imperfective preterite subjunctive
{
input: []byte("مي خورده باشد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// active pluperfect subjunctive
{
input: []byte("خورده بوده باشد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// active imperfective pluperfect subjunctive
{
input: []byte("مي خورده بوده باشد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive present indicative
{
input: []byte("خورده مي شود"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive preterite indicative
{
input: []byte("خورده شد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive imperfective preterite indicative
{
input: []byte("خورده مي شد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive perfect indicative
{
input: []byte("خورده شده است"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive imperfective perfect indicative
{
input: []byte("خورده مي شده است"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive pluperfect indicative
{
input: []byte("خورده شده بود"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive imperfective pluperfect indicative
{
input: []byte("خورده مي شده بود"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive future indicative
{
input: []byte("خورده خواهد شد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive present progressive indicative
{
input: []byte("دارد خورده مي شود"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive preterite progressive indicative
{
input: []byte("داشت خورده مي شد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive present subjunctive
{
input: []byte("خورده شود"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive preterite subjunctive
{
input: []byte("خورده شده باشد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive imperfective preterite subjunctive
{
input: []byte("خورده مي شده باشد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive pluperfect subjunctive
{
input: []byte("خورده شده بوده باشد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// passive imperfective pluperfect subjunctive
{
input: []byte("خورده مي شده بوده باشد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
// active present subjunctive
{
input: []byte("بخورد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("بخورد"),
},
},
},
}
cache := registry.NewCache()
analyzer, err := cache.AnalyzerNamed(AnalyzerName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
if len(actual) != len(test.output) {
t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
}
for i, tok := range actual {
if !reflect.DeepEqual(tok.Term, test.output[i].Term) {
t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term)
}
}
}
}
func TestPersianAnalyzerOthers(t *testing.T) {
tests := []struct {
input []byte
output analysis.TokenStream
}{
// nouns
{
input: []byte("برگ ها"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("برگ"),
},
},
},
{
input: []byte("برگ‌ها"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("برگ"),
},
},
},
// non persian
{
input: []byte("English test."),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("english"),
},
&analysis.Token{
Term: []byte("test"),
},
},
},
// others
{
input: []byte("خورده مي شده بوده باشد"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("خورده"),
},
},
},
{
input: []byte("برگ‌ها"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("برگ"),
},
},
},
}
cache := registry.NewCache()
analyzer, err := cache.AnalyzerNamed(AnalyzerName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
if len(actual) != len(test.output) {
t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
}
for i, tok := range actual {
if !reflect.DeepEqual(tok.Term, test.output[i].Term) {
t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term)
}
}
}
}

View file

@ -0,0 +1,80 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fa
import (
"bytes"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const NormalizeName = "normalize_fa"
const (
Yeh = '\u064A'
FarsiYeh = '\u06CC'
YehBarree = '\u06D2'
Keheh = '\u06A9'
Kaf = '\u0643'
HamzaAbove = '\u0654'
HehYeh = '\u06C0'
HehGoal = '\u06C1'
Heh = '\u0647'
)
type PersianNormalizeFilter struct {
}
func NewPersianNormalizeFilter() *PersianNormalizeFilter {
return &PersianNormalizeFilter{}
}
func (s *PersianNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
term := normalize(token.Term)
token.Term = term
}
return input
}
func normalize(input []byte) []byte {
runes := bytes.Runes(input)
for i := 0; i < len(runes); i++ {
switch runes[i] {
case FarsiYeh, YehBarree:
runes[i] = Yeh
case Keheh:
runes[i] = Kaf
case HehYeh, HehGoal:
runes[i] = Heh
case HamzaAbove: // delete the combining hamza; necessary for HEH + HAMZA
runes = analysis.DeleteRune(runes, i)
i--
}
}
return analysis.BuildTermFromRunes(runes)
}
func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewPersianNormalizeFilter(), nil
}
func init() {
err := registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor)
if err != nil {
panic(err)
}
}
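A minimal in-package sketch (not part of the upstream diff) of the HamzaAbove branch: unlike the substitution cases, the combining hamza is deleted outright, shortening the term.
// hypothetical in-package usage sketch
f := NewPersianNormalizeFilter()
out := f.Filter(analysis.TokenStream{
	&analysis.Token{Term: []byte("كتابهٔ")}, // HEH + HAMZA ABOVE
})
// out[0].Term is now "كتابه"; the HehHamzaAbove test below covers this case.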

View file

@ -0,0 +1,130 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fa
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
func TestPersianNormalizeFilter(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
// FarsiYeh
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("های"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("هاي"),
},
},
},
// YehBarree
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("هاے"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("هاي"),
},
},
},
// Keheh
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("کشاندن"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("كشاندن"),
},
},
},
// HehYeh
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("كتابۀ"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("كتابه"),
},
},
},
// HehHamzaAbove
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("كتابهٔ"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("كتابه"),
},
},
},
// HehGoal
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("زادہ"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("زاده"),
},
},
},
// empty
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
}
persianNormalizeFilter := NewPersianNormalizeFilter()
for _, test := range tests {
actual := persianNormalizeFilter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %#v, got %#v", test.output, actual)
t.Errorf("expected % x, got % x", test.output[0].Term, actual[0].Term)
}
}
}

View file

@ -0,0 +1,36 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fa
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
tokenMap, err := cache.TokenMapNamed(StopName)
if err != nil {
return nil, err
}
return stop.NewStopTokensFilter(tokenMap), nil
}
func init() {
err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
if err != nil {
panic(err)
}
}
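A minimal in-package sketch (not part of the upstream diff) of the registered filter in action; "من" appears in the Persian stop list later in this diff, so the token stream comes back empty.
// hypothetical in-package usage sketch
cache := registry.NewCache()
stopFilter, err := cache.TokenFilterNamed(StopName)
if err != nil {
	panic(err)
}
out := stopFilter.Filter(analysis.TokenStream{
	&analysis.Token{Term: []byte("من")},
})
// len(out) == 0: the stop word was removed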

View file

@ -0,0 +1,340 @@
package fa
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const StopName = "stop_fa"
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// ` was changed to ' to allow for literal string
var PersianStopWords = []byte(`# This file was created by Jacques Savoy and is distributed under the BSD license.
# See http://members.unine.ch/jacques.savoy/clef/index.html.
# Also see http://www.opensource.org/licenses/bsd-license.html
# Note: by default this file is used after normalization, so when adding entries
# to this file, use the arabic 'ي' instead of 'ی'
انان
نداشته
سراسر
خياه
ايشان
وي
تاكنون
بيشتري
دوم
پس
ناشي
وگو
يا
داشتند
سپس
هنگام
هرگز
پنج
نشان
امسال
ديگر
گروهي
شدند
چطور
ده
و
دو
نخستين
ولي
چرا
چه
وسط
ه
كدام
قابل
يك
رفت
هفت
همچنين
در
هزار
بله
بلي
شايد
اما
شناسي
گرفته
دهد
داشته
دانست
داشتن
خواهيم
ميليارد
وقتيكه
امد
خواهد
جز
اورده
شده
بلكه
خدمات
شدن
برخي
نبود
بسياري
جلوگيري
حق
كردند
نوعي
بعري
نكرده
نظير
نبايد
بوده
بودن
داد
اورد
هست
جايي
شود
دنبال
داده
بايد
سابق
هيچ
همان
انجا
كمتر
كجاست
گردد
كسي
تر
مردم
تان
دادن
بودند
سري
جدا
ندارند
مگر
يكديگر
دارد
دهند
بنابراين
هنگامي
سمت
جا
انچه
خود
دادند
زياد
دارند
اثر
بدون
بهترين
بيشتر
البته
به
براساس
بيرون
كرد
بعضي
گرفت
توي
اي
ميليون
او
جريان
تول
بر
مانند
برابر
باشيم
مدتي
گويند
اكنون
تا
تنها
جديد
چند
بي
نشده
كردن
كردم
گويد
كرده
كنيم
نمي
نزد
روي
قصد
فقط
بالاي
ديگران
اين
ديروز
توسط
سوم
ايم
دانند
سوي
استفاده
شما
كنار
داريم
ساخته
طور
امده
رفته
نخست
بيست
نزديك
طي
كنيد
از
انها
تمامي
داشت
يكي
طريق
اش
چيست
روب
نمايد
گفت
چندين
چيزي
تواند
ام
ايا
با
ان
ايد
ترين
اينكه
ديگري
راه
هايي
بروز
همچنان
پاعين
كس
حدود
مختلف
مقابل
چيز
گيرد
ندارد
ضد
همچون
سازي
شان
مورد
باره
مرسي
خويش
برخوردار
چون
خارج
شش
هنوز
تحت
ضمن
هستيم
گفته
فكر
بسيار
پيش
براي
روزهاي
انكه
نخواهد
بالا
كل
وقتي
كي
چنين
كه
گيري
نيست
است
كجا
كند
نيز
يابد
بندي
حتي
توانند
عقب
خواست
كنند
بين
تمام
همه
ما
باشند
مثل
شد
اري
باشد
اره
طبق
بعد
اگر
صورت
غير
جاي
بيش
ريزي
اند
زيرا
چگونه
بار
لطفا
مي
درباره
من
ديده
همين
گذاري
برداري
علت
گذاشته
هم
فوق
نه
ها
شوند
اباد
همواره
هر
اول
خواهند
چهار
نام
امروز
مان
هاي
قبل
كنم
سعي
تازه
را
هستند
زير
جلوي
عنوان
بود
`)
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
rv := analysis.NewTokenMap()
err := rv.LoadBytes(PersianStopWords)
return rv, err
}
func init() {
err := registry.RegisterTokenMap(StopName, TokenMapConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,60 @@
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fi
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
const AnalyzerName = "fi"
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name)
if err != nil {
return nil, err
}
toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
if err != nil {
return nil, err
}
stopFiFilter, err := cache.TokenFilterNamed(StopName)
if err != nil {
return nil, err
}
stemmerFiFilter, err := cache.TokenFilterNamed(SnowballStemmerName)
if err != nil {
return nil, err
}
rv := analysis.DefaultAnalyzer{
Tokenizer: unicodeTokenizer,
TokenFilters: []analysis.TokenFilter{
toLowerFilter,
stopFiFilter,
stemmerFiFilter,
},
}
return &rv, nil
}
func init() {
err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,70 @@
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fi
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestFinnishAnalyzer(t *testing.T) {
tests := []struct {
input []byte
output analysis.TokenStream
}{
// stemming
{
input: []byte("edeltäjiinsä"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("edeltäj"),
},
},
},
{
input: []byte("edeltäjistään"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("edeltäj"),
},
},
},
// stop word
{
input: []byte("olla"),
output: analysis.TokenStream{},
},
}
cache := registry.NewCache()
analyzer, err := cache.AnalyzerNamed(AnalyzerName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
if len(actual) != len(test.output) {
t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
}
for i, tok := range actual {
if !reflect.DeepEqual(tok.Term, test.output[i].Term) {
t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term)
}
}
}
}

View file

@ -0,0 +1,52 @@
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fi
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/snowballstem"
"github.com/blevesearch/snowballstem/finnish"
)
const SnowballStemmerName = "stemmer_fi_snowball"
type FinnishStemmerFilter struct {
}
func NewFinnishStemmerFilter() *FinnishStemmerFilter {
return &FinnishStemmerFilter{}
}
func (s *FinnishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
env := snowballstem.NewEnv(string(token.Term))
finnish.Stem(env)
token.Term = []byte(env.Current())
}
return input
}
func FinnishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewFinnishStemmerFilter(), nil
}
func init() {
err := registry.RegisterTokenFilter(SnowballStemmerName, FinnishStemmerFilterConstructor)
if err != nil {
panic(err)
}
}
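A minimal sketch (not part of the upstream diff) of the snowball environment on its own, without the token-filter plumbing; the expected result matches the Finnish analyzer test earlier in this diff.
// hypothetical usage sketch
env := snowballstem.NewEnv("edeltäjiinsä")
finnish.Stem(env)
// env.Current() == "edeltäj"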

View file

@ -0,0 +1,36 @@
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fi
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
tokenMap, err := cache.TokenMapNamed(StopName)
if err != nil {
return nil, err
}
return stop.NewStopTokensFilter(tokenMap), nil
}
func init() {
err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,124 @@
package fi
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const StopName = "stop_fi"
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// ` was changed to ' to allow for literal string
var FinnishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| forms of BE
olla
olen
olet
on
olemme
olette
ovat
ole | negative form
oli
olisi
olisit
olisin
olisimme
olisitte
olisivat
olit
olin
olimme
olitte
olivat
ollut
olleet
en | negation
et
ei
emme
ette
eivät
|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans
minä minun minut minua minussa minusta minuun minulla minulta minulle | I
sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you
hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she
me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we
te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you
he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they
tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this
tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it
nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these
nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those
ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they
kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl)
mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what
mitkä | (pl)
joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which
jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl)
| conjunctions
että | that
ja | and
jos | if
koska | because
kuin | than
mutta | but
niin | so
sekä | and
sillä | for
tai | or
vaan | but
vai | or
vaikka | although
| prepositions
kanssa | with
mukaan | according to
noin | about
poikki | across
yli | over, across
| other
kun | when
niin | so
nyt | now
itse | self
`)
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
rv := analysis.NewTokenMap()
err := rv.LoadBytes(FinnishStopWords)
return rv, err
}
func init() {
err := registry.RegisterTokenMap(StopName, TokenMapConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,65 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
const AnalyzerName = "fr"
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
tokenizer, err := cache.TokenizerNamed(unicode.Name)
if err != nil {
return nil, err
}
elisionFilter, err := cache.TokenFilterNamed(ElisionName)
if err != nil {
return nil, err
}
toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
if err != nil {
return nil, err
}
stopFrFilter, err := cache.TokenFilterNamed(StopName)
if err != nil {
return nil, err
}
stemmerFrFilter, err := cache.TokenFilterNamed(LightStemmerName)
if err != nil {
return nil, err
}
rv := analysis.DefaultAnalyzer{
Tokenizer: tokenizer,
TokenFilters: []analysis.TokenFilter{
toLowerFilter,
elisionFilter,
stopFrFilter,
stemmerFrFilter,
},
}
return &rv, nil
}
func init() {
err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
if err != nil {
panic(err)
}
}
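One ordering detail worth a sketch (not part of the upstream diff): the elision filter runs after lowercasing, so an uppercase article is still stripped before stop-word removal and stemming.
// hypothetical in-package usage sketch
cache := registry.NewCache()
analyzer, err := cache.AnalyzerNamed(AnalyzerName)
if err != nil {
	panic(err)
}
tokens := analyzer.Analyze([]byte("L'avion"))
// tokens[0].Term == "avion": lowercased, elided, not a stop word, and
// passed through the light stemmer unchanged (no suffix rule applies)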

View file

@ -0,0 +1,209 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestFrenchAnalyzer(t *testing.T) {
tests := []struct {
input []byte
output analysis.TokenStream
}{
{
input: []byte(""),
output: analysis.TokenStream{},
},
{
input: []byte("chien chat cheval"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("chien"),
},
&analysis.Token{
Term: []byte("chat"),
},
&analysis.Token{
Term: []byte("cheval"),
},
},
},
{
input: []byte("chien CHAT CHEVAL"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("chien"),
},
&analysis.Token{
Term: []byte("chat"),
},
&analysis.Token{
Term: []byte("cheval"),
},
},
},
{
input: []byte(" chien ,? + = - CHAT /: > CHEVAL"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("chien"),
},
&analysis.Token{
Term: []byte("chat"),
},
&analysis.Token{
Term: []byte("cheval"),
},
},
},
{
input: []byte("chien++"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("chien"),
},
},
},
{
input: []byte("mot \"entreguillemet\""),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("mot"),
},
&analysis.Token{
Term: []byte("entreguilemet"),
},
},
},
{
input: []byte("Jean-François"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("jean"),
},
&analysis.Token{
Term: []byte("francoi"),
},
},
},
// stop words
{
input: []byte("le la chien les aux chat du des à cheval"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("chien"),
},
&analysis.Token{
Term: []byte("chat"),
},
&analysis.Token{
Term: []byte("cheval"),
},
},
},
// nouns and adjectives
{
input: []byte("lances chismes habitable chiste éléments captifs"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("lanc"),
},
&analysis.Token{
Term: []byte("chism"),
},
&analysis.Token{
Term: []byte("habitabl"),
},
&analysis.Token{
Term: []byte("chist"),
},
&analysis.Token{
Term: []byte("element"),
},
&analysis.Token{
Term: []byte("captif"),
},
},
},
// verbs
{
input: []byte("finissions souffrirent rugissante"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("finision"),
},
&analysis.Token{
Term: []byte("soufrirent"),
},
&analysis.Token{
Term: []byte("rugisant"),
},
},
},
{
input: []byte("C3PO aujourd'hui oeuf ïâöûàä anticonstitutionnellement Java++ "),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("c3po"),
},
&analysis.Token{
Term: []byte("aujourd'hui"),
},
&analysis.Token{
Term: []byte("oeuf"),
},
&analysis.Token{
Term: []byte("ïaöuaä"),
},
&analysis.Token{
Term: []byte("anticonstitutionel"),
},
&analysis.Token{
Term: []byte("java"),
},
},
},
{
input: []byte("propriétaire"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("proprietair"),
},
},
},
}
cache := registry.NewCache()
analyzer, err := cache.AnalyzerNamed(AnalyzerName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
if len(actual) != len(test.output) {
t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
}
for i, tok := range actual {
if !reflect.DeepEqual(tok.Term, test.output[i].Term) {
t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term)
}
}
}
}

View file

@ -0,0 +1,40 @@
package fr
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const ArticlesName = "articles_fr"
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis
var FrenchArticles = []byte(`
l
m
t
qu
n
s
j
d
c
jusqu
quoiqu
lorsqu
puisqu
`)
func ArticlesTokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
rv := analysis.NewTokenMap()
err := rv.LoadBytes(FrenchArticles)
return rv, err
}
func init() {
err := registry.RegisterTokenMap(ArticlesName, ArticlesTokenMapConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,40 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/elision"
"github.com/blevesearch/bleve/v2/registry"
)
const ElisionName = "elision_fr"
func ElisionFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
articlesTokenMap, err := cache.TokenMapNamed(ArticlesName)
if err != nil {
return nil, fmt.Errorf("error building elision filter: %v", err)
}
return elision.NewElisionFilter(articlesTokenMap), nil
}
func init() {
err := registry.RegisterTokenFilter(ElisionName, ElisionFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,55 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestFrenchElision(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("l'avion"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("avion"),
},
},
},
}
cache := registry.NewCache()
elisionFilter, err := cache.TokenFilterNamed(ElisionName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := elisionFilter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
}
}
}

View file

@ -0,0 +1,309 @@
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"bytes"
"unicode"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const LightStemmerName = "stemmer_fr_light"
type FrenchLightStemmerFilter struct {
}
func NewFrenchLightStemmerFilter() *FrenchLightStemmerFilter {
return &FrenchLightStemmerFilter{}
}
func (s *FrenchLightStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
runes := bytes.Runes(token.Term)
runes = stem(runes)
token.Term = analysis.BuildTermFromRunes(runes)
}
return input
}
func stem(input []rune) []rune {
inputLen := len(input)
if inputLen > 5 && input[inputLen-1] == 'x' {
if input[inputLen-3] == 'a' && input[inputLen-2] == 'u' && input[inputLen-4] != 'e' {
input[inputLen-2] = 'l'
}
input = input[0 : inputLen-1]
inputLen = len(input)
}
if inputLen > 3 && input[inputLen-1] == 'x' {
input = input[0 : inputLen-1]
inputLen = len(input)
}
if inputLen > 3 && input[inputLen-1] == 's' {
input = input[0 : inputLen-1]
inputLen = len(input)
}
if inputLen > 9 && analysis.RunesEndsWith(input, "issement") {
input = input[0 : inputLen-6]
inputLen = len(input)
input[inputLen-1] = 'r'
return norm(input)
}
if inputLen > 8 && analysis.RunesEndsWith(input, "issant") {
input = input[0 : inputLen-4]
inputLen = len(input)
input[inputLen-1] = 'r'
return norm(input)
}
if inputLen > 6 && analysis.RunesEndsWith(input, "ement") {
input = input[0 : inputLen-4]
inputLen = len(input)
if inputLen > 3 && analysis.RunesEndsWith(input, "ive") {
input = input[0 : inputLen-1]
inputLen = len(input)
input[inputLen-1] = 'f'
}
return norm(input)
}
if inputLen > 11 && analysis.RunesEndsWith(input, "ficatrice") {
input = input[0 : inputLen-5]
inputLen = len(input)
input[inputLen-2] = 'e'
input[inputLen-1] = 'r'
return norm(input)
}
if inputLen > 10 && analysis.RunesEndsWith(input, "ficateur") {
input = input[0 : inputLen-4]
inputLen = len(input)
input[inputLen-2] = 'e'
input[inputLen-1] = 'r'
return norm(input)
}
if inputLen > 9 && analysis.RunesEndsWith(input, "catrice") {
input = input[0 : inputLen-3]
inputLen = len(input)
input[inputLen-4] = 'q'
input[inputLen-3] = 'u'
input[inputLen-2] = 'e'
// s[len-1] = 'r' <-- unnecessary, already 'r'.
return norm(input)
}
if inputLen > 8 && analysis.RunesEndsWith(input, "cateur") {
input = input[0 : inputLen-2]
inputLen = len(input)
input[inputLen-4] = 'q'
input[inputLen-3] = 'u'
input[inputLen-2] = 'e'
input[inputLen-1] = 'r'
return norm(input)
}
if inputLen > 8 && analysis.RunesEndsWith(input, "atrice") {
input = input[0 : inputLen-4]
inputLen = len(input)
input[inputLen-2] = 'e'
input[inputLen-1] = 'r'
return norm(input)
}
if inputLen > 7 && analysis.RunesEndsWith(input, "ateur") {
input = input[0 : inputLen-3]
inputLen = len(input)
input[inputLen-2] = 'e'
input[inputLen-1] = 'r'
return norm(input)
}
if inputLen > 6 && analysis.RunesEndsWith(input, "trice") {
input = input[0 : inputLen-1]
inputLen = len(input)
input[inputLen-3] = 'e'
input[inputLen-2] = 'u'
input[inputLen-1] = 'r'
}
if inputLen > 5 && analysis.RunesEndsWith(input, "ième") {
return norm(input[0 : inputLen-4])
}
if inputLen > 7 && analysis.RunesEndsWith(input, "teuse") {
input = input[0 : inputLen-2]
inputLen = len(input)
input[inputLen-1] = 'r'
return norm(input)
}
if inputLen > 6 && analysis.RunesEndsWith(input, "teur") {
input = input[0 : inputLen-1]
inputLen = len(input)
input[inputLen-1] = 'r'
return norm(input)
}
if inputLen > 5 && analysis.RunesEndsWith(input, "euse") {
return norm(input[0 : inputLen-2])
}
if inputLen > 8 && analysis.RunesEndsWith(input, "ère") {
input = input[0 : inputLen-1]
inputLen = len(input)
input[inputLen-2] = 'e'
return norm(input)
}
if inputLen > 7 && analysis.RunesEndsWith(input, "ive") {
input = input[0 : inputLen-1]
inputLen = len(input)
input[inputLen-1] = 'f'
return norm(input)
}
if inputLen > 4 &&
(analysis.RunesEndsWith(input, "folle") ||
analysis.RunesEndsWith(input, "molle")) {
input = input[0 : inputLen-2]
inputLen = len(input)
input[inputLen-1] = 'u'
return norm(input)
}
if inputLen > 9 && analysis.RunesEndsWith(input, "nnelle") {
return norm(input[0 : inputLen-5])
}
if inputLen > 9 && analysis.RunesEndsWith(input, "nnel") {
return norm(input[0 : inputLen-3])
}
if inputLen > 4 && analysis.RunesEndsWith(input, "ète") {
input = input[0 : inputLen-1]
inputLen = len(input)
input[inputLen-2] = 'e'
}
if inputLen > 8 && analysis.RunesEndsWith(input, "ique") {
input = input[0 : inputLen-4]
inputLen = len(input)
}
if inputLen > 8 && analysis.RunesEndsWith(input, "esse") {
return norm(input[0 : inputLen-3])
}
if inputLen > 7 && analysis.RunesEndsWith(input, "inage") {
return norm(input[0 : inputLen-3])
}
if inputLen > 9 && analysis.RunesEndsWith(input, "isation") {
input = input[0 : inputLen-7]
inputLen = len(input)
if inputLen > 5 && analysis.RunesEndsWith(input, "ual") {
input[inputLen-2] = 'e'
}
return norm(input)
}
if inputLen > 9 && analysis.RunesEndsWith(input, "isateur") {
return norm(input[0 : inputLen-7])
}
if inputLen > 8 && analysis.RunesEndsWith(input, "ation") {
return norm(input[0 : inputLen-5])
}
if inputLen > 8 && analysis.RunesEndsWith(input, "ition") {
return norm(input[0 : inputLen-5])
}
return norm(input)
}
func norm(input []rune) []rune {
if len(input) > 4 {
for i := 0; i < len(input); i++ {
switch input[i] {
case 'à', 'á', 'â':
input[i] = 'a'
case 'ô':
input[i] = 'o'
case 'è', 'é', 'ê':
input[i] = 'e'
case 'ù', 'û':
input[i] = 'u'
case 'î':
input[i] = 'i'
case 'ç':
input[i] = 'c'
}
ch := input[0]
for i := 1; i < len(input); i++ {
if input[i] == ch && unicode.IsLetter(ch) {
input = analysis.DeleteRune(input, i)
i -= 1
} else {
ch = input[i]
}
}
}
}
if len(input) > 4 && analysis.RunesEndsWith(input, "ie") {
input = input[0 : len(input)-2]
}
if len(input) > 4 {
if input[len(input)-1] == 'r' {
input = input[0 : len(input)-1]
}
if input[len(input)-1] == 'e' {
input = input[0 : len(input)-1]
}
if input[len(input)-1] == 'e' {
input = input[0 : len(input)-1]
}
if input[len(input)-1] == input[len(input)-2] && unicode.IsLetter(input[len(input)-1]) {
input = input[0 : len(input)-1]
}
}
return input
}
func FrenchLightStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewFrenchLightStemmerFilter(), nil
}
func init() {
err := registry.RegisterTokenFilter(LightStemmerName, FrenchLightStemmerFilterConstructor)
if err != nil {
panic(err)
}
}
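A minimal in-package sketch (not part of the upstream diff), tracing one path through stem and norm: the plural 's' is stripped first, then norm folds the accented vowels, matching the French analyzer test earlier in this diff.
// hypothetical in-package usage sketch
f := NewFrenchLightStemmerFilter()
out := f.Filter(analysis.TokenStream{
	&analysis.Token{Term: []byte("éléments")},
})
// out[0].Term == "element"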

File diff suppressed because it is too large

View file

@ -0,0 +1,82 @@
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"bytes"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const MinimalStemmerName = "stemmer_fr_min"
type FrenchMinimalStemmerFilter struct {
}
func NewFrenchMinimalStemmerFilter() *FrenchMinimalStemmerFilter {
return &FrenchMinimalStemmerFilter{}
}
func (s *FrenchMinimalStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
runes := bytes.Runes(token.Term)
runes = minstem(runes)
token.Term = analysis.BuildTermFromRunes(runes)
}
return input
}
func minstem(input []rune) []rune {
if len(input) < 6 {
return input
}
if input[len(input)-1] == 'x' {
if input[len(input)-3] == 'a' && input[len(input)-2] == 'u' {
input[len(input)-2] = 'l'
}
return input[0 : len(input)-1]
}
if input[len(input)-1] == 's' {
input = input[0 : len(input)-1]
}
if input[len(input)-1] == 'r' {
input = input[0 : len(input)-1]
}
if input[len(input)-1] == 'e' {
input = input[0 : len(input)-1]
}
if input[len(input)-1] == 'é' {
input = input[0 : len(input)-1]
}
if input[len(input)-1] == input[len(input)-2] {
input = input[0 : len(input)-1]
}
return input
}
func FrenchMinimalStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewFrenchMinimalStemmerFilter(), nil
}
func init() {
err := registry.RegisterTokenFilter(MinimalStemmerName, FrenchMinimalStemmerFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,139 @@
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestFrenchMinimalStemmer(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("chevaux"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("cheval"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("hiboux"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("hibou"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("chantés"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("chant"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("chanter"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("chant"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("chante"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("chant"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("baronnes"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("baron"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("barons"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("baron"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("baron"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("baron"),
},
},
},
}
cache := registry.NewCache()
filter, err := cache.TokenFilterNamed(MinimalStemmerName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := filter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
}
}
}

View file

@ -0,0 +1,52 @@
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/snowballstem"
"github.com/blevesearch/snowballstem/french"
)
const SnowballStemmerName = "stemmer_fr_snowball"
type FrenchStemmerFilter struct {
}
func NewFrenchStemmerFilter() *FrenchStemmerFilter {
return &FrenchStemmerFilter{}
}
func (s *FrenchStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
env := snowballstem.NewEnv(string(token.Term))
french.Stem(env)
token.Term = []byte(env.Current())
}
return input
}
func FrenchStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewFrenchStemmerFilter(), nil
}
func init() {
err := registry.RegisterTokenFilter(SnowballStemmerName, FrenchStemmerFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,79 @@
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestSnowballFrenchStemmer(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("antagoniste"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("antagon"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("barbouillait"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("barbouill"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("calculateur"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("calcul"),
},
},
},
}
cache := registry.NewCache()
filter, err := cache.TokenFilterNamed(SnowballStemmerName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := filter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
}
}
}

View file

@ -0,0 +1,36 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
tokenMap, err := cache.TokenMapNamed(StopName)
if err != nil {
return nil, err
}
return stop.NewStopTokensFilter(tokenMap), nil
}
func init() {
err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,213 @@
package fr
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const StopName = "stop_fr"
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// ` was changed to ' to allow for literal string
var FrenchStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A French stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
au | a + le
aux | a + les
avec | with
ce | this
ces | these
dans | in
de | of
des | de + les
du | de + le
elle | she
en | 'of them' etc
et | and
eux | them
il | he
je | I
la | the
le | the
leur | their
lui | him
ma | my (fem)
mais | but
me | me
même | same; as in moi-même (myself) etc
mes | me (pl)
moi | me
mon | my (masc)
ne | not
nos | our (pl)
notre | our
nous | we
on | one
ou | where
par | by
pas | not
pour | for
qu | que before vowel
que | that
qui | who
sa | his, her (fem)
se | oneself
ses | his (pl)
son | his, her (masc)
sur | on
ta | thy (fem)
te | thee
tes | thy (pl)
toi | thee
ton | thy (masc)
tu | thou
un | a
une | a
vos | your (pl)
votre | your
vous | you
| single letter forms
c | c'
d | d'
j | j'
l | l'
à | to, at
m | m'
n | n'
s | s'
t | t'
y | there
| forms of être (not including the infinitive):
été
étée
étées
étés
étant
suis
es
est
sommes
êtes
sont
serai
seras
sera
serons
serez
seront
serais
serait
serions
seriez
seraient
étais
était
étions
étiez
étaient
fus
fut
fûmes
fûtes
furent
sois
soit
soyons
soyez
soient
fusse
fusses
fût
fussions
fussiez
fussent
| forms of avoir (not including the infinitive):
ayant
eu
eue
eues
eus
ai
as
avons
avez
ont
aurai
auras
aura
aurons
aurez
auront
aurais
aurait
aurions
auriez
auraient
avais
avait
avions
aviez
avaient
eut
eûmes
eûtes
eurent
aie
aies
ait
ayons
ayez
aient
eusse
eusses
eût
eussions
eussiez
eussent
| Later additions (from Jean-Christophe Deschamps)
ceci | this
cela | that
celà | that
cet | this
cette | this
ici | here
ils | they
les | the (pl)
leurs | their (pl)
quel | which
quels | which
quelle | which
quelles | which
sans | without
soi | oneself
`)
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
rv := analysis.NewTokenMap()
err := rv.LoadBytes(FrenchStopWords)
return rv, err
}
func init() {
err := registry.RegisterTokenMap(StopName, TokenMapConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,30 @@
package ga
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const ArticlesName = "articles_ga"
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis
var IrishArticles = []byte(`
d
m
b
`)
func ArticlesTokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
rv := analysis.NewTokenMap()
err := rv.LoadBytes(IrishArticles)
return rv, err
}
func init() {
err := registry.RegisterTokenMap(ArticlesName, ArticlesTokenMapConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,40 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ga
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/elision"
"github.com/blevesearch/bleve/v2/registry"
)
const ElisionName = "elision_ga"
func ElisionFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
articlesTokenMap, err := cache.TokenMapNamed(ArticlesName)
if err != nil {
return nil, fmt.Errorf("error building elision filter: %v", err)
}
return elision.NewElisionFilter(articlesTokenMap), nil
}
func init() {
err := registry.RegisterTokenFilter(ElisionName, ElisionFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,55 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ga
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestIrishElision(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("b'fhearr"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("fhearr"),
},
},
},
}
cache := registry.NewCache()
elisionFilter, err := cache.TokenFilterNamed(ElisionName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := elisionFilter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
}
}
}

View file

@ -0,0 +1,36 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ga
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
tokenMap, err := cache.TokenMapNamed(StopName)
if err != nil {
return nil, err
}
return stop.NewStopTokensFilter(tokenMap), nil
}
func init() {
err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,137 @@
package ga
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const StopName = "stop_ga"
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// ` was changed to ' so the list can be embedded in a Go raw string literal
var IrishStopWords = []byte(`
a
ach
ag
agus
an
aon
ar
arna
as
b'
ba
beirt
bhúr
caoga
ceathair
ceathrar
chomh
chtó
chuig
chun
cois
céad
cúig
cúigear
d'
daichead
dar
de
deich
deichniúr
den
dhá
do
don
dtí
dár
faoi
faoin
faoina
faoinár
fara
fiche
gach
gan
go
gur
haon
hocht
i
iad
idir
in
ina
ins
inár
is
le
leis
lena
lenár
m'
mar
mo
na
nach
naoi
naonúr
níor
nócha
ocht
ochtar
os
roimh
sa
seacht
seachtar
seachtó
seasca
seisear
siad
sibh
sinn
sna
tar
thar
thú
triúr
trí
trína
trínár
tríocha
um
ár
é
éis
í
ó
ón
óna
ónár
`)
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
rv := analysis.NewTokenMap()
err := rv.LoadBytes(IrishStopWords)
return rv, err
}
func init() {
err := registry.RegisterTokenMap(StopName, TokenMapConstructor)
if err != nil {
panic(err)
}
}

View file

@ -0,0 +1,36 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package gl
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
tokenMap, err := cache.TokenMapNamed(StopName)
if err != nil {
return nil, err
}
return stop.NewStopTokensFilter(tokenMap), nil
}
func init() {
err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
if err != nil {
panic(err)
}
}
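
Not part of this diff: a sketch of why these filters register under stable names. Once "stop_gl" is registered, an index mapping can reference it by name when assembling a custom analyzer. The component names "custom", "unicode", and "to_lower" are assumed to be bleve's stock registrations; the analyzer name "gl_web" and the index path are illustrative.
package main

import (
	"github.com/blevesearch/bleve/v2"
	// blank import ensures this package's init() registrations run
	_ "github.com/blevesearch/bleve/v2/analysis/lang/gl"
)

func main() {
	indexMapping := bleve.NewIndexMapping()
	// assemble a custom analyzer whose chain ends with the Galician stop filter
	err := indexMapping.AddCustomAnalyzer("gl_web", map[string]interface{}{
		"type":          "custom",
		"tokenizer":     "unicode",
		"token_filters": []string{"to_lower", "stop_gl"},
	})
	if err != nil {
		panic(err)
	}
	index, err := bleve.New("example.bleve", indexMapping)
	if err != nil {
		panic(err)
	}
	defer index.Close()
}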

View file

@ -0,0 +1,188 @@
package gl
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const StopName = "stop_gl"
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// ` was changed to ' so the list can be embedded in a Go raw string literal
var GalicianStopWords = []byte(`# galician stopwords
a
aínda
alí
aquel
aquela
aquelas
aqueles
aquilo
aquí
ao
aos
as
así
á
ben
cando
che
co
coa
comigo
con
connosco
contigo
convosco
coas
cos
cun
cuns
cunha
cunhas
da
dalgunha
dalgunhas
dalgún
dalgúns
das
de
del
dela
delas
deles
desde
deste
do
dos
dun
duns
dunha
dunhas
e
el
ela
elas
eles
en
era
eran
esa
esas
ese
eses
esta
estar
estaba
está
están
este
estes
estiven
estou
eu
é
facer
foi
foron
fun
había
hai
iso
isto
la
las
lle
lles
lo
los
mais
me
meu
meus
min
miña
miñas
moi
na
nas
neste
nin
no
non
nos
nosa
nosas
noso
nosos
nós
nun
nunha
nuns
nunhas
o
os
ou
ó
ós
para
pero
pode
pois
pola
polas
polo
polos
por
que
se
senón
ser
seu
seus
sexa
sido
sobre
súa
súas
tamén
tan
te
ten
teñen
teño
ter
teu
teus
ti
tido
tiña
tiven
túa
túas
un
unha
unhas
uns
vos
vosa
vosas
voso
vosos
vós
`)
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
rv := analysis.NewTokenMap()
err := rv.LoadBytes(GalicianStopWords)
return rv, err
}
func init() {
err := registry.RegisterTokenMap(StopName, TokenMapConstructor)
if err != nil {
panic(err)
}
}
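
Not part of this diff: a sketch of the file format these lists share, assuming bleve's TokenMap treats '#' and '|' as comment markers, which would explain why the header line and the inline glosses above never become tokens.
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/v2/analysis"
)

func main() {
	tm := analysis.NewTokenMap()
	// anything after '#' or '|' is dropped, blank lines are skipped,
	// and whatever remains on each line is added to the map
	err := tm.LoadBytes([]byte(`# header comment
a
ao | a + o

`))
	if err != nil {
		panic(err)
	}
	for token := range tm {
		fmt.Println(token) // prints "a" and "ao", in map order
	}
}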

View file

@ -0,0 +1,71 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package hi
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/lang/in"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
const AnalyzerName = "hi"
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
tokenizer, err := cache.TokenizerNamed(unicode.Name)
if err != nil {
return nil, err
}
toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
if err != nil {
return nil, err
}
indicNormalizeFilter, err := cache.TokenFilterNamed(in.NormalizeName)
if err != nil {
return nil, err
}
hindiNormalizeFilter, err := cache.TokenFilterNamed(NormalizeName)
if err != nil {
return nil, err
}
stopHiFilter, err := cache.TokenFilterNamed(StopName)
if err != nil {
return nil, err
}
stemmerHiFilter, err := cache.TokenFilterNamed(StemmerName)
if err != nil {
return nil, err
}
rv := analysis.DefaultAnalyzer{
Tokenizer: tokenizer,
TokenFilters: []analysis.TokenFilter{
toLowerFilter,
indicNormalizeFilter,
hindiNormalizeFilter,
stopHiFilter,
stemmerHiFilter,
},
}
return &rv, nil
}
func init() {
err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
if err != nil {
panic(err)
}
}
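
Not part of this diff: a sketch of running the registered analyzer end to end via the registry. It assumes the Analyze method provided by analysis.DefaultAnalyzer; the sample Hindi words are illustrative only.
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/v2/analysis/lang/hi"
	"github.com/blevesearch/bleve/v2/registry"
)

func main() {
	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(hi.AnalyzerName)
	if err != nil {
		panic(err)
	}
	// runs the full chain: tokenize, lowercase, indic and hindi
	// normalization, stop word removal, stemming
	for _, token := range analyzer.Analyze([]byte("बड़े लड़के")) {
		fmt.Printf("%s [%d-%d]\n", token.Term, token.Start, token.End)
	}
}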

Some files were not shown because too many files have changed in this diff