Adding upstream version 2.5.1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-05-19 00:20:02 +02:00 · 2025-05-19 00:20:02 +02:00 · 982828099e
commit 982828099e
parent c71cb8b61d
783 changed files with 150650 additions and 0 deletions
--- a/analysis/lang/pt/analyzer_pt.go
+++ b/analysis/lang/pt/analyzer_pt.go
@ -0,0 +1,60 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pt
+
+import (
+	"github.com/blevesearch/bleve/v2/analysis"
+	"github.com/blevesearch/bleve/v2/registry"
+
+	"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
+	"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
+)
+
+// AnalyzerName is the name under which the Portuguese analyzer is registered
+// with the registry (see init in this file).
+const AnalyzerName = "pt"
+
+// AnalyzerConstructor assembles the Portuguese analyzer from components
+// looked up in cache: the unicode tokenizer followed by the lowercase, stop
+// word and light stemmer token filters, applied in that order.
+//
+// config is not consulted. The first lookup error is returned unchanged and
+// no analyzer is built in that case.
+func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
+	unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name)
+	if err != nil {
+		return nil, err
+	}
+
+	// The order of names here is the order in which the filters run.
+	filterNames := []string{lowercase.Name, StopName, LightStemmerName}
+	filters := make([]analysis.TokenFilter, 0, len(filterNames))
+	for _, name := range filterNames {
+		filter, err := cache.TokenFilterNamed(name)
+		if err != nil {
+			return nil, err
+		}
+		filters = append(filters, filter)
+	}
+
+	return &analysis.DefaultAnalyzer{
+		Tokenizer:    unicodeTokenizer,
+		TokenFilters: filters,
+	}, nil
+}
+
+// init registers AnalyzerConstructor under AnalyzerName and panics if the
+// registry reports an error.
+func init() {
+	if err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor); err != nil {
+		panic(err)
+	}
+}
--- a/analysis/lang/pt/analyzer_pt_test.go
+++ b/analysis/lang/pt/analyzer_pt_test.go
@ -0,0 +1,70 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pt
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/blevesearch/bleve/v2/analysis"
+	"github.com/blevesearch/bleve/v2/registry"
+)
+
+func TestPortugueseAnalyzer(t *testing.T) {
+	tests := []struct {
+		input  []byte
+		output analysis.TokenStream
+	}{
+		// stemming
+		{
+			input: []byte("quilométricas"),
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("quilometric"),
+				},
+			},
+		},
+		{
+			input: []byte("quilométricos"),
+			output: analysis.TokenStream{
+				&analysis.Token{
+					Term: []byte("quilometric"),
+				},
+			},
+		},
+		// stop word
+		{
+			input:  []byte("não"),
+			output: analysis.TokenStream{},
+		},
+	}
+
+	cache := registry.NewCache()
+	analyzer, err := cache.AnalyzerNamed(AnalyzerName)
+	if err != nil {
+		t.Fatal(err)
+	}
+	for _, test := range tests {
+		actual := analyzer.Analyze(test.input)
+		if len(actual) != len(test.output) {
+			t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
+		}
+		for i, tok := range actual {
+			if !reflect.DeepEqual(tok.Term, test.output[i].Term) {
+				t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term)
+			}
+		}
+	}
+}
--- a/analysis/lang/pt/light_stemmer_pt.go
+++ b/analysis/lang/pt/light_stemmer_pt.go
@ -0,0 +1,198 @@
+//  Copyright (c) 2015 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pt
+
+import (
+	"bytes"
+
+	"github.com/blevesearch/bleve/v2/analysis"
+	"github.com/blevesearch/bleve/v2/registry"
+)
+
+// LightStemmerName is the name under which the Portuguese light stemmer token
+// filter is registered with the registry (see init in this file).
+const LightStemmerName = "stemmer_pt_light"
+
+// PortugueseLightStemmerFilter is a token filter whose Filter method rewrites
+// each token's term with the light Portuguese stemmer defined in this file.
+// It has no fields and therefore carries no configuration or state.
+type PortugueseLightStemmerFilter struct {
+}
+
+// NewPortugueseLightStemmerFilter returns a pointer to a fresh, zero-valued
+// PortugueseLightStemmerFilter.
+func NewPortugueseLightStemmerFilter() *PortugueseLightStemmerFilter {
+	return new(PortugueseLightStemmerFilter)
+}
+
+// Filter stems the term of every token in input. Each term is decoded to
+// runes, passed through stem and re-encoded; the tokens are updated in place
+// and the same stream is returned.
+func (s *PortugueseLightStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
+	for i := range input {
+		tok := input[i]
+		tok.Term = analysis.BuildTermFromRunes(stem(bytes.Runes(tok.Term)))
+	}
+	return input
+}
+
+// stem reduces a Portuguese word to its light stem and returns the result.
+//
+// Words shorter than four runes are returned untouched. Otherwise the steps
+// run in this order: removeSuffix, normFeminine (only when the word is still
+// longer than three runes and ends in 'a'), removal of a final 'e', 'a' or 'o'
+// (only when the word is longer than four runes), and finally replacement of
+// accented vowels and 'ç' by their unaccented letters.
+//
+// The returned slice shares its backing array with input, which is modified
+// in place.
+func stem(input []rune) []rune {
+	if len(input) < 4 {
+		return input
+	}
+
+	word := removeSuffix(input)
+
+	if n := len(word); n > 3 && word[n-1] == 'a' {
+		word = normFeminine(word)
+	}
+
+	// Drop a trailing vowel, but only from words longer than four runes.
+	if n := len(word); n > 4 {
+		if last := word[n-1]; last == 'e' || last == 'a' || last == 'o' {
+			word = word[:n-1]
+		}
+	}
+
+	// Fold accented letters onto their plain counterparts.
+	for i, r := range word {
+		switch r {
+		case 'à', 'á', 'â', 'ä', 'ã':
+			word[i] = 'a'
+		case 'ò', 'ó', 'ô', 'ö', 'õ':
+			word[i] = 'o'
+		case 'è', 'é', 'ê', 'ë':
+			word[i] = 'e'
+		case 'ù', 'ú', 'û', 'ü':
+			word[i] = 'u'
+		case 'ì', 'í', 'î', 'ï':
+			word[i] = 'i'
+		case 'ç':
+			word[i] = 'c'
+		}
+	}
+
+	return word
+}
+
+// removeSuffix strips or rewrites a plural or adverbial ending of input and
+// returns the shortened word. Only the first matching rule is applied; rules
+// are tried in the order written below. The word is edited in place, so the
+// result shares input's backing array. When no rule matches, input is
+// returned as is.
+func removeSuffix(input []rune) []rune {
+	n := len(input)
+
+	// "-es" is removed only after r, s, l or z; any other "-es" word falls
+	// through to the remaining rules.
+	if n > 4 && analysis.RunesEndsWith(input, "es") {
+		if prev := input[n-3]; prev == 'r' || prev == 's' || prev == 'l' || prev == 'z' {
+			return input[:n-2]
+		}
+	}
+
+	switch {
+	case n > 3 && analysis.RunesEndsWith(input, "ns"):
+		// "-ns" becomes "-m"
+		input[n-2] = 'm'
+		return input[:n-1]
+
+	case n > 4 && (analysis.RunesEndsWith(input, "eis") || analysis.RunesEndsWith(input, "éis")):
+		// "-eis" / "-éis" become "-el"
+		input[n-3], input[n-2] = 'e', 'l'
+		return input[:n-1]
+
+	case n > 4 && analysis.RunesEndsWith(input, "ais"):
+		// "-ais" becomes "-al"
+		input[n-2] = 'l'
+		return input[:n-1]
+
+	case n > 4 && analysis.RunesEndsWith(input, "óis"):
+		// "-óis" becomes "-ol"
+		input[n-3], input[n-2] = 'o', 'l'
+		return input[:n-1]
+
+	case n > 4 && analysis.RunesEndsWith(input, "is"):
+		// "-is" becomes "-il"; the length is unchanged
+		input[n-1] = 'l'
+		return input
+
+	case n > 3 && (analysis.RunesEndsWith(input, "ões") || analysis.RunesEndsWith(input, "ães")):
+		// "-ões" / "-ães" become "-ão"
+		input[n-3], input[n-2] = 'ã', 'o'
+		return input[:n-1]
+
+	case n > 6 && analysis.RunesEndsWith(input, "mente"):
+		return input[:n-5]
+
+	case n > 3 && input[n-1] == 's':
+		return input[:n-1]
+	}
+
+	return input
+}
+
+// normFeminine rewrites a feminine ending of input to the corresponding
+// masculine form and returns the result. Only the first matching rule is
+// applied. Words of six runes or fewer are never changed, and the first rule
+// group additionally requires more than seven runes. The word is edited in
+// place, so the result shares input's backing array.
+func normFeminine(input []rune) []rune {
+	n := len(input)
+
+	if n > 7 &&
+		(analysis.RunesEndsWith(input, "inha") ||
+			analysis.RunesEndsWith(input, "iaca") ||
+			analysis.RunesEndsWith(input, "eira")) {
+		input[n-1] = 'o'
+		return input
+	}
+
+	if n <= 6 {
+		return input
+	}
+
+	switch {
+	case analysis.RunesEndsWith(input, "osa"),
+		analysis.RunesEndsWith(input, "ica"),
+		analysis.RunesEndsWith(input, "ida"),
+		analysis.RunesEndsWith(input, "ada"),
+		analysis.RunesEndsWith(input, "iva"),
+		analysis.RunesEndsWith(input, "ama"):
+		// final 'a' becomes 'o'
+		input[n-1] = 'o'
+		return input
+
+	case analysis.RunesEndsWith(input, "ona"):
+		// "-ona" becomes "-ão"
+		input[n-3], input[n-2] = 'ã', 'o'
+		return input[:n-1]
+
+	case analysis.RunesEndsWith(input, "ora"):
+		// "-ora" becomes "-or"
+		return input[:n-1]
+
+	case analysis.RunesEndsWith(input, "esa"):
+		// "-esa" becomes "-ês"
+		input[n-3] = 'ê'
+		return input[:n-1]
+
+	case analysis.RunesEndsWith(input, "na"):
+		// "-na" becomes "-no"
+		input[n-1] = 'o'
+		return input
+	}
+
+	return input
+}
+
+// PortugueseLightStemmerFilterConstructor is the registry constructor for the
+// light stemmer filter. It ignores config and cache and always returns a new
+// PortugueseLightStemmerFilter with a nil error.
+func PortugueseLightStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
+	stemmer := NewPortugueseLightStemmerFilter()
+	return stemmer, nil
+}
+
+// init registers PortugueseLightStemmerFilterConstructor under
+// LightStemmerName and panics if the registry reports an error.
+func init() {
+	if err := registry.RegisterTokenFilter(LightStemmerName, PortugueseLightStemmerFilterConstructor); err != nil {
+		panic(err)
+	}
+}
--- a/analysis/lang/pt/light_stemmer_pt_test.go
+++ b/analysis/lang/pt/light_stemmer_pt_test.go
@ -0,0 +1,404 @@
+//  Copyright (c) 2015 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pt
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/blevesearch/bleve/v2/analysis"
+	"github.com/blevesearch/bleve/v2/registry"
+)
+
+// TestPortugueseLightStemmer runs the filter registered under
+// LightStemmerName over single-word streams and one sentence-length stream,
+// and compares every resulting token with the expected one.
+//
+// Failures are reported per token position, so a mismatch in the middle of
+// the long stream names the offending token instead of only the first one.
+func TestPortugueseLightStemmer(t *testing.T) {
+	// stream builds a token stream holding one token per term, in order.
+	stream := func(terms ...string) analysis.TokenStream {
+		rv := make(analysis.TokenStream, 0, len(terms))
+		for _, term := range terms {
+			rv = append(rv, &analysis.Token{Term: []byte(term)})
+		}
+		return rv
+	}
+
+	tests := []struct {
+		input  analysis.TokenStream
+		output analysis.TokenStream
+	}{
+		{input: stream("doutores"), output: stream("doutor")},
+		{input: stream("doutor"), output: stream("doutor")},
+		{input: stream("homens"), output: stream("homem")},
+		{input: stream("homem"), output: stream("homem")},
+		{input: stream("papéis"), output: stream("papel")},
+		{input: stream("papel"), output: stream("papel")},
+		{input: stream("normais"), output: stream("normal")},
+		{input: stream("normal"), output: stream("normal")},
+		{input: stream("lencóis"), output: stream("lencol")},
+		{input: stream("lencol"), output: stream("lencol")},
+		{input: stream("barris"), output: stream("barril")},
+		{input: stream("barril"), output: stream("barril")},
+		{input: stream("botões"), output: stream("bota")},
+		{input: stream("botão"), output: stream("bota")},
+		// longer
+		{
+			input: stream(
+				"o", "debate", "político", "pelo", "menos", "o", "que", "vem",
+				"a", "público", "parece", "de", "modo", "nada", "surpreendente",
+				"restrito", "a", "temas", "menores", "mas", "há", "evidentemente",
+				"grandes", "questões", "em", "jogo", "nas", "eleições", "que",
+				"se", "aproximam",
+			),
+			output: stream(
+				"o", "debat", "politic", "pelo", "meno", "o", "que", "vem",
+				"a", "public", "parec", "de", "modo", "nada", "surpreendent",
+				"restrit", "a", "tema", "menor", "mas", "há", "evident",
+				"grand", "questa", "em", "jogo", "nas", "eleica", "que",
+				"se", "aproximam",
+			),
+		},
+	}
+
+	cache := registry.NewCache()
+	filter, err := cache.TokenFilterNamed(LightStemmerName)
+	if err != nil {
+		t.Fatal(err)
+	}
+	for _, test := range tests {
+		actual := filter.Filter(test.input)
+		// Check the length first so the per-token loop can index both
+		// streams safely.
+		if len(actual) != len(test.output) {
+			t.Errorf("expected %d tokens, got %d", len(test.output), len(actual))
+			continue
+		}
+		for i := range actual {
+			if !reflect.DeepEqual(actual[i], test.output[i]) {
+				t.Errorf("token %d: expected %s, got %s", i, test.output[i].Term, actual[i].Term)
+			}
+		}
+	}
+}
--- a/analysis/lang/pt/stop_filter_pt.go
+++ b/analysis/lang/pt/stop_filter_pt.go
@ -0,0 +1,36 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pt
+
+import (
+	"github.com/blevesearch/bleve/v2/analysis"
+	"github.com/blevesearch/bleve/v2/analysis/token/stop"
+	"github.com/blevesearch/bleve/v2/registry"
+)
+
+// StopTokenFilterConstructor builds the Portuguese stop token filter. It
+// fetches the token map registered under StopName from cache and wraps it in
+// a stop tokens filter. config is not consulted. If the token map lookup
+// fails, that error is returned and no filter is built.
+func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
+	stopWords, lookupErr := cache.TokenMapNamed(StopName)
+	if lookupErr != nil {
+		return nil, lookupErr
+	}
+	stopFilter := stop.NewStopTokensFilter(stopWords)
+	return stopFilter, nil
+}
+
+// init registers StopTokenFilterConstructor under StopName and panics if the
+// registry reports an error.
+func init() {
+	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
+		panic(err)
+	}
+}
--- a/analysis/lang/pt/stop_words_pt.go
+++ b/analysis/lang/pt/stop_words_pt.go
@ -0,0 +1,280 @@
+package pt
+
+import (
+	"github.com/blevesearch/bleve/v2/analysis"
+	"github.com/blevesearch/bleve/v2/registry"
+)
+
+// StopName is the name under which the Portuguese stop word token map is
+// registered with the registry (see init in this file).
+const StopName = "stop_pt"
+
+// The stop word list below was obtained from:
+// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
+// Backticks (`) were replaced with apostrophes (') so that the text can be stored in a Go raw string literal.
+
+var PortugueseStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A Portuguese stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de             |  of, from
+a              |  the; to, at; her
+o              |  the; him
+que            |  who, that
+e              |  and
+do             |  de + o
+da             |  de + a
+em             |  in
+um             |  a
+para           |  for
+  | é          from SER
+com            |  with
+não            |  not, no
+uma            |  a
+os             |  the; them
+no             |  em + o
+se             |  himself etc
+na             |  em + a
+por            |  for
+mais           |  more
+as             |  the; them
+dos            |  de + os
+como           |  as, like
+mas            |  but
+  | foi        from SER
+ao             |  a + o
+ele            |  he
+das            |  de + as
+  | tem        from TER
+à              |  a + a
+seu            |  his
+sua            |  her
+ou             |  or
+  | ser        from SER
+quando         |  when
+muito          |  much
+  | há         from HAV
+nos            |  em + os; us
+já             |  already, now
+  | está       from EST
+eu             |  I
+também         |  also
+só             |  only, just
+pelo           |  per + o
+pela           |  per + a
+até            |  up to
+isso           |  that
+ela            |  he
+entre          |  between
+  | era        from SER
+depois         |  after
+sem            |  without
+mesmo          |  same
+aos            |  a + os
+  | ter        from TER
+seus           |  his
+quem           |  whom
+nas            |  em + as
+me             |  me
+esse           |  that
+eles           |  they
+  | estão      from EST
+você           |  you
+  | tinha      from TER
+  | foram      from SER
+essa           |  that
+num            |  em + um
+nem            |  nor
+suas           |  her
+meu            |  my
+às             |  a + as
+minha          |  my
+  | têm        from TER
+numa           |  em + uma
+pelos          |  per + os
+elas           |  they
+  | havia      from HAV
+  | seja       from SER
+qual           |  which
+  | será       from SER
+nós            |  we
+  | tenho      from TER
+lhe            |  to him, her
+deles          |  of them
+essas          |  those
+esses          |  those
+pelas          |  per + as
+este           |  this
+  | fosse      from SER
+dele           |  of him
+
+ | other words. There are many contractions such as naquele = em+aquele,
+ | mo = me+o, but they are rare.
+ | Indefinite article plural forms are also rare.
+
+tu             |  thou
+te             |  thee
+vocês          |  you (plural)
+vos            |  you
+lhes           |  to them
+meus           |  my
+minhas
+teu            |  thy
+tua
+teus
+tuas
+nosso          | our
+nossa
+nossos
+nossas
+
+dela           |  of her
+delas          |  of them
+
+esta           |  this
+estes          |  these
+estas          |  these
+aquele         |  that
+aquela         |  that
+aqueles        |  those
+aquelas        |  those
+isto           |  this
+aquilo         |  that
+
+               | forms of estar, to be (not including the infinitive):
+estou
+está
+estamos
+estão
+estive
+esteve
+estivemos
+estiveram
+estava
+estávamos
+estavam
+estivera
+estivéramos
+esteja
+estejamos
+estejam
+estivesse
+estivéssemos
+estivessem
+estiver
+estivermos
+estiverem
+
+               | forms of haver, to have (not including the infinitive):
+hei
+há
+havemos
+hão
+houve
+houvemos
+houveram
+houvera
+houvéramos
+haja
+hajamos
+hajam
+houvesse
+houvéssemos
+houvessem
+houver
+houvermos
+houverem
+houverei
+houverá
+houveremos
+houverão
+houveria
+houveríamos
+houveriam
+
+               | forms of ser, to be (not including the infinitive):
+sou
+somos
+são
+era
+éramos
+eram
+fui
+foi
+fomos
+foram
+fora
+fôramos
+seja
+sejamos
+sejam
+fosse
+fôssemos
+fossem
+for
+formos
+forem
+serei
+será
+seremos
+serão
+seria
+seríamos
+seriam
+
+               | forms of ter, to have (not including the infinitive):
+tenho
+tem
+temos
+tém
+tinha
+tínhamos
+tinham
+tive
+teve
+tivemos
+tiveram
+tivera
+tivéramos
+tenha
+tenhamos
+tenham
+tivesse
+tivéssemos
+tivessem
+tiver
+tivermos
+tiverem
+terei
+terá
+teremos
+terão
+teria
+teríamos
+teriam
+`)
+
+// TokenMapConstructor creates a new token map and loads PortugueseStopWords
+// into it. config and cache are not consulted. The map is returned together
+// with whatever error the load produced, so on failure the caller receives
+// both the map and a non-nil error.
+func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
+	stopWords := analysis.NewTokenMap()
+	if loadErr := stopWords.LoadBytes(PortugueseStopWords); loadErr != nil {
+		return stopWords, loadErr
+	}
+	return stopWords, nil
+}
+
+// init registers TokenMapConstructor under StopName and panics if the
+// registry reports an error.
+func init() {
+	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
+		panic(err)
+	}
+}