Adding upstream version 1.0.2.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
f0ce5b079b
commit
b1c5a31457
136 changed files with 2310 additions and 0 deletions
279
api.go
Normal file
279
api.go
Normal file
|
@ -0,0 +1,279 @@
|
|||
package reply
|
||||
|
||||
import (
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/dlclark/regexp2"
|
||||
)
|
||||
|
||||
func init() {
|
||||
// The default configuration is set to 'forever'.
|
||||
// I am not expecting any regex to take more than a few milliseconds.
|
||||
// Setting this value to 1s just to be on the safe side.
|
||||
regexp2.DefaultMatchTimeout = 1 * time.Second
|
||||
}
|
||||
|
||||
// FromReader returns the reply text from the e-mail text body.
|
||||
func FromReader(reader io.Reader) (string, error) {
|
||||
bytes, err := ioutil.ReadAll(reader)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return FromText(string(bytes)), nil
|
||||
}
|
||||
|
||||
// FromText returns the reply text from the e-mail text body.
|
||||
func FromText(text string) string {
|
||||
if strings.TrimSpace(text) == "" {
|
||||
return text
|
||||
}
|
||||
|
||||
// do some cleanup
|
||||
text = cleanup(text)
|
||||
|
||||
// from now on, we'll work on a line-by-line basis
|
||||
lines := strings.Split(text, "\n")
|
||||
patternBuilder := strings.Builder{}
|
||||
|
||||
for _, line := range lines {
|
||||
patternBuilder.WriteString(classifyLine(line))
|
||||
}
|
||||
|
||||
pattern := patternBuilder.String()
|
||||
|
||||
// remove everything after the first delimiter
|
||||
{
|
||||
match, err := regexp2.MustCompile(`d`, regexp2.RE2).FindStringMatch(pattern)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
if match != nil {
|
||||
index := match.Index
|
||||
pattern = sliceString(pattern, 0, index-1)
|
||||
lines = sliceArray(lines, 0, index-1)
|
||||
}
|
||||
}
|
||||
|
||||
// remove all mobile signatures
|
||||
for {
|
||||
match, err := regexp2.MustCompile(`s`, regexp2.RE2).FindStringMatch(pattern)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
if match != nil {
|
||||
index := match.Index
|
||||
pattern = stringSliceBang(pattern, index)
|
||||
lines = sliceSliceBang(lines, index)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// when the reply is at the end of the email
|
||||
{
|
||||
match, err := regexp2.MustCompile(`^(b[^t]+)*b[bqeh]+t[et]*$`, regexp2.RE2).FindStringMatch(pattern)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
if match != nil {
|
||||
submatch, err := regexp2.MustCompile(`t[et]*$`, regexp2.RE2).FindStringMatch(pattern)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
index := submatch.Index
|
||||
pattern = ""
|
||||
lines = sliceArray(lines, index, len(lines)-1)
|
||||
}
|
||||
}
|
||||
|
||||
// if there is an embedded email marker, not followed by a quote
|
||||
// then take everything up to that marker
|
||||
{
|
||||
match, err := regexp2.MustCompile(`te*b[^q]*$`, regexp2.RE2).FindStringMatch(pattern)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
if match != nil {
|
||||
index := match.Index
|
||||
pattern = sliceString(pattern, 0, index)
|
||||
lines = sliceArray(lines, 0, index)
|
||||
}
|
||||
}
|
||||
|
||||
// if there is an embedded email marker, followed by a huge quote
|
||||
// then take everything up to that marker
|
||||
{
|
||||
match, err := regexp2.MustCompile(`te*b[eqbh]*([te]*)$`, regexp2.RE2).FindStringMatch(pattern)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
if match != nil && strings.Count(match.GroupByNumber(1).String(), "t") < 7 {
|
||||
submatch, err := regexp2.MustCompile(`te*b[eqbh]*[te]*$`, regexp2.RE2).FindStringMatch(pattern)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
index := submatch.Index
|
||||
pattern = sliceString(pattern, 0, index)
|
||||
lines = sliceArray(lines, 0, index)
|
||||
}
|
||||
}
|
||||
|
||||
// if there is some text before a huge quote ending the email,
|
||||
// then remove the quote
|
||||
{
|
||||
match, err := regexp2.MustCompile(`te*[qbe]+$`, regexp2.RE2).FindStringMatch(pattern)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
if match != nil {
|
||||
index := match.Index
|
||||
pattern = sliceString(pattern, 0, index)
|
||||
lines = sliceArray(lines, 0, index)
|
||||
}
|
||||
}
|
||||
|
||||
// if there still are some embedded email markers, just remove them
|
||||
for {
|
||||
match, err := regexp2.MustCompile(`b`, regexp2.RE2).FindStringMatch(pattern)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
if match != nil {
|
||||
index := match.Index
|
||||
pattern = stringSliceBang(pattern, index)
|
||||
lines = sliceSliceBang(lines, index)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// fix email headers when they span over multiple lines
|
||||
{
|
||||
match, err := regexp2.MustCompile(`h+[hte]+h+e`, regexp2.RE2).FindStringMatch(pattern)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
if match != nil {
|
||||
index := match.Index
|
||||
for i := 0; i < match.Length; i++ {
|
||||
c := []rune(header)[0]
|
||||
pattern = stringReplaceChar(pattern, c, index+i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if there are at least 3 consecutive email headers,
|
||||
// take everything up to these headers
|
||||
{
|
||||
match, err := regexp2.MustCompile(`t[eq]*h{3,}`, regexp2.RE2).FindStringMatch(pattern)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
if match != nil {
|
||||
index := match.Index
|
||||
pattern = sliceString(pattern, 0, index)
|
||||
lines = sliceArray(lines, 0, index)
|
||||
}
|
||||
}
|
||||
|
||||
// if there still are some email headers, just remove them
|
||||
for {
|
||||
match, err := regexp2.MustCompile(`h`, regexp2.RE2).FindStringMatch(pattern)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
if match != nil {
|
||||
index := match.Index
|
||||
pattern = stringSliceBang(pattern, index)
|
||||
lines = sliceSliceBang(lines, index)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// remove trailing quotes when there's at least one line of text
|
||||
{
|
||||
match1, err := regexp2.MustCompile(`t`, regexp2.RE2).FindStringMatch(pattern)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
match2, err := regexp2.MustCompile(`[eq]+$`, regexp2.RE2).FindStringMatch(pattern)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
if match1 != nil && match2 != nil {
|
||||
index := match2.Index
|
||||
pattern = sliceString(pattern, 0, index-1)
|
||||
lines = sliceArray(lines, 0, index-1)
|
||||
}
|
||||
}
|
||||
|
||||
return strings.Join(lines, "\n")
|
||||
}
|
||||
|
||||
func cleanup(text string) string {
|
||||
// normalize line endings
|
||||
replacer := strings.NewReplacer(
|
||||
"\r\n", "\n",
|
||||
"\r", "\n",
|
||||
)
|
||||
|
||||
text = replacer.Replace(text)
|
||||
|
||||
// remove PGP markers
|
||||
for _, r := range patterns["REMOVE_PGP_MARKERS_REGEX"] {
|
||||
text, _ = r.Replace(text, "", 0, -1)
|
||||
}
|
||||
|
||||
// remove unsubscribe links
|
||||
for _, r := range patterns["REMOVE_UNSUBSCRIBE_REGEX"] {
|
||||
text, _ = r.Replace(text, "", 0, -1)
|
||||
}
|
||||
|
||||
// remove alias-style quotes marker
|
||||
for _, r := range patterns["REMOVE_ALIAS_REGEX"] {
|
||||
text, _ = r.Replace(text, "", 0, -1)
|
||||
}
|
||||
|
||||
// change enclosed-style quotes format
|
||||
for _, r := range patterns["CHANGE_ENCLOSED_QUOTE_ONE_REGEX"] {
|
||||
text, _ = r.ReplaceFunc(text, func(m regexp2.Match) string {
|
||||
newText, _ := regexp2.MustCompile(`^`, regexp2.RE2).Replace(m.GroupByNumber(2).String(), "> ", 0, -1)
|
||||
return newText
|
||||
}, 0, -1)
|
||||
}
|
||||
|
||||
for _, r := range patterns["CHANGE_ENCLOSED_QUOTE_TWO_REGEX"] {
|
||||
text, _ = r.ReplaceFunc(text, func(m regexp2.Match) string {
|
||||
newText, _ := regexp2.MustCompile(`^`, regexp2.RE2).Replace(m.GroupByNumber(1).String(), "> ", 0, -1)
|
||||
return newText
|
||||
}, 0, -1)
|
||||
}
|
||||
|
||||
// fix all quotes formats
|
||||
for _, r := range patterns["FIX_QUOTES_FORMAT_REGEX"] {
|
||||
text, _ = r.ReplaceFunc(text, func(m regexp2.Match) string {
|
||||
newText, _ := regexp2.MustCompile(`([[:alpha:]]+>|\|)`, regexp2.RE2).Replace(m.GroupByNumber(1).String(), ">", 0, -1)
|
||||
return newText
|
||||
}, 0, -1)
|
||||
}
|
||||
|
||||
// fix embedded email markers that might span over multiple lines
|
||||
for _, regex := range patterns["FIX_EMBEDDED_REGEX"] {
|
||||
text, _ = regex.ReplaceFunc(text, func(m regexp2.Match) string {
|
||||
if strings.Count(m.String(), "\n") > 4 {
|
||||
return m.String()
|
||||
}
|
||||
newText, _ := regexp2.MustCompile(`\n+[[:space:]]*`, regexp2.RE2).Replace(m.String(), " ", 0, -1)
|
||||
return newText
|
||||
}, 0, -1)
|
||||
}
|
||||
|
||||
// remove leading/trailing whitespaces
|
||||
return strings.TrimSpace(text)
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue