279 lines
7 KiB
Go
279 lines
7 KiB
Go
package reply
|
|
|
|
import (
|
|
"io"
|
|
"io/ioutil"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/dlclark/regexp2"
|
|
)
|
|
|
|
func init() {
|
|
// The default configuration is set to 'forever'.
|
|
// I am not expecting any regex to take more than a few milliseconds.
|
|
// Setting this value to 1s just to be on the safe side.
|
|
regexp2.DefaultMatchTimeout = 1 * time.Second
|
|
}
|
|
|
|
// FromReader returns the reply text from the e-mail text body.
|
|
func FromReader(reader io.Reader) (string, error) {
|
|
bytes, err := ioutil.ReadAll(reader)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
return FromText(string(bytes)), nil
|
|
}
|
|
|
|
// FromText returns the reply text from the e-mail text body.
|
|
func FromText(text string) string {
|
|
if strings.TrimSpace(text) == "" {
|
|
return text
|
|
}
|
|
|
|
// do some cleanup
|
|
text = cleanup(text)
|
|
|
|
// from now on, we'll work on a line-by-line basis
|
|
lines := strings.Split(text, "\n")
|
|
patternBuilder := strings.Builder{}
|
|
|
|
for _, line := range lines {
|
|
patternBuilder.WriteString(classifyLine(line))
|
|
}
|
|
|
|
pattern := patternBuilder.String()
|
|
|
|
// remove everything after the first delimiter
|
|
{
|
|
match, err := regexp2.MustCompile(`d`, regexp2.RE2).FindStringMatch(pattern)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
if match != nil {
|
|
index := match.Index
|
|
pattern = sliceString(pattern, 0, index-1)
|
|
lines = sliceArray(lines, 0, index-1)
|
|
}
|
|
}
|
|
|
|
// remove all mobile signatures
|
|
for {
|
|
match, err := regexp2.MustCompile(`s`, regexp2.RE2).FindStringMatch(pattern)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
if match != nil {
|
|
index := match.Index
|
|
pattern = stringSliceBang(pattern, index)
|
|
lines = sliceSliceBang(lines, index)
|
|
} else {
|
|
break
|
|
}
|
|
}
|
|
|
|
// when the reply is at the end of the email
|
|
{
|
|
match, err := regexp2.MustCompile(`^(b[^t]+)*b[bqeh]+t[et]*$`, regexp2.RE2).FindStringMatch(pattern)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
if match != nil {
|
|
submatch, err := regexp2.MustCompile(`t[et]*$`, regexp2.RE2).FindStringMatch(pattern)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
|
|
index := submatch.Index
|
|
pattern = ""
|
|
lines = sliceArray(lines, index, len(lines)-1)
|
|
}
|
|
}
|
|
|
|
// if there is an embedded email marker, not followed by a quote
|
|
// then take everything up to that marker
|
|
{
|
|
match, err := regexp2.MustCompile(`te*b[^q]*$`, regexp2.RE2).FindStringMatch(pattern)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
if match != nil {
|
|
index := match.Index
|
|
pattern = sliceString(pattern, 0, index)
|
|
lines = sliceArray(lines, 0, index)
|
|
}
|
|
}
|
|
|
|
// if there is an embedded email marker, followed by a huge quote
|
|
// then take everything up to that marker
|
|
{
|
|
match, err := regexp2.MustCompile(`te*b[eqbh]*([te]*)$`, regexp2.RE2).FindStringMatch(pattern)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
if match != nil && strings.Count(match.GroupByNumber(1).String(), "t") < 7 {
|
|
submatch, err := regexp2.MustCompile(`te*b[eqbh]*[te]*$`, regexp2.RE2).FindStringMatch(pattern)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
index := submatch.Index
|
|
pattern = sliceString(pattern, 0, index)
|
|
lines = sliceArray(lines, 0, index)
|
|
}
|
|
}
|
|
|
|
// if there is some text before a huge quote ending the email,
|
|
// then remove the quote
|
|
{
|
|
match, err := regexp2.MustCompile(`te*[qbe]+$`, regexp2.RE2).FindStringMatch(pattern)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
if match != nil {
|
|
index := match.Index
|
|
pattern = sliceString(pattern, 0, index)
|
|
lines = sliceArray(lines, 0, index)
|
|
}
|
|
}
|
|
|
|
// if there still are some embedded email markers, just remove them
|
|
for {
|
|
match, err := regexp2.MustCompile(`b`, regexp2.RE2).FindStringMatch(pattern)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
if match != nil {
|
|
index := match.Index
|
|
pattern = stringSliceBang(pattern, index)
|
|
lines = sliceSliceBang(lines, index)
|
|
} else {
|
|
break
|
|
}
|
|
}
|
|
|
|
// fix email headers when they span over multiple lines
|
|
{
|
|
match, err := regexp2.MustCompile(`h+[hte]+h+e`, regexp2.RE2).FindStringMatch(pattern)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
if match != nil {
|
|
index := match.Index
|
|
for i := 0; i < match.Length; i++ {
|
|
c := []rune(header)[0]
|
|
pattern = stringReplaceChar(pattern, c, index+i)
|
|
}
|
|
}
|
|
}
|
|
|
|
// if there are at least 3 consecutive email headers,
|
|
// take everything up to these headers
|
|
{
|
|
match, err := regexp2.MustCompile(`t[eq]*h{3,}`, regexp2.RE2).FindStringMatch(pattern)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
if match != nil {
|
|
index := match.Index
|
|
pattern = sliceString(pattern, 0, index)
|
|
lines = sliceArray(lines, 0, index)
|
|
}
|
|
}
|
|
|
|
// if there still are some email headers, just remove them
|
|
for {
|
|
match, err := regexp2.MustCompile(`h`, regexp2.RE2).FindStringMatch(pattern)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
if match != nil {
|
|
index := match.Index
|
|
pattern = stringSliceBang(pattern, index)
|
|
lines = sliceSliceBang(lines, index)
|
|
} else {
|
|
break
|
|
}
|
|
}
|
|
|
|
// remove trailing quotes when there's at least one line of text
|
|
{
|
|
match1, err := regexp2.MustCompile(`t`, regexp2.RE2).FindStringMatch(pattern)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
match2, err := regexp2.MustCompile(`[eq]+$`, regexp2.RE2).FindStringMatch(pattern)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
if match1 != nil && match2 != nil {
|
|
index := match2.Index
|
|
pattern = sliceString(pattern, 0, index-1)
|
|
lines = sliceArray(lines, 0, index-1)
|
|
}
|
|
}
|
|
|
|
return strings.Join(lines, "\n")
|
|
}
|
|
|
|
func cleanup(text string) string {
|
|
// normalize line endings
|
|
replacer := strings.NewReplacer(
|
|
"\r\n", "\n",
|
|
"\r", "\n",
|
|
)
|
|
|
|
text = replacer.Replace(text)
|
|
|
|
// remove PGP markers
|
|
for _, r := range patterns["REMOVE_PGP_MARKERS_REGEX"] {
|
|
text, _ = r.Replace(text, "", 0, -1)
|
|
}
|
|
|
|
// remove unsubscribe links
|
|
for _, r := range patterns["REMOVE_UNSUBSCRIBE_REGEX"] {
|
|
text, _ = r.Replace(text, "", 0, -1)
|
|
}
|
|
|
|
// remove alias-style quotes marker
|
|
for _, r := range patterns["REMOVE_ALIAS_REGEX"] {
|
|
text, _ = r.Replace(text, "", 0, -1)
|
|
}
|
|
|
|
// change enclosed-style quotes format
|
|
for _, r := range patterns["CHANGE_ENCLOSED_QUOTE_ONE_REGEX"] {
|
|
text, _ = r.ReplaceFunc(text, func(m regexp2.Match) string {
|
|
newText, _ := regexp2.MustCompile(`^`, regexp2.RE2).Replace(m.GroupByNumber(2).String(), "> ", 0, -1)
|
|
return newText
|
|
}, 0, -1)
|
|
}
|
|
|
|
for _, r := range patterns["CHANGE_ENCLOSED_QUOTE_TWO_REGEX"] {
|
|
text, _ = r.ReplaceFunc(text, func(m regexp2.Match) string {
|
|
newText, _ := regexp2.MustCompile(`^`, regexp2.RE2).Replace(m.GroupByNumber(1).String(), "> ", 0, -1)
|
|
return newText
|
|
}, 0, -1)
|
|
}
|
|
|
|
// fix all quotes formats
|
|
for _, r := range patterns["FIX_QUOTES_FORMAT_REGEX"] {
|
|
text, _ = r.ReplaceFunc(text, func(m regexp2.Match) string {
|
|
newText, _ := regexp2.MustCompile(`([[:alpha:]]+>|\|)`, regexp2.RE2).Replace(m.GroupByNumber(1).String(), ">", 0, -1)
|
|
return newText
|
|
}, 0, -1)
|
|
}
|
|
|
|
// fix embedded email markers that might span over multiple lines
|
|
for _, regex := range patterns["FIX_EMBEDDED_REGEX"] {
|
|
text, _ = regex.ReplaceFunc(text, func(m regexp2.Match) string {
|
|
if strings.Count(m.String(), "\n") > 4 {
|
|
return m.String()
|
|
}
|
|
newText, _ := regexp2.MustCompile(`\n+[[:space:]]*`, regexp2.RE2).Replace(m.String(), " ", 0, -1)
|
|
return newText
|
|
}, 0, -1)
|
|
}
|
|
|
|
// remove leading/trailing whitespaces
|
|
return strings.TrimSpace(text)
|
|
}
|