From b1c5a31457637738ae6c663b0168dce8548995c3 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 18 May 2025 13:52:48 +0200 Subject: [PATCH] Adding upstream version 1.0.2. Signed-off-by: Daniel Baumann --- .gitignore | 18 + .travis.yml | 10 + LICENSE | 23 + README.md | 46 ++ api.go | 279 +++++++++++ api_test.go | 62 +++ classify.go | 93 ++++ doc.go | 14 + go.mod | 5 + go.sum | 2 + pattern.go | 471 ++++++++++++++++++ testdata/emails/delimiters.txt | 14 + testdata/emails/dual_embedded.txt | 13 + testdata/emails/email_headers_1.txt | 17 + testdata/emails/email_headers_2.txt | 10 + testdata/emails/email_headers_3.txt | 18 + testdata/emails/email_headers_4.txt | 17 + testdata/emails/email_headers_5.txt | 37 ++ testdata/emails/embedded_ception.txt | 47 ++ testdata/emails/embedded_email_1.txt | 8 + testdata/emails/embedded_email_10.txt | 42 ++ testdata/emails/embedded_email_11.txt | 5 + testdata/emails/embedded_email_12.txt | 18 + testdata/emails/embedded_email_13.txt | 14 + testdata/emails/embedded_email_14.txt | 16 + testdata/emails/embedded_email_15.txt | 9 + testdata/emails/embedded_email_16.txt | 16 + testdata/emails/embedded_email_17.txt | 38 ++ testdata/emails/embedded_email_18.txt | 7 + testdata/emails/embedded_email_19.txt | 13 + testdata/emails/embedded_email_2.txt | 16 + testdata/emails/embedded_email_3.txt | 24 + testdata/emails/embedded_email_4.txt | 19 + testdata/emails/embedded_email_5.txt | 5 + testdata/emails/embedded_email_6.txt | 11 + testdata/emails/embedded_email_7.txt | 20 + testdata/emails/embedded_email_8.txt | 5 + testdata/emails/embedded_email_9.txt | 5 + testdata/emails/embedded_email_chinese.txt | 7 + testdata/emails/embedded_email_dutch_1.txt | 13 + testdata/emails/embedded_email_dutch_2.txt | 62 +++ testdata/emails/embedded_email_french_1.txt | 12 + testdata/emails/embedded_email_french_2.txt | 21 + testdata/emails/embedded_email_german_1.txt | 26 + testdata/emails/embedded_email_german_2.txt | 6 + 
testdata/emails/embedded_email_german_3.txt | 10 + testdata/emails/embedded_email_german_4.txt | 18 + testdata/emails/embedded_email_german_5.txt | 23 + testdata/emails/embedded_email_german_6.txt | 14 + testdata/emails/embedded_email_italian.txt | 31 ++ testdata/emails/embedded_email_norwegian.txt | 11 + testdata/emails/embedded_email_polish_1.txt | 34 ++ testdata/emails/embedded_email_polish_2.txt | 11 + testdata/emails/embedded_email_portuguese.txt | 18 + testdata/emails/embedded_email_quote_text.txt | 10 + testdata/emails/embedded_email_russian_1.txt | 27 + testdata/emails/embedded_email_russian_2.txt | 26 + testdata/emails/embedded_email_spanish_1.txt | 41 ++ testdata/emails/embedded_email_spanish_2.txt | 12 + testdata/emails/embedded_email_swedish.txt | 20 + testdata/emails/embedded_email_ukrainian.txt | 19 + testdata/emails/empty.txt | 0 testdata/emails/forwarded_message.txt | 9 + testdata/emails/normalize_line_endings.txt | 4 + testdata/emails/quote_and_text.txt | 3 + testdata/emails/quote_only.txt | 1 + .../emails/retains_spaces_and_formatting.txt | 14 + testdata/emails/signature.txt | 3 + testdata/emails/signatures.txt | 33 ++ testdata/emails/strip.txt | 10 + testdata/emails/text_only.txt | 1 + testdata/emails/text_only_with_divider.txt | 9 + testdata/emails/usenet.txt | 9 + testdata/reply/delimiters.txt | 3 + testdata/reply/dual_embedded.txt | 1 + testdata/reply/email_headers_1.txt | 1 + testdata/reply/email_headers_2.txt | 1 + testdata/reply/email_headers_3.txt | 1 + testdata/reply/email_headers_4.txt | 1 + testdata/reply/email_headers_5.txt | 11 + testdata/reply/embedded_ception.txt | 4 + testdata/reply/embedded_email_1.txt | 1 + testdata/reply/embedded_email_10.txt | 1 + testdata/reply/embedded_email_11.txt | 1 + testdata/reply/embedded_email_12.txt | 1 + testdata/reply/embedded_email_13.txt | 3 + testdata/reply/embedded_email_14.txt | 3 + testdata/reply/embedded_email_15.txt | 3 + testdata/reply/embedded_email_16.txt | 11 + 
testdata/reply/embedded_email_17.txt | 35 ++ testdata/reply/embedded_email_18.txt | 5 + testdata/reply/embedded_email_19.txt | 13 + testdata/reply/embedded_email_2.txt | 3 + testdata/reply/embedded_email_3.txt | 7 + testdata/reply/embedded_email_4.txt | 2 + testdata/reply/embedded_email_5.txt | 1 + testdata/reply/embedded_email_6.txt | 7 + testdata/reply/embedded_email_7.txt | 10 + testdata/reply/embedded_email_8.txt | 1 + testdata/reply/embedded_email_9.txt | 1 + testdata/reply/embedded_email_chinese.txt | 2 + testdata/reply/embedded_email_dutch_1.txt | 1 + testdata/reply/embedded_email_dutch_2.txt | 2 + testdata/reply/embedded_email_french_1.txt | 1 + testdata/reply/embedded_email_french_2.txt | 1 + testdata/reply/embedded_email_german_1.txt | 1 + testdata/reply/embedded_email_german_2.txt | 1 + testdata/reply/embedded_email_german_3.txt | 1 + testdata/reply/embedded_email_german_4.txt | 1 + testdata/reply/embedded_email_german_5.txt | 1 + testdata/reply/embedded_email_german_6.txt | 4 + testdata/reply/embedded_email_italian.txt | 1 + testdata/reply/embedded_email_norwegian.txt | 1 + testdata/reply/embedded_email_polish_1.txt | 1 + testdata/reply/embedded_email_polish_2.txt | 2 + testdata/reply/embedded_email_portuguese.txt | 2 + testdata/reply/embedded_email_quote_text.txt | 2 + testdata/reply/embedded_email_russian_1.txt | 1 + testdata/reply/embedded_email_russian_2.txt | 1 + testdata/reply/embedded_email_spanish_1.txt | 6 + testdata/reply/embedded_email_spanish_2.txt | 1 + testdata/reply/embedded_email_swedish.txt | 9 + testdata/reply/embedded_email_ukrainian.txt | 1 + testdata/reply/empty.txt | 0 testdata/reply/forwarded_message.txt | 1 + testdata/reply/normalize_line_endings.txt | 4 + testdata/reply/quote_and_text.txt | 3 + testdata/reply/quote_only.txt | 1 + .../reply/retains_spaces_and_formatting.txt | 13 + testdata/reply/signature.txt | 1 + testdata/reply/signatures.txt | 1 + testdata/reply/strip.txt | 1 + testdata/reply/text_only.txt | 1 + 
testdata/reply/text_only_with_divider.txt | 9 + testdata/reply/usenet.txt | 1 + utilities.go | 43 ++ 136 files changed, 2310 insertions(+) create mode 100644 .gitignore create mode 100644 .travis.yml create mode 100644 LICENSE create mode 100644 README.md create mode 100644 api.go create mode 100644 api_test.go create mode 100644 classify.go create mode 100644 doc.go create mode 100644 go.mod create mode 100644 go.sum create mode 100644 pattern.go create mode 100644 testdata/emails/delimiters.txt create mode 100644 testdata/emails/dual_embedded.txt create mode 100644 testdata/emails/email_headers_1.txt create mode 100644 testdata/emails/email_headers_2.txt create mode 100644 testdata/emails/email_headers_3.txt create mode 100644 testdata/emails/email_headers_4.txt create mode 100644 testdata/emails/email_headers_5.txt create mode 100644 testdata/emails/embedded_ception.txt create mode 100644 testdata/emails/embedded_email_1.txt create mode 100644 testdata/emails/embedded_email_10.txt create mode 100644 testdata/emails/embedded_email_11.txt create mode 100644 testdata/emails/embedded_email_12.txt create mode 100644 testdata/emails/embedded_email_13.txt create mode 100644 testdata/emails/embedded_email_14.txt create mode 100644 testdata/emails/embedded_email_15.txt create mode 100644 testdata/emails/embedded_email_16.txt create mode 100644 testdata/emails/embedded_email_17.txt create mode 100644 testdata/emails/embedded_email_18.txt create mode 100644 testdata/emails/embedded_email_19.txt create mode 100644 testdata/emails/embedded_email_2.txt create mode 100644 testdata/emails/embedded_email_3.txt create mode 100644 testdata/emails/embedded_email_4.txt create mode 100644 testdata/emails/embedded_email_5.txt create mode 100644 testdata/emails/embedded_email_6.txt create mode 100644 testdata/emails/embedded_email_7.txt create mode 100644 testdata/emails/embedded_email_8.txt create mode 100644 testdata/emails/embedded_email_9.txt create mode 100644 
testdata/emails/embedded_email_chinese.txt create mode 100644 testdata/emails/embedded_email_dutch_1.txt create mode 100644 testdata/emails/embedded_email_dutch_2.txt create mode 100644 testdata/emails/embedded_email_french_1.txt create mode 100644 testdata/emails/embedded_email_french_2.txt create mode 100644 testdata/emails/embedded_email_german_1.txt create mode 100644 testdata/emails/embedded_email_german_2.txt create mode 100644 testdata/emails/embedded_email_german_3.txt create mode 100644 testdata/emails/embedded_email_german_4.txt create mode 100644 testdata/emails/embedded_email_german_5.txt create mode 100644 testdata/emails/embedded_email_german_6.txt create mode 100644 testdata/emails/embedded_email_italian.txt create mode 100644 testdata/emails/embedded_email_norwegian.txt create mode 100644 testdata/emails/embedded_email_polish_1.txt create mode 100644 testdata/emails/embedded_email_polish_2.txt create mode 100644 testdata/emails/embedded_email_portuguese.txt create mode 100644 testdata/emails/embedded_email_quote_text.txt create mode 100644 testdata/emails/embedded_email_russian_1.txt create mode 100644 testdata/emails/embedded_email_russian_2.txt create mode 100644 testdata/emails/embedded_email_spanish_1.txt create mode 100644 testdata/emails/embedded_email_spanish_2.txt create mode 100644 testdata/emails/embedded_email_swedish.txt create mode 100644 testdata/emails/embedded_email_ukrainian.txt create mode 100644 testdata/emails/empty.txt create mode 100644 testdata/emails/forwarded_message.txt create mode 100644 testdata/emails/normalize_line_endings.txt create mode 100644 testdata/emails/quote_and_text.txt create mode 100644 testdata/emails/quote_only.txt create mode 100644 testdata/emails/retains_spaces_and_formatting.txt create mode 100644 testdata/emails/signature.txt create mode 100644 testdata/emails/signatures.txt create mode 100644 testdata/emails/strip.txt create mode 100644 testdata/emails/text_only.txt create mode 100644 
testdata/emails/text_only_with_divider.txt create mode 100644 testdata/emails/usenet.txt create mode 100644 testdata/reply/delimiters.txt create mode 100644 testdata/reply/dual_embedded.txt create mode 100644 testdata/reply/email_headers_1.txt create mode 100644 testdata/reply/email_headers_2.txt create mode 100644 testdata/reply/email_headers_3.txt create mode 100644 testdata/reply/email_headers_4.txt create mode 100644 testdata/reply/email_headers_5.txt create mode 100644 testdata/reply/embedded_ception.txt create mode 100644 testdata/reply/embedded_email_1.txt create mode 100644 testdata/reply/embedded_email_10.txt create mode 100644 testdata/reply/embedded_email_11.txt create mode 100644 testdata/reply/embedded_email_12.txt create mode 100644 testdata/reply/embedded_email_13.txt create mode 100644 testdata/reply/embedded_email_14.txt create mode 100644 testdata/reply/embedded_email_15.txt create mode 100644 testdata/reply/embedded_email_16.txt create mode 100644 testdata/reply/embedded_email_17.txt create mode 100644 testdata/reply/embedded_email_18.txt create mode 100644 testdata/reply/embedded_email_19.txt create mode 100644 testdata/reply/embedded_email_2.txt create mode 100644 testdata/reply/embedded_email_3.txt create mode 100644 testdata/reply/embedded_email_4.txt create mode 100644 testdata/reply/embedded_email_5.txt create mode 100644 testdata/reply/embedded_email_6.txt create mode 100644 testdata/reply/embedded_email_7.txt create mode 100644 testdata/reply/embedded_email_8.txt create mode 100644 testdata/reply/embedded_email_9.txt create mode 100644 testdata/reply/embedded_email_chinese.txt create mode 100644 testdata/reply/embedded_email_dutch_1.txt create mode 100644 testdata/reply/embedded_email_dutch_2.txt create mode 100644 testdata/reply/embedded_email_french_1.txt create mode 100644 testdata/reply/embedded_email_french_2.txt create mode 100644 testdata/reply/embedded_email_german_1.txt create mode 100644 
testdata/reply/embedded_email_german_2.txt create mode 100644 testdata/reply/embedded_email_german_3.txt create mode 100644 testdata/reply/embedded_email_german_4.txt create mode 100644 testdata/reply/embedded_email_german_5.txt create mode 100644 testdata/reply/embedded_email_german_6.txt create mode 100644 testdata/reply/embedded_email_italian.txt create mode 100644 testdata/reply/embedded_email_norwegian.txt create mode 100644 testdata/reply/embedded_email_polish_1.txt create mode 100644 testdata/reply/embedded_email_polish_2.txt create mode 100644 testdata/reply/embedded_email_portuguese.txt create mode 100644 testdata/reply/embedded_email_quote_text.txt create mode 100644 testdata/reply/embedded_email_russian_1.txt create mode 100644 testdata/reply/embedded_email_russian_2.txt create mode 100644 testdata/reply/embedded_email_spanish_1.txt create mode 100644 testdata/reply/embedded_email_spanish_2.txt create mode 100644 testdata/reply/embedded_email_swedish.txt create mode 100644 testdata/reply/embedded_email_ukrainian.txt create mode 100644 testdata/reply/empty.txt create mode 100644 testdata/reply/forwarded_message.txt create mode 100644 testdata/reply/normalize_line_endings.txt create mode 100644 testdata/reply/quote_and_text.txt create mode 100644 testdata/reply/quote_only.txt create mode 100644 testdata/reply/retains_spaces_and_formatting.txt create mode 100644 testdata/reply/signature.txt create mode 100644 testdata/reply/signatures.txt create mode 100644 testdata/reply/strip.txt create mode 100644 testdata/reply/text_only.txt create mode 100644 testdata/reply/text_only_with_divider.txt create mode 100644 testdata/reply/usenet.txt create mode 100644 utilities.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5d1f701 --- /dev/null +++ b/.gitignore @@ -0,0 +1,18 @@ +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, 
specifically when used with LiteIDE +*.out + +# Dependency directories (remove the comment below to include it) +# vendor/ + +# Editor +.idea \ No newline at end of file diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..a867333 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,10 @@ +language: go + +go: + - 1.12.x + - 1.13.x + - 1.14.x + - tip + +env: + - GO111MODULE=on \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..98be50d --- /dev/null +++ b/LICENSE @@ -0,0 +1,23 @@ +MIT License + +Copyright (c) The Forgejo Authors +Copyright (c) Discourse +Copyright (c) Claudemiro + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..cf0e33e --- /dev/null +++ b/README.md @@ -0,0 +1,46 @@ +[![go.dev reference](https://img.shields.io/badge/go.dev-reference-007d9c?logo=go&logoColor=white&style=flat-square)](https://pkg.go.dev/code.forgejo.org/forgejo/reply) +[![Go Report Card](https://goreportcard.com/report/code.forgejo.org/forgejo/reply)](https://goreportcard.com/report/code.forgejo.org/forgejo/reply) + +# reply + + Library to trim replies from plain text email. (Golang port of https://github.com/discourse/email_reply_trimmer) + + Forked from https://github.com/dimiro1/reply + + ## Why + + Reply was forked because the original project does not seem to be maintained. + Furthermore, the version before the fork removes parts of the mail that should not be removed. + The project was forked to change this. + +# Usage + +```go +package main + +import ( + "fmt" + + "code.forgejo.org/forgejo/reply" +) + +func main() { + message := ` + This is before the embedded email. + + On Wed, Sep 25, 2013, at 03:57 PM, richard_clark wrote: + + Richard> This is the embedded email + + This is after the embedded email and will not show up because 99% of the times + this is the signature... + ` + fmt.Println(reply.FromText(message)) +} +``` + +will output: + +```text +This is before the embedded email. +``` diff --git a/api.go b/api.go new file mode 100644 index 0000000..28849fc --- /dev/null +++ b/api.go @@ -0,0 +1,279 @@ +package reply + +import ( + "io" + "io/ioutil" + "strings" + "time" + + "github.com/dlclark/regexp2" +) + +func init() { + // The default configuration is set to 'forever'. + // I am not expecting any regex to take more than a few milliseconds. + // Setting this value to 1s just to be on the safe side. + regexp2.DefaultMatchTimeout = 1 * time.Second +} + +// FromReader returns the reply text from the e-mail text body. 
+func FromReader(reader io.Reader) (string, error) { + bytes, err := ioutil.ReadAll(reader) + if err != nil { + return "", err + } + + return FromText(string(bytes)), nil +} + +// FromText returns the reply text from the e-mail text body. +func FromText(text string) string { + if strings.TrimSpace(text) == "" { + return text + } + + // do some cleanup + text = cleanup(text) + + // from now on, we'll work on a line-by-line basis + lines := strings.Split(text, "\n") + patternBuilder := strings.Builder{} + + for _, line := range lines { + patternBuilder.WriteString(classifyLine(line)) + } + + pattern := patternBuilder.String() + + // remove everything after the first delimiter + { + match, err := regexp2.MustCompile(`d`, regexp2.RE2).FindStringMatch(pattern) + if err != nil { + return "" + } + if match != nil { + index := match.Index + pattern = sliceString(pattern, 0, index-1) + lines = sliceArray(lines, 0, index-1) + } + } + + // remove all mobile signatures + for { + match, err := regexp2.MustCompile(`s`, regexp2.RE2).FindStringMatch(pattern) + if err != nil { + return "" + } + if match != nil { + index := match.Index + pattern = stringSliceBang(pattern, index) + lines = sliceSliceBang(lines, index) + } else { + break + } + } + + // when the reply is at the end of the email + { + match, err := regexp2.MustCompile(`^(b[^t]+)*b[bqeh]+t[et]*$`, regexp2.RE2).FindStringMatch(pattern) + if err != nil { + return "" + } + if match != nil { + submatch, err := regexp2.MustCompile(`t[et]*$`, regexp2.RE2).FindStringMatch(pattern) + if err != nil { + return "" + } + + index := submatch.Index + pattern = "" + lines = sliceArray(lines, index, len(lines)-1) + } + } + + // if there is an embedded email marker, not followed by a quote + // then take everything up to that marker + { + match, err := regexp2.MustCompile(`te*b[^q]*$`, regexp2.RE2).FindStringMatch(pattern) + if err != nil { + return "" + } + if match != nil { + index := match.Index + pattern = sliceString(pattern, 0, 
index) + lines = sliceArray(lines, 0, index) + } + } + + // if there is an embedded email marker, followed by a huge quote + // then take everything up to that marker + { + match, err := regexp2.MustCompile(`te*b[eqbh]*([te]*)$`, regexp2.RE2).FindStringMatch(pattern) + if err != nil { + return "" + } + if match != nil && strings.Count(match.GroupByNumber(1).String(), "t") < 7 { + submatch, err := regexp2.MustCompile(`te*b[eqbh]*[te]*$`, regexp2.RE2).FindStringMatch(pattern) + if err != nil { + return "" + } + index := submatch.Index + pattern = sliceString(pattern, 0, index) + lines = sliceArray(lines, 0, index) + } + } + + // if there is some text before a huge quote ending the email, + // then remove the quote + { + match, err := regexp2.MustCompile(`te*[qbe]+$`, regexp2.RE2).FindStringMatch(pattern) + if err != nil { + return "" + } + if match != nil { + index := match.Index + pattern = sliceString(pattern, 0, index) + lines = sliceArray(lines, 0, index) + } + } + + // if there still are some embedded email markers, just remove them + for { + match, err := regexp2.MustCompile(`b`, regexp2.RE2).FindStringMatch(pattern) + if err != nil { + return "" + } + if match != nil { + index := match.Index + pattern = stringSliceBang(pattern, index) + lines = sliceSliceBang(lines, index) + } else { + break + } + } + + // fix email headers when they span over multiple lines + { + match, err := regexp2.MustCompile(`h+[hte]+h+e`, regexp2.RE2).FindStringMatch(pattern) + if err != nil { + return "" + } + if match != nil { + index := match.Index + for i := 0; i < match.Length; i++ { + c := []rune(header)[0] + pattern = stringReplaceChar(pattern, c, index+i) + } + } + } + + // if there are at least 3 consecutive email headers, + // take everything up to these headers + { + match, err := regexp2.MustCompile(`t[eq]*h{3,}`, regexp2.RE2).FindStringMatch(pattern) + if err != nil { + return "" + } + if match != nil { + index := match.Index + pattern = sliceString(pattern, 0, index) + 
lines = sliceArray(lines, 0, index) + } + } + + // if there still are some email headers, just remove them + for { + match, err := regexp2.MustCompile(`h`, regexp2.RE2).FindStringMatch(pattern) + if err != nil { + return "" + } + if match != nil { + index := match.Index + pattern = stringSliceBang(pattern, index) + lines = sliceSliceBang(lines, index) + } else { + break + } + } + + // remove trailing quotes when there's at least one line of text + { + match1, err := regexp2.MustCompile(`t`, regexp2.RE2).FindStringMatch(pattern) + if err != nil { + return "" + } + match2, err := regexp2.MustCompile(`[eq]+$`, regexp2.RE2).FindStringMatch(pattern) + if err != nil { + return "" + } + if match1 != nil && match2 != nil { + index := match2.Index + pattern = sliceString(pattern, 0, index-1) + lines = sliceArray(lines, 0, index-1) + } + } + + return strings.Join(lines, "\n") +} + +func cleanup(text string) string { + // normalize line endings + replacer := strings.NewReplacer( + "\r\n", "\n", + "\r", "\n", + ) + + text = replacer.Replace(text) + + // remove PGP markers + for _, r := range patterns["REMOVE_PGP_MARKERS_REGEX"] { + text, _ = r.Replace(text, "", 0, -1) + } + + // remove unsubscribe links + for _, r := range patterns["REMOVE_UNSUBSCRIBE_REGEX"] { + text, _ = r.Replace(text, "", 0, -1) + } + + // remove alias-style quotes marker + for _, r := range patterns["REMOVE_ALIAS_REGEX"] { + text, _ = r.Replace(text, "", 0, -1) + } + + // change enclosed-style quotes format + for _, r := range patterns["CHANGE_ENCLOSED_QUOTE_ONE_REGEX"] { + text, _ = r.ReplaceFunc(text, func(m regexp2.Match) string { + newText, _ := regexp2.MustCompile(`^`, regexp2.RE2).Replace(m.GroupByNumber(2).String(), "> ", 0, -1) + return newText + }, 0, -1) + } + + for _, r := range patterns["CHANGE_ENCLOSED_QUOTE_TWO_REGEX"] { + text, _ = r.ReplaceFunc(text, func(m regexp2.Match) string { + newText, _ := regexp2.MustCompile(`^`, regexp2.RE2).Replace(m.GroupByNumber(1).String(), "> ", 0, -1) + 
return newText + }, 0, -1) + } + + // fix all quotes formats + for _, r := range patterns["FIX_QUOTES_FORMAT_REGEX"] { + text, _ = r.ReplaceFunc(text, func(m regexp2.Match) string { + newText, _ := regexp2.MustCompile(`([[:alpha:]]+>|\|)`, regexp2.RE2).Replace(m.GroupByNumber(1).String(), ">", 0, -1) + return newText + }, 0, -1) + } + + // fix embedded email markers that might span over multiple lines + for _, regex := range patterns["FIX_EMBEDDED_REGEX"] { + text, _ = regex.ReplaceFunc(text, func(m regexp2.Match) string { + if strings.Count(m.String(), "\n") > 4 { + return m.String() + } + newText, _ := regexp2.MustCompile(`\n+[[:space:]]*`, regexp2.RE2).Replace(m.String(), " ", 0, -1) + return newText + }, 0, -1) + } + + // remove leading/trailing whitespaces + return strings.TrimSpace(text) +} diff --git a/api_test.go b/api_test.go new file mode 100644 index 0000000..fafd905 --- /dev/null +++ b/api_test.go @@ -0,0 +1,62 @@ +package reply_test + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strings" + "testing" + + "code.forgejo.org/forgejo/reply" +) + +func TestText(t *testing.T) { + // Add files to be skipped. 
+ var skipped []string + + err := filepath.Walk("testdata/emails", func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + if info.IsDir() { + return nil + } + + if filepath.Ext(path) == ".txt" { + t.Run(path, func(t *testing.T) { + for _, filename := range skipped { + if filename == filepath.Base(path) { + t.Skipf("%s is not implemented", filename) + } + } + + in, err := os.Open(path) + if err != nil { + t.Fatalf("unexpected error: %s", err) + } + + expected, err := ioutil.ReadFile(fmt.Sprintf("testdata/reply/%s", filepath.Base(path))) + if err != nil { + t.Fatalf("unexpected error: %s", err) + } + + replyText, err := reply.FromReader(in) + if err != nil { + t.Fatalf("unexpected error: %s", err) + } + + if strings.TrimSpace(replyText) != strings.TrimSpace(string(expected)) { + t.Errorf("\nexpected:\n%s\n\ngot:\n%s", string(expected), replyText) + } + }) + } + + return nil + }) + + if err != nil { + t.Errorf("unexpected error: %s", err) + } +} diff --git a/classify.go b/classify.go new file mode 100644 index 0000000..8f30ccc --- /dev/null +++ b/classify.go @@ -0,0 +1,93 @@ +package reply + +import ( + re2 "github.com/dlclark/regexp2" +) + +const ( + delimiter = "d" + embedded = "b" + empty = "e" + header = "h" + quote = "q" + signature = "s" + text = "t" +) + +func classifyLine(line string) string { + if isEmptyLine(line) { + return empty + } + + if isDelimiter(line) { + return delimiter + } + + if isSignature(line) { + return signature + } + + if isEmbeddedEmail(line) { + return embedded + } + + if isHeader(line) { + return header + } + + if isQuote(line) { + return quote + } + + return text +} + +func isEmptyLine(line string) bool { + ok, _ := re2.MustCompile(`^[[:blank:]]*$`, re2.RE2).MatchString(line) + return ok +} + +func isQuote(line string) bool { + ok, _ := re2.MustCompile(`^[[:blank:]]*>`, re2.RE2).MatchString(line) + return ok +} + +func isDelimiter(line string) bool { + ok, _ := 
re2.MustCompile(`^[[:blank:]]*([_,=+~#*ᐧ—]+|[\-]{4,}|[\-]+[[:blank:]])[[:blank:]]*$`, re2.RE2).MatchString(line) + return ok +} + +func isSignature(line string) bool { + // remove any markdown links + stripped, _ := re2.MustCompile(`\[([^\]]+)\]\([^\)]+\)`, re2.RE2).Replace(line, "$1", 0, -1) + for _, r := range patterns["SIGNATURE_REGEXES"] { + ok, _ := r.MatchString(stripped) + if ok { + return true + } + } + + return false +} + +func isHeader(line string) bool { + for _, r := range patterns["EMAIL_HEADER_REGEXES"] { + ok, _ := r.MatchString(line) + if ok { + return true + } + } + + return false +} + +func isEmbeddedEmail(line string) bool { + for _, r := range patterns["EMBEDDED_REGEXES"] { + ok, _ := r.MatchString(line) + if ok { + return true + } + } + + return false +} diff --git a/doc.go b/doc.go new file mode 100644 index 0000000..65506fb --- /dev/null +++ b/doc.go @@ -0,0 +1,14 @@ +// Package reply is essentially a source code conversion +// of the Ruby library https://github.com/discourse/email_reply_trimmer. +// The core logic is an almost line-by-line conversion. +// +// This package has a dependency on the excellent regex library github.com/dlclark/regexp2. +// The reason for not using the standard regex library is that +// the regex package from the stdlib is not compatible with the library from the Ruby stdlib. +// +// All the tests were taken from the email_reply_trimmer library. +// +// Note: +// This code is not idiomatic Go code, as it was mostly adapted from the Ruby code; +// however, the public API was kept as simple as possible and does not expose any internals.
+package reply diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..1e016cb --- /dev/null +++ b/go.mod @@ -0,0 +1,5 @@ +module code.forgejo.org/forgejo/reply + +go 1.12 + +require github.com/dlclark/regexp2 v1.2.0 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..ee12505 --- /dev/null +++ b/go.sum @@ -0,0 +1,2 @@ +github.com/dlclark/regexp2 v1.2.0 h1:8sAhBGEM0dRWogWqWyQeIJnxjWO6oIjl8FKqREDsGfk= +github.com/dlclark/regexp2 v1.2.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc= diff --git a/pattern.go b/pattern.go new file mode 100644 index 0000000..2403f6d --- /dev/null +++ b/pattern.go @@ -0,0 +1,471 @@ +package reply + +import ( + "fmt" + "strings" + + "github.com/dlclark/regexp2" +) + +var patterns = map[string][]*regexp2.Regexp{ + "REMOVE_PGP_MARKERS_REGEX": { + regexp2.MustCompile(`\A-----BEGIN PGP SIGNED MESSAGE-----\n(?:Hash: \w+)?\s+`, + regexp2.IgnoreCase|regexp2.RE2, + ), + regexp2.MustCompile( + `^-----BEGIN PGP SIGNATURE-----$[\s\S]+^-----END PGP SIGNATURE-----`, + regexp2.RE2, + ), + }, + + "REMOVE_UNSUBSCRIBE_REGEX": { + regexp2.MustCompile(`^Unsubscribe: .+@.+(\n.+http:.+)?\s*\z`, regexp2.IgnoreCase|regexp2.RE2), + }, + + "REMOVE_ALIAS_REGEX": { + regexp2.MustCompile(`^.*>{5} "[^"\n]+" == .+ writes:`, regexp2.RE2), + }, + + "CHANGE_ENCLOSED_QUOTE_ONE_REGEX": { + regexp2.MustCompile(`^>>> ?(.+) ?>>>$\n([\s\S]+?)\n^<<< ?1 ?<<<$`, regexp2.RE2), + }, + + "CHANGE_ENCLOSED_QUOTE_TWO_REGEX": { + regexp2.MustCompile(`^>{4,}[[:blank:]]*$\n([\s\S]+?)\n^<{4,}[[:blank:]]*$`, regexp2.RE2|regexp2.Multiline), + }, + + "FIX_QUOTES_FORMAT_REGEX": { + regexp2.MustCompile(`^((?:[[:blank:]]*[[:alpha:]]*[>|])+)`, regexp2.RE2|regexp2.Multiline), + }, + + // On init + "FIX_EMBEDDED_REGEX": {}, + + // Envoyé depuis mon iPhone + // Von meinem Mobilgerät gesendet + // Diese Nachricht wurde von meinem Android-Mobiltelefon mit K-9 Mail gesendet. 
+ // Nik from mobile + // From My Iphone 6 + // Sent via mobile + // Sent with Airmail + // Sent from Windows Mail + // Sent from my TI-85 + // <> + // (sent from a phone) + // (Sent from mobile device) + // 從我的 iPhone 傳送 + "SIGNATURE_REGEXES": { + // Chinese + regexp2.MustCompile( + `^[[:blank:]]*從我的 iPhone 傳送`, + regexp2.IgnoreCase|regexp2.RE2, + ), + // English + regexp2.MustCompile( + `^[[:blank:]]*[[:word:]]+ from mobile`, + regexp2.IgnoreCase|regexp2.RE2, + ), + regexp2.MustCompile( + `^[[:blank:]]*[(<]*Sent (from|via|with|by) .+[)>]*`, + regexp2.IgnoreCase|regexp2.RE2, + ), + regexp2.MustCompile( + `^[[:blank:]]*From my .{1,20}`, + regexp2.IgnoreCase|regexp2.RE2, + ), + regexp2.MustCompile( + `^[[:blank:]]*Get Outlook for `, + regexp2.IgnoreCase|regexp2.RE2, + ), + // French + regexp2.MustCompile( + `^[[:blank:]]*Envoyé depuis (mon|Yahoo Mail)`, + regexp2.IgnoreCase|regexp2.RE2, + ), + // German + regexp2.MustCompile( + `^[[:blank:]]*Von meinem .+ gesendet`, + regexp2.IgnoreCase|regexp2.RE2, + ), + regexp2.MustCompile( + `^[[:blank:]]*Diese Nachricht wurde von .+ gesendet`, + regexp2.IgnoreCase|regexp2.RE2, + ), + // Italian + regexp2.MustCompile( + `^[[:blank:]]*Inviato da `, + regexp2.IgnoreCase|regexp2.RE2, + ), + // Norwegian + regexp2.MustCompile( + `^[[:blank:]]*Sendt fra min `, + regexp2.IgnoreCase|regexp2.RE2, + ), + // Portuguese + regexp2.MustCompile( + `^[[:blank:]]*Enviado do meu `, + regexp2.IgnoreCase|regexp2.RE2, + ), + // Spanish + regexp2.MustCompile( + `^[[:blank:]]*Enviado desde mi `, + regexp2.IgnoreCase|regexp2.RE2, + ), + // Dutch + regexp2.MustCompile( + `^[[:blank:]]*Verzonden met `, + regexp2.IgnoreCase|regexp2.RE2, + ), + regexp2.MustCompile( + `^[[:blank:]]*Verstuurd vanaf mijn `, + regexp2.IgnoreCase|regexp2.RE2, + ), + // Swedish + regexp2.MustCompile( + `^[[:blank:]]*från min `, + regexp2.IgnoreCase|regexp2.RE2, + ), + }, + + // On init + "EMAIL_HEADERS_WITH_DATE_REGEXES": {}, + "EMAIL_HEADERS_WITH_TEXT_REGEXES": {}, + 
"EMAIL_HEADER_REGEXES": {}, + + // On Wed, Sep 25, 2013, at 03:57 PM, jorge_castro wrote: + // On Thursday, June 27, 2013, knwang via Discourse Meta wrote: + // On Wed, 2015-12-02 at 13:58 +0000, Tom Newsom wrote: + // On 10/12/15 12:30, Jeff Atwood wrote: + // ---- On Tue, 22 Dec 2015 14:17:36 +0530 Sam Saffron<info@discourse.org> wrote ---- + // Op 24 aug. 2013 om 16:48 heeft ven88 via Discourse Meta het volgende geschreven: + // Le 4 janv. 2016 19:03, "Neil Lalonde" a écrit : + // Dnia 14 lip 2015 o godz. 00:25 Michael Downey napisał(a): + // Em seg, 27 de jul de 2015 17:13, Neil Lalonde escreveu: + // El jueves, 21 de noviembre de 2013, codinghorror escribió: + // At 6/16/2016 08:32 PM, you wrote: + "ON_DATE_SOMEONE_WROTE_REGEXES": { + // Chinese + regexp2.MustCompile( + `^[[:blank:]<>-]*在 (?:(?!\b(?>在|写道)\b).)+?写道[[:blank:].:>-]*$`, + regexp2.IgnoreCase|regexp2.Singleline|regexp2.Multiline|regexp2.RE2, + ), + // Dutch + regexp2.MustCompile( + `^[[:blank:]<>-]*Op (?:(?!\b(?>Op|het\svolgende\sgeschreven|schreef)\b).)+?(het\svolgende\sgeschreven|schreef[^:]+)[[:blank:].:>-]*$`, + regexp2.IgnoreCase|regexp2.Singleline|regexp2.Multiline|regexp2.RE2, + ), + // English + regexp2.MustCompile( + `^[[:blank:]<>-]*In message (?:(?!\b(?>In message|writes)\b).)+?writes[[:blank:].:>-]*$`, + regexp2.IgnoreCase|regexp2.Singleline|regexp2.Multiline|regexp2.RE2, + ), + regexp2.MustCompile( + `^[[:blank:]<>-]*(On|At) (?:(?!\b(?>On|wrote|writes|says|said)\b).)+?(wrote|writes|says|said)[[:blank:].:>-]*$`, + regexp2.IgnoreCase|regexp2.Singleline|regexp2.Multiline|regexp2.RE2, + ), + // French + regexp2.MustCompile( + `^[[:blank:]<>-]*Le (?:(?!\b(?>Le|nous\sa\sdit|a\s+écrit)\b).)+?(nous\sa\sdit|a\s+écrit)[[:blank:].:>- ]*$`, + regexp2.IgnoreCase|regexp2.Singleline|regexp2.Multiline|regexp2.RE2, + ), + // German + regexp2.MustCompile( + `^[[:blank:]<>-]*Am (?:(?!\b(?>Am|schrieben\sSie)\b).)+?schrieben\sSie[[:blank:].:>-]*$`, + 
regexp2.IgnoreCase|regexp2.Singleline|regexp2.Multiline|regexp2.RE2, + ), + regexp2.MustCompile( + `^[[:blank:]<>-]*Am (?:(?!\b(?>Am|geschrieben)\b).)+?(geschrieben|schrieb[^:]+)[[:blank:].:>-]*$`, + regexp2.IgnoreCase|regexp2.Singleline|regexp2.Multiline|regexp2.RE2, + ), + // Italian + regexp2.MustCompile( + `^[[:blank:]<>-]*Il (?:(?!\b(?>Il|ha\sscritto)\b).)+?ha\sscritto[[:blank:].:>-]*$`, + regexp2.IgnoreCase|regexp2.Singleline|regexp2.Multiline|regexp2.RE2, + ), + // Polish + regexp2.MustCompile( + `^[[:blank:]<>-]*(Dnia|Dňa) (?:(?!\b(?>Dnia|Dňa|napisał)\b).)+?napisał(\(a\))?[[:blank:].:>-]*$`, + regexp2.IgnoreCase|regexp2.Singleline|regexp2.Multiline|regexp2.RE2, + ), + // Portuguese + regexp2.MustCompile( + `^[[:blank:]<>-]*Em (?:(?!\b(?>Em|escreveu)\b).)+?escreveu[[:blank:].:>-]*$`, + regexp2.IgnoreCase|regexp2.Singleline|regexp2.Multiline|regexp2.RE2, + ), + // Spanish + regexp2.MustCompile( + `^[[:blank:]<>-]*El (?:(?!\b(?>El|escribió)\b).)+?escribió[[:blank:].:>-]*$`, + regexp2.IgnoreCase|regexp2.Singleline|regexp2.Multiline|regexp2.RE2, + ), + }, + + // On init + "ON_DATE_WROTE_SOMEONE_REGEXES": {}, + "DATE_SOMEONE_WROTE_REGEXES": {}, + + // 2015-10-18 0:17 GMT+03:00 Matt Palmer : + // 2013/10/2 camilohollanda + // вт, 5 янв. 2016 г. в 23:39, Erlend Sogge Heggen : + // ср, 1 апр. 2015, 18:29, Denis Didkovsky : + "DATE_SOMEONE_EMAIL_REGEX": { + regexp2.MustCompile( + `\d{4}.{1,80}\s?<[^@<>]+@[^@<>.]+\.[^@<>]+>:?$`, + regexp2.RE2|regexp2.Multiline, + ), + }, + + // Max Mustermann schrieb am Fr., 28. Apr. 
2017 um 11:53 Uhr: + "SOMEONE_WROTE_ON_DATE_REGEXES": { + // English + regexp2.MustCompile( + `^.+\bwrote\b[[:space:]]+\bon\b.+[^:]+:`, + regexp2.RE2|regexp2.Multiline, + ), + // German + regexp2.MustCompile( + `^.+\bschrieb\b[[:space:]]+\bam\b.+[^:]+:`, + regexp2.RE2|regexp2.Multiline, + ), + }, + + // 2016-03-03 17:21 GMT+01:00 Some One + "ISO_DATE_SOMEONE_REGEX": { + regexp2.MustCompile( + `^[[:blank:]>]*20\d\d-\d\d-\d\d \d\d:\d\d GMT\+\d\d:\d\d [\w[:blank:]]+$`, + regexp2.RE2, + ), + }, + + // Some One wrote: + // Gavin Sinclair (gsinclair@soyabean.com.au) wrote: + "SOMEONE_EMAIL_WROTE_REGEX": { + regexp2.MustCompile( + `^.+\b[\w.+-]+@[\w.-]+\.\w{2,}\b.+wrote:?$`, + regexp2.RE2, + ), + }, + + "SOMEONE_VIA_SOMETHING_WROTE_REGEXES": {}, + + // Posted by mpalmer on 01/21/2016 + "POSTED_BY_SOMEONE_ON_DATE_REGEX": { + regexp2.MustCompile( + `^[[:blank:]>]*Posted by .+ on \d{2}\/\d{2}\/\d{4}$`, + regexp2.IgnoreCase|regexp2.RE2, + ), + }, + + // Begin forwarded message: + // Reply Message + // ----- Forwarded Message ----- + // ----- Original Message ----- + // -----Original Message----- + // ----- Mensagem Original ----- + // -----Mensagem Original----- + // *----- Original Message -----* + // ----- Reply message ----- + // ------------------ 原始邮件 ------------------ + "FORWARDED_EMAIL_REGEXES": { + // English + regexp2.MustCompile( + `^[[:blank:]>]*Begin forwarded message:`, + regexp2.IgnoreCase|regexp2.RE2, + ), + regexp2.MustCompile( + `^[[:blank:]>*]*-{2,}[[:blank:]]*(Forwarded|Original|Reply) Message[[:blank:]]*-{2,}`, + regexp2.IgnoreCase|regexp2.RE2, + ), + // French + regexp2.MustCompile( + `^[[:blank:]>]*Début du message transféré :`, + regexp2.IgnoreCase|regexp2.RE2, + ), + regexp2.MustCompile( + `^[[:blank:]>*]*-{2,}[[:blank:]]*Message transféré[[:blank:]]*-{2,}`, + regexp2.IgnoreCase|regexp2.RE2, + ), + // German + regexp2.MustCompile( + `^[[:blank:]>*]*-{2,}[[:blank:]]*Ursprüngliche Nachricht[[:blank:]]*-{2,}`, + regexp2.IgnoreCase|regexp2.RE2, + ), + // 
Spanish + regexp2.MustCompile( + `^[[:blank:]>*]*-{2,}[[:blank:]]*Mensaje original[[:blank:]]*-{2,}`, + regexp2.IgnoreCase|regexp2.RE2, + ), + // Portuguese + regexp2.MustCompile( + `^[[:blank:]>*]*-{2,}[[:blank:]]*Mensagem original[[:blank:]]*-{2,}`, + regexp2.IgnoreCase|regexp2.RE2, + ), + // Chinese + regexp2.MustCompile( + `^[[:blank:]>*]*-{2,}[[:blank:]]*原始邮件[[:blank:]]*-{2,}`, + regexp2.IgnoreCase|regexp2.RE2, + ), + }, + + // On init + "EMBEDDED_REGEXES": {}, +} + +// init populates the pattern groups declared empty above: the header regexes, the date/author "wrote" regexes, and the combined EMBEDDED_REGEXES and FIX_EMBEDDED_REGEX lists. +func init() { + dateMarkers := [][]string{ + // Norwegian + {"Sendt"}, + // English + {"Sent", "Date"}, + // French + {"Date", "Le"}, + // German + {"Gesendet"}, + // Portuguese + {"Enviada em"}, + // Spanish + {"Enviado"}, + // Spanish (Mexican) + {"Fecha"}, + // Italian + {"Data"}, + // Dutch + {"Datum"}, + // Swedish + {"Skickat"}, + // Chinese + {"发送时间"}, + } + + textMarkers := [][]string{ + // Norwegian + {"Fra", "Til", "Emne"}, + // English + {"From", "To", "Cc", "Reply-To", "Subject"}, + // French + {"De", "Expéditeur", "À", "Destinataire", "Répondre à", "Objet"}, + // German + {"Von", "An", "Betreff"}, + // Portuguese + {"De", "Para", "Assunto"}, + // Spanish + {"De", "Para", "Asunto"}, + // Italian + {"Da", "Risposta", "A", "Oggetto"}, + // Dutch + {"Van", "Beantwoorden - Aan", "Aan", "Onderwerp"}, + // Swedish + {"Från", "Till", "Ämne"}, + // Chinese + {"发件人", "收件人", "主题"}, + } + + // Op 10 dec. 2015 18:35 schreef "Arpit Jalan" : + // Am 18.09.2013 um 16:24 schrieb codinghorror : + // Den 15. jun. 2016 kl. 20.42 skrev Jeff Atwood : + // søn. 30. apr. 2017 kl. 00.26 skrev David Taylor : + onDateWroteSomeoneMarkers := [][]string{ + // Dutch + {"Op", "schreef"}, + // German + {"Am", "schrieb"}, + // Norwegian + {"Den", "skrev"}, + // Norwegian (leading weekday abbreviation) + {`søn\.`, "skrev"}, + } + + // суббота, 14 марта 2015 г. пользователь etewiah написал: + // 23 mar 2017 21:25 "Neil Lalonde" napisał(a): + // 30 серп. 2016 р.
20:45 "Arpit" no-reply@example.com пише: + dateSomeoneWroteMarkers := [][]string{ + // Russian + {"пользователь", "написал"}, + // Polish + {"", "napisał\\(a\\)"}, + // Ukrainian + {"", "пише"}, + } + + // codinghorror via Discourse Meta wrote: + // codinghorror via Discourse Meta schrieb: + someoneViaSomethingWroteMarkers := []string{ + // English + "wrote", + // German + "schrieb", + } + + // date + for _, markers := range dateMarkers { + pattern := regexp2.MustCompile( + fmt.Sprintf(`^[[:blank:]*]*(?:%s)[[:blank:]*]*:.*\d+`, strings.Join(markers, "|")), + regexp2.RE2|regexp2.Multiline, + ) + patterns["EMAIL_HEADERS_WITH_DATE_REGEXES"] = append( + patterns["EMAIL_HEADERS_WITH_DATE_REGEXES"], + pattern, + ) + + patterns["EMAIL_HEADER_REGEXES"] = append(patterns["EMAIL_HEADER_REGEXES"], pattern) + } + + // text + for _, markers := range textMarkers { + pattern := regexp2.MustCompile( + fmt.Sprintf(`^[[:blank:]*]*(?:%s)[[:blank:]*]*:.*[[:word:]]+`, strings.Join(markers, "|")), + regexp2.IgnoreCase|regexp2.Multiline|regexp2.RE2, + ) + patterns["EMAIL_HEADERS_WITH_TEXT_REGEXES"] = append( + patterns["EMAIL_HEADERS_WITH_TEXT_REGEXES"], + pattern, + ) + + patterns["EMAIL_HEADER_REGEXES"] = append(patterns["EMAIL_HEADER_REGEXES"], pattern) + } + + for _, marker := range onDateWroteSomeoneMarkers { + patterns["ON_DATE_WROTE_SOMEONE_REGEXES"] = append( + patterns["ON_DATE_WROTE_SOMEONE_REGEXES"], + regexp2.MustCompile(fmt.Sprintf(`^[[:blank:]>]*%s\s.+\s%s\s[^:]+:`, marker[0], marker[1]), regexp2.RE2), + ) + } + + for _, marker := range dateSomeoneWroteMarkers { + if marker[0] == "" { + patterns["DATE_SOMEONE_WROTE_REGEXES"] = append( + patterns["DATE_SOMEONE_WROTE_REGEXES"], + regexp2.MustCompile(fmt.Sprintf(`\d{4}.{1,80}\n?.{0,80}?%s:`, marker[1]), regexp2.RE2), + ) + } else { + patterns["DATE_SOMEONE_WROTE_REGEXES"] = append( + patterns["DATE_SOMEONE_WROTE_REGEXES"], + regexp2.MustCompile(fmt.Sprintf(`\d{4}.{1,80}%s.{0,80}\n?.{0,80}?%s:`, marker[0], marker[1]), 
regexp2.RE2), + ) + } + } + + for _, marker := range someoneViaSomethingWroteMarkers { + patterns["SOMEONE_VIA_SOMETHING_WROTE_REGEXES"] = append( + patterns["SOMEONE_VIA_SOMETHING_WROTE_REGEXES"], + regexp2.MustCompile(fmt.Sprintf(`^.+ via .+ %s:?[[:blank:]]*$`, marker), regexp2.RE2), + ) + } + + patterns["EMBEDDED_REGEXES"] = append(patterns["EMBEDDED_REGEXES"], patterns["ON_DATE_SOMEONE_WROTE_REGEXES"]...) + patterns["EMBEDDED_REGEXES"] = append(patterns["EMBEDDED_REGEXES"], patterns["ON_DATE_WROTE_SOMEONE_REGEXES"]...) + patterns["EMBEDDED_REGEXES"] = append(patterns["EMBEDDED_REGEXES"], patterns["DATE_SOMEONE_WROTE_REGEXES"]...) + patterns["EMBEDDED_REGEXES"] = append(patterns["EMBEDDED_REGEXES"], patterns["DATE_SOMEONE_EMAIL_REGEX"]...) + patterns["EMBEDDED_REGEXES"] = append(patterns["EMBEDDED_REGEXES"], patterns["SOMEONE_WROTE_ON_DATE_REGEXES"]...) + patterns["EMBEDDED_REGEXES"] = append(patterns["EMBEDDED_REGEXES"], patterns["ISO_DATE_SOMEONE_REGEX"]...) + patterns["EMBEDDED_REGEXES"] = append(patterns["EMBEDDED_REGEXES"], patterns["SOMEONE_VIA_SOMETHING_WROTE_REGEXES"]...) + patterns["EMBEDDED_REGEXES"] = append(patterns["EMBEDDED_REGEXES"], patterns["SOMEONE_EMAIL_WROTE_REGEX"]...) + patterns["EMBEDDED_REGEXES"] = append(patterns["EMBEDDED_REGEXES"], patterns["POSTED_BY_SOMEONE_ON_DATE_REGEX"]...) + patterns["EMBEDDED_REGEXES"] = append(patterns["EMBEDDED_REGEXES"], patterns["FORWARDED_EMAIL_REGEXES"]...) + + patterns["FIX_EMBEDDED_REGEX"] = append(patterns["FIX_EMBEDDED_REGEX"], patterns["ON_DATE_SOMEONE_WROTE_REGEXES"]...) + patterns["FIX_EMBEDDED_REGEX"] = append(patterns["FIX_EMBEDDED_REGEX"], patterns["SOMEONE_WROTE_ON_DATE_REGEXES"]...) + patterns["FIX_EMBEDDED_REGEX"] = append(patterns["FIX_EMBEDDED_REGEX"], patterns["DATE_SOMEONE_WROTE_REGEXES"]...) + patterns["FIX_EMBEDDED_REGEX"] = append(patterns["FIX_EMBEDDED_REGEX"], patterns["DATE_SOMEONE_EMAIL_REGEX"]...) 
+ +} diff --git a/testdata/emails/delimiters.txt b/testdata/emails/delimiters.txt new file mode 100644 index 0000000..009410a --- /dev/null +++ b/testdata/emails/delimiters.txt @@ -0,0 +1,14 @@ +This is not a ---------- delimiter. + +But there should be no delimiter after this line + +ᐧ +-- +*** +#### +~~~~~ +====== +_______ +++++++++ + + ------- diff --git a/testdata/emails/dual_embedded.txt b/testdata/emails/dual_embedded.txt new file mode 100644 index 0000000..c357137 --- /dev/null +++ b/testdata/emails/dual_embedded.txt @@ -0,0 +1,13 @@ +This is the 3rd email. + + +On Mon, Feb 1, 2016 at 4:58 AM -0800, "Some One" wrote: + + +This is the 2nd email. + + +On Sun, Jan 31, 2016 at 9:58 PM -0800, "Another One" wrote: + + +This is the 1st email. diff --git a/testdata/emails/email_headers_1.txt b/testdata/emails/email_headers_1.txt new file mode 100644 index 0000000..79b602c --- /dev/null +++ b/testdata/emails/email_headers_1.txt @@ -0,0 +1,17 @@ +This is a reply from Outlook! + + + ------------------------------ + +*From:* Outlook user +*Sent:* 2016-01-27 +*To:* info@discourse.org +*Subject:* Are you using Outlook? + + + +Just some random text wich is part + +of the embedded email... + +Outlook user diff --git a/testdata/emails/email_headers_2.txt b/testdata/emails/email_headers_2.txt new file mode 100644 index 0000000..701f6fd --- /dev/null +++ b/testdata/emails/email_headers_2.txt @@ -0,0 +1,10 @@ +This is a reply from Outlook! +________________________________________ +From: Discourse > +Reply-To: "For.bar" > +Date: Monday, February 8, 2016 11:44 AM +To: Discourse > +Subject: VIS + + + Here's an email with some very important stuff. + + +________________________________ +Reply here or hit reply from your inbox to help members by sharing your ideas. +Mute this topic to stop getting updates, we'll send you the next one. + + +DO NOT FORWARD THIS EMAIL! 
diff --git a/testdata/emails/email_headers_4.txt b/testdata/emails/email_headers_4.txt new file mode 100644 index 0000000..c3071da --- /dev/null +++ b/testdata/emails/email_headers_4.txt @@ -0,0 +1,17 @@ +test + +Da: Sally54721 +Risposta: Testy McTesterson / Test +Data: giovedì 8 ottobre 2015 15:26 +A: Testy McTesterson +Oggetto: Test | Issue (#3) + +[@example](http://example.com/u/example) + +— +Reply to this email directly or [view it on GitLab](http://git.example.com/example/Test/issues/3). {"@context":"[http://schema.org","@type":"EmailMessage","action":{"@type":"ViewAction","name":"View](http://schema.org%22,%22@type%22:%22EmailMessage%22,%22action%22:%7B%22@type%22:%22ViewAction%22,%22name%22:%22View) Issue","url":"[http://git.example.com/example/Test/issues/3"}](http://git.example.com/example/Test/issues/3%22%7D)} You're receiving this notification because you are a member of the Testy McTesterson / Test project team. + +-- +Questo messaggio e' stato analizzato con Libra ESVA ed e' risultato non infetto. +[Clicca qui per segnalarlo come spam.](http://esva.example.com/cgi-bin/learn-msg.cgi?id=1234567890.ABCDEF) +[Clicca qui per metterlo in blacklist](http://esva.example.com/cgi-bin/learn-msg.cgi?blacklist=1&id=1234567890.ABCDEF) diff --git a/testdata/emails/email_headers_5.txt b/testdata/emails/email_headers_5.txt new file mode 100644 index 0000000..fa23f19 --- /dev/null +++ b/testdata/emails/email_headers_5.txt @@ -0,0 +1,37 @@ +Hi Erlend, + +I don’t know what a “legally recognised educational institution” means for you. + +We’re based in Sweden and we provide education to our people across the globe. + +We want to try Discourse in small scale first to see it will add value to our communities. 
+ +Best regards, + +//Jef + + + +From: Erlend Sogge Heggen +Reply-To: Erlend Sogge Heggen +Date: Wednesday, 5 April 2017 at 17:01 +To: Jef +Subject: [Discourse Meta] [PM] Discourse for Communities of Practice, educational organisation + + +erlend_sh Erlend Sogge Heggen Team +April 5 + + + +Hi Jef, + +Is your University a legally recognised educational institution? Otherwise I'm afraid you're not eligible for this discount. + +Sincerely, + +Erlend + + + +This email message and any attachments may contain confidential information and may be privileged. If you are not the intended recipient or otherwise not authorized to receive this message, you are prohibited to use, copy, disclose or take any action based on this email or any information contained herein. If you are not the intended recipient, please advise the sender immediately by replying to this email and permanently delete this message and any attachments from your system. diff --git a/testdata/emails/embedded_ception.txt b/testdata/emails/embedded_ception.txt new file mode 100644 index 0000000..b12fc24 --- /dev/null +++ b/testdata/emails/embedded_ception.txt @@ -0,0 +1,47 @@ +This is the reply. + +Thanks, +Some One + +On Mon, Feb 1, 2016 at 6:32 PM, Jeff Atwood +wrote: + +> This is Jeff's reply. +> +> On Mon, Feb 1, 2016 at 7:50 AM, Some One > wrote: +> +>> Great! +>> +>> Many thanks. +>> +>> ~s +>> +>> On Mon, Feb 1, 2016 at 5:05 AM Discourse Team wrote: +>> +>>> WAT? +>>> +>>> On Wed, Jan 27, 2016 at 10:48 PM, Some One < +>>> foo@bar.com> wrote: +>>> +>>>> Hi Team, +>>>> +>>>> How is it doing? 
+>>>> +>>>> Some One +>>>> +>>>> On Wed, Jan 27, 2016 at 10:10 AM Discourse Team +>>>> wrote: +>>>> +>>>>> Hello :waves_hand: +>>>>> +>>>> +>>> +> + + +-- +Some One +Community Manager +foo@bar.com +(123) 456-7890 diff --git a/testdata/emails/embedded_email_1.txt b/testdata/emails/embedded_email_1.txt new file mode 100644 index 0000000..0fb1b82 --- /dev/null +++ b/testdata/emails/embedded_email_1.txt @@ -0,0 +1,8 @@ +This is before the embedded email. + +On Wed, Sep 25, 2013, at 03:57 PM, richard_clark wrote: + +Richard> This is the embedded email + +This is after the embedded email and will not show up because 99% of the times +this is the signature... diff --git a/testdata/emails/embedded_email_10.txt b/testdata/emails/embedded_email_10.txt new file mode 100644 index 0000000..3ce3659 --- /dev/null +++ b/testdata/emails/embedded_email_10.txt @@ -0,0 +1,42 @@ +Thank you. + +Sent from Outlook Mobile + + + + +On Sun, Feb 7, 2016 at 12:12 AM -0800, "Arpit Jalan" > wrote: + +Hi Some, + +https://meta.discourse.org is now running on latest Discourse version! + +Regards, +Arpit + +On Fri, Feb 5, 2016 at 10:43 AM Arpit Jalan > wrote: +Okay, sure! + +Arpit +On Fri, 5 Feb 2016 at 10:42, Some One > wrote: +Arpit, + Yes that sounds good. + +Sent from Outlook Mobile + +_____________________________ +From: Arpit Jalan > +Sent: Thursday, February 4, 2016 10:05 AM +Subject: Meta Discourse update +To: Some One >, Discourse Team > + + + +Hi Some One, + +Time to update meta to the latest Discourse version! + +Do you want me to take care of it? 
+ +Regards, +Arpit diff --git a/testdata/emails/embedded_email_11.txt b/testdata/emails/embedded_email_11.txt new file mode 100644 index 0000000..a401e36 --- /dev/null +++ b/testdata/emails/embedded_email_11.txt @@ -0,0 +1,5 @@ +Before + +2016-03-03 17:21 GMT+01:00 Some One + +After diff --git a/testdata/emails/embedded_email_12.txt b/testdata/emails/embedded_email_12.txt new file mode 100644 index 0000000..7139497 --- /dev/null +++ b/testdata/emails/embedded_email_12.txt @@ -0,0 +1,18 @@ +One 1 + +On Thu, 31 Mar 2016 at 11:16 Some One + +< +mailto:Some One +> wrote: + + +Two 2 + +On Thu, 31 Mar 2016 at 10:05 Jeff Atwood + +< +mailto:Jeff Atwood +> wrote: + +Three 3 diff --git a/testdata/emails/embedded_email_13.txt b/testdata/emails/embedded_email_13.txt new file mode 100644 index 0000000..30b99ac --- /dev/null +++ b/testdata/emails/embedded_email_13.txt @@ -0,0 +1,14 @@ +yes we're fine + +thanks + + +At 6/16/2016 08:32 PM, you wrote: +>codinghorror +>Jeff Atwood co-founder +>June 17 +> +>Sorry I got a little mixed up with all the incoming replies. Are you +>able to log in? +> +>Use your email address and "forgot password" if you need it reset. diff --git a/testdata/emails/embedded_email_14.txt b/testdata/emails/embedded_email_14.txt new file mode 100644 index 0000000..374a5ec --- /dev/null +++ b/testdata/emails/embedded_email_14.txt @@ -0,0 +1,16 @@ +I just saw the standard plan has the slack integration. + +Thank you! + +2016-10-24 15:36 GMT+02:00 Foo bar < +info@foo.bar>: + +> Thank you so much Erlend, very thanks! +> +> 2016-10-24 15:03 GMT+02:00 Erlend Sogge Heggen : +> +>> erlend_sh Erlend Sogge +>> Heggen team +>> October 24 +>> +>> I received your application and I've replied with setup instructions. 
diff --git a/testdata/emails/embedded_email_15.txt b/testdata/emails/embedded_email_15.txt new file mode 100644 index 0000000..69196aa --- /dev/null +++ b/testdata/emails/embedded_email_15.txt @@ -0,0 +1,9 @@ +[message body] + +/ Greetings + +2017-02-05 13:29 GMT+02:00 Very long author name < +notifications@forum.some-discourse-host.local>: + +> [Original Messages is quoted here] +> [...] diff --git a/testdata/emails/embedded_email_16.txt b/testdata/emails/embedded_email_16.txt new file mode 100644 index 0000000..c4a208c --- /dev/null +++ b/testdata/emails/embedded_email_16.txt @@ -0,0 +1,16 @@ +> From: "Albert Wagner" +> +> > Some guy at ibraheem@localhost(?) needs to be unsubscribed. I +> > keep getting bounces from his ISP everytime I post. Surely, +> > everyone else is getting the same? +> +> Surely it doesn't need to be said anymore, but I *still* am. +> What's happening with this? + +Yep, me too, although I bet due to RubyConf/OOPSLA, it may be a little +longer yet... + +---- +Eric Hodel - drbrain@segment7.net - http://segment7.net +All messages signed with fingerprint: +FEC2 57F1 D465 EB15 5D6E 7C11 332A 551C 796C 9F04 diff --git a/testdata/emails/embedded_email_17.txt b/testdata/emails/embedded_email_17.txt new file mode 100644 index 0000000..ef4cf0b --- /dev/null +++ b/testdata/emails/embedded_email_17.txt @@ -0,0 +1,38 @@ +On 15 May 2017 19:32, "Nomadic Sprite" wrote: + +The intent was to return nil when the first value was nil. +That was the op's issue. +If one of the values was nil, she/he wanted nil. + +Nil && anything_else will always return nil, and I will not evaluate the +second clause. + +On 8 May 2017 6:51 pm, "Andy Jones" wrote: + +> >>>>>>>> +> It should be: +> 2.0 && 2.0 + 12.0 +> <<<<<<<<< +> +> Ah! Yes, that works. +> +> +People are intent on not understanding, aren't they. 
+ + def nil_add_12 f + f && f + 12.0 + end + +Generalised to two parameters: + + def nil_add a, b + a && b && a + b + end + +The only quirk is the way they handle `false`. + +This is not tested, but it may be possible to do this, too: + + f&.+ b + +Cheers diff --git a/testdata/emails/embedded_email_18.txt b/testdata/emails/embedded_email_18.txt new file mode 100644 index 0000000..f08c127 --- /dev/null +++ b/testdata/emails/embedded_email_18.txt @@ -0,0 +1,7 @@ +On 8 May 2017 17:34, "Andy Jones" wrote: + + nil && 2.0 + 12.0 + +It should be: + + 2.0 && 2.0 + 12.0 diff --git a/testdata/emails/embedded_email_19.txt b/testdata/emails/embedded_email_19.txt new file mode 100644 index 0000000..b197de8 --- /dev/null +++ b/testdata/emails/embedded_email_19.txt @@ -0,0 +1,13 @@ +I had a really hard time with the official guide, so when I finally +figured it out, I wrote one of my own: + + +http://ryanlue.com/posts/2017-02-18-how-to-publish-a-gem + + + +Let me know if you like it (or if you don't) – any feedback is highly +appreciated! + + +—Ryan diff --git a/testdata/emails/embedded_email_2.txt b/testdata/emails/embedded_email_2.txt new file mode 100644 index 0000000..4ec593e --- /dev/null +++ b/testdata/emails/embedded_email_2.txt @@ -0,0 +1,16 @@ +I have checked the available documentation/links to find out the end points. If there are some other endpoints, can you please tell the endpoints + +Best Regards + + + + +---- On Tue, 22 Dec 2015 14:17:36 +0530 Sam Saffron<info@discourse.org> wrote ---- + + + sam Sam Saffron co-founder + December 22 + You are not using the right endpoint, go to user profile / badges, have a look at dev tools to see what it calls + There is a discrete endpoint to get all the badges a user has + + To respond, reply to this email or visit the topic. 
diff --git a/testdata/emails/embedded_email_3.txt b/testdata/emails/embedded_email_3.txt new file mode 100644 index 0000000..20c2626 --- /dev/null +++ b/testdata/emails/embedded_email_3.txt @@ -0,0 +1,24 @@ +Sure, [Bitnami](http://bitnami.com/stack/discourse) supports EC2 and there +are several topics here about it. + +Our [official install guide]( +https://github.com/discourse/discourse/blob/master/docs/INSTALL-ubuntu.md) +also works great on an Amazon EC2 provisioned virtual machine with very +little change. + + +On Thu, Jun 20, 2013 at 4:18 PM, matt2 via Discourse Meta < +info@discourse.org> wrote: + +> matt2 posted in 'Discourse on Ubuntu: Video Walkthrough' on Discourse Meta: +> ------------------------------ +> +> Has anyone tried on AWS? a public AMI would be awesome. +> ------------------------------ +> +> Please visit this link to respond: +> http://meta.discourse.org/t/discourse-on-ubuntu-video-walkthrough/7478/4 +> +> To unsubscribe from these emails, visit your user preferences +> . +> diff --git a/testdata/emails/embedded_email_4.txt b/testdata/emails/embedded_email_4.txt new file mode 100644 index 0000000..db3acf3 --- /dev/null +++ b/testdata/emails/embedded_email_4.txt @@ -0,0 +1,19 @@ +If I do exactly that, then the test group won't be saved after a refresh of +the page. + + +2013/7/20 sam via Discourse Meta + +| sam replied to your post in 'Unable to create group' on Discourse Meta: +| ------------------------------ +| +| Is this an error handling thing, if you name the group "test" and only +| place yourself in it, does it persist? +| ------------------------------ +| +| To respond, reply to this email or visit +| http://meta.discourse.org/t/unable-to-create-group/8198/6 in your browser. +| +| To unsubscribe from these emails, visit your user preferences +| . 
+| diff --git a/testdata/emails/embedded_email_5.txt b/testdata/emails/embedded_email_5.txt new file mode 100644 index 0000000..3b8c95f --- /dev/null +++ b/testdata/emails/embedded_email_5.txt @@ -0,0 +1,5 @@ +I am testing the SOMEONE_EMAIL_WROTE_REGEX regular expression. + +Some One wrote: + +Something that will be considered as the embedded email. diff --git a/testdata/emails/embedded_email_6.txt b/testdata/emails/embedded_email_6.txt new file mode 100644 index 0000000..3f4dc9b --- /dev/null +++ b/testdata/emails/embedded_email_6.txt @@ -0,0 +1,11 @@ +Ensuring that ON_DATE_SOMEONE_WROTE_REGEXES is not greedy. + +On this day + +I wrote something + +which was very interesting. + +On 28 Jan 2016, at 19:04, Some One wrote: + +> Did you write something interesting? diff --git a/testdata/emails/embedded_email_7.txt b/testdata/emails/embedded_email_7.txt new file mode 100644 index 0000000..73b2ae2 --- /dev/null +++ b/testdata/emails/embedded_email_7.txt @@ -0,0 +1,20 @@ +This is a line before the embedded email. +On Tue, 2011-03-01 at 18:02 +0530, Some One wrote: +> Hello +> +> This is the embedded email. + +This is some text + +after the + +embedded email. + +C> +C> This is another part of the embedded email. +C> +C> + + +_______________________ +And here's my signature. diff --git a/testdata/emails/embedded_email_8.txt b/testdata/emails/embedded_email_8.txt new file mode 100644 index 0000000..0558dcf --- /dev/null +++ b/testdata/emails/embedded_email_8.txt @@ -0,0 +1,5 @@ +Foo + +-------- Original Message -------- + +THE END. diff --git a/testdata/emails/embedded_email_9.txt b/testdata/emails/embedded_email_9.txt new file mode 100644 index 0000000..5cf8a78 --- /dev/null +++ b/testdata/emails/embedded_email_9.txt @@ -0,0 +1,5 @@ +Bar + +*-------- Original Message --------* + +THE END. 
diff --git a/testdata/emails/embedded_email_chinese.txt b/testdata/emails/embedded_email_chinese.txt new file mode 100644 index 0000000..3cfccd6 --- /dev/null +++ b/testdata/emails/embedded_email_chinese.txt @@ -0,0 +1,7 @@ +Hi Erlend Sogge Heggen, +Thank you for your reply. + +> 在 2016年12月12日,下午8:45,Erlend Sogge Heggen 写道: +> fu.zhang: +> Some random question +> diff --git a/testdata/emails/embedded_email_dutch_1.txt b/testdata/emails/embedded_email_dutch_1.txt new file mode 100644 index 0000000..2e6b841 --- /dev/null +++ b/testdata/emails/embedded_email_dutch_1.txt @@ -0,0 +1,13 @@ +Nice of you to pick it up! Could you make a PR? Then other people can use it too. Would be great. + + +> Op 2 feb. 2015 om 05:28 heeft VannillaSky het volgende geschreven: +> +> +> VannillaSky +> February 2 +> Solved. Going forward... +> +> To respond, reply to this email or visit https://meta.discourse.org/t/import-posts-from-facebook-group-into-discourse/6089/33 in your browser. +> +> To unsubscribe from these emails, visit your user preferences. diff --git a/testdata/emails/embedded_email_dutch_2.txt b/testdata/emails/embedded_email_dutch_2.txt new file mode 100644 index 0000000..adbdc4b --- /dev/null +++ b/testdata/emails/embedded_email_dutch_2.txt @@ -0,0 +1,62 @@ +The adblocker idea from @elberet might be it. Regretfully I only have time +to test this in 2 weeks. I will let you know then... +Op 16 aug. 2014 05:13 schreef "codinghorror" het +volgende geschreven: + +> codinghorror +> August 15 +> +> Hmm, if that is true we need to fix that somehow. +> +> To respond, reply to this email or visit +> https://meta.discourse.org/t/configuring-google-oauth2-login-for-discourse/15858/38 +> in your browser. +> ------------------------------ +> Previous Replies elberet +> August 15 +> +> Are you using an ad blocker? Adblock Plus, for instance, removes the login +> buttons. +> bwvanmanen +> August 15 +> +> The 'Sign in with Google' button does appear when I try to log in from +> another pc. 
However the button does not appear on my pc using another +> browser. +> bwvanmanen +> August 15 +> +> I followed the steps from the opening post, but I don't get a google login +> option when users try to log in. There is just the normal user/password, +> the word OR, and nothing next to that. Any clues? +> +> neil +> August 1 +> +> Glad you got it fixed. It's good to have the resolution in this topic too. +> I added a link to your topic in the first post. +> jgehrcke +> July 31 +> +> Sorry, that was kind of a cross-posting. Indeed, I got that fixed as you +> say, as reported here: +> jgehrcke said: +> +> I understand the solution you are proposing. The proper fix, without +> customizing the disource.conf in the container, however, would be to inform +> the web application about the protocol used via an HTTP header, as you are +> also suggesting. The question is: does Discourse respect/support +> X-Forwarded-Protocol? Then I can easily configure the nginx reverse proxy +> with proxy_set_header X-Forwarded-Protocol $scheme; Edit: I can confirm +> that properly setting the X-Forwarded-Proto header in the nginx … +> +> Thanks and sorry for not reporting back here! +> ------------------------------ +> +> To respond, reply to this email or visit +> https://meta.discourse.org/t/configuring-google-oauth2-login-for-discourse/15858/38 +> in your browser. +> +> To unsubscribe from these emails, visit your user preferences +> . +> diff --git a/testdata/emails/embedded_email_french_1.txt b/testdata/emails/embedded_email_french_1.txt new file mode 100644 index 0000000..8770498 --- /dev/null +++ b/testdata/emails/embedded_email_french_1.txt @@ -0,0 +1,12 @@ +C'est super ! +Le 4 janv. 2016 19:03, "Neil Lalonde" + +a écrit : + +> team +> January 4 +> +> Discourse c'est top ! 
+> +> Neil +> diff --git a/testdata/emails/embedded_email_french_2.txt b/testdata/emails/embedded_email_french_2.txt new file mode 100644 index 0000000..e2ffa8b --- /dev/null +++ b/testdata/emails/embedded_email_french_2.txt @@ -0,0 +1,21 @@ +Le 2016-11-16 02:22, Erlend Sogge Heggen a écrit : +> erlend_sh [1] Erlend Sogge Heggen [1] team +> November 16 +> +> Hello French guy, +> +>> is the back office also available in french like the front end ? +> +> Oui! +> +> Sincerely, +> +> Erlend +> +> * +> +> Links: +> ------ +> [1] https://meta.discourse.org/users/erlend_sh + +Thanks for your answer. \ No newline at end of file diff --git a/testdata/emails/embedded_email_german_1.txt b/testdata/emails/embedded_email_german_1.txt new file mode 100644 index 0000000..4c775bd --- /dev/null +++ b/testdata/emails/embedded_email_german_1.txt @@ -0,0 +1,26 @@ +alright, no big deal - I assume in this case the code needs some refactoring to better support I18N anyway. + + + +codinghorror via Discourse Meta schrieb: +>codinghorror posted in 'Visually separate "Like it too" / "Undo like"' +>on Discourse Meta: +> +>--- +>So turns out this is sort of a complex change for something so small. +>Our code at that location assumes all phrases end in periods. Changing +>that conditionally to sometimes an em-dash, sometimes not, is kind of.. +>a pain in the butt. +> +>Will keep an eye on it, but low priority given the work required. +> +>--- +>To respond, reply to this email or visit +>http://meta.discourse.org/t/visually-separate-like-it-too-undo-like/8464/3 +>in your browser. +> +>To unsubscribe from these emails, visit your [user +>preferences](http://meta.discourse.org/user_preferences). + +-- +Diese Nachricht wurde von meinem Android-Mobiltelefon mit K-9 Mail gesendet. 
diff --git a/testdata/emails/embedded_email_german_2.txt b/testdata/emails/embedded_email_german_2.txt new file mode 100644 index 0000000..3307678 --- /dev/null +++ b/testdata/emails/embedded_email_german_2.txt @@ -0,0 +1,6 @@ +Hey:) + + +Am 03.02.2016 3:35 nachm. schrieb Max Mustermann : + +> Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. \ No newline at end of file diff --git a/testdata/emails/embedded_email_german_3.txt b/testdata/emails/embedded_email_german_3.txt new file mode 100644 index 0000000..f28feae --- /dev/null +++ b/testdata/emails/embedded_email_german_3.txt @@ -0,0 +1,10 @@ +Gruß Discourse + + +-----Ursprüngliche Nachricht----- +Von: Discourse Team +Gesendet: Don 11 Februar 2016 11:23 +An: Some One +Betreff: Some Subject + +Hallo! Wie geht es Ihnen heute? diff --git a/testdata/emails/embedded_email_german_4.txt b/testdata/emails/embedded_email_german_4.txt new file mode 100644 index 0000000..b28ee17 --- /dev/null +++ b/testdata/emails/embedded_email_german_4.txt @@ -0,0 +1,18 @@ + +Hi there! I am replying from my german Google Inbox. + +Max Mustermann schrieb am Fr., 28. +Apr. 2017 um 11:53 Uhr: + +> max_2 +> April 28 + +> Hi there! you should be getting a mail. +> ------------------------------ + +> Visit Topic +> or reply to this email to respond. + +> To unsubscribe from these emails, click here +> +> . diff --git a/testdata/emails/embedded_email_german_5.txt b/testdata/emails/embedded_email_german_5.txt new file mode 100644 index 0000000..3b08279 --- /dev/null +++ b/testdata/emails/embedded_email_german_5.txt @@ -0,0 +1,23 @@ +Okay. 
Will give it a whirl. + +Erlend Sogge Heggen schrieb am Di., 16. Aug. 2016 +um 12:52 Uhr: + +> erlend_sh Erlend Sogge Heggen +> team +> August 16 +> +> Hi Bob, +> +> Sure, it sounds like your requirements would fit our Standard plan. I +> suggest you sign up for a free trial, as that will be the best way to see +> for yourself if we support the kind of custom styling +> +> Sincerely, +> +> Erlend +> +-- +Viele Grüße / Best regards + +Bob diff --git a/testdata/emails/embedded_email_german_6.txt b/testdata/emails/embedded_email_german_6.txt new file mode 100644 index 0000000..8a1732e --- /dev/null +++ b/testdata/emails/embedded_email_german_6.txt @@ -0,0 +1,14 @@ +Ich habe konzeptionell eine Präferenz für die zweite Option. Die 2 VLs +stellen die Inhalte vor, und präsentieren diese in einen größeren Kontext. +Dann beginnt das Training der technischen Finessen mit Trainingsblatt, +Trainingsvideos usw. Bin gleichzeitig robust, sofern nötig. + +Am 21.04.2016 17:48 schrieb "Einz Zwei" : + +> einz.zwei einz +> zwei +> 21. April +> +> Vielleicht noch eine wichtige Frage: +> Wann sollten wir diese Trainingsvideos veröffentlichen, es gibt 2 diff --git a/testdata/emails/embedded_email_italian.txt b/testdata/emails/embedded_email_italian.txt new file mode 100644 index 0000000..a63fe07 --- /dev/null +++ b/testdata/emails/embedded_email_italian.txt @@ -0,0 +1,31 @@ +Ok, will do. As a test, this is a reply from a webmail client. + + +> Il 26 marzo 2016 alle 13.16 Sam Saffron ha scritto: +> +> +> +> +> +> We need specific examples of failures, PM full source of emails to +> @zogstrip +> +> Posted by sam on 03/26/2016 +> +> +> +> +> +> +> --- +> [Visit Topic](https://meta.discourse.org/t/email-reply-parsing/41597/2) +> or reply to this email to respond +> +> To stop receiving notifications for this particular topic, [click +> here](https://meta.discourse.org/t/email-reply-parsing/41597/unsubscribe). 
+> To unsubscribe from these emails, change your [user +> preferences](https://meta.discourse.org/my/preferences) + +-- +Stefano Costa @stekosteko +Editor, Journal of Open Archaeology Data diff --git a/testdata/emails/embedded_email_norwegian.txt b/testdata/emails/embedded_email_norwegian.txt new file mode 100644 index 0000000..2127425 --- /dev/null +++ b/testdata/emails/embedded_email_norwegian.txt @@ -0,0 +1,11 @@ +Thanx :-) + +Sendt fra min iPad + +Den 15. jun. 2016 kl. 20.42 skrev Jeff Atwood >: + + codinghorror Jeff Atwood co-founder +June 15 + + +Enable tags in your admin, site settings. diff --git a/testdata/emails/embedded_email_polish_1.txt b/testdata/emails/embedded_email_polish_1.txt new file mode 100644 index 0000000..87d9135 --- /dev/null +++ b/testdata/emails/embedded_email_polish_1.txt @@ -0,0 +1,34 @@ +Oh, I've forgot to add. MIT + +------------------ +Łukasz Jan Niemier + +Dnia 14 lip 2015 o godz. 00:25 Michael Downey napisał(a): + +> +> downey Michael Downey Senior Tester +> July 13 +> Sounds interesting. What is the license? +> +> To respond, reply to this email or visit https://meta.discourse.org/t/static-pages-plugin/31027/2 in your browser. +> +> Previous Replies +> +> hauleth +> July 13 +> WIP +> +> Location: https://github.com/hauleth/low_voltage +> +> Authors: @hauleth +> +> Simple plugin to add "static" pages to Discourse instance. For now usage is kind of complicated at it needs to fetch static pages from app/views/pages. Ideas for future implements: +> +> Create DB model to hold pages +> Adding admin page that will allow editing that pages +> Custom styles for static pages +> Any help appreciated. +> +> To respond, reply to this email or visit https://meta.discourse.org/t/static-pages-plugin/31027/2 in your browser. +> +> To unsubscribe from these emails, visit your user preferences. 
diff --git a/testdata/emails/embedded_email_polish_2.txt b/testdata/emails/embedded_email_polish_2.txt new file mode 100644 index 0000000..f770f32 --- /dev/null +++ b/testdata/emails/embedded_email_polish_2.txt @@ -0,0 +1,11 @@ +Hi Neil, +Thanks for prompt response! + + +23 mar 2017 21:25 "Neil Lalonde" napisał(a): + +> neil Neil Lalonde +> Team +> March 23 +> +> I have made required changes. Enjoy! diff --git a/testdata/emails/embedded_email_portuguese.txt b/testdata/emails/embedded_email_portuguese.txt new file mode 100644 index 0000000..4cd9c49 --- /dev/null +++ b/testdata/emails/embedded_email_portuguese.txt @@ -0,0 +1,18 @@ +Olá, +Qualquer dúvida estamos ai. + +Em 2 de fevereiro de 2015 10:58, discourse < +info@discourse.org> escreveu: + +> discourse +> February 2 +> +> Bom dia. Obrigado pela atenção +> ------------------------------ +> +> To respond, reply to this email or visit +> https://meta.discourse.org/ in your browser. +> +> To unsubscribe from these emails, visit your user preferences +> . +> diff --git a/testdata/emails/embedded_email_quote_text.txt b/testdata/emails/embedded_email_quote_text.txt new file mode 100644 index 0000000..a3c86cd --- /dev/null +++ b/testdata/emails/embedded_email_quote_text.txt @@ -0,0 +1,10 @@ +On Mon, Aug 19, 2013 at 2:36 AM, SomeOne via Discourse Meta < +info@discourse.org> wrote: + +> This seems like a problem that would be better solved at the web server +> level, rather than by the application. If nothing else, adding an instance +> of Nginx as an SSL/TLS reverse-proxy would very quickly take care of this. +> + +when I said looks possible, I pointed to rack because that's still the web +server level diff --git a/testdata/emails/embedded_email_russian_1.txt b/testdata/emails/embedded_email_russian_1.txt new file mode 100644 index 0000000..1cf7563 --- /dev/null +++ b/testdata/emails/embedded_email_russian_1.txt @@ -0,0 +1,27 @@ +Why don't you override particular strings with JS? + +понедельник, 6 октября 2014 г. 
пользователь lidel написал: + +> lidel +> October 6 +> winterbox: +> +> Do you have any plan to make this plugin? +> +> discourse-locale-override +> is just a hack, and +> as @sam pointed out, it only works +> if you update your instance via commandline. I created it to speed up my +> work on pl_PL translation only. +> +> I see there is a need for a proper plugin (this and other threads), but +> until such plugin appears this is a semi-working workaround. +> ------------------------------ +> +> To respond, reply to this email or visit +> https://meta.discourse.org/t/use-custom-translation-file/13786/41 in your +> browser. +> +> To unsubscribe from these emails, visit your user preferences +> . +> diff --git a/testdata/emails/embedded_email_russian_2.txt b/testdata/emails/embedded_email_russian_2.txt new file mode 100644 index 0000000..dbc32c1 --- /dev/null +++ b/testdata/emails/embedded_email_russian_2.txt @@ -0,0 +1,26 @@ +Yes. + +20 окт. 2016 г. 19:37 пользователь "Rafael dos Santos Silva" < +meta@discoursemail.com> написал: + +> Falco Rafael dos Santos Silva +> team +> October 20 +> +> Are you using our official docker install? +> +> If you are, try a rebuild: +> +> ssh root@server +> cd /var/discourse +> ./launcher rebuild app +> +> ------------------------------ +> +> Visit Topic or +> reply to this email to respond. +> +> To unsubscribe from these emails, click here +> +> . +> diff --git a/testdata/emails/embedded_email_spanish_1.txt b/testdata/emails/embedded_email_spanish_1.txt new file mode 100644 index 0000000..e72e981 --- /dev/null +++ b/testdata/emails/embedded_email_spanish_1.txt @@ -0,0 +1,41 @@ +I've configured Twitter Authentication, so this error happends on the +application, after twitter posts to my discourse install via the callback. + +In the front error I can get a correct message saying "Something waa wrong, +check you usename and password" and in the backend I see that error in the +logs. 
+ + + +El jueves, 21 de noviembre de 2013, codinghorror escribió: + +> codinghorror +> November 21 +> +> How are you creating this user? Describe it step by step. +> +> To respond, reply to this email or visit +> http://meta.discourse.org/t/error-creating-new-users-pg-numericvalueoutofrange/11046/2in your browser. +> ------------------------------ +> Previous Replies PabloC +> November 21 +> +> Hi guys, +> +> While I create a new user, I'm finding this error in the logs : +> +> Processing by UsersController#create as */* +> Parameters: {"name"=>"Ps4 Support Forum", "email"=>"pablocorral+ps4@gmail.com ", "username"=>"PS4SupportForum", "password_confirmation"=>"[FILTERED]", "challenge"=>"11a4ebe83d9b2a9e0d45bc50b457ee17"} +> PG::NumericValueOutOfRange: ERROR: value "2201945804" is out of range for type integer +> +> Any idea? +> +> Tks! +> ------------------------------ +> +> To respond, reply to this email or visit +> http://meta.discourse.org/t/error-creating-new-users-pg-numericvalueoutofrange/11046/2in your browser. +> +> To unsubscribe from these emails, visit your user preferences +> . +> diff --git a/testdata/emails/embedded_email_spanish_2.txt b/testdata/emails/embedded_email_spanish_2.txt new file mode 100644 index 0000000..869b86e --- /dev/null +++ b/testdata/emails/embedded_email_spanish_2.txt @@ -0,0 +1,12 @@ +Igual que siempre (inclusive ahora), sin nada raro :/ + +-----Mensaje original----- +De: "Miguel" +Enviado: =E2=80=8E16/=E2=80=8E02/=E2=80=8E2016 14:53 +Para: "discourse" +Asunto: [MP]Parser del email + +Visita el tema o responde a este email para publicar. +Para no recibir m=C3=A1s notificaciones de este tema en particular, haz cli= +c aqu=C3=AD. Para darte de baja de estos emails, cambia tus preferencias += diff --git a/testdata/emails/embedded_email_swedish.txt b/testdata/emails/embedded_email_swedish.txt new file mode 100644 index 0000000..0bb885e --- /dev/null +++ b/testdata/emails/embedded_email_swedish.txt @@ -0,0 +1,20 @@ +Hi everyone! 
+ +Here you can find the pictures that I took https://foo.bar + +It was a pleasure to support this event. + +Best regards + +Some One + + + +Från: Foo Bar [mailto:noreply@foo.bar] +Skickat: den 5 juni 2017 12:22 +Till: someone@domain.com +Ämne: [WAT] Photos from the symposium + +If you have any pictures from any parts of the symposium that you are willing to share, please reply to this post to share them (you can simply copy and paste or drag'n'drop them into the text). + +Here are the group pictures we took on Wednesday after lunch. diff --git a/testdata/emails/embedded_email_ukrainian.txt b/testdata/emails/embedded_email_ukrainian.txt new file mode 100644 index 0000000..0317860 --- /dev/null +++ b/testdata/emails/embedded_email_ukrainian.txt @@ -0,0 +1,19 @@ +Спасибо еще раз. + +30 серп. 2016 р. 20:45 "Arpit" пише: + +> meg Foo +> +> Август 30 +> +> Когда будет точное название, напишите в личку и мы поменяем название. +> ------------------------------ +> +> Visit Message +> +> or reply to this email to respond. +> +> To unsubscribe from these emails, click here +> +> . +> diff --git a/testdata/emails/empty.txt b/testdata/emails/empty.txt new file mode 100644 index 0000000..e69de29 diff --git a/testdata/emails/forwarded_message.txt b/testdata/emails/forwarded_message.txt new file mode 100644 index 0000000..1442cb0 --- /dev/null +++ b/testdata/emails/forwarded_message.txt @@ -0,0 +1,9 @@ +---------- Forwarded message ---------- +From: Some One +Date: Thu, Jan 28, 2016 at 4:00 PM +Subject: Some subject that +spans over 2 lines +To: infod@discourse.org + + +This is a forwarded email. And just that. 
diff --git a/testdata/emails/normalize_line_endings.txt b/testdata/emails/normalize_line_endings.txt new file mode 100644 index 0000000..b128eda --- /dev/null +++ b/testdata/emails/normalize_line_endings.txt @@ -0,0 +1,4 @@ +Email with +windows +line +endings diff --git a/testdata/emails/quote_and_text.txt b/testdata/emails/quote_and_text.txt new file mode 100644 index 0000000..7fd77c7 --- /dev/null +++ b/testdata/emails/quote_and_text.txt @@ -0,0 +1,3 @@ +> This is a quote. + +And this is some text. diff --git a/testdata/emails/quote_only.txt b/testdata/emails/quote_only.txt new file mode 100644 index 0000000..dd03bba --- /dev/null +++ b/testdata/emails/quote_only.txt @@ -0,0 +1 @@ +> Email with only quote. diff --git a/testdata/emails/retains_spaces_and_formatting.txt b/testdata/emails/retains_spaces_and_formatting.txt new file mode 100644 index 0000000..46dfaf7 --- /dev/null +++ b/testdata/emails/retains_spaces_and_formatting.txt @@ -0,0 +1,14 @@ +Formatting and + +spaces + +- A +- B +- C + +should be retained + + + - Item #1 + - Item #2 + diff --git a/testdata/emails/signature.txt b/testdata/emails/signature.txt new file mode 100644 index 0000000..4feb9fd --- /dev/null +++ b/testdata/emails/signature.txt @@ -0,0 +1,3 @@ +This email has a signature +-- +My super signature diff --git a/testdata/emails/signatures.txt b/testdata/emails/signatures.txt new file mode 100644 index 0000000..09ea58b --- /dev/null +++ b/testdata/emails/signatures.txt @@ -0,0 +1,33 @@ +This email was sent from way too much different devices ;) + +Envoyé depuis mon iPhone + +Von meinem Mobilgerät gesendet +Diese Nachricht wurde von meinem Android-Mobiltelefon mit K-9 Mail gesendet. 
+ +Someone from mobile +From My Iphone 6 +Sent via mobile +Sent with Airmail +Sent from Windows Mail +Sent from Mailbox +Sent from Mailbox for iPad +Sent from Yahoo Mail on Android +Sent from my TI-85 +Sent from my iPhone +Sent from my iPod +Sent from my Alcatel Flash2 +Sent from my mobile device +Sent from my cell, please excuse any typos. +Sent from my Samsung Galaxy s5 Octacore device +Sent from my HTC M8 Android phone. Please excuse typoze +Sent from my Windows 8 PC +<> +(sent from a phone) +(Sent from mobile device) +從我的 iPhone 傳送 +Sent from [mail](https://go.microsoft.com/fwlink/?LinkId=550986) for windows 10 +Verzonden met BlackBerry Work + +Get Outlook for iOShttps://aka.ms/o0ukef +Get [Outlook for Android](https://aka.ms/ghei36) diff --git a/testdata/emails/strip.txt b/testdata/emails/strip.txt new file mode 100644 index 0000000..877a61c --- /dev/null +++ b/testdata/emails/strip.txt @@ -0,0 +1,10 @@ + + + + + +Strip leading and trailing empty lines. + + + + diff --git a/testdata/emails/text_only.txt b/testdata/emails/text_only.txt new file mode 100644 index 0000000..b6f3666 --- /dev/null +++ b/testdata/emails/text_only.txt @@ -0,0 +1 @@ +Email with only text. diff --git a/testdata/emails/text_only_with_divider.txt b/testdata/emails/text_only_with_divider.txt new file mode 100644 index 0000000..c361835 --- /dev/null +++ b/testdata/emails/text_only_with_divider.txt @@ -0,0 +1,9 @@ +Email before divider. + +-- + +Email between divider. + +--- + +Email after divider. diff --git a/testdata/emails/usenet.txt b/testdata/emails/usenet.txt new file mode 100644 index 0000000..53fa064 --- /dev/null +++ b/testdata/emails/usenet.txt @@ -0,0 +1,9 @@ +Mal sehen was hier mit der Signatur passiert! 
+ +----------------- +Mit lieben Grüßen + +John Doe +http://blog.john.doe +www.facebook.com/johndoe +Mobil: +12 345 6789 012 diff --git a/testdata/reply/delimiters.txt b/testdata/reply/delimiters.txt new file mode 100644 index 0000000..a7cc1f7 --- /dev/null +++ b/testdata/reply/delimiters.txt @@ -0,0 +1,3 @@ +This is not a ---------- delimiter. + +But there should be no delimiter after this line \ No newline at end of file diff --git a/testdata/reply/dual_embedded.txt b/testdata/reply/dual_embedded.txt new file mode 100644 index 0000000..f5e8e9c --- /dev/null +++ b/testdata/reply/dual_embedded.txt @@ -0,0 +1 @@ +This is the 3rd email. \ No newline at end of file diff --git a/testdata/reply/email_headers_1.txt b/testdata/reply/email_headers_1.txt new file mode 100644 index 0000000..22006ac --- /dev/null +++ b/testdata/reply/email_headers_1.txt @@ -0,0 +1 @@ +This is a reply from Outlook! \ No newline at end of file diff --git a/testdata/reply/email_headers_2.txt b/testdata/reply/email_headers_2.txt new file mode 100644 index 0000000..22006ac --- /dev/null +++ b/testdata/reply/email_headers_2.txt @@ -0,0 +1 @@ +This is a reply from Outlook! \ No newline at end of file diff --git a/testdata/reply/email_headers_3.txt b/testdata/reply/email_headers_3.txt new file mode 100644 index 0000000..b4a9fb7 --- /dev/null +++ b/testdata/reply/email_headers_3.txt @@ -0,0 +1 @@ +This is the actual reply. \ No newline at end of file diff --git a/testdata/reply/email_headers_4.txt b/testdata/reply/email_headers_4.txt new file mode 100644 index 0000000..30d74d2 --- /dev/null +++ b/testdata/reply/email_headers_4.txt @@ -0,0 +1 @@ +test \ No newline at end of file diff --git a/testdata/reply/email_headers_5.txt b/testdata/reply/email_headers_5.txt new file mode 100644 index 0000000..6ac562b --- /dev/null +++ b/testdata/reply/email_headers_5.txt @@ -0,0 +1,11 @@ +Hi Erlend, + +I don’t know what a “legally recognised educational institution” means for you. 
+ +We’re based in Sweden and we provide education to our people across the globe. + +We want to try Discourse in small scale first to see it will add value to our communities. + +Best regards, + +//Jef \ No newline at end of file diff --git a/testdata/reply/embedded_ception.txt b/testdata/reply/embedded_ception.txt new file mode 100644 index 0000000..0228e16 --- /dev/null +++ b/testdata/reply/embedded_ception.txt @@ -0,0 +1,4 @@ +This is the reply. + +Thanks, +Some One \ No newline at end of file diff --git a/testdata/reply/embedded_email_1.txt b/testdata/reply/embedded_email_1.txt new file mode 100644 index 0000000..97c4b5b --- /dev/null +++ b/testdata/reply/embedded_email_1.txt @@ -0,0 +1 @@ +This is before the embedded email. \ No newline at end of file diff --git a/testdata/reply/embedded_email_10.txt b/testdata/reply/embedded_email_10.txt new file mode 100644 index 0000000..f440b71 --- /dev/null +++ b/testdata/reply/embedded_email_10.txt @@ -0,0 +1 @@ +Thank you. \ No newline at end of file diff --git a/testdata/reply/embedded_email_11.txt b/testdata/reply/embedded_email_11.txt new file mode 100644 index 0000000..a51296d --- /dev/null +++ b/testdata/reply/embedded_email_11.txt @@ -0,0 +1 @@ +Before \ No newline at end of file diff --git a/testdata/reply/embedded_email_12.txt b/testdata/reply/embedded_email_12.txt new file mode 100644 index 0000000..bd16a53 --- /dev/null +++ b/testdata/reply/embedded_email_12.txt @@ -0,0 +1 @@ +One 1 \ No newline at end of file diff --git a/testdata/reply/embedded_email_13.txt b/testdata/reply/embedded_email_13.txt new file mode 100644 index 0000000..e2ddd4c --- /dev/null +++ b/testdata/reply/embedded_email_13.txt @@ -0,0 +1,3 @@ +yes we're fine + +thanks \ No newline at end of file diff --git a/testdata/reply/embedded_email_14.txt b/testdata/reply/embedded_email_14.txt new file mode 100644 index 0000000..5092432 --- /dev/null +++ b/testdata/reply/embedded_email_14.txt @@ -0,0 +1,3 @@ +I just saw the standard plan has the slack 
integration. + +Thank you! \ No newline at end of file diff --git a/testdata/reply/embedded_email_15.txt b/testdata/reply/embedded_email_15.txt new file mode 100644 index 0000000..dae7a95 --- /dev/null +++ b/testdata/reply/embedded_email_15.txt @@ -0,0 +1,3 @@ +[message body] + +/ Greetings \ No newline at end of file diff --git a/testdata/reply/embedded_email_16.txt b/testdata/reply/embedded_email_16.txt new file mode 100644 index 0000000..736831b --- /dev/null +++ b/testdata/reply/embedded_email_16.txt @@ -0,0 +1,11 @@ +> From: "Albert Wagner" +> +> > Some guy at ibraheem@localhost(?) needs to be unsubscribed. I +> > keep getting bounces from his ISP everytime I post. Surely, +> > everyone else is getting the same? +> +> Surely it doesn't need to be said anymore, but I *still* am. +> What's happening with this? + +Yep, me too, although I bet due to RubyConf/OOPSLA, it may be a little +longer yet... \ No newline at end of file diff --git a/testdata/reply/embedded_email_17.txt b/testdata/reply/embedded_email_17.txt new file mode 100644 index 0000000..9be8a3f --- /dev/null +++ b/testdata/reply/embedded_email_17.txt @@ -0,0 +1,35 @@ +The intent was to return nil when the first value was nil. +That was the op's issue. +If one of the values was nil, she/he wanted nil. + +Nil && anything_else will always return nil, and I will not evaluate the +second clause. + + +> >>>>>>>> +> It should be: +> 2.0 && 2.0 + 12.0 +> <<<<<<<<< +> +> Ah! Yes, that works. +> +> +People are intent on not understanding, aren't they. + + def nil_add_12 f + f && f + 12.0 + end + +Generalised to two parameters: + + def nil_add a, b + a && b && a + b + end + +The only quirk is the way they handle `false`. 
+ +This is not tested, but it may be possible to do this, too: + + f&.+ b + +Cheers diff --git a/testdata/reply/embedded_email_18.txt b/testdata/reply/embedded_email_18.txt new file mode 100644 index 0000000..3aef924 --- /dev/null +++ b/testdata/reply/embedded_email_18.txt @@ -0,0 +1,5 @@ + nil && 2.0 + 12.0 + +It should be: + + 2.0 && 2.0 + 12.0 diff --git a/testdata/reply/embedded_email_19.txt b/testdata/reply/embedded_email_19.txt new file mode 100644 index 0000000..d37000e --- /dev/null +++ b/testdata/reply/embedded_email_19.txt @@ -0,0 +1,13 @@ +I had a really hard time with the official guide, so when I finally +figured it out, I wrote one of my own: + + +http://ryanlue.com/posts/2017-02-18-how-to-publish-a-gem + + + +Let me know if you like it (or if you don't) – any feedback is highly +appreciated! + + +—Ryan \ No newline at end of file diff --git a/testdata/reply/embedded_email_2.txt b/testdata/reply/embedded_email_2.txt new file mode 100644 index 0000000..632bdc8 --- /dev/null +++ b/testdata/reply/embedded_email_2.txt @@ -0,0 +1,3 @@ +I have checked the available documentation/links to find out the end points. If there are some other endpoints, can you please tell the endpoints + +Best Regards \ No newline at end of file diff --git a/testdata/reply/embedded_email_3.txt b/testdata/reply/embedded_email_3.txt new file mode 100644 index 0000000..dea996f --- /dev/null +++ b/testdata/reply/embedded_email_3.txt @@ -0,0 +1,7 @@ +Sure, [Bitnami](http://bitnami.com/stack/discourse) supports EC2 and there +are several topics here about it. + +Our [official install guide]( +https://github.com/discourse/discourse/blob/master/docs/INSTALL-ubuntu.md) +also works great on an Amazon EC2 provisioned virtual machine with very +little change. 
\ No newline at end of file diff --git a/testdata/reply/embedded_email_4.txt b/testdata/reply/embedded_email_4.txt new file mode 100644 index 0000000..cfd5cb0 --- /dev/null +++ b/testdata/reply/embedded_email_4.txt @@ -0,0 +1,2 @@ +If I do exactly that, then the test group won't be saved after a refresh of +the page. \ No newline at end of file diff --git a/testdata/reply/embedded_email_5.txt b/testdata/reply/embedded_email_5.txt new file mode 100644 index 0000000..69b1d03 --- /dev/null +++ b/testdata/reply/embedded_email_5.txt @@ -0,0 +1 @@ +I am testing the SOMEONE_EMAIL_WROTE_REGEX regular expression. \ No newline at end of file diff --git a/testdata/reply/embedded_email_6.txt b/testdata/reply/embedded_email_6.txt new file mode 100644 index 0000000..1e985e6 --- /dev/null +++ b/testdata/reply/embedded_email_6.txt @@ -0,0 +1,7 @@ +Ensuring that ON_DATE_SOMEONE_WROTE_REGEXES is not greedy. + +On this day + +I wrote something + +which was very interesting. \ No newline at end of file diff --git a/testdata/reply/embedded_email_7.txt b/testdata/reply/embedded_email_7.txt new file mode 100644 index 0000000..2f73101 --- /dev/null +++ b/testdata/reply/embedded_email_7.txt @@ -0,0 +1,10 @@ +This is a line before the embedded email. +> Hello +> +> This is the embedded email. + +This is some text + +after the + +embedded email. 
\ No newline at end of file diff --git a/testdata/reply/embedded_email_8.txt b/testdata/reply/embedded_email_8.txt new file mode 100644 index 0000000..9f26b63 --- /dev/null +++ b/testdata/reply/embedded_email_8.txt @@ -0,0 +1 @@ +Foo \ No newline at end of file diff --git a/testdata/reply/embedded_email_9.txt b/testdata/reply/embedded_email_9.txt new file mode 100644 index 0000000..d9d3a9a --- /dev/null +++ b/testdata/reply/embedded_email_9.txt @@ -0,0 +1 @@ +Bar \ No newline at end of file diff --git a/testdata/reply/embedded_email_chinese.txt b/testdata/reply/embedded_email_chinese.txt new file mode 100644 index 0000000..cc556f0 --- /dev/null +++ b/testdata/reply/embedded_email_chinese.txt @@ -0,0 +1,2 @@ +Hi Erlend Sogge Heggen, +Thank you for your reply. \ No newline at end of file diff --git a/testdata/reply/embedded_email_dutch_1.txt b/testdata/reply/embedded_email_dutch_1.txt new file mode 100644 index 0000000..0de8876 --- /dev/null +++ b/testdata/reply/embedded_email_dutch_1.txt @@ -0,0 +1 @@ +Nice of you to pick it up! Could you make a PR? Then other people can use it too. Would be great. \ No newline at end of file diff --git a/testdata/reply/embedded_email_dutch_2.txt b/testdata/reply/embedded_email_dutch_2.txt new file mode 100644 index 0000000..3085383 --- /dev/null +++ b/testdata/reply/embedded_email_dutch_2.txt @@ -0,0 +1,2 @@ +The adblocker idea from @elberet might be it. Regretfully I only have time +to test this in 2 weeks. I will let you know then... \ No newline at end of file diff --git a/testdata/reply/embedded_email_french_1.txt b/testdata/reply/embedded_email_french_1.txt new file mode 100644 index 0000000..817b216 --- /dev/null +++ b/testdata/reply/embedded_email_french_1.txt @@ -0,0 +1 @@ +C'est super ! 
\ No newline at end of file diff --git a/testdata/reply/embedded_email_french_2.txt b/testdata/reply/embedded_email_french_2.txt new file mode 100644 index 0000000..b8f08a8 --- /dev/null +++ b/testdata/reply/embedded_email_french_2.txt @@ -0,0 +1 @@ +Thanks for your answer. \ No newline at end of file diff --git a/testdata/reply/embedded_email_german_1.txt b/testdata/reply/embedded_email_german_1.txt new file mode 100644 index 0000000..39c68ac --- /dev/null +++ b/testdata/reply/embedded_email_german_1.txt @@ -0,0 +1 @@ +alright, no big deal - I assume in this case the code needs some refactoring to better support I18N anyway. \ No newline at end of file diff --git a/testdata/reply/embedded_email_german_2.txt b/testdata/reply/embedded_email_german_2.txt new file mode 100644 index 0000000..2f78bc8 --- /dev/null +++ b/testdata/reply/embedded_email_german_2.txt @@ -0,0 +1 @@ +Hey:) \ No newline at end of file diff --git a/testdata/reply/embedded_email_german_3.txt b/testdata/reply/embedded_email_german_3.txt new file mode 100644 index 0000000..e53a50e --- /dev/null +++ b/testdata/reply/embedded_email_german_3.txt @@ -0,0 +1 @@ +Gruß Discourse \ No newline at end of file diff --git a/testdata/reply/embedded_email_german_4.txt b/testdata/reply/embedded_email_german_4.txt new file mode 100644 index 0000000..8269c80 --- /dev/null +++ b/testdata/reply/embedded_email_german_4.txt @@ -0,0 +1 @@ +Hi there! I am replying from my german Google Inbox. \ No newline at end of file diff --git a/testdata/reply/embedded_email_german_5.txt b/testdata/reply/embedded_email_german_5.txt new file mode 100644 index 0000000..8d2ab8b --- /dev/null +++ b/testdata/reply/embedded_email_german_5.txt @@ -0,0 +1 @@ +Okay. Will give it a whirl. 
\ No newline at end of file diff --git a/testdata/reply/embedded_email_german_6.txt b/testdata/reply/embedded_email_german_6.txt new file mode 100644 index 0000000..cdde001 --- /dev/null +++ b/testdata/reply/embedded_email_german_6.txt @@ -0,0 +1,4 @@ +Ich habe konzeptionell eine Präferenz für die zweite Option. Die 2 VLs +stellen die Inhalte vor, und präsentieren diese in einen größeren Kontext. +Dann beginnt das Training der technischen Finessen mit Trainingsblatt, +Trainingsvideos usw. Bin gleichzeitig robust, sofern nötig. \ No newline at end of file diff --git a/testdata/reply/embedded_email_italian.txt b/testdata/reply/embedded_email_italian.txt new file mode 100644 index 0000000..d8ae279 --- /dev/null +++ b/testdata/reply/embedded_email_italian.txt @@ -0,0 +1 @@ +Ok, will do. As a test, this is a reply from a webmail client. \ No newline at end of file diff --git a/testdata/reply/embedded_email_norwegian.txt b/testdata/reply/embedded_email_norwegian.txt new file mode 100644 index 0000000..f5c5bc0 --- /dev/null +++ b/testdata/reply/embedded_email_norwegian.txt @@ -0,0 +1 @@ +Thanx :-) \ No newline at end of file diff --git a/testdata/reply/embedded_email_polish_1.txt b/testdata/reply/embedded_email_polish_1.txt new file mode 100644 index 0000000..3b80bb1 --- /dev/null +++ b/testdata/reply/embedded_email_polish_1.txt @@ -0,0 +1 @@ +Oh, I've forgot to add. MIT \ No newline at end of file diff --git a/testdata/reply/embedded_email_polish_2.txt b/testdata/reply/embedded_email_polish_2.txt new file mode 100644 index 0000000..eb63ecc --- /dev/null +++ b/testdata/reply/embedded_email_polish_2.txt @@ -0,0 +1,2 @@ +Hi Neil, +Thanks for prompt response! \ No newline at end of file diff --git a/testdata/reply/embedded_email_portuguese.txt b/testdata/reply/embedded_email_portuguese.txt new file mode 100644 index 0000000..af4d656 --- /dev/null +++ b/testdata/reply/embedded_email_portuguese.txt @@ -0,0 +1,2 @@ +Olá, +Qualquer dúvida estamos ai. 
\ No newline at end of file diff --git a/testdata/reply/embedded_email_quote_text.txt b/testdata/reply/embedded_email_quote_text.txt new file mode 100644 index 0000000..f07db18 --- /dev/null +++ b/testdata/reply/embedded_email_quote_text.txt @@ -0,0 +1,2 @@ +when I said looks possible, I pointed to rack because that's still the web +server level \ No newline at end of file diff --git a/testdata/reply/embedded_email_russian_1.txt b/testdata/reply/embedded_email_russian_1.txt new file mode 100644 index 0000000..595ffc0 --- /dev/null +++ b/testdata/reply/embedded_email_russian_1.txt @@ -0,0 +1 @@ +Why don't you override particular strings with JS? \ No newline at end of file diff --git a/testdata/reply/embedded_email_russian_2.txt b/testdata/reply/embedded_email_russian_2.txt new file mode 100644 index 0000000..fa15cab --- /dev/null +++ b/testdata/reply/embedded_email_russian_2.txt @@ -0,0 +1 @@ +Yes. \ No newline at end of file diff --git a/testdata/reply/embedded_email_spanish_1.txt b/testdata/reply/embedded_email_spanish_1.txt new file mode 100644 index 0000000..e1429ea --- /dev/null +++ b/testdata/reply/embedded_email_spanish_1.txt @@ -0,0 +1,6 @@ +I've configured Twitter Authentication, so this error happends on the +application, after twitter posts to my discourse install via the callback. + +In the front error I can get a correct message saying "Something waa wrong, +check you usename and password" and in the backend I see that error in the +logs. 
\ No newline at end of file diff --git a/testdata/reply/embedded_email_spanish_2.txt b/testdata/reply/embedded_email_spanish_2.txt new file mode 100644 index 0000000..6db86ed --- /dev/null +++ b/testdata/reply/embedded_email_spanish_2.txt @@ -0,0 +1 @@ +Igual que siempre (inclusive ahora), sin nada raro :/ \ No newline at end of file diff --git a/testdata/reply/embedded_email_swedish.txt b/testdata/reply/embedded_email_swedish.txt new file mode 100644 index 0000000..b749706 --- /dev/null +++ b/testdata/reply/embedded_email_swedish.txt @@ -0,0 +1,9 @@ +Hi everyone! + +Here you can find the pictures that I took https://foo.bar + +It was a pleasure to support this event. + +Best regards + +Some One \ No newline at end of file diff --git a/testdata/reply/embedded_email_ukrainian.txt b/testdata/reply/embedded_email_ukrainian.txt new file mode 100644 index 0000000..6603eff --- /dev/null +++ b/testdata/reply/embedded_email_ukrainian.txt @@ -0,0 +1 @@ +Спасибо еще раз. \ No newline at end of file diff --git a/testdata/reply/empty.txt b/testdata/reply/empty.txt new file mode 100644 index 0000000..e69de29 diff --git a/testdata/reply/forwarded_message.txt b/testdata/reply/forwarded_message.txt new file mode 100644 index 0000000..1f9bfcd --- /dev/null +++ b/testdata/reply/forwarded_message.txt @@ -0,0 +1 @@ +This is a forwarded email. And just that. \ No newline at end of file diff --git a/testdata/reply/normalize_line_endings.txt b/testdata/reply/normalize_line_endings.txt new file mode 100644 index 0000000..4a1378a --- /dev/null +++ b/testdata/reply/normalize_line_endings.txt @@ -0,0 +1,4 @@ +Email with +windows +line +endings \ No newline at end of file diff --git a/testdata/reply/quote_and_text.txt b/testdata/reply/quote_and_text.txt new file mode 100644 index 0000000..6901420 --- /dev/null +++ b/testdata/reply/quote_and_text.txt @@ -0,0 +1,3 @@ +> This is a quote. + +And this is some text. 
\ No newline at end of file diff --git a/testdata/reply/quote_only.txt b/testdata/reply/quote_only.txt new file mode 100644 index 0000000..dba6df5 --- /dev/null +++ b/testdata/reply/quote_only.txt @@ -0,0 +1 @@ +> Email with only quote. \ No newline at end of file diff --git a/testdata/reply/retains_spaces_and_formatting.txt b/testdata/reply/retains_spaces_and_formatting.txt new file mode 100644 index 0000000..ad6b398 --- /dev/null +++ b/testdata/reply/retains_spaces_and_formatting.txt @@ -0,0 +1,13 @@ +Formatting and + +spaces + +- A +- B +- C + +should be retained + + + - Item #1 + - Item #2 \ No newline at end of file diff --git a/testdata/reply/signature.txt b/testdata/reply/signature.txt new file mode 100644 index 0000000..f3b0f43 --- /dev/null +++ b/testdata/reply/signature.txt @@ -0,0 +1 @@ +This email has a signature diff --git a/testdata/reply/signatures.txt b/testdata/reply/signatures.txt new file mode 100644 index 0000000..5f065f6 --- /dev/null +++ b/testdata/reply/signatures.txt @@ -0,0 +1 @@ +This email was sent from way too much different devices ;) \ No newline at end of file diff --git a/testdata/reply/strip.txt b/testdata/reply/strip.txt new file mode 100644 index 0000000..c07c4be --- /dev/null +++ b/testdata/reply/strip.txt @@ -0,0 +1 @@ +Strip leading and trailing empty lines. \ No newline at end of file diff --git a/testdata/reply/text_only.txt b/testdata/reply/text_only.txt new file mode 100644 index 0000000..e1f4753 --- /dev/null +++ b/testdata/reply/text_only.txt @@ -0,0 +1 @@ +Email with only text. \ No newline at end of file diff --git a/testdata/reply/text_only_with_divider.txt b/testdata/reply/text_only_with_divider.txt new file mode 100644 index 0000000..c361835 --- /dev/null +++ b/testdata/reply/text_only_with_divider.txt @@ -0,0 +1,9 @@ +Email before divider. + +-- + +Email between divider. + +--- + +Email after divider. 
// stringSliceBang returns s with the character at index i removed. It is the
// Go counterpart of Ruby's String#slice!(i).
//
// The index counts UTF-8 characters, not bytes, because the string is split
// with strings.Split(s, ""). Go strings are immutable, so unlike Ruby the
// shortened string is returned and the caller must use the result. An index
// outside the string leaves s unchanged; negative indices are not interpreted
// Ruby-style (counting from the end).
func stringSliceBang(s string, i int) string {
	return strings.Join(sliceSliceBang(strings.Split(s, ""), i), "")
}

// sliceSliceBang returns a copy of slice with the element at index i removed.
// It is the Go counterpart of Ruby's Array#slice!(i).
//
// The result is built in a fresh backing array, so the caller's slice is left
// untouched; the caller must use the returned value. An index outside the
// slice (including any negative index) removes nothing and returns the input
// slice as-is.
func sliceSliceBang(slice []string, i int) []string {
	if i < 0 || i >= len(slice) {
		return slice
	}
	// Copy instead of append(slice[:i], ...): appending to a prefix of the
	// input would shift elements inside the caller's backing array.
	out := make([]string, 0, len(slice)-1)
	out = append(out, slice[:i]...)
	return append(out, slice[i+1:]...)
}

// stringReplaceChar returns in with the character at index i replaced by r.
// It is the Go counterpart of Ruby's `str[i] = "x"`.
//
// The index counts runes, not bytes. It panics if i is out of range.
func stringReplaceChar(in string, r rune, i int) string {
	out := []rune(in)
	out[i] = r
	return string(out)
}

// sliceString returns the bytes of text from start through end, both
// inclusive. It is the Go counterpart of Ruby's str[start..end] (the
// inclusive ".." range; "..." would be exclusive).
//
// Indices are byte offsets. An end past the last byte is clamped to the end
// of the string, and an empty string is returned when start lies after end.
// start must not be negative.
func sliceString(text string, start int, end int) string {
	if end >= len(text) {
		end = len(text) - 1
	}
	if start > end {
		return ""
	}
	// Slice the byte range directly: converting single bytes with
	// string(text[i]) would re-encode every byte >= 0x80 as its own code
	// point and corrupt multi-byte UTF-8 characters.
	return text[start : end+1]
}

// sliceArray returns a new slice holding lines[start] through lines[end],
// both inclusive. It is the Go counterpart of Ruby's arr[start..end] (the
// inclusive ".." range; "..." would be exclusive).
//
// An end past the last element is clamped to the end of lines, and an empty,
// non-nil slice is returned when start lies after end. The result never
// shares its backing array with lines. start must not be negative.
func sliceArray(lines []string, start int, end int) []string {
	if end >= len(lines) {
		end = len(lines) - 1
	}
	if start > end {
		return []string{}
	}
	newLines := make([]string, end-start+1)
	copy(newLines, lines[start:end+1])
	return newLines
}