Adding upstream version 2.5.1.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
c71cb8b61d
commit
982828099e
783 changed files with 150650 additions and 0 deletions
250
analysis/datetime/iso/iso.go
Normal file
250
analysis/datetime/iso/iso.go
Normal file
|
@ -0,0 +1,250 @@
|
|||
// Copyright (c) 2023 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package iso
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const Name = "isostyle"
|
||||
|
||||
var textLiteralDelimiter byte = '\'' // single quote
|
||||
|
||||
// ISO style date strings are represented in
|
||||
// https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html
|
||||
//
|
||||
// Some format specifiers are not specified in go time package, such as:
|
||||
// - 'V' for timezone name, like 'Europe/Berlin' or 'America/New_York'.
|
||||
// - 'Q' for quarter of year, like Q3 or 3rd Quarter.
|
||||
// - 'zzzz' for full name of timezone like "Japan Standard Time" or "Eastern Standard Time".
|
||||
// - 'O' for localized zone-offset, like GMT+8 or GMT+08:00.
|
||||
// - '[]' for optional section of the format.
|
||||
// - 'G' for era, like AD or BC.
|
||||
// - 'W' for week of month.
|
||||
// - 'D' for day of year.
|
||||
// So date strings with these date elements cannot be parsed.
|
||||
var timeElementToLayout = map[byte]map[int]string{
|
||||
'M': {
|
||||
4: "January", // MMMM = full month name
|
||||
3: "Jan", // MMM = short month name
|
||||
2: "01", // MM = month of year (2 digits) (01-12)
|
||||
1: "1", // M = month of year (1 digit) (1-12)
|
||||
},
|
||||
'd': {
|
||||
2: "02", // dd = day of month (2 digits) (01-31)
|
||||
1: "2", // d = day of month (1 digit) (1-31)
|
||||
},
|
||||
'a': {
|
||||
2: "pm", // aa = pm/am
|
||||
1: "PM", // a = PM/AM
|
||||
},
|
||||
'H': {
|
||||
2: "15", // HH = hour (24 hour clock) (2 digits)
|
||||
1: "15", // H = hour (24 hour clock) (1 digit)
|
||||
},
|
||||
'm': {
|
||||
2: "04", // mm = minute (2 digits)
|
||||
1: "4", // m = minute (1 digit)
|
||||
},
|
||||
's': {
|
||||
2: "05", // ss = seconds (2 digits)
|
||||
1: "5", // s = seconds (1 digit)
|
||||
},
|
||||
|
||||
// timezone offsets from UTC below
|
||||
'X': {
|
||||
5: "Z07:00:00", // XXXXX = timezone offset (+-hh:mm:ss)
|
||||
4: "Z070000", // XXXX = timezone offset (+-hhmmss)
|
||||
3: "Z07:00", // XXX = timezone offset (+-hh:mm)
|
||||
2: "Z0700", // XX = timezone offset (+-hhmm)
|
||||
1: "Z07", // X = timezone offset (+-hh)
|
||||
},
|
||||
'x': {
|
||||
5: "-07:00:00", // xxxxx = timezone offset (+-hh:mm:ss)
|
||||
4: "-070000", // xxxx = timezone offset (+-hhmmss)
|
||||
3: "-07:00", // xxx = timezone offset (+-hh:mm)
|
||||
2: "-0700", // xx = timezone offset (+-hhmm)
|
||||
1: "-07", // x = timezone offset (+-hh)
|
||||
},
|
||||
}
|
||||
|
||||
type DateTimeParser struct {
|
||||
layouts []string
|
||||
}
|
||||
|
||||
func New(layouts []string) *DateTimeParser {
|
||||
return &DateTimeParser{
|
||||
layouts: layouts,
|
||||
}
|
||||
}
|
||||
|
||||
func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) {
|
||||
for _, layout := range p.layouts {
|
||||
rv, err := time.Parse(layout, input)
|
||||
if err == nil {
|
||||
return rv, layout, nil
|
||||
}
|
||||
}
|
||||
return time.Time{}, "", analysis.ErrInvalidDateTime
|
||||
}
|
||||
|
||||
func letterCounter(layout string, idx int) int {
|
||||
count := 1
|
||||
for idx+count < len(layout) {
|
||||
if layout[idx+count] == layout[idx] {
|
||||
count++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
func invalidFormatError(character byte, count int) error {
|
||||
return fmt.Errorf("invalid format string, unknown format specifier: " + strings.Repeat(string(character), count))
|
||||
}
|
||||
|
||||
func parseISOString(layout string) (string, error) {
|
||||
var dateTimeLayout strings.Builder
|
||||
|
||||
for idx := 0; idx < len(layout); {
|
||||
// check if the character is a text literal delimiter (')
|
||||
if layout[idx] == textLiteralDelimiter {
|
||||
if idx+1 < len(layout) && layout[idx+1] == textLiteralDelimiter {
|
||||
// if the next character is also a text literal delimiter, then
|
||||
// copy the character as is
|
||||
dateTimeLayout.WriteByte(textLiteralDelimiter)
|
||||
idx += 2
|
||||
continue
|
||||
}
|
||||
// find the next text literal delimiter
|
||||
for idx++; idx < len(layout); idx++ {
|
||||
if layout[idx] == textLiteralDelimiter {
|
||||
break
|
||||
}
|
||||
dateTimeLayout.WriteByte(layout[idx])
|
||||
}
|
||||
// idx can either be equal to len(layout) if the text literal delimiter is not found
|
||||
// after the first text literal delimiter or it will be equal to the index of the
|
||||
// second text literal delimiter
|
||||
if idx == len(layout) {
|
||||
// text literal delimiter not found error
|
||||
return "", fmt.Errorf("invalid format string, expected text literal delimiter: " + string(textLiteralDelimiter))
|
||||
}
|
||||
// increment idx to skip the second text literal delimiter
|
||||
idx++
|
||||
continue
|
||||
}
|
||||
// check if character is a letter in english alphabet - a-zA-Z which are reserved
|
||||
// for format specifiers
|
||||
if (layout[idx] >= 'a' && layout[idx] <= 'z') || (layout[idx] >= 'A' && layout[idx] <= 'Z') {
|
||||
// find the number of times the character occurs consecutively
|
||||
count := letterCounter(layout, idx)
|
||||
character := layout[idx]
|
||||
// first check the table
|
||||
if layout, ok := timeElementToLayout[character][count]; ok {
|
||||
dateTimeLayout.WriteString(layout)
|
||||
} else {
|
||||
switch character {
|
||||
case 'y', 'u', 'Y':
|
||||
// year
|
||||
if count == 2 {
|
||||
dateTimeLayout.WriteString("06")
|
||||
} else {
|
||||
format := fmt.Sprintf("%%0%ds", count)
|
||||
dateTimeLayout.WriteString(fmt.Sprintf(format, "2006"))
|
||||
}
|
||||
case 'h', 'K':
|
||||
// hour (1-12)
|
||||
switch count {
|
||||
case 2:
|
||||
// hh, KK -> 03
|
||||
dateTimeLayout.WriteString("03")
|
||||
case 1:
|
||||
// h, K -> 3
|
||||
dateTimeLayout.WriteString("3")
|
||||
default:
|
||||
// e.g., hhh
|
||||
return "", invalidFormatError(character, count)
|
||||
}
|
||||
case 'E':
|
||||
// day of week
|
||||
if count == 4 {
|
||||
dateTimeLayout.WriteString("Monday") // EEEE -> Monday
|
||||
} else if count <= 3 {
|
||||
dateTimeLayout.WriteString("Mon") // E, EE, EEE -> Mon
|
||||
} else {
|
||||
return "", invalidFormatError(character, count) // e.g., EEEEE
|
||||
}
|
||||
case 'S':
|
||||
// fraction of second
|
||||
// .SSS = millisecond
|
||||
// .SSSSSS = microsecond
|
||||
// .SSSSSSSSS = nanosecond
|
||||
if count > 9 {
|
||||
return "", invalidFormatError(character, count)
|
||||
}
|
||||
dateTimeLayout.WriteString(strings.Repeat(string('0'), count))
|
||||
case 'z':
|
||||
// timezone id
|
||||
if count < 5 {
|
||||
dateTimeLayout.WriteString("MST")
|
||||
} else {
|
||||
return "", invalidFormatError(character, count)
|
||||
}
|
||||
default:
|
||||
return "", invalidFormatError(character, count)
|
||||
}
|
||||
}
|
||||
idx += count
|
||||
} else {
|
||||
// copy the character as is
|
||||
dateTimeLayout.WriteByte(layout[idx])
|
||||
idx++
|
||||
}
|
||||
}
|
||||
return dateTimeLayout.String(), nil
|
||||
}
|
||||
|
||||
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
|
||||
layouts, ok := config["layouts"].([]interface{})
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("must specify layouts")
|
||||
}
|
||||
var layoutStrs []string
|
||||
for _, layout := range layouts {
|
||||
layoutStr, ok := layout.(string)
|
||||
if ok {
|
||||
layout, err := parseISOString(layoutStr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
layoutStrs = append(layoutStrs, layout)
|
||||
}
|
||||
}
|
||||
return New(layoutStrs), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
182
analysis/datetime/iso/iso_test.go
Normal file
182
analysis/datetime/iso/iso_test.go
Normal file
|
@ -0,0 +1,182 @@
|
|||
// Copyright (c) 2023 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package iso
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestConversionFromISOStyle(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
output string
|
||||
err error
|
||||
}{
|
||||
{
|
||||
input: "yyyy-MM-dd",
|
||||
output: "2006-01-02",
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
input: "uuu/M''''dd'T'HH:m:ss.SSS",
|
||||
output: "2006/1''02T15:4:05.000",
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
input: "YYYY-MM-dd'T'H:mm:ss zzz",
|
||||
output: "2006-01-02T15:04:05 MST",
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
input: "MMMM dd yyyy', 'HH:mm:ss.SSS",
|
||||
output: "January 02 2006, 15:04:05.000",
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
input: "h 'o'''' clock' a, XXX",
|
||||
output: "3 o' clock PM, Z07:00",
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
input: "YYYY-MM-dd'T'HH:mm:ss'Z'",
|
||||
output: "2006-01-02T15:04:05Z",
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
input: "E MMM d H:mm:ss z Y",
|
||||
output: "Mon Jan 2 15:04:05 MST 2006",
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
input: "E MMM DD H:m:s z Y",
|
||||
output: "",
|
||||
err: fmt.Errorf("invalid format string, unknown format specifier: DD"),
|
||||
},
|
||||
{
|
||||
input: "E MMM''''' H:m:s z Y",
|
||||
output: "",
|
||||
err: fmt.Errorf("invalid format string, expected text literal delimiter: '"),
|
||||
},
|
||||
{
|
||||
input: "MMMMM dd yyyy', 'HH:mm:ss.SSS",
|
||||
output: "",
|
||||
err: fmt.Errorf("invalid format string, unknown format specifier: MMMMM"),
|
||||
},
|
||||
{
|
||||
input: "yy", // year (2 digits)
|
||||
output: "06",
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
input: "yyyyy", // year (5 digits, padded)
|
||||
output: "02006",
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
input: "h", // hour 1-12 (1 digit)
|
||||
output: "3",
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
input: "hh", // hour 1-12 (2 digits)
|
||||
output: "03",
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
input: "KK", // hour 1-12 (2 digits, alt)
|
||||
output: "03",
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
input: "hhh", // invalid hour count
|
||||
output: "",
|
||||
err: fmt.Errorf("invalid format string, unknown format specifier: hhh"),
|
||||
},
|
||||
{
|
||||
input: "E", // Day of week (short)
|
||||
output: "Mon",
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
input: "EEE", // Day of week (short)
|
||||
output: "Mon",
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
input: "EEEE", // Day of week (long)
|
||||
output: "Monday",
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
input: "EEEEE", // Day of week (long)
|
||||
output: "",
|
||||
err: fmt.Errorf("invalid format string, unknown format specifier: EEEEE"),
|
||||
},
|
||||
{
|
||||
input: "S", // Fraction of second (1 digit)
|
||||
output: "0",
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
input: "SSSSSSSSS", // Fraction of second (9 digits)
|
||||
output: "000000000",
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
input: "SSSSSSSSSS", // Invalid fraction of second count
|
||||
output: "",
|
||||
err: fmt.Errorf("invalid format string, unknown format specifier: SSSSSSSSSS"),
|
||||
},
|
||||
{
|
||||
input: "z", // Timezone name (short)
|
||||
output: "MST",
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
input: "zzz", // Timezone name (short) - Corrected expectation
|
||||
output: "MST", // Should output MST
|
||||
err: nil, // Should not produce an error
|
||||
},
|
||||
{
|
||||
input: "zzzz", // Timezone name (long) - Corrected expectation
|
||||
output: "MST", // Should output MST
|
||||
err: nil, // Should not produce an error
|
||||
},
|
||||
{
|
||||
input: "G", // Era designator (unsupported)
|
||||
output: "",
|
||||
err: fmt.Errorf("invalid format string, unknown format specifier: G"),
|
||||
},
|
||||
{
|
||||
input: "W", // Week of month (unsupported)
|
||||
output: "",
|
||||
err: fmt.Errorf("invalid format string, unknown format specifier: W"),
|
||||
},
|
||||
}
|
||||
for i, test := range tests {
|
||||
t.Run(fmt.Sprintf("test %d: %s", i, test.input), func(t *testing.T) {
|
||||
out, err := parseISOString(test.input)
|
||||
// Check error matching
|
||||
if (err != nil && test.err == nil) || (err == nil && test.err != nil) || (err != nil && test.err != nil && err.Error() != test.err.Error()) {
|
||||
t.Fatalf("expected error %v, got error %v", test.err, err)
|
||||
}
|
||||
// Check output matching only if no error was expected/occurred
|
||||
if err == nil && test.err == nil && out != test.output {
|
||||
t.Fatalf("expected output '%v', got '%v'", test.output, out)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue