250 lines
7.2 KiB
Go
250 lines
7.2 KiB
Go
// Copyright (c) 2023 Couchbase, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package iso
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/blevesearch/bleve/v2/analysis"
|
|
"github.com/blevesearch/bleve/v2/registry"
|
|
)
|
|
|
|
const (
	// Name is the key under which this date time parser is registered.
	Name = "isostyle"

	// textLiteralDelimiter is the single quote that opens and closes a text
	// literal inside an ISO style format string. Nothing in this file
	// reassigns it, so it is declared as a constant.
	textLiteralDelimiter byte = '\''
)
|
|
|
|
// timeElementToLayout maps a format specifier letter, and the number of
// times that letter is repeated, to the matching Go layout fragment.
//
// The ISO style format syntax is the one described in
// https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html
//
// Some elements of that syntax are not specified in the Go time package:
//   - 'V' for the timezone name, like "Europe/Berlin" or "America/New_York".
//   - 'Q' for the quarter of year, like Q3 or 3rd Quarter.
//   - 'zzzz' for the full name of a timezone, like "Japan Standard Time" or
//     "Eastern Standard Time".
//   - 'O' for the localized zone-offset, like GMT+8 or GMT+08:00.
//   - '[]' for an optional section of the format.
//   - 'G' for the era, like AD or BC.
//   - 'W' for the week of month.
//   - 'D' for the day of year.
//
// Date strings that contain these elements cannot be parsed.
var timeElementToLayout = map[byte]map[int]string{
	// month of year
	'M': {
		1: "1",       // M    = month of year (1 digit) (1-12)
		2: "01",      // MM   = month of year (2 digits) (01-12)
		3: "Jan",     // MMM  = short month name
		4: "January", // MMMM = full month name
	},
	// day of month
	'd': {
		1: "2",  // d  = day of month (1 digit) (1-31)
		2: "02", // dd = day of month (2 digits) (01-31)
	},
	// AM/PM marker
	'a': {
		1: "PM", // a  = PM/AM
		2: "pm", // aa = pm/am
	},
	// hour on the 24 hour clock; both widths use the same Go layout
	'H': {
		1: "15", // H  = hour (24 hour clock) (1 digit)
		2: "15", // HH = hour (24 hour clock) (2 digits)
	},
	// minute
	'm': {
		1: "4",  // m  = minute (1 digit)
		2: "04", // mm = minute (2 digits)
	},
	// second
	's': {
		1: "5",  // s  = seconds (1 digit)
		2: "05", // ss = seconds (2 digits)
	},

	// timezone offsets from UTC below
	'X': {
		1: "Z07",       // X     = timezone offset (+-hh)
		2: "Z0700",     // XX    = timezone offset (+-hhmm)
		3: "Z07:00",    // XXX   = timezone offset (+-hh:mm)
		4: "Z070000",   // XXXX  = timezone offset (+-hhmmss)
		5: "Z07:00:00", // XXXXX = timezone offset (+-hh:mm:ss)
	},
	'x': {
		1: "-07",       // x     = timezone offset (+-hh)
		2: "-0700",     // xx    = timezone offset (+-hhmm)
		3: "-07:00",    // xxx   = timezone offset (+-hh:mm)
		4: "-070000",   // xxxx  = timezone offset (+-hhmmss)
		5: "-07:00:00", // xxxxx = timezone offset (+-hh:mm:ss)
	},
}
|
|
|
|
// DateTimeParser parses date time strings by trying a list of Go time
// layouts.
type DateTimeParser struct {
	// layouts holds the Go layouts passed to time.Parse, in the order in
	// which they are tried.
	layouts []string
}
|
|
|
|
func New(layouts []string) *DateTimeParser {
|
|
return &DateTimeParser{
|
|
layouts: layouts,
|
|
}
|
|
}
|
|
|
|
func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) {
|
|
for _, layout := range p.layouts {
|
|
rv, err := time.Parse(layout, input)
|
|
if err == nil {
|
|
return rv, layout, nil
|
|
}
|
|
}
|
|
return time.Time{}, "", analysis.ErrInvalidDateTime
|
|
}
|
|
|
|
// letterCounter returns the length of the run of bytes equal to layout[idx]
// that starts at idx. The byte at idx itself is always counted, so the
// result is at least 1.
func letterCounter(layout string, idx int) int {
	end := idx + 1
	for end < len(layout) && layout[end] == layout[idx] {
		end++
	}
	return end - idx
}
|
|
|
|
// invalidFormatError builds the error reported for an unsupported format
// specifier. The specifier is shown as character repeated count times, e.g.
// "hhh".
func invalidFormatError(character byte, count int) error {
	// The specifier is passed as an argument rather than concatenated into
	// the format string, so the format stays constant and the specifier text
	// is never interpreted as formatting verbs.
	return fmt.Errorf("invalid format string, unknown format specifier: %s", strings.Repeat(string(character), count))
}
|
|
|
|
func parseISOString(layout string) (string, error) {
|
|
var dateTimeLayout strings.Builder
|
|
|
|
for idx := 0; idx < len(layout); {
|
|
// check if the character is a text literal delimiter (')
|
|
if layout[idx] == textLiteralDelimiter {
|
|
if idx+1 < len(layout) && layout[idx+1] == textLiteralDelimiter {
|
|
// if the next character is also a text literal delimiter, then
|
|
// copy the character as is
|
|
dateTimeLayout.WriteByte(textLiteralDelimiter)
|
|
idx += 2
|
|
continue
|
|
}
|
|
// find the next text literal delimiter
|
|
for idx++; idx < len(layout); idx++ {
|
|
if layout[idx] == textLiteralDelimiter {
|
|
break
|
|
}
|
|
dateTimeLayout.WriteByte(layout[idx])
|
|
}
|
|
// idx can either be equal to len(layout) if the text literal delimiter is not found
|
|
// after the first text literal delimiter or it will be equal to the index of the
|
|
// second text literal delimiter
|
|
if idx == len(layout) {
|
|
// text literal delimiter not found error
|
|
return "", fmt.Errorf("invalid format string, expected text literal delimiter: " + string(textLiteralDelimiter))
|
|
}
|
|
// increment idx to skip the second text literal delimiter
|
|
idx++
|
|
continue
|
|
}
|
|
// check if character is a letter in english alphabet - a-zA-Z which are reserved
|
|
// for format specifiers
|
|
if (layout[idx] >= 'a' && layout[idx] <= 'z') || (layout[idx] >= 'A' && layout[idx] <= 'Z') {
|
|
// find the number of times the character occurs consecutively
|
|
count := letterCounter(layout, idx)
|
|
character := layout[idx]
|
|
// first check the table
|
|
if layout, ok := timeElementToLayout[character][count]; ok {
|
|
dateTimeLayout.WriteString(layout)
|
|
} else {
|
|
switch character {
|
|
case 'y', 'u', 'Y':
|
|
// year
|
|
if count == 2 {
|
|
dateTimeLayout.WriteString("06")
|
|
} else {
|
|
format := fmt.Sprintf("%%0%ds", count)
|
|
dateTimeLayout.WriteString(fmt.Sprintf(format, "2006"))
|
|
}
|
|
case 'h', 'K':
|
|
// hour (1-12)
|
|
switch count {
|
|
case 2:
|
|
// hh, KK -> 03
|
|
dateTimeLayout.WriteString("03")
|
|
case 1:
|
|
// h, K -> 3
|
|
dateTimeLayout.WriteString("3")
|
|
default:
|
|
// e.g., hhh
|
|
return "", invalidFormatError(character, count)
|
|
}
|
|
case 'E':
|
|
// day of week
|
|
if count == 4 {
|
|
dateTimeLayout.WriteString("Monday") // EEEE -> Monday
|
|
} else if count <= 3 {
|
|
dateTimeLayout.WriteString("Mon") // E, EE, EEE -> Mon
|
|
} else {
|
|
return "", invalidFormatError(character, count) // e.g., EEEEE
|
|
}
|
|
case 'S':
|
|
// fraction of second
|
|
// .SSS = millisecond
|
|
// .SSSSSS = microsecond
|
|
// .SSSSSSSSS = nanosecond
|
|
if count > 9 {
|
|
return "", invalidFormatError(character, count)
|
|
}
|
|
dateTimeLayout.WriteString(strings.Repeat(string('0'), count))
|
|
case 'z':
|
|
// timezone id
|
|
if count < 5 {
|
|
dateTimeLayout.WriteString("MST")
|
|
} else {
|
|
return "", invalidFormatError(character, count)
|
|
}
|
|
default:
|
|
return "", invalidFormatError(character, count)
|
|
}
|
|
}
|
|
idx += count
|
|
} else {
|
|
// copy the character as is
|
|
dateTimeLayout.WriteByte(layout[idx])
|
|
idx++
|
|
}
|
|
}
|
|
return dateTimeLayout.String(), nil
|
|
}
|
|
|
|
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
|
|
layouts, ok := config["layouts"].([]interface{})
|
|
if !ok {
|
|
return nil, fmt.Errorf("must specify layouts")
|
|
}
|
|
var layoutStrs []string
|
|
for _, layout := range layouts {
|
|
layoutStr, ok := layout.(string)
|
|
if ok {
|
|
layout, err := parseISOString(layoutStr)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
layoutStrs = append(layoutStrs, layout)
|
|
}
|
|
}
|
|
return New(layoutStrs), nil
|
|
}
|
|
|
|
func init() {
|
|
err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
}
|