1
0
Fork 0
golang-github-blevesearch-b.../analysis/datetime/percent/percent.go
Daniel Baumann 982828099e
Adding upstream version 2.5.1.
Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-05-19 00:20:02 +02:00

205 lines
7.5 KiB
Go

// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package percent
import (
"fmt"
"strings"
"time"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the identifier under which this date time parser constructor is
// registered with the registry.
const Name = "percentstyle"
// formatDelimiter is the byte that introduces a format specifier inside a
// percent-style format string (for example the '%' in "%Y").
var formatDelimiter byte = '%'
// formatSpecifierToLayout maps the byte that follows the format delimiter to
// the Go reference-time layout fragment that replaces the specifier.
//
// format specifiers as per strftime in the C standard library
// https://man7.org/linux/man-pages/man3/strftime.3.html
var formatSpecifierToLayout = map[byte]string{
formatDelimiter: string(formatDelimiter), // %% = % (literal %)
'a': "Mon", // %a = short weekday name
'A': "Monday", // %A = full weekday name
'd': "02", // %d = day of month (2 digits) (01-31)
'e': "2", // %e = day of month (1 digit) (1-31)
'b': "Jan", // %b = short month name
'B': "January", // %B = full month name
'm': "01", // %m = month of year (2 digits) (01-12)
'y': "06", // %y = year without century
'Y': "2006", // %Y = year with century
'H': "15", // %H = hour (24 hour clock) (2 digits)
'I': "03", // %I = hour (12 hour clock) (2 digits)
'l': "3", // %l = hour (12 hour clock) (1 digit)
'p': "PM", // %p = PM/AM
'P': "pm", // %P = pm/am (lowercase)
'M': "04", // %M = minute (2 digits)
'S': "05", // %S = seconds (2 digits)
'f': "999999", // .%f = fraction of seconds - up to microseconds (6 digits) - deci/milli/micro
'Z': "MST", // %Z = timezone name (GMT, JST, UTC etc)
// %z is not in this table; it is looked up in timezoneOptions instead
// some additional options not in strftime to support additional options such as
// disallow 0 padding in minute and seconds, nanosecond precision, etc
'o': "1", // %o = month of year (1 digit) (1-12)
'i': "4", // %i = minute (1 digit)
's': "5", // %s = seconds (1 digit)
'N': "999999999", // .%N = fraction of seconds - up to nanoseconds (9 digits) - milli/micro/nano
}
// timezoneOptions maps a %z specifier (without the leading delimiter) and its
// optional modifier to the corresponding Go timezone-offset layout.
//
// some additional options for timezone
// such as allowing colon in timezone offset and specifying the seconds
// timezone offsets are from UTC
var timezoneOptions = map[string]string{
"z": "Z0700", // %z = timezone offset in +-hhmm / +-(2 digit hour)(2 digit minute) +0500, -0600 etc
"z:M": "Z07:00", // %z:M = timezone offset(+-hh:mm) / +-(2 digit hour):(2 digit minute) +05:00, -06:00 etc
"z:S": "Z07:00:00", // %z:S = timezone offset(+-hh:mm:ss) / +-(2 digit hour):(2 digit minute):(2 digit second) +05:20:00, -06:30:00 etc
"zH": "Z07", // %zH = timezone offset(+-hh) / +-(2 digit hour) +05, -06 etc
"zS": "Z070000", // %zS = timezone offset(+-hhmmss) / +-(2 digit hour)(2 digit minute)(2 digit second) +052000, -063000 etc
}
// DateTimeParser parses date/time strings by trying a fixed list of Go time
// layouts one after another.
type DateTimeParser struct {
// layouts are the Go reference-time layouts handed to time.Parse, in the
// order in which they are attempted.
layouts []string
}
// New returns a DateTimeParser that will try the given Go time layouts in
// the order supplied. The slice is stored as-is, not copied.
func New(layouts []string) *DateTimeParser {
	parser := new(DateTimeParser)
	parser.layouts = layouts
	return parser
}
// checkTZOptions resolves a timezone specifier that starts at idx in
// formatString. idx must point at the format delimiter and the following
// byte must be 'z'.
//
// It recognises the plain form (%z) and the modified forms %z:M, %z:S, %zH
// and %zS. It returns the Go layout for the form it found together with the
// index of the first byte after the consumed specifier. When %z is followed
// by anything else (including a lone ':'), only "%z" is consumed and the
// trailing bytes are left for the caller to process.
func checkTZOptions(formatString string, idx int) (string, int) {
	// Position of the first byte after "%z".
	base := idx + 2

	// suffix is the modifier recognised after 'z'; empty means plain %z.
	suffix := ""
	if base < len(formatString) {
		switch c := formatString[base]; c {
		case 'H', 'S':
			// %zH or %zS
			suffix = string(c)
		case ':':
			// A colon only counts when it is followed by 'M' or 'S';
			// otherwise it is left in place as a literal.
			if base+1 < len(formatString) {
				if m := formatString[base+1]; m == 'M' || m == 'S' {
					suffix = ":" + string(m)
				}
			}
		}
	}

	return timezoneOptions["z"+suffix], base + len(suffix)
}
// parseFormatString converts a percent-style format string into a Go time
// layout. Every "%x" specifier is replaced by its Go layout fragment; all
// other bytes are copied through unchanged.
//
// It returns an error when the string ends with a bare delimiter or when the
// byte after a delimiter is not a known specifier.
func parseFormatString(formatString string) (string, error) {
	var out strings.Builder

	pos := 0
	for pos < len(formatString) {
		ch := formatString[pos]

		// Ordinary byte: pass it through verbatim.
		if ch != formatDelimiter {
			out.WriteByte(ch)
			pos++
			continue
		}

		// A delimiter must be followed by a specifier byte.
		if pos+1 >= len(formatString) {
			return "", fmt.Errorf("invalid format string, expected character after %s", string(formatDelimiter))
		}

		spec := formatString[pos+1]
		fragment, known := formatSpecifierToLayout[spec]
		switch {
		case known:
			out.WriteString(fragment)
			pos += 2
		case spec == 'z':
			// Timezone specifiers may carry a modifier, so the helper
			// decides how many bytes are consumed.
			tzFragment, next := checkTZOptions(formatString, pos)
			out.WriteString(tzFragment)
			pos = next
		default:
			return "", fmt.Errorf("invalid format string, unknown format specifier: %s", string(spec))
		}
	}

	return out.String(), nil
}
// ParseDateTime attempts to parse input with each configured layout, in
// order, using time.Parse. On the first success it returns the parsed time
// and the layout that matched. If no layout matches it returns the zero
// time, an empty layout and analysis.ErrInvalidDateTime.
func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) {
	for i := range p.layouts {
		candidate := p.layouts[i]
		parsed, err := time.Parse(candidate, input)
		if err != nil {
			// This layout does not fit; try the next one.
			continue
		}
		return parsed, candidate, nil
	}
	return time.Time{}, "", analysis.ErrInvalidDateTime
}
// DateTimeParserConstructor builds a percent-style date time parser from
// config. config["layouts"] must be a []interface{}; each string element is
// translated from percent-style syntax into a Go time layout. Elements that
// are not strings are skipped. An error is returned when "layouts" is missing
// or of the wrong type, or when any format string fails to translate. The
// cache argument is not used.
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	rawLayouts, isList := config["layouts"].([]interface{})
	if !isList {
		return nil, fmt.Errorf("must specify layouts")
	}

	converted := make([]string, 0, len(rawLayouts))
	for _, raw := range rawLayouts {
		format, isString := raw.(string)
		if !isString {
			// Non-string entries are ignored rather than rejected.
			continue
		}
		goLayout, err := parseFormatString(format)
		if err != nil {
			return nil, err
		}
		converted = append(converted, goLayout)
	}

	return New(converted), nil
}
func init() {
err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
if err != nil {
panic(err)
}
}