679 lines
16 KiB
Go
679 lines
16 KiB
Go
package fexpr
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"strings"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// eof represents a marker rune for the end of the reader.
|
|
const eof = rune(0)
|
|
|
|
// JoinOp represents a join type operator.
|
|
type JoinOp string
|
|
|
|
// supported join type operators
|
|
const (
|
|
JoinAnd JoinOp = "&&"
|
|
JoinOr JoinOp = "||"
|
|
)
|
|
|
|
// SignOp represents an expression sign operator.
|
|
type SignOp string
|
|
|
|
// supported expression sign operators
|
|
const (
|
|
SignEq SignOp = "="
|
|
SignNeq SignOp = "!="
|
|
SignLike SignOp = "~"
|
|
SignNlike SignOp = "!~"
|
|
SignLt SignOp = "<"
|
|
SignLte SignOp = "<="
|
|
SignGt SignOp = ">"
|
|
SignGte SignOp = ">="
|
|
|
|
// array/any operators
|
|
SignAnyEq SignOp = "?="
|
|
SignAnyNeq SignOp = "?!="
|
|
SignAnyLike SignOp = "?~"
|
|
SignAnyNlike SignOp = "?!~"
|
|
SignAnyLt SignOp = "?<"
|
|
SignAnyLte SignOp = "?<="
|
|
SignAnyGt SignOp = "?>"
|
|
SignAnyGte SignOp = "?>="
|
|
)
|
|
|
|
// TokenType represents a Token type.
|
|
type TokenType string
|
|
|
|
// token type constants
|
|
const (
|
|
TokenUnexpected TokenType = "unexpected"
|
|
TokenEOF TokenType = "eof"
|
|
TokenWS TokenType = "whitespace"
|
|
TokenJoin TokenType = "join"
|
|
TokenSign TokenType = "sign"
|
|
TokenIdentifier TokenType = "identifier" // variable, column name, placeholder, etc.
|
|
TokenFunction TokenType = "function" // function
|
|
TokenNumber TokenType = "number"
|
|
TokenText TokenType = "text" // ' or " quoted string
|
|
TokenGroup TokenType = "group" // groupped/nested tokens
|
|
TokenComment TokenType = "comment"
|
|
)
|
|
|
|
// Token represents a single scanned literal (one or more combined runes).
|
|
type Token struct {
|
|
Meta interface{}
|
|
Type TokenType
|
|
Literal string
|
|
}
|
|
|
|
// NewScanner creates and returns a new scanner instance loaded with the specified data.
|
|
func NewScanner(data []byte) *Scanner {
|
|
return &Scanner{
|
|
data: data,
|
|
maxFuncDepth: 3,
|
|
}
|
|
}
|
|
|
|
// Scanner represents a filter and lexical scanner.
|
|
type Scanner struct {
|
|
data []byte
|
|
pos int
|
|
maxFuncDepth int
|
|
}
|
|
|
|
// Scan reads and returns the next available token value from the scanner's buffer.
|
|
func (s *Scanner) Scan() (Token, error) {
|
|
ch := s.read()
|
|
|
|
if ch == eof {
|
|
return Token{Type: TokenEOF, Literal: ""}, nil
|
|
}
|
|
|
|
if isWhitespaceRune(ch) {
|
|
s.unread()
|
|
return s.scanWhitespace()
|
|
}
|
|
|
|
if isGroupStartRune(ch) {
|
|
s.unread()
|
|
return s.scanGroup()
|
|
}
|
|
|
|
if isIdentifierStartRune(ch) {
|
|
s.unread()
|
|
return s.scanIdentifier(s.maxFuncDepth)
|
|
}
|
|
|
|
if isNumberStartRune(ch) {
|
|
s.unread()
|
|
return s.scanNumber()
|
|
}
|
|
|
|
if isTextStartRune(ch) {
|
|
s.unread()
|
|
return s.scanText(false)
|
|
}
|
|
|
|
if isSignStartRune(ch) {
|
|
s.unread()
|
|
return s.scanSign()
|
|
}
|
|
|
|
if isJoinStartRune(ch) {
|
|
s.unread()
|
|
return s.scanJoin()
|
|
}
|
|
|
|
if isCommentStartRune(ch) {
|
|
s.unread()
|
|
return s.scanComment()
|
|
}
|
|
|
|
return Token{Type: TokenUnexpected, Literal: string(ch)}, fmt.Errorf("unexpected character %q", ch)
|
|
}
|
|
|
|
// scanWhitespace consumes all contiguous whitespace runes.
|
|
func (s *Scanner) scanWhitespace() (Token, error) {
|
|
var buf bytes.Buffer
|
|
|
|
// Reads every subsequent whitespace character into the buffer.
|
|
// Non-whitespace runes and EOF will cause the loop to exit.
|
|
for {
|
|
ch := s.read()
|
|
|
|
if ch == eof {
|
|
break
|
|
}
|
|
|
|
if !isWhitespaceRune(ch) {
|
|
s.unread()
|
|
break
|
|
}
|
|
|
|
// write the whitespace rune
|
|
buf.WriteRune(ch)
|
|
}
|
|
|
|
return Token{Type: TokenWS, Literal: buf.String()}, nil
|
|
}
|
|
|
|
// scanNumber consumes all contiguous digit runes
|
|
// (complex numbers and scientific notations are not supported).
|
|
func (s *Scanner) scanNumber() (Token, error) {
|
|
var buf bytes.Buffer
|
|
|
|
var hadDot bool
|
|
|
|
// Read every subsequent digit rune into the buffer.
|
|
// Non-digit runes and EOF will cause the loop to exit.
|
|
for {
|
|
ch := s.read()
|
|
|
|
if ch == eof {
|
|
break
|
|
}
|
|
|
|
// not a digit rune
|
|
if !isDigitRune(ch) &&
|
|
// minus sign but not at the beginning
|
|
(ch != '-' || buf.Len() != 0) &&
|
|
// dot but there was already another dot
|
|
(ch != '.' || hadDot) {
|
|
s.unread()
|
|
break
|
|
}
|
|
|
|
// write the rune
|
|
buf.WriteRune(ch)
|
|
|
|
if ch == '.' {
|
|
hadDot = true
|
|
}
|
|
}
|
|
|
|
total := buf.Len()
|
|
literal := buf.String()
|
|
|
|
var err error
|
|
// only "-" or starts with "." or ends with "."
|
|
if (total == 1 && literal[0] == '-') || literal[0] == '.' || literal[total-1] == '.' {
|
|
err = fmt.Errorf("invalid number %q", literal)
|
|
}
|
|
|
|
return Token{Type: TokenNumber, Literal: buf.String()}, err
|
|
}
|
|
|
|
// scanText consumes all contiguous quoted text runes.
|
|
func (s *Scanner) scanText(preserveQuotes bool) (Token, error) {
|
|
var buf bytes.Buffer
|
|
|
|
// read the first rune to determine the quotes type
|
|
firstCh := s.read()
|
|
buf.WriteRune(firstCh)
|
|
var prevCh rune
|
|
var hasMatchingQuotes bool
|
|
|
|
// Read every subsequent text rune into the buffer.
|
|
// EOF and matching unescaped ending quote will cause the loop to exit.
|
|
for {
|
|
ch := s.read()
|
|
|
|
if ch == eof {
|
|
break
|
|
}
|
|
|
|
// write the text rune
|
|
buf.WriteRune(ch)
|
|
|
|
// unescaped matching quote, aka. the end
|
|
if ch == firstCh && prevCh != '\\' {
|
|
hasMatchingQuotes = true
|
|
break
|
|
}
|
|
|
|
prevCh = ch
|
|
}
|
|
|
|
literal := buf.String()
|
|
|
|
var err error
|
|
if !hasMatchingQuotes {
|
|
err = fmt.Errorf("invalid quoted text %q", literal)
|
|
} else if !preserveQuotes {
|
|
// unquote
|
|
literal = literal[1 : len(literal)-1]
|
|
// remove escaped quotes prefix (aka. \)
|
|
firstChStr := string(firstCh)
|
|
literal = strings.ReplaceAll(literal, `\`+firstChStr, firstChStr)
|
|
}
|
|
|
|
return Token{Type: TokenText, Literal: literal}, err
|
|
}
|
|
|
|
// scanComment consumes all contiguous single line comment runes until
|
|
// a new character (\n) or EOF is reached.
|
|
func (s *Scanner) scanComment() (Token, error) {
|
|
var buf bytes.Buffer
|
|
|
|
// Read the first 2 characters without writting them to the buffer.
|
|
if !isCommentStartRune(s.read()) || !isCommentStartRune(s.read()) {
|
|
return Token{Type: TokenComment}, ErrInvalidComment
|
|
}
|
|
|
|
// Read every subsequent comment text rune into the buffer.
|
|
// \n and EOF will cause the loop to exit.
|
|
for i := 0; ; i++ {
|
|
ch := s.read()
|
|
|
|
if ch == eof || ch == '\n' {
|
|
break
|
|
}
|
|
|
|
buf.WriteRune(ch)
|
|
}
|
|
|
|
return Token{Type: TokenComment, Literal: strings.TrimSpace(buf.String())}, nil
|
|
}
|
|
|
|
// scanIdentifier consumes all contiguous ident runes.
|
|
func (s *Scanner) scanIdentifier(funcDepth int) (Token, error) {
|
|
var buf bytes.Buffer
|
|
|
|
// read the first rune in case it is a special start identifier character
|
|
buf.WriteRune(s.read())
|
|
|
|
// Read every subsequent identifier rune into the buffer.
|
|
// Non-ident runes and EOF will cause the loop to exit.
|
|
for {
|
|
ch := s.read()
|
|
|
|
if ch == eof {
|
|
break
|
|
}
|
|
|
|
// func
|
|
if ch == '(' {
|
|
funcName := buf.String()
|
|
if funcDepth <= 0 {
|
|
return Token{Type: TokenFunction, Literal: funcName}, fmt.Errorf("max nested function arguments reached (max: %d)", s.maxFuncDepth)
|
|
}
|
|
if !isValidIdentifier(funcName) {
|
|
return Token{Type: TokenFunction, Literal: funcName}, fmt.Errorf("invalid function name %q", funcName)
|
|
}
|
|
s.unread()
|
|
return s.scanFunctionArgs(funcName, funcDepth)
|
|
}
|
|
|
|
// not an identifier character
|
|
if !isLetterRune(ch) && !isDigitRune(ch) && !isIdentifierCombineRune(ch) && ch != '_' {
|
|
s.unread()
|
|
break
|
|
}
|
|
|
|
// write the identifier rune
|
|
buf.WriteRune(ch)
|
|
}
|
|
|
|
literal := buf.String()
|
|
|
|
var err error
|
|
if !isValidIdentifier(literal) {
|
|
err = fmt.Errorf("invalid identifier %q", literal)
|
|
}
|
|
|
|
return Token{Type: TokenIdentifier, Literal: literal}, err
|
|
}
|
|
|
|
// scanSign consumes all contiguous sign operator runes.
|
|
func (s *Scanner) scanSign() (Token, error) {
|
|
var buf bytes.Buffer
|
|
|
|
// Read every subsequent sign rune into the buffer.
|
|
// Non-sign runes and EOF will cause the loop to exit.
|
|
for {
|
|
ch := s.read()
|
|
|
|
if ch == eof {
|
|
break
|
|
}
|
|
|
|
if !isSignStartRune(ch) {
|
|
s.unread()
|
|
break
|
|
}
|
|
|
|
// write the sign rune
|
|
buf.WriteRune(ch)
|
|
}
|
|
|
|
literal := buf.String()
|
|
|
|
var err error
|
|
if !isSignOperator(literal) {
|
|
err = fmt.Errorf("invalid sign operator %q", literal)
|
|
}
|
|
|
|
return Token{Type: TokenSign, Literal: literal}, err
|
|
}
|
|
|
|
// scanJoin consumes all contiguous join operator runes.
|
|
func (s *Scanner) scanJoin() (Token, error) {
|
|
var buf bytes.Buffer
|
|
|
|
// Read every subsequent join operator rune into the buffer.
|
|
// Non-join runes and EOF will cause the loop to exit.
|
|
for {
|
|
ch := s.read()
|
|
|
|
if ch == eof {
|
|
break
|
|
}
|
|
|
|
if !isJoinStartRune(ch) {
|
|
s.unread()
|
|
break
|
|
}
|
|
|
|
// write the join operator rune
|
|
buf.WriteRune(ch)
|
|
}
|
|
|
|
literal := buf.String()
|
|
|
|
var err error
|
|
if !isJoinOperator(literal) {
|
|
err = fmt.Errorf("invalid join operator %q", literal)
|
|
}
|
|
|
|
return Token{Type: TokenJoin, Literal: literal}, err
|
|
}
|
|
|
|
// scanGroup consumes all runes within a group/parenthesis.
|
|
func (s *Scanner) scanGroup() (Token, error) {
|
|
var buf bytes.Buffer
|
|
|
|
// read the first group bracket without writing it to the buffer
|
|
firstChar := s.read()
|
|
openGroups := 1
|
|
|
|
// Read every subsequent text rune into the buffer.
|
|
// EOF and matching unescaped ending quote will cause the loop to exit.
|
|
for {
|
|
ch := s.read()
|
|
|
|
if ch == eof {
|
|
break
|
|
}
|
|
|
|
if isGroupStartRune(ch) {
|
|
// nested group
|
|
openGroups++
|
|
buf.WriteRune(ch)
|
|
} else if isTextStartRune(ch) {
|
|
s.unread()
|
|
t, err := s.scanText(true) // with quotes to preserve the exact text start/end runes
|
|
if err != nil {
|
|
// write the errored literal as it is
|
|
buf.WriteString(t.Literal)
|
|
return Token{Type: TokenGroup, Literal: buf.String()}, err
|
|
}
|
|
|
|
buf.WriteString(t.Literal)
|
|
} else if ch == ')' {
|
|
openGroups--
|
|
|
|
if openGroups <= 0 {
|
|
// main group end
|
|
break
|
|
} else {
|
|
buf.WriteRune(ch)
|
|
}
|
|
} else {
|
|
buf.WriteRune(ch)
|
|
}
|
|
}
|
|
|
|
literal := buf.String()
|
|
|
|
var err error
|
|
if !isGroupStartRune(firstChar) || openGroups > 0 {
|
|
err = fmt.Errorf("invalid formatted group - missing %d closing bracket(s)", openGroups)
|
|
}
|
|
|
|
return Token{Type: TokenGroup, Literal: literal}, err
|
|
}
|
|
|
|
// scanFunctionArgs consumes all contiguous function call runes to
|
|
// extract its arguments and returns a function token with the found
|
|
// Token arguments loaded in Token.Meta.
|
|
func (s *Scanner) scanFunctionArgs(funcName string, funcDepth int) (Token, error) {
|
|
var args []Token
|
|
|
|
var expectComma, isComma, isClosed bool
|
|
|
|
ch := s.read()
|
|
if ch != '(' {
|
|
return Token{Type: TokenFunction, Literal: funcName}, fmt.Errorf("invalid or incomplete function call %q", funcName)
|
|
}
|
|
|
|
// Read every subsequent rune until ')' or EOF has been reached.
|
|
for {
|
|
ch := s.read()
|
|
|
|
if ch == eof {
|
|
break
|
|
}
|
|
|
|
if ch == ')' {
|
|
isClosed = true
|
|
break
|
|
}
|
|
|
|
// skip whitespaces
|
|
if isWhitespaceRune(ch) {
|
|
_, err := s.scanWhitespace()
|
|
if err != nil {
|
|
return Token{Type: TokenFunction, Literal: funcName, Meta: args}, fmt.Errorf("failed to scan whitespaces in function %q: %w", funcName, err)
|
|
}
|
|
continue
|
|
}
|
|
|
|
// skip comments
|
|
if isCommentStartRune(ch) {
|
|
s.unread()
|
|
_, err := s.scanComment()
|
|
if err != nil {
|
|
return Token{Type: TokenFunction, Literal: funcName, Meta: args}, fmt.Errorf("failed to scan comment in function %q: %w", funcName, err)
|
|
}
|
|
continue
|
|
}
|
|
|
|
isComma = ch == ','
|
|
|
|
if expectComma && !isComma {
|
|
return Token{Type: TokenFunction, Literal: funcName, Meta: args}, fmt.Errorf("expected comma after the last argument in function %q", funcName)
|
|
}
|
|
|
|
if !expectComma && isComma {
|
|
return Token{Type: TokenFunction, Literal: funcName, Meta: args}, fmt.Errorf("unexpected comma in function %q", funcName)
|
|
}
|
|
|
|
expectComma = false // reset
|
|
|
|
if isComma {
|
|
continue
|
|
}
|
|
|
|
if isIdentifierStartRune(ch) {
|
|
s.unread()
|
|
t, err := s.scanIdentifier(funcDepth - 1)
|
|
if err != nil {
|
|
return Token{Type: TokenFunction, Literal: funcName, Meta: args}, fmt.Errorf("invalid identifier argument %q in function %q: %w", t.Literal, funcName, err)
|
|
}
|
|
args = append(args, t)
|
|
expectComma = true
|
|
} else if isNumberStartRune(ch) {
|
|
s.unread()
|
|
t, err := s.scanNumber()
|
|
if err != nil {
|
|
return Token{Type: TokenFunction, Literal: funcName, Meta: args}, fmt.Errorf("invalid number argument %q in function %q: %w", t.Literal, funcName, err)
|
|
}
|
|
args = append(args, t)
|
|
expectComma = true
|
|
} else if isTextStartRune(ch) {
|
|
s.unread()
|
|
t, err := s.scanText(false)
|
|
if err != nil {
|
|
return Token{Type: TokenFunction, Literal: funcName, Meta: args}, fmt.Errorf("invalid text argument %q in function %q: %w", t.Literal, funcName, err)
|
|
}
|
|
args = append(args, t)
|
|
expectComma = true
|
|
} else {
|
|
return Token{Type: TokenFunction, Literal: funcName, Meta: args}, fmt.Errorf("unsupported argument character %q in function %q", ch, funcName)
|
|
}
|
|
}
|
|
|
|
if !isClosed {
|
|
return Token{Type: TokenFunction, Literal: funcName, Meta: args}, fmt.Errorf("invalid or incomplete function %q (expected ')')", funcName)
|
|
}
|
|
|
|
return Token{Type: TokenFunction, Literal: funcName, Meta: args}, nil
|
|
}
|
|
|
|
// unread unreads the last character and revert the position 1 step back.
|
|
func (s *Scanner) unread() {
|
|
if s.pos > 0 {
|
|
s.pos = s.pos - 1
|
|
}
|
|
}
|
|
|
|
// read reads the next rune and moves the position forward.
|
|
func (s *Scanner) read() rune {
|
|
if s.pos >= len(s.data) {
|
|
return eof
|
|
}
|
|
|
|
ch, n := utf8.DecodeRune(s.data[s.pos:])
|
|
s.pos += n
|
|
|
|
return ch
|
|
}
|
|
|
|
// Lexical helpers:
|
|
// -------------------------------------------------------------------
|
|
|
|
// isWhitespaceRune checks if a rune is a space, tab, or newline.
|
|
func isWhitespaceRune(ch rune) bool { return ch == ' ' || ch == '\t' || ch == '\n' }
|
|
|
|
// isLetterRune checks if a rune is a letter.
|
|
func isLetterRune(ch rune) bool {
|
|
return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
|
|
}
|
|
|
|
// isDigitRune checks if a rune is a digit.
|
|
func isDigitRune(ch rune) bool {
|
|
return (ch >= '0' && ch <= '9')
|
|
}
|
|
|
|
// isTextStartRune checks if a rune is a valid quoted text first character
|
|
// (aka. single or double quote).
|
|
func isTextStartRune(ch rune) bool {
|
|
return ch == '\'' || ch == '"'
|
|
}
|
|
|
|
// isNumberStartRune checks if a rune is a valid number start character (aka. digit).
|
|
func isNumberStartRune(ch rune) bool {
|
|
return ch == '-' || isDigitRune(ch)
|
|
}
|
|
|
|
// isSignStartRune checks if a rune is a valid sign operator start character.
|
|
func isSignStartRune(ch rune) bool {
|
|
return ch == '=' ||
|
|
ch == '?' ||
|
|
ch == '!' ||
|
|
ch == '>' ||
|
|
ch == '<' ||
|
|
ch == '~'
|
|
}
|
|
|
|
// isJoinStartRune checks if a rune is a valid join type start character.
|
|
func isJoinStartRune(ch rune) bool {
|
|
return ch == '&' || ch == '|'
|
|
}
|
|
|
|
// isGroupStartRune checks if a rune is a valid group/parenthesis start character.
|
|
func isGroupStartRune(ch rune) bool {
|
|
return ch == '('
|
|
}
|
|
|
|
// isCommentStartRune checks if a rune is a valid comment start character.
|
|
func isCommentStartRune(ch rune) bool {
|
|
return ch == '/'
|
|
}
|
|
|
|
// isIdentifierStartRune checks if a rune is valid identifier's first character.
|
|
func isIdentifierStartRune(ch rune) bool {
|
|
return isLetterRune(ch) || isIdentifierSpecialStartRune(ch)
|
|
}
|
|
|
|
// isIdentifierSpecialStartRune checks if a rune is valid identifier's first special character.
|
|
func isIdentifierSpecialStartRune(ch rune) bool {
|
|
return ch == '@' || ch == '_' || ch == '#'
|
|
}
|
|
|
|
// isIdentifierCombineRune checks if a rune is valid identifier's combine character.
|
|
func isIdentifierCombineRune(ch rune) bool {
|
|
return ch == '.' || ch == ':'
|
|
}
|
|
|
|
// isSignOperator checks if a literal is a valid sign operator.
|
|
func isSignOperator(literal string) bool {
|
|
switch SignOp(literal) {
|
|
case
|
|
SignEq,
|
|
SignNeq,
|
|
SignLt,
|
|
SignLte,
|
|
SignGt,
|
|
SignGte,
|
|
SignLike,
|
|
SignNlike,
|
|
SignAnyEq,
|
|
SignAnyNeq,
|
|
SignAnyLike,
|
|
SignAnyNlike,
|
|
SignAnyLt,
|
|
SignAnyLte,
|
|
SignAnyGt,
|
|
SignAnyGte:
|
|
return true
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// isJoinOperator checks if a literal is a valid join type operator.
|
|
func isJoinOperator(literal string) bool {
|
|
switch JoinOp(literal) {
|
|
case
|
|
JoinAnd,
|
|
JoinOr:
|
|
return true
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// isValidIdentifier validates the literal against common identifier requirements.
|
|
func isValidIdentifier(literal string) bool {
|
|
length := len(literal)
|
|
|
|
return (
|
|
// doesn't end with combine rune
|
|
!isIdentifierCombineRune(rune(literal[length-1])) &&
|
|
// is not just a special start rune
|
|
(length != 1 || !isIdentifierSpecialStartRune(rune(literal[0]))))
|
|
}
|