Merging upstream version 16.2.1.
Signed-off-by: Daniel Baumann <daniel@debian.org>
parent c12f551e31
commit 718a80b164

106 changed files with 41940 additions and 40162 deletions

@@ -144,6 +144,7 @@ class TokenType(AutoName):
     VARIANT = auto()
     OBJECT = auto()
     INET = auto()
+    ENUM = auto()
 
     # keywords
     ALIAS = auto()

@@ -346,6 +347,7 @@ class Token:
             col: The column that the token ends on.
             start: The start index of the token.
             end: The ending index of the token.
+            comments: The comments to attach to the token.
         """
         self.token_type = token_type
         self.text = text

@@ -391,12 +393,15 @@ class _Tokenizer(type):
 
         klass._STRING_ESCAPES = set(klass.STRING_ESCAPES)
         klass._IDENTIFIER_ESCAPES = set(klass.IDENTIFIER_ESCAPES)
-        klass._COMMENTS = dict(
-            (comment, None) if isinstance(comment, str) else (comment[0], comment[1])
-            for comment in klass.COMMENTS
-        )
+        klass._COMMENTS = {
+            **dict(
+                (comment, None) if isinstance(comment, str) else (comment[0], comment[1])
+                for comment in klass.COMMENTS
+            ),
+            "{#": "#}",  # Ensure Jinja comments are tokenized correctly in all dialects
+        }
 
-        klass.KEYWORD_TRIE = new_trie(
+        klass._KEYWORD_TRIE = new_trie(
             key.upper()
             for key in (
                 *klass.KEYWORDS,

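Two things happen in this hunk: the derived comment map gains a hard-coded Jinja pair, so `{# ... #}` comments are recognized in every dialect without each one declaring them, and the autofilled trie is renamed to the underscore convention (`_KEYWORD_TRIE`). A minimal sketch of the resulting `_COMMENTS` shape, assuming a trimmed-down `COMMENTS` list rather than the real defaults:

    # String entries are line comments (no closing delimiter, hence None);
    # tuple entries map an opening delimiter to its closer.
    COMMENTS = ["--", ("/*", "*/")]

    _COMMENTS = {
        **dict(
            (comment, None) if isinstance(comment, str) else (comment[0], comment[1])
            for comment in COMMENTS
        ),
        "{#": "#}",  # injected unconditionally, per the change above
    }

    print(_COMMENTS)  # {'--': None, '/*': '*/', '{#': '#}'}
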
@@ -456,20 +461,22 @@ class Tokenizer(metaclass=_Tokenizer):
     STRING_ESCAPES = ["'"]
     VAR_SINGLE_TOKENS: t.Set[str] = set()
 
+    # Autofilled
+    IDENTIFIERS_CAN_START_WITH_DIGIT: bool = False
+
     _COMMENTS: t.Dict[str, str] = {}
     _FORMAT_STRINGS: t.Dict[str, t.Tuple[str, TokenType]] = {}
     _IDENTIFIERS: t.Dict[str, str] = {}
     _IDENTIFIER_ESCAPES: t.Set[str] = set()
     _QUOTES: t.Dict[str, str] = {}
     _STRING_ESCAPES: t.Set[str] = set()
+    _KEYWORD_TRIE: t.Dict = {}
 
-    KEYWORDS: t.Dict[t.Optional[str], TokenType] = {
+    KEYWORDS: t.Dict[str, TokenType] = {
         **{f"{{%{postfix}": TokenType.BLOCK_START for postfix in ("", "+", "-")},
         **{f"{prefix}%}}": TokenType.BLOCK_END for prefix in ("", "+", "-")},
-        "{{+": TokenType.BLOCK_START,
-        "{{-": TokenType.BLOCK_START,
-        "+}}": TokenType.BLOCK_END,
-        "-}}": TokenType.BLOCK_END,
+        **{f"{{{{{postfix}": TokenType.BLOCK_START for postfix in ("+", "-")},
+        **{f"{prefix}}}}}": TokenType.BLOCK_END for prefix in ("+", "-")},
         "/*+": TokenType.HINT,
         "==": TokenType.EQ,
         "::": TokenType.DCOLON,

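The four literal block-token entries collapse into two dict comprehensions that expand to exactly the same keys. A quick check of the f-string brace escaping, with plain strings standing in for the `TokenType` values:

    # Doubled braces are literal braces inside an f-string.
    block_start = {f"{{{{{postfix}": "BLOCK_START" for postfix in ("+", "-")}
    block_end = {f"{prefix}}}}}": "BLOCK_END" for prefix in ("+", "-")}

    print(block_start)  # {'{{+': 'BLOCK_START', '{{-': 'BLOCK_START'}
    print(block_end)    # {'+}}': 'BLOCK_END', '-}}': 'BLOCK_END'}

The `t.Optional[str]` key type is also dropped; the reason becomes clear in the numeric-literal hunk at the end of this diff.
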
@@ -594,6 +601,7 @@ class Tokenizer(metaclass=_Tokenizer):
         "RECURSIVE": TokenType.RECURSIVE,
         "REGEXP": TokenType.RLIKE,
         "REPLACE": TokenType.REPLACE,
+        "RETURNING": TokenType.RETURNING,
         "REFERENCES": TokenType.REFERENCES,
         "RIGHT": TokenType.RIGHT,
         "RLIKE": TokenType.RLIKE,

@@ -732,8 +740,7 @@ class Tokenizer(metaclass=_Tokenizer):
     NUMERIC_LITERALS: t.Dict[str, str] = {}
     ENCODE: t.Optional[str] = None
 
-    COMMENTS = ["--", ("/*", "*/"), ("{#", "#}")]
-    KEYWORD_TRIE: t.Dict = {}  # autofilled
+    COMMENTS = ["--", ("/*", "*/")]
 
     __slots__ = (
         "sql",

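Both removals follow from the metaclass changes above: the Jinja pair ("{#", "#}") no longer needs to ship in every tokenizer's COMMENTS default, since _Tokenizer now injects it into _COMMENTS for all dialects, and the autofilled trie has moved to the _KEYWORD_TRIE declaration in the class-attribute block.
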
@@ -748,7 +755,6 @@ class Tokenizer(metaclass=_Tokenizer):
         "_end",
         "_peek",
         "_prev_token_line",
-        "identifiers_can_start_with_digit",
     )
 
     def __init__(self) -> None:

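This slot removal pairs with the new class-level IDENTIFIERS_CAN_START_WITH_DIGIT flag: the setting is read from the class instead of being stored per instance, and its former use site loses a # type: ignore as well (see the final hunk below).
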
@@ -894,7 +900,7 @@ class Tokenizer(metaclass=_Tokenizer):
         char = chars
         prev_space = False
         skip = False
-        trie = self.KEYWORD_TRIE
+        trie = self._KEYWORD_TRIE
         single_token = char in self.SINGLE_TOKENS
 
         while chars:

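`_KEYWORD_TRIE` drives multi-character keyword matching in `_scan_keywords`: the scanner walks the trie character by character and remembers the longest key that ended on a terminal node. A generic illustration of the idea, not sqlglot's actual trie code (its helpers live in sqlglot.trie):

    def new_trie(keywords):
        trie = {}
        for word in keywords:
            node = trie
            for ch in word:
                node = node.setdefault(ch, {})
            node[0] = True  # terminal marker: a keyword ends here
        return trie

    def longest_keyword(trie, text):
        node, match = trie, None
        for i, ch in enumerate(text):
            if ch not in node:
                break
            node = node[ch]
            if 0 in node:
                match = text[: i + 1]
        return match

    trie = new_trie(["IN", "INET", "INT"])
    print(longest_keyword(trie, "INET4"))  # 'INET', not the shorter 'IN'
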
@@ -994,7 +1000,7 @@ class Tokenizer(metaclass=_Tokenizer):
                 self._advance()
             elif self._peek == "." and not decimal:
                 after = self.peek(1)
-                if after.isdigit() or not after.strip():
+                if after.isdigit() or not after.isalpha():
                     decimal = True
                     self._advance()
                 else:

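The old predicate accepted a trailing dot only before a digit, whitespace, or end of input (`not after.strip()`); the new one accepts any non-letter, so a decimal like `1.` still tokenizes as a number when followed by `,` or `)`. Comparing the two guards over sample next-characters:

    for after in ["5", " ", "", ",", ")", "x"]:
        old = after.isdigit() or not after.strip()
        new = after.isdigit() or not after.isalpha()
        print(repr(after), old, new)

    # '5'  True   True
    # ' '  True   True
    # ''   True   True
    # ','  False  True   (now treated as a decimal point)
    # ')'  False  True   (now treated as a decimal point)
    # 'x'  False  False  (still falls through to the else branch)
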
@@ -1013,13 +1019,13 @@ class Tokenizer(metaclass=_Tokenizer):
                 literal += self._peek.upper()
                 self._advance()
 
-            token_type = self.KEYWORDS.get(self.NUMERIC_LITERALS.get(literal))
+            token_type = self.KEYWORDS.get(self.NUMERIC_LITERALS.get(literal, ""))
 
             if token_type:
                 self._add(TokenType.NUMBER, number_text)
                 self._add(TokenType.DCOLON, "::")
                 return self._add(token_type, literal)
-            elif self.identifiers_can_start_with_digit:  # type: ignore
+            elif self.IDENTIFIERS_CAN_START_WITH_DIGIT:
                 return self._add(TokenType.VAR)
 
             self._add(TokenType.NUMBER, number_text)

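`NUMERIC_LITERALS.get(literal)` returns None for an unknown suffix, and feeding that None into `KEYWORDS.get` is what forced the `t.Optional[str]` key type seen earlier. Defaulting to `""` (which is never a keyword) preserves behavior while letting `KEYWORDS` take plain `str` keys. A toy version with hypothetical, trimmed-down maps standing in for the real dialect tables:

    NUMERIC_LITERALS = {"L": "BIGINT"}         # e.g. 10L reads as BIGINT in some dialects
    KEYWORDS = {"BIGINT": "TokenType.BIGINT"}  # stand-in value, not the real enum

    literal = "Z"  # unknown suffix
    token_type = KEYWORDS.get(NUMERIC_LITERALS.get(literal, ""))
    print(token_type)  # None, without ever passing a None key to KEYWORDS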