
Merging upstream version 16.2.1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann 2025-02-13 16:00:51 +01:00
parent c12f551e31
commit 718a80b164
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
106 changed files with 41940 additions and 40162 deletions


@@ -144,6 +144,7 @@ class TokenType(AutoName):
     VARIANT = auto()
     OBJECT = auto()
     INET = auto()
+    ENUM = auto()
     # keywords
     ALIAS = auto()
@@ -346,6 +347,7 @@ class Token:
             col: The column that the token ends on.
             start: The start index of the token.
             end: The ending index of the token.
+            comments: The comments to attach to the token.
         """
         self.token_type = token_type
         self.text = text
@@ -391,12 +393,15 @@ class _Tokenizer(type):
         klass._STRING_ESCAPES = set(klass.STRING_ESCAPES)
         klass._IDENTIFIER_ESCAPES = set(klass.IDENTIFIER_ESCAPES)
-        klass._COMMENTS = dict(
-            (comment, None) if isinstance(comment, str) else (comment[0], comment[1])
-            for comment in klass.COMMENTS
-        )
+        klass._COMMENTS = {
+            **dict(
+                (comment, None) if isinstance(comment, str) else (comment[0], comment[1])
+                for comment in klass.COMMENTS
+            ),
+            "{#": "#}",  # Ensure Jinja comments are tokenized correctly in all dialects
+        }
-        klass.KEYWORD_TRIE = new_trie(
+        klass._KEYWORD_TRIE = new_trie(
             key.upper()
             for key in (
                 *klass.KEYWORDS,
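
A minimal sketch of what the merged "{#": "#}" entry enables, assuming sqlglot's public Tokenizer/Token API (the SQL string and the expected output below are illustrative, not taken from this commit): Jinja-style comments should now be consumed as comments by every dialect's tokenizer instead of being lexed as block tokens.

    from sqlglot.tokens import Tokenizer

    # The "{# ... #}" span should be skipped as a comment and attached to a
    # neighbouring token's .comments rather than emitted as BLOCK_START/BLOCK_END.
    tokens = Tokenizer().tokenize("SELECT 1 {# templated note #}")
    print([(t.token_type, t.text) for t in tokens])
    print(tokens[-1].comments)  # expected to hold the comment text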
@@ -456,20 +461,22 @@ class Tokenizer(metaclass=_Tokenizer):
     STRING_ESCAPES = ["'"]
     VAR_SINGLE_TOKENS: t.Set[str] = set()
     # Autofilled
+    IDENTIFIERS_CAN_START_WITH_DIGIT: bool = False
     _COMMENTS: t.Dict[str, str] = {}
     _FORMAT_STRINGS: t.Dict[str, t.Tuple[str, TokenType]] = {}
     _IDENTIFIERS: t.Dict[str, str] = {}
     _IDENTIFIER_ESCAPES: t.Set[str] = set()
     _QUOTES: t.Dict[str, str] = {}
     _STRING_ESCAPES: t.Set[str] = set()
+    _KEYWORD_TRIE: t.Dict = {}
-    KEYWORDS: t.Dict[t.Optional[str], TokenType] = {
+    KEYWORDS: t.Dict[str, TokenType] = {
         **{f"{{%{postfix}": TokenType.BLOCK_START for postfix in ("", "+", "-")},
         **{f"{prefix}%}}": TokenType.BLOCK_END for prefix in ("", "+", "-")},
-        "{{+": TokenType.BLOCK_START,
-        "{{-": TokenType.BLOCK_START,
-        "+}}": TokenType.BLOCK_END,
-        "-}}": TokenType.BLOCK_END,
+        **{f"{{{{{postfix}": TokenType.BLOCK_START for postfix in ("+", "-")},
+        **{f"{prefix}}}}}": TokenType.BLOCK_END for prefix in ("+", "-")},
         "/*+": TokenType.HINT,
         "==": TokenType.EQ,
         "::": TokenType.DCOLON,
@@ -594,6 +601,7 @@ class Tokenizer(metaclass=_Tokenizer):
         "RECURSIVE": TokenType.RECURSIVE,
         "REGEXP": TokenType.RLIKE,
         "REPLACE": TokenType.REPLACE,
+        "RETURNING": TokenType.RETURNING,
         "REFERENCES": TokenType.REFERENCES,
         "RIGHT": TokenType.RIGHT,
         "RLIKE": TokenType.RLIKE,
@@ -732,8 +740,7 @@ class Tokenizer(metaclass=_Tokenizer):
     NUMERIC_LITERALS: t.Dict[str, str] = {}
     ENCODE: t.Optional[str] = None
-    COMMENTS = ["--", ("/*", "*/"), ("{#", "#}")]
-    KEYWORD_TRIE: t.Dict = {}  # autofilled
+    COMMENTS = ["--", ("/*", "*/")]
     __slots__ = (
         "sql",
@@ -748,7 +755,6 @@ class Tokenizer(metaclass=_Tokenizer):
         "_end",
         "_peek",
         "_prev_token_line",
-        "identifiers_can_start_with_digit",
     )
     def __init__(self) -> None:
@@ -894,7 +900,7 @@ class Tokenizer(metaclass=_Tokenizer):
         char = chars
         prev_space = False
         skip = False
-        trie = self.KEYWORD_TRIE
+        trie = self._KEYWORD_TRIE
         single_token = char in self.SINGLE_TOKENS
         while chars:
@@ -994,7 +1000,7 @@ class Tokenizer(metaclass=_Tokenizer):
                 self._advance()
             elif self._peek == "." and not decimal:
                 after = self.peek(1)
-                if after.isdigit() or not after.strip():
+                if after.isdigit() or not after.isalpha():
                     decimal = True
                     self._advance()
                 else:
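
A hedged illustration of the isdigit()/isalpha() change (input and expected result are assumptions, not from the commit): a dot followed by a non-alphabetic character is now folded into the numeric literal, whereas previously only whitespace or end-of-input qualified.

    from sqlglot.tokens import Tokenizer

    # ")" after the dot is neither a digit nor alphabetic, so "1." should now
    # lex as a single NUMBER token instead of splitting at the dot.
    tokens = Tokenizer().tokenize("SELECT (1.)")
    print([(t.token_type, t.text) for t in tokens])  # expect a NUMBER token with text "1."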
@@ -1013,13 +1019,13 @@ class Tokenizer(metaclass=_Tokenizer):
                     literal += self._peek.upper()
                     self._advance()
-                token_type = self.KEYWORDS.get(self.NUMERIC_LITERALS.get(literal))
+                token_type = self.KEYWORDS.get(self.NUMERIC_LITERALS.get(literal, ""))
                 if token_type:
                     self._add(TokenType.NUMBER, number_text)
                     self._add(TokenType.DCOLON, "::")
                     return self._add(token_type, literal)
-                elif self.identifiers_can_start_with_digit:  # type: ignore
+                elif self.IDENTIFIERS_CAN_START_WITH_DIGIT:
                     return self._add(TokenType.VAR)
                 self._add(TokenType.NUMBER, number_text)
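
A related usage sketch (the subclass name and SQL below are hypothetical, not from the commit): with the flag promoted to an uppercase class attribute, a dialect tokenizer simply overrides it, and the "" default keeps the KEYWORDS lookup well-typed for unknown numeric-literal suffixes now that KEYWORDS is annotated as Dict[str, TokenType].

    from sqlglot.tokens import Tokenizer

    class DigitLeadingTokenizer(Tokenizer):  # hypothetical dialect tokenizer
        IDENTIFIERS_CAN_START_WITH_DIGIT = True

    # "1col" should come back as a single VAR token rather than NUMBER + VAR.
    print([(t.token_type, t.text) for t in DigitLeadingTokenizer().tokenize("SELECT 1col")])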