
Merging upstream version 11.2.3.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann 2025-02-13 15:40:23 +01:00
parent c6f7c6bbe1
commit 428b7dd76f
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
93 changed files with 33054 additions and 31671 deletions


@@ -115,6 +115,7 @@ class TokenType(AutoName):
     IMAGE = auto()
     VARIANT = auto()
     OBJECT = auto()
+    INET = auto()
 
     # keywords
     ALIAS = auto()
@@ -437,16 +438,8 @@ class Tokenizer(metaclass=_Tokenizer):
     _IDENTIFIER_ESCAPES: t.Set[str] = set()
 
     KEYWORDS = {
-        **{
-            f"{key}{postfix}": TokenType.BLOCK_START
-            for key in ("{%", "{#")
-            for postfix in ("", "+", "-")
-        },
-        **{
-            f"{prefix}{key}": TokenType.BLOCK_END
-            for key in ("%}", "#}")
-            for prefix in ("", "+", "-")
-        },
+        **{f"{{%{postfix}": TokenType.BLOCK_START for postfix in ("", "+", "-")},
+        **{f"{prefix}%}}": TokenType.BLOCK_END for prefix in ("", "+", "-")},
         "{{+": TokenType.BLOCK_START,
         "{{-": TokenType.BLOCK_START,
         "+}}": TokenType.BLOCK_END,
@@ -533,6 +526,7 @@ class Tokenizer(metaclass=_Tokenizer):
         "IGNORE NULLS": TokenType.IGNORE_NULLS,
         "IN": TokenType.IN,
         "INDEX": TokenType.INDEX,
+        "INET": TokenType.INET,
         "INNER": TokenType.INNER,
         "INSERT": TokenType.INSERT,
         "INTERVAL": TokenType.INTERVAL,
@@ -701,7 +695,7 @@ class Tokenizer(metaclass=_Tokenizer):
         "VACUUM": TokenType.COMMAND,
     }
 
-    WHITE_SPACE = {
+    WHITE_SPACE: t.Dict[str, TokenType] = {
         " ": TokenType.SPACE,
         "\t": TokenType.SPACE,
         "\n": TokenType.BREAK,
@@ -723,7 +717,7 @@ class Tokenizer(metaclass=_Tokenizer):
     NUMERIC_LITERALS: t.Dict[str, str] = {}
     ENCODE: t.Optional[str] = None
 
-    COMMENTS = ["--", ("/*", "*/")]
+    COMMENTS = ["--", ("/*", "*/"), ("{#", "#}")]
     KEYWORD_TRIE = None  # autofilled
 
     IDENTIFIER_CAN_START_WITH_DIGIT = False
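
With ("{#", "#}") registered as a comment pair, Jinja-style comments are now captured as token comments instead of going through the keyword trie as block markers. A hedged illustration, again assuming sqlglot 11.2.3 is installed:

from sqlglot.tokens import Tokenizer, TokenType

tokens = Tokenizer().tokenize("SELECT 1 {# templated note #}")
# No BLOCK_START/BLOCK_END tokens are emitted for the Jinja comment...
assert all(t.token_type not in (TokenType.BLOCK_START, TokenType.BLOCK_END) for t in tokens)
# ...its text is attached to a neighbouring token as a comment instead.
assert any("templated note" in comment for t in tokens for comment in t.comments)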
@@ -778,22 +772,16 @@ class Tokenizer(metaclass=_Tokenizer):
             self._start = self._current
             self._advance()
 
-            if not self._char:
+            if self._char is None:
                 break
 
-            white_space = self.WHITE_SPACE.get(self._char)  # type: ignore
-            identifier_end = self._IDENTIFIERS.get(self._char)  # type: ignore
-
-            if white_space:
-                if white_space == TokenType.BREAK:
-                    self._col = 1
-                    self._line += 1
-            elif self._char.isdigit():  # type:ignore
-                self._scan_number()
-            elif identifier_end:
-                self._scan_identifier(identifier_end)
-            else:
-                self._scan_keywords()
+            if self._char not in self.WHITE_SPACE:
+                if self._char.isdigit():
+                    self._scan_number()
+                elif self._char in self._IDENTIFIERS:
+                    self._scan_identifier(self._IDENTIFIERS[self._char])
+                else:
+                    self._scan_keywords()
 
             if until and until():
                 break
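
Two things change in this hunk: the end-of-input guard becomes an explicit `is None` check (an empty string is also falsy, so the old `not self._char` test conflated the two), and the inline line/column bookkeeping disappears because it moves into `_advance` in the next hunk. A small standalone illustration of the truthiness point:

# Why `not char` and `char is None` are not interchangeable for a cursor value:
for char in (None, "", "0"):
    print(repr(char), "not char ->", not char, "| is None ->", char is None)
# None -> True | True,  '' -> True | False,  '0' -> False | False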
@@ -807,13 +795,23 @@ class Tokenizer(metaclass=_Tokenizer):
             return self.sql[start:end]
         return ""
 
+    def _line_break(self, char: t.Optional[str]) -> bool:
+        return self.WHITE_SPACE.get(char) == TokenType.BREAK  # type: ignore
+
     def _advance(self, i: int = 1) -> None:
+        if self._line_break(self._char):
+            self._set_new_line()
+
         self._col += i
         self._current += i
         self._end = self._current >= self.size  # type: ignore
         self._char = self.sql[self._current - 1]  # type: ignore
         self._peek = self.sql[self._current] if self._current < self.size else ""  # type: ignore
 
+    def _set_new_line(self) -> None:
+        self._col = 1
+        self._line += 1
+
     @property
     def _text(self) -> str:
         return self.sql[self._start : self._current]
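
The bookkeeping removed from the scan loop now lives here: `_advance` checks whether the character it is leaving behind was a line break and, if so, resets the column and bumps the line counter via the new helpers. A self-contained sketch of that mechanism with illustrative names (not the library's API):

class MiniCursor:
    """Toy cursor mirroring the new _advance/_line_break/_set_new_line split."""

    WHITE_SPACE = {" ": "SPACE", "\t": "SPACE", "\n": "BREAK", "\r": "BREAK"}

    def __init__(self, sql: str) -> None:
        self.sql, self.current, self.line, self.col, self.char = sql, 0, 1, 0, ""

    def line_break(self, char: str) -> bool:
        return self.WHITE_SPACE.get(char) == "BREAK"

    def advance(self, i: int = 1) -> None:
        if self.line_break(self.char):              # leaving a newline behind...
            self.line, self.col = self.line + 1, 1  # ...start counting a new line
        self.col += i
        self.current += i
        self.char = self.sql[self.current - 1]

cursor = MiniCursor("a\nbc")
trace = []
for _ in cursor.sql:
    cursor.advance()
    trace.append((cursor.char, cursor.line, cursor.col))
print(trace)  # [('a', 1, 1), ('\n', 1, 2), ('b', 2, 2), ('c', 2, 3)]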
@@ -917,7 +915,7 @@ class Tokenizer(metaclass=_Tokenizer):
             self._comments.append(self._text[comment_start_size : -comment_end_size + 1])  # type: ignore
             self._advance(comment_end_size - 1)
         else:
-            while not self._end and self.WHITE_SPACE.get(self._peek) != TokenType.BREAK:  # type: ignore
+            while not self._end and not self._line_break(self._peek):
                 self._advance()
             self._comments.append(self._text[comment_start_size:])  # type: ignore
 
@@ -926,6 +924,7 @@ class Tokenizer(metaclass=_Tokenizer):
         if comment_start_line == self._prev_token_line:
             self.tokens[-1].comments.extend(self._comments)
             self._comments = []
+            self._prev_token_line = self._line
 
         return True
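
The added assignment refreshes `_prev_token_line` after trailing comments are attached. Because line counting now happens inside `_advance` (and therefore also while a comment is being scanned), this appears to keep a follow-up comment that starts on the final line of a multi-line trailing comment attached to the same token. A hedged example of the expected behaviour, assuming sqlglot 11.2.3:

from sqlglot.tokens import Tokenizer

tokens = Tokenizer().tokenize("SELECT 1 /* first\n*/ /* second */")
# With _prev_token_line refreshed, both comments should trail the literal 1;
# otherwise " second " would be carried forward as a leading comment instead.
print(tokens[-1].comments)  # expected: [' first\n', ' second ']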