Merging upstream version 10.5.6.
Signed-off-by: Daniel Baumann <daniel@debian.org>
parent 3b8c9606bf
commit 599f59b0f8
39 changed files with 786 additions and 133 deletions
@@ -82,6 +82,8 @@ class TokenType(AutoName):
     VARCHAR = auto()
     NVARCHAR = auto()
     TEXT = auto()
+    MEDIUMTEXT = auto()
+    LONGTEXT = auto()
     BINARY = auto()
     VARBINARY = auto()
     JSON = auto()
@@ -434,6 +436,8 @@ class Tokenizer(metaclass=_Tokenizer):
     ESCAPES = ["'"]
 
+    _ESCAPES: t.Set[str] = set()
+
     KEYWORDS = {
         **{
             f"{key}{postfix}": TokenType.BLOCK_START
@@ -461,6 +465,7 @@ class Tokenizer(metaclass=_Tokenizer):
         "#>>": TokenType.DHASH_ARROW,
         "<->": TokenType.LR_ARROW,
         "ALL": TokenType.ALL,
+        "ALWAYS": TokenType.ALWAYS,
         "AND": TokenType.AND,
         "ANTI": TokenType.ANTI,
         "ANY": TokenType.ANY,
@@ -472,6 +477,7 @@ class Tokenizer(metaclass=_Tokenizer):
         "BETWEEN": TokenType.BETWEEN,
         "BOTH": TokenType.BOTH,
         "BUCKET": TokenType.BUCKET,
+        "BY DEFAULT": TokenType.BY_DEFAULT,
         "CACHE": TokenType.CACHE,
         "UNCACHE": TokenType.UNCACHE,
         "CASE": TokenType.CASE,
@@ -521,9 +527,11 @@ class Tokenizer(metaclass=_Tokenizer):
         "FOREIGN KEY": TokenType.FOREIGN_KEY,
         "FORMAT": TokenType.FORMAT,
         "FROM": TokenType.FROM,
+        "GENERATED": TokenType.GENERATED,
         "GROUP BY": TokenType.GROUP_BY,
         "GROUPING SETS": TokenType.GROUPING_SETS,
         "HAVING": TokenType.HAVING,
+        "IDENTITY": TokenType.IDENTITY,
         "IF": TokenType.IF,
         "ILIKE": TokenType.ILIKE,
         "IMMUTABLE": TokenType.IMMUTABLE,
@@ -746,7 +754,7 @@ class Tokenizer(metaclass=_Tokenizer):
     )
 
     def __init__(self) -> None:
-        self._replace_backslash = "\\" in self._ESCAPES  # type: ignore
+        self._replace_backslash = "\\" in self._ESCAPES
        self.reset()
 
     def reset(self) -> None:
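With `_ESCAPES` now declared as a typed class attribute, the `# type: ignore` on the backslash membership test can be dropped. As a hedged illustration of what that flag keys off, a hypothetical dialect tokenizer that lists the backslash as an escape character (assuming, as the attribute names suggest, that the `ESCAPES` list feeds the `_ESCAPES` set; only `ESCAPES = ["'"]` and the membership check are visible in this diff):

    from sqlglot.tokens import Tokenizer

    class BackslashEscapeTokenizer(Tokenizer):
        # Hypothetical subclass for illustration only. Declaring "\\" here
        # would make `"\\" in self._ESCAPES` true, so _replace_backslash is
        # set in __init__ and backslash escapes get normalized.
        ESCAPES = ["'", "\\"]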
@@ -771,7 +779,10 @@ class Tokenizer(metaclass=_Tokenizer):
         self.reset()
         self.sql = sql
         self.size = len(sql)
-
+        self._scan()
+        return self.tokens
+
+    def _scan(self, until: t.Optional[t.Callable] = None) -> None:
         while self.size and not self._end:
             self._start = self._current
             self._advance()
@@ -792,7 +803,9 @@ class Tokenizer(metaclass=_Tokenizer):
                 self._scan_identifier(identifier_end)
             else:
                 self._scan_keywords()
-        return self.tokens
+
+            if until and until():
+                break
 
     def _chars(self, size: int) -> str:
         if size == 1:
@@ -832,11 +845,13 @@ class Tokenizer(metaclass=_Tokenizer):
         if token_type in self.COMMANDS and (
             len(self.tokens) == 1 or self.tokens[-2].token_type == TokenType.SEMICOLON
         ):
-            self._start = self._current
-            while not self._end and self._peek != ";":
-                self._advance()
-            if self._start < self._current:
-                self._add(TokenType.STRING)
+            start = self._current
+            tokens = len(self.tokens)
+            self._scan(lambda: self._peek == ";")
+            self.tokens = self.tokens[:tokens]
+            text = self.sql[start : self._current].strip()
+            if text:
+                self._add(TokenType.STRING, text)
 
     def _scan_keywords(self) -> None:
         size = 0
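The COMMANDS branch now re-uses the scanner itself through the new `until` callback instead of a hand-rolled character loop. A minimal, self-contained sketch of the same pattern (toy names, not the sqlglot API) showing how an optional predicate stops a scan early, mirroring `self._scan(lambda: self._peek == ";")` above:

    import typing as t

    class ToyScanner:
        # Toy illustration of the `until` callback pattern from the diff;
        # the names here are hypothetical, not the sqlglot API.
        def __init__(self, text: str) -> None:
            self.text = text
            self.pos = 0
            self.chunks: list[str] = []

        def _peek(self) -> str:
            return self.text[self.pos] if self.pos < len(self.text) else ""

        def scan(self, until: t.Optional[t.Callable[[], bool]] = None) -> list[str]:
            start = self.pos
            while self.pos < len(self.text):
                self.pos += 1
                # Stop early when the caller-supplied predicate fires,
                # mirroring `if until and until(): break` in the diff.
                if until and until():
                    break
            self.chunks.append(self.text[start : self.pos])
            return self.chunks

    scanner = ToyScanner("SET x = 1; SELECT 1")
    scanner.scan(lambda: scanner._peek() == ";")  # chunks -> ["SET x = 1"]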
@@ -947,7 +962,8 @@ class Tokenizer(metaclass=_Tokenizer):
             elif self._peek.isidentifier():  # type: ignore
                 number_text = self._text
                 literal = []
-                while self._peek.isidentifier():  # type: ignore
+
+                while self._peek.strip() and self._peek not in self.SINGLE_TOKENS:  # type: ignore
                     literal.append(self._peek.upper())  # type: ignore
                     self._advance()
@@ -1063,8 +1079,12 @@ class Tokenizer(metaclass=_Tokenizer):
         delim_size = len(delimiter)
 
         while True:
-            if self._char in self._ESCAPES and self._peek == delimiter:  # type: ignore
-                text += delimiter
+            if (
+                self._char in self._ESCAPES
+                and self._peek
+                and (self._peek == delimiter or self._peek in self._ESCAPES)
+            ):
+                text += self._peek
                 self._advance(2)
             else:
                 if self._chars(delim_size) == delimiter:
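The widened condition treats an escape character followed by either the string delimiter or another escape character as an escaped pair, and guards against a trailing escape at end of input via `and self._peek`. A standalone sketch of that check (plain function with toy names; the real logic lives inside the tokenizer's string extraction):

    def unescape(text: str, delimiter: str = "'", escapes: frozenset = frozenset("'")) -> str:
        # Toy re-implementation of the widened check from the diff: a character
        # in `escapes` escapes the delimiter or another escape character.
        out = []
        i = 0
        while i < len(text):
            char = text[i]
            peek = text[i + 1] if i + 1 < len(text) else ""
            if char in escapes and peek and (peek == delimiter or peek in escapes):
                out.append(peek)  # keep the escaped character, drop the escape
                i += 2            # mirrors self._advance(2)
            else:
                out.append(char)
                i += 1
        return "".join(out)

    print(unescape("it''s"))                           # it's
    print(unescape(r"a\\b", escapes=frozenset("\\")))  # a\b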