
Merging upstream version 10.1.3.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Author: Daniel Baumann, 2025-02-13 14:56:25 +01:00
parent 582b160275
commit a5128ea109
Signed by: daniel (GPG key ID: FBB4F0E80A80222F)
57 changed files with 1542 additions and 529 deletions


@@ -81,6 +81,7 @@ class TokenType(AutoName):
     BINARY = auto()
     VARBINARY = auto()
     JSON = auto()
+    JSONB = auto()
     TIMESTAMP = auto()
     TIMESTAMPTZ = auto()
     TIMESTAMPLTZ = auto()
@@ -91,6 +92,7 @@ class TokenType(AutoName):
     NULLABLE = auto()
     GEOMETRY = auto()
     HLLSKETCH = auto()
+    HSTORE = auto()
     SUPER = auto()
     SERIAL = auto()
     SMALLSERIAL = auto()
@@ -113,6 +115,7 @@ class TokenType(AutoName):
     APPLY = auto()
     ARRAY = auto()
     ASC = auto()
+    ASOF = auto()
     AT_TIME_ZONE = auto()
     AUTO_INCREMENT = auto()
     BEGIN = auto()
@@ -130,6 +133,7 @@ class TokenType(AutoName):
     COMMAND = auto()
     COMMENT = auto()
     COMMIT = auto()
+    COMPOUND = auto()
     CONSTRAINT = auto()
     CREATE = auto()
     CROSS = auto()
@@ -271,6 +275,7 @@ class TokenType(AutoName):
     UNBOUNDED = auto()
     UNCACHE = auto()
     UNION = auto()
+    UNLOGGED = auto()
     UNNEST = auto()
     UNPIVOT = auto()
     UPDATE = auto()
@@ -291,7 +296,7 @@
 class Token:
-    __slots__ = ("token_type", "text", "line", "col", "comment")
+    __slots__ = ("token_type", "text", "line", "col", "comments")
 
     @classmethod
     def number(cls, number: int) -> Token:
@@ -319,13 +324,13 @@ class Token:
         text: str,
         line: int = 1,
         col: int = 1,
-        comment: t.Optional[str] = None,
+        comments: t.List[str] = [],
     ) -> None:
         self.token_type = token_type
         self.text = text
         self.line = line
         self.col = max(col - len(text), 1)
-        self.comment = comment
+        self.comments = comments
 
     def __repr__(self) -> str:
         attributes = ", ".join(f"{k}: {getattr(self, k)}" for k in self.__slots__)
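Taken together, the two `Token` hunks replace the single optional `comment` string with a `comments` list. A minimal sketch of the new surface (assuming sqlglot 10.1.3 is installed; the token values are illustrative):

```python
# Minimal sketch of the new Token surface, assuming sqlglot 10.1.3.
from sqlglot.tokens import Token, TokenType

# A token can now carry several comments at once instead of at most one.
token = Token(TokenType.SELECT, "SELECT", comments=["first", "second"])
print(token.comments)  # ['first', 'second']
```

Note the mutable default `comments: t.List[str] = []` in the new signature: the tokenizer always passes an explicit list, but callers constructing bare `Token`s share the default list if they mutate it in place, a standard Python pitfall.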
@@ -452,6 +457,7 @@ class Tokenizer(metaclass=_Tokenizer):
         "COLLATE": TokenType.COLLATE,
         "COMMENT": TokenType.SCHEMA_COMMENT,
         "COMMIT": TokenType.COMMIT,
+        "COMPOUND": TokenType.COMPOUND,
         "CONSTRAINT": TokenType.CONSTRAINT,
         "CREATE": TokenType.CREATE,
         "CROSS": TokenType.CROSS,
@@ -582,8 +588,9 @@ class Tokenizer(metaclass=_Tokenizer):
         "TRAILING": TokenType.TRAILING,
         "UNBOUNDED": TokenType.UNBOUNDED,
         "UNION": TokenType.UNION,
-        "UNPIVOT": TokenType.UNPIVOT,
+        "UNLOGGED": TokenType.UNLOGGED,
         "UNNEST": TokenType.UNNEST,
+        "UNPIVOT": TokenType.UNPIVOT,
         "UPDATE": TokenType.UPDATE,
         "USE": TokenType.USE,
         "USING": TokenType.USING,
@@ -686,12 +693,12 @@ class Tokenizer(metaclass=_Tokenizer):
         "_current",
         "_line",
         "_col",
-        "_comment",
+        "_comments",
         "_char",
         "_end",
         "_peek",
         "_prev_token_line",
-        "_prev_token_comment",
+        "_prev_token_comments",
         "_prev_token_type",
         "_replace_backslash",
     )
@@ -708,13 +715,13 @@
         self._current = 0
         self._line = 1
         self._col = 1
-        self._comment = None
+        self._comments: t.List[str] = []
         self._char = None
         self._end = None
         self._peek = None
         self._prev_token_line = -1
-        self._prev_token_comment = None
+        self._prev_token_comments: t.List[str] = []
         self._prev_token_type = None
 
     def tokenize(self, sql: str) -> t.List[Token]:
@@ -767,7 +774,7 @@
     def _add(self, token_type: TokenType, text: t.Optional[str] = None) -> None:
         self._prev_token_line = self._line
-        self._prev_token_comment = self._comment
+        self._prev_token_comments = self._comments
         self._prev_token_type = token_type  # type: ignore
         self.tokens.append(
             Token(
@@ -775,10 +782,10 @@
                 self._text if text is None else text,
                 self._line,
                 self._col,
-                self._comment,
+                self._comments,
             )
         )
-        self._comment = None
+        self._comments = []
 
         if token_type in self.COMMANDS and (
             len(self.tokens) == 1 or self.tokens[-2].token_type == TokenType.SEMICOLON
@@ -857,22 +864,18 @@ class Tokenizer(metaclass=_Tokenizer):
             while not self._end and self._chars(comment_end_size) != comment_end:
                 self._advance()
-            self._comment = self._text[comment_start_size : -comment_end_size + 1]  # type: ignore
+            self._comments.append(self._text[comment_start_size : -comment_end_size + 1])  # type: ignore
             self._advance(comment_end_size - 1)
         else:
             while not self._end and self.WHITE_SPACE.get(self._peek) != TokenType.BREAK:  # type: ignore
                 self._advance()
-            self._comment = self._text[comment_start_size:]  # type: ignore
+            self._comments.append(self._text[comment_start_size:])  # type: ignore
 
-        # Leading comment is attached to the succeeding token, whilst trailing comment to the preceding. If both
-        # types of comment can be attached to a token, the trailing one is discarded in favour of the leading one.
+        # Leading comment is attached to the succeeding token, whilst trailing comment to the preceding.
+        # Multiple consecutive comments are preserved by appending them to the current comments list.
         if comment_start_line == self._prev_token_line:
-            if self._prev_token_comment is None:
-                self.tokens[-1].comment = self._comment
-                self._prev_token_comment = self._comment
-
-            self._comment = None
+            self.tokens[-1].comments.extend(self._comments)
+            self._comments = []
 
         return True
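The net effect of the `comments` change in `_scan_comment`: consecutive comments accumulate on a token instead of the later one being dropped. A hedged end-to-end sketch (assuming sqlglot 10.1.3; the exact whitespace preserved inside block comments is an assumption):

```python
# Hedged end-to-end sketch, assuming sqlglot 10.1.3: both trailing block
# comments sit on the same line as SELECT, so both attach to the SELECT
# token. The old single-comment field kept only the first of the two.
from sqlglot.tokens import Tokenizer

tokens = Tokenizer().tokenize("SELECT /* a */ /* b */ 1")
print(tokens[0].comments)  # expected: [' a ', ' b ']
```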