Edit on GitHub

sqlglot.dialects.snowflake

  1from __future__ import annotations
  2
  3import typing as t
  4
  5from sqlglot import exp, generator, parser, tokens, transforms
  6from sqlglot.dialects.dialect import (
  7    Dialect,
  8    date_trunc_to_time,
  9    datestrtodate_sql,
 10    format_time_lambda,
 11    inline_array_sql,
 12    max_or_greatest,
 13    min_or_least,
 14    rename_func,
 15    timestamptrunc_sql,
 16    timestrtotime_sql,
 17    ts_or_ds_to_date_sql,
 18    var_map_sql,
 19)
 20from sqlglot.expressions import Literal
 21from sqlglot.helper import seq_get
 22from sqlglot.parser import binary_range_parser
 23from sqlglot.tokens import TokenType
 24
 25
 26def _check_int(s: str) -> bool:
 27    if s[0] in ("-", "+"):
 28        return s[1:].isdigit()
 29    return s.isdigit()
 30
 31
 32# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
 33def _snowflake_to_timestamp(args: t.List) -> t.Union[exp.StrToTime, exp.UnixToTime]:
 34    if len(args) == 2:
 35        first_arg, second_arg = args
 36        if second_arg.is_string:
 37            # case: <string_expr> [ , <format> ]
 38            return format_time_lambda(exp.StrToTime, "snowflake")(args)
 39
 40        # case: <numeric_expr> [ , <scale> ]
 41        if second_arg.name not in ["0", "3", "9"]:
 42            raise ValueError(
 43                f"Scale for snowflake numeric timestamp is {second_arg}, but should be 0, 3, or 9"
 44            )
 45
 46        if second_arg.name == "0":
 47            timescale = exp.UnixToTime.SECONDS
 48        elif second_arg.name == "3":
 49            timescale = exp.UnixToTime.MILLIS
 50        elif second_arg.name == "9":
 51            timescale = exp.UnixToTime.MICROS
 52
 53        return exp.UnixToTime(this=first_arg, scale=timescale)
 54
 55    from sqlglot.optimizer.simplify import simplify_literals
 56
 57    # The first argument might be an expression like 40 * 365 * 86400, so we try to
 58    # reduce it using `simplify_literals` first and then check if it's a Literal.
 59    first_arg = seq_get(args, 0)
 60    if not isinstance(simplify_literals(first_arg, root=True), Literal):
 61        # case: <variant_expr>
 62        return format_time_lambda(exp.StrToTime, "snowflake", default=True)(args)
 63
 64    if first_arg.is_string:
 65        if _check_int(first_arg.this):
 66            # case: <integer>
 67            return exp.UnixToTime.from_arg_list(args)
 68
 69        # case: <date_expr>
 70        return format_time_lambda(exp.StrToTime, "snowflake", default=True)(args)
 71
 72    # case: <numeric_expr>
 73    return exp.UnixToTime.from_arg_list(args)
 74
 75
 76def _parse_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
 77    expression = parser.parse_var_map(args)
 78
 79    if isinstance(expression, exp.StarMap):
 80        return expression
 81
 82    return exp.Struct(
 83        expressions=[
 84            t.cast(exp.Condition, k).eq(v) for k, v in zip(expression.keys, expression.values)
 85        ]
 86    )
 87
 88
 89def _unix_to_time_sql(self: generator.Generator, expression: exp.UnixToTime) -> str:
 90    scale = expression.args.get("scale")
 91    timestamp = self.sql(expression, "this")
 92    if scale in [None, exp.UnixToTime.SECONDS]:
 93        return f"TO_TIMESTAMP({timestamp})"
 94    if scale == exp.UnixToTime.MILLIS:
 95        return f"TO_TIMESTAMP({timestamp}, 3)"
 96    if scale == exp.UnixToTime.MICROS:
 97        return f"TO_TIMESTAMP({timestamp}, 9)"
 98
 99    raise ValueError("Improper scale for timestamp")
100
101
# https://docs.snowflake.com/en/sql-reference/functions/date_part.html
# https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
def _parse_date_part(self: parser.Parser) -> t.Optional[exp.Expression]:
    """Parse the arguments of DATE_PART(<part>, <expr>).

    Parts whose name starts with EPOCH become a TimeToUnix over the operand cast
    to TIMESTAMP, multiplied by 10**3 / 10**6 / 10**9 for the millisecond,
    microsecond and nanosecond variants. Any other part becomes an Extract.
    Returns None when no part can be parsed.
    """
    part = self._parse_var() or self._parse_type()
    if not part:
        return None

    self._match(TokenType.COMMA)
    operand = self._parse_bitwise()

    part_name = part.name.upper()
    if not part_name.startswith("EPOCH"):
        return self.expression(exp.Extract, this=part, expression=operand)

    # Plain EPOCH / EPOCH_SECOND parts need no multiplier.
    multiplier = None
    for prefix, factor in (
        ("EPOCH_MILLISECOND", 10**3),
        ("EPOCH_MICROSECOND", 10**6),
        ("EPOCH_NANOSECOND", 10**9),
    ):
        if part_name.startswith(prefix):
            multiplier = factor
            break

    as_timestamp = self.expression(exp.Cast, this=operand, to=exp.DataType.build("TIMESTAMP"))
    epoch: exp.Expression = self.expression(exp.TimeToUnix, this=as_timestamp)

    if multiplier:
        return exp.Mul(this=epoch, expression=exp.Literal.number(multiplier))
    return epoch
133
134
# https://docs.snowflake.com/en/sql-reference/functions/div0
def _div0_to_if(args: t.List) -> exp.Expression:
    """Rewrite DIV0(a, b) as IF(b = 0, 0, a / b)."""
    return exp.If(
        this=exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0)),
        true=exp.Literal.number(0),
        false=exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1)),
    )
141
142
# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _zeroifnull_to_if(args: t.List) -> exp.Expression:
    """Rewrite ZEROIFNULL(x) as IF(x IS NULL, 0, x)."""
    is_null = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    zero = exp.Literal.number(0)
    return exp.If(this=is_null, true=zero, false=seq_get(args, 0))
147
148
# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _nullifzero_to_if(args: t.List) -> exp.Expression:
    """Rewrite NULLIFZERO(x) as IF(x = 0, NULL, x)."""
    is_zero = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    null = exp.Null()
    return exp.If(this=is_zero, true=null, false=seq_get(args, 0))
153
154
def _datatype_sql(self: generator.Generator, expression: exp.DataType) -> str:
    """Render a data type, emitting bare ARRAY / OBJECT for array and map types."""
    for type_name, rendered in (("array", "ARRAY"), ("map", "OBJECT")):
        if expression.is_type(type_name):
            return rendered

    # Every other type goes through the generator's regular rendering.
    return self.datatype_sql(expression)
161
162
def _parse_convert_timezone(args: t.List) -> exp.Expression:
    """Build the expression for CONVERT_TIMEZONE.

    The three-argument form is kept as an anonymous function call; otherwise the
    first argument is taken as the zone and the second as the value of an
    AtTimeZone expression.
    """
    if len(args) != 3:
        return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0))

    return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args)
167
168
class Snowflake(Dialect):
    # Dialect definition for Snowflake: dialect-level settings plus the Parser,
    # Tokenizer and Generator overrides declared below.
    null_ordering = "nulls_are_large"
    time_format = "'yyyy-mm-dd hh24:mi:ss'"

    # Snowflake format elements mapped to strftime-style directives.
    # NOTE(review): "dd" -> "%-d" and "dy" -> "%w" differ from their upper-case
    # counterparts ("%d" / "%a") — confirm this asymmetry is intentional.
    time_mapping = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True

        # Function-name -> builder overrides layered on top of the base parser's.
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
            "CONVERT_TIMEZONE": _parse_convert_timezone,
            "DATE_TRUNC": date_trunc_to_time,
            # DATEADD / DATEDIFF take (unit, expression, this) in that order.
            "DATEADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2),
                expression=seq_get(args, 1),
                unit=seq_get(args, 0),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2),
                expression=seq_get(args, 1),
                unit=seq_get(args, 0),
            ),
            "DIV0": _div0_to_if,
            "IFF": exp.If.from_arg_list,
            "NULLIFZERO": _nullifzero_to_if,
            "OBJECT_CONSTRUCT": _parse_object_construct,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TO_ARRAY": exp.Array.from_arg_list,
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "TO_TIMESTAMP": _snowflake_to_timestamp,
            "ZEROIFNULL": _zeroifnull_to_if,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": _parse_date_part,
        }
        # Drop the inherited special-case TRIM parser for this dialect.
        FUNCTION_PARSERS.pop("TRIM")

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.RLIKE,
            TokenType.TABLE,
        }

        COLUMN_OPERATORS = {
            **parser.Parser.COLUMN_OPERATORS,
            # `<this>:<path>` is parsed into a Bracket expression.
            TokenType.COLON: lambda self, this, path: self.expression(
                exp.Bracket,
                this=this,
                expressions=[path],
            ),
        }

        TIMESTAMPS = parser.Parser.TIMESTAMPS.copy() - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self._parse_alter_table_set_tag(unset=True),
            "SET": lambda self: self._parse_alter_table_set_tag(),
        }

        def _parse_alter_table_set_tag(self, unset: bool = False) -> exp.Expression:
            """Parse the tag list of an ALTER ... SET/UNSET [TAG] clause into a SetTag.

            Args:
                unset: True for UNSET, in which case each item is parsed as an
                    identifier; otherwise each item is parsed as a conjunction.
            """
            self._match_text_seq("TAG")
            # Named `parse_item` so the imported `parser` module is not shadowed.
            parse_item = t.cast(
                t.Callable, self._parse_id_var if unset else self._parse_conjunction
            )
            return self.expression(exp.SetTag, expressions=self._parse_csv(parse_item), unset=unset)

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", "$$"]
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "RENAME": TokenType.REPLACE,
            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
            "SAMPLE": TokenType.TABLE_SAMPLE,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False

        # Expression type -> SQL renderer overrides layered on the base generator's.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.DateAdd: lambda self, e: self.func("DATEADD", e.text("unit"), e.expression, e.this),
            exp.DateDiff: lambda self, e: self.func(
                "DATEDIFF", e.text("unit"), e.expression, e.this
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DataType: _datatype_sql,
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.Extract: rename_func("DATE_PART"),
            exp.If: rename_func("IFF"),
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Select: transforms.preprocess([transforms.eliminate_distinct_on]),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
            # Each struct entry is flattened into alternating OBJECT_CONSTRUCT arguments.
            exp.Struct: lambda self, e: self.func(
                "OBJECT_CONSTRUCT",
                *(arg for expression in e.expressions for arg in expression.flatten()),
            ),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e)
            ),
            exp.TimestampTrunc: timestamptrunc_sql,
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsToDate: ts_or_ds_to_date_sql("snowflake"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
        }

        STAR_MAPPING = {
            "except": "EXCLUDE",
            "replace": "RENAME",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def except_op(self, expression: exp.Except) -> str:
            """Render EXCEPT, reporting the non-DISTINCT form as unsupported."""
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT with All is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            """Render INTERSECT, reporting the non-DISTINCT form as unsupported."""
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT with All is not supported in Snowflake")
            return super().intersect_op(expression)

        def settag_sql(self, expression: exp.SetTag) -> str:
            """Render a SetTag expression as `SET TAG ...` or `UNSET TAG ...`."""
            action = "UNSET" if expression.args.get("unset") else "SET"
            return f"{action} TAG {self.expressions(expression)}"

        def describe_sql(self, expression: exp.Describe) -> str:
            """Render DESCRIBE <kind> <object>, defaulting the kind to TABLE."""
            # `kind` is always non-empty after the fallback, so no conditional
            # spacing is needed.
            kind = expression.args.get("kind") or "TABLE"
            return f"DESCRIBE {kind} {self.sql(expression, 'this')}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            """Render an identity constraint as AUTOINCREMENT [START n] [INCREMENT n]."""
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"
class Snowflake(sqlglot.dialects.dialect.Dialect):
class Snowflake(Dialect):
    # Dialect definition for Snowflake: dialect-level settings plus the Parser,
    # Tokenizer and Generator overrides declared below.
    null_ordering = "nulls_are_large"
    time_format = "'yyyy-mm-dd hh24:mi:ss'"

    # Snowflake format elements mapped to strftime-style directives.
    # NOTE(review): "dd" -> "%-d" and "dy" -> "%w" differ from their upper-case
    # counterparts ("%d" / "%a") — confirm this asymmetry is intentional.
    time_mapping = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True

        # Function-name -> builder overrides layered on top of the base parser's.
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
            "CONVERT_TIMEZONE": _parse_convert_timezone,
            "DATE_TRUNC": date_trunc_to_time,
            # DATEADD / DATEDIFF take (unit, expression, this) in that order.
            "DATEADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2),
                expression=seq_get(args, 1),
                unit=seq_get(args, 0),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2),
                expression=seq_get(args, 1),
                unit=seq_get(args, 0),
            ),
            "DIV0": _div0_to_if,
            "IFF": exp.If.from_arg_list,
            "NULLIFZERO": _nullifzero_to_if,
            "OBJECT_CONSTRUCT": _parse_object_construct,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TO_ARRAY": exp.Array.from_arg_list,
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "TO_TIMESTAMP": _snowflake_to_timestamp,
            "ZEROIFNULL": _zeroifnull_to_if,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": _parse_date_part,
        }
        # Drop the inherited special-case TRIM parser for this dialect.
        FUNCTION_PARSERS.pop("TRIM")

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.RLIKE,
            TokenType.TABLE,
        }

        COLUMN_OPERATORS = {
            **parser.Parser.COLUMN_OPERATORS,
            # `<this>:<path>` is parsed into a Bracket expression.
            TokenType.COLON: lambda self, this, path: self.expression(
                exp.Bracket,
                this=this,
                expressions=[path],
            ),
        }

        TIMESTAMPS = parser.Parser.TIMESTAMPS.copy() - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self._parse_alter_table_set_tag(unset=True),
            "SET": lambda self: self._parse_alter_table_set_tag(),
        }

        def _parse_alter_table_set_tag(self, unset: bool = False) -> exp.Expression:
            """Parse the tag list of an ALTER ... SET/UNSET [TAG] clause into a SetTag.

            Args:
                unset: True for UNSET, in which case each item is parsed as an
                    identifier; otherwise each item is parsed as a conjunction.
            """
            self._match_text_seq("TAG")
            # Named `parse_item` so the imported `parser` module is not shadowed.
            parse_item = t.cast(
                t.Callable, self._parse_id_var if unset else self._parse_conjunction
            )
            return self.expression(exp.SetTag, expressions=self._parse_csv(parse_item), unset=unset)

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", "$$"]
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "RENAME": TokenType.REPLACE,
            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
            "SAMPLE": TokenType.TABLE_SAMPLE,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False

        # Expression type -> SQL renderer overrides layered on the base generator's.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.DateAdd: lambda self, e: self.func("DATEADD", e.text("unit"), e.expression, e.this),
            exp.DateDiff: lambda self, e: self.func(
                "DATEDIFF", e.text("unit"), e.expression, e.this
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DataType: _datatype_sql,
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.Extract: rename_func("DATE_PART"),
            exp.If: rename_func("IFF"),
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Select: transforms.preprocess([transforms.eliminate_distinct_on]),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
            # Each struct entry is flattened into alternating OBJECT_CONSTRUCT arguments.
            exp.Struct: lambda self, e: self.func(
                "OBJECT_CONSTRUCT",
                *(arg for expression in e.expressions for arg in expression.flatten()),
            ),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e)
            ),
            exp.TimestampTrunc: timestamptrunc_sql,
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsToDate: ts_or_ds_to_date_sql("snowflake"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
        }

        STAR_MAPPING = {
            "except": "EXCLUDE",
            "replace": "RENAME",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def except_op(self, expression: exp.Except) -> str:
            """Render EXCEPT, reporting the non-DISTINCT form as unsupported."""
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT with All is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            """Render INTERSECT, reporting the non-DISTINCT form as unsupported."""
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT with All is not supported in Snowflake")
            return super().intersect_op(expression)

        def settag_sql(self, expression: exp.SetTag) -> str:
            """Render a SetTag expression as `SET TAG ...` or `UNSET TAG ...`."""
            action = "UNSET" if expression.args.get("unset") else "SET"
            return f"{action} TAG {self.expressions(expression)}"

        def describe_sql(self, expression: exp.Describe) -> str:
            """Render DESCRIBE <kind> <object>, defaulting the kind to TABLE."""
            # `kind` is always non-empty after the fallback, so no conditional
            # spacing is needed.
            kind = expression.args.get("kind") or "TABLE"
            return f"DESCRIBE {kind} {self.sql(expression, 'this')}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            """Render an identity constraint as AUTOINCREMENT [START n] [INCREMENT n]."""
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"
class Snowflake.Parser(sqlglot.parser.Parser):
203    class Parser(parser.Parser):
204        IDENTIFY_PIVOT_STRINGS = True
205
206        FUNCTIONS = {
207            **parser.Parser.FUNCTIONS,
208            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
209            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
210            "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
211            "CONVERT_TIMEZONE": _parse_convert_timezone,
212            "DATE_TRUNC": date_trunc_to_time,
213            "DATEADD": lambda args: exp.DateAdd(
214                this=seq_get(args, 2),
215                expression=seq_get(args, 1),
216                unit=seq_get(args, 0),
217            ),
218            "DATEDIFF": lambda args: exp.DateDiff(
219                this=seq_get(args, 2),
220                expression=seq_get(args, 1),
221                unit=seq_get(args, 0),
222            ),
223            "DIV0": _div0_to_if,
224            "IFF": exp.If.from_arg_list,
225            "NULLIFZERO": _nullifzero_to_if,
226            "OBJECT_CONSTRUCT": _parse_object_construct,
227            "RLIKE": exp.RegexpLike.from_arg_list,
228            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
229            "TO_ARRAY": exp.Array.from_arg_list,
230            "TO_VARCHAR": exp.ToChar.from_arg_list,
231            "TO_TIMESTAMP": _snowflake_to_timestamp,
232            "ZEROIFNULL": _zeroifnull_to_if,
233        }
234
235        FUNCTION_PARSERS = {
236            **parser.Parser.FUNCTION_PARSERS,
237            "DATE_PART": _parse_date_part,
238        }
239        FUNCTION_PARSERS.pop("TRIM")
240
241        FUNC_TOKENS = {
242            *parser.Parser.FUNC_TOKENS,
243            TokenType.RLIKE,
244            TokenType.TABLE,
245        }
246
247        COLUMN_OPERATORS = {
248            **parser.Parser.COLUMN_OPERATORS,
249            TokenType.COLON: lambda self, this, path: self.expression(
250                exp.Bracket,
251                this=this,
252                expressions=[path],
253            ),
254        }
255
256        TIMESTAMPS = parser.Parser.TIMESTAMPS.copy() - {TokenType.TIME}
257
258        RANGE_PARSERS = {
259            **parser.Parser.RANGE_PARSERS,
260            TokenType.LIKE_ANY: binary_range_parser(exp.LikeAny),
261            TokenType.ILIKE_ANY: binary_range_parser(exp.ILikeAny),
262        }
263
264        ALTER_PARSERS = {
265            **parser.Parser.ALTER_PARSERS,
266            "UNSET": lambda self: self._parse_alter_table_set_tag(unset=True),
267            "SET": lambda self: self._parse_alter_table_set_tag(),
268        }
269
270        def _parse_alter_table_set_tag(self, unset: bool = False) -> exp.Expression:
271            self._match_text_seq("TAG")
272            parser = t.cast(t.Callable, self._parse_id_var if unset else self._parse_conjunction)
273            return self.expression(exp.SetTag, expressions=self._parse_csv(parser), unset=unset)

Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: the desired error level. Default: ErrorLevel.IMMEDIATE
  • error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
  • index_offset: the index offset for arrays, e.g. whether ARRAY[0] or ARRAY[1] is the head of a list. Default: 0
  • alias_post_tablesample: whether the table alias comes after the TABLESAMPLE clause. Default: False
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
  • null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
class Snowflake.Tokenizer(sqlglot.tokens.Tokenizer):
275    class Tokenizer(tokens.Tokenizer):
276        QUOTES = ["'", "$$"]
277        STRING_ESCAPES = ["\\", "'"]
278        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
279
280        KEYWORDS = {
281            **tokens.Tokenizer.KEYWORDS,
282            "CHAR VARYING": TokenType.VARCHAR,
283            "CHARACTER VARYING": TokenType.VARCHAR,
284            "EXCLUDE": TokenType.EXCEPT,
285            "ILIKE ANY": TokenType.ILIKE_ANY,
286            "LIKE ANY": TokenType.LIKE_ANY,
287            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
288            "MINUS": TokenType.EXCEPT,
289            "NCHAR VARYING": TokenType.VARCHAR,
290            "PUT": TokenType.COMMAND,
291            "RENAME": TokenType.REPLACE,
292            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
293            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
294            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
295            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
296            "SAMPLE": TokenType.TABLE_SAMPLE,
297        }
298
299        SINGLE_TOKENS = {
300            **tokens.Tokenizer.SINGLE_TOKENS,
301            "$": TokenType.PARAMETER,
302        }
303
304        VAR_SINGLE_TOKENS = {"$"}
class Snowflake.Generator(sqlglot.generator.Generator):
306    class Generator(generator.Generator):
           # Snowflake SQL generator settings: extends the base TRANSFORMS, TYPE_MAPPING
           # and PROPERTIES_LOCATION tables and overrides a handful of *_sql / *_op methods.
307        PARAMETER_TOKEN = "$"  # same character the Tokenizer maps to TokenType.PARAMETER
           # NOTE(review): the four flags below are read by the base generator; their
           # exact effect is not visible in this listing -- confirm in generator.Generator.
308        MATCHED_BY_SOURCE = False
309        SINGLE_STRING_INTERVAL = True
310        JOIN_HINTS = False
311        TABLE_HINTS = False
312
           # Expression class -> callable(self, expression) returning SQL text. The base
           # table is spread first, so the entries below add to or override it.
313        TRANSFORMS = {
314            **generator.Generator.TRANSFORMS,
315            exp.Array: inline_array_sql,
316            exp.ArrayConcat: rename_func("ARRAY_CAT"),
317            exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
               # Emitted as CONVERT_TIMEZONE(<zone>, <value>).
318            exp.AtTimeZone: lambda self, e: self.func(
319                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
320            ),
               # DATEADD / DATEDIFF arguments are emitted as: unit, expression, this.
321            exp.DateAdd: lambda self, e: self.func("DATEADD", e.text("unit"), e.expression, e.this),
322            exp.DateDiff: lambda self, e: self.func(
323                "DATEDIFF", e.text("unit"), e.expression, e.this
324            ),
325            exp.DateStrToDate: datestrtodate_sql,
326            exp.DataType: _datatype_sql,
327            exp.DayOfWeek: rename_func("DAYOFWEEK"),
328            exp.Extract: rename_func("DATE_PART"),
329            exp.If: rename_func("IFF"),
330            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
331            exp.LogicalOr: rename_func("BOOLOR_AGG"),
332            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
333            exp.Max: max_or_greatest,
334            exp.Min: min_or_least,
335            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
336            exp.Select: transforms.preprocess([transforms.eliminate_distinct_on]),
337            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
               # Emitted as POSITION(<substr>, <string>, <position>).
338            exp.StrPosition: lambda self, e: self.func(
339                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
340            ),
341            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
               # Each struct member is flattened and its parts passed as consecutive
               # OBJECT_CONSTRUCT arguments.
342            exp.Struct: lambda self, e: self.func(
343                "OBJECT_CONSTRUCT",
344                *(arg for expression in e.expressions for arg in expression.flatten()),
345            ),
346            exp.TimeStrToTime: timestrtotime_sql,
347            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
               # The operand is cast to timestamp before being formatted with TO_CHAR.
348            exp.TimeToStr: lambda self, e: self.func(
349                "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e)
350            ),
351            exp.TimestampTrunc: timestamptrunc_sql,
352            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
353            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
354            exp.TsOrDsToDate: ts_or_ds_to_date_sql("snowflake"),
355            exp.UnixToTime: _unix_to_time_sql,
356            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
357        }
358
           # Base type names, with TIMESTAMP rendered as TIMESTAMPNTZ.
359        TYPE_MAPPING = {
360            **generator.Generator.TYPE_MAPPING,
361            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
362        }
363
           # Star-modifier spellings; the inverse of the Tokenizer's EXCLUDE -> EXCEPT and
           # RENAME -> REPLACE keyword entries.
364        STAR_MAPPING = {
365            "except": "EXCLUDE",
366            "replace": "RENAME",
367        }
368
           # SetProperty and VolatileProperty are marked as having no supported location.
369        PROPERTIES_LOCATION = {
370            **generator.Generator.PROPERTIES_LOCATION,
371            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
372            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
373        }
374
375        def except_op(self, expression: exp.Except) -> str:
               # Reports EXCEPT ALL (a missing or falsy "distinct" arg) through
               # self.unsupported, then returns the base class's result unchanged.
376            if not expression.args.get("distinct", False):
377                self.unsupported("EXCEPT with All is not supported in Snowflake")
378            return super().except_op(expression)
379
380        def intersect_op(self, expression: exp.Intersect) -> str:
               # Reports INTERSECT ALL (a missing or falsy "distinct" arg) through
               # self.unsupported, then returns the base class's result unchanged.
381            if not expression.args.get("distinct", False):
382                self.unsupported("INTERSECT with All is not supported in Snowflake")
383            return super().intersect_op(expression)
384
385        def settag_sql(self, expression: exp.SetTag) -> str:
               # Returns "UNSET TAG ..." when the "unset" arg is truthy, otherwise
               # "SET TAG ...", followed by the rendered child expressions.
386            action = "UNSET" if expression.args.get("unset") else "SET"
387            return f"{action} TAG {self.expressions(expression)}"
388
389        def describe_sql(self, expression: exp.Describe) -> str:
               # Returns "DESCRIBE <kind> <this>"; kind falls back to "TABLE".
390            # Default to table if kind is unknown
391            kind_value = expression.args.get("kind") or "TABLE"
               # NOTE(review): kind_value is always truthy because of the "TABLE"
               # fallback above, so the else branch on the next line is never taken.
392            kind = f" {kind_value}" if kind_value else ""
393            this = f" {self.sql(expression, 'this')}"
394            return f"DESCRIBE{kind}{this}"
395
396        def generatedasidentitycolumnconstraint_sql(
397            self, expression: exp.GeneratedAsIdentityColumnConstraint
398        ) -> str:
               # Returns "AUTOINCREMENT" followed by " START <start>" and
               # " INCREMENT <increment>"; each part is left out when its arg is missing
               # or falsy. The values are interpolated directly into the f-string rather
               # than rendered through self.sql.
399            start = expression.args.get("start")
400            start = f" START {start}" if start else ""
401            increment = expression.args.get("increment")
402            increment = f" INCREMENT {increment}" if increment else ""
403            return f"AUTOINCREMENT{start}{increment}"

Generator interprets the given syntax tree and produces a SQL string as an output.

Arguments:
  • time_mapping (dict): the dictionary of custom time mappings in which the key represents a python time format and the value the target time format
  • time_trie (trie): a trie of the time_mapping keys
  • pretty (bool): if set to True the returned string will be formatted. Default: False.
  • quote_start (str): specifies which starting character to use to delimit quotes. Default: '.
  • quote_end (str): specifies which ending character to use to delimit quotes. Default: '.
  • identifier_start (str): specifies which starting character to use to delimit identifiers. Default: ".
  • identifier_end (str): specifies which ending character to use to delimit identifiers. Default: ".
  • bit_start (str): specifies which starting character to use to delimit bit literals. Default: None.
  • bit_end (str): specifies which ending character to use to delimit bit literals. Default: None.
  • hex_start (str): specifies which starting character to use to delimit hex literals. Default: None.
  • hex_end (str): specifies which ending character to use to delimit hex literals. Default: None.
  • byte_start (str): specifies which starting character to use to delimit byte literals. Default: None.
  • byte_end (str): specifies which ending character to use to delimit byte literals. Default: None.
  • raw_start (str): specifies which starting character to use to delimit raw literals. Default: None.
  • raw_end (str): specifies which ending character to use to delimit raw literals. Default: None.
  • identify (bool | str): 'always': always quote, 'safe': quote identifiers if they don't contain an uppercase character, True defaults to 'always'.
  • normalize (bool): if set to True all identifiers will be lowercased
  • string_escape (str): specifies a string escape character. Default: '.
  • identifier_escape (str): specifies an identifier escape character. Default: ".
  • pad (int): determines padding in a formatted string. Default: 2.
  • indent (int): determines the size of indentation in a formatted string. Default: 4.
  • unnest_column_only (bool): if true unnest table aliases are considered only as column aliases
  • normalize_functions (str): normalize function names: "upper", "lower", or None. Default: "upper"
  • alias_post_tablesample (bool): if the table alias comes after tablesample. Default: False
  • identifiers_can_start_with_digit (bool): if an unquoted identifier can start with a digit. Default: False
  • unsupported_level (ErrorLevel): determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
  • null_ordering (str): Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
  • max_unsupported (int): Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
  • leading_comma (bool): if the comma is leading or trailing in select statements. Default: False
  • max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
  • comments: Whether or not to preserve comments in the output SQL code. Default: True
def except_op(self, expression: sqlglot.expressions.Except) -> str:
375        def except_op(self, expression: exp.Except) -> str:
               # Reports EXCEPT ALL (a missing or falsy "distinct" arg) through
               # self.unsupported, then returns the base class's result unchanged.
376            if not expression.args.get("distinct", False):
377                self.unsupported("EXCEPT with All is not supported in Snowflake")
378            return super().except_op(expression)
def intersect_op(self, expression: sqlglot.expressions.Intersect) -> str:
380        def intersect_op(self, expression: exp.Intersect) -> str:
               # Reports INTERSECT ALL (a missing or falsy "distinct" arg) through
               # self.unsupported, then returns the base class's result unchanged.
381            if not expression.args.get("distinct", False):
382                self.unsupported("INTERSECT with All is not supported in Snowflake")
383            return super().intersect_op(expression)
def settag_sql(self, expression: sqlglot.expressions.SetTag) -> str:
385        def settag_sql(self, expression: exp.SetTag) -> str:
               # Returns "UNSET TAG ..." when the "unset" arg is truthy, otherwise
               # "SET TAG ...", followed by the rendered child expressions.
386            action = "UNSET" if expression.args.get("unset") else "SET"
387            return f"{action} TAG {self.expressions(expression)}"
def describe_sql(self, expression: sqlglot.expressions.Describe) -> str:
389        def describe_sql(self, expression: exp.Describe) -> str:
               # Returns "DESCRIBE <kind> <this>"; kind falls back to "TABLE".
390            # Default to table if kind is unknown
391            kind_value = expression.args.get("kind") or "TABLE"
               # NOTE(review): kind_value is always truthy because of the "TABLE"
               # fallback above, so the else branch on the next line is never taken.
392            kind = f" {kind_value}" if kind_value else ""
393            this = f" {self.sql(expression, 'this')}"
394            return f"DESCRIBE{kind}{this}"
def generatedasidentitycolumnconstraint_sql( self, expression: sqlglot.expressions.GeneratedAsIdentityColumnConstraint) -> str:
396        def generatedasidentitycolumnconstraint_sql(
397            self, expression: exp.GeneratedAsIdentityColumnConstraint
398        ) -> str:
               # Returns "AUTOINCREMENT" followed by " START <start>" and
               # " INCREMENT <increment>"; each part is left out when its arg is missing
               # or falsy. The values are interpolated directly into the f-string rather
               # than rendered through self.sql.
399            start = expression.args.get("start")
400            start = f" START {start}" if start else ""
401            increment = expression.args.get("increment")
402            increment = f" INCREMENT {increment}" if increment else ""
403            return f"AUTOINCREMENT{start}{increment}"
Inherited Members
sqlglot.generator.Generator
Generator
generate
unsupported
sep
seg
pad_comment
maybe_comment
wrap
no_identify
normalize_func
indent
sql
uncache_sql
cache_sql
characterset_sql
column_sql
columnposition_sql
columndef_sql
columnconstraint_sql
autoincrementcolumnconstraint_sql
compresscolumnconstraint_sql
notnullcolumnconstraint_sql
primarykeycolumnconstraint_sql
uniquecolumnconstraint_sql
create_sql
clone_sql
prepend_ctes
with_sql
cte_sql
tablealias_sql
bitstring_sql
hexstring_sql
bytestring_sql
rawstring_sql
datatypesize_sql
datatype_sql
directory_sql
delete_sql
drop_sql
except_sql
fetch_sql
filter_sql
hint_sql
index_sql
identifier_sql
inputoutputformat_sql
national_sql
partition_sql
properties_sql
root_properties
properties
with_properties
locate_properties
property_sql
likeproperty_sql
fallbackproperty_sql
journalproperty_sql
freespaceproperty_sql
checksumproperty_sql
mergeblockratioproperty_sql
datablocksizeproperty_sql
blockcompressionproperty_sql
isolatedloadingproperty_sql
lockingproperty_sql
withdataproperty_sql
insert_sql
intersect_sql
introducer_sql
pseudotype_sql
onconflict_sql
returning_sql
rowformatdelimitedproperty_sql
table_sql
tablesample_sql
pivot_sql
tuple_sql
update_sql
values_sql
var_sql
into_sql
from_sql
group_sql
having_sql
join_sql
lambda_sql
lateral_sql
limit_sql
offset_sql
setitem_sql
set_sql
pragma_sql
lock_sql
literal_sql
loaddata_sql
null_sql
boolean_sql
order_sql
cluster_sql
distribute_sql
sort_sql
ordered_sql
matchrecognize_sql
query_modifiers
after_having_modifiers
after_limit_modifiers
select_sql
schema_sql
star_sql
parameter_sql
sessionparameter_sql
placeholder_sql
subquery_sql
qualify_sql
union_sql
union_op
unnest_sql
where_sql
window_sql
partition_by_sql
windowspec_sql
withingroup_sql
between_sql
bracket_sql
all_sql
any_sql
exists_sql
case_sql
constraint_sql
nextvaluefor_sql
extract_sql
trim_sql
concat_sql
check_sql
foreignkey_sql
primarykey_sql
if_sql
matchagainst_sql
jsonkeyvalue_sql
jsonobject_sql
openjsoncolumndef_sql
openjson_sql
in_sql
in_unnest_op
interval_sql
return_sql
reference_sql
anonymous_sql
paren_sql
neg_sql
not_sql
alias_sql
aliases_sql
attimezone_sql
add_sql
and_sql
connector_sql
bitwiseand_sql
bitwiseleftshift_sql
bitwisenot_sql
bitwiseor_sql
bitwiserightshift_sql
bitwisexor_sql
cast_sql
currentdate_sql
collate_sql
command_sql
comment_sql
mergetreettlaction_sql
mergetreettl_sql
transaction_sql
commit_sql
rollback_sql
altercolumn_sql
renametable_sql
altertable_sql
droppartition_sql
addconstraint_sql
distinct_sql
ignorenulls_sql
respectnulls_sql
intdiv_sql
dpipe_sql
div_sql
overlaps_sql
distance_sql
dot_sql
eq_sql
escape_sql
glob_sql
gt_sql
gte_sql
ilike_sql
ilikeany_sql
is_sql
like_sql
likeany_sql
similarto_sql
lt_sql
lte_sql
mod_sql
mul_sql
neq_sql
nullsafeeq_sql
nullsafeneq_sql
or_sql
slice_sql
sub_sql
trycast_sql
use_sql
binary
function_fallback_sql
func
format_args
text_width
format_time
expressions
op_expressions
naked_property
set_operation
tag_sql
token_sql
userdefinedfunction_sql
joinhint_sql
kwarg_sql
when_sql
merge_sql
tochar_sql
dictproperty_sql
dictrange_sql
dictsubproperty_sql