sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    format_time_lambda,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.expressions import Literal
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _parse_to_timestamp(args: t.List) -> t.Union[exp.StrToTime, exp.UnixToTime, exp.TimeStrToTime]:
    if len(args) == 2:
        first_arg, second_arg = args
        if second_arg.is_string:
            # case: <string_expr> [ , <format> ]
            return format_time_lambda(exp.StrToTime, "snowflake")(args)
        return exp.UnixToTime(this=first_arg, scale=second_arg)

    from sqlglot.optimizer.simplify import simplify_literals

    # The first argument might be an expression like 40 * 365 * 86400, so we try to
    # reduce it using `simplify_literals` first and then check if it's a Literal.
    first_arg = seq_get(args, 0)
    if not isinstance(simplify_literals(first_arg, root=True), Literal):
        # case: <variant_expr> or other expressions such as columns
        return exp.TimeStrToTime.from_arg_list(args)

    if first_arg.is_string:
        if is_int(first_arg.this):
            # case: <integer>
            return exp.UnixToTime.from_arg_list(args)

        # case: <date_expr>
        return format_time_lambda(exp.StrToTime, "snowflake", default=True)(args)

    # case: <numeric_expr>
    return exp.UnixToTime.from_arg_list(args)


def _parse_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.parse_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            t.cast(exp.Condition, k).eq(v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _parse_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=_map_date_part(seq_get(args, 0))
    )


# https://docs.snowflake.com/en/sql-reference/functions/date_part.html
# https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
    this = self._parse_var() or self._parse_type()

    if not this:
        return None

    self._match(TokenType.COMMA)
    expression = self._parse_bitwise()
    this = _map_date_part(this)
    name = this.name.upper()

    if name.startswith("EPOCH"):
        if name == "EPOCH_MILLISECOND":
            scale = 10**3
        elif name == "EPOCH_MICROSECOND":
            scale = 10**6
        elif name == "EPOCH_NANOSECOND":
            scale = 10**9
        else:
            scale = None

        ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
        to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

        if scale:
            to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

        return to_unix

    return self.expression(exp.Extract, this=this, expression=expression)


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _div0_to_if(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0))
    true = exp.Literal.number(0)
    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _zeroifnull_to_if(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _nullifzero_to_if(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _datatype_sql(self: Snowflake.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return "ARRAY"
    elif expression.is_type("map"):
        return "OBJECT"
    return self.datatype_sql(expression)


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _parse_convert_timezone(args: t.List) -> t.Union[exp.Anonymous, exp.AtTimeZone]:
    if len(args) == 3:
        return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args)
    return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0))


def _parse_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


DATE_PART_MAPPING = {
    "Y": "YEAR",
    "YY": "YEAR",
    "YYY": "YEAR",
    "YYYY": "YEAR",
    "YR": "YEAR",
    "YEARS": "YEAR",
    "YRS": "YEAR",
    "MM": "MONTH",
    "MON": "MONTH",
    "MONS": "MONTH",
    "MONTHS": "MONTH",
    "D": "DAY",
    "DD": "DAY",
    "DAYS": "DAY",
    "DAYOFMONTH": "DAY",
    "WEEKDAY": "DAYOFWEEK",
    "DOW": "DAYOFWEEK",
    "DW": "DAYOFWEEK",
    "WEEKDAY_ISO": "DAYOFWEEKISO",
    "DOW_ISO": "DAYOFWEEKISO",
    "DW_ISO": "DAYOFWEEKISO",
    "YEARDAY": "DAYOFYEAR",
    "DOY": "DAYOFYEAR",
    "DY": "DAYOFYEAR",
    "W": "WEEK",
    "WK": "WEEK",
    "WEEKOFYEAR": "WEEK",
    "WOY": "WEEK",
    "WY": "WEEK",
    "WEEK_ISO": "WEEKISO",
    "WEEKOFYEARISO": "WEEKISO",
    "WEEKOFYEAR_ISO": "WEEKISO",
    "Q": "QUARTER",
    "QTR": "QUARTER",
    "QTRS": "QUARTER",
    "QUARTERS": "QUARTER",
    "H": "HOUR",
    "HH": "HOUR",
    "HR": "HOUR",
    "HOURS": "HOUR",
    "HRS": "HOUR",
    "M": "MINUTE",
    "MI": "MINUTE",
    "MIN": "MINUTE",
    "MINUTES": "MINUTE",
    "MINS": "MINUTE",
    "S": "SECOND",
    "SEC": "SECOND",
    "SECONDS": "SECOND",
    "SECS": "SECOND",
    "MS": "MILLISECOND",
    "MSEC": "MILLISECOND",
    "MILLISECONDS": "MILLISECOND",
    "US": "MICROSECOND",
    "USEC": "MICROSECOND",
    "MICROSECONDS": "MICROSECOND",
    "NS": "NANOSECOND",
    "NSEC": "NANOSECOND",
    "NANOSEC": "NANOSECOND",
    "NSECOND": "NANOSECOND",
    "NSECONDS": "NANOSECOND",
    "NANOSECS": "NANOSECOND",
    "EPOCH": "EPOCH_SECOND",
    "EPOCH_SECONDS": "EPOCH_SECOND",
    "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND",
    "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND",
    "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND",
    "TZH": "TIMEZONE_HOUR",
    "TZM": "TIMEZONE_MINUTE",
}


@t.overload
def _map_date_part(part: exp.Expression) -> exp.Var:
    pass


@t.overload
def _map_date_part(part: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    pass


def _map_date_part(part):
    mapped = DATE_PART_MAPPING.get(part.name.upper()) if part else None
    return exp.var(mapped) if mapped else part


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", _map_date_part(trunc.args["unit"]))
    return trunc


def _parse_colon_get_path(
    self: parser.Parser, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    while True:
        path = self._parse_bitwise()

        # The cast :: operator has a lower precedence than the extraction operator :, so
        # we rearrange the AST appropriately to avoid casting the 2nd argument of GET_PATH
        if isinstance(path, exp.Cast):
            target_type = path.to
            path = path.this
        else:
            target_type = None

        if isinstance(path, exp.Expression):
            path = exp.Literal.string(path.sql(dialect="snowflake"))

        # The extraction operator : is left-associative
        this = self.expression(
            exp.JSONExtract, this=this, expression=self.dialect.to_json_path(path)
        )

        if target_type:
            this = exp.cast(this, target_type)

        if not self._match(TokenType.COLON):
            break

    return self._parse_range(this)


def _parse_timestamp_from_parts(args: t.List) -> exp.Func:
    if len(args) == 2:
        # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept,
        # so we parse this into Anonymous for now instead of introducing complexity
        return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args)

    return exp.TimestampFromParts.from_arg_list(args)


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "CONVERT_TIMEZONE": _parse_convert_timezone,
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2),
                expression=seq_get(args, 1),
                unit=_map_date_part(seq_get(args, 0)),
            ),
            "DATEDIFF": _parse_datediff,
            "DIV0": _div0_to_if,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1))
            ),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "NULLIFZERO": _nullifzero_to_if,
            "OBJECT_CONSTRUCT": _parse_object_construct,
            "REGEXP_REPLACE": _parse_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEDIFF": _parse_datediff,
            "TIMESTAMPDIFF": _parse_datediff,
            "TIMESTAMPFROMPARTS": _parse_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": _parse_timestamp_from_parts,
            "TO_TIMESTAMP": _parse_to_timestamp,
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _zeroifnull_to_if,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": _parse_date_part,
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
            TokenType.COLON: _parse_colon_get_path,
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")),
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
            "SWAP": lambda self: self._parse_alter_table_swap(),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location(),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "COLUMNS": _show_parser("COLUMNS"),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_at_before(self, table: exp.Table) -> exp.Table:
            # https://docs.snowflake.com/en/sql-reference/constructs/at-before
            index = self._index
            if self._match_texts(("AT", "BEFORE")):
                this = self._prev.text.upper()
                kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.HISTORICAL_DATA_KIND)
                    and self._prev.text.upper()
                )
                expression = self._match(TokenType.FARROW) and self._parse_bitwise()

                if expression:
                    self._match_r_paren()
                    when = self.expression(
                        exp.HistoricalData, this=this, kind=kind, expression=expression
                    )
                    table.set("when", when)
                else:
                    self._retreat(index)

            return table

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                self._match(TokenType.L_PAREN)
                while self._curr and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return self._parse_at_before(table)

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in ("OBJECTS", "TABLES") else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator.
            while self._is_connected() and not self._match(TokenType.COMMA, advance=False):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RENAME": TokenType.REPLACE,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
            "TOP": TokenType.TOP,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DataType: _datatype_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtract: rename_func("GET_PATH"),
            exp.JSONExtractScalar: rename_func("JSON_EXTRACT_PATH_TEXT"),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.Struct: lambda self, e: self.func(
                "OBJECT_CONSTRUCT",
                *(arg for expression in e.expressions for arg in expression.flatten()),
            ),
            exp.Stuff: rename_func("INSERT"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e)
            ),
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
        }

        STAR_MAPPING = {
            "except": "EXCLUDE",
            "replace": "RENAME",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            # Other dialects don't support all of the following parameters, so we need to
            # generate default values as necessary to ensure the transpilation is correct
            group = expression.args.get("group")
            parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
            occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
            position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

            return self.func(
                "REGEXP_SUBSTR",
                expression.this,
                expression.expression,
                position,
                occurrence,
                parameters,
                group,
            )

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT with All is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT with All is not supported in Snowflake")
            return super().intersect_op(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def swaptable_sql(self, expression: exp.SwapTable) -> str:
            this = self.sql(expression, "this")
            return f"SWAP WITH {this}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ")

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"
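Before the per-member documentation below, a quick usage sketch of the dialect through sqlglot's top-level API. The table and column names are illustrative, and the exact SQL emitted may vary slightly across sqlglot versions:

    import sqlglot

    # DIV0(a, b) is parsed into an exp.If via _div0_to_if above, so targets
    # without DIV0 render it as an IFF/CASE-style zero-division guard.
    print(sqlglot.transpile("SELECT DIV0(a, b) FROM t", read="snowflake", write="duckdb")[0])

    # IFF maps to exp.If and round-trips through the Snowflake generator.
    print(sqlglot.transpile("SELECT IFF(x > 0, 1, 0) FROM t", read="snowflake", write="snowflake")[0])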
class Snowflake(Dialect):
NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE

Specifies the strategy according to which identifiers should be normalized.
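A small sketch of what this means in practice (identifier names are illustrative):

    from sqlglot import exp
    from sqlglot.dialects.snowflake import Snowflake

    # Unquoted identifiers resolve case-insensitively in Snowflake, as if uppercased,
    # so normalization uppercases them; quoted identifiers stay case-sensitive.
    print(Snowflake().normalize_identifier(exp.to_identifier("foo")).sql(dialect="snowflake"))  # expected: FOO
    print(Snowflake().normalize_identifier(exp.to_identifier("bar", quoted=True)).sql(dialect="snowflake"))  # expected: "bar"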
NULL_ORDERING = "nulls_are_large"

Indicates the default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
SUPPORTS_USER_DEFINED_TYPES = False

Determines whether or not user-defined data types are supported.
PREFER_CTE_ALIAS_COLUMN = True

Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.

For example,

    WITH y(c) AS (
        SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
    ) SELECT c FROM y;

will be rewritten as

    WITH y(c) AS (
        SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
    ) SELECT c FROM y;
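The rewrite is applied during column qualification rather than at parse time; a minimal sketch, assuming the optimizer's qualify step is where this flag is consumed:

    from sqlglot import parse_one
    from sqlglot.optimizer.qualify import qualify

    sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
    expression = parse_one(sql, read="snowflake")

    # qualify() resolves and aliases columns using the dialect's flags; with
    # PREFER_CTE_ALIAS_COLUMN set, the CTE alias column `c` wins over projection aliases.
    print(qualify(expression, dialect="snowflake").sql(dialect="snowflake"))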
TABLESAMPLE_SIZE_IS_PERCENT = True

Determines whether or not a size in the table sample clause represents percentage.
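For example (illustrative table name; the literal below means 10 percent of rows in Snowflake, not 10 rows):

    import sqlglot

    # Round-trips unchanged in Snowflake; dialects that read the sample size as a
    # row count rely on this flag to interpret the literal correctly when transpiling.
    print(sqlglot.transpile("SELECT * FROM t SAMPLE (10)", read="snowflake", write="snowflake")[0])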
TIME_MAPPING = {...}

Associates this dialect's time formats with their equivalent Python strftime formats.
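A short sketch of the mapping in action, assuming Dialect.format_time (listed under Inherited Members below) accepts a quoted format string as shown:

    from sqlglot.dialects.snowflake import Snowflake

    # TIME_MAPPING drives format_time, which converts a Snowflake format literal
    # into its Python strftime equivalent, token by token.
    fmt = Snowflake().format_time("'YYYY-MM-DD HH24:MI:SS'")
    print(fmt.sql())  # expected: '%Y-%m-%d %H:%M:%S'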
def quote_identifier(self, expression: E, identify: bool = True) -> E
Adds quotes to a given identifier.

Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
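A short usage sketch (identifier and table names are illustrative):

    from sqlglot import exp
    from sqlglot.dialects.snowflake import Snowflake

    dialect = Snowflake()

    # A regular identifier is quoted when identify=True (the default here).
    print(dialect.quote_identifier(exp.to_identifier("my_col")).sql(dialect="snowflake"))  # expected: "my_col"

    # DUAL inside a table reference is special-cased above and left unquoted.
    ident = exp.to_table("dual").this
    print(dialect.quote_identifier(ident).sql(dialect="snowflake"))  # expected: dual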
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- ESCAPE_SEQUENCES
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "CONVERT_TIMEZONE": _parse_convert_timezone,
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2),
                expression=seq_get(args, 1),
                unit=_map_date_part(seq_get(args, 0)),
            ),
            "DATEDIFF": _parse_datediff,
            "DIV0": _div0_to_if,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1))
            ),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "NULLIFZERO": _nullifzero_to_if,
            "OBJECT_CONSTRUCT": _parse_object_construct,
            "REGEXP_REPLACE": _parse_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEDIFF": _parse_datediff,
            "TIMESTAMPDIFF": _parse_datediff,
            "TIMESTAMPFROMPARTS": _parse_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": _parse_timestamp_from_parts,
            "TO_TIMESTAMP": _parse_to_timestamp,
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _zeroifnull_to_if,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": _parse_date_part,
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
            TokenType.COLON: _parse_colon_get_path,
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")),
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
            "SWAP": lambda self: self._parse_alter_table_swap(),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location(),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "COLUMNS": _show_parser("COLUMNS"),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_at_before(self, table: exp.Table) -> exp.Table:
            # https://docs.snowflake.com/en/sql-reference/constructs/at-before
            index = self._index
            if self._match_texts(("AT", "BEFORE")):
                this = self._prev.text.upper()
                kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.HISTORICAL_DATA_KIND)
                    and self._prev.text.upper()
                )
                expression = self._match(TokenType.FARROW) and self._parse_bitwise()

                if expression:
                    self._match_r_paren()
                    when = self.expression(
                        exp.HistoricalData, this=this, kind=kind, expression=expression
                    )
                    table.set("when", when)
                else:
                    self._retreat(index)

            return table

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                self._match(TokenType.L_PAREN)
                while self._curr and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return self._parse_at_before(table)

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS,
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in ("OBJECTS", "TABLES") else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator.
            while self._is_connected() and not self._match(TokenType.COMMA, advance=False):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
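A minimal sketch of the parser's Snowflake-specific behavior; the printed results are approximate and may differ across sqlglot versions:

    import sqlglot
    from sqlglot import exp

    # Snowflake's DATEADD takes the unit first: DATEADD(<unit>, <delta>, <date>),
    # which the FUNCTIONS mapping reorders into the canonical exp.DateAdd node.
    ast = sqlglot.parse_one("SELECT DATEADD(day, 5, CURRENT_DATE)", read="snowflake")
    assert ast.find(exp.DateAdd) is not None

    # IFF is parsed into the dialect-agnostic exp.If node, so it transpiles cleanly.
    print(sqlglot.transpile("SELECT IFF(x > 0, 1, 2)", read="snowflake", write="postgres")[0])
    # roughly: SELECT CASE WHEN x > 0 THEN 1 ELSE 2 END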
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- CONSTRAINT_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RENAME": TokenType.REPLACE,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
            "TOP": TokenType.TOP,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
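A rough illustration of these tokenizer settings (output shapes are approximate):

    import sqlglot
    from sqlglot.dialects.snowflake import Snowflake

    # "//" opens a line comment and $$...$$ delimits a raw string.
    tokens = Snowflake().tokenize("SELECT $$raw string$$ // line comment")
    print([token.token_type.name for token in tokens])

    # MINUS tokenizes as EXCEPT, so it transpiles to the standard set operator.
    print(sqlglot.transpile("SELECT a FROM t MINUS SELECT a FROM u", read="snowflake", write="postgres")[0])
    # roughly: SELECT a FROM t EXCEPT SELECT a FROM u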
    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DataType: _datatype_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtract: rename_func("GET_PATH"),
            exp.JSONExtractScalar: rename_func("JSON_EXTRACT_PATH_TEXT"),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.Struct: lambda self, e: self.func(
                "OBJECT_CONSTRUCT",
                *(arg for expression in e.expressions for arg in expression.flatten()),
            ),
            exp.Stuff: rename_func("INSERT"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e)
            ),
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
        }

        STAR_MAPPING = {
            "except": "EXCLUDE",
            "replace": "RENAME",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            # Other dialects don't support all of the following parameters, so we need to
            # generate default values as necessary to ensure the transpilation is correct
            group = expression.args.get("group")
            parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
            occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
            position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

            return self.func(
                "REGEXP_SUBSTR",
                expression.this,
                expression.expression,
                position,
                occurrence,
                parameters,
                group,
            )

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT with ALL is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT with ALL is not supported in Snowflake")
            return super().intersect_op(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def swaptable_sql(self, expression: exp.SwapTable) -> str:
            this = self.sql(expression, "this")
            return f"SWAP WITH {this}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ")

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
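A small sketch of the generator's effect when targeting Snowflake (outputs are approximate and may vary across sqlglot versions):

    import sqlglot

    # TYPE_MAPPING rewrites TIMESTAMP to TIMESTAMPNTZ.
    print(sqlglot.transpile("SELECT CAST(x AS TIMESTAMP)", read="postgres", write="snowflake")[0])
    # roughly: SELECT CAST(x AS TIMESTAMPNTZ)

    # STAR_MAPPING renders BigQuery's SELECT * EXCEPT with Snowflake's EXCLUDE keyword.
    print(sqlglot.transpile("SELECT * EXCEPT (a) FROM t", read="bigquery", write="snowflake")[0])
    # roughly: SELECT * EXCLUDE (a) FROM t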
    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        milli = expression.args.get("milli")
        if milli is not None:
            milli_to_nano = milli.pop() * exp.Literal.number(1000000)
            expression.set("nano", milli_to_nano)

        return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
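Snowflake's TIMESTAMP_FROM_PARTS has no milliseconds parameter, so a milli argument is folded into nanoseconds before generation. A sketch of that rewrite; the keyword argument names below are an assumption based on exp.TimestampFromParts:

    from sqlglot import exp
    from sqlglot.dialects.snowflake import Snowflake

    node = exp.TimestampFromParts(
        year=exp.Literal.number(2024),
        month=exp.Literal.number(1),
        day=exp.Literal.number(2),
        hour=exp.Literal.number(3),
        min=exp.Literal.number(4),
        sec=exp.Literal.number(5),
        milli=exp.Literal.number(6),  # assumed arg name; rewritten as nano = milli * 1000000
    )
    print(Snowflake().generate(node))
    # roughly: TIMESTAMP_FROM_PARTS(2024, 1, 2, 3, 4, 5, 6 * 1000000)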
    def trycast_sql(self, expression: exp.TryCast) -> str:
        value = expression.this

        if value.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            value = annotate_types(value)

        if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
            return super().trycast_sql(expression)

        # TRY_CAST only works for string values in Snowflake
        return self.cast_sql(expression)
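A minimal sketch of the TRY_CAST fallback (outputs are approximate):

    import sqlglot

    # TRY_CAST survives when the operand is, or is inferred to be, a string.
    print(sqlglot.transpile("SELECT TRY_CAST('5' AS INT)", read="snowflake", write="snowflake")[0])
    # SELECT TRY_CAST('5' AS INT)

    # Otherwise it falls back to a plain CAST, since Snowflake's TRY_CAST
    # only accepts string inputs.
    print(sqlglot.transpile("SELECT TRY_CAST(5 AS TEXT)", read="snowflake", write="snowflake")[0])
    # roughly: SELECT CAST(5 AS TEXT)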
    def unnest_sql(self, expression: exp.Unnest) -> str:
        unnest_alias = expression.args.get("alias")
        offset = expression.args.get("offset")

        columns = [
            exp.to_identifier("seq"),
            exp.to_identifier("key"),
            exp.to_identifier("path"),
            offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
            seq_get(unnest_alias.columns if unnest_alias else [], 0)
            or exp.to_identifier("value"),
            exp.to_identifier("this"),
        ]

        if unnest_alias:
            unnest_alias.set("columns", columns)
        else:
            unnest_alias = exp.TableAlias(this="_u", columns=columns)

        explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
        alias = self.sql(unnest_alias)
        alias = f" AS {alias}" if alias else ""
        return f"{explode}{alias}"
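A rough example of an UNNEST from another dialect becoming a TABLE(FLATTEN(...)) call with the canonical SEQ/KEY/PATH/INDEX/VALUE/THIS alias columns; the exact alias shape may vary:

    import sqlglot

    print(sqlglot.transpile(
        "SELECT x FROM UNNEST(ARRAY[1, 2, 3]) AS t(x)", read="presto", write="snowflake"
    )[0])
    # roughly: SELECT x FROM TABLE(FLATTEN(INPUT => [1, 2, 3])) AS t(seq, key, path, index, x, this)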
    def show_sql(self, expression: exp.Show) -> str:
        terse = "TERSE " if expression.args.get("terse") else ""
        history = " HISTORY" if expression.args.get("history") else ""
        like = self.sql(expression, "like")
        like = f" LIKE {like}" if like else ""

        scope = self.sql(expression, "scope")
        scope = f" {scope}" if scope else ""

        scope_kind = self.sql(expression, "scope_kind")
        if scope_kind:
            scope_kind = f" IN {scope_kind}"

        starts_with = self.sql(expression, "starts_with")
        if starts_with:
            starts_with = f" STARTS WITH {starts_with}"

        limit = self.sql(expression, "limit")

        from_ = self.sql(expression, "from")
        if from_:
            from_ = f" FROM {from_}"

        return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
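Together with _parse_show_snowflake, this should let SHOW statements round-trip (a sketch; exact output may vary):

    import sqlglot

    ast = sqlglot.parse_one(
        "SHOW TERSE TABLES LIKE '%orders%' IN SCHEMA db1.schema1", read="snowflake"
    )
    print(ast.sql(dialect="snowflake"))
    # roughly: SHOW TERSE TABLES LIKE '%orders%' IN SCHEMA db1.schema1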
    def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
        # Other dialects don't support all of the following parameters, so we need to
        # generate default values as necessary to ensure the transpilation is correct
        group = expression.args.get("group")
        parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
        occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
        position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

        return self.func(
            "REGEXP_SUBSTR",
            expression.this,
            expression.expression,
            position,
            occurrence,
            parameters,
            group,
        )
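Because REGEXP_SUBSTR's parameters are positional, supplying a later parameter forces the earlier defaults to be materialized. A sketch, with approximate output:

    import sqlglot

    # DuckDB's three-argument form carries a capture group, so position,
    # occurrence, and the 'c' (case-sensitive) parameters get filled in.
    print(sqlglot.transpile("SELECT REGEXP_EXTRACT(col, 'a(b)', 1)", read="duckdb", write="snowflake")[0])
    # roughly: SELECT REGEXP_SUBSTR(col, 'a(b)', 1, 1, 'c', 1)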
    def describe_sql(self, expression: exp.Describe) -> str:
        # Default to table if kind is unknown
        kind_value = expression.args.get("kind") or "TABLE"
        kind = f" {kind_value}" if kind_value else ""
        this = f" {self.sql(expression, 'this')}"
        expressions = self.expressions(expression, flat=True)
        expressions = f" {expressions}" if expressions else ""
        return f"DESCRIBE{kind}{this}{expressions}"
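A quick sketch of the TABLE default (output is approximate):

    import sqlglot

    # No explicit kind in the input, so the generator assumes TABLE.
    print(sqlglot.parse_one("DESCRIBE my_table", read="snowflake").sql(dialect="snowflake"))
    # roughly: DESCRIBE TABLE my_table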
    def generatedasidentitycolumnconstraint_sql(
        self, expression: exp.GeneratedAsIdentityColumnConstraint
    ) -> str:
        start = expression.args.get("start")
        start = f" START {start}" if start else ""
        increment = expression.args.get("increment")
        increment = f" INCREMENT {increment}" if increment else ""
        return f"AUTOINCREMENT{start}{increment}"
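A sketch of an identity column from another dialect rendered as Snowflake's AUTOINCREMENT (output is approximate):

    import sqlglot

    sql = "CREATE TABLE t (id INT GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 2))"
    print(sqlglot.transpile(sql, read="postgres", write="snowflake")[0])
    # roughly: CREATE TABLE t (id INT AUTOINCREMENT START 1 INCREMENT 2)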
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- KEY_VALUE_DEFINITIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql