sqlglot.dialects.snowflake
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, parser, tokens, transforms 6from sqlglot._typing import E 7from sqlglot.dialects.dialect import ( 8 Dialect, 9 NormalizationStrategy, 10 binary_from_function, 11 date_delta_sql, 12 date_trunc_to_time, 13 datestrtodate_sql, 14 format_time_lambda, 15 if_sql, 16 inline_array_sql, 17 max_or_greatest, 18 min_or_least, 19 rename_func, 20 timestamptrunc_sql, 21 timestrtotime_sql, 22 var_map_sql, 23) 24from sqlglot.expressions import Literal 25from sqlglot.helper import seq_get 26from sqlglot.tokens import TokenType 27 28 29def _check_int(s: str) -> bool: 30 if s[0] in ("-", "+"): 31 return s[1:].isdigit() 32 return s.isdigit() 33 34 35# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html 36def _parse_to_timestamp(args: t.List) -> t.Union[exp.StrToTime, exp.UnixToTime, exp.TimeStrToTime]: 37 if len(args) == 2: 38 first_arg, second_arg = args 39 if second_arg.is_string: 40 # case: <string_expr> [ , <format> ] 41 return format_time_lambda(exp.StrToTime, "snowflake")(args) 42 return exp.UnixToTime(this=first_arg, scale=second_arg) 43 44 from sqlglot.optimizer.simplify import simplify_literals 45 46 # The first argument might be an expression like 40 * 365 * 86400, so we try to 47 # reduce it using `simplify_literals` first and then check if it's a Literal. 48 first_arg = seq_get(args, 0) 49 if not isinstance(simplify_literals(first_arg, root=True), Literal): 50 # case: <variant_expr> or other expressions such as columns 51 return exp.TimeStrToTime.from_arg_list(args) 52 53 if first_arg.is_string: 54 if _check_int(first_arg.this): 55 # case: <integer> 56 return exp.UnixToTime.from_arg_list(args) 57 58 # case: <date_expr> 59 return format_time_lambda(exp.StrToTime, "snowflake", default=True)(args) 60 61 # case: <numeric_expr> 62 return exp.UnixToTime.from_arg_list(args) 63 64 65def _parse_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]: 66 expression = parser.parse_var_map(args) 67 68 if isinstance(expression, exp.StarMap): 69 return expression 70 71 return exp.Struct( 72 expressions=[ 73 t.cast(exp.Condition, k).eq(v) for k, v in zip(expression.keys, expression.values) 74 ] 75 ) 76 77 78def _parse_datediff(args: t.List) -> exp.DateDiff: 79 return exp.DateDiff( 80 this=seq_get(args, 2), expression=seq_get(args, 1), unit=_map_date_part(seq_get(args, 0)) 81 ) 82 83 84# https://docs.snowflake.com/en/sql-reference/functions/date_part.html 85# https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 86def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 87 this = self._parse_var() or self._parse_type() 88 89 if not this: 90 return None 91 92 self._match(TokenType.COMMA) 93 expression = self._parse_bitwise() 94 this = _map_date_part(this) 95 name = this.name.upper() 96 97 if name.startswith("EPOCH"): 98 if name == "EPOCH_MILLISECOND": 99 scale = 10**3 100 elif name == "EPOCH_MICROSECOND": 101 scale = 10**6 102 elif name == "EPOCH_NANOSECOND": 103 scale = 10**9 104 else: 105 scale = None 106 107 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 108 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 109 110 if scale: 111 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 112 113 return to_unix 114 115 return self.expression(exp.Extract, this=this, expression=expression) 116 117 118# https://docs.snowflake.com/en/sql-reference/functions/div0 119def _div0_to_if(args: t.List) -> exp.If: 120 cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0)) 121 true = exp.Literal.number(0) 122 false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1)) 123 return exp.If(this=cond, true=true, false=false) 124 125 126# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull 127def _zeroifnull_to_if(args: t.List) -> exp.If: 128 cond = exp.Is(this=seq_get(args, 0), expression=exp.Null()) 129 return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0)) 130 131 132# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull 133def _nullifzero_to_if(args: t.List) -> exp.If: 134 cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0)) 135 return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0)) 136 137 138def _datatype_sql(self: Snowflake.Generator, expression: exp.DataType) -> str: 139 if expression.is_type("array"): 140 return "ARRAY" 141 elif expression.is_type("map"): 142 return "OBJECT" 143 return self.datatype_sql(expression) 144 145 146def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str: 147 flag = expression.text("flag") 148 149 if "i" not in flag: 150 flag += "i" 151 152 return self.func( 153 "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag) 154 ) 155 156 157def _parse_convert_timezone(args: t.List) -> t.Union[exp.Anonymous, exp.AtTimeZone]: 158 if len(args) == 3: 159 return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args) 160 return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0)) 161 162 163def _parse_regexp_replace(args: t.List) -> exp.RegexpReplace: 164 regexp_replace = exp.RegexpReplace.from_arg_list(args) 165 166 if not regexp_replace.args.get("replacement"): 167 regexp_replace.set("replacement", exp.Literal.string("")) 168 169 return regexp_replace 170 171 172def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]: 173 def _parse(self: Snowflake.Parser) -> exp.Show: 174 return self._parse_show_snowflake(*args, **kwargs) 175 176 return _parse 177 178 179DATE_PART_MAPPING = { 180 "Y": "YEAR", 181 "YY": "YEAR", 182 "YYY": "YEAR", 183 "YYYY": "YEAR", 184 "YR": "YEAR", 185 "YEARS": "YEAR", 186 "YRS": "YEAR", 187 "MM": "MONTH", 188 "MON": "MONTH", 189 "MONS": "MONTH", 190 "MONTHS": "MONTH", 191 "D": "DAY", 192 "DD": "DAY", 193 "DAYS": "DAY", 194 "DAYOFMONTH": "DAY", 195 "WEEKDAY": "DAYOFWEEK", 196 "DOW": "DAYOFWEEK", 197 "DW": "DAYOFWEEK", 198 "WEEKDAY_ISO": "DAYOFWEEKISO", 199 "DOW_ISO": "DAYOFWEEKISO", 200 "DW_ISO": "DAYOFWEEKISO", 201 "YEARDAY": "DAYOFYEAR", 202 "DOY": "DAYOFYEAR", 203 "DY": "DAYOFYEAR", 204 "W": "WEEK", 205 "WK": "WEEK", 206 "WEEKOFYEAR": "WEEK", 207 "WOY": "WEEK", 208 "WY": "WEEK", 209 "WEEK_ISO": "WEEKISO", 210 "WEEKOFYEARISO": "WEEKISO", 211 "WEEKOFYEAR_ISO": "WEEKISO", 212 "Q": "QUARTER", 213 "QTR": "QUARTER", 214 "QTRS": "QUARTER", 215 "QUARTERS": "QUARTER", 216 "H": "HOUR", 217 "HH": "HOUR", 218 "HR": "HOUR", 219 "HOURS": "HOUR", 220 "HRS": "HOUR", 221 "M": "MINUTE", 222 "MI": "MINUTE", 223 "MIN": "MINUTE", 224 "MINUTES": "MINUTE", 225 "MINS": "MINUTE", 226 "S": "SECOND", 227 "SEC": "SECOND", 228 "SECONDS": "SECOND", 229 "SECS": "SECOND", 230 "MS": "MILLISECOND", 231 "MSEC": "MILLISECOND", 232 "MILLISECONDS": "MILLISECOND", 233 "US": "MICROSECOND", 234 "USEC": "MICROSECOND", 235 "MICROSECONDS": "MICROSECOND", 236 "NS": "NANOSECOND", 237 "NSEC": "NANOSECOND", 238 "NANOSEC": "NANOSECOND", 239 "NSECOND": "NANOSECOND", 240 "NSECONDS": "NANOSECOND", 241 "NANOSECS": "NANOSECOND", 242 "NSECONDS": "NANOSECOND", 243 "EPOCH": "EPOCH_SECOND", 244 "EPOCH_SECONDS": "EPOCH_SECOND", 245 "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND", 246 "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND", 247 "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND", 248 "TZH": "TIMEZONE_HOUR", 249 "TZM": "TIMEZONE_MINUTE", 250} 251 252 253@t.overload 254def _map_date_part(part: exp.Expression) -> exp.Var: 255 pass 256 257 258@t.overload 259def _map_date_part(part: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 260 pass 261 262 263def _map_date_part(part): 264 mapped = DATE_PART_MAPPING.get(part.name.upper()) if part else None 265 return exp.var(mapped) if mapped else part 266 267 268def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc: 269 trunc = date_trunc_to_time(args) 270 trunc.set("unit", _map_date_part(trunc.args["unit"])) 271 return trunc 272 273 274def _parse_colon_get_path( 275 self: parser.Parser, this: t.Optional[exp.Expression] 276) -> t.Optional[exp.Expression]: 277 while True: 278 path = self._parse_bitwise() 279 280 # The cast :: operator has a lower precedence than the extraction operator :, so 281 # we rearrange the AST appropriately to avoid casting the 2nd argument of GET_PATH 282 if isinstance(path, exp.Cast): 283 target_type = path.to 284 path = path.this 285 else: 286 target_type = None 287 288 if isinstance(path, exp.Expression): 289 path = exp.Literal.string(path.sql(dialect="snowflake")) 290 291 # The extraction operator : is left-associative 292 this = self.expression(exp.GetPath, this=this, expression=path) 293 294 if target_type: 295 this = exp.cast(this, target_type) 296 297 if not self._match(TokenType.COLON): 298 break 299 300 return self._parse_range(this) 301 302 303def _parse_timestamp_from_parts(args: t.List) -> exp.Func: 304 if len(args) == 2: 305 # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept, 306 # so we parse this into Anonymous for now instead of introducing complexity 307 return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args) 308 309 return exp.TimestampFromParts.from_arg_list(args) 310 311 312def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression: 313 """ 314 Snowflake doesn't allow columns referenced in UNPIVOT to be qualified, 315 so we need to unqualify them. 316 317 Example: 318 >>> from sqlglot import parse_one 319 >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))") 320 >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake")) 321 SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april)) 322 """ 323 if isinstance(expression, exp.Pivot) and expression.unpivot: 324 expression = transforms.unqualify_columns(expression) 325 326 return expression 327 328 329class Snowflake(Dialect): 330 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 331 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 332 NULL_ORDERING = "nulls_are_large" 333 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 334 SUPPORTS_USER_DEFINED_TYPES = False 335 SUPPORTS_SEMI_ANTI_JOIN = False 336 PREFER_CTE_ALIAS_COLUMN = True 337 TABLESAMPLE_SIZE_IS_PERCENT = True 338 339 TIME_MAPPING = { 340 "YYYY": "%Y", 341 "yyyy": "%Y", 342 "YY": "%y", 343 "yy": "%y", 344 "MMMM": "%B", 345 "mmmm": "%B", 346 "MON": "%b", 347 "mon": "%b", 348 "MM": "%m", 349 "mm": "%m", 350 "DD": "%d", 351 "dd": "%-d", 352 "DY": "%a", 353 "dy": "%w", 354 "HH24": "%H", 355 "hh24": "%H", 356 "HH12": "%I", 357 "hh12": "%I", 358 "MI": "%M", 359 "mi": "%M", 360 "SS": "%S", 361 "ss": "%S", 362 "FF": "%f", 363 "ff": "%f", 364 "FF6": "%f", 365 "ff6": "%f", 366 } 367 368 def quote_identifier(self, expression: E, identify: bool = True) -> E: 369 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 370 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 371 if ( 372 isinstance(expression, exp.Identifier) 373 and isinstance(expression.parent, exp.Table) 374 and expression.name.lower() == "dual" 375 ): 376 return t.cast(E, expression) 377 378 return super().quote_identifier(expression, identify=identify) 379 380 class Parser(parser.Parser): 381 IDENTIFY_PIVOT_STRINGS = True 382 383 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 384 385 FUNCTIONS = { 386 **parser.Parser.FUNCTIONS, 387 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 388 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 389 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 390 this=seq_get(args, 1), expression=seq_get(args, 0) 391 ), 392 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 393 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 394 start=seq_get(args, 0), 395 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 396 step=seq_get(args, 2), 397 ), 398 "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list, 399 "BITXOR": binary_from_function(exp.BitwiseXor), 400 "BIT_XOR": binary_from_function(exp.BitwiseXor), 401 "BOOLXOR": binary_from_function(exp.Xor), 402 "CONVERT_TIMEZONE": _parse_convert_timezone, 403 "DATE_TRUNC": _date_trunc_to_time, 404 "DATEADD": lambda args: exp.DateAdd( 405 this=seq_get(args, 2), 406 expression=seq_get(args, 1), 407 unit=_map_date_part(seq_get(args, 0)), 408 ), 409 "DATEDIFF": _parse_datediff, 410 "DIV0": _div0_to_if, 411 "FLATTEN": exp.Explode.from_arg_list, 412 "IFF": exp.If.from_arg_list, 413 "LAST_DAY": lambda args: exp.LastDay( 414 this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1)) 415 ), 416 "LISTAGG": exp.GroupConcat.from_arg_list, 417 "NULLIFZERO": _nullifzero_to_if, 418 "OBJECT_CONSTRUCT": _parse_object_construct, 419 "REGEXP_REPLACE": _parse_regexp_replace, 420 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 421 "RLIKE": exp.RegexpLike.from_arg_list, 422 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 423 "TIMEDIFF": _parse_datediff, 424 "TIMESTAMPDIFF": _parse_datediff, 425 "TIMESTAMPFROMPARTS": _parse_timestamp_from_parts, 426 "TIMESTAMP_FROM_PARTS": _parse_timestamp_from_parts, 427 "TO_TIMESTAMP": _parse_to_timestamp, 428 "TO_VARCHAR": exp.ToChar.from_arg_list, 429 "ZEROIFNULL": _zeroifnull_to_if, 430 } 431 432 FUNCTION_PARSERS = { 433 **parser.Parser.FUNCTION_PARSERS, 434 "DATE_PART": _parse_date_part, 435 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 436 } 437 FUNCTION_PARSERS.pop("TRIM") 438 439 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 440 441 RANGE_PARSERS = { 442 **parser.Parser.RANGE_PARSERS, 443 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 444 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 445 TokenType.COLON: _parse_colon_get_path, 446 } 447 448 ALTER_PARSERS = { 449 **parser.Parser.ALTER_PARSERS, 450 "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")), 451 "UNSET": lambda self: self.expression( 452 exp.Set, 453 tag=self._match_text_seq("TAG"), 454 expressions=self._parse_csv(self._parse_id_var), 455 unset=True, 456 ), 457 "SWAP": lambda self: self._parse_alter_table_swap(), 458 } 459 460 STATEMENT_PARSERS = { 461 **parser.Parser.STATEMENT_PARSERS, 462 TokenType.SHOW: lambda self: self._parse_show(), 463 } 464 465 PROPERTY_PARSERS = { 466 **parser.Parser.PROPERTY_PARSERS, 467 "LOCATION": lambda self: self._parse_location(), 468 } 469 470 SHOW_PARSERS = { 471 "SCHEMAS": _show_parser("SCHEMAS"), 472 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 473 "OBJECTS": _show_parser("OBJECTS"), 474 "TERSE OBJECTS": _show_parser("OBJECTS"), 475 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 476 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 477 "COLUMNS": _show_parser("COLUMNS"), 478 } 479 480 STAGED_FILE_SINGLE_TOKENS = { 481 TokenType.DOT, 482 TokenType.MOD, 483 TokenType.SLASH, 484 } 485 486 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 487 488 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 489 if is_map: 490 # Keys are strings in Snowflake's objects, see also: 491 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 492 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 493 return self._parse_slice(self._parse_string()) 494 495 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 496 497 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 498 lateral = super()._parse_lateral() 499 if not lateral: 500 return lateral 501 502 if isinstance(lateral.this, exp.Explode): 503 table_alias = lateral.args.get("alias") 504 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 505 if table_alias and not table_alias.args.get("columns"): 506 table_alias.set("columns", columns) 507 elif not table_alias: 508 exp.alias_(lateral, "_flattened", table=columns, copy=False) 509 510 return lateral 511 512 def _parse_at_before(self, table: exp.Table) -> exp.Table: 513 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 514 index = self._index 515 if self._match_texts(("AT", "BEFORE")): 516 this = self._prev.text.upper() 517 kind = ( 518 self._match(TokenType.L_PAREN) 519 and self._match_texts(self.HISTORICAL_DATA_KIND) 520 and self._prev.text.upper() 521 ) 522 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 523 524 if expression: 525 self._match_r_paren() 526 when = self.expression( 527 exp.HistoricalData, this=this, kind=kind, expression=expression 528 ) 529 table.set("when", when) 530 else: 531 self._retreat(index) 532 533 return table 534 535 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 536 # https://docs.snowflake.com/en/user-guide/querying-stage 537 if self._match(TokenType.STRING, advance=False): 538 table = self._parse_string() 539 elif self._match_text_seq("@", advance=False): 540 table = self._parse_location_path() 541 else: 542 table = None 543 544 if table: 545 file_format = None 546 pattern = None 547 548 self._match(TokenType.L_PAREN) 549 while self._curr and not self._match(TokenType.R_PAREN): 550 if self._match_text_seq("FILE_FORMAT", "=>"): 551 file_format = self._parse_string() or super()._parse_table_parts() 552 elif self._match_text_seq("PATTERN", "=>"): 553 pattern = self._parse_string() 554 else: 555 break 556 557 self._match(TokenType.COMMA) 558 559 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 560 else: 561 table = super()._parse_table_parts(schema=schema) 562 563 return self._parse_at_before(table) 564 565 def _parse_id_var( 566 self, 567 any_token: bool = True, 568 tokens: t.Optional[t.Collection[TokenType]] = None, 569 ) -> t.Optional[exp.Expression]: 570 if self._match_text_seq("IDENTIFIER", "("): 571 identifier = ( 572 super()._parse_id_var(any_token=any_token, tokens=tokens) 573 or self._parse_string() 574 ) 575 self._match_r_paren() 576 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 577 578 return super()._parse_id_var(any_token=any_token, tokens=tokens) 579 580 def _parse_show_snowflake(self, this: str) -> exp.Show: 581 scope = None 582 scope_kind = None 583 584 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 585 # which is syntactically valid but has no effect on the output 586 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 587 588 like = self._parse_string() if self._match(TokenType.LIKE) else None 589 590 if self._match(TokenType.IN): 591 if self._match_text_seq("ACCOUNT"): 592 scope_kind = "ACCOUNT" 593 elif self._match_set(self.DB_CREATABLES): 594 scope_kind = self._prev.text.upper() 595 if self._curr: 596 scope = self._parse_table_parts() 597 elif self._curr: 598 scope_kind = "SCHEMA" if this == "OBJECTS" else "TABLE" 599 scope = self._parse_table_parts() 600 601 return self.expression( 602 exp.Show, 603 **{ 604 "terse": terse, 605 "this": this, 606 "like": like, 607 "scope": scope, 608 "scope_kind": scope_kind, 609 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 610 "limit": self._parse_limit(), 611 "from": self._parse_string() if self._match(TokenType.FROM) else None, 612 }, 613 ) 614 615 def _parse_alter_table_swap(self) -> exp.SwapTable: 616 self._match_text_seq("WITH") 617 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 618 619 def _parse_location(self) -> exp.LocationProperty: 620 self._match(TokenType.EQ) 621 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 622 623 def _parse_location_path(self) -> exp.Var: 624 parts = [self._advance_any(ignore_reserved=True)] 625 626 # We avoid consuming a comma token because external tables like @foo and @bar 627 # can be joined in a query with a comma separator. 628 while self._is_connected() and not self._match(TokenType.COMMA, advance=False): 629 parts.append(self._advance_any(ignore_reserved=True)) 630 631 return exp.var("".join(part.text for part in parts if part)) 632 633 class Tokenizer(tokens.Tokenizer): 634 STRING_ESCAPES = ["\\", "'"] 635 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 636 RAW_STRINGS = ["$$"] 637 COMMENTS = ["--", "//", ("/*", "*/")] 638 639 KEYWORDS = { 640 **tokens.Tokenizer.KEYWORDS, 641 "BYTEINT": TokenType.INT, 642 "CHAR VARYING": TokenType.VARCHAR, 643 "CHARACTER VARYING": TokenType.VARCHAR, 644 "EXCLUDE": TokenType.EXCEPT, 645 "ILIKE ANY": TokenType.ILIKE_ANY, 646 "LIKE ANY": TokenType.LIKE_ANY, 647 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 648 "MINUS": TokenType.EXCEPT, 649 "NCHAR VARYING": TokenType.VARCHAR, 650 "PUT": TokenType.COMMAND, 651 "REMOVE": TokenType.COMMAND, 652 "RENAME": TokenType.REPLACE, 653 "RM": TokenType.COMMAND, 654 "SAMPLE": TokenType.TABLE_SAMPLE, 655 "SQL_DOUBLE": TokenType.DOUBLE, 656 "SQL_VARCHAR": TokenType.VARCHAR, 657 "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ, 658 "TIMESTAMP_NTZ": TokenType.TIMESTAMP, 659 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 660 "TIMESTAMPNTZ": TokenType.TIMESTAMP, 661 "TOP": TokenType.TOP, 662 } 663 664 SINGLE_TOKENS = { 665 **tokens.Tokenizer.SINGLE_TOKENS, 666 "$": TokenType.PARAMETER, 667 } 668 669 VAR_SINGLE_TOKENS = {"$"} 670 671 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 672 673 class Generator(generator.Generator): 674 PARAMETER_TOKEN = "$" 675 MATCHED_BY_SOURCE = False 676 SINGLE_STRING_INTERVAL = True 677 JOIN_HINTS = False 678 TABLE_HINTS = False 679 QUERY_HINTS = False 680 AGGREGATE_FILTER_SUPPORTED = False 681 SUPPORTS_TABLE_COPY = False 682 COLLATE_IS_FUNC = True 683 LIMIT_ONLY_LITERALS = True 684 JSON_KEY_VALUE_PAIR_SEP = "," 685 INSERT_OVERWRITE = " OVERWRITE INTO" 686 687 TRANSFORMS = { 688 **generator.Generator.TRANSFORMS, 689 exp.ArgMax: rename_func("MAX_BY"), 690 exp.ArgMin: rename_func("MIN_BY"), 691 exp.Array: inline_array_sql, 692 exp.ArrayConcat: rename_func("ARRAY_CAT"), 693 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 694 exp.ArrayJoin: rename_func("ARRAY_TO_STRING"), 695 exp.AtTimeZone: lambda self, e: self.func( 696 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 697 ), 698 exp.BitwiseXor: rename_func("BITXOR"), 699 exp.DateAdd: date_delta_sql("DATEADD"), 700 exp.DateDiff: date_delta_sql("DATEDIFF"), 701 exp.DateStrToDate: datestrtodate_sql, 702 exp.DataType: _datatype_sql, 703 exp.DayOfMonth: rename_func("DAYOFMONTH"), 704 exp.DayOfWeek: rename_func("DAYOFWEEK"), 705 exp.DayOfYear: rename_func("DAYOFYEAR"), 706 exp.Explode: rename_func("FLATTEN"), 707 exp.Extract: rename_func("DATE_PART"), 708 exp.FromTimeZone: lambda self, e: self.func( 709 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 710 ), 711 exp.GenerateSeries: lambda self, e: self.func( 712 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 713 ), 714 exp.GroupConcat: rename_func("LISTAGG"), 715 exp.If: if_sql(name="IFF", false_value="NULL"), 716 exp.JSONExtract: lambda self, e: f"{self.sql(e, 'this')}[{self.sql(e, 'expression')}]", 717 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 718 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 719 exp.LogicalOr: rename_func("BOOLOR_AGG"), 720 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 721 exp.Max: max_or_greatest, 722 exp.Min: min_or_least, 723 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 724 exp.PercentileCont: transforms.preprocess( 725 [transforms.add_within_group_for_percentiles] 726 ), 727 exp.PercentileDisc: transforms.preprocess( 728 [transforms.add_within_group_for_percentiles] 729 ), 730 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 731 exp.RegexpILike: _regexpilike_sql, 732 exp.Rand: rename_func("RANDOM"), 733 exp.Select: transforms.preprocess( 734 [ 735 transforms.eliminate_distinct_on, 736 transforms.explode_to_unnest(), 737 transforms.eliminate_semi_and_anti_joins, 738 ] 739 ), 740 exp.SHA: rename_func("SHA1"), 741 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 742 exp.StartsWith: rename_func("STARTSWITH"), 743 exp.StrPosition: lambda self, e: self.func( 744 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 745 ), 746 exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})", 747 exp.Struct: lambda self, e: self.func( 748 "OBJECT_CONSTRUCT", 749 *(arg for expression in e.expressions for arg in expression.flatten()), 750 ), 751 exp.Stuff: rename_func("INSERT"), 752 exp.TimestampDiff: lambda self, e: self.func( 753 "TIMESTAMPDIFF", e.unit, e.expression, e.this 754 ), 755 exp.TimestampTrunc: timestamptrunc_sql, 756 exp.TimeStrToTime: timestrtotime_sql, 757 exp.TimeToStr: lambda self, e: self.func( 758 "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e) 759 ), 760 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 761 exp.ToArray: rename_func("TO_ARRAY"), 762 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 763 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 764 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 765 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 766 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 767 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 768 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 769 exp.Xor: rename_func("BOOLXOR"), 770 } 771 772 TYPE_MAPPING = { 773 **generator.Generator.TYPE_MAPPING, 774 exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ", 775 } 776 777 STAR_MAPPING = { 778 "except": "EXCLUDE", 779 "replace": "RENAME", 780 } 781 782 PROPERTIES_LOCATION = { 783 **generator.Generator.PROPERTIES_LOCATION, 784 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 785 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 786 } 787 788 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 789 milli = expression.args.get("milli") 790 if milli is not None: 791 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 792 expression.set("nano", milli_to_nano) 793 794 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 795 796 def trycast_sql(self, expression: exp.TryCast) -> str: 797 value = expression.this 798 799 if value.type is None: 800 from sqlglot.optimizer.annotate_types import annotate_types 801 802 value = annotate_types(value) 803 804 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 805 return super().trycast_sql(expression) 806 807 # TRY_CAST only works for string values in Snowflake 808 return self.cast_sql(expression) 809 810 def log_sql(self, expression: exp.Log) -> str: 811 if not expression.expression: 812 return self.func("LN", expression.this) 813 814 return super().log_sql(expression) 815 816 def unnest_sql(self, expression: exp.Unnest) -> str: 817 unnest_alias = expression.args.get("alias") 818 offset = expression.args.get("offset") 819 820 columns = [ 821 exp.to_identifier("seq"), 822 exp.to_identifier("key"), 823 exp.to_identifier("path"), 824 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 825 seq_get(unnest_alias.columns if unnest_alias else [], 0) 826 or exp.to_identifier("value"), 827 exp.to_identifier("this"), 828 ] 829 830 if unnest_alias: 831 unnest_alias.set("columns", columns) 832 else: 833 unnest_alias = exp.TableAlias(this="_u", columns=columns) 834 835 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 836 alias = self.sql(unnest_alias) 837 alias = f" AS {alias}" if alias else "" 838 return f"{explode}{alias}" 839 840 def show_sql(self, expression: exp.Show) -> str: 841 terse = "TERSE " if expression.args.get("terse") else "" 842 like = self.sql(expression, "like") 843 like = f" LIKE {like}" if like else "" 844 845 scope = self.sql(expression, "scope") 846 scope = f" {scope}" if scope else "" 847 848 scope_kind = self.sql(expression, "scope_kind") 849 if scope_kind: 850 scope_kind = f" IN {scope_kind}" 851 852 starts_with = self.sql(expression, "starts_with") 853 if starts_with: 854 starts_with = f" STARTS WITH {starts_with}" 855 856 limit = self.sql(expression, "limit") 857 858 from_ = self.sql(expression, "from") 859 if from_: 860 from_ = f" FROM {from_}" 861 862 return ( 863 f"SHOW {terse}{expression.name}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 864 ) 865 866 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 867 # Other dialects don't support all of the following parameters, so we need to 868 # generate default values as necessary to ensure the transpilation is correct 869 group = expression.args.get("group") 870 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 871 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 872 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 873 874 return self.func( 875 "REGEXP_SUBSTR", 876 expression.this, 877 expression.expression, 878 position, 879 occurrence, 880 parameters, 881 group, 882 ) 883 884 def except_op(self, expression: exp.Except) -> str: 885 if not expression.args.get("distinct", False): 886 self.unsupported("EXCEPT with All is not supported in Snowflake") 887 return super().except_op(expression) 888 889 def intersect_op(self, expression: exp.Intersect) -> str: 890 if not expression.args.get("distinct", False): 891 self.unsupported("INTERSECT with All is not supported in Snowflake") 892 return super().intersect_op(expression) 893 894 def describe_sql(self, expression: exp.Describe) -> str: 895 # Default to table if kind is unknown 896 kind_value = expression.args.get("kind") or "TABLE" 897 kind = f" {kind_value}" if kind_value else "" 898 this = f" {self.sql(expression, 'this')}" 899 expressions = self.expressions(expression, flat=True) 900 expressions = f" {expressions}" if expressions else "" 901 return f"DESCRIBE{kind}{this}{expressions}" 902 903 def generatedasidentitycolumnconstraint_sql( 904 self, expression: exp.GeneratedAsIdentityColumnConstraint 905 ) -> str: 906 start = expression.args.get("start") 907 start = f" START {start}" if start else "" 908 increment = expression.args.get("increment") 909 increment = f" INCREMENT {increment}" if increment else "" 910 return f"AUTOINCREMENT{start}{increment}" 911 912 def swaptable_sql(self, expression: exp.SwapTable) -> str: 913 this = self.sql(expression, "this") 914 return f"SWAP WITH {this}" 915 916 def with_properties(self, properties: exp.Properties) -> str: 917 return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ")
330class Snowflake(Dialect): 331 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 332 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 333 NULL_ORDERING = "nulls_are_large" 334 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 335 SUPPORTS_USER_DEFINED_TYPES = False 336 SUPPORTS_SEMI_ANTI_JOIN = False 337 PREFER_CTE_ALIAS_COLUMN = True 338 TABLESAMPLE_SIZE_IS_PERCENT = True 339 340 TIME_MAPPING = { 341 "YYYY": "%Y", 342 "yyyy": "%Y", 343 "YY": "%y", 344 "yy": "%y", 345 "MMMM": "%B", 346 "mmmm": "%B", 347 "MON": "%b", 348 "mon": "%b", 349 "MM": "%m", 350 "mm": "%m", 351 "DD": "%d", 352 "dd": "%-d", 353 "DY": "%a", 354 "dy": "%w", 355 "HH24": "%H", 356 "hh24": "%H", 357 "HH12": "%I", 358 "hh12": "%I", 359 "MI": "%M", 360 "mi": "%M", 361 "SS": "%S", 362 "ss": "%S", 363 "FF": "%f", 364 "ff": "%f", 365 "FF6": "%f", 366 "ff6": "%f", 367 } 368 369 def quote_identifier(self, expression: E, identify: bool = True) -> E: 370 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 371 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 372 if ( 373 isinstance(expression, exp.Identifier) 374 and isinstance(expression.parent, exp.Table) 375 and expression.name.lower() == "dual" 376 ): 377 return t.cast(E, expression) 378 379 return super().quote_identifier(expression, identify=identify) 380 381 class Parser(parser.Parser): 382 IDENTIFY_PIVOT_STRINGS = True 383 384 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 385 386 FUNCTIONS = { 387 **parser.Parser.FUNCTIONS, 388 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 389 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 390 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 391 this=seq_get(args, 1), expression=seq_get(args, 0) 392 ), 393 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 394 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 395 start=seq_get(args, 0), 396 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 397 step=seq_get(args, 2), 398 ), 399 "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list, 400 "BITXOR": binary_from_function(exp.BitwiseXor), 401 "BIT_XOR": binary_from_function(exp.BitwiseXor), 402 "BOOLXOR": binary_from_function(exp.Xor), 403 "CONVERT_TIMEZONE": _parse_convert_timezone, 404 "DATE_TRUNC": _date_trunc_to_time, 405 "DATEADD": lambda args: exp.DateAdd( 406 this=seq_get(args, 2), 407 expression=seq_get(args, 1), 408 unit=_map_date_part(seq_get(args, 0)), 409 ), 410 "DATEDIFF": _parse_datediff, 411 "DIV0": _div0_to_if, 412 "FLATTEN": exp.Explode.from_arg_list, 413 "IFF": exp.If.from_arg_list, 414 "LAST_DAY": lambda args: exp.LastDay( 415 this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1)) 416 ), 417 "LISTAGG": exp.GroupConcat.from_arg_list, 418 "NULLIFZERO": _nullifzero_to_if, 419 "OBJECT_CONSTRUCT": _parse_object_construct, 420 "REGEXP_REPLACE": _parse_regexp_replace, 421 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 422 "RLIKE": exp.RegexpLike.from_arg_list, 423 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 424 "TIMEDIFF": _parse_datediff, 425 "TIMESTAMPDIFF": _parse_datediff, 426 "TIMESTAMPFROMPARTS": _parse_timestamp_from_parts, 427 "TIMESTAMP_FROM_PARTS": _parse_timestamp_from_parts, 428 "TO_TIMESTAMP": _parse_to_timestamp, 429 "TO_VARCHAR": exp.ToChar.from_arg_list, 430 "ZEROIFNULL": _zeroifnull_to_if, 431 } 432 433 FUNCTION_PARSERS = { 434 **parser.Parser.FUNCTION_PARSERS, 435 "DATE_PART": _parse_date_part, 436 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 437 } 438 FUNCTION_PARSERS.pop("TRIM") 439 440 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 441 442 RANGE_PARSERS = { 443 **parser.Parser.RANGE_PARSERS, 444 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 445 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 446 TokenType.COLON: _parse_colon_get_path, 447 } 448 449 ALTER_PARSERS = { 450 **parser.Parser.ALTER_PARSERS, 451 "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")), 452 "UNSET": lambda self: self.expression( 453 exp.Set, 454 tag=self._match_text_seq("TAG"), 455 expressions=self._parse_csv(self._parse_id_var), 456 unset=True, 457 ), 458 "SWAP": lambda self: self._parse_alter_table_swap(), 459 } 460 461 STATEMENT_PARSERS = { 462 **parser.Parser.STATEMENT_PARSERS, 463 TokenType.SHOW: lambda self: self._parse_show(), 464 } 465 466 PROPERTY_PARSERS = { 467 **parser.Parser.PROPERTY_PARSERS, 468 "LOCATION": lambda self: self._parse_location(), 469 } 470 471 SHOW_PARSERS = { 472 "SCHEMAS": _show_parser("SCHEMAS"), 473 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 474 "OBJECTS": _show_parser("OBJECTS"), 475 "TERSE OBJECTS": _show_parser("OBJECTS"), 476 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 477 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 478 "COLUMNS": _show_parser("COLUMNS"), 479 } 480 481 STAGED_FILE_SINGLE_TOKENS = { 482 TokenType.DOT, 483 TokenType.MOD, 484 TokenType.SLASH, 485 } 486 487 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 488 489 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 490 if is_map: 491 # Keys are strings in Snowflake's objects, see also: 492 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 493 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 494 return self._parse_slice(self._parse_string()) 495 496 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 497 498 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 499 lateral = super()._parse_lateral() 500 if not lateral: 501 return lateral 502 503 if isinstance(lateral.this, exp.Explode): 504 table_alias = lateral.args.get("alias") 505 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 506 if table_alias and not table_alias.args.get("columns"): 507 table_alias.set("columns", columns) 508 elif not table_alias: 509 exp.alias_(lateral, "_flattened", table=columns, copy=False) 510 511 return lateral 512 513 def _parse_at_before(self, table: exp.Table) -> exp.Table: 514 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 515 index = self._index 516 if self._match_texts(("AT", "BEFORE")): 517 this = self._prev.text.upper() 518 kind = ( 519 self._match(TokenType.L_PAREN) 520 and self._match_texts(self.HISTORICAL_DATA_KIND) 521 and self._prev.text.upper() 522 ) 523 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 524 525 if expression: 526 self._match_r_paren() 527 when = self.expression( 528 exp.HistoricalData, this=this, kind=kind, expression=expression 529 ) 530 table.set("when", when) 531 else: 532 self._retreat(index) 533 534 return table 535 536 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 537 # https://docs.snowflake.com/en/user-guide/querying-stage 538 if self._match(TokenType.STRING, advance=False): 539 table = self._parse_string() 540 elif self._match_text_seq("@", advance=False): 541 table = self._parse_location_path() 542 else: 543 table = None 544 545 if table: 546 file_format = None 547 pattern = None 548 549 self._match(TokenType.L_PAREN) 550 while self._curr and not self._match(TokenType.R_PAREN): 551 if self._match_text_seq("FILE_FORMAT", "=>"): 552 file_format = self._parse_string() or super()._parse_table_parts() 553 elif self._match_text_seq("PATTERN", "=>"): 554 pattern = self._parse_string() 555 else: 556 break 557 558 self._match(TokenType.COMMA) 559 560 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 561 else: 562 table = super()._parse_table_parts(schema=schema) 563 564 return self._parse_at_before(table) 565 566 def _parse_id_var( 567 self, 568 any_token: bool = True, 569 tokens: t.Optional[t.Collection[TokenType]] = None, 570 ) -> t.Optional[exp.Expression]: 571 if self._match_text_seq("IDENTIFIER", "("): 572 identifier = ( 573 super()._parse_id_var(any_token=any_token, tokens=tokens) 574 or self._parse_string() 575 ) 576 self._match_r_paren() 577 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 578 579 return super()._parse_id_var(any_token=any_token, tokens=tokens) 580 581 def _parse_show_snowflake(self, this: str) -> exp.Show: 582 scope = None 583 scope_kind = None 584 585 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 586 # which is syntactically valid but has no effect on the output 587 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 588 589 like = self._parse_string() if self._match(TokenType.LIKE) else None 590 591 if self._match(TokenType.IN): 592 if self._match_text_seq("ACCOUNT"): 593 scope_kind = "ACCOUNT" 594 elif self._match_set(self.DB_CREATABLES): 595 scope_kind = self._prev.text.upper() 596 if self._curr: 597 scope = self._parse_table_parts() 598 elif self._curr: 599 scope_kind = "SCHEMA" if this == "OBJECTS" else "TABLE" 600 scope = self._parse_table_parts() 601 602 return self.expression( 603 exp.Show, 604 **{ 605 "terse": terse, 606 "this": this, 607 "like": like, 608 "scope": scope, 609 "scope_kind": scope_kind, 610 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 611 "limit": self._parse_limit(), 612 "from": self._parse_string() if self._match(TokenType.FROM) else None, 613 }, 614 ) 615 616 def _parse_alter_table_swap(self) -> exp.SwapTable: 617 self._match_text_seq("WITH") 618 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 619 620 def _parse_location(self) -> exp.LocationProperty: 621 self._match(TokenType.EQ) 622 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 623 624 def _parse_location_path(self) -> exp.Var: 625 parts = [self._advance_any(ignore_reserved=True)] 626 627 # We avoid consuming a comma token because external tables like @foo and @bar 628 # can be joined in a query with a comma separator. 629 while self._is_connected() and not self._match(TokenType.COMMA, advance=False): 630 parts.append(self._advance_any(ignore_reserved=True)) 631 632 return exp.var("".join(part.text for part in parts if part)) 633 634 class Tokenizer(tokens.Tokenizer): 635 STRING_ESCAPES = ["\\", "'"] 636 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 637 RAW_STRINGS = ["$$"] 638 COMMENTS = ["--", "//", ("/*", "*/")] 639 640 KEYWORDS = { 641 **tokens.Tokenizer.KEYWORDS, 642 "BYTEINT": TokenType.INT, 643 "CHAR VARYING": TokenType.VARCHAR, 644 "CHARACTER VARYING": TokenType.VARCHAR, 645 "EXCLUDE": TokenType.EXCEPT, 646 "ILIKE ANY": TokenType.ILIKE_ANY, 647 "LIKE ANY": TokenType.LIKE_ANY, 648 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 649 "MINUS": TokenType.EXCEPT, 650 "NCHAR VARYING": TokenType.VARCHAR, 651 "PUT": TokenType.COMMAND, 652 "REMOVE": TokenType.COMMAND, 653 "RENAME": TokenType.REPLACE, 654 "RM": TokenType.COMMAND, 655 "SAMPLE": TokenType.TABLE_SAMPLE, 656 "SQL_DOUBLE": TokenType.DOUBLE, 657 "SQL_VARCHAR": TokenType.VARCHAR, 658 "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ, 659 "TIMESTAMP_NTZ": TokenType.TIMESTAMP, 660 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 661 "TIMESTAMPNTZ": TokenType.TIMESTAMP, 662 "TOP": TokenType.TOP, 663 } 664 665 SINGLE_TOKENS = { 666 **tokens.Tokenizer.SINGLE_TOKENS, 667 "$": TokenType.PARAMETER, 668 } 669 670 VAR_SINGLE_TOKENS = {"$"} 671 672 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 673 674 class Generator(generator.Generator): 675 PARAMETER_TOKEN = "$" 676 MATCHED_BY_SOURCE = False 677 SINGLE_STRING_INTERVAL = True 678 JOIN_HINTS = False 679 TABLE_HINTS = False 680 QUERY_HINTS = False 681 AGGREGATE_FILTER_SUPPORTED = False 682 SUPPORTS_TABLE_COPY = False 683 COLLATE_IS_FUNC = True 684 LIMIT_ONLY_LITERALS = True 685 JSON_KEY_VALUE_PAIR_SEP = "," 686 INSERT_OVERWRITE = " OVERWRITE INTO" 687 688 TRANSFORMS = { 689 **generator.Generator.TRANSFORMS, 690 exp.ArgMax: rename_func("MAX_BY"), 691 exp.ArgMin: rename_func("MIN_BY"), 692 exp.Array: inline_array_sql, 693 exp.ArrayConcat: rename_func("ARRAY_CAT"), 694 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 695 exp.ArrayJoin: rename_func("ARRAY_TO_STRING"), 696 exp.AtTimeZone: lambda self, e: self.func( 697 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 698 ), 699 exp.BitwiseXor: rename_func("BITXOR"), 700 exp.DateAdd: date_delta_sql("DATEADD"), 701 exp.DateDiff: date_delta_sql("DATEDIFF"), 702 exp.DateStrToDate: datestrtodate_sql, 703 exp.DataType: _datatype_sql, 704 exp.DayOfMonth: rename_func("DAYOFMONTH"), 705 exp.DayOfWeek: rename_func("DAYOFWEEK"), 706 exp.DayOfYear: rename_func("DAYOFYEAR"), 707 exp.Explode: rename_func("FLATTEN"), 708 exp.Extract: rename_func("DATE_PART"), 709 exp.FromTimeZone: lambda self, e: self.func( 710 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 711 ), 712 exp.GenerateSeries: lambda self, e: self.func( 713 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 714 ), 715 exp.GroupConcat: rename_func("LISTAGG"), 716 exp.If: if_sql(name="IFF", false_value="NULL"), 717 exp.JSONExtract: lambda self, e: f"{self.sql(e, 'this')}[{self.sql(e, 'expression')}]", 718 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 719 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 720 exp.LogicalOr: rename_func("BOOLOR_AGG"), 721 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 722 exp.Max: max_or_greatest, 723 exp.Min: min_or_least, 724 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 725 exp.PercentileCont: transforms.preprocess( 726 [transforms.add_within_group_for_percentiles] 727 ), 728 exp.PercentileDisc: transforms.preprocess( 729 [transforms.add_within_group_for_percentiles] 730 ), 731 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 732 exp.RegexpILike: _regexpilike_sql, 733 exp.Rand: rename_func("RANDOM"), 734 exp.Select: transforms.preprocess( 735 [ 736 transforms.eliminate_distinct_on, 737 transforms.explode_to_unnest(), 738 transforms.eliminate_semi_and_anti_joins, 739 ] 740 ), 741 exp.SHA: rename_func("SHA1"), 742 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 743 exp.StartsWith: rename_func("STARTSWITH"), 744 exp.StrPosition: lambda self, e: self.func( 745 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 746 ), 747 exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})", 748 exp.Struct: lambda self, e: self.func( 749 "OBJECT_CONSTRUCT", 750 *(arg for expression in e.expressions for arg in expression.flatten()), 751 ), 752 exp.Stuff: rename_func("INSERT"), 753 exp.TimestampDiff: lambda self, e: self.func( 754 "TIMESTAMPDIFF", e.unit, e.expression, e.this 755 ), 756 exp.TimestampTrunc: timestamptrunc_sql, 757 exp.TimeStrToTime: timestrtotime_sql, 758 exp.TimeToStr: lambda self, e: self.func( 759 "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e) 760 ), 761 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 762 exp.ToArray: rename_func("TO_ARRAY"), 763 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 764 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 765 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 766 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 767 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 768 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 769 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 770 exp.Xor: rename_func("BOOLXOR"), 771 } 772 773 TYPE_MAPPING = { 774 **generator.Generator.TYPE_MAPPING, 775 exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ", 776 } 777 778 STAR_MAPPING = { 779 "except": "EXCLUDE", 780 "replace": "RENAME", 781 } 782 783 PROPERTIES_LOCATION = { 784 **generator.Generator.PROPERTIES_LOCATION, 785 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 786 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 787 } 788 789 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 790 milli = expression.args.get("milli") 791 if milli is not None: 792 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 793 expression.set("nano", milli_to_nano) 794 795 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 796 797 def trycast_sql(self, expression: exp.TryCast) -> str: 798 value = expression.this 799 800 if value.type is None: 801 from sqlglot.optimizer.annotate_types import annotate_types 802 803 value = annotate_types(value) 804 805 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 806 return super().trycast_sql(expression) 807 808 # TRY_CAST only works for string values in Snowflake 809 return self.cast_sql(expression) 810 811 def log_sql(self, expression: exp.Log) -> str: 812 if not expression.expression: 813 return self.func("LN", expression.this) 814 815 return super().log_sql(expression) 816 817 def unnest_sql(self, expression: exp.Unnest) -> str: 818 unnest_alias = expression.args.get("alias") 819 offset = expression.args.get("offset") 820 821 columns = [ 822 exp.to_identifier("seq"), 823 exp.to_identifier("key"), 824 exp.to_identifier("path"), 825 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 826 seq_get(unnest_alias.columns if unnest_alias else [], 0) 827 or exp.to_identifier("value"), 828 exp.to_identifier("this"), 829 ] 830 831 if unnest_alias: 832 unnest_alias.set("columns", columns) 833 else: 834 unnest_alias = exp.TableAlias(this="_u", columns=columns) 835 836 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 837 alias = self.sql(unnest_alias) 838 alias = f" AS {alias}" if alias else "" 839 return f"{explode}{alias}" 840 841 def show_sql(self, expression: exp.Show) -> str: 842 terse = "TERSE " if expression.args.get("terse") else "" 843 like = self.sql(expression, "like") 844 like = f" LIKE {like}" if like else "" 845 846 scope = self.sql(expression, "scope") 847 scope = f" {scope}" if scope else "" 848 849 scope_kind = self.sql(expression, "scope_kind") 850 if scope_kind: 851 scope_kind = f" IN {scope_kind}" 852 853 starts_with = self.sql(expression, "starts_with") 854 if starts_with: 855 starts_with = f" STARTS WITH {starts_with}" 856 857 limit = self.sql(expression, "limit") 858 859 from_ = self.sql(expression, "from") 860 if from_: 861 from_ = f" FROM {from_}" 862 863 return ( 864 f"SHOW {terse}{expression.name}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 865 ) 866 867 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 868 # Other dialects don't support all of the following parameters, so we need to 869 # generate default values as necessary to ensure the transpilation is correct 870 group = expression.args.get("group") 871 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 872 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 873 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 874 875 return self.func( 876 "REGEXP_SUBSTR", 877 expression.this, 878 expression.expression, 879 position, 880 occurrence, 881 parameters, 882 group, 883 ) 884 885 def except_op(self, expression: exp.Except) -> str: 886 if not expression.args.get("distinct", False): 887 self.unsupported("EXCEPT with All is not supported in Snowflake") 888 return super().except_op(expression) 889 890 def intersect_op(self, expression: exp.Intersect) -> str: 891 if not expression.args.get("distinct", False): 892 self.unsupported("INTERSECT with All is not supported in Snowflake") 893 return super().intersect_op(expression) 894 895 def describe_sql(self, expression: exp.Describe) -> str: 896 # Default to table if kind is unknown 897 kind_value = expression.args.get("kind") or "TABLE" 898 kind = f" {kind_value}" if kind_value else "" 899 this = f" {self.sql(expression, 'this')}" 900 expressions = self.expressions(expression, flat=True) 901 expressions = f" {expressions}" if expressions else "" 902 return f"DESCRIBE{kind}{this}{expressions}" 903 904 def generatedasidentitycolumnconstraint_sql( 905 self, expression: exp.GeneratedAsIdentityColumnConstraint 906 ) -> str: 907 start = expression.args.get("start") 908 start = f" START {start}" if start else "" 909 increment = expression.args.get("increment") 910 increment = f" INCREMENT {increment}" if increment else "" 911 return f"AUTOINCREMENT{start}{increment}" 912 913 def swaptable_sql(self, expression: exp.SwapTable) -> str: 914 this = self.sql(expression, "this") 915 return f"SWAP WITH {this}" 916 917 def with_properties(self, properties: exp.Properties) -> str: 918 return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ")
Specifies the strategy according to which identifiers should be normalized.
Indicates the default NULL
ordering method to use if not explicitly set.
Possible values: "nulls_are_small"
, "nulls_are_large"
, "nulls_are_last"
Determines whether or not user-defined data types are supported.
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example, WITH y(c) AS ( SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 ) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
Determines whether or not a size in the table sample clause represents percentage.
Associates this dialect's time formats with their equivalent Python strftime
format.
369 def quote_identifier(self, expression: E, identify: bool = True) -> E: 370 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 371 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 372 if ( 373 isinstance(expression, exp.Identifier) 374 and isinstance(expression.parent, exp.Table) 375 and expression.name.lower() == "dual" 376 ): 377 return t.cast(E, expression) 378 379 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an
Identifier
, this method is a no-op. - identify: If set to
False
, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- ESCAPE_SEQUENCES
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
381 class Parser(parser.Parser): 382 IDENTIFY_PIVOT_STRINGS = True 383 384 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 385 386 FUNCTIONS = { 387 **parser.Parser.FUNCTIONS, 388 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 389 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 390 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 391 this=seq_get(args, 1), expression=seq_get(args, 0) 392 ), 393 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 394 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 395 start=seq_get(args, 0), 396 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 397 step=seq_get(args, 2), 398 ), 399 "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list, 400 "BITXOR": binary_from_function(exp.BitwiseXor), 401 "BIT_XOR": binary_from_function(exp.BitwiseXor), 402 "BOOLXOR": binary_from_function(exp.Xor), 403 "CONVERT_TIMEZONE": _parse_convert_timezone, 404 "DATE_TRUNC": _date_trunc_to_time, 405 "DATEADD": lambda args: exp.DateAdd( 406 this=seq_get(args, 2), 407 expression=seq_get(args, 1), 408 unit=_map_date_part(seq_get(args, 0)), 409 ), 410 "DATEDIFF": _parse_datediff, 411 "DIV0": _div0_to_if, 412 "FLATTEN": exp.Explode.from_arg_list, 413 "IFF": exp.If.from_arg_list, 414 "LAST_DAY": lambda args: exp.LastDay( 415 this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1)) 416 ), 417 "LISTAGG": exp.GroupConcat.from_arg_list, 418 "NULLIFZERO": _nullifzero_to_if, 419 "OBJECT_CONSTRUCT": _parse_object_construct, 420 "REGEXP_REPLACE": _parse_regexp_replace, 421 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 422 "RLIKE": exp.RegexpLike.from_arg_list, 423 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 424 "TIMEDIFF": _parse_datediff, 425 "TIMESTAMPDIFF": _parse_datediff, 426 "TIMESTAMPFROMPARTS": _parse_timestamp_from_parts, 427 "TIMESTAMP_FROM_PARTS": _parse_timestamp_from_parts, 428 "TO_TIMESTAMP": _parse_to_timestamp, 429 "TO_VARCHAR": exp.ToChar.from_arg_list, 430 "ZEROIFNULL": _zeroifnull_to_if, 431 } 432 433 FUNCTION_PARSERS = { 434 **parser.Parser.FUNCTION_PARSERS, 435 "DATE_PART": _parse_date_part, 436 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 437 } 438 FUNCTION_PARSERS.pop("TRIM") 439 440 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 441 442 RANGE_PARSERS = { 443 **parser.Parser.RANGE_PARSERS, 444 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 445 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 446 TokenType.COLON: _parse_colon_get_path, 447 } 448 449 ALTER_PARSERS = { 450 **parser.Parser.ALTER_PARSERS, 451 "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")), 452 "UNSET": lambda self: self.expression( 453 exp.Set, 454 tag=self._match_text_seq("TAG"), 455 expressions=self._parse_csv(self._parse_id_var), 456 unset=True, 457 ), 458 "SWAP": lambda self: self._parse_alter_table_swap(), 459 } 460 461 STATEMENT_PARSERS = { 462 **parser.Parser.STATEMENT_PARSERS, 463 TokenType.SHOW: lambda self: self._parse_show(), 464 } 465 466 PROPERTY_PARSERS = { 467 **parser.Parser.PROPERTY_PARSERS, 468 "LOCATION": lambda self: self._parse_location(), 469 } 470 471 SHOW_PARSERS = { 472 "SCHEMAS": _show_parser("SCHEMAS"), 473 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 474 "OBJECTS": _show_parser("OBJECTS"), 475 "TERSE OBJECTS": _show_parser("OBJECTS"), 476 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 477 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 478 "COLUMNS": _show_parser("COLUMNS"), 479 } 480 481 STAGED_FILE_SINGLE_TOKENS = { 482 TokenType.DOT, 483 TokenType.MOD, 484 TokenType.SLASH, 485 } 486 487 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 488 489 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 490 if is_map: 491 # Keys are strings in Snowflake's objects, see also: 492 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 493 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 494 return self._parse_slice(self._parse_string()) 495 496 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 497 498 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 499 lateral = super()._parse_lateral() 500 if not lateral: 501 return lateral 502 503 if isinstance(lateral.this, exp.Explode): 504 table_alias = lateral.args.get("alias") 505 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 506 if table_alias and not table_alias.args.get("columns"): 507 table_alias.set("columns", columns) 508 elif not table_alias: 509 exp.alias_(lateral, "_flattened", table=columns, copy=False) 510 511 return lateral 512 513 def _parse_at_before(self, table: exp.Table) -> exp.Table: 514 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 515 index = self._index 516 if self._match_texts(("AT", "BEFORE")): 517 this = self._prev.text.upper() 518 kind = ( 519 self._match(TokenType.L_PAREN) 520 and self._match_texts(self.HISTORICAL_DATA_KIND) 521 and self._prev.text.upper() 522 ) 523 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 524 525 if expression: 526 self._match_r_paren() 527 when = self.expression( 528 exp.HistoricalData, this=this, kind=kind, expression=expression 529 ) 530 table.set("when", when) 531 else: 532 self._retreat(index) 533 534 return table 535 536 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 537 # https://docs.snowflake.com/en/user-guide/querying-stage 538 if self._match(TokenType.STRING, advance=False): 539 table = self._parse_string() 540 elif self._match_text_seq("@", advance=False): 541 table = self._parse_location_path() 542 else: 543 table = None 544 545 if table: 546 file_format = None 547 pattern = None 548 549 self._match(TokenType.L_PAREN) 550 while self._curr and not self._match(TokenType.R_PAREN): 551 if self._match_text_seq("FILE_FORMAT", "=>"): 552 file_format = self._parse_string() or super()._parse_table_parts() 553 elif self._match_text_seq("PATTERN", "=>"): 554 pattern = self._parse_string() 555 else: 556 break 557 558 self._match(TokenType.COMMA) 559 560 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 561 else: 562 table = super()._parse_table_parts(schema=schema) 563 564 return self._parse_at_before(table) 565 566 def _parse_id_var( 567 self, 568 any_token: bool = True, 569 tokens: t.Optional[t.Collection[TokenType]] = None, 570 ) -> t.Optional[exp.Expression]: 571 if self._match_text_seq("IDENTIFIER", "("): 572 identifier = ( 573 super()._parse_id_var(any_token=any_token, tokens=tokens) 574 or self._parse_string() 575 ) 576 self._match_r_paren() 577 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 578 579 return super()._parse_id_var(any_token=any_token, tokens=tokens) 580 581 def _parse_show_snowflake(self, this: str) -> exp.Show: 582 scope = None 583 scope_kind = None 584 585 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 586 # which is syntactically valid but has no effect on the output 587 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 588 589 like = self._parse_string() if self._match(TokenType.LIKE) else None 590 591 if self._match(TokenType.IN): 592 if self._match_text_seq("ACCOUNT"): 593 scope_kind = "ACCOUNT" 594 elif self._match_set(self.DB_CREATABLES): 595 scope_kind = self._prev.text.upper() 596 if self._curr: 597 scope = self._parse_table_parts() 598 elif self._curr: 599 scope_kind = "SCHEMA" if this == "OBJECTS" else "TABLE" 600 scope = self._parse_table_parts() 601 602 return self.expression( 603 exp.Show, 604 **{ 605 "terse": terse, 606 "this": this, 607 "like": like, 608 "scope": scope, 609 "scope_kind": scope_kind, 610 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 611 "limit": self._parse_limit(), 612 "from": self._parse_string() if self._match(TokenType.FROM) else None, 613 }, 614 ) 615 616 def _parse_alter_table_swap(self) -> exp.SwapTable: 617 self._match_text_seq("WITH") 618 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 619 620 def _parse_location(self) -> exp.LocationProperty: 621 self._match(TokenType.EQ) 622 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 623 624 def _parse_location_path(self) -> exp.Var: 625 parts = [self._advance_any(ignore_reserved=True)] 626 627 # We avoid consuming a comma token because external tables like @foo and @bar 628 # can be joined in a query with a comma separator. 629 while self._is_connected() and not self._match(TokenType.COMMA, advance=False): 630 parts.append(self._advance_any(ignore_reserved=True)) 631 632 return exp.var("".join(part.text for part in parts if part))
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- CONSTRAINT_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
634 class Tokenizer(tokens.Tokenizer): 635 STRING_ESCAPES = ["\\", "'"] 636 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 637 RAW_STRINGS = ["$$"] 638 COMMENTS = ["--", "//", ("/*", "*/")] 639 640 KEYWORDS = { 641 **tokens.Tokenizer.KEYWORDS, 642 "BYTEINT": TokenType.INT, 643 "CHAR VARYING": TokenType.VARCHAR, 644 "CHARACTER VARYING": TokenType.VARCHAR, 645 "EXCLUDE": TokenType.EXCEPT, 646 "ILIKE ANY": TokenType.ILIKE_ANY, 647 "LIKE ANY": TokenType.LIKE_ANY, 648 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 649 "MINUS": TokenType.EXCEPT, 650 "NCHAR VARYING": TokenType.VARCHAR, 651 "PUT": TokenType.COMMAND, 652 "REMOVE": TokenType.COMMAND, 653 "RENAME": TokenType.REPLACE, 654 "RM": TokenType.COMMAND, 655 "SAMPLE": TokenType.TABLE_SAMPLE, 656 "SQL_DOUBLE": TokenType.DOUBLE, 657 "SQL_VARCHAR": TokenType.VARCHAR, 658 "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ, 659 "TIMESTAMP_NTZ": TokenType.TIMESTAMP, 660 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 661 "TIMESTAMPNTZ": TokenType.TIMESTAMP, 662 "TOP": TokenType.TOP, 663 } 664 665 SINGLE_TOKENS = { 666 **tokens.Tokenizer.SINGLE_TOKENS, 667 "$": TokenType.PARAMETER, 668 } 669 670 VAR_SINGLE_TOKENS = {"$"} 671 672 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
674 class Generator(generator.Generator): 675 PARAMETER_TOKEN = "$" 676 MATCHED_BY_SOURCE = False 677 SINGLE_STRING_INTERVAL = True 678 JOIN_HINTS = False 679 TABLE_HINTS = False 680 QUERY_HINTS = False 681 AGGREGATE_FILTER_SUPPORTED = False 682 SUPPORTS_TABLE_COPY = False 683 COLLATE_IS_FUNC = True 684 LIMIT_ONLY_LITERALS = True 685 JSON_KEY_VALUE_PAIR_SEP = "," 686 INSERT_OVERWRITE = " OVERWRITE INTO" 687 688 TRANSFORMS = { 689 **generator.Generator.TRANSFORMS, 690 exp.ArgMax: rename_func("MAX_BY"), 691 exp.ArgMin: rename_func("MIN_BY"), 692 exp.Array: inline_array_sql, 693 exp.ArrayConcat: rename_func("ARRAY_CAT"), 694 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 695 exp.ArrayJoin: rename_func("ARRAY_TO_STRING"), 696 exp.AtTimeZone: lambda self, e: self.func( 697 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 698 ), 699 exp.BitwiseXor: rename_func("BITXOR"), 700 exp.DateAdd: date_delta_sql("DATEADD"), 701 exp.DateDiff: date_delta_sql("DATEDIFF"), 702 exp.DateStrToDate: datestrtodate_sql, 703 exp.DataType: _datatype_sql, 704 exp.DayOfMonth: rename_func("DAYOFMONTH"), 705 exp.DayOfWeek: rename_func("DAYOFWEEK"), 706 exp.DayOfYear: rename_func("DAYOFYEAR"), 707 exp.Explode: rename_func("FLATTEN"), 708 exp.Extract: rename_func("DATE_PART"), 709 exp.FromTimeZone: lambda self, e: self.func( 710 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 711 ), 712 exp.GenerateSeries: lambda self, e: self.func( 713 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 714 ), 715 exp.GroupConcat: rename_func("LISTAGG"), 716 exp.If: if_sql(name="IFF", false_value="NULL"), 717 exp.JSONExtract: lambda self, e: f"{self.sql(e, 'this')}[{self.sql(e, 'expression')}]", 718 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 719 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 720 exp.LogicalOr: rename_func("BOOLOR_AGG"), 721 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 722 exp.Max: max_or_greatest, 723 exp.Min: min_or_least, 724 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 725 exp.PercentileCont: transforms.preprocess( 726 [transforms.add_within_group_for_percentiles] 727 ), 728 exp.PercentileDisc: transforms.preprocess( 729 [transforms.add_within_group_for_percentiles] 730 ), 731 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 732 exp.RegexpILike: _regexpilike_sql, 733 exp.Rand: rename_func("RANDOM"), 734 exp.Select: transforms.preprocess( 735 [ 736 transforms.eliminate_distinct_on, 737 transforms.explode_to_unnest(), 738 transforms.eliminate_semi_and_anti_joins, 739 ] 740 ), 741 exp.SHA: rename_func("SHA1"), 742 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 743 exp.StartsWith: rename_func("STARTSWITH"), 744 exp.StrPosition: lambda self, e: self.func( 745 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 746 ), 747 exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})", 748 exp.Struct: lambda self, e: self.func( 749 "OBJECT_CONSTRUCT", 750 *(arg for expression in e.expressions for arg in expression.flatten()), 751 ), 752 exp.Stuff: rename_func("INSERT"), 753 exp.TimestampDiff: lambda self, e: self.func( 754 "TIMESTAMPDIFF", e.unit, e.expression, e.this 755 ), 756 exp.TimestampTrunc: timestamptrunc_sql, 757 exp.TimeStrToTime: timestrtotime_sql, 758 exp.TimeToStr: lambda self, e: self.func( 759 "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e) 760 ), 761 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 762 exp.ToArray: rename_func("TO_ARRAY"), 763 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 764 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 765 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 766 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 767 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 768 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 769 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 770 exp.Xor: rename_func("BOOLXOR"), 771 } 772 773 TYPE_MAPPING = { 774 **generator.Generator.TYPE_MAPPING, 775 exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ", 776 } 777 778 STAR_MAPPING = { 779 "except": "EXCLUDE", 780 "replace": "RENAME", 781 } 782 783 PROPERTIES_LOCATION = { 784 **generator.Generator.PROPERTIES_LOCATION, 785 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 786 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 787 } 788 789 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 790 milli = expression.args.get("milli") 791 if milli is not None: 792 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 793 expression.set("nano", milli_to_nano) 794 795 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 796 797 def trycast_sql(self, expression: exp.TryCast) -> str: 798 value = expression.this 799 800 if value.type is None: 801 from sqlglot.optimizer.annotate_types import annotate_types 802 803 value = annotate_types(value) 804 805 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 806 return super().trycast_sql(expression) 807 808 # TRY_CAST only works for string values in Snowflake 809 return self.cast_sql(expression) 810 811 def log_sql(self, expression: exp.Log) -> str: 812 if not expression.expression: 813 return self.func("LN", expression.this) 814 815 return super().log_sql(expression) 816 817 def unnest_sql(self, expression: exp.Unnest) -> str: 818 unnest_alias = expression.args.get("alias") 819 offset = expression.args.get("offset") 820 821 columns = [ 822 exp.to_identifier("seq"), 823 exp.to_identifier("key"), 824 exp.to_identifier("path"), 825 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 826 seq_get(unnest_alias.columns if unnest_alias else [], 0) 827 or exp.to_identifier("value"), 828 exp.to_identifier("this"), 829 ] 830 831 if unnest_alias: 832 unnest_alias.set("columns", columns) 833 else: 834 unnest_alias = exp.TableAlias(this="_u", columns=columns) 835 836 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 837 alias = self.sql(unnest_alias) 838 alias = f" AS {alias}" if alias else "" 839 return f"{explode}{alias}" 840 841 def show_sql(self, expression: exp.Show) -> str: 842 terse = "TERSE " if expression.args.get("terse") else "" 843 like = self.sql(expression, "like") 844 like = f" LIKE {like}" if like else "" 845 846 scope = self.sql(expression, "scope") 847 scope = f" {scope}" if scope else "" 848 849 scope_kind = self.sql(expression, "scope_kind") 850 if scope_kind: 851 scope_kind = f" IN {scope_kind}" 852 853 starts_with = self.sql(expression, "starts_with") 854 if starts_with: 855 starts_with = f" STARTS WITH {starts_with}" 856 857 limit = self.sql(expression, "limit") 858 859 from_ = self.sql(expression, "from") 860 if from_: 861 from_ = f" FROM {from_}" 862 863 return ( 864 f"SHOW {terse}{expression.name}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 865 ) 866 867 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 868 # Other dialects don't support all of the following parameters, so we need to 869 # generate default values as necessary to ensure the transpilation is correct 870 group = expression.args.get("group") 871 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 872 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 873 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 874 875 return self.func( 876 "REGEXP_SUBSTR", 877 expression.this, 878 expression.expression, 879 position, 880 occurrence, 881 parameters, 882 group, 883 ) 884 885 def except_op(self, expression: exp.Except) -> str: 886 if not expression.args.get("distinct", False): 887 self.unsupported("EXCEPT with All is not supported in Snowflake") 888 return super().except_op(expression) 889 890 def intersect_op(self, expression: exp.Intersect) -> str: 891 if not expression.args.get("distinct", False): 892 self.unsupported("INTERSECT with All is not supported in Snowflake") 893 return super().intersect_op(expression) 894 895 def describe_sql(self, expression: exp.Describe) -> str: 896 # Default to table if kind is unknown 897 kind_value = expression.args.get("kind") or "TABLE" 898 kind = f" {kind_value}" if kind_value else "" 899 this = f" {self.sql(expression, 'this')}" 900 expressions = self.expressions(expression, flat=True) 901 expressions = f" {expressions}" if expressions else "" 902 return f"DESCRIBE{kind}{this}{expressions}" 903 904 def generatedasidentitycolumnconstraint_sql( 905 self, expression: exp.GeneratedAsIdentityColumnConstraint 906 ) -> str: 907 start = expression.args.get("start") 908 start = f" START {start}" if start else "" 909 increment = expression.args.get("increment") 910 increment = f" INCREMENT {increment}" if increment else "" 911 return f"AUTOINCREMENT{start}{increment}" 912 913 def swaptable_sql(self, expression: exp.SwapTable) -> str: 914 this = self.sql(expression, "this") 915 return f"SWAP WITH {this}" 916 917 def with_properties(self, properties: exp.Properties) -> str: 918 return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ")
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
789 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 790 milli = expression.args.get("milli") 791 if milli is not None: 792 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 793 expression.set("nano", milli_to_nano) 794 795 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
797 def trycast_sql(self, expression: exp.TryCast) -> str: 798 value = expression.this 799 800 if value.type is None: 801 from sqlglot.optimizer.annotate_types import annotate_types 802 803 value = annotate_types(value) 804 805 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 806 return super().trycast_sql(expression) 807 808 # TRY_CAST only works for string values in Snowflake 809 return self.cast_sql(expression)
817 def unnest_sql(self, expression: exp.Unnest) -> str: 818 unnest_alias = expression.args.get("alias") 819 offset = expression.args.get("offset") 820 821 columns = [ 822 exp.to_identifier("seq"), 823 exp.to_identifier("key"), 824 exp.to_identifier("path"), 825 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 826 seq_get(unnest_alias.columns if unnest_alias else [], 0) 827 or exp.to_identifier("value"), 828 exp.to_identifier("this"), 829 ] 830 831 if unnest_alias: 832 unnest_alias.set("columns", columns) 833 else: 834 unnest_alias = exp.TableAlias(this="_u", columns=columns) 835 836 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 837 alias = self.sql(unnest_alias) 838 alias = f" AS {alias}" if alias else "" 839 return f"{explode}{alias}"
841 def show_sql(self, expression: exp.Show) -> str: 842 terse = "TERSE " if expression.args.get("terse") else "" 843 like = self.sql(expression, "like") 844 like = f" LIKE {like}" if like else "" 845 846 scope = self.sql(expression, "scope") 847 scope = f" {scope}" if scope else "" 848 849 scope_kind = self.sql(expression, "scope_kind") 850 if scope_kind: 851 scope_kind = f" IN {scope_kind}" 852 853 starts_with = self.sql(expression, "starts_with") 854 if starts_with: 855 starts_with = f" STARTS WITH {starts_with}" 856 857 limit = self.sql(expression, "limit") 858 859 from_ = self.sql(expression, "from") 860 if from_: 861 from_ = f" FROM {from_}" 862 863 return ( 864 f"SHOW {terse}{expression.name}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 865 )
867 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 868 # Other dialects don't support all of the following parameters, so we need to 869 # generate default values as necessary to ensure the transpilation is correct 870 group = expression.args.get("group") 871 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 872 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 873 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 874 875 return self.func( 876 "REGEXP_SUBSTR", 877 expression.this, 878 expression.expression, 879 position, 880 occurrence, 881 parameters, 882 group, 883 )
895 def describe_sql(self, expression: exp.Describe) -> str: 896 # Default to table if kind is unknown 897 kind_value = expression.args.get("kind") or "TABLE" 898 kind = f" {kind_value}" if kind_value else "" 899 this = f" {self.sql(expression, 'this')}" 900 expressions = self.expressions(expression, flat=True) 901 expressions = f" {expressions}" if expressions else "" 902 return f"DESCRIBE{kind}{this}{expressions}"
904 def generatedasidentitycolumnconstraint_sql( 905 self, expression: exp.GeneratedAsIdentityColumnConstraint 906 ) -> str: 907 start = expression.args.get("start") 908 start = f" START {start}" if start else "" 909 increment = expression.args.get("increment") 910 increment = f" INCREMENT {increment}" if increment else "" 911 return f"AUTOINCREMENT{start}{increment}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- KEY_VALUE_DEFINITIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql